{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 700.0, "eval_steps": 25.0, "global_step": 29400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.023880597014925373, "grad_norm": Infinity, "learning_rate": 1e-05, "loss": 76.8913, "step": 1 }, { "epoch": 0.04776119402985075, "grad_norm": 395.8459777832031, "learning_rate": 1e-05, "loss": 77.3235, "step": 2 }, { "epoch": 0.07164179104477612, "grad_norm": 161.73968505859375, "learning_rate": 9.988095238095239e-06, "loss": 67.3668, "step": 3 }, { "epoch": 0.0955223880597015, "grad_norm": 145.1616973876953, "learning_rate": 9.976190476190477e-06, "loss": 62.8542, "step": 4 }, { "epoch": 0.11940298507462686, "grad_norm": 52.10374069213867, "learning_rate": 9.964285714285714e-06, "loss": 59.8627, "step": 5 }, { "epoch": 0.14328358208955225, "grad_norm": 31.430763244628906, "learning_rate": 9.952380952380954e-06, "loss": 59.2378, "step": 6 }, { "epoch": 0.16716417910447762, "grad_norm": 37.42692947387695, "learning_rate": 9.940476190476192e-06, "loss": 58.3114, "step": 7 }, { "epoch": 0.191044776119403, "grad_norm": 32.284332275390625, "learning_rate": 9.92857142857143e-06, "loss": 57.3408, "step": 8 }, { "epoch": 0.21492537313432836, "grad_norm": 28.520849227905273, "learning_rate": 9.916666666666668e-06, "loss": 56.9256, "step": 9 }, { "epoch": 0.23880597014925373, "grad_norm": 40.480167388916016, "learning_rate": 9.904761904761906e-06, "loss": 57.4842, "step": 10 }, { "epoch": 0.2626865671641791, "grad_norm": 20.3512020111084, "learning_rate": 9.892857142857143e-06, "loss": 56.7597, "step": 11 }, { "epoch": 0.2865671641791045, "grad_norm": 22.308382034301758, "learning_rate": 9.880952380952381e-06, "loss": 56.594, "step": 12 }, { "epoch": 0.31044776119402984, "grad_norm": 14.086284637451172, "learning_rate": 9.869047619047621e-06, "loss": 56.6327, "step": 13 }, { "epoch": 0.33432835820895523, "grad_norm": NaN, "learning_rate": 9.857142857142859e-06, "loss": 63.0791, "step": 14 }, { "epoch": 0.3582089552238806, "grad_norm": 17.055604934692383, "learning_rate": 9.857142857142859e-06, "loss": 56.1551, "step": 15 }, { "epoch": 0.382089552238806, "grad_norm": 18.52287483215332, "learning_rate": 9.845238095238097e-06, "loss": 54.9502, "step": 16 }, { "epoch": 0.4059701492537313, "grad_norm": 32.66905212402344, "learning_rate": 9.833333333333333e-06, "loss": 55.6494, "step": 17 }, { "epoch": 0.4298507462686567, "grad_norm": 24.075742721557617, "learning_rate": 9.821428571428573e-06, "loss": 55.4766, "step": 18 }, { "epoch": 0.4537313432835821, "grad_norm": 31.505783081054688, "learning_rate": 9.80952380952381e-06, "loss": 55.1481, "step": 19 }, { "epoch": 0.47761194029850745, "grad_norm": 30.53020477294922, "learning_rate": 9.797619047619048e-06, "loss": 55.158, "step": 20 }, { "epoch": 0.5014925373134328, "grad_norm": 14.44444751739502, "learning_rate": 9.785714285714286e-06, "loss": 54.8286, "step": 21 }, { "epoch": 0.5253731343283582, "grad_norm": 53.601078033447266, "learning_rate": 9.773809523809524e-06, "loss": 55.1466, "step": 22 }, { "epoch": 0.5492537313432836, "grad_norm": 37.156028747558594, "learning_rate": 9.761904761904762e-06, "loss": 54.629, "step": 23 }, { "epoch": 0.573134328358209, "grad_norm": 41.84994125366211, "learning_rate": 9.75e-06, "loss": 55.2766, "step": 24 }, { "epoch": 0.5970149253731343, "grad_norm": 50.61705017089844, "learning_rate": 9.73809523809524e-06, "loss": 54.3497, "step": 25 }, { "epoch": 0.6208955223880597, "grad_norm": 16.964982986450195, "learning_rate": 9.726190476190477e-06, "loss": 55.1673, "step": 26 }, { "epoch": 0.6447761194029851, "grad_norm": 22.71157455444336, "learning_rate": 9.714285714285715e-06, "loss": 54.8001, "step": 27 }, { "epoch": 0.6686567164179105, "grad_norm": 16.372802734375, "learning_rate": 9.702380952380953e-06, "loss": 54.4506, "step": 28 }, { "epoch": 0.6925373134328359, "grad_norm": 13.21664047241211, "learning_rate": 9.690476190476191e-06, "loss": 53.0488, "step": 29 }, { "epoch": 0.7164179104477612, "grad_norm": NaN, "learning_rate": 9.678571428571429e-06, "loss": 74.7707, "step": 30 }, { "epoch": 0.7402985074626866, "grad_norm": 23.045652389526367, "learning_rate": 9.678571428571429e-06, "loss": 54.1909, "step": 31 }, { "epoch": 0.764179104477612, "grad_norm": 15.034178733825684, "learning_rate": 9.666666666666667e-06, "loss": 53.3253, "step": 32 }, { "epoch": 0.7880597014925373, "grad_norm": 14.148232460021973, "learning_rate": 9.654761904761906e-06, "loss": 53.693, "step": 33 }, { "epoch": 0.8119402985074626, "grad_norm": NaN, "learning_rate": 9.642857142857144e-06, "loss": 61.3517, "step": 34 }, { "epoch": 0.835820895522388, "grad_norm": 14.757994651794434, "learning_rate": 9.642857142857144e-06, "loss": 53.3175, "step": 35 }, { "epoch": 0.8597014925373134, "grad_norm": 10.875706672668457, "learning_rate": 9.630952380952382e-06, "loss": 54.2592, "step": 36 }, { "epoch": 0.8835820895522388, "grad_norm": 9.926539421081543, "learning_rate": 9.61904761904762e-06, "loss": 53.8721, "step": 37 }, { "epoch": 0.9074626865671642, "grad_norm": 17.697235107421875, "learning_rate": 9.607142857142858e-06, "loss": 54.2901, "step": 38 }, { "epoch": 0.9313432835820895, "grad_norm": 29.19430160522461, "learning_rate": 9.595238095238096e-06, "loss": 53.1261, "step": 39 }, { "epoch": 0.9552238805970149, "grad_norm": 12.3985595703125, "learning_rate": 9.583333333333335e-06, "loss": 53.9815, "step": 40 }, { "epoch": 0.9791044776119403, "grad_norm": 17.109691619873047, "learning_rate": 9.571428571428573e-06, "loss": 52.5838, "step": 41 }, { "epoch": 1.0, "grad_norm": 14.529239654541016, "learning_rate": 9.559523809523811e-06, "loss": 46.1888, "step": 42 }, { "epoch": 1.0238805970149254, "grad_norm": 15.683514595031738, "learning_rate": 9.547619047619049e-06, "loss": 52.2043, "step": 43 }, { "epoch": 1.0477611940298508, "grad_norm": 26.219507217407227, "learning_rate": 9.535714285714287e-06, "loss": 53.09, "step": 44 }, { "epoch": 1.0716417910447762, "grad_norm": 19.859697341918945, "learning_rate": 9.523809523809525e-06, "loss": 52.5858, "step": 45 }, { "epoch": 1.0955223880597016, "grad_norm": 11.090332984924316, "learning_rate": 9.511904761904763e-06, "loss": 53.7674, "step": 46 }, { "epoch": 1.1194029850746268, "grad_norm": 15.586993217468262, "learning_rate": 9.5e-06, "loss": 53.4901, "step": 47 }, { "epoch": 1.1432835820895522, "grad_norm": 22.734928131103516, "learning_rate": 9.488095238095238e-06, "loss": 53.6667, "step": 48 }, { "epoch": 1.1671641791044776, "grad_norm": 16.382047653198242, "learning_rate": 9.476190476190476e-06, "loss": 53.2914, "step": 49 }, { "epoch": 1.191044776119403, "grad_norm": 15.916092872619629, "learning_rate": 9.464285714285714e-06, "loss": 51.729, "step": 50 }, { "epoch": 1.2149253731343284, "grad_norm": NaN, "learning_rate": 9.452380952380952e-06, "loss": 66.3151, "step": 51 }, { "epoch": 1.2388059701492538, "grad_norm": 20.121395111083984, "learning_rate": 9.452380952380952e-06, "loss": 53.866, "step": 52 }, { "epoch": 1.2626865671641792, "grad_norm": NaN, "learning_rate": 9.440476190476192e-06, "loss": 67.3538, "step": 53 }, { "epoch": 1.2865671641791045, "grad_norm": 13.869222640991211, "learning_rate": 9.440476190476192e-06, "loss": 53.5431, "step": 54 }, { "epoch": 1.31044776119403, "grad_norm": 25.478107452392578, "learning_rate": 9.42857142857143e-06, "loss": 52.7126, "step": 55 }, { "epoch": 1.3343283582089551, "grad_norm": 35.76942825317383, "learning_rate": 9.416666666666667e-06, "loss": 53.8082, "step": 56 }, { "epoch": 1.3582089552238805, "grad_norm": 17.95109748840332, "learning_rate": 9.404761904761905e-06, "loss": 53.0275, "step": 57 }, { "epoch": 1.382089552238806, "grad_norm": 26.80129623413086, "learning_rate": 9.392857142857143e-06, "loss": 53.3295, "step": 58 }, { "epoch": 1.4059701492537313, "grad_norm": 25.803054809570312, "learning_rate": 9.380952380952381e-06, "loss": 52.3771, "step": 59 }, { "epoch": 1.4298507462686567, "grad_norm": 35.39850616455078, "learning_rate": 9.36904761904762e-06, "loss": 52.9467, "step": 60 }, { "epoch": 1.4537313432835821, "grad_norm": 27.43315887451172, "learning_rate": 9.357142857142859e-06, "loss": 53.1359, "step": 61 }, { "epoch": 1.4776119402985075, "grad_norm": 33.431400299072266, "learning_rate": 9.345238095238096e-06, "loss": 52.2006, "step": 62 }, { "epoch": 1.5014925373134327, "grad_norm": 33.08237075805664, "learning_rate": 9.333333333333334e-06, "loss": 52.6816, "step": 63 }, { "epoch": 1.5253731343283583, "grad_norm": 25.711997985839844, "learning_rate": 9.321428571428572e-06, "loss": 52.6113, "step": 64 }, { "epoch": 1.5492537313432835, "grad_norm": 32.688297271728516, "learning_rate": 9.30952380952381e-06, "loss": 51.9086, "step": 65 }, { "epoch": 1.573134328358209, "grad_norm": 31.856857299804688, "learning_rate": 9.297619047619048e-06, "loss": 53.1913, "step": 66 }, { "epoch": 1.5970149253731343, "grad_norm": 26.231773376464844, "learning_rate": 9.285714285714288e-06, "loss": 51.464, "step": 67 }, { "epoch": 1.6208955223880597, "grad_norm": 29.39109230041504, "learning_rate": 9.273809523809525e-06, "loss": 52.0572, "step": 68 }, { "epoch": 1.644776119402985, "grad_norm": 28.86277198791504, "learning_rate": 9.261904761904763e-06, "loss": 52.7115, "step": 69 }, { "epoch": 1.6686567164179105, "grad_norm": 28.82640266418457, "learning_rate": 9.250000000000001e-06, "loss": 53.1756, "step": 70 }, { "epoch": 1.6925373134328359, "grad_norm": 31.32577133178711, "learning_rate": 9.238095238095239e-06, "loss": 52.369, "step": 71 }, { "epoch": 1.716417910447761, "grad_norm": 18.739269256591797, "learning_rate": 9.226190476190477e-06, "loss": 52.6631, "step": 72 }, { "epoch": 1.7402985074626867, "grad_norm": 22.889320373535156, "learning_rate": 9.214285714285715e-06, "loss": 52.4786, "step": 73 }, { "epoch": 1.7641791044776118, "grad_norm": 24.175626754760742, "learning_rate": 9.202380952380953e-06, "loss": 52.1792, "step": 74 }, { "epoch": 1.7880597014925375, "grad_norm": 25.257095336914062, "learning_rate": 9.19047619047619e-06, "loss": 51.39, "step": 75 }, { "epoch": 1.8119402985074626, "grad_norm": 35.745208740234375, "learning_rate": 9.178571428571429e-06, "loss": 53.331, "step": 76 }, { "epoch": 1.835820895522388, "grad_norm": 23.815813064575195, "learning_rate": 9.166666666666666e-06, "loss": 52.7632, "step": 77 }, { "epoch": 1.8597014925373134, "grad_norm": 32.405757904052734, "learning_rate": 9.154761904761906e-06, "loss": 52.3165, "step": 78 }, { "epoch": 1.8835820895522388, "grad_norm": 38.95046615600586, "learning_rate": 9.142857142857144e-06, "loss": 52.0931, "step": 79 }, { "epoch": 1.9074626865671642, "grad_norm": 22.412342071533203, "learning_rate": 9.130952380952382e-06, "loss": 51.8732, "step": 80 }, { "epoch": 1.9313432835820894, "grad_norm": 35.088253021240234, "learning_rate": 9.11904761904762e-06, "loss": 52.0182, "step": 81 }, { "epoch": 1.955223880597015, "grad_norm": 20.136964797973633, "learning_rate": 9.107142857142858e-06, "loss": 52.7956, "step": 82 }, { "epoch": 1.9791044776119402, "grad_norm": 20.860034942626953, "learning_rate": 9.095238095238095e-06, "loss": 50.339, "step": 83 }, { "epoch": 2.0, "grad_norm": 20.38931655883789, "learning_rate": 9.083333333333333e-06, "loss": 45.3651, "step": 84 }, { "epoch": 2.023880597014925, "grad_norm": 21.519498825073242, "learning_rate": 9.071428571428573e-06, "loss": 52.1228, "step": 85 }, { "epoch": 2.047761194029851, "grad_norm": 30.47000503540039, "learning_rate": 9.05952380952381e-06, "loss": 51.1968, "step": 86 }, { "epoch": 2.071641791044776, "grad_norm": 19.728044509887695, "learning_rate": 9.047619047619049e-06, "loss": 52.1514, "step": 87 }, { "epoch": 2.0955223880597016, "grad_norm": 42.647281646728516, "learning_rate": 9.035714285714287e-06, "loss": 52.5597, "step": 88 }, { "epoch": 2.1194029850746268, "grad_norm": 55.45186233520508, "learning_rate": 9.023809523809524e-06, "loss": 53.7621, "step": 89 }, { "epoch": 2.1432835820895524, "grad_norm": 14.879026412963867, "learning_rate": 9.011904761904762e-06, "loss": 52.0743, "step": 90 }, { "epoch": 2.1671641791044776, "grad_norm": 48.55704116821289, "learning_rate": 9e-06, "loss": 52.0543, "step": 91 }, { "epoch": 2.191044776119403, "grad_norm": 37.96782302856445, "learning_rate": 8.98809523809524e-06, "loss": 52.4684, "step": 92 }, { "epoch": 2.2149253731343284, "grad_norm": 27.714475631713867, "learning_rate": 8.976190476190478e-06, "loss": 52.725, "step": 93 }, { "epoch": 2.2388059701492535, "grad_norm": 32.12433624267578, "learning_rate": 8.964285714285716e-06, "loss": 52.7161, "step": 94 }, { "epoch": 2.262686567164179, "grad_norm": 23.8153018951416, "learning_rate": 8.952380952380953e-06, "loss": 51.3124, "step": 95 }, { "epoch": 2.2865671641791043, "grad_norm": 31.269794464111328, "learning_rate": 8.940476190476191e-06, "loss": 51.9646, "step": 96 }, { "epoch": 2.31044776119403, "grad_norm": 16.611865997314453, "learning_rate": 8.92857142857143e-06, "loss": 51.8503, "step": 97 }, { "epoch": 2.334328358208955, "grad_norm": 26.69631004333496, "learning_rate": 8.916666666666667e-06, "loss": 52.4857, "step": 98 }, { "epoch": 2.3582089552238807, "grad_norm": 21.10638999938965, "learning_rate": 8.904761904761905e-06, "loss": 52.0022, "step": 99 }, { "epoch": 2.382089552238806, "grad_norm": 16.273351669311523, "learning_rate": 8.892857142857143e-06, "loss": 50.4367, "step": 100 }, { "epoch": 2.405970149253731, "grad_norm": 16.407167434692383, "learning_rate": 8.88095238095238e-06, "loss": 51.2079, "step": 101 }, { "epoch": 2.4298507462686567, "grad_norm": 16.22024154663086, "learning_rate": 8.869047619047619e-06, "loss": 50.4939, "step": 102 }, { "epoch": 2.4537313432835823, "grad_norm": 27.11235809326172, "learning_rate": 8.857142857142858e-06, "loss": 50.0872, "step": 103 }, { "epoch": 2.4776119402985075, "grad_norm": 18.912181854248047, "learning_rate": 8.845238095238096e-06, "loss": 51.8135, "step": 104 }, { "epoch": 2.5014925373134327, "grad_norm": 29.597028732299805, "learning_rate": 8.833333333333334e-06, "loss": 49.4789, "step": 105 }, { "epoch": 2.5253731343283583, "grad_norm": 30.51687240600586, "learning_rate": 8.821428571428572e-06, "loss": 52.5555, "step": 106 }, { "epoch": 2.5492537313432835, "grad_norm": 31.4583797454834, "learning_rate": 8.80952380952381e-06, "loss": 51.0073, "step": 107 }, { "epoch": 2.573134328358209, "grad_norm": 30.35653305053711, "learning_rate": 8.797619047619048e-06, "loss": 50.9501, "step": 108 }, { "epoch": 2.5970149253731343, "grad_norm": 24.041545867919922, "learning_rate": 8.785714285714286e-06, "loss": 49.5162, "step": 109 }, { "epoch": 2.62089552238806, "grad_norm": 23.52166175842285, "learning_rate": 8.773809523809525e-06, "loss": 52.9747, "step": 110 }, { "epoch": 2.644776119402985, "grad_norm": 28.871065139770508, "learning_rate": 8.761904761904763e-06, "loss": 50.2273, "step": 111 }, { "epoch": 2.6686567164179102, "grad_norm": 26.484140396118164, "learning_rate": 8.750000000000001e-06, "loss": 51.2286, "step": 112 }, { "epoch": 2.692537313432836, "grad_norm": 37.570743560791016, "learning_rate": 8.738095238095239e-06, "loss": 49.7131, "step": 113 }, { "epoch": 2.716417910447761, "grad_norm": 23.827178955078125, "learning_rate": 8.726190476190477e-06, "loss": 51.913, "step": 114 }, { "epoch": 2.7402985074626867, "grad_norm": 33.89924621582031, "learning_rate": 8.714285714285715e-06, "loss": 52.2382, "step": 115 }, { "epoch": 2.764179104477612, "grad_norm": 29.397851943969727, "learning_rate": 8.702380952380952e-06, "loss": 52.1548, "step": 116 }, { "epoch": 2.7880597014925375, "grad_norm": 28.73517417907715, "learning_rate": 8.690476190476192e-06, "loss": 51.2892, "step": 117 }, { "epoch": 2.8119402985074626, "grad_norm": 32.068138122558594, "learning_rate": 8.67857142857143e-06, "loss": 51.542, "step": 118 }, { "epoch": 2.835820895522388, "grad_norm": 22.76898956298828, "learning_rate": 8.666666666666668e-06, "loss": 50.373, "step": 119 }, { "epoch": 2.8597014925373134, "grad_norm": 33.528263092041016, "learning_rate": 8.654761904761906e-06, "loss": 51.3075, "step": 120 }, { "epoch": 2.883582089552239, "grad_norm": 21.655696868896484, "learning_rate": 8.642857142857144e-06, "loss": 51.1397, "step": 121 }, { "epoch": 2.9074626865671642, "grad_norm": 25.94880485534668, "learning_rate": 8.630952380952381e-06, "loss": 51.4326, "step": 122 }, { "epoch": 2.9313432835820894, "grad_norm": 36.14421844482422, "learning_rate": 8.61904761904762e-06, "loss": 50.9524, "step": 123 }, { "epoch": 2.955223880597015, "grad_norm": 28.361459732055664, "learning_rate": 8.607142857142859e-06, "loss": 51.3171, "step": 124 }, { "epoch": 2.97910447761194, "grad_norm": 30.784954071044922, "learning_rate": 8.595238095238097e-06, "loss": 49.9797, "step": 125 }, { "epoch": 3.0, "grad_norm": 22.98565101623535, "learning_rate": 8.583333333333333e-06, "loss": 44.3471, "step": 126 }, { "epoch": 3.023880597014925, "grad_norm": 25.601985931396484, "learning_rate": 8.571428571428571e-06, "loss": 51.6574, "step": 127 }, { "epoch": 3.047761194029851, "grad_norm": 27.648792266845703, "learning_rate": 8.55952380952381e-06, "loss": 51.3385, "step": 128 }, { "epoch": 3.071641791044776, "grad_norm": 18.773529052734375, "learning_rate": 8.547619047619048e-06, "loss": 51.1451, "step": 129 }, { "epoch": 3.0955223880597016, "grad_norm": 29.439353942871094, "learning_rate": 8.535714285714286e-06, "loss": 51.6092, "step": 130 }, { "epoch": 3.1194029850746268, "grad_norm": 32.41486740112305, "learning_rate": 8.523809523809524e-06, "loss": 50.9068, "step": 131 }, { "epoch": 3.1432835820895524, "grad_norm": 23.441896438598633, "learning_rate": 8.511904761904762e-06, "loss": 51.7453, "step": 132 }, { "epoch": 3.1671641791044776, "grad_norm": 29.218734741210938, "learning_rate": 8.5e-06, "loss": 49.9124, "step": 133 }, { "epoch": 3.191044776119403, "grad_norm": 20.988981246948242, "learning_rate": 8.488095238095238e-06, "loss": 50.9788, "step": 134 }, { "epoch": 3.2149253731343284, "grad_norm": 22.57052993774414, "learning_rate": 8.476190476190477e-06, "loss": 51.4228, "step": 135 }, { "epoch": 3.2388059701492535, "grad_norm": 26.112573623657227, "learning_rate": 8.464285714285715e-06, "loss": 50.3332, "step": 136 }, { "epoch": 3.262686567164179, "grad_norm": 23.8747615814209, "learning_rate": 8.452380952380953e-06, "loss": 51.1763, "step": 137 }, { "epoch": 3.2865671641791043, "grad_norm": 24.12811851501465, "learning_rate": 8.440476190476191e-06, "loss": 49.8539, "step": 138 }, { "epoch": 3.31044776119403, "grad_norm": 27.462984085083008, "learning_rate": 8.428571428571429e-06, "loss": 50.7766, "step": 139 }, { "epoch": 3.334328358208955, "grad_norm": 31.261472702026367, "learning_rate": 8.416666666666667e-06, "loss": 49.7599, "step": 140 }, { "epoch": 3.3582089552238807, "grad_norm": 21.049545288085938, "learning_rate": 8.404761904761905e-06, "loss": 49.6827, "step": 141 }, { "epoch": 3.382089552238806, "grad_norm": 30.103389739990234, "learning_rate": 8.392857142857144e-06, "loss": 49.3866, "step": 142 }, { "epoch": 3.405970149253731, "grad_norm": 31.348888397216797, "learning_rate": 8.380952380952382e-06, "loss": 51.4607, "step": 143 }, { "epoch": 3.4298507462686567, "grad_norm": 28.910200119018555, "learning_rate": 8.36904761904762e-06, "loss": 51.2337, "step": 144 }, { "epoch": 3.4537313432835823, "grad_norm": 21.00281524658203, "learning_rate": 8.357142857142858e-06, "loss": 50.9557, "step": 145 }, { "epoch": 3.4776119402985075, "grad_norm": 45.842002868652344, "learning_rate": 8.345238095238096e-06, "loss": 49.7377, "step": 146 }, { "epoch": 3.5014925373134327, "grad_norm": 30.77996253967285, "learning_rate": 8.333333333333334e-06, "loss": 51.1234, "step": 147 }, { "epoch": 3.5253731343283583, "grad_norm": 31.492767333984375, "learning_rate": 8.321428571428573e-06, "loss": 50.5733, "step": 148 }, { "epoch": 3.5492537313432835, "grad_norm": 36.57206344604492, "learning_rate": 8.309523809523811e-06, "loss": 50.6762, "step": 149 }, { "epoch": 3.573134328358209, "grad_norm": 33.86347198486328, "learning_rate": 8.297619047619049e-06, "loss": 50.8281, "step": 150 }, { "epoch": 3.5970149253731343, "grad_norm": 30.812152862548828, "learning_rate": 8.285714285714287e-06, "loss": 50.6509, "step": 151 }, { "epoch": 3.62089552238806, "grad_norm": 24.536882400512695, "learning_rate": 8.273809523809523e-06, "loss": 50.1112, "step": 152 }, { "epoch": 3.644776119402985, "grad_norm": 29.8430233001709, "learning_rate": 8.261904761904763e-06, "loss": 50.846, "step": 153 }, { "epoch": 3.6686567164179102, "grad_norm": 26.18596076965332, "learning_rate": 8.25e-06, "loss": 50.3806, "step": 154 }, { "epoch": 3.692537313432836, "grad_norm": 38.75019836425781, "learning_rate": 8.238095238095239e-06, "loss": 49.8915, "step": 155 }, { "epoch": 3.716417910447761, "grad_norm": 34.30149841308594, "learning_rate": 8.226190476190476e-06, "loss": 50.7886, "step": 156 }, { "epoch": 3.7402985074626867, "grad_norm": 33.179298400878906, "learning_rate": 8.214285714285714e-06, "loss": 50.8175, "step": 157 }, { "epoch": 3.764179104477612, "grad_norm": 34.90909957885742, "learning_rate": 8.202380952380952e-06, "loss": 50.3521, "step": 158 }, { "epoch": 3.7880597014925375, "grad_norm": 33.2717399597168, "learning_rate": 8.190476190476192e-06, "loss": 51.2006, "step": 159 }, { "epoch": 3.8119402985074626, "grad_norm": 33.082672119140625, "learning_rate": 8.17857142857143e-06, "loss": 49.5627, "step": 160 }, { "epoch": 3.835820895522388, "grad_norm": 23.65228843688965, "learning_rate": 8.166666666666668e-06, "loss": 49.9631, "step": 161 }, { "epoch": 3.8597014925373134, "grad_norm": 37.3172492980957, "learning_rate": 8.154761904761905e-06, "loss": 50.7175, "step": 162 }, { "epoch": 3.883582089552239, "grad_norm": 29.369930267333984, "learning_rate": 8.142857142857143e-06, "loss": 51.1435, "step": 163 }, { "epoch": 3.9074626865671642, "grad_norm": 28.807470321655273, "learning_rate": 8.130952380952381e-06, "loss": 50.3349, "step": 164 }, { "epoch": 3.9313432835820894, "grad_norm": 33.90628433227539, "learning_rate": 8.119047619047619e-06, "loss": 50.7241, "step": 165 }, { "epoch": 3.955223880597015, "grad_norm": 21.72952651977539, "learning_rate": 8.107142857142859e-06, "loss": 49.9013, "step": 166 }, { "epoch": 3.97910447761194, "grad_norm": 26.831520080566406, "learning_rate": 8.095238095238097e-06, "loss": 51.0161, "step": 167 }, { "epoch": 4.0, "grad_norm": 24.49069595336914, "learning_rate": 8.083333333333334e-06, "loss": 44.6758, "step": 168 }, { "epoch": 4.023880597014926, "grad_norm": 36.32711410522461, "learning_rate": 8.071428571428572e-06, "loss": 49.8601, "step": 169 }, { "epoch": 4.04776119402985, "grad_norm": 29.862812042236328, "learning_rate": 8.05952380952381e-06, "loss": 51.712, "step": 170 }, { "epoch": 4.071641791044776, "grad_norm": 40.245887756347656, "learning_rate": 8.047619047619048e-06, "loss": 50.3353, "step": 171 }, { "epoch": 4.095522388059702, "grad_norm": 34.22684097290039, "learning_rate": 8.035714285714286e-06, "loss": 50.6474, "step": 172 }, { "epoch": 4.119402985074627, "grad_norm": 36.754669189453125, "learning_rate": 8.023809523809526e-06, "loss": 50.1623, "step": 173 }, { "epoch": 4.143283582089552, "grad_norm": 35.76541519165039, "learning_rate": 8.011904761904763e-06, "loss": 50.2426, "step": 174 }, { "epoch": 4.167164179104478, "grad_norm": 25.851362228393555, "learning_rate": 8.000000000000001e-06, "loss": 49.9525, "step": 175 }, { "epoch": 4.191044776119403, "grad_norm": 24.48278045654297, "learning_rate": 7.98809523809524e-06, "loss": 49.1466, "step": 176 }, { "epoch": 4.214925373134328, "grad_norm": 28.79146385192871, "learning_rate": 7.976190476190477e-06, "loss": 49.9365, "step": 177 }, { "epoch": 4.2388059701492535, "grad_norm": 29.29482650756836, "learning_rate": 7.964285714285715e-06, "loss": 50.7427, "step": 178 }, { "epoch": 4.262686567164179, "grad_norm": 23.50571060180664, "learning_rate": 7.952380952380953e-06, "loss": 49.7287, "step": 179 }, { "epoch": 4.286567164179105, "grad_norm": 27.805828094482422, "learning_rate": 7.94047619047619e-06, "loss": 50.4316, "step": 180 }, { "epoch": 4.3104477611940295, "grad_norm": 28.323888778686523, "learning_rate": 7.928571428571429e-06, "loss": 50.0263, "step": 181 }, { "epoch": 4.334328358208955, "grad_norm": 25.43438148498535, "learning_rate": 7.916666666666667e-06, "loss": 49.426, "step": 182 }, { "epoch": 4.358208955223881, "grad_norm": 22.169496536254883, "learning_rate": 7.904761904761904e-06, "loss": 51.1048, "step": 183 }, { "epoch": 4.382089552238806, "grad_norm": 33.660545349121094, "learning_rate": 7.892857142857144e-06, "loss": 49.7654, "step": 184 }, { "epoch": 4.405970149253731, "grad_norm": 24.276273727416992, "learning_rate": 7.880952380952382e-06, "loss": 50.4976, "step": 185 }, { "epoch": 4.429850746268657, "grad_norm": 41.48741149902344, "learning_rate": 7.86904761904762e-06, "loss": 52.0386, "step": 186 }, { "epoch": 4.453731343283582, "grad_norm": 25.86789894104004, "learning_rate": 7.857142857142858e-06, "loss": 49.129, "step": 187 }, { "epoch": 4.477611940298507, "grad_norm": 26.607038497924805, "learning_rate": 7.845238095238096e-06, "loss": 49.3561, "step": 188 }, { "epoch": 4.501492537313433, "grad_norm": 43.54303741455078, "learning_rate": 7.833333333333333e-06, "loss": 50.1143, "step": 189 }, { "epoch": 4.525373134328358, "grad_norm": 45.6146354675293, "learning_rate": 7.821428571428571e-06, "loss": 49.3217, "step": 190 }, { "epoch": 4.549253731343284, "grad_norm": 24.00080680847168, "learning_rate": 7.809523809523811e-06, "loss": 50.484, "step": 191 }, { "epoch": 4.573134328358209, "grad_norm": 29.736740112304688, "learning_rate": 7.797619047619049e-06, "loss": 49.748, "step": 192 }, { "epoch": 4.597014925373134, "grad_norm": 33.08702850341797, "learning_rate": 7.785714285714287e-06, "loss": 50.2142, "step": 193 }, { "epoch": 4.62089552238806, "grad_norm": 19.16411018371582, "learning_rate": 7.773809523809525e-06, "loss": 50.1073, "step": 194 }, { "epoch": 4.6447761194029855, "grad_norm": 32.145721435546875, "learning_rate": 7.761904761904762e-06, "loss": 48.5769, "step": 195 }, { "epoch": 4.66865671641791, "grad_norm": 38.768341064453125, "learning_rate": 7.75e-06, "loss": 49.681, "step": 196 }, { "epoch": 4.692537313432836, "grad_norm": 26.108245849609375, "learning_rate": 7.738095238095238e-06, "loss": 49.9193, "step": 197 }, { "epoch": 4.7164179104477615, "grad_norm": 28.86294174194336, "learning_rate": 7.726190476190478e-06, "loss": 50.4584, "step": 198 }, { "epoch": 4.740298507462686, "grad_norm": 31.089380264282227, "learning_rate": 7.714285714285716e-06, "loss": 50.7873, "step": 199 }, { "epoch": 4.764179104477612, "grad_norm": 22.934032440185547, "learning_rate": 7.702380952380954e-06, "loss": 50.611, "step": 200 }, { "epoch": 4.7880597014925375, "grad_norm": 27.986371994018555, "learning_rate": 7.690476190476191e-06, "loss": 49.275, "step": 201 }, { "epoch": 4.811940298507462, "grad_norm": 23.44196319580078, "learning_rate": 7.67857142857143e-06, "loss": 50.2035, "step": 202 }, { "epoch": 4.835820895522388, "grad_norm": 22.05059242248535, "learning_rate": 7.666666666666667e-06, "loss": 48.9595, "step": 203 }, { "epoch": 4.859701492537313, "grad_norm": 29.709396362304688, "learning_rate": 7.654761904761905e-06, "loss": 50.5343, "step": 204 }, { "epoch": 4.883582089552239, "grad_norm": 23.702781677246094, "learning_rate": 7.642857142857143e-06, "loss": 50.2627, "step": 205 }, { "epoch": 4.907462686567165, "grad_norm": 20.144807815551758, "learning_rate": 7.630952380952381e-06, "loss": 51.0125, "step": 206 }, { "epoch": 4.931343283582089, "grad_norm": 28.83676528930664, "learning_rate": 7.61904761904762e-06, "loss": 50.5985, "step": 207 }, { "epoch": 4.955223880597015, "grad_norm": 34.40160369873047, "learning_rate": 7.6071428571428575e-06, "loss": 49.6469, "step": 208 }, { "epoch": 4.979104477611941, "grad_norm": 26.982925415039062, "learning_rate": 7.595238095238095e-06, "loss": 50.1666, "step": 209 }, { "epoch": 5.0, "grad_norm": 19.569746017456055, "learning_rate": 7.583333333333333e-06, "loss": 43.6715, "step": 210 }, { "epoch": 5.023880597014926, "grad_norm": 23.753328323364258, "learning_rate": 7.571428571428572e-06, "loss": 49.9273, "step": 211 }, { "epoch": 5.04776119402985, "grad_norm": 22.463659286499023, "learning_rate": 7.55952380952381e-06, "loss": 48.8499, "step": 212 }, { "epoch": 5.071641791044776, "grad_norm": 24.507875442504883, "learning_rate": 7.547619047619048e-06, "loss": 49.3275, "step": 213 }, { "epoch": 5.095522388059702, "grad_norm": 21.727603912353516, "learning_rate": 7.5357142857142865e-06, "loss": 49.1879, "step": 214 }, { "epoch": 5.119402985074627, "grad_norm": 26.122251510620117, "learning_rate": 7.523809523809524e-06, "loss": 50.1094, "step": 215 }, { "epoch": 5.143283582089552, "grad_norm": 24.142263412475586, "learning_rate": 7.511904761904762e-06, "loss": 50.2708, "step": 216 }, { "epoch": 5.167164179104478, "grad_norm": 22.762237548828125, "learning_rate": 7.500000000000001e-06, "loss": 50.441, "step": 217 }, { "epoch": 5.191044776119403, "grad_norm": 35.74570846557617, "learning_rate": 7.488095238095239e-06, "loss": 48.5121, "step": 218 }, { "epoch": 5.214925373134328, "grad_norm": 30.92180824279785, "learning_rate": 7.476190476190477e-06, "loss": 49.4257, "step": 219 }, { "epoch": 5.2388059701492535, "grad_norm": 26.90997314453125, "learning_rate": 7.464285714285715e-06, "loss": 50.9712, "step": 220 }, { "epoch": 5.262686567164179, "grad_norm": 35.544700622558594, "learning_rate": 7.4523809523809534e-06, "loss": 49.7908, "step": 221 }, { "epoch": 5.286567164179105, "grad_norm": 33.78145217895508, "learning_rate": 7.440476190476191e-06, "loss": 49.2105, "step": 222 }, { "epoch": 5.3104477611940295, "grad_norm": 32.16508102416992, "learning_rate": 7.428571428571429e-06, "loss": 49.8545, "step": 223 }, { "epoch": 5.334328358208955, "grad_norm": 30.4263973236084, "learning_rate": 7.416666666666668e-06, "loss": 50.0994, "step": 224 }, { "epoch": 5.358208955223881, "grad_norm": 25.801084518432617, "learning_rate": 7.404761904761906e-06, "loss": 49.6227, "step": 225 }, { "epoch": 5.382089552238806, "grad_norm": 27.16851234436035, "learning_rate": 7.392857142857144e-06, "loss": 50.0005, "step": 226 }, { "epoch": 5.405970149253731, "grad_norm": 30.102867126464844, "learning_rate": 7.380952380952382e-06, "loss": 50.7114, "step": 227 }, { "epoch": 5.429850746268657, "grad_norm": 26.032968521118164, "learning_rate": 7.36904761904762e-06, "loss": 48.99, "step": 228 }, { "epoch": 5.453731343283582, "grad_norm": 24.074424743652344, "learning_rate": 7.357142857142858e-06, "loss": 49.6965, "step": 229 }, { "epoch": 5.477611940298507, "grad_norm": 24.5870361328125, "learning_rate": 7.345238095238096e-06, "loss": 48.8593, "step": 230 }, { "epoch": 5.501492537313433, "grad_norm": 22.831932067871094, "learning_rate": 7.333333333333333e-06, "loss": 49.9975, "step": 231 }, { "epoch": 5.525373134328358, "grad_norm": 26.88197135925293, "learning_rate": 7.321428571428572e-06, "loss": 49.5131, "step": 232 }, { "epoch": 5.549253731343284, "grad_norm": 28.986154556274414, "learning_rate": 7.30952380952381e-06, "loss": 48.9042, "step": 233 }, { "epoch": 5.573134328358209, "grad_norm": 17.220605850219727, "learning_rate": 7.297619047619048e-06, "loss": 49.453, "step": 234 }, { "epoch": 5.597014925373134, "grad_norm": 25.110107421875, "learning_rate": 7.285714285714286e-06, "loss": 49.8196, "step": 235 }, { "epoch": 5.62089552238806, "grad_norm": 34.680870056152344, "learning_rate": 7.273809523809524e-06, "loss": 49.9709, "step": 236 }, { "epoch": 5.6447761194029855, "grad_norm": 24.10121726989746, "learning_rate": 7.261904761904762e-06, "loss": 49.4685, "step": 237 }, { "epoch": 5.66865671641791, "grad_norm": 28.65550422668457, "learning_rate": 7.25e-06, "loss": 50.3439, "step": 238 }, { "epoch": 5.692537313432836, "grad_norm": 27.49604606628418, "learning_rate": 7.238095238095239e-06, "loss": 50.0419, "step": 239 }, { "epoch": 5.7164179104477615, "grad_norm": 22.843509674072266, "learning_rate": 7.226190476190477e-06, "loss": 49.0357, "step": 240 }, { "epoch": 5.740298507462686, "grad_norm": 36.56801223754883, "learning_rate": 7.2142857142857145e-06, "loss": 49.4478, "step": 241 }, { "epoch": 5.764179104477612, "grad_norm": 38.233734130859375, "learning_rate": 7.202380952380953e-06, "loss": 50.4473, "step": 242 }, { "epoch": 5.7880597014925375, "grad_norm": 29.198333740234375, "learning_rate": 7.190476190476191e-06, "loss": 49.8598, "step": 243 }, { "epoch": 5.811940298507462, "grad_norm": 34.49404525756836, "learning_rate": 7.178571428571429e-06, "loss": 49.1441, "step": 244 }, { "epoch": 5.835820895522388, "grad_norm": 35.568359375, "learning_rate": 7.166666666666667e-06, "loss": 49.9402, "step": 245 }, { "epoch": 5.859701492537313, "grad_norm": 31.041446685791016, "learning_rate": 7.154761904761906e-06, "loss": 50.1265, "step": 246 }, { "epoch": 5.883582089552239, "grad_norm": 48.34186935424805, "learning_rate": 7.1428571428571436e-06, "loss": 50.7649, "step": 247 }, { "epoch": 5.907462686567165, "grad_norm": 39.171661376953125, "learning_rate": 7.1309523809523814e-06, "loss": 48.943, "step": 248 }, { "epoch": 5.931343283582089, "grad_norm": 28.724523544311523, "learning_rate": 7.11904761904762e-06, "loss": 50.8039, "step": 249 }, { "epoch": 5.955223880597015, "grad_norm": 36.57830810546875, "learning_rate": 7.107142857142858e-06, "loss": 50.2311, "step": 250 }, { "epoch": 5.979104477611941, "grad_norm": 39.91551971435547, "learning_rate": 7.095238095238096e-06, "loss": 49.1617, "step": 251 }, { "epoch": 6.0, "grad_norm": 17.104145050048828, "learning_rate": 7.083333333333335e-06, "loss": 42.8003, "step": 252 }, { "epoch": 6.023880597014926, "grad_norm": 33.03441619873047, "learning_rate": 7.0714285714285726e-06, "loss": 48.2896, "step": 253 }, { "epoch": 6.04776119402985, "grad_norm": 26.487470626831055, "learning_rate": 7.0595238095238105e-06, "loss": 49.205, "step": 254 }, { "epoch": 6.071641791044776, "grad_norm": 26.752981185913086, "learning_rate": 7.047619047619048e-06, "loss": 50.3943, "step": 255 }, { "epoch": 6.095522388059702, "grad_norm": 22.44376564025879, "learning_rate": 7.035714285714287e-06, "loss": 49.285, "step": 256 }, { "epoch": 6.119402985074627, "grad_norm": 31.066368103027344, "learning_rate": 7.023809523809524e-06, "loss": 49.3131, "step": 257 }, { "epoch": 6.143283582089552, "grad_norm": 28.67262840270996, "learning_rate": 7.011904761904762e-06, "loss": 50.6188, "step": 258 }, { "epoch": 6.167164179104478, "grad_norm": 24.013134002685547, "learning_rate": 7e-06, "loss": 50.4382, "step": 259 }, { "epoch": 6.191044776119403, "grad_norm": 26.5673828125, "learning_rate": 6.988095238095239e-06, "loss": 49.7058, "step": 260 }, { "epoch": 6.214925373134328, "grad_norm": 20.803695678710938, "learning_rate": 6.9761904761904765e-06, "loss": 48.9389, "step": 261 }, { "epoch": 6.2388059701492535, "grad_norm": 23.450183868408203, "learning_rate": 6.964285714285714e-06, "loss": 49.0091, "step": 262 }, { "epoch": 6.262686567164179, "grad_norm": 36.94446563720703, "learning_rate": 6.952380952380952e-06, "loss": 50.2589, "step": 263 }, { "epoch": 6.286567164179105, "grad_norm": 39.548095703125, "learning_rate": 6.940476190476191e-06, "loss": 49.3129, "step": 264 }, { "epoch": 6.3104477611940295, "grad_norm": 30.536083221435547, "learning_rate": 6.928571428571429e-06, "loss": 49.1838, "step": 265 }, { "epoch": 6.334328358208955, "grad_norm": 27.97296714782715, "learning_rate": 6.916666666666667e-06, "loss": 50.3184, "step": 266 }, { "epoch": 6.358208955223881, "grad_norm": 25.69655418395996, "learning_rate": 6.9047619047619055e-06, "loss": 49.2226, "step": 267 }, { "epoch": 6.382089552238806, "grad_norm": 22.114097595214844, "learning_rate": 6.892857142857143e-06, "loss": 49.4455, "step": 268 }, { "epoch": 6.405970149253731, "grad_norm": 30.47511100769043, "learning_rate": 6.880952380952381e-06, "loss": 49.7409, "step": 269 }, { "epoch": 6.429850746268657, "grad_norm": 26.32929039001465, "learning_rate": 6.86904761904762e-06, "loss": 50.3336, "step": 270 }, { "epoch": 6.453731343283582, "grad_norm": 28.09309196472168, "learning_rate": 6.857142857142858e-06, "loss": 49.6044, "step": 271 }, { "epoch": 6.477611940298507, "grad_norm": 25.840974807739258, "learning_rate": 6.845238095238096e-06, "loss": 49.9185, "step": 272 }, { "epoch": 6.501492537313433, "grad_norm": 31.89126205444336, "learning_rate": 6.833333333333334e-06, "loss": 48.2732, "step": 273 }, { "epoch": 6.525373134328358, "grad_norm": 24.013029098510742, "learning_rate": 6.8214285714285724e-06, "loss": 49.9752, "step": 274 }, { "epoch": 6.549253731343284, "grad_norm": 25.509836196899414, "learning_rate": 6.80952380952381e-06, "loss": 50.5493, "step": 275 }, { "epoch": 6.573134328358209, "grad_norm": 35.25442886352539, "learning_rate": 6.797619047619048e-06, "loss": 49.2553, "step": 276 }, { "epoch": 6.597014925373134, "grad_norm": 29.42585563659668, "learning_rate": 6.785714285714287e-06, "loss": 48.776, "step": 277 }, { "epoch": 6.62089552238806, "grad_norm": 25.90894889831543, "learning_rate": 6.773809523809525e-06, "loss": 49.1964, "step": 278 }, { "epoch": 6.6447761194029855, "grad_norm": 25.63600730895996, "learning_rate": 6.761904761904763e-06, "loss": 48.4528, "step": 279 }, { "epoch": 6.66865671641791, "grad_norm": 29.943740844726562, "learning_rate": 6.750000000000001e-06, "loss": 49.9026, "step": 280 }, { "epoch": 6.692537313432836, "grad_norm": 33.253910064697266, "learning_rate": 6.738095238095239e-06, "loss": 49.2364, "step": 281 }, { "epoch": 6.7164179104477615, "grad_norm": 23.465354919433594, "learning_rate": 6.726190476190477e-06, "loss": 49.2759, "step": 282 }, { "epoch": 6.740298507462686, "grad_norm": 31.023218154907227, "learning_rate": 6.714285714285714e-06, "loss": 49.3256, "step": 283 }, { "epoch": 6.764179104477612, "grad_norm": 32.376991271972656, "learning_rate": 6.702380952380952e-06, "loss": 47.7239, "step": 284 }, { "epoch": 6.7880597014925375, "grad_norm": 18.388896942138672, "learning_rate": 6.690476190476191e-06, "loss": 49.5751, "step": 285 }, { "epoch": 6.811940298507462, "grad_norm": 22.1639404296875, "learning_rate": 6.678571428571429e-06, "loss": 48.8153, "step": 286 }, { "epoch": 6.835820895522388, "grad_norm": 31.39455223083496, "learning_rate": 6.666666666666667e-06, "loss": 49.7872, "step": 287 }, { "epoch": 6.859701492537313, "grad_norm": 36.480533599853516, "learning_rate": 6.654761904761905e-06, "loss": 48.9679, "step": 288 }, { "epoch": 6.883582089552239, "grad_norm": 23.432872772216797, "learning_rate": 6.642857142857143e-06, "loss": 48.4627, "step": 289 }, { "epoch": 6.907462686567165, "grad_norm": 33.31097412109375, "learning_rate": 6.630952380952381e-06, "loss": 49.617, "step": 290 }, { "epoch": 6.931343283582089, "grad_norm": 34.07685852050781, "learning_rate": 6.619047619047619e-06, "loss": 49.3365, "step": 291 }, { "epoch": 6.955223880597015, "grad_norm": 25.68811798095703, "learning_rate": 6.607142857142858e-06, "loss": 49.7821, "step": 292 }, { "epoch": 6.979104477611941, "grad_norm": 24.179588317871094, "learning_rate": 6.595238095238096e-06, "loss": 48.5466, "step": 293 }, { "epoch": 7.0, "grad_norm": 26.158781051635742, "learning_rate": 6.5833333333333335e-06, "loss": 43.2838, "step": 294 }, { "epoch": 7.023880597014926, "grad_norm": 22.84689712524414, "learning_rate": 6.571428571428572e-06, "loss": 49.3253, "step": 295 }, { "epoch": 7.04776119402985, "grad_norm": NaN, "learning_rate": 6.55952380952381e-06, "loss": 75.2786, "step": 296 }, { "epoch": 7.071641791044776, "grad_norm": 31.13886070251465, "learning_rate": 6.55952380952381e-06, "loss": 49.984, "step": 297 }, { "epoch": 7.095522388059702, "grad_norm": 32.37982940673828, "learning_rate": 6.547619047619048e-06, "loss": 49.6632, "step": 298 }, { "epoch": 7.119402985074627, "grad_norm": 22.977916717529297, "learning_rate": 6.535714285714286e-06, "loss": 48.7802, "step": 299 }, { "epoch": 7.143283582089552, "grad_norm": NaN, "learning_rate": 6.523809523809525e-06, "loss": 60.3381, "step": 300 }, { "epoch": 7.167164179104478, "grad_norm": 32.18650817871094, "learning_rate": 6.523809523809525e-06, "loss": 49.2689, "step": 301 }, { "epoch": 7.191044776119403, "grad_norm": 30.0800724029541, "learning_rate": 6.5119047619047626e-06, "loss": 49.3891, "step": 302 }, { "epoch": 7.214925373134328, "grad_norm": 32.35110855102539, "learning_rate": 6.5000000000000004e-06, "loss": 48.4497, "step": 303 }, { "epoch": 7.2388059701492535, "grad_norm": 34.08786392211914, "learning_rate": 6.488095238095239e-06, "loss": 49.1321, "step": 304 }, { "epoch": 7.262686567164179, "grad_norm": 25.25969696044922, "learning_rate": 6.476190476190477e-06, "loss": 49.0524, "step": 305 }, { "epoch": 7.286567164179105, "grad_norm": 25.843929290771484, "learning_rate": 6.464285714285715e-06, "loss": 49.8077, "step": 306 }, { "epoch": 7.3104477611940295, "grad_norm": 34.57284927368164, "learning_rate": 6.452380952380954e-06, "loss": 49.5393, "step": 307 }, { "epoch": 7.334328358208955, "grad_norm": 33.44814682006836, "learning_rate": 6.4404761904761916e-06, "loss": 49.0375, "step": 308 }, { "epoch": 7.358208955223881, "grad_norm": 25.127429962158203, "learning_rate": 6.4285714285714295e-06, "loss": 48.8145, "step": 309 }, { "epoch": 7.382089552238806, "grad_norm": 31.81999969482422, "learning_rate": 6.416666666666667e-06, "loss": 49.6432, "step": 310 }, { "epoch": 7.405970149253731, "grad_norm": 22.428335189819336, "learning_rate": 6.404761904761904e-06, "loss": 47.6232, "step": 311 }, { "epoch": 7.429850746268657, "grad_norm": 45.87803268432617, "learning_rate": 6.392857142857143e-06, "loss": 48.3479, "step": 312 }, { "epoch": 7.453731343283582, "grad_norm": 37.441253662109375, "learning_rate": 6.380952380952381e-06, "loss": 48.593, "step": 313 }, { "epoch": 7.477611940298507, "grad_norm": 23.15785789489746, "learning_rate": 6.369047619047619e-06, "loss": 49.1204, "step": 314 }, { "epoch": 7.501492537313433, "grad_norm": 35.8905029296875, "learning_rate": 6.357142857142858e-06, "loss": 49.2918, "step": 315 }, { "epoch": 7.525373134328358, "grad_norm": 37.41954040527344, "learning_rate": 6.3452380952380955e-06, "loss": 47.7495, "step": 316 }, { "epoch": 7.549253731343284, "grad_norm": 31.173114776611328, "learning_rate": 6.333333333333333e-06, "loss": 49.539, "step": 317 }, { "epoch": 7.573134328358209, "grad_norm": 23.941965103149414, "learning_rate": 6.321428571428571e-06, "loss": 49.0958, "step": 318 }, { "epoch": 7.597014925373134, "grad_norm": 31.949769973754883, "learning_rate": 6.30952380952381e-06, "loss": 49.1945, "step": 319 }, { "epoch": 7.62089552238806, "grad_norm": 21.299409866333008, "learning_rate": 6.297619047619048e-06, "loss": 49.3823, "step": 320 }, { "epoch": 7.6447761194029855, "grad_norm": 34.93647766113281, "learning_rate": 6.285714285714286e-06, "loss": 48.8867, "step": 321 }, { "epoch": 7.66865671641791, "grad_norm": 30.189655303955078, "learning_rate": 6.2738095238095245e-06, "loss": 49.8644, "step": 322 }, { "epoch": 7.692537313432836, "grad_norm": 19.964523315429688, "learning_rate": 6.261904761904762e-06, "loss": 49.6489, "step": 323 }, { "epoch": 7.7164179104477615, "grad_norm": 22.253337860107422, "learning_rate": 6.25e-06, "loss": 48.0582, "step": 324 }, { "epoch": 7.740298507462686, "grad_norm": 26.631391525268555, "learning_rate": 6.238095238095239e-06, "loss": 48.5585, "step": 325 }, { "epoch": 7.764179104477612, "grad_norm": 26.0469913482666, "learning_rate": 6.226190476190477e-06, "loss": 49.4969, "step": 326 }, { "epoch": 7.7880597014925375, "grad_norm": 30.000507354736328, "learning_rate": 6.214285714285715e-06, "loss": 49.6044, "step": 327 }, { "epoch": 7.811940298507462, "grad_norm": 29.44800567626953, "learning_rate": 6.202380952380953e-06, "loss": 50.3622, "step": 328 }, { "epoch": 7.835820895522388, "grad_norm": 24.83717918395996, "learning_rate": 6.1904761904761914e-06, "loss": 50.0974, "step": 329 }, { "epoch": 7.859701492537313, "grad_norm": 30.0760555267334, "learning_rate": 6.178571428571429e-06, "loss": 48.9307, "step": 330 }, { "epoch": 7.883582089552239, "grad_norm": 21.087966918945312, "learning_rate": 6.166666666666667e-06, "loss": 49.3432, "step": 331 }, { "epoch": 7.907462686567165, "grad_norm": 23.193716049194336, "learning_rate": 6.154761904761906e-06, "loss": 48.6664, "step": 332 }, { "epoch": 7.931343283582089, "grad_norm": 22.764123916625977, "learning_rate": 6.142857142857144e-06, "loss": 49.3497, "step": 333 }, { "epoch": 7.955223880597015, "grad_norm": 22.411897659301758, "learning_rate": 6.130952380952382e-06, "loss": 49.4106, "step": 334 }, { "epoch": 7.979104477611941, "grad_norm": 29.535375595092773, "learning_rate": 6.11904761904762e-06, "loss": 49.0695, "step": 335 }, { "epoch": 8.0, "grad_norm": 21.094457626342773, "learning_rate": 6.107142857142858e-06, "loss": 42.1367, "step": 336 }, { "epoch": 8.023880597014925, "grad_norm": 33.74859619140625, "learning_rate": 6.095238095238096e-06, "loss": 47.0065, "step": 337 }, { "epoch": 8.047761194029851, "grad_norm": 32.539127349853516, "learning_rate": 6.083333333333333e-06, "loss": 47.9697, "step": 338 }, { "epoch": 8.071641791044776, "grad_norm": 19.168655395507812, "learning_rate": 6.071428571428571e-06, "loss": 49.4919, "step": 339 }, { "epoch": 8.0955223880597, "grad_norm": 30.041269302368164, "learning_rate": 6.05952380952381e-06, "loss": 48.7887, "step": 340 }, { "epoch": 8.119402985074627, "grad_norm": 21.070598602294922, "learning_rate": 6.047619047619048e-06, "loss": 48.5064, "step": 341 }, { "epoch": 8.143283582089552, "grad_norm": 29.560287475585938, "learning_rate": 6.035714285714286e-06, "loss": 47.7472, "step": 342 }, { "epoch": 8.167164179104478, "grad_norm": 24.256393432617188, "learning_rate": 6.023809523809524e-06, "loss": 48.8917, "step": 343 }, { "epoch": 8.191044776119403, "grad_norm": 29.970674514770508, "learning_rate": 6.011904761904762e-06, "loss": 48.3464, "step": 344 }, { "epoch": 8.214925373134328, "grad_norm": 25.274595260620117, "learning_rate": 6e-06, "loss": 49.1565, "step": 345 }, { "epoch": 8.238805970149254, "grad_norm": 24.000280380249023, "learning_rate": 5.988095238095238e-06, "loss": 49.3396, "step": 346 }, { "epoch": 8.26268656716418, "grad_norm": 25.110261917114258, "learning_rate": 5.976190476190477e-06, "loss": 49.786, "step": 347 }, { "epoch": 8.286567164179104, "grad_norm": 26.188514709472656, "learning_rate": 5.964285714285715e-06, "loss": 50.1652, "step": 348 }, { "epoch": 8.31044776119403, "grad_norm": 18.536714553833008, "learning_rate": 5.9523809523809525e-06, "loss": 49.7224, "step": 349 }, { "epoch": 8.334328358208955, "grad_norm": 33.79502868652344, "learning_rate": 5.940476190476191e-06, "loss": 48.2923, "step": 350 }, { "epoch": 8.35820895522388, "grad_norm": 33.03609085083008, "learning_rate": 5.928571428571429e-06, "loss": 49.128, "step": 351 }, { "epoch": 8.382089552238806, "grad_norm": 23.88555145263672, "learning_rate": 5.916666666666667e-06, "loss": 49.6072, "step": 352 }, { "epoch": 8.405970149253731, "grad_norm": 29.688135147094727, "learning_rate": 5.904761904761905e-06, "loss": 49.0984, "step": 353 }, { "epoch": 8.429850746268656, "grad_norm": 23.166162490844727, "learning_rate": 5.892857142857144e-06, "loss": 48.8104, "step": 354 }, { "epoch": 8.453731343283582, "grad_norm": 27.68876838684082, "learning_rate": 5.8809523809523816e-06, "loss": 48.7745, "step": 355 }, { "epoch": 8.477611940298507, "grad_norm": 26.520286560058594, "learning_rate": 5.8690476190476194e-06, "loss": 47.883, "step": 356 }, { "epoch": 8.501492537313434, "grad_norm": 28.830135345458984, "learning_rate": 5.857142857142858e-06, "loss": 49.1347, "step": 357 }, { "epoch": 8.525373134328358, "grad_norm": 27.387250900268555, "learning_rate": 5.845238095238096e-06, "loss": 48.2092, "step": 358 }, { "epoch": 8.549253731343283, "grad_norm": 23.53616714477539, "learning_rate": 5.833333333333334e-06, "loss": 48.437, "step": 359 }, { "epoch": 8.57313432835821, "grad_norm": 25.665664672851562, "learning_rate": 5.821428571428573e-06, "loss": 49.3006, "step": 360 }, { "epoch": 8.597014925373134, "grad_norm": 24.35331153869629, "learning_rate": 5.8095238095238106e-06, "loss": 49.5249, "step": 361 }, { "epoch": 8.620895522388059, "grad_norm": 28.612688064575195, "learning_rate": 5.7976190476190485e-06, "loss": 50.1344, "step": 362 }, { "epoch": 8.644776119402986, "grad_norm": 25.055545806884766, "learning_rate": 5.785714285714286e-06, "loss": 48.6014, "step": 363 }, { "epoch": 8.66865671641791, "grad_norm": 27.645490646362305, "learning_rate": 5.773809523809523e-06, "loss": 48.953, "step": 364 }, { "epoch": 8.692537313432837, "grad_norm": 26.791471481323242, "learning_rate": 5.761904761904762e-06, "loss": 49.5912, "step": 365 }, { "epoch": 8.716417910447761, "grad_norm": 27.57213592529297, "learning_rate": 5.75e-06, "loss": 48.9958, "step": 366 }, { "epoch": 8.740298507462686, "grad_norm": 20.936344146728516, "learning_rate": 5.738095238095238e-06, "loss": 48.3449, "step": 367 }, { "epoch": 8.764179104477613, "grad_norm": 31.695810317993164, "learning_rate": 5.726190476190477e-06, "loss": 49.1015, "step": 368 }, { "epoch": 8.788059701492537, "grad_norm": 31.584064483642578, "learning_rate": 5.7142857142857145e-06, "loss": 48.8249, "step": 369 }, { "epoch": 8.811940298507462, "grad_norm": 30.70412826538086, "learning_rate": 5.702380952380952e-06, "loss": 49.2984, "step": 370 }, { "epoch": 8.835820895522389, "grad_norm": 36.31315231323242, "learning_rate": 5.690476190476191e-06, "loss": 48.6769, "step": 371 }, { "epoch": 8.859701492537313, "grad_norm": 28.98838996887207, "learning_rate": 5.678571428571429e-06, "loss": 50.2101, "step": 372 }, { "epoch": 8.883582089552238, "grad_norm": 29.07052230834961, "learning_rate": 5.666666666666667e-06, "loss": 49.9206, "step": 373 }, { "epoch": 8.907462686567165, "grad_norm": 31.653087615966797, "learning_rate": 5.654761904761905e-06, "loss": 48.3035, "step": 374 }, { "epoch": 8.93134328358209, "grad_norm": 27.019704818725586, "learning_rate": 5.6428571428571435e-06, "loss": 48.6833, "step": 375 }, { "epoch": 8.955223880597014, "grad_norm": 30.919578552246094, "learning_rate": 5.630952380952381e-06, "loss": 47.7973, "step": 376 }, { "epoch": 8.97910447761194, "grad_norm": 28.002975463867188, "learning_rate": 5.619047619047619e-06, "loss": 49.5539, "step": 377 }, { "epoch": 9.0, "grad_norm": 27.587263107299805, "learning_rate": 5.607142857142858e-06, "loss": 42.9343, "step": 378 }, { "epoch": 9.023880597014925, "grad_norm": 31.024024963378906, "learning_rate": 5.595238095238096e-06, "loss": 48.6774, "step": 379 }, { "epoch": 9.047761194029851, "grad_norm": 27.262426376342773, "learning_rate": 5.583333333333334e-06, "loss": 47.8833, "step": 380 }, { "epoch": 9.071641791044776, "grad_norm": 29.223133087158203, "learning_rate": 5.571428571428572e-06, "loss": 47.1563, "step": 381 }, { "epoch": 9.0955223880597, "grad_norm": 21.004749298095703, "learning_rate": 5.5595238095238104e-06, "loss": 49.0407, "step": 382 }, { "epoch": 9.119402985074627, "grad_norm": 25.157907485961914, "learning_rate": 5.547619047619048e-06, "loss": 48.5233, "step": 383 }, { "epoch": 9.143283582089552, "grad_norm": 17.611478805541992, "learning_rate": 5.535714285714286e-06, "loss": 47.4846, "step": 384 }, { "epoch": 9.167164179104478, "grad_norm": 21.00395965576172, "learning_rate": 5.523809523809525e-06, "loss": 49.7533, "step": 385 }, { "epoch": 9.191044776119403, "grad_norm": 22.07697296142578, "learning_rate": 5.511904761904763e-06, "loss": 48.5003, "step": 386 }, { "epoch": 9.214925373134328, "grad_norm": 21.743778228759766, "learning_rate": 5.500000000000001e-06, "loss": 48.149, "step": 387 }, { "epoch": 9.238805970149254, "grad_norm": 23.499980926513672, "learning_rate": 5.4880952380952394e-06, "loss": 48.2213, "step": 388 }, { "epoch": 9.26268656716418, "grad_norm": 22.22580337524414, "learning_rate": 5.476190476190477e-06, "loss": 48.4671, "step": 389 }, { "epoch": 9.286567164179104, "grad_norm": 26.5915470123291, "learning_rate": 5.464285714285714e-06, "loss": 49.2343, "step": 390 }, { "epoch": 9.31044776119403, "grad_norm": 22.510892868041992, "learning_rate": 5.452380952380952e-06, "loss": 48.9363, "step": 391 }, { "epoch": 9.334328358208955, "grad_norm": 27.17405128479004, "learning_rate": 5.44047619047619e-06, "loss": 49.1814, "step": 392 }, { "epoch": 9.35820895522388, "grad_norm": 29.143529891967773, "learning_rate": 5.428571428571429e-06, "loss": 48.4786, "step": 393 }, { "epoch": 9.382089552238806, "grad_norm": 20.24784278869629, "learning_rate": 5.416666666666667e-06, "loss": 49.2987, "step": 394 }, { "epoch": 9.405970149253731, "grad_norm": 31.44426155090332, "learning_rate": 5.404761904761905e-06, "loss": 49.9466, "step": 395 }, { "epoch": 9.429850746268656, "grad_norm": 23.775951385498047, "learning_rate": 5.392857142857143e-06, "loss": 49.1681, "step": 396 }, { "epoch": 9.453731343283582, "grad_norm": 22.168636322021484, "learning_rate": 5.380952380952381e-06, "loss": 48.8523, "step": 397 }, { "epoch": 9.477611940298507, "grad_norm": 20.944936752319336, "learning_rate": 5.369047619047619e-06, "loss": 48.7369, "step": 398 }, { "epoch": 9.501492537313434, "grad_norm": 23.880292892456055, "learning_rate": 5.357142857142857e-06, "loss": 48.4703, "step": 399 }, { "epoch": 9.525373134328358, "grad_norm": 25.316978454589844, "learning_rate": 5.345238095238096e-06, "loss": 48.3752, "step": 400 }, { "epoch": 9.549253731343283, "grad_norm": 24.398311614990234, "learning_rate": 5.333333333333334e-06, "loss": 47.532, "step": 401 }, { "epoch": 9.57313432835821, "grad_norm": 23.157140731811523, "learning_rate": 5.3214285714285715e-06, "loss": 49.1824, "step": 402 }, { "epoch": 9.597014925373134, "grad_norm": 21.641061782836914, "learning_rate": 5.30952380952381e-06, "loss": 49.6601, "step": 403 }, { "epoch": 9.620895522388059, "grad_norm": 23.863712310791016, "learning_rate": 5.297619047619048e-06, "loss": 49.2146, "step": 404 }, { "epoch": 9.644776119402986, "grad_norm": 21.876007080078125, "learning_rate": 5.285714285714286e-06, "loss": 48.0027, "step": 405 }, { "epoch": 9.66865671641791, "grad_norm": 25.783042907714844, "learning_rate": 5.273809523809525e-06, "loss": 48.2702, "step": 406 }, { "epoch": 9.692537313432837, "grad_norm": 18.782087326049805, "learning_rate": 5.261904761904763e-06, "loss": 48.9365, "step": 407 }, { "epoch": 9.716417910447761, "grad_norm": 20.206588745117188, "learning_rate": 5.2500000000000006e-06, "loss": 49.4144, "step": 408 }, { "epoch": 9.740298507462686, "grad_norm": 20.98710823059082, "learning_rate": 5.2380952380952384e-06, "loss": 49.7442, "step": 409 }, { "epoch": 9.764179104477613, "grad_norm": 19.24452018737793, "learning_rate": 5.226190476190477e-06, "loss": 49.0249, "step": 410 }, { "epoch": 9.788059701492537, "grad_norm": 23.18075180053711, "learning_rate": 5.214285714285715e-06, "loss": 48.8795, "step": 411 }, { "epoch": 9.811940298507462, "grad_norm": 17.233261108398438, "learning_rate": 5.202380952380953e-06, "loss": 49.2985, "step": 412 }, { "epoch": 9.835820895522389, "grad_norm": 24.74007797241211, "learning_rate": 5.190476190476192e-06, "loss": 48.8793, "step": 413 }, { "epoch": 9.859701492537313, "grad_norm": 20.26863670349121, "learning_rate": 5.1785714285714296e-06, "loss": 49.6989, "step": 414 }, { "epoch": 9.883582089552238, "grad_norm": 26.168167114257812, "learning_rate": 5.1666666666666675e-06, "loss": 48.7413, "step": 415 }, { "epoch": 9.907462686567165, "grad_norm": 29.008501052856445, "learning_rate": 5.1547619047619045e-06, "loss": 48.7414, "step": 416 }, { "epoch": 9.93134328358209, "grad_norm": 18.459829330444336, "learning_rate": 5.142857142857142e-06, "loss": 47.7865, "step": 417 }, { "epoch": 9.955223880597014, "grad_norm": 20.898181915283203, "learning_rate": 5.130952380952381e-06, "loss": 47.7274, "step": 418 }, { "epoch": 9.97910447761194, "grad_norm": 23.5065860748291, "learning_rate": 5.119047619047619e-06, "loss": 48.471, "step": 419 }, { "epoch": 10.0, "grad_norm": 23.147043228149414, "learning_rate": 5.107142857142857e-06, "loss": 42.3971, "step": 420 }, { "epoch": 10.023880597014925, "grad_norm": 28.423707962036133, "learning_rate": 5.095238095238096e-06, "loss": 49.4977, "step": 421 }, { "epoch": 10.047761194029851, "grad_norm": 22.017820358276367, "learning_rate": 5.0833333333333335e-06, "loss": 47.0638, "step": 422 }, { "epoch": 10.071641791044776, "grad_norm": 18.173845291137695, "learning_rate": 5.071428571428571e-06, "loss": 48.338, "step": 423 }, { "epoch": 10.0955223880597, "grad_norm": 17.628551483154297, "learning_rate": 5.05952380952381e-06, "loss": 48.2847, "step": 424 }, { "epoch": 10.119402985074627, "grad_norm": 19.974040985107422, "learning_rate": 5.047619047619048e-06, "loss": 49.2284, "step": 425 }, { "epoch": 10.143283582089552, "grad_norm": 22.45549774169922, "learning_rate": 5.035714285714286e-06, "loss": 49.6345, "step": 426 }, { "epoch": 10.167164179104478, "grad_norm": 21.609479904174805, "learning_rate": 5.023809523809524e-06, "loss": 48.2098, "step": 427 }, { "epoch": 10.191044776119403, "grad_norm": 24.7137451171875, "learning_rate": 5.0119047619047625e-06, "loss": 47.9527, "step": 428 }, { "epoch": 10.214925373134328, "grad_norm": 22.888975143432617, "learning_rate": 5e-06, "loss": 49.781, "step": 429 }, { "epoch": 10.238805970149254, "grad_norm": 25.53217124938965, "learning_rate": 4.988095238095238e-06, "loss": 48.9902, "step": 430 }, { "epoch": 10.26268656716418, "grad_norm": 27.80384063720703, "learning_rate": 4.976190476190477e-06, "loss": 48.2545, "step": 431 }, { "epoch": 10.286567164179104, "grad_norm": 21.421342849731445, "learning_rate": 4.964285714285715e-06, "loss": 49.1483, "step": 432 }, { "epoch": 10.31044776119403, "grad_norm": 26.178152084350586, "learning_rate": 4.952380952380953e-06, "loss": 49.1129, "step": 433 }, { "epoch": 10.334328358208955, "grad_norm": 27.993371963500977, "learning_rate": 4.940476190476191e-06, "loss": 48.1783, "step": 434 }, { "epoch": 10.35820895522388, "grad_norm": 26.75821876525879, "learning_rate": 4.928571428571429e-06, "loss": 48.1773, "step": 435 }, { "epoch": 10.382089552238806, "grad_norm": 25.641353607177734, "learning_rate": 4.9166666666666665e-06, "loss": 48.9295, "step": 436 }, { "epoch": 10.405970149253731, "grad_norm": 23.26271629333496, "learning_rate": 4.904761904761905e-06, "loss": 49.5486, "step": 437 }, { "epoch": 10.429850746268656, "grad_norm": 23.637466430664062, "learning_rate": 4.892857142857143e-06, "loss": 48.1263, "step": 438 }, { "epoch": 10.453731343283582, "grad_norm": 29.285432815551758, "learning_rate": 4.880952380952381e-06, "loss": 48.2424, "step": 439 }, { "epoch": 10.477611940298507, "grad_norm": 29.91914939880371, "learning_rate": 4.86904761904762e-06, "loss": 48.3695, "step": 440 }, { "epoch": 10.501492537313434, "grad_norm": 25.249099731445312, "learning_rate": 4.857142857142858e-06, "loss": 48.3644, "step": 441 }, { "epoch": 10.525373134328358, "grad_norm": 22.37591552734375, "learning_rate": 4.8452380952380955e-06, "loss": 49.397, "step": 442 }, { "epoch": 10.549253731343283, "grad_norm": 22.805437088012695, "learning_rate": 4.833333333333333e-06, "loss": 48.6522, "step": 443 }, { "epoch": 10.57313432835821, "grad_norm": 21.229095458984375, "learning_rate": 4.821428571428572e-06, "loss": 47.6681, "step": 444 }, { "epoch": 10.597014925373134, "grad_norm": 23.359468460083008, "learning_rate": 4.80952380952381e-06, "loss": 48.602, "step": 445 }, { "epoch": 10.620895522388059, "grad_norm": 20.953310012817383, "learning_rate": 4.797619047619048e-06, "loss": 49.3366, "step": 446 }, { "epoch": 10.644776119402986, "grad_norm": 21.970388412475586, "learning_rate": 4.785714285714287e-06, "loss": 46.964, "step": 447 }, { "epoch": 10.66865671641791, "grad_norm": 24.282426834106445, "learning_rate": 4.7738095238095245e-06, "loss": 48.2676, "step": 448 }, { "epoch": 10.692537313432837, "grad_norm": 15.47967529296875, "learning_rate": 4.761904761904762e-06, "loss": 48.1993, "step": 449 }, { "epoch": 10.716417910447761, "grad_norm": 23.230947494506836, "learning_rate": 4.75e-06, "loss": 48.5229, "step": 450 }, { "epoch": 10.740298507462686, "grad_norm": 20.514225006103516, "learning_rate": 4.738095238095238e-06, "loss": 48.062, "step": 451 }, { "epoch": 10.764179104477613, "grad_norm": 19.060667037963867, "learning_rate": 4.726190476190476e-06, "loss": 48.3893, "step": 452 }, { "epoch": 10.788059701492537, "grad_norm": 29.78558349609375, "learning_rate": 4.714285714285715e-06, "loss": 48.9921, "step": 453 }, { "epoch": 10.811940298507462, "grad_norm": 23.262001037597656, "learning_rate": 4.702380952380953e-06, "loss": 48.5597, "step": 454 }, { "epoch": 10.835820895522389, "grad_norm": 25.83403778076172, "learning_rate": 4.6904761904761905e-06, "loss": 49.2911, "step": 455 }, { "epoch": 10.859701492537313, "grad_norm": 21.846391677856445, "learning_rate": 4.678571428571429e-06, "loss": 47.3256, "step": 456 }, { "epoch": 10.883582089552238, "grad_norm": 17.09532356262207, "learning_rate": 4.666666666666667e-06, "loss": 48.3647, "step": 457 }, { "epoch": 10.907462686567165, "grad_norm": 31.050525665283203, "learning_rate": 4.654761904761905e-06, "loss": 48.3605, "step": 458 }, { "epoch": 10.93134328358209, "grad_norm": 22.532379150390625, "learning_rate": 4.642857142857144e-06, "loss": 49.0826, "step": 459 }, { "epoch": 10.955223880597014, "grad_norm": 23.585033416748047, "learning_rate": 4.630952380952382e-06, "loss": 48.5111, "step": 460 }, { "epoch": 10.97910447761194, "grad_norm": NaN, "learning_rate": 4.6190476190476196e-06, "loss": 66.9717, "step": 461 }, { "epoch": 11.0, "grad_norm": 24.73590087890625, "learning_rate": 4.6190476190476196e-06, "loss": 41.9122, "step": 462 }, { "epoch": 11.023880597014925, "grad_norm": 27.4709415435791, "learning_rate": 4.6071428571428574e-06, "loss": 48.4682, "step": 463 }, { "epoch": 11.047761194029851, "grad_norm": 26.158245086669922, "learning_rate": 4.595238095238095e-06, "loss": 48.1845, "step": 464 }, { "epoch": 11.071641791044776, "grad_norm": 25.14693260192871, "learning_rate": 4.583333333333333e-06, "loss": 48.4229, "step": 465 }, { "epoch": 11.0955223880597, "grad_norm": 22.229764938354492, "learning_rate": 4.571428571428572e-06, "loss": 47.8876, "step": 466 }, { "epoch": 11.119402985074627, "grad_norm": 24.202686309814453, "learning_rate": 4.55952380952381e-06, "loss": 48.4304, "step": 467 }, { "epoch": 11.143283582089552, "grad_norm": 21.449726104736328, "learning_rate": 4.547619047619048e-06, "loss": 47.6457, "step": 468 }, { "epoch": 11.167164179104478, "grad_norm": 23.769763946533203, "learning_rate": 4.5357142857142865e-06, "loss": 49.1031, "step": 469 }, { "epoch": 11.191044776119403, "grad_norm": 21.20684814453125, "learning_rate": 4.523809523809524e-06, "loss": 47.6488, "step": 470 }, { "epoch": 11.214925373134328, "grad_norm": 17.992631912231445, "learning_rate": 4.511904761904762e-06, "loss": 47.9435, "step": 471 }, { "epoch": 11.238805970149254, "grad_norm": 22.017776489257812, "learning_rate": 4.5e-06, "loss": 48.5224, "step": 472 }, { "epoch": 11.26268656716418, "grad_norm": 22.98673439025879, "learning_rate": 4.488095238095239e-06, "loss": 47.9258, "step": 473 }, { "epoch": 11.286567164179104, "grad_norm": 16.146743774414062, "learning_rate": 4.476190476190477e-06, "loss": 48.3957, "step": 474 }, { "epoch": 11.31044776119403, "grad_norm": 23.30071258544922, "learning_rate": 4.464285714285715e-06, "loss": 48.5472, "step": 475 }, { "epoch": 11.334328358208955, "grad_norm": 24.949913024902344, "learning_rate": 4.4523809523809525e-06, "loss": 48.2387, "step": 476 }, { "epoch": 11.35820895522388, "grad_norm": 23.10662841796875, "learning_rate": 4.44047619047619e-06, "loss": 49.0681, "step": 477 }, { "epoch": 11.382089552238806, "grad_norm": 19.024614334106445, "learning_rate": 4.428571428571429e-06, "loss": 49.3255, "step": 478 }, { "epoch": 11.405970149253731, "grad_norm": 22.34437370300293, "learning_rate": 4.416666666666667e-06, "loss": 47.0069, "step": 479 }, { "epoch": 11.429850746268656, "grad_norm": 23.563596725463867, "learning_rate": 4.404761904761905e-06, "loss": 46.8188, "step": 480 }, { "epoch": 11.453731343283582, "grad_norm": 20.5488338470459, "learning_rate": 4.392857142857143e-06, "loss": 47.8277, "step": 481 }, { "epoch": 11.477611940298507, "grad_norm": 18.416519165039062, "learning_rate": 4.3809523809523815e-06, "loss": 48.2203, "step": 482 }, { "epoch": 11.501492537313434, "grad_norm": 28.21132469177246, "learning_rate": 4.369047619047619e-06, "loss": 48.0691, "step": 483 }, { "epoch": 11.525373134328358, "grad_norm": 21.36182975769043, "learning_rate": 4.357142857142857e-06, "loss": 48.273, "step": 484 }, { "epoch": 11.549253731343283, "grad_norm": 25.726530075073242, "learning_rate": 4.345238095238096e-06, "loss": 48.7529, "step": 485 }, { "epoch": 11.57313432835821, "grad_norm": 21.686412811279297, "learning_rate": 4.333333333333334e-06, "loss": 48.3005, "step": 486 }, { "epoch": 11.597014925373134, "grad_norm": 20.56638526916504, "learning_rate": 4.321428571428572e-06, "loss": 50.1248, "step": 487 }, { "epoch": 11.620895522388059, "grad_norm": 24.193323135375977, "learning_rate": 4.30952380952381e-06, "loss": 48.6031, "step": 488 }, { "epoch": 11.644776119402986, "grad_norm": 17.18548583984375, "learning_rate": 4.297619047619048e-06, "loss": 49.2039, "step": 489 }, { "epoch": 11.66865671641791, "grad_norm": 19.07050895690918, "learning_rate": 4.2857142857142855e-06, "loss": 48.0961, "step": 490 }, { "epoch": 11.692537313432837, "grad_norm": 19.831188201904297, "learning_rate": 4.273809523809524e-06, "loss": 48.5481, "step": 491 }, { "epoch": 11.716417910447761, "grad_norm": 23.408592224121094, "learning_rate": 4.261904761904762e-06, "loss": 48.583, "step": 492 }, { "epoch": 11.740298507462686, "grad_norm": 22.152788162231445, "learning_rate": 4.25e-06, "loss": 48.6684, "step": 493 }, { "epoch": 11.764179104477613, "grad_norm": NaN, "learning_rate": 4.238095238095239e-06, "loss": 54.7097, "step": 494 }, { "epoch": 11.788059701492537, "grad_norm": 23.1225528717041, "learning_rate": 4.238095238095239e-06, "loss": 48.5439, "step": 495 }, { "epoch": 11.811940298507462, "grad_norm": 24.673904418945312, "learning_rate": 4.226190476190477e-06, "loss": 48.2645, "step": 496 }, { "epoch": 11.835820895522389, "grad_norm": 23.318784713745117, "learning_rate": 4.2142857142857145e-06, "loss": 47.9159, "step": 497 }, { "epoch": 11.859701492537313, "grad_norm": 24.62889289855957, "learning_rate": 4.202380952380952e-06, "loss": 48.1392, "step": 498 }, { "epoch": 11.883582089552238, "grad_norm": 17.315168380737305, "learning_rate": 4.190476190476191e-06, "loss": 49.399, "step": 499 }, { "epoch": 11.907462686567165, "grad_norm": 24.458532333374023, "learning_rate": 4.178571428571429e-06, "loss": 49.2189, "step": 500 }, { "epoch": 11.93134328358209, "grad_norm": 28.294036865234375, "learning_rate": 4.166666666666667e-06, "loss": 48.4759, "step": 501 }, { "epoch": 11.955223880597014, "grad_norm": 22.393577575683594, "learning_rate": 4.154761904761906e-06, "loss": 48.9718, "step": 502 }, { "epoch": 11.97910447761194, "grad_norm": 20.199522018432617, "learning_rate": 4.1428571428571435e-06, "loss": 47.5364, "step": 503 }, { "epoch": 12.0, "grad_norm": 22.080204010009766, "learning_rate": 4.130952380952381e-06, "loss": 42.5308, "step": 504 }, { "epoch": 12.023880597014925, "grad_norm": 28.897024154663086, "learning_rate": 4.119047619047619e-06, "loss": 48.9022, "step": 505 }, { "epoch": 12.047761194029851, "grad_norm": 28.31342887878418, "learning_rate": 4.107142857142857e-06, "loss": 47.6489, "step": 506 }, { "epoch": 12.071641791044776, "grad_norm": 22.62079620361328, "learning_rate": 4.095238095238096e-06, "loss": 48.1606, "step": 507 }, { "epoch": 12.0955223880597, "grad_norm": 33.49858474731445, "learning_rate": 4.083333333333334e-06, "loss": 47.8462, "step": 508 }, { "epoch": 12.119402985074627, "grad_norm": 22.20858383178711, "learning_rate": 4.071428571428572e-06, "loss": 47.2505, "step": 509 }, { "epoch": 12.143283582089552, "grad_norm": 25.425495147705078, "learning_rate": 4.0595238095238095e-06, "loss": 48.6289, "step": 510 }, { "epoch": 12.167164179104478, "grad_norm": 29.32784652709961, "learning_rate": 4.047619047619048e-06, "loss": 47.7772, "step": 511 }, { "epoch": 12.191044776119403, "grad_norm": 20.661781311035156, "learning_rate": 4.035714285714286e-06, "loss": 47.1414, "step": 512 }, { "epoch": 12.214925373134328, "grad_norm": 31.4210205078125, "learning_rate": 4.023809523809524e-06, "loss": 47.4312, "step": 513 }, { "epoch": 12.238805970149254, "grad_norm": 32.390071868896484, "learning_rate": 4.011904761904763e-06, "loss": 49.9899, "step": 514 }, { "epoch": 12.26268656716418, "grad_norm": 17.431835174560547, "learning_rate": 4.000000000000001e-06, "loss": 48.8975, "step": 515 }, { "epoch": 12.286567164179104, "grad_norm": 29.32766342163086, "learning_rate": 3.9880952380952386e-06, "loss": 48.8764, "step": 516 }, { "epoch": 12.31044776119403, "grad_norm": 29.523069381713867, "learning_rate": 3.9761904761904764e-06, "loss": 48.2602, "step": 517 }, { "epoch": 12.334328358208955, "grad_norm": 23.866840362548828, "learning_rate": 3.964285714285714e-06, "loss": 47.4016, "step": 518 }, { "epoch": 12.35820895522388, "grad_norm": 27.464962005615234, "learning_rate": 3.952380952380952e-06, "loss": 48.2559, "step": 519 }, { "epoch": 12.382089552238806, "grad_norm": 19.796552658081055, "learning_rate": 3.940476190476191e-06, "loss": 48.7665, "step": 520 }, { "epoch": 12.405970149253731, "grad_norm": 18.637983322143555, "learning_rate": 3.928571428571429e-06, "loss": 48.1456, "step": 521 }, { "epoch": 12.429850746268656, "grad_norm": 22.065799713134766, "learning_rate": 3.916666666666667e-06, "loss": 48.7803, "step": 522 }, { "epoch": 12.453731343283582, "grad_norm": 22.648218154907227, "learning_rate": 3.9047619047619055e-06, "loss": 47.3376, "step": 523 }, { "epoch": 12.477611940298507, "grad_norm": 17.55946922302246, "learning_rate": 3.892857142857143e-06, "loss": 47.6002, "step": 524 }, { "epoch": 12.501492537313434, "grad_norm": 19.173139572143555, "learning_rate": 3.880952380952381e-06, "loss": 48.8976, "step": 525 }, { "epoch": 12.525373134328358, "grad_norm": 24.052696228027344, "learning_rate": 3.869047619047619e-06, "loss": 48.1851, "step": 526 }, { "epoch": 12.549253731343283, "grad_norm": 19.28683090209961, "learning_rate": 3.857142857142858e-06, "loss": 48.0342, "step": 527 }, { "epoch": 12.57313432835821, "grad_norm": 21.528470993041992, "learning_rate": 3.845238095238096e-06, "loss": 49.3597, "step": 528 }, { "epoch": 12.597014925373134, "grad_norm": 22.880159378051758, "learning_rate": 3.833333333333334e-06, "loss": 47.9594, "step": 529 }, { "epoch": 12.620895522388059, "grad_norm": 19.00438117980957, "learning_rate": 3.8214285714285715e-06, "loss": 47.2837, "step": 530 }, { "epoch": 12.644776119402986, "grad_norm": 22.21845054626465, "learning_rate": 3.80952380952381e-06, "loss": 47.1453, "step": 531 }, { "epoch": 12.66865671641791, "grad_norm": 18.551712036132812, "learning_rate": 3.7976190476190477e-06, "loss": 47.9594, "step": 532 }, { "epoch": 12.692537313432837, "grad_norm": 17.805360794067383, "learning_rate": 3.785714285714286e-06, "loss": 49.1036, "step": 533 }, { "epoch": 12.716417910447761, "grad_norm": 14.508918762207031, "learning_rate": 3.773809523809524e-06, "loss": 48.1203, "step": 534 }, { "epoch": 12.740298507462686, "grad_norm": 19.395994186401367, "learning_rate": 3.761904761904762e-06, "loss": 47.7891, "step": 535 }, { "epoch": 12.764179104477613, "grad_norm": 27.492908477783203, "learning_rate": 3.7500000000000005e-06, "loss": 48.9027, "step": 536 }, { "epoch": 12.788059701492537, "grad_norm": 21.751968383789062, "learning_rate": 3.7380952380952384e-06, "loss": 48.0929, "step": 537 }, { "epoch": 12.811940298507462, "grad_norm": 24.78274917602539, "learning_rate": 3.7261904761904767e-06, "loss": 48.1678, "step": 538 }, { "epoch": 12.835820895522389, "grad_norm": 26.319196701049805, "learning_rate": 3.7142857142857146e-06, "loss": 49.1874, "step": 539 }, { "epoch": 12.859701492537313, "grad_norm": 20.670148849487305, "learning_rate": 3.702380952380953e-06, "loss": 48.8441, "step": 540 }, { "epoch": 12.883582089552238, "grad_norm": 23.578706741333008, "learning_rate": 3.690476190476191e-06, "loss": 47.1627, "step": 541 }, { "epoch": 12.907462686567165, "grad_norm": 23.807973861694336, "learning_rate": 3.678571428571429e-06, "loss": 47.493, "step": 542 }, { "epoch": 12.93134328358209, "grad_norm": 20.977373123168945, "learning_rate": 3.6666666666666666e-06, "loss": 49.3489, "step": 543 }, { "epoch": 12.955223880597014, "grad_norm": 21.219995498657227, "learning_rate": 3.654761904761905e-06, "loss": 49.8562, "step": 544 }, { "epoch": 12.97910447761194, "grad_norm": 17.777210235595703, "learning_rate": 3.642857142857143e-06, "loss": 48.4018, "step": 545 }, { "epoch": 13.0, "grad_norm": 17.52475929260254, "learning_rate": 3.630952380952381e-06, "loss": 42.3621, "step": 546 }, { "epoch": 13.023880597014925, "grad_norm": 23.431884765625, "learning_rate": 3.6190476190476194e-06, "loss": 49.0982, "step": 547 }, { "epoch": 13.047761194029851, "grad_norm": 25.512338638305664, "learning_rate": 3.6071428571428573e-06, "loss": 47.5758, "step": 548 }, { "epoch": 13.071641791044776, "grad_norm": 25.41205406188965, "learning_rate": 3.5952380952380956e-06, "loss": 49.0519, "step": 549 }, { "epoch": 13.0955223880597, "grad_norm": 20.511945724487305, "learning_rate": 3.5833333333333335e-06, "loss": 48.9739, "step": 550 }, { "epoch": 13.119402985074627, "grad_norm": 18.88302993774414, "learning_rate": 3.5714285714285718e-06, "loss": 47.0551, "step": 551 }, { "epoch": 13.143283582089552, "grad_norm": 17.176782608032227, "learning_rate": 3.55952380952381e-06, "loss": 48.0771, "step": 552 }, { "epoch": 13.167164179104478, "grad_norm": 19.72154426574707, "learning_rate": 3.547619047619048e-06, "loss": 49.5084, "step": 553 }, { "epoch": 13.191044776119403, "grad_norm": 24.780994415283203, "learning_rate": 3.5357142857142863e-06, "loss": 46.5557, "step": 554 }, { "epoch": 13.214925373134328, "grad_norm": 20.380996704101562, "learning_rate": 3.523809523809524e-06, "loss": 48.841, "step": 555 }, { "epoch": 13.238805970149254, "grad_norm": 26.90860939025879, "learning_rate": 3.511904761904762e-06, "loss": 47.6185, "step": 556 }, { "epoch": 13.26268656716418, "grad_norm": 21.40388298034668, "learning_rate": 3.5e-06, "loss": 47.787, "step": 557 }, { "epoch": 13.286567164179104, "grad_norm": 24.708845138549805, "learning_rate": 3.4880952380952383e-06, "loss": 47.1974, "step": 558 }, { "epoch": 13.31044776119403, "grad_norm": 25.317148208618164, "learning_rate": 3.476190476190476e-06, "loss": 49.2282, "step": 559 }, { "epoch": 13.334328358208955, "grad_norm": 22.903011322021484, "learning_rate": 3.4642857142857145e-06, "loss": 47.0762, "step": 560 }, { "epoch": 13.35820895522388, "grad_norm": 23.626604080200195, "learning_rate": 3.4523809523809528e-06, "loss": 47.3622, "step": 561 }, { "epoch": 13.382089552238806, "grad_norm": 16.69061279296875, "learning_rate": 3.4404761904761907e-06, "loss": 48.5621, "step": 562 }, { "epoch": 13.405970149253731, "grad_norm": 20.52508544921875, "learning_rate": 3.428571428571429e-06, "loss": 47.6565, "step": 563 }, { "epoch": 13.429850746268656, "grad_norm": 25.125743865966797, "learning_rate": 3.416666666666667e-06, "loss": 48.1353, "step": 564 }, { "epoch": 13.453731343283582, "grad_norm": 20.697166442871094, "learning_rate": 3.404761904761905e-06, "loss": 47.9368, "step": 565 }, { "epoch": 13.477611940298507, "grad_norm": 22.396892547607422, "learning_rate": 3.3928571428571435e-06, "loss": 48.2956, "step": 566 }, { "epoch": 13.501492537313434, "grad_norm": 24.770437240600586, "learning_rate": 3.3809523809523814e-06, "loss": 48.4467, "step": 567 }, { "epoch": 13.525373134328358, "grad_norm": 19.44706153869629, "learning_rate": 3.3690476190476197e-06, "loss": 48.3155, "step": 568 }, { "epoch": 13.549253731343283, "grad_norm": 27.680660247802734, "learning_rate": 3.357142857142857e-06, "loss": 47.9039, "step": 569 }, { "epoch": 13.57313432835821, "grad_norm": 21.89419174194336, "learning_rate": 3.3452380952380954e-06, "loss": 47.9416, "step": 570 }, { "epoch": 13.597014925373134, "grad_norm": 19.10918426513672, "learning_rate": 3.3333333333333333e-06, "loss": 48.45, "step": 571 }, { "epoch": 13.620895522388059, "grad_norm": 29.83106231689453, "learning_rate": 3.3214285714285716e-06, "loss": 48.9583, "step": 572 }, { "epoch": 13.644776119402986, "grad_norm": 28.05882453918457, "learning_rate": 3.3095238095238095e-06, "loss": 49.108, "step": 573 }, { "epoch": 13.66865671641791, "grad_norm": 17.379384994506836, "learning_rate": 3.297619047619048e-06, "loss": 48.4707, "step": 574 }, { "epoch": 13.692537313432837, "grad_norm": 19.15117645263672, "learning_rate": 3.285714285714286e-06, "loss": 48.476, "step": 575 }, { "epoch": 13.716417910447761, "grad_norm": 23.892152786254883, "learning_rate": 3.273809523809524e-06, "loss": 48.0321, "step": 576 }, { "epoch": 13.740298507462686, "grad_norm": 18.658008575439453, "learning_rate": 3.2619047619047623e-06, "loss": 47.2192, "step": 577 }, { "epoch": 13.764179104477613, "grad_norm": 16.940099716186523, "learning_rate": 3.2500000000000002e-06, "loss": 49.2263, "step": 578 }, { "epoch": 13.788059701492537, "grad_norm": 25.7972412109375, "learning_rate": 3.2380952380952385e-06, "loss": 47.5039, "step": 579 }, { "epoch": 13.811940298507462, "grad_norm": 28.928129196166992, "learning_rate": 3.226190476190477e-06, "loss": 47.9264, "step": 580 }, { "epoch": 13.835820895522389, "grad_norm": 23.67597007751465, "learning_rate": 3.2142857142857147e-06, "loss": 49.1464, "step": 581 }, { "epoch": 13.859701492537313, "grad_norm": 18.345443725585938, "learning_rate": 3.202380952380952e-06, "loss": 47.888, "step": 582 }, { "epoch": 13.883582089552238, "grad_norm": 19.80716896057129, "learning_rate": 3.1904761904761905e-06, "loss": 47.4324, "step": 583 }, { "epoch": 13.907462686567165, "grad_norm": 20.488346099853516, "learning_rate": 3.178571428571429e-06, "loss": 48.3033, "step": 584 }, { "epoch": 13.93134328358209, "grad_norm": 22.3657283782959, "learning_rate": 3.1666666666666667e-06, "loss": 48.1474, "step": 585 }, { "epoch": 13.955223880597014, "grad_norm": 17.457408905029297, "learning_rate": 3.154761904761905e-06, "loss": 47.2418, "step": 586 }, { "epoch": 13.97910447761194, "grad_norm": NaN, "learning_rate": 3.142857142857143e-06, "loss": 54.1812, "step": 587 }, { "epoch": 14.0, "grad_norm": 17.137672424316406, "learning_rate": 3.142857142857143e-06, "loss": 42.3703, "step": 588 }, { "epoch": 14.023880597014925, "grad_norm": 20.55642318725586, "learning_rate": 3.130952380952381e-06, "loss": 49.4628, "step": 589 }, { "epoch": 14.047761194029851, "grad_norm": 19.925596237182617, "learning_rate": 3.1190476190476195e-06, "loss": 47.5266, "step": 590 }, { "epoch": 14.071641791044776, "grad_norm": 12.49276065826416, "learning_rate": 3.1071428571428574e-06, "loss": 47.8654, "step": 591 }, { "epoch": 14.0955223880597, "grad_norm": 17.266550064086914, "learning_rate": 3.0952380952380957e-06, "loss": 48.4362, "step": 592 }, { "epoch": 14.119402985074627, "grad_norm": 18.234397888183594, "learning_rate": 3.0833333333333336e-06, "loss": 48.9532, "step": 593 }, { "epoch": 14.143283582089552, "grad_norm": 19.880165100097656, "learning_rate": 3.071428571428572e-06, "loss": 48.0088, "step": 594 }, { "epoch": 14.167164179104478, "grad_norm": 23.04216766357422, "learning_rate": 3.05952380952381e-06, "loss": 48.0934, "step": 595 }, { "epoch": 14.191044776119403, "grad_norm": 19.199676513671875, "learning_rate": 3.047619047619048e-06, "loss": 48.3845, "step": 596 }, { "epoch": 14.214925373134328, "grad_norm": 20.758337020874023, "learning_rate": 3.0357142857142856e-06, "loss": 47.4652, "step": 597 }, { "epoch": 14.238805970149254, "grad_norm": 17.532787322998047, "learning_rate": 3.023809523809524e-06, "loss": 48.0212, "step": 598 }, { "epoch": 14.26268656716418, "grad_norm": 16.547094345092773, "learning_rate": 3.011904761904762e-06, "loss": 48.6113, "step": 599 }, { "epoch": 14.286567164179104, "grad_norm": 16.324464797973633, "learning_rate": 3e-06, "loss": 47.9735, "step": 600 }, { "epoch": 14.31044776119403, "grad_norm": 16.54167938232422, "learning_rate": 2.9880952380952384e-06, "loss": 47.4436, "step": 601 }, { "epoch": 14.334328358208955, "grad_norm": 23.455759048461914, "learning_rate": 2.9761904761904763e-06, "loss": 47.6631, "step": 602 }, { "epoch": 14.35820895522388, "grad_norm": 19.159008026123047, "learning_rate": 2.9642857142857146e-06, "loss": 48.0291, "step": 603 }, { "epoch": 14.382089552238806, "grad_norm": 18.66881561279297, "learning_rate": 2.9523809523809525e-06, "loss": 46.4582, "step": 604 }, { "epoch": 14.405970149253731, "grad_norm": 19.129064559936523, "learning_rate": 2.9404761904761908e-06, "loss": 49.4455, "step": 605 }, { "epoch": 14.429850746268656, "grad_norm": NaN, "learning_rate": 2.928571428571429e-06, "loss": 78.6564, "step": 606 }, { "epoch": 14.453731343283582, "grad_norm": 18.47364044189453, "learning_rate": 2.928571428571429e-06, "loss": 48.1748, "step": 607 }, { "epoch": 14.477611940298507, "grad_norm": 17.920883178710938, "learning_rate": 2.916666666666667e-06, "loss": 47.6447, "step": 608 }, { "epoch": 14.501492537313434, "grad_norm": 18.263038635253906, "learning_rate": 2.9047619047619053e-06, "loss": 48.7324, "step": 609 }, { "epoch": 14.525373134328358, "grad_norm": 24.323266983032227, "learning_rate": 2.892857142857143e-06, "loss": 48.6135, "step": 610 }, { "epoch": 14.549253731343283, "grad_norm": 21.56492042541504, "learning_rate": 2.880952380952381e-06, "loss": 47.0007, "step": 611 }, { "epoch": 14.57313432835821, "grad_norm": 17.741748809814453, "learning_rate": 2.869047619047619e-06, "loss": 46.3136, "step": 612 }, { "epoch": 14.597014925373134, "grad_norm": 17.218914031982422, "learning_rate": 2.8571428571428573e-06, "loss": 47.6417, "step": 613 }, { "epoch": 14.620895522388059, "grad_norm": 22.856996536254883, "learning_rate": 2.8452380952380956e-06, "loss": 47.6898, "step": 614 }, { "epoch": 14.644776119402986, "grad_norm": NaN, "learning_rate": 2.8333333333333335e-06, "loss": 53.529, "step": 615 }, { "epoch": 14.66865671641791, "grad_norm": 23.29751968383789, "learning_rate": 2.8333333333333335e-06, "loss": 48.164, "step": 616 }, { "epoch": 14.692537313432837, "grad_norm": 15.633321762084961, "learning_rate": 2.8214285714285718e-06, "loss": 46.9866, "step": 617 }, { "epoch": 14.716417910447761, "grad_norm": 21.713376998901367, "learning_rate": 2.8095238095238096e-06, "loss": 48.2856, "step": 618 }, { "epoch": 14.740298507462686, "grad_norm": 17.07369613647461, "learning_rate": 2.797619047619048e-06, "loss": 46.4404, "step": 619 }, { "epoch": 14.764179104477613, "grad_norm": 14.855449676513672, "learning_rate": 2.785714285714286e-06, "loss": 48.2668, "step": 620 }, { "epoch": 14.788059701492537, "grad_norm": 16.479616165161133, "learning_rate": 2.773809523809524e-06, "loss": 49.2661, "step": 621 }, { "epoch": 14.811940298507462, "grad_norm": 14.471490859985352, "learning_rate": 2.7619047619047625e-06, "loss": 47.0484, "step": 622 }, { "epoch": 14.835820895522389, "grad_norm": 19.018714904785156, "learning_rate": 2.7500000000000004e-06, "loss": 49.2253, "step": 623 }, { "epoch": 14.859701492537313, "grad_norm": 16.21799087524414, "learning_rate": 2.7380952380952387e-06, "loss": 49.0738, "step": 624 }, { "epoch": 14.883582089552238, "grad_norm": 20.86383628845215, "learning_rate": 2.726190476190476e-06, "loss": 48.4231, "step": 625 }, { "epoch": 14.907462686567165, "grad_norm": 20.60930633544922, "learning_rate": 2.7142857142857144e-06, "loss": 47.7464, "step": 626 }, { "epoch": 14.93134328358209, "grad_norm": 20.909135818481445, "learning_rate": 2.7023809523809523e-06, "loss": 48.519, "step": 627 }, { "epoch": 14.955223880597014, "grad_norm": 18.555694580078125, "learning_rate": 2.6904761904761906e-06, "loss": 48.1625, "step": 628 }, { "epoch": 14.97910447761194, "grad_norm": 18.154813766479492, "learning_rate": 2.6785714285714285e-06, "loss": 48.9444, "step": 629 }, { "epoch": 15.0, "grad_norm": 19.308523178100586, "learning_rate": 2.666666666666667e-06, "loss": 42.1936, "step": 630 }, { "epoch": 15.023880597014925, "grad_norm": 22.725357055664062, "learning_rate": 2.654761904761905e-06, "loss": 49.5597, "step": 631 }, { "epoch": 15.047761194029851, "grad_norm": 18.862451553344727, "learning_rate": 2.642857142857143e-06, "loss": 46.914, "step": 632 }, { "epoch": 15.071641791044776, "grad_norm": 19.017065048217773, "learning_rate": 2.6309523809523813e-06, "loss": 47.7233, "step": 633 }, { "epoch": 15.0955223880597, "grad_norm": 19.03627586364746, "learning_rate": 2.6190476190476192e-06, "loss": 48.0406, "step": 634 }, { "epoch": 15.119402985074627, "grad_norm": 18.53116798400879, "learning_rate": 2.6071428571428575e-06, "loss": 47.3259, "step": 635 }, { "epoch": 15.143283582089552, "grad_norm": 19.265275955200195, "learning_rate": 2.595238095238096e-06, "loss": 47.2465, "step": 636 }, { "epoch": 15.167164179104478, "grad_norm": 19.497289657592773, "learning_rate": 2.5833333333333337e-06, "loss": 48.5984, "step": 637 }, { "epoch": 15.191044776119403, "grad_norm": 20.183780670166016, "learning_rate": 2.571428571428571e-06, "loss": 46.6221, "step": 638 }, { "epoch": 15.214925373134328, "grad_norm": 22.911672592163086, "learning_rate": 2.5595238095238095e-06, "loss": 48.0178, "step": 639 }, { "epoch": 15.238805970149254, "grad_norm": 20.678709030151367, "learning_rate": 2.547619047619048e-06, "loss": 47.0322, "step": 640 }, { "epoch": 15.26268656716418, "grad_norm": 18.579042434692383, "learning_rate": 2.5357142857142857e-06, "loss": 48.1428, "step": 641 }, { "epoch": 15.286567164179104, "grad_norm": 23.61576271057129, "learning_rate": 2.523809523809524e-06, "loss": 48.444, "step": 642 }, { "epoch": 15.31044776119403, "grad_norm": 19.602746963500977, "learning_rate": 2.511904761904762e-06, "loss": 48.2582, "step": 643 }, { "epoch": 15.334328358208955, "grad_norm": 12.509607315063477, "learning_rate": 2.5e-06, "loss": 48.8531, "step": 644 }, { "epoch": 15.35820895522388, "grad_norm": 18.749767303466797, "learning_rate": 2.4880952380952385e-06, "loss": 47.6453, "step": 645 }, { "epoch": 15.382089552238806, "grad_norm": 20.612041473388672, "learning_rate": 2.4761904761904764e-06, "loss": 48.7038, "step": 646 }, { "epoch": 15.405970149253731, "grad_norm": 18.65719985961914, "learning_rate": 2.4642857142857147e-06, "loss": 47.7954, "step": 647 }, { "epoch": 15.429850746268656, "grad_norm": 22.636686325073242, "learning_rate": 2.4523809523809526e-06, "loss": 48.1164, "step": 648 }, { "epoch": 15.453731343283582, "grad_norm": 20.93446922302246, "learning_rate": 2.4404761904761905e-06, "loss": 48.5955, "step": 649 }, { "epoch": 15.477611940298507, "grad_norm": 20.77125358581543, "learning_rate": 2.428571428571429e-06, "loss": 48.4369, "step": 650 }, { "epoch": 15.501492537313434, "grad_norm": 17.003498077392578, "learning_rate": 2.4166666666666667e-06, "loss": 49.0355, "step": 651 }, { "epoch": 15.525373134328358, "grad_norm": 20.743436813354492, "learning_rate": 2.404761904761905e-06, "loss": 47.8368, "step": 652 }, { "epoch": 15.549253731343283, "grad_norm": NaN, "learning_rate": 2.3928571428571433e-06, "loss": 41.6371, "step": 653 }, { "epoch": 15.57313432835821, "grad_norm": 21.716781616210938, "learning_rate": 2.3928571428571433e-06, "loss": 48.5806, "step": 654 }, { "epoch": 15.597014925373134, "grad_norm": 18.8812198638916, "learning_rate": 2.380952380952381e-06, "loss": 49.0707, "step": 655 }, { "epoch": 15.620895522388059, "grad_norm": 22.305049896240234, "learning_rate": 2.369047619047619e-06, "loss": 47.7556, "step": 656 }, { "epoch": 15.644776119402986, "grad_norm": 20.51401710510254, "learning_rate": 2.3571428571428574e-06, "loss": 48.1588, "step": 657 }, { "epoch": 15.66865671641791, "grad_norm": 17.691770553588867, "learning_rate": 2.3452380952380953e-06, "loss": 47.5187, "step": 658 }, { "epoch": 15.692537313432837, "grad_norm": 22.343585968017578, "learning_rate": 2.3333333333333336e-06, "loss": 47.5725, "step": 659 }, { "epoch": 15.716417910447761, "grad_norm": 21.656587600708008, "learning_rate": 2.321428571428572e-06, "loss": 47.6903, "step": 660 }, { "epoch": 15.740298507462686, "grad_norm": 20.632055282592773, "learning_rate": 2.3095238095238098e-06, "loss": 47.4526, "step": 661 }, { "epoch": 15.764179104477613, "grad_norm": 22.324811935424805, "learning_rate": 2.2976190476190477e-06, "loss": 47.3316, "step": 662 }, { "epoch": 15.788059701492537, "grad_norm": 19.320737838745117, "learning_rate": 2.285714285714286e-06, "loss": 48.2315, "step": 663 }, { "epoch": 15.811940298507462, "grad_norm": 18.58050537109375, "learning_rate": 2.273809523809524e-06, "loss": 47.9147, "step": 664 }, { "epoch": 15.835820895522389, "grad_norm": 20.37384796142578, "learning_rate": 2.261904761904762e-06, "loss": 47.5874, "step": 665 }, { "epoch": 15.859701492537313, "grad_norm": 20.893856048583984, "learning_rate": 2.25e-06, "loss": 49.225, "step": 666 }, { "epoch": 15.883582089552238, "grad_norm": 18.4589786529541, "learning_rate": 2.2380952380952384e-06, "loss": 47.3042, "step": 667 }, { "epoch": 15.907462686567165, "grad_norm": 20.845996856689453, "learning_rate": 2.2261904761904763e-06, "loss": 47.3255, "step": 668 }, { "epoch": 15.93134328358209, "grad_norm": 20.149137496948242, "learning_rate": 2.2142857142857146e-06, "loss": 48.6543, "step": 669 }, { "epoch": 15.955223880597014, "grad_norm": 14.768882751464844, "learning_rate": 2.2023809523809525e-06, "loss": 46.8274, "step": 670 }, { "epoch": 15.97910447761194, "grad_norm": 26.926074981689453, "learning_rate": 2.1904761904761908e-06, "loss": 48.2035, "step": 671 }, { "epoch": 16.0, "grad_norm": 22.840618133544922, "learning_rate": 2.1785714285714286e-06, "loss": 42.8242, "step": 672 }, { "epoch": 16.023880597014927, "grad_norm": 16.183008193969727, "learning_rate": 2.166666666666667e-06, "loss": 47.8309, "step": 673 }, { "epoch": 16.04776119402985, "grad_norm": 20.603744506835938, "learning_rate": 2.154761904761905e-06, "loss": 48.5197, "step": 674 }, { "epoch": 16.071641791044776, "grad_norm": 26.492107391357422, "learning_rate": 2.1428571428571427e-06, "loss": 47.2312, "step": 675 }, { "epoch": 16.095522388059702, "grad_norm": 19.786901473999023, "learning_rate": 2.130952380952381e-06, "loss": 49.6201, "step": 676 }, { "epoch": 16.119402985074625, "grad_norm": 18.150909423828125, "learning_rate": 2.1190476190476194e-06, "loss": 48.7407, "step": 677 }, { "epoch": 16.143283582089552, "grad_norm": 18.797983169555664, "learning_rate": 2.1071428571428572e-06, "loss": 47.0801, "step": 678 }, { "epoch": 16.16716417910448, "grad_norm": 16.088953018188477, "learning_rate": 2.0952380952380955e-06, "loss": 47.6509, "step": 679 }, { "epoch": 16.1910447761194, "grad_norm": 20.359085083007812, "learning_rate": 2.0833333333333334e-06, "loss": 48.9226, "step": 680 }, { "epoch": 16.214925373134328, "grad_norm": 21.99265480041504, "learning_rate": 2.0714285714285717e-06, "loss": 47.3775, "step": 681 }, { "epoch": 16.238805970149254, "grad_norm": 18.616743087768555, "learning_rate": 2.0595238095238096e-06, "loss": 45.8448, "step": 682 }, { "epoch": 16.262686567164177, "grad_norm": 19.6337947845459, "learning_rate": 2.047619047619048e-06, "loss": 48.2077, "step": 683 }, { "epoch": 16.286567164179104, "grad_norm": 23.881439208984375, "learning_rate": 2.035714285714286e-06, "loss": 48.6796, "step": 684 }, { "epoch": 16.31044776119403, "grad_norm": 19.665023803710938, "learning_rate": 2.023809523809524e-06, "loss": 48.7275, "step": 685 }, { "epoch": 16.334328358208957, "grad_norm": 18.438793182373047, "learning_rate": 2.011904761904762e-06, "loss": 49.585, "step": 686 }, { "epoch": 16.35820895522388, "grad_norm": 17.073816299438477, "learning_rate": 2.0000000000000003e-06, "loss": 47.4548, "step": 687 }, { "epoch": 16.382089552238806, "grad_norm": 20.504276275634766, "learning_rate": 1.9880952380952382e-06, "loss": 47.3555, "step": 688 }, { "epoch": 16.405970149253733, "grad_norm": 21.564546585083008, "learning_rate": 1.976190476190476e-06, "loss": 47.6304, "step": 689 }, { "epoch": 16.429850746268656, "grad_norm": 16.773197174072266, "learning_rate": 1.9642857142857144e-06, "loss": 49.078, "step": 690 }, { "epoch": 16.453731343283582, "grad_norm": 22.77934455871582, "learning_rate": 1.9523809523809527e-06, "loss": 47.8289, "step": 691 }, { "epoch": 16.47761194029851, "grad_norm": 17.375993728637695, "learning_rate": 1.9404761904761906e-06, "loss": 48.4812, "step": 692 }, { "epoch": 16.501492537313432, "grad_norm": 21.407329559326172, "learning_rate": 1.928571428571429e-06, "loss": 48.2934, "step": 693 }, { "epoch": 16.52537313432836, "grad_norm": 15.673316955566406, "learning_rate": 1.916666666666667e-06, "loss": 46.7304, "step": 694 }, { "epoch": 16.549253731343285, "grad_norm": 24.577089309692383, "learning_rate": 1.904761904761905e-06, "loss": 47.9352, "step": 695 }, { "epoch": 16.573134328358208, "grad_norm": 24.46076774597168, "learning_rate": 1.892857142857143e-06, "loss": 48.8173, "step": 696 }, { "epoch": 16.597014925373134, "grad_norm": 14.248388290405273, "learning_rate": 1.880952380952381e-06, "loss": 48.5858, "step": 697 }, { "epoch": 16.62089552238806, "grad_norm": 16.925329208374023, "learning_rate": 1.8690476190476192e-06, "loss": 47.8278, "step": 698 }, { "epoch": 16.644776119402984, "grad_norm": 25.52614402770996, "learning_rate": 1.8571428571428573e-06, "loss": 48.1248, "step": 699 }, { "epoch": 16.66865671641791, "grad_norm": 21.011341094970703, "learning_rate": 1.8452380952380954e-06, "loss": 47.8154, "step": 700 }, { "epoch": 16.692537313432837, "grad_norm": 14.694896697998047, "learning_rate": 1.8333333333333333e-06, "loss": 47.9668, "step": 701 }, { "epoch": 16.71641791044776, "grad_norm": 22.32903480529785, "learning_rate": 1.8214285714285716e-06, "loss": 48.6784, "step": 702 }, { "epoch": 16.740298507462686, "grad_norm": 17.19482421875, "learning_rate": 1.8095238095238097e-06, "loss": 46.9973, "step": 703 }, { "epoch": 16.764179104477613, "grad_norm": 14.590733528137207, "learning_rate": 1.7976190476190478e-06, "loss": 47.2393, "step": 704 }, { "epoch": 16.788059701492536, "grad_norm": 17.131982803344727, "learning_rate": 1.7857142857142859e-06, "loss": 47.9412, "step": 705 }, { "epoch": 16.811940298507462, "grad_norm": 18.513992309570312, "learning_rate": 1.773809523809524e-06, "loss": 48.8777, "step": 706 }, { "epoch": 16.83582089552239, "grad_norm": 17.625539779663086, "learning_rate": 1.761904761904762e-06, "loss": 48.3885, "step": 707 }, { "epoch": 16.85970149253731, "grad_norm": 16.540056228637695, "learning_rate": 1.75e-06, "loss": 47.8561, "step": 708 }, { "epoch": 16.883582089552238, "grad_norm": 20.070533752441406, "learning_rate": 1.738095238095238e-06, "loss": 46.6418, "step": 709 }, { "epoch": 16.907462686567165, "grad_norm": 18.742460250854492, "learning_rate": 1.7261904761904764e-06, "loss": 46.7471, "step": 710 }, { "epoch": 16.93134328358209, "grad_norm": 17.491954803466797, "learning_rate": 1.7142857142857145e-06, "loss": 47.5558, "step": 711 }, { "epoch": 16.955223880597014, "grad_norm": 17.457130432128906, "learning_rate": 1.7023809523809526e-06, "loss": 47.4441, "step": 712 }, { "epoch": 16.97910447761194, "grad_norm": 21.053844451904297, "learning_rate": 1.6904761904761907e-06, "loss": 48.1931, "step": 713 }, { "epoch": 17.0, "grad_norm": 16.943801879882812, "learning_rate": 1.6785714285714286e-06, "loss": 41.9934, "step": 714 }, { "epoch": 17.023880597014927, "grad_norm": 21.56785011291504, "learning_rate": 1.6666666666666667e-06, "loss": 47.1652, "step": 715 }, { "epoch": 17.04776119402985, "grad_norm": 21.193382263183594, "learning_rate": 1.6547619047619048e-06, "loss": 47.6751, "step": 716 }, { "epoch": 17.071641791044776, "grad_norm": 16.245115280151367, "learning_rate": 1.642857142857143e-06, "loss": 47.4133, "step": 717 }, { "epoch": 17.095522388059702, "grad_norm": 18.834646224975586, "learning_rate": 1.6309523809523812e-06, "loss": 48.145, "step": 718 }, { "epoch": 17.119402985074625, "grad_norm": 15.769698143005371, "learning_rate": 1.6190476190476193e-06, "loss": 48.1181, "step": 719 }, { "epoch": 17.143283582089552, "grad_norm": 13.460511207580566, "learning_rate": 1.6071428571428574e-06, "loss": 49.1229, "step": 720 }, { "epoch": 17.16716417910448, "grad_norm": 18.58087158203125, "learning_rate": 1.5952380952380953e-06, "loss": 47.5095, "step": 721 }, { "epoch": 17.1910447761194, "grad_norm": 18.607332229614258, "learning_rate": 1.5833333333333333e-06, "loss": 49.1334, "step": 722 }, { "epoch": 17.214925373134328, "grad_norm": 15.046488761901855, "learning_rate": 1.5714285714285714e-06, "loss": 47.6151, "step": 723 }, { "epoch": 17.238805970149254, "grad_norm": 17.442358016967773, "learning_rate": 1.5595238095238098e-06, "loss": 47.3771, "step": 724 }, { "epoch": 17.262686567164177, "grad_norm": 11.690101623535156, "learning_rate": 1.5476190476190479e-06, "loss": 48.1095, "step": 725 }, { "epoch": 17.286567164179104, "grad_norm": 17.945192337036133, "learning_rate": 1.535714285714286e-06, "loss": 47.8941, "step": 726 }, { "epoch": 17.31044776119403, "grad_norm": 13.878116607666016, "learning_rate": 1.523809523809524e-06, "loss": 47.6422, "step": 727 }, { "epoch": 17.334328358208957, "grad_norm": 15.942928314208984, "learning_rate": 1.511904761904762e-06, "loss": 46.9964, "step": 728 }, { "epoch": 17.35820895522388, "grad_norm": 13.57482624053955, "learning_rate": 1.5e-06, "loss": 47.1832, "step": 729 }, { "epoch": 17.382089552238806, "grad_norm": 13.781617164611816, "learning_rate": 1.4880952380952381e-06, "loss": 48.9621, "step": 730 }, { "epoch": 17.405970149253733, "grad_norm": 14.26857852935791, "learning_rate": 1.4761904761904762e-06, "loss": 48.6631, "step": 731 }, { "epoch": 17.429850746268656, "grad_norm": 16.23444938659668, "learning_rate": 1.4642857142857145e-06, "loss": 46.84, "step": 732 }, { "epoch": 17.453731343283582, "grad_norm": 17.442630767822266, "learning_rate": 1.4523809523809526e-06, "loss": 48.2996, "step": 733 }, { "epoch": 17.47761194029851, "grad_norm": 14.329082489013672, "learning_rate": 1.4404761904761905e-06, "loss": 47.396, "step": 734 }, { "epoch": 17.501492537313432, "grad_norm": 14.772257804870605, "learning_rate": 1.4285714285714286e-06, "loss": 48.5733, "step": 735 }, { "epoch": 17.52537313432836, "grad_norm": 14.331324577331543, "learning_rate": 1.4166666666666667e-06, "loss": 48.2969, "step": 736 }, { "epoch": 17.549253731343285, "grad_norm": 17.498600006103516, "learning_rate": 1.4047619047619048e-06, "loss": 48.0221, "step": 737 }, { "epoch": 17.573134328358208, "grad_norm": 16.155025482177734, "learning_rate": 1.392857142857143e-06, "loss": 47.9848, "step": 738 }, { "epoch": 17.597014925373134, "grad_norm": 15.552813529968262, "learning_rate": 1.3809523809523812e-06, "loss": 48.4413, "step": 739 }, { "epoch": 17.62089552238806, "grad_norm": 15.887310981750488, "learning_rate": 1.3690476190476193e-06, "loss": 47.6463, "step": 740 }, { "epoch": 17.644776119402984, "grad_norm": 17.783411026000977, "learning_rate": 1.3571428571428572e-06, "loss": 47.8009, "step": 741 }, { "epoch": 17.66865671641791, "grad_norm": 17.108932495117188, "learning_rate": 1.3452380952380953e-06, "loss": 47.9888, "step": 742 }, { "epoch": 17.692537313432837, "grad_norm": 19.79203224182129, "learning_rate": 1.3333333333333334e-06, "loss": 48.5732, "step": 743 }, { "epoch": 17.71641791044776, "grad_norm": 17.06324005126953, "learning_rate": 1.3214285714285715e-06, "loss": 48.4815, "step": 744 }, { "epoch": 17.740298507462686, "grad_norm": 17.399097442626953, "learning_rate": 1.3095238095238096e-06, "loss": 47.5591, "step": 745 }, { "epoch": 17.764179104477613, "grad_norm": 15.836935997009277, "learning_rate": 1.297619047619048e-06, "loss": 47.994, "step": 746 }, { "epoch": 17.788059701492536, "grad_norm": 18.20856475830078, "learning_rate": 1.2857142857142856e-06, "loss": 47.9979, "step": 747 }, { "epoch": 17.811940298507462, "grad_norm": 19.10239601135254, "learning_rate": 1.273809523809524e-06, "loss": 48.1196, "step": 748 }, { "epoch": 17.83582089552239, "grad_norm": 17.21087646484375, "learning_rate": 1.261904761904762e-06, "loss": 47.8816, "step": 749 }, { "epoch": 17.85970149253731, "grad_norm": 14.792268753051758, "learning_rate": 1.25e-06, "loss": 47.8182, "step": 750 }, { "epoch": 17.883582089552238, "grad_norm": 13.695488929748535, "learning_rate": 1.2380952380952382e-06, "loss": 47.4298, "step": 751 }, { "epoch": 17.907462686567165, "grad_norm": 15.197646141052246, "learning_rate": 1.2261904761904763e-06, "loss": 47.7132, "step": 752 }, { "epoch": 17.93134328358209, "grad_norm": 19.13431739807129, "learning_rate": 1.2142857142857144e-06, "loss": 48.431, "step": 753 }, { "epoch": 17.955223880597014, "grad_norm": 15.690411567687988, "learning_rate": 1.2023809523809525e-06, "loss": 47.4529, "step": 754 }, { "epoch": 17.97910447761194, "grad_norm": 14.75414752960205, "learning_rate": 1.1904761904761906e-06, "loss": 47.9668, "step": 755 }, { "epoch": 18.0, "grad_norm": 11.497115135192871, "learning_rate": 1.1785714285714287e-06, "loss": 41.8653, "step": 756 }, { "epoch": 18.023880597014927, "grad_norm": 16.20159339904785, "learning_rate": 1.1666666666666668e-06, "loss": 47.2871, "step": 757 }, { "epoch": 18.04776119402985, "grad_norm": 15.400497436523438, "learning_rate": 1.1547619047619049e-06, "loss": 46.3673, "step": 758 }, { "epoch": 18.071641791044776, "grad_norm": 12.16773509979248, "learning_rate": 1.142857142857143e-06, "loss": 47.7463, "step": 759 }, { "epoch": 18.095522388059702, "grad_norm": 19.978351593017578, "learning_rate": 1.130952380952381e-06, "loss": 47.4632, "step": 760 }, { "epoch": 18.119402985074625, "grad_norm": 14.090561866760254, "learning_rate": 1.1190476190476192e-06, "loss": 48.9356, "step": 761 }, { "epoch": 18.143283582089552, "grad_norm": 13.143173217773438, "learning_rate": 1.1071428571428573e-06, "loss": 48.1129, "step": 762 }, { "epoch": 18.16716417910448, "grad_norm": 15.609000205993652, "learning_rate": 1.0952380952380954e-06, "loss": 48.8554, "step": 763 }, { "epoch": 18.1910447761194, "grad_norm": 14.012611389160156, "learning_rate": 1.0833333333333335e-06, "loss": 47.6785, "step": 764 }, { "epoch": 18.214925373134328, "grad_norm": 13.417494773864746, "learning_rate": 1.0714285714285714e-06, "loss": 48.2733, "step": 765 }, { "epoch": 18.238805970149254, "grad_norm": 15.562864303588867, "learning_rate": 1.0595238095238097e-06, "loss": 48.0488, "step": 766 }, { "epoch": 18.262686567164177, "grad_norm": 17.083723068237305, "learning_rate": 1.0476190476190478e-06, "loss": 49.3136, "step": 767 }, { "epoch": 18.286567164179104, "grad_norm": 16.564395904541016, "learning_rate": 1.0357142857142859e-06, "loss": 48.1835, "step": 768 }, { "epoch": 18.31044776119403, "grad_norm": 14.694929122924805, "learning_rate": 1.023809523809524e-06, "loss": 48.8632, "step": 769 }, { "epoch": 18.334328358208957, "grad_norm": 13.4928560256958, "learning_rate": 1.011904761904762e-06, "loss": 47.3333, "step": 770 }, { "epoch": 18.35820895522388, "grad_norm": 12.980204582214355, "learning_rate": 1.0000000000000002e-06, "loss": 47.7961, "step": 771 }, { "epoch": 18.382089552238806, "grad_norm": 14.19666862487793, "learning_rate": 9.88095238095238e-07, "loss": 47.7806, "step": 772 }, { "epoch": 18.405970149253733, "grad_norm": 13.2017183303833, "learning_rate": 9.761904761904764e-07, "loss": 46.1119, "step": 773 }, { "epoch": 18.429850746268656, "grad_norm": 15.064650535583496, "learning_rate": 9.642857142857145e-07, "loss": 47.8105, "step": 774 }, { "epoch": 18.453731343283582, "grad_norm": 14.713834762573242, "learning_rate": 9.523809523809525e-07, "loss": 47.5723, "step": 775 }, { "epoch": 18.47761194029851, "grad_norm": 13.394201278686523, "learning_rate": 9.404761904761906e-07, "loss": 48.1632, "step": 776 }, { "epoch": 18.501492537313432, "grad_norm": 10.742532730102539, "learning_rate": 9.285714285714287e-07, "loss": 48.6118, "step": 777 }, { "epoch": 18.52537313432836, "grad_norm": 12.402650833129883, "learning_rate": 9.166666666666666e-07, "loss": 48.6597, "step": 778 }, { "epoch": 18.549253731343285, "grad_norm": 15.73616886138916, "learning_rate": 9.047619047619048e-07, "loss": 47.9931, "step": 779 }, { "epoch": 18.573134328358208, "grad_norm": 14.188780784606934, "learning_rate": 8.928571428571429e-07, "loss": 47.0081, "step": 780 }, { "epoch": 18.597014925373134, "grad_norm": 12.516701698303223, "learning_rate": 8.80952380952381e-07, "loss": 49.0287, "step": 781 }, { "epoch": 18.62089552238806, "grad_norm": 15.069429397583008, "learning_rate": 8.69047619047619e-07, "loss": 47.8721, "step": 782 }, { "epoch": 18.644776119402984, "grad_norm": 13.091047286987305, "learning_rate": 8.571428571428572e-07, "loss": 48.1678, "step": 783 }, { "epoch": 18.66865671641791, "grad_norm": 15.017065048217773, "learning_rate": 8.452380952380953e-07, "loss": 47.1277, "step": 784 }, { "epoch": 18.692537313432837, "grad_norm": 12.091531753540039, "learning_rate": 8.333333333333333e-07, "loss": 47.4962, "step": 785 }, { "epoch": 18.71641791044776, "grad_norm": 15.20182991027832, "learning_rate": 8.214285714285715e-07, "loss": 48.6224, "step": 786 }, { "epoch": 18.740298507462686, "grad_norm": 11.17827320098877, "learning_rate": 8.095238095238096e-07, "loss": 47.9759, "step": 787 }, { "epoch": 18.764179104477613, "grad_norm": 14.884525299072266, "learning_rate": 7.976190476190476e-07, "loss": 47.9749, "step": 788 }, { "epoch": 18.788059701492536, "grad_norm": 14.360984802246094, "learning_rate": 7.857142857142857e-07, "loss": 48.9952, "step": 789 }, { "epoch": 18.811940298507462, "grad_norm": 11.265621185302734, "learning_rate": 7.738095238095239e-07, "loss": 47.4274, "step": 790 }, { "epoch": 18.83582089552239, "grad_norm": 18.072290420532227, "learning_rate": 7.61904761904762e-07, "loss": 47.8815, "step": 791 }, { "epoch": 18.85970149253731, "grad_norm": 15.310029029846191, "learning_rate": 7.5e-07, "loss": 47.3907, "step": 792 }, { "epoch": 18.883582089552238, "grad_norm": 14.032752990722656, "learning_rate": 7.380952380952381e-07, "loss": 48.0883, "step": 793 }, { "epoch": 18.907462686567165, "grad_norm": 12.853668212890625, "learning_rate": 7.261904761904763e-07, "loss": 47.1452, "step": 794 }, { "epoch": 18.93134328358209, "grad_norm": 15.6067476272583, "learning_rate": 7.142857142857143e-07, "loss": 46.6303, "step": 795 }, { "epoch": 18.955223880597014, "grad_norm": 12.828201293945312, "learning_rate": 7.023809523809524e-07, "loss": 47.9885, "step": 796 }, { "epoch": 18.97910447761194, "grad_norm": 13.336589813232422, "learning_rate": 6.904761904761906e-07, "loss": 48.2315, "step": 797 }, { "epoch": 19.0, "grad_norm": 13.629434585571289, "learning_rate": 6.785714285714286e-07, "loss": 41.9374, "step": 798 }, { "epoch": 19.023880597014927, "grad_norm": 13.237930297851562, "learning_rate": 6.666666666666667e-07, "loss": 46.6802, "step": 799 }, { "epoch": 19.04776119402985, "grad_norm": 13.715863227844238, "learning_rate": 6.547619047619048e-07, "loss": 49.0494, "step": 800 }, { "epoch": 19.071641791044776, "grad_norm": 13.439970016479492, "learning_rate": 6.428571428571428e-07, "loss": 46.3647, "step": 801 }, { "epoch": 19.095522388059702, "grad_norm": 15.468942642211914, "learning_rate": 6.30952380952381e-07, "loss": 48.4725, "step": 802 }, { "epoch": 19.119402985074625, "grad_norm": 14.160257339477539, "learning_rate": 6.190476190476191e-07, "loss": 47.4033, "step": 803 }, { "epoch": 19.143283582089552, "grad_norm": 13.667155265808105, "learning_rate": 6.071428571428572e-07, "loss": 48.4729, "step": 804 }, { "epoch": 19.16716417910448, "grad_norm": 12.428313255310059, "learning_rate": 5.952380952380953e-07, "loss": 48.8939, "step": 805 }, { "epoch": 19.1910447761194, "grad_norm": 12.985882759094238, "learning_rate": 5.833333333333334e-07, "loss": 47.0663, "step": 806 }, { "epoch": 19.214925373134328, "grad_norm": 12.827404975891113, "learning_rate": 5.714285714285715e-07, "loss": 47.5614, "step": 807 }, { "epoch": 19.238805970149254, "grad_norm": 11.078653335571289, "learning_rate": 5.595238095238096e-07, "loss": 48.564, "step": 808 }, { "epoch": 19.262686567164177, "grad_norm": 13.346016883850098, "learning_rate": 5.476190476190477e-07, "loss": 48.0823, "step": 809 }, { "epoch": 19.286567164179104, "grad_norm": 14.523963928222656, "learning_rate": 5.357142857142857e-07, "loss": 48.4225, "step": 810 }, { "epoch": 19.31044776119403, "grad_norm": 12.598445892333984, "learning_rate": 5.238095238095239e-07, "loss": 47.2514, "step": 811 }, { "epoch": 19.334328358208957, "grad_norm": 12.203497886657715, "learning_rate": 5.11904761904762e-07, "loss": 47.217, "step": 812 }, { "epoch": 19.35820895522388, "grad_norm": 12.144754409790039, "learning_rate": 5.000000000000001e-07, "loss": 47.191, "step": 813 }, { "epoch": 19.382089552238806, "grad_norm": 12.585047721862793, "learning_rate": 4.880952380952382e-07, "loss": 48.4947, "step": 814 }, { "epoch": 19.405970149253733, "grad_norm": 11.295561790466309, "learning_rate": 4.7619047619047623e-07, "loss": 46.9444, "step": 815 }, { "epoch": 19.429850746268656, "grad_norm": 13.055256843566895, "learning_rate": 4.642857142857143e-07, "loss": 48.4469, "step": 816 }, { "epoch": 19.453731343283582, "grad_norm": 12.051807403564453, "learning_rate": 4.523809523809524e-07, "loss": 48.0547, "step": 817 }, { "epoch": 19.47761194029851, "grad_norm": 13.44185733795166, "learning_rate": 4.404761904761905e-07, "loss": 48.3155, "step": 818 }, { "epoch": 19.501492537313432, "grad_norm": 12.405723571777344, "learning_rate": 4.285714285714286e-07, "loss": 48.3982, "step": 819 }, { "epoch": 19.52537313432836, "grad_norm": 14.900402069091797, "learning_rate": 4.1666666666666667e-07, "loss": 48.2653, "step": 820 }, { "epoch": 19.549253731343285, "grad_norm": 10.70801067352295, "learning_rate": 4.047619047619048e-07, "loss": 48.0384, "step": 821 }, { "epoch": 19.573134328358208, "grad_norm": 12.318074226379395, "learning_rate": 3.9285714285714286e-07, "loss": 47.554, "step": 822 }, { "epoch": 19.597014925373134, "grad_norm": 12.898431777954102, "learning_rate": 3.80952380952381e-07, "loss": 48.3586, "step": 823 }, { "epoch": 19.62089552238806, "grad_norm": 15.45779800415039, "learning_rate": 3.6904761904761906e-07, "loss": 48.4193, "step": 824 }, { "epoch": 19.644776119402984, "grad_norm": 11.230570793151855, "learning_rate": 3.5714285714285716e-07, "loss": 48.5294, "step": 825 }, { "epoch": 19.66865671641791, "grad_norm": 13.647272109985352, "learning_rate": 3.452380952380953e-07, "loss": 47.2569, "step": 826 }, { "epoch": 19.692537313432837, "grad_norm": 11.521178245544434, "learning_rate": 3.3333333333333335e-07, "loss": 47.2899, "step": 827 }, { "epoch": 19.71641791044776, "grad_norm": 11.537907600402832, "learning_rate": 3.214285714285714e-07, "loss": 46.6462, "step": 828 }, { "epoch": 19.740298507462686, "grad_norm": 11.670267105102539, "learning_rate": 3.0952380952380955e-07, "loss": 47.9797, "step": 829 }, { "epoch": 19.764179104477613, "grad_norm": 11.660557746887207, "learning_rate": 2.9761904761904765e-07, "loss": 47.9744, "step": 830 }, { "epoch": 19.788059701492536, "grad_norm": 12.332269668579102, "learning_rate": 2.8571428571428575e-07, "loss": 48.6015, "step": 831 }, { "epoch": 19.811940298507462, "grad_norm": 12.228848457336426, "learning_rate": 2.7380952380952385e-07, "loss": 47.3215, "step": 832 }, { "epoch": 19.83582089552239, "grad_norm": 13.780754089355469, "learning_rate": 2.6190476190476194e-07, "loss": 48.853, "step": 833 }, { "epoch": 19.85970149253731, "grad_norm": 11.639240264892578, "learning_rate": 2.5000000000000004e-07, "loss": 48.8199, "step": 834 }, { "epoch": 19.883582089552238, "grad_norm": 10.796862602233887, "learning_rate": 2.3809523809523811e-07, "loss": 47.5373, "step": 835 }, { "epoch": 19.907462686567165, "grad_norm": 13.573180198669434, "learning_rate": 2.261904761904762e-07, "loss": 47.8368, "step": 836 }, { "epoch": 19.93134328358209, "grad_norm": 11.497776985168457, "learning_rate": 2.142857142857143e-07, "loss": 47.8226, "step": 837 }, { "epoch": 19.955223880597014, "grad_norm": 10.777889251708984, "learning_rate": 2.023809523809524e-07, "loss": 47.6424, "step": 838 }, { "epoch": 19.97910447761194, "grad_norm": 10.77852725982666, "learning_rate": 1.904761904761905e-07, "loss": 46.379, "step": 839 }, { "epoch": 20.0, "grad_norm": 13.582564353942871, "learning_rate": 1.7857142857142858e-07, "loss": 42.5239, "step": 840 }, { "epoch": 20.0, "step": 840, "total_flos": 4.130470305428237e+16, "train_loss": 49.47331008002872, "train_runtime": 26137.3223, "train_samples_per_second": 4.095, "train_steps_per_second": 0.032 }, { "epoch": 20.023880597014927, "grad_norm": 21.18770408630371, "learning_rate": 1e-05, "loss": 48.1474, "step": 841 }, { "epoch": 20.04776119402985, "grad_norm": Infinity, "learning_rate": 9.99404761904762e-06, "loss": 60.2758, "step": 842 }, { "epoch": 20.071641791044776, "grad_norm": Infinity, "learning_rate": 9.99404761904762e-06, "loss": 61.7211, "step": 843 }, { "epoch": 20.095522388059702, "grad_norm": 504.4407958984375, "learning_rate": 9.99404761904762e-06, "loss": 60.8189, "step": 844 }, { "epoch": 20.119402985074625, "grad_norm": 221.96849060058594, "learning_rate": 9.988095238095239e-06, "loss": 54.4658, "step": 845 }, { "epoch": 20.143283582089552, "grad_norm": 110.4036865234375, "learning_rate": 9.982142857142858e-06, "loss": 52.4242, "step": 846 }, { "epoch": 20.16716417910448, "grad_norm": 82.75493621826172, "learning_rate": 9.976190476190477e-06, "loss": 50.3129, "step": 847 }, { "epoch": 20.1910447761194, "grad_norm": 62.56040573120117, "learning_rate": 9.970238095238096e-06, "loss": 49.7171, "step": 848 }, { "epoch": 20.214925373134328, "grad_norm": 70.04007720947266, "learning_rate": 9.964285714285714e-06, "loss": 48.185, "step": 849 }, { "epoch": 20.238805970149254, "grad_norm": 56.70342254638672, "learning_rate": 9.958333333333334e-06, "loss": 49.5787, "step": 850 }, { "epoch": 20.262686567164177, "grad_norm": 64.66405487060547, "learning_rate": 9.952380952380954e-06, "loss": 49.6106, "step": 851 }, { "epoch": 20.286567164179104, "grad_norm": 43.37612533569336, "learning_rate": 9.946428571428572e-06, "loss": 49.2966, "step": 852 }, { "epoch": 20.31044776119403, "grad_norm": 42.66206359863281, "learning_rate": 9.940476190476192e-06, "loss": 48.7073, "step": 853 }, { "epoch": 20.334328358208957, "grad_norm": 37.17741775512695, "learning_rate": 9.93452380952381e-06, "loss": 48.7592, "step": 854 }, { "epoch": 20.35820895522388, "grad_norm": 39.27332305908203, "learning_rate": 9.92857142857143e-06, "loss": 48.1181, "step": 855 }, { "epoch": 20.382089552238806, "grad_norm": 31.37261390686035, "learning_rate": 9.922619047619048e-06, "loss": 47.4873, "step": 856 }, { "epoch": 20.405970149253733, "grad_norm": 41.693809509277344, "learning_rate": 9.916666666666668e-06, "loss": 48.9428, "step": 857 }, { "epoch": 20.429850746268656, "grad_norm": 29.33939552307129, "learning_rate": 9.910714285714288e-06, "loss": 49.2928, "step": 858 }, { "epoch": 20.453731343283582, "grad_norm": 30.606157302856445, "learning_rate": 9.904761904761906e-06, "loss": 49.1506, "step": 859 }, { "epoch": 20.47761194029851, "grad_norm": 27.273784637451172, "learning_rate": 9.898809523809525e-06, "loss": 46.6136, "step": 860 }, { "epoch": 20.501492537313432, "grad_norm": 24.410682678222656, "learning_rate": 9.892857142857143e-06, "loss": 48.3989, "step": 861 }, { "epoch": 20.52537313432836, "grad_norm": 24.138607025146484, "learning_rate": 9.886904761904763e-06, "loss": 49.3858, "step": 862 }, { "epoch": 20.549253731343285, "grad_norm": 27.50669288635254, "learning_rate": 9.880952380952381e-06, "loss": 48.5058, "step": 863 }, { "epoch": 20.573134328358208, "grad_norm": 27.739347457885742, "learning_rate": 9.875000000000001e-06, "loss": 49.676, "step": 864 }, { "epoch": 20.597014925373134, "grad_norm": 22.63895034790039, "learning_rate": 9.869047619047621e-06, "loss": 47.6998, "step": 865 }, { "epoch": 20.62089552238806, "grad_norm": 26.80891990661621, "learning_rate": 9.863095238095239e-06, "loss": 47.9571, "step": 866 }, { "epoch": 20.644776119402984, "grad_norm": 26.259008407592773, "learning_rate": 9.857142857142859e-06, "loss": 48.8771, "step": 867 }, { "epoch": 20.66865671641791, "grad_norm": 23.716773986816406, "learning_rate": 9.851190476190477e-06, "loss": 47.1255, "step": 868 }, { "epoch": 20.692537313432837, "grad_norm": 26.96156120300293, "learning_rate": 9.845238095238097e-06, "loss": 47.2227, "step": 869 }, { "epoch": 20.71641791044776, "grad_norm": 25.1954345703125, "learning_rate": 9.839285714285715e-06, "loss": 47.6847, "step": 870 }, { "epoch": 20.740298507462686, "grad_norm": 21.56642723083496, "learning_rate": 9.833333333333333e-06, "loss": 47.7292, "step": 871 }, { "epoch": 20.764179104477613, "grad_norm": 25.091773986816406, "learning_rate": 9.827380952380953e-06, "loss": 46.6588, "step": 872 }, { "epoch": 20.788059701492536, "grad_norm": 26.45799446105957, "learning_rate": 9.821428571428573e-06, "loss": 47.3963, "step": 873 }, { "epoch": 20.811940298507462, "grad_norm": 25.865068435668945, "learning_rate": 9.81547619047619e-06, "loss": 48.8823, "step": 874 }, { "epoch": 20.83582089552239, "grad_norm": 27.056106567382812, "learning_rate": 9.80952380952381e-06, "loss": 47.2222, "step": 875 }, { "epoch": 20.85970149253731, "grad_norm": 27.02417755126953, "learning_rate": 9.803571428571428e-06, "loss": 47.4543, "step": 876 }, { "epoch": 20.883582089552238, "grad_norm": 23.681915283203125, "learning_rate": 9.797619047619048e-06, "loss": 47.7518, "step": 877 }, { "epoch": 20.907462686567165, "grad_norm": 20.77193260192871, "learning_rate": 9.791666666666666e-06, "loss": 46.3164, "step": 878 }, { "epoch": 20.93134328358209, "grad_norm": 24.61642837524414, "learning_rate": 9.785714285714286e-06, "loss": 48.6711, "step": 879 }, { "epoch": 20.955223880597014, "grad_norm": 20.59898567199707, "learning_rate": 9.779761904761906e-06, "loss": 49.114, "step": 880 }, { "epoch": 20.97910447761194, "grad_norm": 24.815736770629883, "learning_rate": 9.773809523809524e-06, "loss": 48.4315, "step": 881 }, { "epoch": 21.0, "grad_norm": 17.920352935791016, "learning_rate": 9.767857142857144e-06, "loss": 41.3634, "step": 882 }, { "epoch": 21.023880597014927, "grad_norm": 26.69571876525879, "learning_rate": 9.761904761904762e-06, "loss": 47.8968, "step": 883 }, { "epoch": 21.04776119402985, "grad_norm": 23.156524658203125, "learning_rate": 9.755952380952382e-06, "loss": 48.5914, "step": 884 }, { "epoch": 21.071641791044776, "grad_norm": 21.612483978271484, "learning_rate": 9.75e-06, "loss": 47.8711, "step": 885 }, { "epoch": 21.095522388059702, "grad_norm": 24.346399307250977, "learning_rate": 9.74404761904762e-06, "loss": 48.8689, "step": 886 }, { "epoch": 21.119402985074625, "grad_norm": 21.973896026611328, "learning_rate": 9.73809523809524e-06, "loss": 46.7465, "step": 887 }, { "epoch": 21.143283582089552, "grad_norm": 20.034557342529297, "learning_rate": 9.732142857142858e-06, "loss": 47.1505, "step": 888 }, { "epoch": 21.16716417910448, "grad_norm": 20.113008499145508, "learning_rate": 9.726190476190477e-06, "loss": 47.9234, "step": 889 }, { "epoch": 21.1910447761194, "grad_norm": 24.743249893188477, "learning_rate": 9.720238095238095e-06, "loss": 47.4432, "step": 890 }, { "epoch": 21.214925373134328, "grad_norm": 25.538530349731445, "learning_rate": 9.714285714285715e-06, "loss": 47.0015, "step": 891 }, { "epoch": 21.238805970149254, "grad_norm": 27.43077278137207, "learning_rate": 9.708333333333333e-06, "loss": 48.6757, "step": 892 }, { "epoch": 21.262686567164177, "grad_norm": 25.34470558166504, "learning_rate": 9.702380952380953e-06, "loss": 46.8118, "step": 893 }, { "epoch": 21.286567164179104, "grad_norm": 29.590490341186523, "learning_rate": 9.696428571428573e-06, "loss": 47.5079, "step": 894 }, { "epoch": 21.31044776119403, "grad_norm": 16.418222427368164, "learning_rate": 9.690476190476191e-06, "loss": 48.7679, "step": 895 }, { "epoch": 21.334328358208957, "grad_norm": 30.906719207763672, "learning_rate": 9.68452380952381e-06, "loss": 48.4926, "step": 896 }, { "epoch": 21.35820895522388, "grad_norm": 30.252347946166992, "learning_rate": 9.678571428571429e-06, "loss": 48.9318, "step": 897 }, { "epoch": 21.382089552238806, "grad_norm": 30.137592315673828, "learning_rate": 9.672619047619049e-06, "loss": 47.0388, "step": 898 }, { "epoch": 21.405970149253733, "grad_norm": 25.297151565551758, "learning_rate": 9.666666666666667e-06, "loss": 47.334, "step": 899 }, { "epoch": 21.429850746268656, "grad_norm": 31.72736358642578, "learning_rate": 9.660714285714287e-06, "loss": 47.8769, "step": 900 }, { "epoch": 21.453731343283582, "grad_norm": 24.4852294921875, "learning_rate": 9.654761904761906e-06, "loss": 47.4009, "step": 901 }, { "epoch": 21.47761194029851, "grad_norm": 31.223567962646484, "learning_rate": 9.648809523809524e-06, "loss": 48.4972, "step": 902 }, { "epoch": 21.501492537313432, "grad_norm": 24.1851806640625, "learning_rate": 9.642857142857144e-06, "loss": 46.1818, "step": 903 }, { "epoch": 21.52537313432836, "grad_norm": NaN, "learning_rate": 9.636904761904762e-06, "loss": 70.1176, "step": 904 }, { "epoch": 21.549253731343285, "grad_norm": 29.140161514282227, "learning_rate": 9.636904761904762e-06, "loss": 47.1614, "step": 905 }, { "epoch": 21.573134328358208, "grad_norm": 31.186546325683594, "learning_rate": 9.630952380952382e-06, "loss": 47.3643, "step": 906 }, { "epoch": 21.597014925373134, "grad_norm": 24.395353317260742, "learning_rate": 9.625e-06, "loss": 48.3591, "step": 907 }, { "epoch": 21.62089552238806, "grad_norm": 29.287492752075195, "learning_rate": 9.61904761904762e-06, "loss": 47.261, "step": 908 }, { "epoch": 21.644776119402984, "grad_norm": 26.76996612548828, "learning_rate": 9.61309523809524e-06, "loss": 48.7017, "step": 909 }, { "epoch": 21.66865671641791, "grad_norm": 29.820920944213867, "learning_rate": 9.607142857142858e-06, "loss": 48.5165, "step": 910 }, { "epoch": 21.692537313432837, "grad_norm": 30.011823654174805, "learning_rate": 9.601190476190478e-06, "loss": 46.5558, "step": 911 }, { "epoch": 21.71641791044776, "grad_norm": 32.796905517578125, "learning_rate": 9.595238095238096e-06, "loss": 47.276, "step": 912 }, { "epoch": 21.740298507462686, "grad_norm": 28.798233032226562, "learning_rate": 9.589285714285716e-06, "loss": 47.6033, "step": 913 }, { "epoch": 21.764179104477613, "grad_norm": 31.51072120666504, "learning_rate": 9.583333333333335e-06, "loss": 48.1236, "step": 914 }, { "epoch": 21.788059701492536, "grad_norm": 20.611305236816406, "learning_rate": 9.577380952380953e-06, "loss": 48.2839, "step": 915 }, { "epoch": 21.811940298507462, "grad_norm": 26.748571395874023, "learning_rate": 9.571428571428573e-06, "loss": 48.2225, "step": 916 }, { "epoch": 21.83582089552239, "grad_norm": 22.262859344482422, "learning_rate": 9.565476190476191e-06, "loss": 46.661, "step": 917 }, { "epoch": 21.85970149253731, "grad_norm": 34.15045166015625, "learning_rate": 9.559523809523811e-06, "loss": 47.3229, "step": 918 }, { "epoch": 21.883582089552238, "grad_norm": 24.26387596130371, "learning_rate": 9.55357142857143e-06, "loss": 47.4686, "step": 919 }, { "epoch": 21.907462686567165, "grad_norm": 29.463472366333008, "learning_rate": 9.547619047619049e-06, "loss": 47.6019, "step": 920 }, { "epoch": 21.93134328358209, "grad_norm": 31.184497833251953, "learning_rate": 9.541666666666669e-06, "loss": 47.3228, "step": 921 }, { "epoch": 21.955223880597014, "grad_norm": 26.506031036376953, "learning_rate": 9.535714285714287e-06, "loss": 47.9961, "step": 922 }, { "epoch": 21.97910447761194, "grad_norm": 30.547340393066406, "learning_rate": 9.529761904761905e-06, "loss": 47.8973, "step": 923 }, { "epoch": 22.0, "grad_norm": 22.91999053955078, "learning_rate": 9.523809523809525e-06, "loss": 41.3426, "step": 924 }, { "epoch": 22.023880597014927, "grad_norm": 28.242450714111328, "learning_rate": 9.517857142857143e-06, "loss": 47.2478, "step": 925 }, { "epoch": 22.04776119402985, "grad_norm": 33.07649612426758, "learning_rate": 9.511904761904763e-06, "loss": 47.6489, "step": 926 }, { "epoch": 22.071641791044776, "grad_norm": 28.14696502685547, "learning_rate": 9.50595238095238e-06, "loss": 46.521, "step": 927 }, { "epoch": 22.095522388059702, "grad_norm": 34.472206115722656, "learning_rate": 9.5e-06, "loss": 47.6476, "step": 928 }, { "epoch": 22.119402985074625, "grad_norm": 25.370718002319336, "learning_rate": 9.494047619047619e-06, "loss": 47.7215, "step": 929 }, { "epoch": 22.143283582089552, "grad_norm": 31.77129554748535, "learning_rate": 9.488095238095238e-06, "loss": 46.5566, "step": 930 }, { "epoch": 22.16716417910448, "grad_norm": 25.42667579650879, "learning_rate": 9.482142857142858e-06, "loss": 47.9832, "step": 931 }, { "epoch": 22.1910447761194, "grad_norm": 26.3134765625, "learning_rate": 9.476190476190476e-06, "loss": 47.9402, "step": 932 }, { "epoch": 22.214925373134328, "grad_norm": 31.683523178100586, "learning_rate": 9.470238095238096e-06, "loss": 47.4404, "step": 933 }, { "epoch": 22.238805970149254, "grad_norm": 31.90761375427246, "learning_rate": 9.464285714285714e-06, "loss": 47.7601, "step": 934 }, { "epoch": 22.262686567164177, "grad_norm": 24.635921478271484, "learning_rate": 9.458333333333334e-06, "loss": 46.2573, "step": 935 }, { "epoch": 22.286567164179104, "grad_norm": 25.32915496826172, "learning_rate": 9.452380952380952e-06, "loss": 48.4756, "step": 936 }, { "epoch": 22.31044776119403, "grad_norm": 28.117773056030273, "learning_rate": 9.446428571428572e-06, "loss": 48.6971, "step": 937 }, { "epoch": 22.334328358208957, "grad_norm": 22.504152297973633, "learning_rate": 9.440476190476192e-06, "loss": 47.4534, "step": 938 }, { "epoch": 22.35820895522388, "grad_norm": 31.765676498413086, "learning_rate": 9.43452380952381e-06, "loss": 48.0168, "step": 939 }, { "epoch": 22.382089552238806, "grad_norm": 27.647945404052734, "learning_rate": 9.42857142857143e-06, "loss": 48.0918, "step": 940 }, { "epoch": 22.405970149253733, "grad_norm": 33.35643005371094, "learning_rate": 9.422619047619048e-06, "loss": 48.295, "step": 941 }, { "epoch": 22.429850746268656, "grad_norm": 26.12603187561035, "learning_rate": 9.416666666666667e-06, "loss": 48.8921, "step": 942 }, { "epoch": 22.453731343283582, "grad_norm": 23.728809356689453, "learning_rate": 9.410714285714286e-06, "loss": 47.3206, "step": 943 }, { "epoch": 22.47761194029851, "grad_norm": 28.772401809692383, "learning_rate": 9.404761904761905e-06, "loss": 47.6536, "step": 944 }, { "epoch": 22.501492537313432, "grad_norm": 28.205202102661133, "learning_rate": 9.398809523809525e-06, "loss": 47.1952, "step": 945 }, { "epoch": 22.52537313432836, "grad_norm": 33.80730438232422, "learning_rate": 9.392857142857143e-06, "loss": 47.1336, "step": 946 }, { "epoch": 22.549253731343285, "grad_norm": 25.538846969604492, "learning_rate": 9.386904761904763e-06, "loss": 46.4229, "step": 947 }, { "epoch": 22.573134328358208, "grad_norm": 41.13503646850586, "learning_rate": 9.380952380952381e-06, "loss": 46.8325, "step": 948 }, { "epoch": 22.597014925373134, "grad_norm": 36.823001861572266, "learning_rate": 9.375000000000001e-06, "loss": 47.205, "step": 949 }, { "epoch": 22.62089552238806, "grad_norm": 29.992229461669922, "learning_rate": 9.36904761904762e-06, "loss": 46.683, "step": 950 }, { "epoch": 22.644776119402984, "grad_norm": 40.20172882080078, "learning_rate": 9.363095238095239e-06, "loss": 48.4859, "step": 951 }, { "epoch": 22.66865671641791, "grad_norm": 27.357097625732422, "learning_rate": 9.357142857142859e-06, "loss": 47.2987, "step": 952 }, { "epoch": 22.692537313432837, "grad_norm": 40.66689682006836, "learning_rate": 9.351190476190477e-06, "loss": 46.3579, "step": 953 }, { "epoch": 22.71641791044776, "grad_norm": 35.37788391113281, "learning_rate": 9.345238095238096e-06, "loss": 47.3369, "step": 954 }, { "epoch": 22.740298507462686, "grad_norm": 36.279151916503906, "learning_rate": 9.339285714285715e-06, "loss": 47.1137, "step": 955 }, { "epoch": 22.764179104477613, "grad_norm": 27.949628829956055, "learning_rate": 9.333333333333334e-06, "loss": 47.1438, "step": 956 }, { "epoch": 22.788059701492536, "grad_norm": 45.424556732177734, "learning_rate": 9.327380952380954e-06, "loss": 48.3171, "step": 957 }, { "epoch": 22.811940298507462, "grad_norm": 27.726537704467773, "learning_rate": 9.321428571428572e-06, "loss": 47.1718, "step": 958 }, { "epoch": 22.83582089552239, "grad_norm": 58.36731719970703, "learning_rate": 9.315476190476192e-06, "loss": 47.5895, "step": 959 }, { "epoch": 22.85970149253731, "grad_norm": 58.96028137207031, "learning_rate": 9.30952380952381e-06, "loss": 47.4109, "step": 960 }, { "epoch": 22.883582089552238, "grad_norm": 24.928117752075195, "learning_rate": 9.30357142857143e-06, "loss": 48.1841, "step": 961 }, { "epoch": 22.907462686567165, "grad_norm": 38.36846160888672, "learning_rate": 9.297619047619048e-06, "loss": 47.7438, "step": 962 }, { "epoch": 22.93134328358209, "grad_norm": 37.60481643676758, "learning_rate": 9.291666666666668e-06, "loss": 46.5067, "step": 963 }, { "epoch": 22.955223880597014, "grad_norm": NaN, "learning_rate": 9.285714285714288e-06, "loss": 78.3124, "step": 964 }, { "epoch": 22.97910447761194, "grad_norm": 28.587425231933594, "learning_rate": 9.285714285714288e-06, "loss": 47.1599, "step": 965 }, { "epoch": 23.0, "grad_norm": 41.493404388427734, "learning_rate": 9.279761904761906e-06, "loss": 41.2983, "step": 966 }, { "epoch": 23.023880597014927, "grad_norm": 41.00606918334961, "learning_rate": 9.273809523809525e-06, "loss": 46.8696, "step": 967 }, { "epoch": 23.04776119402985, "grad_norm": 31.043148040771484, "learning_rate": 9.267857142857144e-06, "loss": 46.4614, "step": 968 }, { "epoch": 23.071641791044776, "grad_norm": 36.815940856933594, "learning_rate": 9.261904761904763e-06, "loss": 47.5987, "step": 969 }, { "epoch": 23.095522388059702, "grad_norm": 35.73536682128906, "learning_rate": 9.255952380952381e-06, "loss": 47.8339, "step": 970 }, { "epoch": 23.119402985074625, "grad_norm": 26.95656967163086, "learning_rate": 9.250000000000001e-06, "loss": 48.0632, "step": 971 }, { "epoch": 23.143283582089552, "grad_norm": 40.408348083496094, "learning_rate": 9.244047619047621e-06, "loss": 47.5458, "step": 972 }, { "epoch": 23.16716417910448, "grad_norm": 41.97018051147461, "learning_rate": 9.238095238095239e-06, "loss": 48.8528, "step": 973 }, { "epoch": 23.1910447761194, "grad_norm": 23.809162139892578, "learning_rate": 9.232142857142859e-06, "loss": 47.663, "step": 974 }, { "epoch": 23.214925373134328, "grad_norm": 36.0232048034668, "learning_rate": 9.226190476190477e-06, "loss": 47.496, "step": 975 }, { "epoch": 23.238805970149254, "grad_norm": 32.06623077392578, "learning_rate": 9.220238095238097e-06, "loss": 47.4472, "step": 976 }, { "epoch": 23.262686567164177, "grad_norm": 30.663307189941406, "learning_rate": 9.214285714285715e-06, "loss": 47.1342, "step": 977 }, { "epoch": 23.286567164179104, "grad_norm": 39.121437072753906, "learning_rate": 9.208333333333333e-06, "loss": 47.9977, "step": 978 }, { "epoch": 23.31044776119403, "grad_norm": 31.75649642944336, "learning_rate": 9.202380952380953e-06, "loss": 49.2196, "step": 979 }, { "epoch": 23.334328358208957, "grad_norm": 50.10381317138672, "learning_rate": 9.196428571428571e-06, "loss": 47.6487, "step": 980 }, { "epoch": 23.35820895522388, "grad_norm": 36.412906646728516, "learning_rate": 9.19047619047619e-06, "loss": 47.0012, "step": 981 }, { "epoch": 23.382089552238806, "grad_norm": 40.47570037841797, "learning_rate": 9.18452380952381e-06, "loss": 45.4449, "step": 982 }, { "epoch": 23.405970149253733, "grad_norm": 43.92324447631836, "learning_rate": 9.178571428571429e-06, "loss": 47.8727, "step": 983 }, { "epoch": 23.429850746268656, "grad_norm": 28.896121978759766, "learning_rate": 9.172619047619048e-06, "loss": 47.8489, "step": 984 }, { "epoch": 23.453731343283582, "grad_norm": 37.02536392211914, "learning_rate": 9.166666666666666e-06, "loss": 48.4484, "step": 985 }, { "epoch": 23.47761194029851, "grad_norm": 26.289518356323242, "learning_rate": 9.160714285714286e-06, "loss": 47.0221, "step": 986 }, { "epoch": 23.501492537313432, "grad_norm": 33.60945129394531, "learning_rate": 9.154761904761906e-06, "loss": 47.7454, "step": 987 }, { "epoch": 23.52537313432836, "grad_norm": 31.284311294555664, "learning_rate": 9.148809523809524e-06, "loss": 47.0558, "step": 988 }, { "epoch": 23.549253731343285, "grad_norm": 30.488906860351562, "learning_rate": 9.142857142857144e-06, "loss": 46.4408, "step": 989 }, { "epoch": 23.573134328358208, "grad_norm": 34.29289245605469, "learning_rate": 9.136904761904762e-06, "loss": 46.2796, "step": 990 }, { "epoch": 23.597014925373134, "grad_norm": 22.803457260131836, "learning_rate": 9.130952380952382e-06, "loss": 47.2684, "step": 991 }, { "epoch": 23.62089552238806, "grad_norm": 33.18730926513672, "learning_rate": 9.125e-06, "loss": 47.5, "step": 992 }, { "epoch": 23.644776119402984, "grad_norm": 28.421592712402344, "learning_rate": 9.11904761904762e-06, "loss": 46.7508, "step": 993 }, { "epoch": 23.66865671641791, "grad_norm": 23.492319107055664, "learning_rate": 9.11309523809524e-06, "loss": 46.6042, "step": 994 }, { "epoch": 23.692537313432837, "grad_norm": 30.10877227783203, "learning_rate": 9.107142857142858e-06, "loss": 46.2632, "step": 995 }, { "epoch": 23.71641791044776, "grad_norm": 23.64444351196289, "learning_rate": 9.101190476190477e-06, "loss": 47.2817, "step": 996 }, { "epoch": 23.740298507462686, "grad_norm": 28.243606567382812, "learning_rate": 9.095238095238095e-06, "loss": 47.1196, "step": 997 }, { "epoch": 23.764179104477613, "grad_norm": 26.84208869934082, "learning_rate": 9.089285714285715e-06, "loss": 46.6631, "step": 998 }, { "epoch": 23.788059701492536, "grad_norm": 29.558794021606445, "learning_rate": 9.083333333333333e-06, "loss": 45.8711, "step": 999 }, { "epoch": 23.811940298507462, "grad_norm": 25.105928421020508, "learning_rate": 9.077380952380953e-06, "loss": 47.8253, "step": 1000 }, { "epoch": 23.83582089552239, "grad_norm": NaN, "learning_rate": 9.071428571428573e-06, "loss": 82.5048, "step": 1001 }, { "epoch": 23.85970149253731, "grad_norm": 25.548643112182617, "learning_rate": 9.071428571428573e-06, "loss": 47.5042, "step": 1002 }, { "epoch": 23.883582089552238, "grad_norm": 28.8011531829834, "learning_rate": 9.065476190476191e-06, "loss": 47.0084, "step": 1003 }, { "epoch": 23.907462686567165, "grad_norm": 31.907651901245117, "learning_rate": 9.05952380952381e-06, "loss": 48.287, "step": 1004 }, { "epoch": 23.93134328358209, "grad_norm": 32.044986724853516, "learning_rate": 9.053571428571429e-06, "loss": 47.276, "step": 1005 }, { "epoch": 23.955223880597014, "grad_norm": 31.224260330200195, "learning_rate": 9.047619047619049e-06, "loss": 47.4774, "step": 1006 }, { "epoch": 23.97910447761194, "grad_norm": 29.830835342407227, "learning_rate": 9.041666666666667e-06, "loss": 47.7031, "step": 1007 }, { "epoch": 24.0, "grad_norm": 25.12934112548828, "learning_rate": 9.035714285714287e-06, "loss": 41.8156, "step": 1008 }, { "epoch": 24.023880597014927, "grad_norm": 31.172348022460938, "learning_rate": 9.029761904761906e-06, "loss": 48.0591, "step": 1009 }, { "epoch": 24.04776119402985, "grad_norm": 26.59412956237793, "learning_rate": 9.023809523809524e-06, "loss": 47.6291, "step": 1010 }, { "epoch": 24.071641791044776, "grad_norm": 29.16905975341797, "learning_rate": 9.017857142857144e-06, "loss": 47.4587, "step": 1011 }, { "epoch": 24.095522388059702, "grad_norm": 33.05836868286133, "learning_rate": 9.011904761904762e-06, "loss": 47.8748, "step": 1012 }, { "epoch": 24.119402985074625, "grad_norm": 26.13016700744629, "learning_rate": 9.005952380952382e-06, "loss": 48.0003, "step": 1013 }, { "epoch": 24.143283582089552, "grad_norm": 29.883411407470703, "learning_rate": 9e-06, "loss": 47.0188, "step": 1014 }, { "epoch": 24.16716417910448, "grad_norm": 29.039255142211914, "learning_rate": 8.99404761904762e-06, "loss": 46.8844, "step": 1015 }, { "epoch": 24.1910447761194, "grad_norm": 26.532760620117188, "learning_rate": 8.98809523809524e-06, "loss": 47.1817, "step": 1016 }, { "epoch": 24.214925373134328, "grad_norm": 30.146087646484375, "learning_rate": 8.982142857142858e-06, "loss": 46.4863, "step": 1017 }, { "epoch": 24.238805970149254, "grad_norm": 27.027935028076172, "learning_rate": 8.976190476190478e-06, "loss": 45.7162, "step": 1018 }, { "epoch": 24.262686567164177, "grad_norm": 27.315515518188477, "learning_rate": 8.970238095238096e-06, "loss": 46.6337, "step": 1019 }, { "epoch": 24.286567164179104, "grad_norm": 25.63303565979004, "learning_rate": 8.964285714285716e-06, "loss": 46.5452, "step": 1020 }, { "epoch": 24.31044776119403, "grad_norm": 22.407268524169922, "learning_rate": 8.958333333333334e-06, "loss": 47.3262, "step": 1021 }, { "epoch": 24.334328358208957, "grad_norm": 19.4051513671875, "learning_rate": 8.952380952380953e-06, "loss": 46.8407, "step": 1022 }, { "epoch": 24.35820895522388, "grad_norm": NaN, "learning_rate": 8.946428571428573e-06, "loss": 77.1735, "step": 1023 }, { "epoch": 24.382089552238806, "grad_norm": 26.870166778564453, "learning_rate": 8.946428571428573e-06, "loss": 46.9395, "step": 1024 }, { "epoch": 24.405970149253733, "grad_norm": 21.54165267944336, "learning_rate": 8.940476190476191e-06, "loss": 47.2505, "step": 1025 }, { "epoch": 24.429850746268656, "grad_norm": 29.317501068115234, "learning_rate": 8.934523809523811e-06, "loss": 48.0554, "step": 1026 }, { "epoch": 24.453731343283582, "grad_norm": 27.45809555053711, "learning_rate": 8.92857142857143e-06, "loss": 47.0495, "step": 1027 }, { "epoch": 24.47761194029851, "grad_norm": NaN, "learning_rate": 8.922619047619049e-06, "loss": 66.51, "step": 1028 }, { "epoch": 24.501492537313432, "grad_norm": 23.169204711914062, "learning_rate": 8.922619047619049e-06, "loss": 47.5902, "step": 1029 }, { "epoch": 24.52537313432836, "grad_norm": 31.986774444580078, "learning_rate": 8.916666666666667e-06, "loss": 47.4281, "step": 1030 }, { "epoch": 24.549253731343285, "grad_norm": 27.15190315246582, "learning_rate": 8.910714285714287e-06, "loss": 46.3638, "step": 1031 }, { "epoch": 24.573134328358208, "grad_norm": 26.88028335571289, "learning_rate": 8.904761904761905e-06, "loss": 45.0491, "step": 1032 }, { "epoch": 24.597014925373134, "grad_norm": 27.693952560424805, "learning_rate": 8.898809523809525e-06, "loss": 47.6471, "step": 1033 }, { "epoch": 24.62089552238806, "grad_norm": 33.45442581176758, "learning_rate": 8.892857142857143e-06, "loss": 47.1459, "step": 1034 }, { "epoch": 24.644776119402984, "grad_norm": 29.933320999145508, "learning_rate": 8.886904761904763e-06, "loss": 46.9218, "step": 1035 }, { "epoch": 24.66865671641791, "grad_norm": 26.401569366455078, "learning_rate": 8.88095238095238e-06, "loss": 47.7027, "step": 1036 }, { "epoch": 24.692537313432837, "grad_norm": 26.92498016357422, "learning_rate": 8.875e-06, "loss": 47.2302, "step": 1037 }, { "epoch": 24.71641791044776, "grad_norm": 28.368043899536133, "learning_rate": 8.869047619047619e-06, "loss": 47.4479, "step": 1038 }, { "epoch": 24.740298507462686, "grad_norm": 27.319650650024414, "learning_rate": 8.863095238095238e-06, "loss": 47.4652, "step": 1039 }, { "epoch": 24.764179104477613, "grad_norm": 37.10929870605469, "learning_rate": 8.857142857142858e-06, "loss": 47.3543, "step": 1040 }, { "epoch": 24.788059701492536, "grad_norm": 32.430416107177734, "learning_rate": 8.851190476190476e-06, "loss": 46.1406, "step": 1041 }, { "epoch": 24.811940298507462, "grad_norm": 33.29399108886719, "learning_rate": 8.845238095238096e-06, "loss": 47.1917, "step": 1042 }, { "epoch": 24.83582089552239, "grad_norm": 32.72507095336914, "learning_rate": 8.839285714285714e-06, "loss": 47.677, "step": 1043 }, { "epoch": 24.85970149253731, "grad_norm": 25.997148513793945, "learning_rate": 8.833333333333334e-06, "loss": 47.6851, "step": 1044 }, { "epoch": 24.883582089552238, "grad_norm": 33.00047302246094, "learning_rate": 8.827380952380952e-06, "loss": 47.5326, "step": 1045 }, { "epoch": 24.907462686567165, "grad_norm": 33.95719528198242, "learning_rate": 8.821428571428572e-06, "loss": 47.2836, "step": 1046 }, { "epoch": 24.93134328358209, "grad_norm": 31.353008270263672, "learning_rate": 8.815476190476192e-06, "loss": 47.8318, "step": 1047 }, { "epoch": 24.955223880597014, "grad_norm": 27.4250545501709, "learning_rate": 8.80952380952381e-06, "loss": 47.0066, "step": 1048 }, { "epoch": 24.97910447761194, "grad_norm": 30.22010612487793, "learning_rate": 8.80357142857143e-06, "loss": 46.6991, "step": 1049 }, { "epoch": 25.0, "grad_norm": 19.693180084228516, "learning_rate": 8.797619047619048e-06, "loss": 41.6055, "step": 1050 }, { "epoch": 25.023880597014927, "grad_norm": 24.590662002563477, "learning_rate": 8.791666666666667e-06, "loss": 46.3536, "step": 1051 }, { "epoch": 25.04776119402985, "grad_norm": 28.37199592590332, "learning_rate": 8.785714285714286e-06, "loss": 47.8334, "step": 1052 }, { "epoch": 25.071641791044776, "grad_norm": 26.38755226135254, "learning_rate": 8.779761904761905e-06, "loss": 47.6486, "step": 1053 }, { "epoch": 25.095522388059702, "grad_norm": 27.338485717773438, "learning_rate": 8.773809523809525e-06, "loss": 47.3044, "step": 1054 }, { "epoch": 25.119402985074625, "grad_norm": 25.308486938476562, "learning_rate": 8.767857142857143e-06, "loss": 45.6873, "step": 1055 }, { "epoch": 25.143283582089552, "grad_norm": 30.886962890625, "learning_rate": 8.761904761904763e-06, "loss": 46.8938, "step": 1056 }, { "epoch": 25.16716417910448, "grad_norm": 25.25688934326172, "learning_rate": 8.755952380952381e-06, "loss": 47.4858, "step": 1057 }, { "epoch": 25.1910447761194, "grad_norm": 30.462963104248047, "learning_rate": 8.750000000000001e-06, "loss": 46.6334, "step": 1058 }, { "epoch": 25.214925373134328, "grad_norm": 22.87471580505371, "learning_rate": 8.744047619047619e-06, "loss": 46.0966, "step": 1059 }, { "epoch": 25.238805970149254, "grad_norm": 23.413904190063477, "learning_rate": 8.738095238095239e-06, "loss": 46.8938, "step": 1060 }, { "epoch": 25.262686567164177, "grad_norm": 26.926279067993164, "learning_rate": 8.732142857142859e-06, "loss": 46.3773, "step": 1061 }, { "epoch": 25.286567164179104, "grad_norm": 27.595348358154297, "learning_rate": 8.726190476190477e-06, "loss": 48.0235, "step": 1062 }, { "epoch": 25.31044776119403, "grad_norm": 26.124523162841797, "learning_rate": 8.720238095238096e-06, "loss": 46.6863, "step": 1063 }, { "epoch": 25.334328358208957, "grad_norm": 28.308120727539062, "learning_rate": 8.714285714285715e-06, "loss": 47.7158, "step": 1064 }, { "epoch": 25.35820895522388, "grad_norm": 23.434846878051758, "learning_rate": 8.708333333333334e-06, "loss": 47.2951, "step": 1065 }, { "epoch": 25.382089552238806, "grad_norm": 26.917911529541016, "learning_rate": 8.702380952380952e-06, "loss": 45.7266, "step": 1066 }, { "epoch": 25.405970149253733, "grad_norm": 19.7725772857666, "learning_rate": 8.696428571428572e-06, "loss": 46.5458, "step": 1067 }, { "epoch": 25.429850746268656, "grad_norm": 27.18629264831543, "learning_rate": 8.690476190476192e-06, "loss": 46.3133, "step": 1068 }, { "epoch": 25.453731343283582, "grad_norm": 26.112865447998047, "learning_rate": 8.68452380952381e-06, "loss": 46.6383, "step": 1069 }, { "epoch": 25.47761194029851, "grad_norm": 19.385990142822266, "learning_rate": 8.67857142857143e-06, "loss": 46.5541, "step": 1070 }, { "epoch": 25.501492537313432, "grad_norm": 26.713350296020508, "learning_rate": 8.672619047619048e-06, "loss": 48.045, "step": 1071 }, { "epoch": 25.52537313432836, "grad_norm": 29.80147933959961, "learning_rate": 8.666666666666668e-06, "loss": 47.4443, "step": 1072 }, { "epoch": 25.549253731343285, "grad_norm": 23.674266815185547, "learning_rate": 8.660714285714286e-06, "loss": 46.6662, "step": 1073 }, { "epoch": 25.573134328358208, "grad_norm": 46.435401916503906, "learning_rate": 8.654761904761906e-06, "loss": 46.9276, "step": 1074 }, { "epoch": 25.597014925373134, "grad_norm": 35.016502380371094, "learning_rate": 8.648809523809526e-06, "loss": 47.6811, "step": 1075 }, { "epoch": 25.62089552238806, "grad_norm": 42.57990646362305, "learning_rate": 8.642857142857144e-06, "loss": 46.5684, "step": 1076 }, { "epoch": 25.644776119402984, "grad_norm": 36.2376708984375, "learning_rate": 8.636904761904763e-06, "loss": 46.1807, "step": 1077 }, { "epoch": 25.66865671641791, "grad_norm": 41.44023895263672, "learning_rate": 8.630952380952381e-06, "loss": 46.0823, "step": 1078 }, { "epoch": 25.692537313432837, "grad_norm": 43.62863540649414, "learning_rate": 8.625000000000001e-06, "loss": 47.9958, "step": 1079 }, { "epoch": 25.71641791044776, "grad_norm": 34.232120513916016, "learning_rate": 8.61904761904762e-06, "loss": 47.9585, "step": 1080 }, { "epoch": 25.740298507462686, "grad_norm": 38.023197174072266, "learning_rate": 8.61309523809524e-06, "loss": 47.7344, "step": 1081 }, { "epoch": 25.764179104477613, "grad_norm": 37.89833068847656, "learning_rate": 8.607142857142859e-06, "loss": 47.8956, "step": 1082 }, { "epoch": 25.788059701492536, "grad_norm": 33.03269958496094, "learning_rate": 8.601190476190477e-06, "loss": 47.7408, "step": 1083 }, { "epoch": 25.811940298507462, "grad_norm": 44.31171798706055, "learning_rate": 8.595238095238097e-06, "loss": 47.6232, "step": 1084 }, { "epoch": 25.83582089552239, "grad_norm": 42.54961395263672, "learning_rate": 8.589285714285715e-06, "loss": 47.9757, "step": 1085 }, { "epoch": 25.85970149253731, "grad_norm": 27.695526123046875, "learning_rate": 8.583333333333333e-06, "loss": 47.0934, "step": 1086 }, { "epoch": 25.883582089552238, "grad_norm": 32.62801742553711, "learning_rate": 8.577380952380953e-06, "loss": 47.1186, "step": 1087 }, { "epoch": 25.907462686567165, "grad_norm": 26.777305603027344, "learning_rate": 8.571428571428571e-06, "loss": 47.2931, "step": 1088 }, { "epoch": 25.93134328358209, "grad_norm": 24.382678985595703, "learning_rate": 8.56547619047619e-06, "loss": 46.4698, "step": 1089 }, { "epoch": 25.955223880597014, "grad_norm": 34.310150146484375, "learning_rate": 8.55952380952381e-06, "loss": 46.0509, "step": 1090 }, { "epoch": 25.97910447761194, "grad_norm": 27.468976974487305, "learning_rate": 8.553571428571429e-06, "loss": 46.9123, "step": 1091 }, { "epoch": 26.0, "grad_norm": 27.90901756286621, "learning_rate": 8.547619047619048e-06, "loss": 41.8265, "step": 1092 }, { "epoch": 26.023880597014927, "grad_norm": 28.853416442871094, "learning_rate": 8.541666666666666e-06, "loss": 47.9612, "step": 1093 }, { "epoch": 26.04776119402985, "grad_norm": 31.96144676208496, "learning_rate": 8.535714285714286e-06, "loss": 46.8167, "step": 1094 }, { "epoch": 26.071641791044776, "grad_norm": 33.179141998291016, "learning_rate": 8.529761904761904e-06, "loss": 48.0464, "step": 1095 }, { "epoch": 26.095522388059702, "grad_norm": 32.18705368041992, "learning_rate": 8.523809523809524e-06, "loss": 45.6743, "step": 1096 }, { "epoch": 26.119402985074625, "grad_norm": 26.125934600830078, "learning_rate": 8.517857142857144e-06, "loss": 46.4944, "step": 1097 }, { "epoch": 26.143283582089552, "grad_norm": 31.666461944580078, "learning_rate": 8.511904761904762e-06, "loss": 47.6152, "step": 1098 }, { "epoch": 26.16716417910448, "grad_norm": 29.90437889099121, "learning_rate": 8.505952380952382e-06, "loss": 46.4497, "step": 1099 }, { "epoch": 26.1910447761194, "grad_norm": 32.59938430786133, "learning_rate": 8.5e-06, "loss": 47.1877, "step": 1100 }, { "epoch": 26.214925373134328, "grad_norm": 28.368562698364258, "learning_rate": 8.49404761904762e-06, "loss": 46.4898, "step": 1101 }, { "epoch": 26.238805970149254, "grad_norm": 31.274070739746094, "learning_rate": 8.488095238095238e-06, "loss": 47.1507, "step": 1102 }, { "epoch": 26.262686567164177, "grad_norm": 24.63444709777832, "learning_rate": 8.482142857142858e-06, "loss": 47.3659, "step": 1103 }, { "epoch": 26.286567164179104, "grad_norm": 35.413970947265625, "learning_rate": 8.476190476190477e-06, "loss": 46.6459, "step": 1104 }, { "epoch": 26.31044776119403, "grad_norm": 27.774656295776367, "learning_rate": 8.470238095238095e-06, "loss": 47.4369, "step": 1105 }, { "epoch": 26.334328358208957, "grad_norm": 32.258155822753906, "learning_rate": 8.464285714285715e-06, "loss": 47.487, "step": 1106 }, { "epoch": 26.35820895522388, "grad_norm": 18.22418785095215, "learning_rate": 8.458333333333333e-06, "loss": 46.948, "step": 1107 }, { "epoch": 26.382089552238806, "grad_norm": 24.50945472717285, "learning_rate": 8.452380952380953e-06, "loss": 45.7291, "step": 1108 }, { "epoch": 26.405970149253733, "grad_norm": 23.20486831665039, "learning_rate": 8.446428571428571e-06, "loss": 46.8704, "step": 1109 }, { "epoch": 26.429850746268656, "grad_norm": 20.810514450073242, "learning_rate": 8.440476190476191e-06, "loss": 47.3419, "step": 1110 }, { "epoch": 26.453731343283582, "grad_norm": 27.68440818786621, "learning_rate": 8.434523809523811e-06, "loss": 47.0641, "step": 1111 }, { "epoch": 26.47761194029851, "grad_norm": 26.989046096801758, "learning_rate": 8.428571428571429e-06, "loss": 46.9324, "step": 1112 }, { "epoch": 26.501492537313432, "grad_norm": 25.986888885498047, "learning_rate": 8.422619047619049e-06, "loss": 48.6179, "step": 1113 }, { "epoch": 26.52537313432836, "grad_norm": 28.111356735229492, "learning_rate": 8.416666666666667e-06, "loss": 46.4251, "step": 1114 }, { "epoch": 26.549253731343285, "grad_norm": 24.306228637695312, "learning_rate": 8.410714285714287e-06, "loss": 46.4379, "step": 1115 }, { "epoch": 26.573134328358208, "grad_norm": 23.894895553588867, "learning_rate": 8.404761904761905e-06, "loss": 46.665, "step": 1116 }, { "epoch": 26.597014925373134, "grad_norm": 25.917400360107422, "learning_rate": 8.398809523809525e-06, "loss": 46.6619, "step": 1117 }, { "epoch": 26.62089552238806, "grad_norm": 21.423585891723633, "learning_rate": 8.392857142857144e-06, "loss": 46.3447, "step": 1118 }, { "epoch": 26.644776119402984, "grad_norm": 29.13437271118164, "learning_rate": 8.386904761904762e-06, "loss": 46.4292, "step": 1119 }, { "epoch": 26.66865671641791, "grad_norm": 25.711469650268555, "learning_rate": 8.380952380952382e-06, "loss": 46.6156, "step": 1120 }, { "epoch": 26.692537313432837, "grad_norm": 26.55695915222168, "learning_rate": 8.375e-06, "loss": 46.7429, "step": 1121 }, { "epoch": 26.71641791044776, "grad_norm": 27.66262435913086, "learning_rate": 8.36904761904762e-06, "loss": 47.1275, "step": 1122 }, { "epoch": 26.740298507462686, "grad_norm": 33.85395050048828, "learning_rate": 8.36309523809524e-06, "loss": 46.7244, "step": 1123 }, { "epoch": 26.764179104477613, "grad_norm": 29.51833152770996, "learning_rate": 8.357142857142858e-06, "loss": 47.2072, "step": 1124 }, { "epoch": 26.788059701492536, "grad_norm": 26.21416664123535, "learning_rate": 8.351190476190478e-06, "loss": 47.8298, "step": 1125 }, { "epoch": 26.811940298507462, "grad_norm": 31.24039649963379, "learning_rate": 8.345238095238096e-06, "loss": 46.8069, "step": 1126 }, { "epoch": 26.83582089552239, "grad_norm": 32.19520568847656, "learning_rate": 8.339285714285716e-06, "loss": 47.1494, "step": 1127 }, { "epoch": 26.85970149253731, "grad_norm": 29.194063186645508, "learning_rate": 8.333333333333334e-06, "loss": 46.1827, "step": 1128 }, { "epoch": 26.883582089552238, "grad_norm": 28.723541259765625, "learning_rate": 8.327380952380954e-06, "loss": 46.8513, "step": 1129 }, { "epoch": 26.907462686567165, "grad_norm": 26.91135597229004, "learning_rate": 8.321428571428573e-06, "loss": 46.561, "step": 1130 }, { "epoch": 26.93134328358209, "grad_norm": 25.60898208618164, "learning_rate": 8.315476190476191e-06, "loss": 46.4706, "step": 1131 }, { "epoch": 26.955223880597014, "grad_norm": 23.72539520263672, "learning_rate": 8.309523809523811e-06, "loss": 46.7871, "step": 1132 }, { "epoch": 26.97910447761194, "grad_norm": 25.030731201171875, "learning_rate": 8.30357142857143e-06, "loss": 46.2433, "step": 1133 }, { "epoch": 27.0, "grad_norm": 25.439281463623047, "learning_rate": 8.297619047619049e-06, "loss": 40.9101, "step": 1134 }, { "epoch": 27.023880597014927, "grad_norm": 29.52981948852539, "learning_rate": 8.291666666666667e-06, "loss": 46.9385, "step": 1135 }, { "epoch": 27.04776119402985, "grad_norm": 22.007299423217773, "learning_rate": 8.285714285714287e-06, "loss": 47.2751, "step": 1136 }, { "epoch": 27.071641791044776, "grad_norm": 31.965675354003906, "learning_rate": 8.279761904761905e-06, "loss": 47.7763, "step": 1137 }, { "epoch": 27.095522388059702, "grad_norm": 23.38637351989746, "learning_rate": 8.273809523809523e-06, "loss": 46.0412, "step": 1138 }, { "epoch": 27.119402985074625, "grad_norm": 24.295711517333984, "learning_rate": 8.267857142857143e-06, "loss": 46.8552, "step": 1139 }, { "epoch": 27.143283582089552, "grad_norm": 20.915624618530273, "learning_rate": 8.261904761904763e-06, "loss": 46.3041, "step": 1140 }, { "epoch": 27.16716417910448, "grad_norm": 28.25569725036621, "learning_rate": 8.25595238095238e-06, "loss": 45.8736, "step": 1141 }, { "epoch": 27.1910447761194, "grad_norm": 24.8399658203125, "learning_rate": 8.25e-06, "loss": 46.0629, "step": 1142 }, { "epoch": 27.214925373134328, "grad_norm": 21.237272262573242, "learning_rate": 8.244047619047619e-06, "loss": 47.1252, "step": 1143 }, { "epoch": 27.238805970149254, "grad_norm": 24.35887336730957, "learning_rate": 8.238095238095239e-06, "loss": 46.9687, "step": 1144 }, { "epoch": 27.262686567164177, "grad_norm": 26.583545684814453, "learning_rate": 8.232142857142857e-06, "loss": 47.1486, "step": 1145 }, { "epoch": 27.286567164179104, "grad_norm": 23.712989807128906, "learning_rate": 8.226190476190476e-06, "loss": 46.4769, "step": 1146 }, { "epoch": 27.31044776119403, "grad_norm": 18.466094970703125, "learning_rate": 8.220238095238096e-06, "loss": 46.6911, "step": 1147 }, { "epoch": 27.334328358208957, "grad_norm": 31.812236785888672, "learning_rate": 8.214285714285714e-06, "loss": 47.6261, "step": 1148 }, { "epoch": 27.35820895522388, "grad_norm": 23.437780380249023, "learning_rate": 8.208333333333334e-06, "loss": 46.1776, "step": 1149 }, { "epoch": 27.382089552238806, "grad_norm": 27.252187728881836, "learning_rate": 8.202380952380952e-06, "loss": 44.8499, "step": 1150 }, { "epoch": 27.405970149253733, "grad_norm": 24.425500869750977, "learning_rate": 8.196428571428572e-06, "loss": 46.5397, "step": 1151 }, { "epoch": 27.429850746268656, "grad_norm": 28.237712860107422, "learning_rate": 8.190476190476192e-06, "loss": 46.8922, "step": 1152 }, { "epoch": 27.453731343283582, "grad_norm": 23.262300491333008, "learning_rate": 8.18452380952381e-06, "loss": 47.4204, "step": 1153 }, { "epoch": 27.47761194029851, "grad_norm": 20.69318389892578, "learning_rate": 8.17857142857143e-06, "loss": 47.1507, "step": 1154 }, { "epoch": 27.501492537313432, "grad_norm": 30.182701110839844, "learning_rate": 8.172619047619048e-06, "loss": 46.198, "step": 1155 }, { "epoch": 27.52537313432836, "grad_norm": 28.804855346679688, "learning_rate": 8.166666666666668e-06, "loss": 46.1366, "step": 1156 }, { "epoch": 27.549253731343285, "grad_norm": 26.992097854614258, "learning_rate": 8.160714285714286e-06, "loss": 47.3639, "step": 1157 }, { "epoch": 27.573134328358208, "grad_norm": 27.22978401184082, "learning_rate": 8.154761904761905e-06, "loss": 46.7295, "step": 1158 }, { "epoch": 27.597014925373134, "grad_norm": 24.036380767822266, "learning_rate": 8.148809523809525e-06, "loss": 46.7824, "step": 1159 }, { "epoch": 27.62089552238806, "grad_norm": 28.67648696899414, "learning_rate": 8.142857142857143e-06, "loss": 46.9712, "step": 1160 }, { "epoch": 27.644776119402984, "grad_norm": 27.389991760253906, "learning_rate": 8.136904761904763e-06, "loss": 47.6263, "step": 1161 }, { "epoch": 27.66865671641791, "grad_norm": 28.887022018432617, "learning_rate": 8.130952380952381e-06, "loss": 47.7553, "step": 1162 }, { "epoch": 27.692537313432837, "grad_norm": 17.354753494262695, "learning_rate": 8.125000000000001e-06, "loss": 46.1359, "step": 1163 }, { "epoch": 27.71641791044776, "grad_norm": 26.72220230102539, "learning_rate": 8.119047619047619e-06, "loss": 46.3653, "step": 1164 }, { "epoch": 27.740298507462686, "grad_norm": 22.09579849243164, "learning_rate": 8.113095238095239e-06, "loss": 46.856, "step": 1165 }, { "epoch": 27.764179104477613, "grad_norm": 30.197189331054688, "learning_rate": 8.107142857142859e-06, "loss": 46.4435, "step": 1166 }, { "epoch": 27.788059701492536, "grad_norm": 28.597610473632812, "learning_rate": 8.101190476190477e-06, "loss": 48.3097, "step": 1167 }, { "epoch": 27.811940298507462, "grad_norm": 22.391801834106445, "learning_rate": 8.095238095238097e-06, "loss": 47.2598, "step": 1168 }, { "epoch": 27.83582089552239, "grad_norm": 28.523584365844727, "learning_rate": 8.089285714285715e-06, "loss": 46.3123, "step": 1169 }, { "epoch": 27.85970149253731, "grad_norm": 21.646997451782227, "learning_rate": 8.083333333333334e-06, "loss": 46.2853, "step": 1170 }, { "epoch": 27.883582089552238, "grad_norm": 22.68369483947754, "learning_rate": 8.077380952380953e-06, "loss": 46.1355, "step": 1171 }, { "epoch": 27.907462686567165, "grad_norm": 25.581987380981445, "learning_rate": 8.071428571428572e-06, "loss": 45.5431, "step": 1172 }, { "epoch": 27.93134328358209, "grad_norm": 26.512523651123047, "learning_rate": 8.065476190476192e-06, "loss": 47.1898, "step": 1173 }, { "epoch": 27.955223880597014, "grad_norm": 26.89809226989746, "learning_rate": 8.05952380952381e-06, "loss": 46.8108, "step": 1174 }, { "epoch": 27.97910447761194, "grad_norm": 23.638704299926758, "learning_rate": 8.05357142857143e-06, "loss": 47.1454, "step": 1175 }, { "epoch": 28.0, "grad_norm": 28.61042022705078, "learning_rate": 8.047619047619048e-06, "loss": 41.7161, "step": 1176 }, { "epoch": 28.023880597014927, "grad_norm": 31.2153377532959, "learning_rate": 8.041666666666668e-06, "loss": 46.8925, "step": 1177 }, { "epoch": 28.04776119402985, "grad_norm": 26.284482955932617, "learning_rate": 8.035714285714286e-06, "loss": 45.7065, "step": 1178 }, { "epoch": 28.071641791044776, "grad_norm": 30.96581268310547, "learning_rate": 8.029761904761906e-06, "loss": 46.5612, "step": 1179 }, { "epoch": 28.095522388059702, "grad_norm": 24.686336517333984, "learning_rate": 8.023809523809526e-06, "loss": 46.4376, "step": 1180 }, { "epoch": 28.119402985074625, "grad_norm": 27.783416748046875, "learning_rate": 8.017857142857144e-06, "loss": 47.247, "step": 1181 }, { "epoch": 28.143283582089552, "grad_norm": 33.3108024597168, "learning_rate": 8.011904761904763e-06, "loss": 47.3171, "step": 1182 }, { "epoch": 28.16716417910448, "grad_norm": 30.010536193847656, "learning_rate": 8.005952380952382e-06, "loss": 45.9761, "step": 1183 }, { "epoch": 28.1910447761194, "grad_norm": 29.399965286254883, "learning_rate": 8.000000000000001e-06, "loss": 47.3345, "step": 1184 }, { "epoch": 28.214925373134328, "grad_norm": 25.835142135620117, "learning_rate": 7.99404761904762e-06, "loss": 46.3395, "step": 1185 }, { "epoch": 28.238805970149254, "grad_norm": 26.06570053100586, "learning_rate": 7.98809523809524e-06, "loss": 45.842, "step": 1186 }, { "epoch": 28.262686567164177, "grad_norm": 28.64603042602539, "learning_rate": 7.982142857142859e-06, "loss": 46.4802, "step": 1187 }, { "epoch": 28.286567164179104, "grad_norm": 27.157583236694336, "learning_rate": 7.976190476190477e-06, "loss": 45.7376, "step": 1188 }, { "epoch": 28.31044776119403, "grad_norm": 22.073328018188477, "learning_rate": 7.970238095238097e-06, "loss": 47.0787, "step": 1189 }, { "epoch": 28.334328358208957, "grad_norm": 21.545568466186523, "learning_rate": 7.964285714285715e-06, "loss": 45.6691, "step": 1190 }, { "epoch": 28.35820895522388, "grad_norm": 26.17327880859375, "learning_rate": 7.958333333333333e-06, "loss": 46.2058, "step": 1191 }, { "epoch": 28.382089552238806, "grad_norm": 24.443920135498047, "learning_rate": 7.952380952380953e-06, "loss": 45.3531, "step": 1192 }, { "epoch": 28.405970149253733, "grad_norm": 27.207778930664062, "learning_rate": 7.946428571428571e-06, "loss": 46.5519, "step": 1193 }, { "epoch": 28.429850746268656, "grad_norm": 23.15156364440918, "learning_rate": 7.94047619047619e-06, "loss": 46.9284, "step": 1194 }, { "epoch": 28.453731343283582, "grad_norm": 27.96567153930664, "learning_rate": 7.93452380952381e-06, "loss": 46.3696, "step": 1195 }, { "epoch": 28.47761194029851, "grad_norm": 25.828689575195312, "learning_rate": 7.928571428571429e-06, "loss": 46.2933, "step": 1196 }, { "epoch": 28.501492537313432, "grad_norm": 30.69676971435547, "learning_rate": 7.922619047619048e-06, "loss": 46.7471, "step": 1197 }, { "epoch": 28.52537313432836, "grad_norm": 24.977018356323242, "learning_rate": 7.916666666666667e-06, "loss": 47.045, "step": 1198 }, { "epoch": 28.549253731343285, "grad_norm": 26.286821365356445, "learning_rate": 7.910714285714286e-06, "loss": 47.0185, "step": 1199 }, { "epoch": 28.573134328358208, "grad_norm": 25.324783325195312, "learning_rate": 7.904761904761904e-06, "loss": 46.4036, "step": 1200 }, { "epoch": 28.597014925373134, "grad_norm": 34.25847625732422, "learning_rate": 7.898809523809524e-06, "loss": 46.8307, "step": 1201 }, { "epoch": 28.62089552238806, "grad_norm": 24.739521026611328, "learning_rate": 7.892857142857144e-06, "loss": 46.515, "step": 1202 }, { "epoch": 28.644776119402984, "grad_norm": 36.694252014160156, "learning_rate": 7.886904761904762e-06, "loss": 45.7078, "step": 1203 }, { "epoch": 28.66865671641791, "grad_norm": 34.95314025878906, "learning_rate": 7.880952380952382e-06, "loss": 47.6302, "step": 1204 }, { "epoch": 28.692537313432837, "grad_norm": 26.474821090698242, "learning_rate": 7.875e-06, "loss": 47.2158, "step": 1205 }, { "epoch": 28.71641791044776, "grad_norm": 30.19892692565918, "learning_rate": 7.86904761904762e-06, "loss": 46.1515, "step": 1206 }, { "epoch": 28.740298507462686, "grad_norm": 32.16860580444336, "learning_rate": 7.863095238095238e-06, "loss": 46.3963, "step": 1207 }, { "epoch": 28.764179104477613, "grad_norm": 27.323444366455078, "learning_rate": 7.857142857142858e-06, "loss": 47.6511, "step": 1208 }, { "epoch": 28.788059701492536, "grad_norm": 24.073701858520508, "learning_rate": 7.851190476190477e-06, "loss": 46.7283, "step": 1209 }, { "epoch": 28.811940298507462, "grad_norm": 25.73206329345703, "learning_rate": 7.845238095238096e-06, "loss": 47.3957, "step": 1210 }, { "epoch": 28.83582089552239, "grad_norm": 23.368709564208984, "learning_rate": 7.839285714285715e-06, "loss": 46.1844, "step": 1211 }, { "epoch": 28.85970149253731, "grad_norm": 24.563371658325195, "learning_rate": 7.833333333333333e-06, "loss": 47.0752, "step": 1212 }, { "epoch": 28.883582089552238, "grad_norm": 20.747081756591797, "learning_rate": 7.827380952380953e-06, "loss": 45.9425, "step": 1213 }, { "epoch": 28.907462686567165, "grad_norm": 22.27573013305664, "learning_rate": 7.821428571428571e-06, "loss": 46.746, "step": 1214 }, { "epoch": 28.93134328358209, "grad_norm": 23.162179946899414, "learning_rate": 7.815476190476191e-06, "loss": 46.808, "step": 1215 }, { "epoch": 28.955223880597014, "grad_norm": 23.585325241088867, "learning_rate": 7.809523809523811e-06, "loss": 47.2584, "step": 1216 }, { "epoch": 28.97910447761194, "grad_norm": 29.979564666748047, "learning_rate": 7.803571428571429e-06, "loss": 47.3433, "step": 1217 }, { "epoch": 29.0, "grad_norm": 25.872072219848633, "learning_rate": 7.797619047619049e-06, "loss": 41.1844, "step": 1218 }, { "epoch": 29.023880597014927, "grad_norm": 25.673351287841797, "learning_rate": 7.791666666666667e-06, "loss": 46.4546, "step": 1219 }, { "epoch": 29.04776119402985, "grad_norm": 29.831058502197266, "learning_rate": 7.785714285714287e-06, "loss": 46.8122, "step": 1220 }, { "epoch": 29.071641791044776, "grad_norm": 27.548316955566406, "learning_rate": 7.779761904761905e-06, "loss": 47.748, "step": 1221 }, { "epoch": 29.095522388059702, "grad_norm": 26.399370193481445, "learning_rate": 7.773809523809525e-06, "loss": 46.736, "step": 1222 }, { "epoch": 29.119402985074625, "grad_norm": 25.127031326293945, "learning_rate": 7.767857142857144e-06, "loss": 46.8307, "step": 1223 }, { "epoch": 29.143283582089552, "grad_norm": 26.624732971191406, "learning_rate": 7.761904761904762e-06, "loss": 46.2401, "step": 1224 }, { "epoch": 29.16716417910448, "grad_norm": 30.770824432373047, "learning_rate": 7.755952380952382e-06, "loss": 46.7194, "step": 1225 }, { "epoch": 29.1910447761194, "grad_norm": 23.830007553100586, "learning_rate": 7.75e-06, "loss": 46.4737, "step": 1226 }, { "epoch": 29.214925373134328, "grad_norm": 32.90129470825195, "learning_rate": 7.74404761904762e-06, "loss": 47.4361, "step": 1227 }, { "epoch": 29.238805970149254, "grad_norm": 23.381397247314453, "learning_rate": 7.738095238095238e-06, "loss": 45.3297, "step": 1228 }, { "epoch": 29.262686567164177, "grad_norm": 32.836387634277344, "learning_rate": 7.732142857142858e-06, "loss": 46.0574, "step": 1229 }, { "epoch": 29.286567164179104, "grad_norm": 25.803264617919922, "learning_rate": 7.726190476190478e-06, "loss": 45.757, "step": 1230 }, { "epoch": 29.31044776119403, "grad_norm": 29.38982391357422, "learning_rate": 7.720238095238096e-06, "loss": 46.7099, "step": 1231 }, { "epoch": 29.334328358208957, "grad_norm": 26.39947509765625, "learning_rate": 7.714285714285716e-06, "loss": 47.5944, "step": 1232 }, { "epoch": 29.35820895522388, "grad_norm": 25.958354949951172, "learning_rate": 7.708333333333334e-06, "loss": 46.1395, "step": 1233 }, { "epoch": 29.382089552238806, "grad_norm": 28.697542190551758, "learning_rate": 7.702380952380954e-06, "loss": 46.1713, "step": 1234 }, { "epoch": 29.405970149253733, "grad_norm": 19.471586227416992, "learning_rate": 7.696428571428572e-06, "loss": 47.7724, "step": 1235 }, { "epoch": 29.429850746268656, "grad_norm": 29.924991607666016, "learning_rate": 7.690476190476191e-06, "loss": 47.2203, "step": 1236 }, { "epoch": 29.453731343283582, "grad_norm": 20.516891479492188, "learning_rate": 7.684523809523811e-06, "loss": 46.2945, "step": 1237 }, { "epoch": 29.47761194029851, "grad_norm": 30.605262756347656, "learning_rate": 7.67857142857143e-06, "loss": 47.1786, "step": 1238 }, { "epoch": 29.501492537313432, "grad_norm": 16.288013458251953, "learning_rate": 7.672619047619049e-06, "loss": 45.3413, "step": 1239 }, { "epoch": 29.52537313432836, "grad_norm": 23.54091453552246, "learning_rate": 7.666666666666667e-06, "loss": 45.9196, "step": 1240 }, { "epoch": 29.549253731343285, "grad_norm": 20.33724021911621, "learning_rate": 7.660714285714287e-06, "loss": 47.0275, "step": 1241 }, { "epoch": 29.573134328358208, "grad_norm": 27.460975646972656, "learning_rate": 7.654761904761905e-06, "loss": 44.8995, "step": 1242 }, { "epoch": 29.597014925373134, "grad_norm": 25.58623695373535, "learning_rate": 7.648809523809523e-06, "loss": 46.0706, "step": 1243 }, { "epoch": 29.62089552238806, "grad_norm": 27.997203826904297, "learning_rate": 7.642857142857143e-06, "loss": 47.2368, "step": 1244 }, { "epoch": 29.644776119402984, "grad_norm": 31.361181259155273, "learning_rate": 7.636904761904763e-06, "loss": 46.8056, "step": 1245 }, { "epoch": 29.66865671641791, "grad_norm": 29.266433715820312, "learning_rate": 7.630952380952381e-06, "loss": 45.323, "step": 1246 }, { "epoch": 29.692537313432837, "grad_norm": 24.066415786743164, "learning_rate": 7.625e-06, "loss": 46.9221, "step": 1247 }, { "epoch": 29.71641791044776, "grad_norm": 25.790491104125977, "learning_rate": 7.61904761904762e-06, "loss": 45.7051, "step": 1248 }, { "epoch": 29.740298507462686, "grad_norm": 24.202716827392578, "learning_rate": 7.6130952380952386e-06, "loss": 47.1067, "step": 1249 }, { "epoch": 29.764179104477613, "grad_norm": 27.302003860473633, "learning_rate": 7.6071428571428575e-06, "loss": 46.6039, "step": 1250 }, { "epoch": 29.788059701492536, "grad_norm": 22.75196075439453, "learning_rate": 7.6011904761904765e-06, "loss": 46.3265, "step": 1251 }, { "epoch": 29.811940298507462, "grad_norm": 30.963153839111328, "learning_rate": 7.595238095238095e-06, "loss": 46.3283, "step": 1252 }, { "epoch": 29.83582089552239, "grad_norm": 21.538162231445312, "learning_rate": 7.589285714285714e-06, "loss": 46.5345, "step": 1253 }, { "epoch": 29.85970149253731, "grad_norm": 28.09955596923828, "learning_rate": 7.583333333333333e-06, "loss": 47.1, "step": 1254 }, { "epoch": 29.883582089552238, "grad_norm": 20.88216781616211, "learning_rate": 7.577380952380953e-06, "loss": 45.3354, "step": 1255 }, { "epoch": 29.907462686567165, "grad_norm": 24.15240478515625, "learning_rate": 7.571428571428572e-06, "loss": 46.299, "step": 1256 }, { "epoch": 29.93134328358209, "grad_norm": 22.839298248291016, "learning_rate": 7.565476190476191e-06, "loss": 46.3436, "step": 1257 }, { "epoch": 29.955223880597014, "grad_norm": 26.582752227783203, "learning_rate": 7.55952380952381e-06, "loss": 45.9107, "step": 1258 }, { "epoch": 29.97910447761194, "grad_norm": 24.98562240600586, "learning_rate": 7.553571428571429e-06, "loss": 46.7134, "step": 1259 }, { "epoch": 30.0, "grad_norm": 23.327436447143555, "learning_rate": 7.547619047619048e-06, "loss": 41.2325, "step": 1260 }, { "epoch": 30.023880597014927, "grad_norm": 20.400623321533203, "learning_rate": 7.541666666666667e-06, "loss": 46.9564, "step": 1261 }, { "epoch": 30.04776119402985, "grad_norm": NaN, "learning_rate": 7.5357142857142865e-06, "loss": 68.2215, "step": 1262 }, { "epoch": 30.071641791044776, "grad_norm": 22.870811462402344, "learning_rate": 7.5357142857142865e-06, "loss": 46.5712, "step": 1263 }, { "epoch": 30.095522388059702, "grad_norm": 24.057098388671875, "learning_rate": 7.5297619047619055e-06, "loss": 46.8943, "step": 1264 }, { "epoch": 30.119402985074625, "grad_norm": 25.820720672607422, "learning_rate": 7.523809523809524e-06, "loss": 46.0747, "step": 1265 }, { "epoch": 30.143283582089552, "grad_norm": 28.460693359375, "learning_rate": 7.517857142857143e-06, "loss": 47.0127, "step": 1266 }, { "epoch": 30.16716417910448, "grad_norm": 21.60432243347168, "learning_rate": 7.511904761904762e-06, "loss": 45.8081, "step": 1267 }, { "epoch": 30.1910447761194, "grad_norm": 29.013648986816406, "learning_rate": 7.505952380952381e-06, "loss": 46.6712, "step": 1268 }, { "epoch": 30.214925373134328, "grad_norm": 24.865493774414062, "learning_rate": 7.500000000000001e-06, "loss": 46.6816, "step": 1269 }, { "epoch": 30.238805970149254, "grad_norm": 23.676206588745117, "learning_rate": 7.49404761904762e-06, "loss": 46.2663, "step": 1270 }, { "epoch": 30.262686567164177, "grad_norm": 27.889135360717773, "learning_rate": 7.488095238095239e-06, "loss": 45.7052, "step": 1271 }, { "epoch": 30.286567164179104, "grad_norm": 29.024211883544922, "learning_rate": 7.482142857142858e-06, "loss": 45.5005, "step": 1272 }, { "epoch": 30.31044776119403, "grad_norm": 25.8428955078125, "learning_rate": 7.476190476190477e-06, "loss": 46.788, "step": 1273 }, { "epoch": 30.334328358208957, "grad_norm": 26.765539169311523, "learning_rate": 7.470238095238096e-06, "loss": 46.454, "step": 1274 }, { "epoch": 30.35820895522388, "grad_norm": 34.80079650878906, "learning_rate": 7.464285714285715e-06, "loss": 47.6929, "step": 1275 }, { "epoch": 30.382089552238806, "grad_norm": 25.589618682861328, "learning_rate": 7.4583333333333345e-06, "loss": 46.0104, "step": 1276 }, { "epoch": 30.405970149253733, "grad_norm": 27.0733699798584, "learning_rate": 7.4523809523809534e-06, "loss": 45.4742, "step": 1277 }, { "epoch": 30.429850746268656, "grad_norm": 26.662338256835938, "learning_rate": 7.446428571428572e-06, "loss": 46.8066, "step": 1278 }, { "epoch": 30.453731343283582, "grad_norm": 28.389951705932617, "learning_rate": 7.440476190476191e-06, "loss": 46.9716, "step": 1279 }, { "epoch": 30.47761194029851, "grad_norm": NaN, "learning_rate": 7.43452380952381e-06, "loss": 52.2915, "step": 1280 }, { "epoch": 30.501492537313432, "grad_norm": 26.77708625793457, "learning_rate": 7.43452380952381e-06, "loss": 44.919, "step": 1281 }, { "epoch": 30.52537313432836, "grad_norm": 25.423444747924805, "learning_rate": 7.428571428571429e-06, "loss": 46.5057, "step": 1282 }, { "epoch": 30.549253731343285, "grad_norm": 24.04167366027832, "learning_rate": 7.422619047619048e-06, "loss": 46.3685, "step": 1283 }, { "epoch": 30.573134328358208, "grad_norm": 23.51607894897461, "learning_rate": 7.416666666666668e-06, "loss": 45.9694, "step": 1284 }, { "epoch": 30.597014925373134, "grad_norm": 26.216157913208008, "learning_rate": 7.410714285714287e-06, "loss": 47.2582, "step": 1285 }, { "epoch": 30.62089552238806, "grad_norm": 24.339780807495117, "learning_rate": 7.404761904761906e-06, "loss": 44.8052, "step": 1286 }, { "epoch": 30.644776119402984, "grad_norm": 19.203577041625977, "learning_rate": 7.398809523809525e-06, "loss": 47.0301, "step": 1287 }, { "epoch": 30.66865671641791, "grad_norm": 22.252805709838867, "learning_rate": 7.392857142857144e-06, "loss": 45.5993, "step": 1288 }, { "epoch": 30.692537313432837, "grad_norm": 25.316205978393555, "learning_rate": 7.386904761904763e-06, "loss": 46.1157, "step": 1289 }, { "epoch": 30.71641791044776, "grad_norm": 18.311643600463867, "learning_rate": 7.380952380952382e-06, "loss": 46.5986, "step": 1290 }, { "epoch": 30.740298507462686, "grad_norm": 31.84505271911621, "learning_rate": 7.375000000000001e-06, "loss": 46.9177, "step": 1291 }, { "epoch": 30.764179104477613, "grad_norm": 26.221525192260742, "learning_rate": 7.36904761904762e-06, "loss": 47.0897, "step": 1292 }, { "epoch": 30.788059701492536, "grad_norm": 27.029104232788086, "learning_rate": 7.363095238095239e-06, "loss": 45.3724, "step": 1293 }, { "epoch": 30.811940298507462, "grad_norm": 33.51012420654297, "learning_rate": 7.357142857142858e-06, "loss": 46.7046, "step": 1294 }, { "epoch": 30.83582089552239, "grad_norm": 26.42972183227539, "learning_rate": 7.351190476190477e-06, "loss": 46.7606, "step": 1295 }, { "epoch": 30.85970149253731, "grad_norm": 30.91115951538086, "learning_rate": 7.345238095238096e-06, "loss": 47.5485, "step": 1296 }, { "epoch": 30.883582089552238, "grad_norm": 28.296560287475586, "learning_rate": 7.339285714285714e-06, "loss": 46.4997, "step": 1297 }, { "epoch": 30.907462686567165, "grad_norm": 32.054561614990234, "learning_rate": 7.333333333333333e-06, "loss": 46.4953, "step": 1298 }, { "epoch": 30.93134328358209, "grad_norm": 31.635595321655273, "learning_rate": 7.327380952380952e-06, "loss": 46.5325, "step": 1299 }, { "epoch": 30.955223880597014, "grad_norm": 25.557523727416992, "learning_rate": 7.321428571428572e-06, "loss": 45.56, "step": 1300 }, { "epoch": 30.97910447761194, "grad_norm": 30.01810073852539, "learning_rate": 7.315476190476191e-06, "loss": 46.6149, "step": 1301 }, { "epoch": 31.0, "grad_norm": 24.6826114654541, "learning_rate": 7.30952380952381e-06, "loss": 40.8651, "step": 1302 }, { "epoch": 31.023880597014927, "grad_norm": 24.378164291381836, "learning_rate": 7.303571428571429e-06, "loss": 46.0721, "step": 1303 }, { "epoch": 31.04776119402985, "grad_norm": 20.247482299804688, "learning_rate": 7.297619047619048e-06, "loss": 45.8819, "step": 1304 }, { "epoch": 31.071641791044776, "grad_norm": 25.636112213134766, "learning_rate": 7.291666666666667e-06, "loss": 47.1987, "step": 1305 }, { "epoch": 31.095522388059702, "grad_norm": 30.428096771240234, "learning_rate": 7.285714285714286e-06, "loss": 46.6961, "step": 1306 }, { "epoch": 31.119402985074625, "grad_norm": 21.404991149902344, "learning_rate": 7.279761904761905e-06, "loss": 46.6841, "step": 1307 }, { "epoch": 31.143283582089552, "grad_norm": 31.655052185058594, "learning_rate": 7.273809523809524e-06, "loss": 47.7781, "step": 1308 }, { "epoch": 31.16716417910448, "grad_norm": 24.327327728271484, "learning_rate": 7.267857142857143e-06, "loss": 46.002, "step": 1309 }, { "epoch": 31.1910447761194, "grad_norm": 26.230745315551758, "learning_rate": 7.261904761904762e-06, "loss": 47.3903, "step": 1310 }, { "epoch": 31.214925373134328, "grad_norm": 27.337961196899414, "learning_rate": 7.255952380952381e-06, "loss": 46.0999, "step": 1311 }, { "epoch": 31.238805970149254, "grad_norm": 35.14864730834961, "learning_rate": 7.25e-06, "loss": 46.5187, "step": 1312 }, { "epoch": 31.262686567164177, "grad_norm": 26.60109519958496, "learning_rate": 7.24404761904762e-06, "loss": 44.5864, "step": 1313 }, { "epoch": 31.286567164179104, "grad_norm": 33.15165710449219, "learning_rate": 7.238095238095239e-06, "loss": 46.4779, "step": 1314 }, { "epoch": 31.31044776119403, "grad_norm": 26.3510684967041, "learning_rate": 7.232142857142858e-06, "loss": 47.0845, "step": 1315 }, { "epoch": 31.334328358208957, "grad_norm": 47.12569046020508, "learning_rate": 7.226190476190477e-06, "loss": 47.2947, "step": 1316 }, { "epoch": 31.35820895522388, "grad_norm": 40.15263748168945, "learning_rate": 7.220238095238096e-06, "loss": 45.8788, "step": 1317 }, { "epoch": 31.382089552238806, "grad_norm": 36.59072494506836, "learning_rate": 7.2142857142857145e-06, "loss": 45.991, "step": 1318 }, { "epoch": 31.405970149253733, "grad_norm": 36.895408630371094, "learning_rate": 7.2083333333333335e-06, "loss": 46.197, "step": 1319 }, { "epoch": 31.429850746268656, "grad_norm": NaN, "learning_rate": 7.202380952380953e-06, "loss": 38.9024, "step": 1320 }, { "epoch": 31.453731343283582, "grad_norm": 27.446247100830078, "learning_rate": 7.202380952380953e-06, "loss": 45.5293, "step": 1321 }, { "epoch": 31.47761194029851, "grad_norm": 27.48939323425293, "learning_rate": 7.196428571428572e-06, "loss": 46.8754, "step": 1322 }, { "epoch": 31.501492537313432, "grad_norm": 22.736833572387695, "learning_rate": 7.190476190476191e-06, "loss": 44.4905, "step": 1323 }, { "epoch": 31.52537313432836, "grad_norm": 23.413612365722656, "learning_rate": 7.18452380952381e-06, "loss": 47.0714, "step": 1324 }, { "epoch": 31.549253731343285, "grad_norm": 29.154848098754883, "learning_rate": 7.178571428571429e-06, "loss": 46.393, "step": 1325 }, { "epoch": 31.573134328358208, "grad_norm": 28.130638122558594, "learning_rate": 7.172619047619048e-06, "loss": 46.1857, "step": 1326 }, { "epoch": 31.597014925373134, "grad_norm": 19.745920181274414, "learning_rate": 7.166666666666667e-06, "loss": 45.2873, "step": 1327 }, { "epoch": 31.62089552238806, "grad_norm": 27.630279541015625, "learning_rate": 7.160714285714287e-06, "loss": 46.5475, "step": 1328 }, { "epoch": 31.644776119402984, "grad_norm": 20.568862915039062, "learning_rate": 7.154761904761906e-06, "loss": 46.231, "step": 1329 }, { "epoch": 31.66865671641791, "grad_norm": 17.769695281982422, "learning_rate": 7.148809523809525e-06, "loss": 46.8431, "step": 1330 }, { "epoch": 31.692537313432837, "grad_norm": 29.941057205200195, "learning_rate": 7.1428571428571436e-06, "loss": 44.842, "step": 1331 }, { "epoch": 31.71641791044776, "grad_norm": 21.054975509643555, "learning_rate": 7.1369047619047625e-06, "loss": 45.1147, "step": 1332 }, { "epoch": 31.740298507462686, "grad_norm": 23.80388069152832, "learning_rate": 7.1309523809523814e-06, "loss": 46.1839, "step": 1333 }, { "epoch": 31.764179104477613, "grad_norm": 30.561933517456055, "learning_rate": 7.125e-06, "loss": 46.2703, "step": 1334 }, { "epoch": 31.788059701492536, "grad_norm": 23.752151489257812, "learning_rate": 7.11904761904762e-06, "loss": 46.7347, "step": 1335 }, { "epoch": 31.811940298507462, "grad_norm": 32.00548553466797, "learning_rate": 7.113095238095239e-06, "loss": 46.1236, "step": 1336 }, { "epoch": 31.83582089552239, "grad_norm": 26.685504913330078, "learning_rate": 7.107142857142858e-06, "loss": 47.8881, "step": 1337 }, { "epoch": 31.85970149253731, "grad_norm": 26.5799503326416, "learning_rate": 7.101190476190477e-06, "loss": 46.1187, "step": 1338 }, { "epoch": 31.883582089552238, "grad_norm": 28.78062629699707, "learning_rate": 7.095238095238096e-06, "loss": 46.8058, "step": 1339 }, { "epoch": 31.907462686567165, "grad_norm": 26.98428726196289, "learning_rate": 7.089285714285715e-06, "loss": 46.3602, "step": 1340 }, { "epoch": 31.93134328358209, "grad_norm": 32.5291633605957, "learning_rate": 7.083333333333335e-06, "loss": 46.464, "step": 1341 }, { "epoch": 31.955223880597014, "grad_norm": 25.088685989379883, "learning_rate": 7.077380952380954e-06, "loss": 47.0542, "step": 1342 }, { "epoch": 31.97910447761194, "grad_norm": 32.58052444458008, "learning_rate": 7.0714285714285726e-06, "loss": 46.3364, "step": 1343 }, { "epoch": 32.0, "grad_norm": 22.65249252319336, "learning_rate": 7.0654761904761915e-06, "loss": 39.8201, "step": 1344 }, { "epoch": 32.02388059701492, "grad_norm": 27.03556251525879, "learning_rate": 7.0595238095238105e-06, "loss": 47.7819, "step": 1345 }, { "epoch": 32.04776119402985, "grad_norm": 25.712047576904297, "learning_rate": 7.053571428571429e-06, "loss": 46.1116, "step": 1346 }, { "epoch": 32.071641791044776, "grad_norm": 21.99336051940918, "learning_rate": 7.047619047619048e-06, "loss": 46.3745, "step": 1347 }, { "epoch": 32.0955223880597, "grad_norm": 28.53151512145996, "learning_rate": 7.041666666666668e-06, "loss": 46.5998, "step": 1348 }, { "epoch": 32.11940298507463, "grad_norm": 20.151912689208984, "learning_rate": 7.035714285714287e-06, "loss": 45.4197, "step": 1349 }, { "epoch": 32.14328358208955, "grad_norm": 21.491193771362305, "learning_rate": 7.029761904761905e-06, "loss": 46.0246, "step": 1350 }, { "epoch": 32.167164179104475, "grad_norm": 20.057588577270508, "learning_rate": 7.023809523809524e-06, "loss": 46.2149, "step": 1351 }, { "epoch": 32.191044776119405, "grad_norm": 16.675336837768555, "learning_rate": 7.017857142857143e-06, "loss": 46.5231, "step": 1352 }, { "epoch": 32.21492537313433, "grad_norm": 22.007305145263672, "learning_rate": 7.011904761904762e-06, "loss": 44.8665, "step": 1353 }, { "epoch": 32.23880597014925, "grad_norm": 22.947837829589844, "learning_rate": 7.005952380952381e-06, "loss": 45.0394, "step": 1354 }, { "epoch": 32.26268656716418, "grad_norm": 25.444522857666016, "learning_rate": 7e-06, "loss": 46.0367, "step": 1355 }, { "epoch": 32.286567164179104, "grad_norm": 22.319833755493164, "learning_rate": 6.994047619047619e-06, "loss": 47.0455, "step": 1356 }, { "epoch": 32.31044776119403, "grad_norm": 20.41710090637207, "learning_rate": 6.988095238095239e-06, "loss": 45.5119, "step": 1357 }, { "epoch": 32.33432835820896, "grad_norm": 29.03120994567871, "learning_rate": 6.9821428571428576e-06, "loss": 45.1962, "step": 1358 }, { "epoch": 32.35820895522388, "grad_norm": 22.10372543334961, "learning_rate": 6.9761904761904765e-06, "loss": 47.379, "step": 1359 }, { "epoch": 32.3820895522388, "grad_norm": 29.49492073059082, "learning_rate": 6.9702380952380955e-06, "loss": 48.2375, "step": 1360 }, { "epoch": 32.40597014925373, "grad_norm": 26.655149459838867, "learning_rate": 6.964285714285714e-06, "loss": 45.8468, "step": 1361 }, { "epoch": 32.429850746268656, "grad_norm": 27.994979858398438, "learning_rate": 6.958333333333333e-06, "loss": 46.4883, "step": 1362 }, { "epoch": 32.45373134328358, "grad_norm": 25.787900924682617, "learning_rate": 6.952380952380952e-06, "loss": 47.0159, "step": 1363 }, { "epoch": 32.47761194029851, "grad_norm": 29.429485321044922, "learning_rate": 6.946428571428572e-06, "loss": 45.182, "step": 1364 }, { "epoch": 32.50149253731343, "grad_norm": 21.825122833251953, "learning_rate": 6.940476190476191e-06, "loss": 47.4224, "step": 1365 }, { "epoch": 32.525373134328355, "grad_norm": 26.284622192382812, "learning_rate": 6.93452380952381e-06, "loss": 45.7025, "step": 1366 }, { "epoch": 32.549253731343285, "grad_norm": 21.384979248046875, "learning_rate": 6.928571428571429e-06, "loss": 45.6267, "step": 1367 }, { "epoch": 32.57313432835821, "grad_norm": 21.64442253112793, "learning_rate": 6.922619047619048e-06, "loss": 46.8577, "step": 1368 }, { "epoch": 32.59701492537313, "grad_norm": 22.377302169799805, "learning_rate": 6.916666666666667e-06, "loss": 46.5022, "step": 1369 }, { "epoch": 32.62089552238806, "grad_norm": 18.1933536529541, "learning_rate": 6.910714285714286e-06, "loss": 46.7098, "step": 1370 }, { "epoch": 32.644776119402984, "grad_norm": NaN, "learning_rate": 6.9047619047619055e-06, "loss": 59.6159, "step": 1371 }, { "epoch": 32.668656716417914, "grad_norm": 20.35690689086914, "learning_rate": 6.9047619047619055e-06, "loss": 47.4638, "step": 1372 }, { "epoch": 32.69253731343284, "grad_norm": 29.140775680541992, "learning_rate": 6.8988095238095245e-06, "loss": 46.242, "step": 1373 }, { "epoch": 32.71641791044776, "grad_norm": 25.27906608581543, "learning_rate": 6.892857142857143e-06, "loss": 45.7122, "step": 1374 }, { "epoch": 32.74029850746269, "grad_norm": 19.000076293945312, "learning_rate": 6.886904761904762e-06, "loss": 46.4813, "step": 1375 }, { "epoch": 32.76417910447761, "grad_norm": 25.048797607421875, "learning_rate": 6.880952380952381e-06, "loss": 45.5569, "step": 1376 }, { "epoch": 32.788059701492536, "grad_norm": 24.078060150146484, "learning_rate": 6.875e-06, "loss": 45.9708, "step": 1377 }, { "epoch": 32.811940298507466, "grad_norm": 23.822643280029297, "learning_rate": 6.86904761904762e-06, "loss": 47.5914, "step": 1378 }, { "epoch": 32.83582089552239, "grad_norm": 29.267864227294922, "learning_rate": 6.863095238095239e-06, "loss": 45.2741, "step": 1379 }, { "epoch": 32.85970149253731, "grad_norm": 19.477649688720703, "learning_rate": 6.857142857142858e-06, "loss": 46.3849, "step": 1380 }, { "epoch": 32.88358208955224, "grad_norm": 33.31391525268555, "learning_rate": 6.851190476190477e-06, "loss": 44.9609, "step": 1381 }, { "epoch": 32.907462686567165, "grad_norm": 23.064956665039062, "learning_rate": 6.845238095238096e-06, "loss": 45.8295, "step": 1382 }, { "epoch": 32.93134328358209, "grad_norm": 30.366653442382812, "learning_rate": 6.839285714285715e-06, "loss": 44.3142, "step": 1383 }, { "epoch": 32.95522388059702, "grad_norm": 25.059572219848633, "learning_rate": 6.833333333333334e-06, "loss": 46.5768, "step": 1384 }, { "epoch": 32.97910447761194, "grad_norm": 23.186697006225586, "learning_rate": 6.8273809523809535e-06, "loss": 45.185, "step": 1385 }, { "epoch": 33.0, "grad_norm": 21.550168991088867, "learning_rate": 6.8214285714285724e-06, "loss": 39.1732, "step": 1386 }, { "epoch": 33.02388059701492, "grad_norm": 22.417282104492188, "learning_rate": 6.815476190476191e-06, "loss": 47.6667, "step": 1387 }, { "epoch": 33.04776119402985, "grad_norm": 26.805702209472656, "learning_rate": 6.80952380952381e-06, "loss": 46.4091, "step": 1388 }, { "epoch": 33.071641791044776, "grad_norm": 23.723695755004883, "learning_rate": 6.803571428571429e-06, "loss": 46.3798, "step": 1389 }, { "epoch": 33.0955223880597, "grad_norm": 30.029897689819336, "learning_rate": 6.797619047619048e-06, "loss": 45.9736, "step": 1390 }, { "epoch": 33.11940298507463, "grad_norm": 19.387653350830078, "learning_rate": 6.791666666666667e-06, "loss": 45.1998, "step": 1391 }, { "epoch": 33.14328358208955, "grad_norm": 33.68477249145508, "learning_rate": 6.785714285714287e-06, "loss": 45.4435, "step": 1392 }, { "epoch": 33.167164179104475, "grad_norm": 26.001699447631836, "learning_rate": 6.779761904761906e-06, "loss": 45.6725, "step": 1393 }, { "epoch": 33.191044776119405, "grad_norm": 34.19535827636719, "learning_rate": 6.773809523809525e-06, "loss": 46.6387, "step": 1394 }, { "epoch": 33.21492537313433, "grad_norm": 24.243515014648438, "learning_rate": 6.767857142857144e-06, "loss": 46.4235, "step": 1395 }, { "epoch": 33.23880597014925, "grad_norm": 33.013675689697266, "learning_rate": 6.761904761904763e-06, "loss": 46.7151, "step": 1396 }, { "epoch": 33.26268656716418, "grad_norm": 30.15135955810547, "learning_rate": 6.755952380952382e-06, "loss": 46.3002, "step": 1397 }, { "epoch": 33.286567164179104, "grad_norm": 31.58100128173828, "learning_rate": 6.750000000000001e-06, "loss": 46.6084, "step": 1398 }, { "epoch": 33.31044776119403, "grad_norm": 26.23592185974121, "learning_rate": 6.74404761904762e-06, "loss": 45.5745, "step": 1399 }, { "epoch": 33.33432835820896, "grad_norm": 32.273311614990234, "learning_rate": 6.738095238095239e-06, "loss": 45.1131, "step": 1400 }, { "epoch": 33.35820895522388, "grad_norm": 29.7532958984375, "learning_rate": 6.732142857142858e-06, "loss": 45.9739, "step": 1401 }, { "epoch": 33.3820895522388, "grad_norm": 32.648704528808594, "learning_rate": 6.726190476190477e-06, "loss": 46.6293, "step": 1402 }, { "epoch": 33.40597014925373, "grad_norm": 26.455778121948242, "learning_rate": 6.720238095238096e-06, "loss": 46.5187, "step": 1403 }, { "epoch": 33.429850746268656, "grad_norm": 30.5809326171875, "learning_rate": 6.714285714285714e-06, "loss": 46.5477, "step": 1404 }, { "epoch": 33.45373134328358, "grad_norm": 29.604442596435547, "learning_rate": 6.708333333333333e-06, "loss": 45.462, "step": 1405 }, { "epoch": 33.47761194029851, "grad_norm": 36.19733428955078, "learning_rate": 6.702380952380952e-06, "loss": 46.7046, "step": 1406 }, { "epoch": 33.50149253731343, "grad_norm": 37.733619689941406, "learning_rate": 6.696428571428571e-06, "loss": 46.2156, "step": 1407 }, { "epoch": 33.525373134328355, "grad_norm": 26.49405288696289, "learning_rate": 6.690476190476191e-06, "loss": 45.373, "step": 1408 }, { "epoch": 33.549253731343285, "grad_norm": 30.09432601928711, "learning_rate": 6.68452380952381e-06, "loss": 46.3868, "step": 1409 }, { "epoch": 33.57313432835821, "grad_norm": 25.85702896118164, "learning_rate": 6.678571428571429e-06, "loss": 45.805, "step": 1410 }, { "epoch": 33.59701492537313, "grad_norm": 28.564380645751953, "learning_rate": 6.672619047619048e-06, "loss": 46.4158, "step": 1411 }, { "epoch": 33.62089552238806, "grad_norm": 19.878551483154297, "learning_rate": 6.666666666666667e-06, "loss": 46.5922, "step": 1412 }, { "epoch": 33.644776119402984, "grad_norm": 22.83441734313965, "learning_rate": 6.660714285714286e-06, "loss": 45.1216, "step": 1413 }, { "epoch": 33.668656716417914, "grad_norm": 31.372957229614258, "learning_rate": 6.654761904761905e-06, "loss": 47.111, "step": 1414 }, { "epoch": 33.69253731343284, "grad_norm": 23.98666763305664, "learning_rate": 6.648809523809524e-06, "loss": 47.1762, "step": 1415 }, { "epoch": 33.71641791044776, "grad_norm": 27.895401000976562, "learning_rate": 6.642857142857143e-06, "loss": 45.6151, "step": 1416 }, { "epoch": 33.74029850746269, "grad_norm": 21.776100158691406, "learning_rate": 6.636904761904762e-06, "loss": 45.7198, "step": 1417 }, { "epoch": 33.76417910447761, "grad_norm": 30.373878479003906, "learning_rate": 6.630952380952381e-06, "loss": 45.2212, "step": 1418 }, { "epoch": 33.788059701492536, "grad_norm": 26.604324340820312, "learning_rate": 6.625e-06, "loss": 45.2001, "step": 1419 }, { "epoch": 33.811940298507466, "grad_norm": 29.38104248046875, "learning_rate": 6.619047619047619e-06, "loss": 46.711, "step": 1420 }, { "epoch": 33.83582089552239, "grad_norm": 24.36806869506836, "learning_rate": 6.613095238095239e-06, "loss": 46.3608, "step": 1421 }, { "epoch": 33.85970149253731, "grad_norm": 33.40534210205078, "learning_rate": 6.607142857142858e-06, "loss": 45.5189, "step": 1422 }, { "epoch": 33.88358208955224, "grad_norm": 25.91522789001465, "learning_rate": 6.601190476190477e-06, "loss": 47.3604, "step": 1423 }, { "epoch": 33.907462686567165, "grad_norm": 25.26549530029297, "learning_rate": 6.595238095238096e-06, "loss": 46.483, "step": 1424 }, { "epoch": 33.93134328358209, "grad_norm": 26.101816177368164, "learning_rate": 6.589285714285715e-06, "loss": 45.7998, "step": 1425 }, { "epoch": 33.95522388059702, "grad_norm": 27.942903518676758, "learning_rate": 6.5833333333333335e-06, "loss": 46.4593, "step": 1426 }, { "epoch": 33.97910447761194, "grad_norm": 21.551429748535156, "learning_rate": 6.5773809523809525e-06, "loss": 45.458, "step": 1427 }, { "epoch": 34.0, "grad_norm": 32.26907730102539, "learning_rate": 6.571428571428572e-06, "loss": 38.5718, "step": 1428 }, { "epoch": 34.02388059701492, "grad_norm": 32.16934585571289, "learning_rate": 6.565476190476191e-06, "loss": 45.5812, "step": 1429 }, { "epoch": 34.04776119402985, "grad_norm": 19.646459579467773, "learning_rate": 6.55952380952381e-06, "loss": 44.9032, "step": 1430 }, { "epoch": 34.071641791044776, "grad_norm": 28.886430740356445, "learning_rate": 6.553571428571429e-06, "loss": 45.4187, "step": 1431 }, { "epoch": 34.0955223880597, "grad_norm": 22.722471237182617, "learning_rate": 6.547619047619048e-06, "loss": 45.468, "step": 1432 }, { "epoch": 34.11940298507463, "grad_norm": 25.334766387939453, "learning_rate": 6.541666666666667e-06, "loss": 47.3534, "step": 1433 }, { "epoch": 34.14328358208955, "grad_norm": 28.49740982055664, "learning_rate": 6.535714285714286e-06, "loss": 47.4733, "step": 1434 }, { "epoch": 34.167164179104475, "grad_norm": 27.773820877075195, "learning_rate": 6.529761904761906e-06, "loss": 45.3215, "step": 1435 }, { "epoch": 34.191044776119405, "grad_norm": 24.25234031677246, "learning_rate": 6.523809523809525e-06, "loss": 46.0011, "step": 1436 }, { "epoch": 34.21492537313433, "grad_norm": 28.666475296020508, "learning_rate": 6.517857142857144e-06, "loss": 45.9091, "step": 1437 }, { "epoch": 34.23880597014925, "grad_norm": 24.367712020874023, "learning_rate": 6.5119047619047626e-06, "loss": 46.5004, "step": 1438 }, { "epoch": 34.26268656716418, "grad_norm": 23.11983299255371, "learning_rate": 6.5059523809523815e-06, "loss": 47.3335, "step": 1439 }, { "epoch": 34.286567164179104, "grad_norm": 20.672304153442383, "learning_rate": 6.5000000000000004e-06, "loss": 47.1491, "step": 1440 }, { "epoch": 34.31044776119403, "grad_norm": 23.815290451049805, "learning_rate": 6.49404761904762e-06, "loss": 46.7084, "step": 1441 }, { "epoch": 34.33432835820896, "grad_norm": 20.582489013671875, "learning_rate": 6.488095238095239e-06, "loss": 46.9707, "step": 1442 }, { "epoch": 34.35820895522388, "grad_norm": 18.315673828125, "learning_rate": 6.482142857142858e-06, "loss": 47.5359, "step": 1443 }, { "epoch": 34.3820895522388, "grad_norm": 24.396499633789062, "learning_rate": 6.476190476190477e-06, "loss": 46.052, "step": 1444 }, { "epoch": 34.40597014925373, "grad_norm": 21.200523376464844, "learning_rate": 6.470238095238096e-06, "loss": 46.5843, "step": 1445 }, { "epoch": 34.429850746268656, "grad_norm": 17.59020233154297, "learning_rate": 6.464285714285715e-06, "loss": 46.0017, "step": 1446 }, { "epoch": 34.45373134328358, "grad_norm": 21.810382843017578, "learning_rate": 6.458333333333334e-06, "loss": 46.4232, "step": 1447 }, { "epoch": 34.47761194029851, "grad_norm": 27.78464126586914, "learning_rate": 6.452380952380954e-06, "loss": 46.0973, "step": 1448 }, { "epoch": 34.50149253731343, "grad_norm": 29.360275268554688, "learning_rate": 6.446428571428573e-06, "loss": 45.4821, "step": 1449 }, { "epoch": 34.525373134328355, "grad_norm": 26.914587020874023, "learning_rate": 6.4404761904761916e-06, "loss": 45.2982, "step": 1450 }, { "epoch": 34.549253731343285, "grad_norm": 22.19925880432129, "learning_rate": 6.4345238095238105e-06, "loss": 46.6693, "step": 1451 }, { "epoch": 34.57313432835821, "grad_norm": 25.39541244506836, "learning_rate": 6.4285714285714295e-06, "loss": 45.8936, "step": 1452 }, { "epoch": 34.59701492537313, "grad_norm": 20.633222579956055, "learning_rate": 6.422619047619048e-06, "loss": 44.6061, "step": 1453 }, { "epoch": 34.62089552238806, "grad_norm": 22.513790130615234, "learning_rate": 6.416666666666667e-06, "loss": 45.5503, "step": 1454 }, { "epoch": 34.644776119402984, "grad_norm": 25.715484619140625, "learning_rate": 6.410714285714287e-06, "loss": 45.7485, "step": 1455 }, { "epoch": 34.668656716417914, "grad_norm": 21.964609146118164, "learning_rate": 6.404761904761904e-06, "loss": 46.3223, "step": 1456 }, { "epoch": 34.69253731343284, "grad_norm": 20.32435417175293, "learning_rate": 6.398809523809524e-06, "loss": 45.1507, "step": 1457 }, { "epoch": 34.71641791044776, "grad_norm": 24.32924461364746, "learning_rate": 6.392857142857143e-06, "loss": 45.8221, "step": 1458 }, { "epoch": 34.74029850746269, "grad_norm": 19.200895309448242, "learning_rate": 6.386904761904762e-06, "loss": 45.0915, "step": 1459 }, { "epoch": 34.76417910447761, "grad_norm": 24.436569213867188, "learning_rate": 6.380952380952381e-06, "loss": 45.5892, "step": 1460 }, { "epoch": 34.788059701492536, "grad_norm": 24.381568908691406, "learning_rate": 6.375e-06, "loss": 45.5295, "step": 1461 }, { "epoch": 34.811940298507466, "grad_norm": 19.64159393310547, "learning_rate": 6.369047619047619e-06, "loss": 46.244, "step": 1462 }, { "epoch": 34.83582089552239, "grad_norm": 27.420351028442383, "learning_rate": 6.363095238095238e-06, "loss": 45.9723, "step": 1463 }, { "epoch": 34.85970149253731, "grad_norm": 18.136165618896484, "learning_rate": 6.357142857142858e-06, "loss": 45.5106, "step": 1464 }, { "epoch": 34.88358208955224, "grad_norm": 21.70622444152832, "learning_rate": 6.3511904761904766e-06, "loss": 46.4965, "step": 1465 }, { "epoch": 34.907462686567165, "grad_norm": 23.573131561279297, "learning_rate": 6.3452380952380955e-06, "loss": 46.0698, "step": 1466 }, { "epoch": 34.93134328358209, "grad_norm": 21.20003890991211, "learning_rate": 6.3392857142857145e-06, "loss": 45.6992, "step": 1467 }, { "epoch": 34.95522388059702, "grad_norm": 23.745859146118164, "learning_rate": 6.333333333333333e-06, "loss": 45.8431, "step": 1468 }, { "epoch": 34.97910447761194, "grad_norm": 21.26241683959961, "learning_rate": 6.327380952380952e-06, "loss": 45.6577, "step": 1469 }, { "epoch": 35.0, "grad_norm": 22.033447265625, "learning_rate": 6.321428571428571e-06, "loss": 39.8491, "step": 1470 }, { "epoch": 35.02388059701492, "grad_norm": NaN, "learning_rate": 6.315476190476191e-06, "loss": 68.4405, "step": 1471 }, { "epoch": 35.04776119402985, "grad_norm": 22.06501007080078, "learning_rate": 6.315476190476191e-06, "loss": 44.971, "step": 1472 }, { "epoch": 35.071641791044776, "grad_norm": 23.923011779785156, "learning_rate": 6.30952380952381e-06, "loss": 45.4865, "step": 1473 }, { "epoch": 35.0955223880597, "grad_norm": 18.272428512573242, "learning_rate": 6.303571428571429e-06, "loss": 46.6551, "step": 1474 }, { "epoch": 35.11940298507463, "grad_norm": 23.046764373779297, "learning_rate": 6.297619047619048e-06, "loss": 46.3486, "step": 1475 }, { "epoch": 35.14328358208955, "grad_norm": 23.790733337402344, "learning_rate": 6.291666666666667e-06, "loss": 46.7032, "step": 1476 }, { "epoch": 35.167164179104475, "grad_norm": 23.891183853149414, "learning_rate": 6.285714285714286e-06, "loss": 44.9916, "step": 1477 }, { "epoch": 35.191044776119405, "grad_norm": 25.107316970825195, "learning_rate": 6.279761904761906e-06, "loss": 46.2358, "step": 1478 }, { "epoch": 35.21492537313433, "grad_norm": 20.48590660095215, "learning_rate": 6.2738095238095245e-06, "loss": 46.0048, "step": 1479 }, { "epoch": 35.23880597014925, "grad_norm": 25.425119400024414, "learning_rate": 6.2678571428571435e-06, "loss": 44.0941, "step": 1480 }, { "epoch": 35.26268656716418, "grad_norm": 28.264352798461914, "learning_rate": 6.261904761904762e-06, "loss": 46.5301, "step": 1481 }, { "epoch": 35.286567164179104, "grad_norm": 23.869232177734375, "learning_rate": 6.255952380952381e-06, "loss": 45.681, "step": 1482 }, { "epoch": 35.31044776119403, "grad_norm": 28.840408325195312, "learning_rate": 6.25e-06, "loss": 43.7517, "step": 1483 }, { "epoch": 35.33432835820896, "grad_norm": 26.768037796020508, "learning_rate": 6.244047619047619e-06, "loss": 46.1423, "step": 1484 }, { "epoch": 35.35820895522388, "grad_norm": 23.532470703125, "learning_rate": 6.238095238095239e-06, "loss": 45.6669, "step": 1485 }, { "epoch": 35.3820895522388, "grad_norm": 25.94774055480957, "learning_rate": 6.232142857142858e-06, "loss": 45.7672, "step": 1486 }, { "epoch": 35.40597014925373, "grad_norm": 23.215801239013672, "learning_rate": 6.226190476190477e-06, "loss": 45.6991, "step": 1487 }, { "epoch": 35.429850746268656, "grad_norm": 22.13661003112793, "learning_rate": 6.220238095238096e-06, "loss": 44.5214, "step": 1488 }, { "epoch": 35.45373134328358, "grad_norm": 24.596481323242188, "learning_rate": 6.214285714285715e-06, "loss": 46.1515, "step": 1489 }, { "epoch": 35.47761194029851, "grad_norm": 19.416872024536133, "learning_rate": 6.208333333333334e-06, "loss": 45.7596, "step": 1490 }, { "epoch": 35.50149253731343, "grad_norm": 23.993833541870117, "learning_rate": 6.202380952380953e-06, "loss": 46.1668, "step": 1491 }, { "epoch": 35.525373134328355, "grad_norm": 21.481637954711914, "learning_rate": 6.1964285714285725e-06, "loss": 45.1812, "step": 1492 }, { "epoch": 35.549253731343285, "grad_norm": 19.26917839050293, "learning_rate": 6.1904761904761914e-06, "loss": 45.9316, "step": 1493 }, { "epoch": 35.57313432835821, "grad_norm": 22.80115509033203, "learning_rate": 6.18452380952381e-06, "loss": 45.9088, "step": 1494 }, { "epoch": 35.59701492537313, "grad_norm": 21.33648109436035, "learning_rate": 6.178571428571429e-06, "loss": 46.7602, "step": 1495 }, { "epoch": 35.62089552238806, "grad_norm": 28.059947967529297, "learning_rate": 6.172619047619048e-06, "loss": 46.1767, "step": 1496 }, { "epoch": 35.644776119402984, "grad_norm": 21.1577205657959, "learning_rate": 6.166666666666667e-06, "loss": 45.6847, "step": 1497 }, { "epoch": 35.668656716417914, "grad_norm": 23.277509689331055, "learning_rate": 6.160714285714286e-06, "loss": 45.6145, "step": 1498 }, { "epoch": 35.69253731343284, "grad_norm": 16.815677642822266, "learning_rate": 6.154761904761906e-06, "loss": 45.515, "step": 1499 }, { "epoch": 35.71641791044776, "grad_norm": 24.218280792236328, "learning_rate": 6.148809523809525e-06, "loss": 47.6329, "step": 1500 }, { "epoch": 35.74029850746269, "grad_norm": 20.943737030029297, "learning_rate": 6.142857142857144e-06, "loss": 45.7388, "step": 1501 }, { "epoch": 35.76417910447761, "grad_norm": 20.344369888305664, "learning_rate": 6.136904761904763e-06, "loss": 45.9404, "step": 1502 }, { "epoch": 35.788059701492536, "grad_norm": 25.980487823486328, "learning_rate": 6.130952380952382e-06, "loss": 46.6928, "step": 1503 }, { "epoch": 35.811940298507466, "grad_norm": 19.285552978515625, "learning_rate": 6.125000000000001e-06, "loss": 46.4614, "step": 1504 }, { "epoch": 35.83582089552239, "grad_norm": 27.701011657714844, "learning_rate": 6.11904761904762e-06, "loss": 45.258, "step": 1505 }, { "epoch": 35.85970149253731, "grad_norm": 24.963760375976562, "learning_rate": 6.113095238095239e-06, "loss": 47.0721, "step": 1506 }, { "epoch": 35.88358208955224, "grad_norm": 25.08616828918457, "learning_rate": 6.107142857142858e-06, "loss": 45.9668, "step": 1507 }, { "epoch": 35.907462686567165, "grad_norm": 18.00580406188965, "learning_rate": 6.101190476190477e-06, "loss": 46.1049, "step": 1508 }, { "epoch": 35.93134328358209, "grad_norm": 24.686004638671875, "learning_rate": 6.095238095238096e-06, "loss": 46.6996, "step": 1509 }, { "epoch": 35.95522388059702, "grad_norm": 18.304157257080078, "learning_rate": 6.089285714285714e-06, "loss": 46.694, "step": 1510 }, { "epoch": 35.97910447761194, "grad_norm": 23.10132598876953, "learning_rate": 6.083333333333333e-06, "loss": 46.3807, "step": 1511 }, { "epoch": 36.0, "grad_norm": 19.077655792236328, "learning_rate": 6.077380952380952e-06, "loss": 41.1702, "step": 1512 }, { "epoch": 36.02388059701492, "grad_norm": 26.49584197998047, "learning_rate": 6.071428571428571e-06, "loss": 45.382, "step": 1513 }, { "epoch": 36.04776119402985, "grad_norm": 24.438323974609375, "learning_rate": 6.065476190476191e-06, "loss": 45.9433, "step": 1514 }, { "epoch": 36.071641791044776, "grad_norm": 30.8107852935791, "learning_rate": 6.05952380952381e-06, "loss": 45.6688, "step": 1515 }, { "epoch": 36.0955223880597, "grad_norm": 31.754154205322266, "learning_rate": 6.053571428571429e-06, "loss": 45.9768, "step": 1516 }, { "epoch": 36.11940298507463, "grad_norm": 26.034778594970703, "learning_rate": 6.047619047619048e-06, "loss": 46.022, "step": 1517 }, { "epoch": 36.14328358208955, "grad_norm": 31.643035888671875, "learning_rate": 6.041666666666667e-06, "loss": 44.5987, "step": 1518 }, { "epoch": 36.167164179104475, "grad_norm": 24.322874069213867, "learning_rate": 6.035714285714286e-06, "loss": 45.3774, "step": 1519 }, { "epoch": 36.191044776119405, "grad_norm": 29.067466735839844, "learning_rate": 6.029761904761905e-06, "loss": 46.1784, "step": 1520 }, { "epoch": 36.21492537313433, "grad_norm": 30.415788650512695, "learning_rate": 6.023809523809524e-06, "loss": 46.7259, "step": 1521 }, { "epoch": 36.23880597014925, "grad_norm": 19.417943954467773, "learning_rate": 6.017857142857143e-06, "loss": 46.0544, "step": 1522 }, { "epoch": 36.26268656716418, "grad_norm": 27.239500045776367, "learning_rate": 6.011904761904762e-06, "loss": 46.9344, "step": 1523 }, { "epoch": 36.286567164179104, "grad_norm": 27.671018600463867, "learning_rate": 6.005952380952381e-06, "loss": 45.78, "step": 1524 }, { "epoch": 36.31044776119403, "grad_norm": 25.103811264038086, "learning_rate": 6e-06, "loss": 45.7153, "step": 1525 }, { "epoch": 36.33432835820896, "grad_norm": 26.25937843322754, "learning_rate": 5.994047619047619e-06, "loss": 45.3151, "step": 1526 }, { "epoch": 36.35820895522388, "grad_norm": 18.400033950805664, "learning_rate": 5.988095238095238e-06, "loss": 46.5614, "step": 1527 }, { "epoch": 36.3820895522388, "grad_norm": 35.505374908447266, "learning_rate": 5.982142857142858e-06, "loss": 45.8805, "step": 1528 }, { "epoch": 36.40597014925373, "grad_norm": 31.476438522338867, "learning_rate": 5.976190476190477e-06, "loss": 46.189, "step": 1529 }, { "epoch": 36.429850746268656, "grad_norm": 26.192047119140625, "learning_rate": 5.970238095238096e-06, "loss": 45.7026, "step": 1530 }, { "epoch": 36.45373134328358, "grad_norm": 29.712961196899414, "learning_rate": 5.964285714285715e-06, "loss": 44.86, "step": 1531 }, { "epoch": 36.47761194029851, "grad_norm": 28.22374153137207, "learning_rate": 5.958333333333334e-06, "loss": 45.7644, "step": 1532 }, { "epoch": 36.50149253731343, "grad_norm": 23.614940643310547, "learning_rate": 5.9523809523809525e-06, "loss": 45.0373, "step": 1533 }, { "epoch": 36.525373134328355, "grad_norm": 27.78896141052246, "learning_rate": 5.9464285714285715e-06, "loss": 46.9277, "step": 1534 }, { "epoch": 36.549253731343285, "grad_norm": 18.64702606201172, "learning_rate": 5.940476190476191e-06, "loss": 45.277, "step": 1535 }, { "epoch": 36.57313432835821, "grad_norm": 27.2061710357666, "learning_rate": 5.93452380952381e-06, "loss": 46.8394, "step": 1536 }, { "epoch": 36.59701492537313, "grad_norm": 26.296287536621094, "learning_rate": 5.928571428571429e-06, "loss": 44.8519, "step": 1537 }, { "epoch": 36.62089552238806, "grad_norm": 26.594314575195312, "learning_rate": 5.922619047619048e-06, "loss": 45.1743, "step": 1538 }, { "epoch": 36.644776119402984, "grad_norm": 24.076461791992188, "learning_rate": 5.916666666666667e-06, "loss": 45.4145, "step": 1539 }, { "epoch": 36.668656716417914, "grad_norm": 23.31978416442871, "learning_rate": 5.910714285714286e-06, "loss": 45.7526, "step": 1540 }, { "epoch": 36.69253731343284, "grad_norm": 22.630998611450195, "learning_rate": 5.904761904761905e-06, "loss": 46.4197, "step": 1541 }, { "epoch": 36.71641791044776, "grad_norm": 32.66592025756836, "learning_rate": 5.898809523809525e-06, "loss": 45.0123, "step": 1542 }, { "epoch": 36.74029850746269, "grad_norm": 24.478839874267578, "learning_rate": 5.892857142857144e-06, "loss": 46.1418, "step": 1543 }, { "epoch": 36.76417910447761, "grad_norm": 33.325775146484375, "learning_rate": 5.886904761904763e-06, "loss": 45.8228, "step": 1544 }, { "epoch": 36.788059701492536, "grad_norm": 29.264528274536133, "learning_rate": 5.8809523809523816e-06, "loss": 46.1921, "step": 1545 }, { "epoch": 36.811940298507466, "grad_norm": 31.78297233581543, "learning_rate": 5.8750000000000005e-06, "loss": 45.4564, "step": 1546 }, { "epoch": 36.83582089552239, "grad_norm": 27.223127365112305, "learning_rate": 5.8690476190476194e-06, "loss": 45.5277, "step": 1547 }, { "epoch": 36.85970149253731, "grad_norm": 26.29422950744629, "learning_rate": 5.863095238095239e-06, "loss": 46.2285, "step": 1548 }, { "epoch": 36.88358208955224, "grad_norm": 27.933652877807617, "learning_rate": 5.857142857142858e-06, "loss": 46.4441, "step": 1549 }, { "epoch": 36.907462686567165, "grad_norm": 25.306129455566406, "learning_rate": 5.851190476190477e-06, "loss": 45.9724, "step": 1550 }, { "epoch": 36.93134328358209, "grad_norm": 23.481304168701172, "learning_rate": 5.845238095238096e-06, "loss": 46.2544, "step": 1551 }, { "epoch": 36.95522388059702, "grad_norm": 20.86615562438965, "learning_rate": 5.839285714285715e-06, "loss": 47.4502, "step": 1552 }, { "epoch": 36.97910447761194, "grad_norm": 21.519290924072266, "learning_rate": 5.833333333333334e-06, "loss": 45.0165, "step": 1553 }, { "epoch": 37.0, "grad_norm": 22.031705856323242, "learning_rate": 5.827380952380953e-06, "loss": 40.6199, "step": 1554 }, { "epoch": 37.02388059701492, "grad_norm": 29.273820877075195, "learning_rate": 5.821428571428573e-06, "loss": 46.5836, "step": 1555 }, { "epoch": 37.04776119402985, "grad_norm": 24.417945861816406, "learning_rate": 5.815476190476192e-06, "loss": 44.9549, "step": 1556 }, { "epoch": 37.071641791044776, "grad_norm": 24.60706901550293, "learning_rate": 5.8095238095238106e-06, "loss": 44.8607, "step": 1557 }, { "epoch": 37.0955223880597, "grad_norm": 24.76397132873535, "learning_rate": 5.8035714285714295e-06, "loss": 44.9875, "step": 1558 }, { "epoch": 37.11940298507463, "grad_norm": 24.380352020263672, "learning_rate": 5.7976190476190485e-06, "loss": 45.4835, "step": 1559 }, { "epoch": 37.14328358208955, "grad_norm": 19.852746963500977, "learning_rate": 5.791666666666667e-06, "loss": 45.1303, "step": 1560 }, { "epoch": 37.167164179104475, "grad_norm": 23.550888061523438, "learning_rate": 5.785714285714286e-06, "loss": 46.1086, "step": 1561 }, { "epoch": 37.191044776119405, "grad_norm": 24.31315803527832, "learning_rate": 5.7797619047619044e-06, "loss": 45.8181, "step": 1562 }, { "epoch": 37.21492537313433, "grad_norm": 19.324602127075195, "learning_rate": 5.773809523809523e-06, "loss": 44.8606, "step": 1563 }, { "epoch": 37.23880597014925, "grad_norm": 26.747098922729492, "learning_rate": 5.767857142857143e-06, "loss": 45.753, "step": 1564 }, { "epoch": 37.26268656716418, "grad_norm": 22.472572326660156, "learning_rate": 5.761904761904762e-06, "loss": 46.0156, "step": 1565 }, { "epoch": 37.286567164179104, "grad_norm": 20.813426971435547, "learning_rate": 5.755952380952381e-06, "loss": 46.7466, "step": 1566 }, { "epoch": 37.31044776119403, "grad_norm": 27.869413375854492, "learning_rate": 5.75e-06, "loss": 46.287, "step": 1567 }, { "epoch": 37.33432835820896, "grad_norm": 23.257444381713867, "learning_rate": 5.744047619047619e-06, "loss": 45.9862, "step": 1568 }, { "epoch": 37.35820895522388, "grad_norm": 24.715946197509766, "learning_rate": 5.738095238095238e-06, "loss": 47.3128, "step": 1569 }, { "epoch": 37.3820895522388, "grad_norm": 21.670385360717773, "learning_rate": 5.732142857142857e-06, "loss": 46.121, "step": 1570 }, { "epoch": 37.40597014925373, "grad_norm": 24.53063201904297, "learning_rate": 5.726190476190477e-06, "loss": 46.5441, "step": 1571 }, { "epoch": 37.429850746268656, "grad_norm": 19.584630966186523, "learning_rate": 5.7202380952380956e-06, "loss": 46.0683, "step": 1572 }, { "epoch": 37.45373134328358, "grad_norm": 26.179149627685547, "learning_rate": 5.7142857142857145e-06, "loss": 46.3294, "step": 1573 }, { "epoch": 37.47761194029851, "grad_norm": 21.13595199584961, "learning_rate": 5.7083333333333335e-06, "loss": 45.7853, "step": 1574 }, { "epoch": 37.50149253731343, "grad_norm": 28.440006256103516, "learning_rate": 5.702380952380952e-06, "loss": 46.5029, "step": 1575 }, { "epoch": 37.525373134328355, "grad_norm": 27.941879272460938, "learning_rate": 5.696428571428571e-06, "loss": 45.6132, "step": 1576 }, { "epoch": 37.549253731343285, "grad_norm": 25.952688217163086, "learning_rate": 5.690476190476191e-06, "loss": 45.6803, "step": 1577 }, { "epoch": 37.57313432835821, "grad_norm": 23.551633834838867, "learning_rate": 5.68452380952381e-06, "loss": 45.1563, "step": 1578 }, { "epoch": 37.59701492537313, "grad_norm": 23.119415283203125, "learning_rate": 5.678571428571429e-06, "loss": 47.2717, "step": 1579 }, { "epoch": 37.62089552238806, "grad_norm": 27.995214462280273, "learning_rate": 5.672619047619048e-06, "loss": 46.1847, "step": 1580 }, { "epoch": 37.644776119402984, "grad_norm": 28.0698299407959, "learning_rate": 5.666666666666667e-06, "loss": 46.4639, "step": 1581 }, { "epoch": 37.668656716417914, "grad_norm": 23.09457015991211, "learning_rate": 5.660714285714286e-06, "loss": 45.0939, "step": 1582 }, { "epoch": 37.69253731343284, "grad_norm": 25.94692611694336, "learning_rate": 5.654761904761905e-06, "loss": 45.216, "step": 1583 }, { "epoch": 37.71641791044776, "grad_norm": 20.192176818847656, "learning_rate": 5.648809523809525e-06, "loss": 45.7997, "step": 1584 }, { "epoch": 37.74029850746269, "grad_norm": 26.115283966064453, "learning_rate": 5.6428571428571435e-06, "loss": 44.8405, "step": 1585 }, { "epoch": 37.76417910447761, "grad_norm": 24.431346893310547, "learning_rate": 5.6369047619047625e-06, "loss": 46.5067, "step": 1586 }, { "epoch": 37.788059701492536, "grad_norm": 25.838623046875, "learning_rate": 5.630952380952381e-06, "loss": 46.1806, "step": 1587 }, { "epoch": 37.811940298507466, "grad_norm": 20.44222640991211, "learning_rate": 5.625e-06, "loss": 45.7445, "step": 1588 }, { "epoch": 37.83582089552239, "grad_norm": 19.459331512451172, "learning_rate": 5.619047619047619e-06, "loss": 45.7875, "step": 1589 }, { "epoch": 37.85970149253731, "grad_norm": 17.49920082092285, "learning_rate": 5.613095238095238e-06, "loss": 44.2889, "step": 1590 }, { "epoch": 37.88358208955224, "grad_norm": 18.541828155517578, "learning_rate": 5.607142857142858e-06, "loss": 46.7668, "step": 1591 }, { "epoch": 37.907462686567165, "grad_norm": 16.22308349609375, "learning_rate": 5.601190476190477e-06, "loss": 45.0406, "step": 1592 }, { "epoch": 37.93134328358209, "grad_norm": 21.068069458007812, "learning_rate": 5.595238095238096e-06, "loss": 44.0997, "step": 1593 }, { "epoch": 37.95522388059702, "grad_norm": 18.877992630004883, "learning_rate": 5.589285714285715e-06, "loss": 46.5816, "step": 1594 }, { "epoch": 37.97910447761194, "grad_norm": 20.14031410217285, "learning_rate": 5.583333333333334e-06, "loss": 44.8537, "step": 1595 }, { "epoch": 38.0, "grad_norm": 19.989953994750977, "learning_rate": 5.577380952380953e-06, "loss": 39.8501, "step": 1596 }, { "epoch": 38.02388059701492, "grad_norm": 23.484283447265625, "learning_rate": 5.571428571428572e-06, "loss": 46.3864, "step": 1597 }, { "epoch": 38.04776119402985, "grad_norm": 20.579587936401367, "learning_rate": 5.5654761904761915e-06, "loss": 46.1473, "step": 1598 }, { "epoch": 38.071641791044776, "grad_norm": 19.48423194885254, "learning_rate": 5.5595238095238104e-06, "loss": 45.3255, "step": 1599 }, { "epoch": 38.0955223880597, "grad_norm": 23.766077041625977, "learning_rate": 5.553571428571429e-06, "loss": 45.4387, "step": 1600 }, { "epoch": 38.11940298507463, "grad_norm": 17.605247497558594, "learning_rate": 5.547619047619048e-06, "loss": 46.1065, "step": 1601 }, { "epoch": 38.14328358208955, "grad_norm": 20.179826736450195, "learning_rate": 5.541666666666667e-06, "loss": 45.974, "step": 1602 }, { "epoch": 38.167164179104475, "grad_norm": 28.50605583190918, "learning_rate": 5.535714285714286e-06, "loss": 46.0505, "step": 1603 }, { "epoch": 38.191044776119405, "grad_norm": 16.770771026611328, "learning_rate": 5.529761904761905e-06, "loss": 46.4403, "step": 1604 }, { "epoch": 38.21492537313433, "grad_norm": NaN, "learning_rate": 5.523809523809525e-06, "loss": 69.3153, "step": 1605 }, { "epoch": 38.23880597014925, "grad_norm": 25.01431655883789, "learning_rate": 5.523809523809525e-06, "loss": 46.8119, "step": 1606 }, { "epoch": 38.26268656716418, "grad_norm": 20.459747314453125, "learning_rate": 5.517857142857144e-06, "loss": 47.7687, "step": 1607 }, { "epoch": 38.286567164179104, "grad_norm": 21.603086471557617, "learning_rate": 5.511904761904763e-06, "loss": 44.6093, "step": 1608 }, { "epoch": 38.31044776119403, "grad_norm": 25.284805297851562, "learning_rate": 5.505952380952382e-06, "loss": 45.0834, "step": 1609 }, { "epoch": 38.33432835820896, "grad_norm": 21.638917922973633, "learning_rate": 5.500000000000001e-06, "loss": 45.3904, "step": 1610 }, { "epoch": 38.35820895522388, "grad_norm": 22.443374633789062, "learning_rate": 5.49404761904762e-06, "loss": 43.7163, "step": 1611 }, { "epoch": 38.3820895522388, "grad_norm": 23.427288055419922, "learning_rate": 5.4880952380952394e-06, "loss": 44.7692, "step": 1612 }, { "epoch": 38.40597014925373, "grad_norm": 22.346813201904297, "learning_rate": 5.482142857142858e-06, "loss": 45.0674, "step": 1613 }, { "epoch": 38.429850746268656, "grad_norm": 20.567325592041016, "learning_rate": 5.476190476190477e-06, "loss": 45.5367, "step": 1614 }, { "epoch": 38.45373134328358, "grad_norm": 23.872394561767578, "learning_rate": 5.470238095238096e-06, "loss": 46.2728, "step": 1615 }, { "epoch": 38.47761194029851, "grad_norm": 23.790176391601562, "learning_rate": 5.464285714285714e-06, "loss": 46.3734, "step": 1616 }, { "epoch": 38.50149253731343, "grad_norm": 22.707136154174805, "learning_rate": 5.458333333333333e-06, "loss": 44.577, "step": 1617 }, { "epoch": 38.525373134328355, "grad_norm": 26.203781127929688, "learning_rate": 5.452380952380952e-06, "loss": 45.6794, "step": 1618 }, { "epoch": 38.549253731343285, "grad_norm": 22.935991287231445, "learning_rate": 5.446428571428571e-06, "loss": 45.7815, "step": 1619 }, { "epoch": 38.57313432835821, "grad_norm": 28.275053024291992, "learning_rate": 5.44047619047619e-06, "loss": 45.0312, "step": 1620 }, { "epoch": 38.59701492537313, "grad_norm": 23.848264694213867, "learning_rate": 5.43452380952381e-06, "loss": 46.7093, "step": 1621 }, { "epoch": 38.62089552238806, "grad_norm": 25.240819931030273, "learning_rate": 5.428571428571429e-06, "loss": 46.6751, "step": 1622 }, { "epoch": 38.644776119402984, "grad_norm": 26.2618350982666, "learning_rate": 5.422619047619048e-06, "loss": 47.5501, "step": 1623 }, { "epoch": 38.668656716417914, "grad_norm": 23.986392974853516, "learning_rate": 5.416666666666667e-06, "loss": 45.6208, "step": 1624 }, { "epoch": 38.69253731343284, "grad_norm": 22.11539077758789, "learning_rate": 5.410714285714286e-06, "loss": 44.4163, "step": 1625 }, { "epoch": 38.71641791044776, "grad_norm": 22.9071044921875, "learning_rate": 5.404761904761905e-06, "loss": 45.5715, "step": 1626 }, { "epoch": 38.74029850746269, "grad_norm": 22.759733200073242, "learning_rate": 5.398809523809524e-06, "loss": 45.1706, "step": 1627 }, { "epoch": 38.76417910447761, "grad_norm": 23.66644287109375, "learning_rate": 5.392857142857143e-06, "loss": 45.4343, "step": 1628 }, { "epoch": 38.788059701492536, "grad_norm": 20.179203033447266, "learning_rate": 5.386904761904762e-06, "loss": 45.9163, "step": 1629 }, { "epoch": 38.811940298507466, "grad_norm": 22.327817916870117, "learning_rate": 5.380952380952381e-06, "loss": 44.1558, "step": 1630 }, { "epoch": 38.83582089552239, "grad_norm": 22.10496711730957, "learning_rate": 5.375e-06, "loss": 45.764, "step": 1631 }, { "epoch": 38.85970149253731, "grad_norm": 24.25627326965332, "learning_rate": 5.369047619047619e-06, "loss": 46.6394, "step": 1632 }, { "epoch": 38.88358208955224, "grad_norm": 20.797740936279297, "learning_rate": 5.363095238095238e-06, "loss": 45.6251, "step": 1633 }, { "epoch": 38.907462686567165, "grad_norm": 24.14659309387207, "learning_rate": 5.357142857142857e-06, "loss": 45.5603, "step": 1634 }, { "epoch": 38.93134328358209, "grad_norm": 23.259584426879883, "learning_rate": 5.351190476190477e-06, "loss": 46.055, "step": 1635 }, { "epoch": 38.95522388059702, "grad_norm": 23.72128677368164, "learning_rate": 5.345238095238096e-06, "loss": 45.9729, "step": 1636 }, { "epoch": 38.97910447761194, "grad_norm": 22.746183395385742, "learning_rate": 5.339285714285715e-06, "loss": 46.1893, "step": 1637 }, { "epoch": 39.0, "grad_norm": 22.067306518554688, "learning_rate": 5.333333333333334e-06, "loss": 39.8095, "step": 1638 }, { "epoch": 39.02388059701492, "grad_norm": 22.888097763061523, "learning_rate": 5.327380952380953e-06, "loss": 45.3095, "step": 1639 }, { "epoch": 39.04776119402985, "grad_norm": 23.86408233642578, "learning_rate": 5.3214285714285715e-06, "loss": 46.3774, "step": 1640 }, { "epoch": 39.071641791044776, "grad_norm": 21.418088912963867, "learning_rate": 5.3154761904761905e-06, "loss": 45.6404, "step": 1641 }, { "epoch": 39.0955223880597, "grad_norm": 21.521831512451172, "learning_rate": 5.30952380952381e-06, "loss": 46.4895, "step": 1642 }, { "epoch": 39.11940298507463, "grad_norm": 20.189105987548828, "learning_rate": 5.303571428571429e-06, "loss": 44.6538, "step": 1643 }, { "epoch": 39.14328358208955, "grad_norm": 19.73761558532715, "learning_rate": 5.297619047619048e-06, "loss": 45.5941, "step": 1644 }, { "epoch": 39.167164179104475, "grad_norm": 25.631227493286133, "learning_rate": 5.291666666666667e-06, "loss": 44.4105, "step": 1645 }, { "epoch": 39.191044776119405, "grad_norm": 19.47798728942871, "learning_rate": 5.285714285714286e-06, "loss": 46.3286, "step": 1646 }, { "epoch": 39.21492537313433, "grad_norm": 19.627609252929688, "learning_rate": 5.279761904761905e-06, "loss": 46.0707, "step": 1647 }, { "epoch": 39.23880597014925, "grad_norm": 19.668777465820312, "learning_rate": 5.273809523809525e-06, "loss": 44.8447, "step": 1648 }, { "epoch": 39.26268656716418, "grad_norm": 23.311546325683594, "learning_rate": 5.267857142857144e-06, "loss": 45.9081, "step": 1649 }, { "epoch": 39.286567164179104, "grad_norm": 21.426624298095703, "learning_rate": 5.261904761904763e-06, "loss": 45.4256, "step": 1650 }, { "epoch": 39.31044776119403, "grad_norm": 19.545969009399414, "learning_rate": 5.255952380952382e-06, "loss": 45.4264, "step": 1651 }, { "epoch": 39.33432835820896, "grad_norm": 22.78704833984375, "learning_rate": 5.2500000000000006e-06, "loss": 47.6822, "step": 1652 }, { "epoch": 39.35820895522388, "grad_norm": 18.759178161621094, "learning_rate": 5.2440476190476195e-06, "loss": 44.5254, "step": 1653 }, { "epoch": 39.3820895522388, "grad_norm": 19.855981826782227, "learning_rate": 5.2380952380952384e-06, "loss": 46.249, "step": 1654 }, { "epoch": 39.40597014925373, "grad_norm": 18.817089080810547, "learning_rate": 5.232142857142858e-06, "loss": 45.2813, "step": 1655 }, { "epoch": 39.429850746268656, "grad_norm": 19.587581634521484, "learning_rate": 5.226190476190477e-06, "loss": 45.0445, "step": 1656 }, { "epoch": 39.45373134328358, "grad_norm": 19.9105167388916, "learning_rate": 5.220238095238096e-06, "loss": 46.8658, "step": 1657 }, { "epoch": 39.47761194029851, "grad_norm": 19.529748916625977, "learning_rate": 5.214285714285715e-06, "loss": 46.6175, "step": 1658 }, { "epoch": 39.50149253731343, "grad_norm": 18.63764762878418, "learning_rate": 5.208333333333334e-06, "loss": 46.3122, "step": 1659 }, { "epoch": 39.525373134328355, "grad_norm": 19.58228874206543, "learning_rate": 5.202380952380953e-06, "loss": 44.6263, "step": 1660 }, { "epoch": 39.549253731343285, "grad_norm": 21.451528549194336, "learning_rate": 5.196428571428572e-06, "loss": 46.2707, "step": 1661 }, { "epoch": 39.57313432835821, "grad_norm": 22.756628036499023, "learning_rate": 5.190476190476192e-06, "loss": 45.0001, "step": 1662 }, { "epoch": 39.59701492537313, "grad_norm": 24.481945037841797, "learning_rate": 5.184523809523811e-06, "loss": 45.3038, "step": 1663 }, { "epoch": 39.62089552238806, "grad_norm": 19.3010196685791, "learning_rate": 5.1785714285714296e-06, "loss": 46.1894, "step": 1664 }, { "epoch": 39.644776119402984, "grad_norm": 24.840822219848633, "learning_rate": 5.1726190476190485e-06, "loss": 46.6593, "step": 1665 }, { "epoch": 39.668656716417914, "grad_norm": 20.712875366210938, "learning_rate": 5.1666666666666675e-06, "loss": 46.7594, "step": 1666 }, { "epoch": 39.69253731343284, "grad_norm": 20.431598663330078, "learning_rate": 5.160714285714286e-06, "loss": 46.4969, "step": 1667 }, { "epoch": 39.71641791044776, "grad_norm": 21.094484329223633, "learning_rate": 5.1547619047619045e-06, "loss": 44.5114, "step": 1668 }, { "epoch": 39.74029850746269, "grad_norm": 22.929946899414062, "learning_rate": 5.1488095238095234e-06, "loss": 44.5251, "step": 1669 }, { "epoch": 39.76417910447761, "grad_norm": 17.285877227783203, "learning_rate": 5.142857142857142e-06, "loss": 45.312, "step": 1670 }, { "epoch": 39.788059701492536, "grad_norm": 18.29960823059082, "learning_rate": 5.136904761904762e-06, "loss": 44.984, "step": 1671 }, { "epoch": 39.811940298507466, "grad_norm": 25.79044532775879, "learning_rate": 5.130952380952381e-06, "loss": 44.8192, "step": 1672 }, { "epoch": 39.83582089552239, "grad_norm": 21.014759063720703, "learning_rate": 5.125e-06, "loss": 46.9319, "step": 1673 }, { "epoch": 39.85970149253731, "grad_norm": 24.91911506652832, "learning_rate": 5.119047619047619e-06, "loss": 46.7778, "step": 1674 }, { "epoch": 39.88358208955224, "grad_norm": 25.174942016601562, "learning_rate": 5.113095238095238e-06, "loss": 44.948, "step": 1675 }, { "epoch": 39.907462686567165, "grad_norm": 22.642148971557617, "learning_rate": 5.107142857142857e-06, "loss": 45.5964, "step": 1676 }, { "epoch": 39.93134328358209, "grad_norm": 24.867389678955078, "learning_rate": 5.101190476190476e-06, "loss": 45.446, "step": 1677 }, { "epoch": 39.95522388059702, "grad_norm": 21.888269424438477, "learning_rate": 5.095238095238096e-06, "loss": 45.414, "step": 1678 }, { "epoch": 39.97910447761194, "grad_norm": 25.071487426757812, "learning_rate": 5.0892857142857146e-06, "loss": 44.9464, "step": 1679 }, { "epoch": 40.0, "grad_norm": 19.389556884765625, "learning_rate": 5.0833333333333335e-06, "loss": 39.5515, "step": 1680 }, { "epoch": 40.0, "step": 1680, "total_flos": 8.26172747445074e+16, "train_loss": 23.38366504396711, "train_runtime": 26137.4766, "train_samples_per_second": 8.191, "train_steps_per_second": 0.064 }, { "epoch": 40.02388059701492, "grad_norm": 18.99544334411621, "learning_rate": 1e-05, "loss": 46.1194, "step": 1681 }, { "epoch": 40.04776119402985, "grad_norm": Infinity, "learning_rate": 9.996031746031746e-06, "loss": 54.6718, "step": 1682 }, { "epoch": 40.071641791044776, "grad_norm": Infinity, "learning_rate": 9.996031746031746e-06, "loss": 54.4703, "step": 1683 }, { "epoch": 40.0955223880597, "grad_norm": 416.26324462890625, "learning_rate": 9.996031746031746e-06, "loss": 53.5676, "step": 1684 }, { "epoch": 40.11940298507463, "grad_norm": 147.0504608154297, "learning_rate": 9.992063492063493e-06, "loss": 50.2561, "step": 1685 }, { "epoch": 40.14328358208955, "grad_norm": 122.7557601928711, "learning_rate": 9.988095238095239e-06, "loss": 50.4153, "step": 1686 }, { "epoch": 40.167164179104475, "grad_norm": 97.062744140625, "learning_rate": 9.984126984126986e-06, "loss": 47.2739, "step": 1687 }, { "epoch": 40.191044776119405, "grad_norm": 73.37904357910156, "learning_rate": 9.980158730158731e-06, "loss": 48.0252, "step": 1688 }, { "epoch": 40.21492537313433, "grad_norm": 68.98373413085938, "learning_rate": 9.976190476190477e-06, "loss": 47.6782, "step": 1689 }, { "epoch": 40.23880597014925, "grad_norm": 56.258548736572266, "learning_rate": 9.972222222222224e-06, "loss": 47.5786, "step": 1690 }, { "epoch": 40.26268656716418, "grad_norm": 68.9515609741211, "learning_rate": 9.968253968253969e-06, "loss": 46.3938, "step": 1691 }, { "epoch": 40.286567164179104, "grad_norm": 39.17803955078125, "learning_rate": 9.964285714285714e-06, "loss": 45.9047, "step": 1692 }, { "epoch": 40.31044776119403, "grad_norm": 51.936981201171875, "learning_rate": 9.960317460317462e-06, "loss": 45.6047, "step": 1693 }, { "epoch": 40.33432835820896, "grad_norm": 43.64280700683594, "learning_rate": 9.956349206349207e-06, "loss": 46.6234, "step": 1694 }, { "epoch": 40.35820895522388, "grad_norm": 58.56443405151367, "learning_rate": 9.952380952380954e-06, "loss": 47.255, "step": 1695 }, { "epoch": 40.3820895522388, "grad_norm": 37.53863525390625, "learning_rate": 9.9484126984127e-06, "loss": 47.1183, "step": 1696 }, { "epoch": 40.40597014925373, "grad_norm": 35.800628662109375, "learning_rate": 9.944444444444445e-06, "loss": 46.3602, "step": 1697 }, { "epoch": 40.429850746268656, "grad_norm": 39.58418655395508, "learning_rate": 9.940476190476192e-06, "loss": 46.3082, "step": 1698 }, { "epoch": 40.45373134328358, "grad_norm": 30.6373233795166, "learning_rate": 9.936507936507937e-06, "loss": 45.2231, "step": 1699 }, { "epoch": 40.47761194029851, "grad_norm": 34.47962951660156, "learning_rate": 9.932539682539684e-06, "loss": 46.3243, "step": 1700 }, { "epoch": 40.50149253731343, "grad_norm": 23.599184036254883, "learning_rate": 9.92857142857143e-06, "loss": 46.3045, "step": 1701 }, { "epoch": 40.525373134328355, "grad_norm": 27.183767318725586, "learning_rate": 9.924603174603175e-06, "loss": 45.3216, "step": 1702 }, { "epoch": 40.549253731343285, "grad_norm": 27.263038635253906, "learning_rate": 9.920634920634922e-06, "loss": 46.8117, "step": 1703 }, { "epoch": 40.57313432835821, "grad_norm": 30.570518493652344, "learning_rate": 9.916666666666668e-06, "loss": 46.0951, "step": 1704 }, { "epoch": 40.59701492537313, "grad_norm": 23.30783462524414, "learning_rate": 9.912698412698413e-06, "loss": 45.5407, "step": 1705 }, { "epoch": 40.62089552238806, "grad_norm": 29.269088745117188, "learning_rate": 9.90873015873016e-06, "loss": 45.9624, "step": 1706 }, { "epoch": 40.644776119402984, "grad_norm": NaN, "learning_rate": 9.904761904761906e-06, "loss": 75.1575, "step": 1707 }, { "epoch": 40.668656716417914, "grad_norm": 25.785404205322266, "learning_rate": 9.904761904761906e-06, "loss": 45.9263, "step": 1708 }, { "epoch": 40.69253731343284, "grad_norm": 34.729549407958984, "learning_rate": 9.900793650793653e-06, "loss": 45.5276, "step": 1709 }, { "epoch": 40.71641791044776, "grad_norm": 28.62750816345215, "learning_rate": 9.896825396825398e-06, "loss": 46.2797, "step": 1710 }, { "epoch": 40.74029850746269, "grad_norm": 31.081378936767578, "learning_rate": 9.892857142857143e-06, "loss": 45.1643, "step": 1711 }, { "epoch": 40.76417910447761, "grad_norm": 28.92620086669922, "learning_rate": 9.88888888888889e-06, "loss": 46.3105, "step": 1712 }, { "epoch": 40.788059701492536, "grad_norm": 23.232866287231445, "learning_rate": 9.884920634920636e-06, "loss": 46.6131, "step": 1713 }, { "epoch": 40.811940298507466, "grad_norm": 25.97928810119629, "learning_rate": 9.880952380952381e-06, "loss": 45.5054, "step": 1714 }, { "epoch": 40.83582089552239, "grad_norm": 27.382034301757812, "learning_rate": 9.876984126984128e-06, "loss": 45.593, "step": 1715 }, { "epoch": 40.85970149253731, "grad_norm": 23.762460708618164, "learning_rate": 9.873015873015874e-06, "loss": 45.7414, "step": 1716 }, { "epoch": 40.88358208955224, "grad_norm": 29.6158390045166, "learning_rate": 9.869047619047621e-06, "loss": 45.0669, "step": 1717 }, { "epoch": 40.907462686567165, "grad_norm": 24.66147804260254, "learning_rate": 9.865079365079366e-06, "loss": 45.6125, "step": 1718 }, { "epoch": 40.93134328358209, "grad_norm": 28.167495727539062, "learning_rate": 9.861111111111112e-06, "loss": 46.099, "step": 1719 }, { "epoch": 40.95522388059702, "grad_norm": 27.325531005859375, "learning_rate": 9.857142857142859e-06, "loss": 45.1728, "step": 1720 }, { "epoch": 40.97910447761194, "grad_norm": 23.650911331176758, "learning_rate": 9.853174603174604e-06, "loss": 44.6743, "step": 1721 }, { "epoch": 41.0, "grad_norm": 22.53518295288086, "learning_rate": 9.849206349206351e-06, "loss": 39.1464, "step": 1722 }, { "epoch": 41.02388059701492, "grad_norm": 28.995275497436523, "learning_rate": 9.845238095238097e-06, "loss": 44.5823, "step": 1723 }, { "epoch": 41.04776119402985, "grad_norm": 28.680805206298828, "learning_rate": 9.841269841269842e-06, "loss": 44.7002, "step": 1724 }, { "epoch": 41.071641791044776, "grad_norm": 24.10047149658203, "learning_rate": 9.837301587301588e-06, "loss": 46.232, "step": 1725 }, { "epoch": 41.0955223880597, "grad_norm": 25.722291946411133, "learning_rate": 9.833333333333333e-06, "loss": 45.1447, "step": 1726 }, { "epoch": 41.11940298507463, "grad_norm": 22.944278717041016, "learning_rate": 9.82936507936508e-06, "loss": 46.57, "step": 1727 }, { "epoch": 41.14328358208955, "grad_norm": 25.734941482543945, "learning_rate": 9.825396825396825e-06, "loss": 45.8386, "step": 1728 }, { "epoch": 41.167164179104475, "grad_norm": 23.644197463989258, "learning_rate": 9.821428571428573e-06, "loss": 46.2608, "step": 1729 }, { "epoch": 41.191044776119405, "grad_norm": 22.163721084594727, "learning_rate": 9.817460317460318e-06, "loss": 45.2914, "step": 1730 }, { "epoch": 41.21492537313433, "grad_norm": 33.71270751953125, "learning_rate": 9.813492063492063e-06, "loss": 44.6372, "step": 1731 }, { "epoch": 41.23880597014925, "grad_norm": 28.478361129760742, "learning_rate": 9.80952380952381e-06, "loss": 45.472, "step": 1732 }, { "epoch": 41.26268656716418, "grad_norm": 27.120990753173828, "learning_rate": 9.805555555555556e-06, "loss": 46.445, "step": 1733 }, { "epoch": 41.286567164179104, "grad_norm": 25.342784881591797, "learning_rate": 9.801587301587301e-06, "loss": 45.8317, "step": 1734 }, { "epoch": 41.31044776119403, "grad_norm": 28.94765853881836, "learning_rate": 9.797619047619048e-06, "loss": 46.0677, "step": 1735 }, { "epoch": 41.33432835820896, "grad_norm": 22.983802795410156, "learning_rate": 9.793650793650794e-06, "loss": 45.8029, "step": 1736 }, { "epoch": 41.35820895522388, "grad_norm": 24.97469711303711, "learning_rate": 9.78968253968254e-06, "loss": 46.7215, "step": 1737 }, { "epoch": 41.3820895522388, "grad_norm": 26.136960983276367, "learning_rate": 9.785714285714286e-06, "loss": 45.8042, "step": 1738 }, { "epoch": 41.40597014925373, "grad_norm": 21.150083541870117, "learning_rate": 9.781746031746032e-06, "loss": 45.836, "step": 1739 }, { "epoch": 41.429850746268656, "grad_norm": 19.56538963317871, "learning_rate": 9.777777777777779e-06, "loss": 46.0126, "step": 1740 }, { "epoch": 41.45373134328358, "grad_norm": 26.608108520507812, "learning_rate": 9.773809523809524e-06, "loss": 45.3108, "step": 1741 }, { "epoch": 41.47761194029851, "grad_norm": 19.020097732543945, "learning_rate": 9.769841269841271e-06, "loss": 46.278, "step": 1742 }, { "epoch": 41.50149253731343, "grad_norm": 25.4818172454834, "learning_rate": 9.765873015873017e-06, "loss": 45.8142, "step": 1743 }, { "epoch": 41.525373134328355, "grad_norm": 21.7120304107666, "learning_rate": 9.761904761904762e-06, "loss": 46.0221, "step": 1744 }, { "epoch": 41.549253731343285, "grad_norm": 24.395984649658203, "learning_rate": 9.757936507936509e-06, "loss": 45.3654, "step": 1745 }, { "epoch": 41.57313432835821, "grad_norm": 26.8757381439209, "learning_rate": 9.753968253968254e-06, "loss": 46.0073, "step": 1746 }, { "epoch": 41.59701492537313, "grad_norm": 27.31254768371582, "learning_rate": 9.75e-06, "loss": 45.465, "step": 1747 }, { "epoch": 41.62089552238806, "grad_norm": 23.271629333496094, "learning_rate": 9.746031746031747e-06, "loss": 46.0739, "step": 1748 }, { "epoch": 41.644776119402984, "grad_norm": 24.240131378173828, "learning_rate": 9.742063492063492e-06, "loss": 45.6978, "step": 1749 }, { "epoch": 41.668656716417914, "grad_norm": 23.16962242126465, "learning_rate": 9.73809523809524e-06, "loss": 45.9961, "step": 1750 }, { "epoch": 41.69253731343284, "grad_norm": 29.63677406311035, "learning_rate": 9.734126984126985e-06, "loss": 45.0859, "step": 1751 }, { "epoch": 41.71641791044776, "grad_norm": 20.725126266479492, "learning_rate": 9.73015873015873e-06, "loss": 45.3546, "step": 1752 }, { "epoch": 41.74029850746269, "grad_norm": 23.172834396362305, "learning_rate": 9.726190476190477e-06, "loss": 45.3822, "step": 1753 }, { "epoch": 41.76417910447761, "grad_norm": 30.179182052612305, "learning_rate": 9.722222222222223e-06, "loss": 45.0901, "step": 1754 }, { "epoch": 41.788059701492536, "grad_norm": 17.276126861572266, "learning_rate": 9.71825396825397e-06, "loss": 45.4555, "step": 1755 }, { "epoch": 41.811940298507466, "grad_norm": 24.585174560546875, "learning_rate": 9.714285714285715e-06, "loss": 43.8513, "step": 1756 }, { "epoch": 41.83582089552239, "grad_norm": 23.242969512939453, "learning_rate": 9.71031746031746e-06, "loss": 45.7996, "step": 1757 }, { "epoch": 41.85970149253731, "grad_norm": 21.585342407226562, "learning_rate": 9.706349206349208e-06, "loss": 45.2616, "step": 1758 }, { "epoch": 41.88358208955224, "grad_norm": 28.802600860595703, "learning_rate": 9.702380952380953e-06, "loss": 45.6062, "step": 1759 }, { "epoch": 41.907462686567165, "grad_norm": 23.895822525024414, "learning_rate": 9.698412698412698e-06, "loss": 44.3029, "step": 1760 }, { "epoch": 41.93134328358209, "grad_norm": 26.175247192382812, "learning_rate": 9.694444444444446e-06, "loss": 45.6048, "step": 1761 }, { "epoch": 41.95522388059702, "grad_norm": 23.499914169311523, "learning_rate": 9.690476190476191e-06, "loss": 45.4891, "step": 1762 }, { "epoch": 41.97910447761194, "grad_norm": 22.244211196899414, "learning_rate": 9.686507936507938e-06, "loss": 44.1723, "step": 1763 }, { "epoch": 42.0, "grad_norm": 20.29228401184082, "learning_rate": 9.682539682539683e-06, "loss": 39.7896, "step": 1764 }, { "epoch": 42.02388059701492, "grad_norm": 27.773515701293945, "learning_rate": 9.678571428571429e-06, "loss": 45.7383, "step": 1765 }, { "epoch": 42.04776119402985, "grad_norm": 27.289716720581055, "learning_rate": 9.674603174603176e-06, "loss": 45.2073, "step": 1766 }, { "epoch": 42.071641791044776, "grad_norm": 21.16016387939453, "learning_rate": 9.670634920634921e-06, "loss": 45.3415, "step": 1767 }, { "epoch": 42.0955223880597, "grad_norm": 28.878597259521484, "learning_rate": 9.666666666666667e-06, "loss": 45.1139, "step": 1768 }, { "epoch": 42.11940298507463, "grad_norm": 29.504600524902344, "learning_rate": 9.662698412698414e-06, "loss": 46.185, "step": 1769 }, { "epoch": 42.14328358208955, "grad_norm": 20.372560501098633, "learning_rate": 9.65873015873016e-06, "loss": 46.4996, "step": 1770 }, { "epoch": 42.167164179104475, "grad_norm": 27.437274932861328, "learning_rate": 9.654761904761906e-06, "loss": 43.77, "step": 1771 }, { "epoch": 42.191044776119405, "grad_norm": 23.735233306884766, "learning_rate": 9.650793650793652e-06, "loss": 43.9415, "step": 1772 }, { "epoch": 42.21492537313433, "grad_norm": 26.434886932373047, "learning_rate": 9.646825396825397e-06, "loss": 46.6163, "step": 1773 }, { "epoch": 42.23880597014925, "grad_norm": 26.843782424926758, "learning_rate": 9.642857142857144e-06, "loss": 46.1987, "step": 1774 }, { "epoch": 42.26268656716418, "grad_norm": 25.86046600341797, "learning_rate": 9.63888888888889e-06, "loss": 46.635, "step": 1775 }, { "epoch": 42.286567164179104, "grad_norm": 25.95208740234375, "learning_rate": 9.634920634920637e-06, "loss": 44.6339, "step": 1776 }, { "epoch": 42.31044776119403, "grad_norm": 21.243392944335938, "learning_rate": 9.630952380952382e-06, "loss": 45.1151, "step": 1777 }, { "epoch": 42.33432835820896, "grad_norm": 22.445972442626953, "learning_rate": 9.626984126984127e-06, "loss": 45.1704, "step": 1778 }, { "epoch": 42.35820895522388, "grad_norm": 37.871681213378906, "learning_rate": 9.623015873015875e-06, "loss": 45.116, "step": 1779 }, { "epoch": 42.3820895522388, "grad_norm": 25.75882339477539, "learning_rate": 9.61904761904762e-06, "loss": 45.2748, "step": 1780 }, { "epoch": 42.40597014925373, "grad_norm": 32.44329071044922, "learning_rate": 9.615079365079365e-06, "loss": 45.0782, "step": 1781 }, { "epoch": 42.429850746268656, "grad_norm": 25.74696159362793, "learning_rate": 9.611111111111112e-06, "loss": 46.1405, "step": 1782 }, { "epoch": 42.45373134328358, "grad_norm": 44.88374710083008, "learning_rate": 9.607142857142858e-06, "loss": 45.7843, "step": 1783 }, { "epoch": 42.47761194029851, "grad_norm": 29.956615447998047, "learning_rate": 9.603174603174605e-06, "loss": 46.7361, "step": 1784 }, { "epoch": 42.50149253731343, "grad_norm": 41.191864013671875, "learning_rate": 9.59920634920635e-06, "loss": 45.7368, "step": 1785 }, { "epoch": 42.525373134328355, "grad_norm": 32.30370330810547, "learning_rate": 9.595238095238096e-06, "loss": 45.4091, "step": 1786 }, { "epoch": 42.549253731343285, "grad_norm": 32.65694046020508, "learning_rate": 9.591269841269843e-06, "loss": 44.8837, "step": 1787 }, { "epoch": 42.57313432835821, "grad_norm": 29.783634185791016, "learning_rate": 9.587301587301588e-06, "loss": 46.0239, "step": 1788 }, { "epoch": 42.59701492537313, "grad_norm": 32.415035247802734, "learning_rate": 9.583333333333335e-06, "loss": 44.7968, "step": 1789 }, { "epoch": 42.62089552238806, "grad_norm": 31.461589813232422, "learning_rate": 9.57936507936508e-06, "loss": 44.5408, "step": 1790 }, { "epoch": 42.644776119402984, "grad_norm": 27.083560943603516, "learning_rate": 9.575396825396826e-06, "loss": 44.9716, "step": 1791 }, { "epoch": 42.668656716417914, "grad_norm": 34.453102111816406, "learning_rate": 9.571428571428573e-06, "loss": 44.8527, "step": 1792 }, { "epoch": 42.69253731343284, "grad_norm": 24.403902053833008, "learning_rate": 9.567460317460319e-06, "loss": 44.6635, "step": 1793 }, { "epoch": 42.71641791044776, "grad_norm": 43.89455795288086, "learning_rate": 9.563492063492064e-06, "loss": 45.9798, "step": 1794 }, { "epoch": 42.74029850746269, "grad_norm": 33.704498291015625, "learning_rate": 9.559523809523811e-06, "loss": 45.8182, "step": 1795 }, { "epoch": 42.76417910447761, "grad_norm": 38.266357421875, "learning_rate": 9.555555555555556e-06, "loss": 44.8923, "step": 1796 }, { "epoch": 42.788059701492536, "grad_norm": 36.38774490356445, "learning_rate": 9.551587301587304e-06, "loss": 45.5987, "step": 1797 }, { "epoch": 42.811940298507466, "grad_norm": 33.449737548828125, "learning_rate": 9.547619047619049e-06, "loss": 46.2494, "step": 1798 }, { "epoch": 42.83582089552239, "grad_norm": 29.902509689331055, "learning_rate": 9.543650793650794e-06, "loss": 44.7438, "step": 1799 }, { "epoch": 42.85970149253731, "grad_norm": 35.025184631347656, "learning_rate": 9.539682539682541e-06, "loss": 44.7825, "step": 1800 }, { "epoch": 42.88358208955224, "grad_norm": 30.783037185668945, "learning_rate": 9.535714285714287e-06, "loss": 45.3493, "step": 1801 }, { "epoch": 42.907462686567165, "grad_norm": 28.61165428161621, "learning_rate": 9.531746031746032e-06, "loss": 46.5537, "step": 1802 }, { "epoch": 42.93134328358209, "grad_norm": 34.27008056640625, "learning_rate": 9.527777777777778e-06, "loss": 44.0439, "step": 1803 }, { "epoch": 42.95522388059702, "grad_norm": 31.05691146850586, "learning_rate": 9.523809523809525e-06, "loss": 46.1128, "step": 1804 }, { "epoch": 42.97910447761194, "grad_norm": 28.658565521240234, "learning_rate": 9.51984126984127e-06, "loss": 46.2442, "step": 1805 }, { "epoch": 43.0, "grad_norm": 20.02385139465332, "learning_rate": 9.515873015873016e-06, "loss": 39.8537, "step": 1806 }, { "epoch": 43.02388059701492, "grad_norm": 32.5422248840332, "learning_rate": 9.511904761904763e-06, "loss": 44.0765, "step": 1807 }, { "epoch": 43.04776119402985, "grad_norm": 22.364904403686523, "learning_rate": 9.507936507936508e-06, "loss": 45.6789, "step": 1808 }, { "epoch": 43.071641791044776, "grad_norm": 35.576072692871094, "learning_rate": 9.503968253968255e-06, "loss": 45.6707, "step": 1809 }, { "epoch": 43.0955223880597, "grad_norm": 27.892908096313477, "learning_rate": 9.5e-06, "loss": 46.348, "step": 1810 }, { "epoch": 43.11940298507463, "grad_norm": 22.283756256103516, "learning_rate": 9.496031746031746e-06, "loss": 44.8757, "step": 1811 }, { "epoch": 43.14328358208955, "grad_norm": 34.38758087158203, "learning_rate": 9.492063492063493e-06, "loss": 45.0544, "step": 1812 }, { "epoch": 43.167164179104475, "grad_norm": 26.720060348510742, "learning_rate": 9.488095238095238e-06, "loss": 46.2092, "step": 1813 }, { "epoch": 43.191044776119405, "grad_norm": 35.375362396240234, "learning_rate": 9.484126984126984e-06, "loss": 46.0173, "step": 1814 }, { "epoch": 43.21492537313433, "grad_norm": 24.92397117614746, "learning_rate": 9.480158730158731e-06, "loss": 45.5031, "step": 1815 }, { "epoch": 43.23880597014925, "grad_norm": 35.76795959472656, "learning_rate": 9.476190476190476e-06, "loss": 44.8149, "step": 1816 }, { "epoch": 43.26268656716418, "grad_norm": 29.861675262451172, "learning_rate": 9.472222222222223e-06, "loss": 45.4173, "step": 1817 }, { "epoch": 43.286567164179104, "grad_norm": 33.83314895629883, "learning_rate": 9.468253968253969e-06, "loss": 44.8036, "step": 1818 }, { "epoch": 43.31044776119403, "grad_norm": 32.994483947753906, "learning_rate": 9.464285714285714e-06, "loss": 46.5555, "step": 1819 }, { "epoch": 43.33432835820896, "grad_norm": 23.94085693359375, "learning_rate": 9.460317460317461e-06, "loss": 45.9566, "step": 1820 }, { "epoch": 43.35820895522388, "grad_norm": 34.10947799682617, "learning_rate": 9.456349206349207e-06, "loss": 45.2182, "step": 1821 }, { "epoch": 43.3820895522388, "grad_norm": 23.844850540161133, "learning_rate": 9.452380952380952e-06, "loss": 45.5904, "step": 1822 }, { "epoch": 43.40597014925373, "grad_norm": 46.643768310546875, "learning_rate": 9.4484126984127e-06, "loss": 46.6924, "step": 1823 }, { "epoch": 43.429850746268656, "grad_norm": 32.49457931518555, "learning_rate": 9.444444444444445e-06, "loss": 44.024, "step": 1824 }, { "epoch": 43.45373134328358, "grad_norm": 32.36979293823242, "learning_rate": 9.440476190476192e-06, "loss": 45.5752, "step": 1825 }, { "epoch": 43.47761194029851, "grad_norm": 28.21212387084961, "learning_rate": 9.436507936507937e-06, "loss": 43.3649, "step": 1826 }, { "epoch": 43.50149253731343, "grad_norm": 30.494169235229492, "learning_rate": 9.432539682539682e-06, "loss": 44.4726, "step": 1827 }, { "epoch": 43.525373134328355, "grad_norm": 29.817806243896484, "learning_rate": 9.42857142857143e-06, "loss": 45.4315, "step": 1828 }, { "epoch": 43.549253731343285, "grad_norm": 32.68490219116211, "learning_rate": 9.424603174603175e-06, "loss": 45.1985, "step": 1829 }, { "epoch": 43.57313432835821, "grad_norm": 28.48166275024414, "learning_rate": 9.420634920634922e-06, "loss": 45.6737, "step": 1830 }, { "epoch": 43.59701492537313, "grad_norm": 30.532995223999023, "learning_rate": 9.416666666666667e-06, "loss": 45.9931, "step": 1831 }, { "epoch": 43.62089552238806, "grad_norm": 24.953765869140625, "learning_rate": 9.412698412698413e-06, "loss": 44.4189, "step": 1832 }, { "epoch": 43.644776119402984, "grad_norm": 23.647258758544922, "learning_rate": 9.40873015873016e-06, "loss": 44.6757, "step": 1833 }, { "epoch": 43.668656716417914, "grad_norm": 28.41623878479004, "learning_rate": 9.404761904761905e-06, "loss": 45.5732, "step": 1834 }, { "epoch": 43.69253731343284, "grad_norm": 25.599082946777344, "learning_rate": 9.40079365079365e-06, "loss": 45.0382, "step": 1835 }, { "epoch": 43.71641791044776, "grad_norm": 39.584144592285156, "learning_rate": 9.396825396825398e-06, "loss": 45.1531, "step": 1836 }, { "epoch": 43.74029850746269, "grad_norm": 30.606550216674805, "learning_rate": 9.392857142857143e-06, "loss": 44.3696, "step": 1837 }, { "epoch": 43.76417910447761, "grad_norm": 37.25154495239258, "learning_rate": 9.38888888888889e-06, "loss": 45.4297, "step": 1838 }, { "epoch": 43.788059701492536, "grad_norm": 30.60915184020996, "learning_rate": 9.384920634920636e-06, "loss": 45.2441, "step": 1839 }, { "epoch": 43.811940298507466, "grad_norm": 32.886268615722656, "learning_rate": 9.380952380952381e-06, "loss": 45.3913, "step": 1840 }, { "epoch": 43.83582089552239, "grad_norm": 27.98761749267578, "learning_rate": 9.376984126984128e-06, "loss": 45.2191, "step": 1841 }, { "epoch": 43.85970149253731, "grad_norm": 33.787261962890625, "learning_rate": 9.373015873015874e-06, "loss": 45.0051, "step": 1842 }, { "epoch": 43.88358208955224, "grad_norm": 26.90253257751465, "learning_rate": 9.36904761904762e-06, "loss": 45.9333, "step": 1843 }, { "epoch": 43.907462686567165, "grad_norm": 29.78704261779785, "learning_rate": 9.365079365079366e-06, "loss": 45.3598, "step": 1844 }, { "epoch": 43.93134328358209, "grad_norm": 24.871315002441406, "learning_rate": 9.361111111111111e-06, "loss": 44.4159, "step": 1845 }, { "epoch": 43.95522388059702, "grad_norm": 22.998323440551758, "learning_rate": 9.357142857142859e-06, "loss": 44.9989, "step": 1846 }, { "epoch": 43.97910447761194, "grad_norm": 28.724388122558594, "learning_rate": 9.353174603174604e-06, "loss": 46.0748, "step": 1847 }, { "epoch": 44.0, "grad_norm": 19.870426177978516, "learning_rate": 9.34920634920635e-06, "loss": 41.7823, "step": 1848 }, { "epoch": 44.02388059701492, "grad_norm": 28.000728607177734, "learning_rate": 9.345238095238096e-06, "loss": 44.3686, "step": 1849 }, { "epoch": 44.04776119402985, "grad_norm": 25.330766677856445, "learning_rate": 9.341269841269842e-06, "loss": 44.8766, "step": 1850 }, { "epoch": 44.071641791044776, "grad_norm": 30.589149475097656, "learning_rate": 9.337301587301589e-06, "loss": 46.3777, "step": 1851 }, { "epoch": 44.0955223880597, "grad_norm": 27.803207397460938, "learning_rate": 9.333333333333334e-06, "loss": 45.7627, "step": 1852 }, { "epoch": 44.11940298507463, "grad_norm": 28.11823081970215, "learning_rate": 9.32936507936508e-06, "loss": 44.9612, "step": 1853 }, { "epoch": 44.14328358208955, "grad_norm": 24.24823570251465, "learning_rate": 9.325396825396827e-06, "loss": 45.6512, "step": 1854 }, { "epoch": 44.167164179104475, "grad_norm": 29.81229019165039, "learning_rate": 9.321428571428572e-06, "loss": 43.7322, "step": 1855 }, { "epoch": 44.191044776119405, "grad_norm": 24.88245964050293, "learning_rate": 9.317460317460318e-06, "loss": 45.288, "step": 1856 }, { "epoch": 44.21492537313433, "grad_norm": 31.246389389038086, "learning_rate": 9.313492063492065e-06, "loss": 44.6547, "step": 1857 }, { "epoch": 44.23880597014925, "grad_norm": 29.363845825195312, "learning_rate": 9.30952380952381e-06, "loss": 44.7851, "step": 1858 }, { "epoch": 44.26268656716418, "grad_norm": 32.35028839111328, "learning_rate": 9.305555555555557e-06, "loss": 44.5643, "step": 1859 }, { "epoch": 44.286567164179104, "grad_norm": 31.52218246459961, "learning_rate": 9.301587301587303e-06, "loss": 45.293, "step": 1860 }, { "epoch": 44.31044776119403, "grad_norm": 29.180295944213867, "learning_rate": 9.297619047619048e-06, "loss": 45.298, "step": 1861 }, { "epoch": 44.33432835820896, "grad_norm": 27.626508712768555, "learning_rate": 9.293650793650795e-06, "loss": 45.1187, "step": 1862 }, { "epoch": 44.35820895522388, "grad_norm": 28.44379425048828, "learning_rate": 9.28968253968254e-06, "loss": 45.0835, "step": 1863 }, { "epoch": 44.3820895522388, "grad_norm": 29.45343017578125, "learning_rate": 9.285714285714288e-06, "loss": 45.5642, "step": 1864 }, { "epoch": 44.40597014925373, "grad_norm": 21.64850425720215, "learning_rate": 9.281746031746033e-06, "loss": 45.6837, "step": 1865 }, { "epoch": 44.429850746268656, "grad_norm": 35.32088088989258, "learning_rate": 9.277777777777778e-06, "loss": 44.9266, "step": 1866 }, { "epoch": 44.45373134328358, "grad_norm": 28.638429641723633, "learning_rate": 9.273809523809525e-06, "loss": 45.9407, "step": 1867 }, { "epoch": 44.47761194029851, "grad_norm": 31.444725036621094, "learning_rate": 9.26984126984127e-06, "loss": 45.4442, "step": 1868 }, { "epoch": 44.50149253731343, "grad_norm": 26.114784240722656, "learning_rate": 9.265873015873016e-06, "loss": 45.1998, "step": 1869 }, { "epoch": 44.525373134328355, "grad_norm": 24.51571273803711, "learning_rate": 9.261904761904763e-06, "loss": 45.0705, "step": 1870 }, { "epoch": 44.549253731343285, "grad_norm": 24.52007293701172, "learning_rate": 9.257936507936509e-06, "loss": 43.9359, "step": 1871 }, { "epoch": 44.57313432835821, "grad_norm": 17.876834869384766, "learning_rate": 9.253968253968256e-06, "loss": 44.5254, "step": 1872 }, { "epoch": 44.59701492537313, "grad_norm": 21.0299015045166, "learning_rate": 9.250000000000001e-06, "loss": 46.0916, "step": 1873 }, { "epoch": 44.62089552238806, "grad_norm": 30.12071990966797, "learning_rate": 9.246031746031747e-06, "loss": 44.1769, "step": 1874 }, { "epoch": 44.644776119402984, "grad_norm": 23.94618797302246, "learning_rate": 9.242063492063494e-06, "loss": 45.628, "step": 1875 }, { "epoch": 44.668656716417914, "grad_norm": 29.615930557250977, "learning_rate": 9.238095238095239e-06, "loss": 45.2762, "step": 1876 }, { "epoch": 44.69253731343284, "grad_norm": 30.00957489013672, "learning_rate": 9.234126984126986e-06, "loss": 45.6399, "step": 1877 }, { "epoch": 44.71641791044776, "grad_norm": 26.414703369140625, "learning_rate": 9.230158730158732e-06, "loss": 44.6988, "step": 1878 }, { "epoch": 44.74029850746269, "grad_norm": 28.785755157470703, "learning_rate": 9.226190476190477e-06, "loss": 45.4551, "step": 1879 }, { "epoch": 44.76417910447761, "grad_norm": 23.4616756439209, "learning_rate": 9.222222222222224e-06, "loss": 44.8668, "step": 1880 }, { "epoch": 44.788059701492536, "grad_norm": 25.046113967895508, "learning_rate": 9.218253968253968e-06, "loss": 45.8905, "step": 1881 }, { "epoch": 44.811940298507466, "grad_norm": 31.216581344604492, "learning_rate": 9.214285714285715e-06, "loss": 46.6996, "step": 1882 }, { "epoch": 44.83582089552239, "grad_norm": 22.215465545654297, "learning_rate": 9.21031746031746e-06, "loss": 46.1791, "step": 1883 }, { "epoch": 44.85970149253731, "grad_norm": 33.831214904785156, "learning_rate": 9.206349206349207e-06, "loss": 45.3197, "step": 1884 }, { "epoch": 44.88358208955224, "grad_norm": 24.447084426879883, "learning_rate": 9.202380952380953e-06, "loss": 45.2949, "step": 1885 }, { "epoch": 44.907462686567165, "grad_norm": 31.735240936279297, "learning_rate": 9.198412698412698e-06, "loss": 46.4555, "step": 1886 }, { "epoch": 44.93134328358209, "grad_norm": 27.23394012451172, "learning_rate": 9.194444444444445e-06, "loss": 45.9441, "step": 1887 }, { "epoch": 44.95522388059702, "grad_norm": 27.79869270324707, "learning_rate": 9.19047619047619e-06, "loss": 45.387, "step": 1888 }, { "epoch": 44.97910447761194, "grad_norm": 24.329313278198242, "learning_rate": 9.186507936507936e-06, "loss": 44.2934, "step": 1889 }, { "epoch": 45.0, "grad_norm": 22.191181182861328, "learning_rate": 9.182539682539683e-06, "loss": 39.0195, "step": 1890 }, { "epoch": 45.02388059701492, "grad_norm": 32.1130256652832, "learning_rate": 9.178571428571429e-06, "loss": 45.9272, "step": 1891 }, { "epoch": 45.04776119402985, "grad_norm": 19.621145248413086, "learning_rate": 9.174603174603176e-06, "loss": 44.3447, "step": 1892 }, { "epoch": 45.071641791044776, "grad_norm": 39.10493087768555, "learning_rate": 9.170634920634921e-06, "loss": 46.001, "step": 1893 }, { "epoch": 45.0955223880597, "grad_norm": 23.7473201751709, "learning_rate": 9.166666666666666e-06, "loss": 45.5786, "step": 1894 }, { "epoch": 45.11940298507463, "grad_norm": 30.535781860351562, "learning_rate": 9.162698412698414e-06, "loss": 46.3373, "step": 1895 }, { "epoch": 45.14328358208955, "grad_norm": 26.53186798095703, "learning_rate": 9.158730158730159e-06, "loss": 44.6074, "step": 1896 }, { "epoch": 45.167164179104475, "grad_norm": 31.9615478515625, "learning_rate": 9.154761904761906e-06, "loss": 43.229, "step": 1897 }, { "epoch": 45.191044776119405, "grad_norm": 28.577655792236328, "learning_rate": 9.150793650793651e-06, "loss": 45.6407, "step": 1898 }, { "epoch": 45.21492537313433, "grad_norm": 18.897531509399414, "learning_rate": 9.146825396825397e-06, "loss": 45.4092, "step": 1899 }, { "epoch": 45.23880597014925, "grad_norm": 31.126819610595703, "learning_rate": 9.142857142857144e-06, "loss": 43.966, "step": 1900 }, { "epoch": 45.26268656716418, "grad_norm": 26.312490463256836, "learning_rate": 9.13888888888889e-06, "loss": 45.3819, "step": 1901 }, { "epoch": 45.286567164179104, "grad_norm": 28.255640029907227, "learning_rate": 9.134920634920635e-06, "loss": 43.9926, "step": 1902 }, { "epoch": 45.31044776119403, "grad_norm": 27.333642959594727, "learning_rate": 9.130952380952382e-06, "loss": 45.7229, "step": 1903 }, { "epoch": 45.33432835820896, "grad_norm": 27.024580001831055, "learning_rate": 9.126984126984127e-06, "loss": 45.24, "step": 1904 }, { "epoch": 45.35820895522388, "grad_norm": 31.131914138793945, "learning_rate": 9.123015873015874e-06, "loss": 44.4842, "step": 1905 }, { "epoch": 45.3820895522388, "grad_norm": 27.244861602783203, "learning_rate": 9.11904761904762e-06, "loss": 45.0392, "step": 1906 }, { "epoch": 45.40597014925373, "grad_norm": 30.606016159057617, "learning_rate": 9.115079365079365e-06, "loss": 44.4968, "step": 1907 }, { "epoch": 45.429850746268656, "grad_norm": 22.56324577331543, "learning_rate": 9.111111111111112e-06, "loss": 45.5149, "step": 1908 }, { "epoch": 45.45373134328358, "grad_norm": 31.586326599121094, "learning_rate": 9.107142857142858e-06, "loss": 45.9413, "step": 1909 }, { "epoch": 45.47761194029851, "grad_norm": 23.143661499023438, "learning_rate": 9.103174603174603e-06, "loss": 44.3301, "step": 1910 }, { "epoch": 45.50149253731343, "grad_norm": 33.158111572265625, "learning_rate": 9.09920634920635e-06, "loss": 45.3503, "step": 1911 }, { "epoch": 45.525373134328355, "grad_norm": 26.259010314941406, "learning_rate": 9.095238095238095e-06, "loss": 44.125, "step": 1912 }, { "epoch": 45.549253731343285, "grad_norm": 25.72600555419922, "learning_rate": 9.091269841269843e-06, "loss": 45.8252, "step": 1913 }, { "epoch": 45.57313432835821, "grad_norm": 29.651403427124023, "learning_rate": 9.087301587301588e-06, "loss": 44.7603, "step": 1914 }, { "epoch": 45.59701492537313, "grad_norm": 24.896892547607422, "learning_rate": 9.083333333333333e-06, "loss": 45.3582, "step": 1915 }, { "epoch": 45.62089552238806, "grad_norm": 26.172271728515625, "learning_rate": 9.07936507936508e-06, "loss": 45.418, "step": 1916 }, { "epoch": 45.644776119402984, "grad_norm": 31.333498001098633, "learning_rate": 9.075396825396826e-06, "loss": 45.5952, "step": 1917 }, { "epoch": 45.668656716417914, "grad_norm": 23.452194213867188, "learning_rate": 9.071428571428573e-06, "loss": 45.8141, "step": 1918 }, { "epoch": 45.69253731343284, "grad_norm": 30.300634384155273, "learning_rate": 9.067460317460318e-06, "loss": 46.1877, "step": 1919 }, { "epoch": 45.71641791044776, "grad_norm": 24.516042709350586, "learning_rate": 9.063492063492064e-06, "loss": 44.0542, "step": 1920 }, { "epoch": 45.74029850746269, "grad_norm": 26.41005516052246, "learning_rate": 9.05952380952381e-06, "loss": 44.2296, "step": 1921 }, { "epoch": 45.76417910447761, "grad_norm": 23.099822998046875, "learning_rate": 9.055555555555556e-06, "loss": 45.2567, "step": 1922 }, { "epoch": 45.788059701492536, "grad_norm": 18.7821044921875, "learning_rate": 9.051587301587302e-06, "loss": 44.5807, "step": 1923 }, { "epoch": 45.811940298507466, "grad_norm": 31.705181121826172, "learning_rate": 9.047619047619049e-06, "loss": 45.1571, "step": 1924 }, { "epoch": 45.83582089552239, "grad_norm": 25.712608337402344, "learning_rate": 9.043650793650794e-06, "loss": 44.9665, "step": 1925 }, { "epoch": 45.85970149253731, "grad_norm": 31.790864944458008, "learning_rate": 9.039682539682541e-06, "loss": 45.6095, "step": 1926 }, { "epoch": 45.88358208955224, "grad_norm": 27.735107421875, "learning_rate": 9.035714285714287e-06, "loss": 45.8988, "step": 1927 }, { "epoch": 45.907462686567165, "grad_norm": 30.94534683227539, "learning_rate": 9.031746031746032e-06, "loss": 45.8302, "step": 1928 }, { "epoch": 45.93134328358209, "grad_norm": 23.146005630493164, "learning_rate": 9.027777777777779e-06, "loss": 45.3911, "step": 1929 }, { "epoch": 45.95522388059702, "grad_norm": 24.59404945373535, "learning_rate": 9.023809523809524e-06, "loss": 45.1403, "step": 1930 }, { "epoch": 45.97910447761194, "grad_norm": 25.62955665588379, "learning_rate": 9.019841269841272e-06, "loss": 44.8934, "step": 1931 }, { "epoch": 46.0, "grad_norm": 20.037391662597656, "learning_rate": 9.015873015873017e-06, "loss": 39.4122, "step": 1932 }, { "epoch": 46.02388059701492, "grad_norm": 25.78251075744629, "learning_rate": 9.011904761904762e-06, "loss": 45.7163, "step": 1933 }, { "epoch": 46.04776119402985, "grad_norm": 28.0667781829834, "learning_rate": 9.00793650793651e-06, "loss": 44.6447, "step": 1934 }, { "epoch": 46.071641791044776, "grad_norm": NaN, "learning_rate": 9.003968253968255e-06, "loss": 61.1269, "step": 1935 }, { "epoch": 46.0955223880597, "grad_norm": NaN, "learning_rate": 9.003968253968255e-06, "loss": 57.8669, "step": 1936 }, { "epoch": 46.11940298507463, "grad_norm": 19.022104263305664, "learning_rate": 9.003968253968255e-06, "loss": 46.0546, "step": 1937 }, { "epoch": 46.14328358208955, "grad_norm": 28.844619750976562, "learning_rate": 9e-06, "loss": 44.5077, "step": 1938 }, { "epoch": 46.167164179104475, "grad_norm": 23.570850372314453, "learning_rate": 8.996031746031747e-06, "loss": 44.8965, "step": 1939 }, { "epoch": 46.191044776119405, "grad_norm": 27.71855354309082, "learning_rate": 8.992063492063493e-06, "loss": 45.3302, "step": 1940 }, { "epoch": 46.21492537313433, "grad_norm": 23.61193084716797, "learning_rate": 8.98809523809524e-06, "loss": 45.4048, "step": 1941 }, { "epoch": 46.23880597014925, "grad_norm": 27.16132926940918, "learning_rate": 8.984126984126985e-06, "loss": 44.535, "step": 1942 }, { "epoch": 46.26268656716418, "grad_norm": 25.254039764404297, "learning_rate": 8.98015873015873e-06, "loss": 45.2944, "step": 1943 }, { "epoch": 46.286567164179104, "grad_norm": 28.196325302124023, "learning_rate": 8.976190476190478e-06, "loss": 44.0106, "step": 1944 }, { "epoch": 46.31044776119403, "grad_norm": 24.75798988342285, "learning_rate": 8.972222222222223e-06, "loss": 44.949, "step": 1945 }, { "epoch": 46.33432835820896, "grad_norm": 30.992849349975586, "learning_rate": 8.968253968253968e-06, "loss": 44.6185, "step": 1946 }, { "epoch": 46.35820895522388, "grad_norm": 28.122825622558594, "learning_rate": 8.964285714285716e-06, "loss": 46.7498, "step": 1947 }, { "epoch": 46.3820895522388, "grad_norm": 25.130678176879883, "learning_rate": 8.960317460317461e-06, "loss": 45.7823, "step": 1948 }, { "epoch": 46.40597014925373, "grad_norm": 26.97332763671875, "learning_rate": 8.956349206349208e-06, "loss": 44.8217, "step": 1949 }, { "epoch": 46.429850746268656, "grad_norm": 21.403100967407227, "learning_rate": 8.952380952380953e-06, "loss": 45.4608, "step": 1950 }, { "epoch": 46.45373134328358, "grad_norm": 30.794330596923828, "learning_rate": 8.948412698412699e-06, "loss": 45.0327, "step": 1951 }, { "epoch": 46.47761194029851, "grad_norm": 26.035839080810547, "learning_rate": 8.944444444444446e-06, "loss": 44.6979, "step": 1952 }, { "epoch": 46.50149253731343, "grad_norm": 21.501266479492188, "learning_rate": 8.940476190476191e-06, "loss": 44.6421, "step": 1953 }, { "epoch": 46.525373134328355, "grad_norm": 27.67610740661621, "learning_rate": 8.936507936507938e-06, "loss": 44.5721, "step": 1954 }, { "epoch": 46.549253731343285, "grad_norm": 24.71251678466797, "learning_rate": 8.932539682539684e-06, "loss": 45.2891, "step": 1955 }, { "epoch": 46.57313432835821, "grad_norm": 32.72700500488281, "learning_rate": 8.92857142857143e-06, "loss": 45.0829, "step": 1956 }, { "epoch": 46.59701492537313, "grad_norm": 26.203643798828125, "learning_rate": 8.924603174603176e-06, "loss": 44.9264, "step": 1957 }, { "epoch": 46.62089552238806, "grad_norm": 25.362638473510742, "learning_rate": 8.920634920634922e-06, "loss": 45.1448, "step": 1958 }, { "epoch": 46.644776119402984, "grad_norm": 25.224456787109375, "learning_rate": 8.916666666666667e-06, "loss": 45.6017, "step": 1959 }, { "epoch": 46.668656716417914, "grad_norm": 29.02377700805664, "learning_rate": 8.912698412698414e-06, "loss": 45.5859, "step": 1960 }, { "epoch": 46.69253731343284, "grad_norm": 25.2493896484375, "learning_rate": 8.90873015873016e-06, "loss": 44.3262, "step": 1961 }, { "epoch": 46.71641791044776, "grad_norm": 24.432043075561523, "learning_rate": 8.904761904761905e-06, "loss": 44.0005, "step": 1962 }, { "epoch": 46.74029850746269, "grad_norm": 23.06245994567871, "learning_rate": 8.90079365079365e-06, "loss": 45.2406, "step": 1963 }, { "epoch": 46.76417910447761, "grad_norm": 27.603015899658203, "learning_rate": 8.896825396825398e-06, "loss": 45.2547, "step": 1964 }, { "epoch": 46.788059701492536, "grad_norm": 26.66181182861328, "learning_rate": 8.892857142857143e-06, "loss": 45.0288, "step": 1965 }, { "epoch": 46.811940298507466, "grad_norm": 19.665678024291992, "learning_rate": 8.888888888888888e-06, "loss": 45.1412, "step": 1966 }, { "epoch": 46.83582089552239, "grad_norm": 31.3046932220459, "learning_rate": 8.884920634920635e-06, "loss": 45.7144, "step": 1967 }, { "epoch": 46.85970149253731, "grad_norm": 24.661293029785156, "learning_rate": 8.88095238095238e-06, "loss": 43.9468, "step": 1968 }, { "epoch": 46.88358208955224, "grad_norm": 25.421525955200195, "learning_rate": 8.876984126984128e-06, "loss": 45.4404, "step": 1969 }, { "epoch": 46.907462686567165, "grad_norm": 30.11313247680664, "learning_rate": 8.873015873015873e-06, "loss": 44.4083, "step": 1970 }, { "epoch": 46.93134328358209, "grad_norm": 24.19677734375, "learning_rate": 8.869047619047619e-06, "loss": 45.5387, "step": 1971 }, { "epoch": 46.95522388059702, "grad_norm": 25.183414459228516, "learning_rate": 8.865079365079366e-06, "loss": 45.2725, "step": 1972 }, { "epoch": 46.97910447761194, "grad_norm": 22.570981979370117, "learning_rate": 8.861111111111111e-06, "loss": 44.1263, "step": 1973 }, { "epoch": 47.0, "grad_norm": 27.16869354248047, "learning_rate": 8.857142857142858e-06, "loss": 39.0382, "step": 1974 }, { "epoch": 47.02388059701492, "grad_norm": 27.326980590820312, "learning_rate": 8.853174603174604e-06, "loss": 45.0956, "step": 1975 }, { "epoch": 47.04776119402985, "grad_norm": 25.321685791015625, "learning_rate": 8.849206349206349e-06, "loss": 45.1531, "step": 1976 }, { "epoch": 47.071641791044776, "grad_norm": 29.480770111083984, "learning_rate": 8.845238095238096e-06, "loss": 44.7925, "step": 1977 }, { "epoch": 47.0955223880597, "grad_norm": 29.82880210876465, "learning_rate": 8.841269841269842e-06, "loss": 45.6435, "step": 1978 }, { "epoch": 47.11940298507463, "grad_norm": 31.852386474609375, "learning_rate": 8.837301587301587e-06, "loss": 45.0481, "step": 1979 }, { "epoch": 47.14328358208955, "grad_norm": 27.80265235900879, "learning_rate": 8.833333333333334e-06, "loss": 44.7472, "step": 1980 }, { "epoch": 47.167164179104475, "grad_norm": NaN, "learning_rate": 8.82936507936508e-06, "loss": 38.8619, "step": 1981 }, { "epoch": 47.191044776119405, "grad_norm": 24.525455474853516, "learning_rate": 8.82936507936508e-06, "loss": 44.8093, "step": 1982 }, { "epoch": 47.21492537313433, "grad_norm": 26.450302124023438, "learning_rate": 8.825396825396827e-06, "loss": 44.7615, "step": 1983 }, { "epoch": 47.23880597014925, "grad_norm": 22.493268966674805, "learning_rate": 8.821428571428572e-06, "loss": 44.5445, "step": 1984 }, { "epoch": 47.26268656716418, "grad_norm": 26.506013870239258, "learning_rate": 8.817460317460317e-06, "loss": 45.4412, "step": 1985 }, { "epoch": 47.286567164179104, "grad_norm": 23.09911346435547, "learning_rate": 8.813492063492064e-06, "loss": 44.8791, "step": 1986 }, { "epoch": 47.31044776119403, "grad_norm": 21.34832191467285, "learning_rate": 8.80952380952381e-06, "loss": 44.8867, "step": 1987 }, { "epoch": 47.33432835820896, "grad_norm": 25.69770050048828, "learning_rate": 8.805555555555557e-06, "loss": 45.0307, "step": 1988 }, { "epoch": 47.35820895522388, "grad_norm": 27.75917625427246, "learning_rate": 8.801587301587302e-06, "loss": 43.7733, "step": 1989 }, { "epoch": 47.3820895522388, "grad_norm": 24.314449310302734, "learning_rate": 8.797619047619048e-06, "loss": 44.8685, "step": 1990 }, { "epoch": 47.40597014925373, "grad_norm": 22.21106719970703, "learning_rate": 8.793650793650795e-06, "loss": 45.2589, "step": 1991 }, { "epoch": 47.429850746268656, "grad_norm": 28.61949920654297, "learning_rate": 8.78968253968254e-06, "loss": 45.7972, "step": 1992 }, { "epoch": 47.45373134328358, "grad_norm": 27.726839065551758, "learning_rate": 8.785714285714286e-06, "loss": 44.0989, "step": 1993 }, { "epoch": 47.47761194029851, "grad_norm": 24.9364013671875, "learning_rate": 8.781746031746033e-06, "loss": 44.9365, "step": 1994 }, { "epoch": 47.50149253731343, "grad_norm": 23.380905151367188, "learning_rate": 8.777777777777778e-06, "loss": 44.9662, "step": 1995 }, { "epoch": 47.525373134328355, "grad_norm": 22.02720832824707, "learning_rate": 8.773809523809525e-06, "loss": 45.1456, "step": 1996 }, { "epoch": 47.549253731343285, "grad_norm": NaN, "learning_rate": 8.76984126984127e-06, "loss": 60.0243, "step": 1997 }, { "epoch": 47.57313432835821, "grad_norm": 21.263904571533203, "learning_rate": 8.76984126984127e-06, "loss": 44.6697, "step": 1998 }, { "epoch": 47.59701492537313, "grad_norm": 25.381332397460938, "learning_rate": 8.765873015873016e-06, "loss": 44.9032, "step": 1999 }, { "epoch": 47.62089552238806, "grad_norm": 24.297027587890625, "learning_rate": 8.761904761904763e-06, "loss": 44.5833, "step": 2000 }, { "epoch": 47.644776119402984, "grad_norm": 26.303585052490234, "learning_rate": 8.757936507936508e-06, "loss": 45.252, "step": 2001 }, { "epoch": 47.668656716417914, "grad_norm": 23.310070037841797, "learning_rate": 8.753968253968254e-06, "loss": 45.0068, "step": 2002 }, { "epoch": 47.69253731343284, "grad_norm": 30.19032859802246, "learning_rate": 8.750000000000001e-06, "loss": 46.1286, "step": 2003 }, { "epoch": 47.71641791044776, "grad_norm": 27.43839454650879, "learning_rate": 8.746031746031746e-06, "loss": 46.5151, "step": 2004 }, { "epoch": 47.74029850746269, "grad_norm": 24.49736976623535, "learning_rate": 8.742063492063493e-06, "loss": 45.2309, "step": 2005 }, { "epoch": 47.76417910447761, "grad_norm": 32.9915885925293, "learning_rate": 8.738095238095239e-06, "loss": 44.221, "step": 2006 }, { "epoch": 47.788059701492536, "grad_norm": 27.080114364624023, "learning_rate": 8.734126984126984e-06, "loss": 44.4515, "step": 2007 }, { "epoch": 47.811940298507466, "grad_norm": 34.84925079345703, "learning_rate": 8.730158730158731e-06, "loss": 44.5223, "step": 2008 }, { "epoch": 47.83582089552239, "grad_norm": 28.061695098876953, "learning_rate": 8.726190476190477e-06, "loss": 45.6776, "step": 2009 }, { "epoch": 47.85970149253731, "grad_norm": 35.316009521484375, "learning_rate": 8.722222222222224e-06, "loss": 45.6784, "step": 2010 }, { "epoch": 47.88358208955224, "grad_norm": 29.395872116088867, "learning_rate": 8.71825396825397e-06, "loss": 46.054, "step": 2011 }, { "epoch": 47.907462686567165, "grad_norm": 31.359512329101562, "learning_rate": 8.714285714285715e-06, "loss": 44.6921, "step": 2012 }, { "epoch": 47.93134328358209, "grad_norm": 24.621870040893555, "learning_rate": 8.710317460317462e-06, "loss": 45.8119, "step": 2013 }, { "epoch": 47.95522388059702, "grad_norm": 30.466150283813477, "learning_rate": 8.706349206349207e-06, "loss": 44.5282, "step": 2014 }, { "epoch": 47.97910447761194, "grad_norm": 29.490886688232422, "learning_rate": 8.702380952380952e-06, "loss": 45.2275, "step": 2015 }, { "epoch": 48.0, "grad_norm": 18.86721420288086, "learning_rate": 8.6984126984127e-06, "loss": 38.1757, "step": 2016 }, { "epoch": 48.02388059701492, "grad_norm": 34.39149856567383, "learning_rate": 8.694444444444445e-06, "loss": 45.4931, "step": 2017 }, { "epoch": 48.04776119402985, "grad_norm": 28.87833023071289, "learning_rate": 8.690476190476192e-06, "loss": 45.3396, "step": 2018 }, { "epoch": 48.071641791044776, "grad_norm": 36.20280838012695, "learning_rate": 8.686507936507937e-06, "loss": 44.7758, "step": 2019 }, { "epoch": 48.0955223880597, "grad_norm": 30.76156234741211, "learning_rate": 8.682539682539683e-06, "loss": 44.2899, "step": 2020 }, { "epoch": 48.11940298507463, "grad_norm": 36.33967208862305, "learning_rate": 8.67857142857143e-06, "loss": 44.6879, "step": 2021 }, { "epoch": 48.14328358208955, "grad_norm": 30.22699737548828, "learning_rate": 8.674603174603175e-06, "loss": 45.8113, "step": 2022 }, { "epoch": 48.167164179104475, "grad_norm": 30.748640060424805, "learning_rate": 8.670634920634922e-06, "loss": 44.048, "step": 2023 }, { "epoch": 48.191044776119405, "grad_norm": 25.484418869018555, "learning_rate": 8.666666666666668e-06, "loss": 44.9645, "step": 2024 }, { "epoch": 48.21492537313433, "grad_norm": 33.34728240966797, "learning_rate": 8.662698412698413e-06, "loss": 44.2533, "step": 2025 }, { "epoch": 48.23880597014925, "grad_norm": 24.65802764892578, "learning_rate": 8.65873015873016e-06, "loss": 45.9453, "step": 2026 }, { "epoch": 48.26268656716418, "grad_norm": 30.4432373046875, "learning_rate": 8.654761904761906e-06, "loss": 45.8027, "step": 2027 }, { "epoch": 48.286567164179104, "grad_norm": 22.55684471130371, "learning_rate": 8.650793650793651e-06, "loss": 45.6855, "step": 2028 }, { "epoch": 48.31044776119403, "grad_norm": 22.167613983154297, "learning_rate": 8.646825396825398e-06, "loss": 44.3946, "step": 2029 }, { "epoch": 48.33432835820896, "grad_norm": 27.42496681213379, "learning_rate": 8.642857142857144e-06, "loss": 45.3506, "step": 2030 }, { "epoch": 48.35820895522388, "grad_norm": 24.647188186645508, "learning_rate": 8.63888888888889e-06, "loss": 44.3746, "step": 2031 }, { "epoch": 48.3820895522388, "grad_norm": 28.068981170654297, "learning_rate": 8.634920634920636e-06, "loss": 44.7821, "step": 2032 }, { "epoch": 48.40597014925373, "grad_norm": 22.093984603881836, "learning_rate": 8.630952380952381e-06, "loss": 43.8444, "step": 2033 }, { "epoch": 48.429850746268656, "grad_norm": 33.278778076171875, "learning_rate": 8.626984126984129e-06, "loss": 44.8849, "step": 2034 }, { "epoch": 48.45373134328358, "grad_norm": 23.357349395751953, "learning_rate": 8.623015873015874e-06, "loss": 44.8346, "step": 2035 }, { "epoch": 48.47761194029851, "grad_norm": 29.543947219848633, "learning_rate": 8.61904761904762e-06, "loss": 45.8072, "step": 2036 }, { "epoch": 48.50149253731343, "grad_norm": 24.81306266784668, "learning_rate": 8.615079365079366e-06, "loss": 43.6868, "step": 2037 }, { "epoch": 48.525373134328355, "grad_norm": 30.09635353088379, "learning_rate": 8.611111111111112e-06, "loss": 45.1631, "step": 2038 }, { "epoch": 48.549253731343285, "grad_norm": 26.751686096191406, "learning_rate": 8.607142857142859e-06, "loss": 44.5276, "step": 2039 }, { "epoch": 48.57313432835821, "grad_norm": 22.96086883544922, "learning_rate": 8.603174603174604e-06, "loss": 45.5322, "step": 2040 }, { "epoch": 48.59701492537313, "grad_norm": 30.90753173828125, "learning_rate": 8.59920634920635e-06, "loss": 44.5476, "step": 2041 }, { "epoch": 48.62089552238806, "grad_norm": 22.072256088256836, "learning_rate": 8.595238095238097e-06, "loss": 45.3412, "step": 2042 }, { "epoch": 48.644776119402984, "grad_norm": 37.27132034301758, "learning_rate": 8.591269841269842e-06, "loss": 43.9968, "step": 2043 }, { "epoch": 48.668656716417914, "grad_norm": 31.473464965820312, "learning_rate": 8.587301587301588e-06, "loss": 46.7003, "step": 2044 }, { "epoch": 48.69253731343284, "grad_norm": 41.3200798034668, "learning_rate": 8.583333333333333e-06, "loss": 44.9254, "step": 2045 }, { "epoch": 48.71641791044776, "grad_norm": 28.326889038085938, "learning_rate": 8.57936507936508e-06, "loss": 45.4611, "step": 2046 }, { "epoch": 48.74029850746269, "grad_norm": 42.016624450683594, "learning_rate": 8.575396825396826e-06, "loss": 45.9752, "step": 2047 }, { "epoch": 48.76417910447761, "grad_norm": 39.264827728271484, "learning_rate": 8.571428571428571e-06, "loss": 45.9133, "step": 2048 }, { "epoch": 48.788059701492536, "grad_norm": 36.876461029052734, "learning_rate": 8.567460317460318e-06, "loss": 44.052, "step": 2049 }, { "epoch": 48.811940298507466, "grad_norm": 33.36867141723633, "learning_rate": 8.563492063492063e-06, "loss": 44.8014, "step": 2050 }, { "epoch": 48.83582089552239, "grad_norm": 33.16298294067383, "learning_rate": 8.55952380952381e-06, "loss": 44.005, "step": 2051 }, { "epoch": 48.85970149253731, "grad_norm": 32.4409065246582, "learning_rate": 8.555555555555556e-06, "loss": 44.2993, "step": 2052 }, { "epoch": 48.88358208955224, "grad_norm": 32.56459426879883, "learning_rate": 8.551587301587301e-06, "loss": 45.2025, "step": 2053 }, { "epoch": 48.907462686567165, "grad_norm": 30.31665802001953, "learning_rate": 8.547619047619048e-06, "loss": 43.8506, "step": 2054 }, { "epoch": 48.93134328358209, "grad_norm": 29.07672119140625, "learning_rate": 8.543650793650794e-06, "loss": 44.2567, "step": 2055 }, { "epoch": 48.95522388059702, "grad_norm": 24.603849411010742, "learning_rate": 8.53968253968254e-06, "loss": 44.5072, "step": 2056 }, { "epoch": 48.97910447761194, "grad_norm": 26.305355072021484, "learning_rate": 8.535714285714286e-06, "loss": 45.2023, "step": 2057 }, { "epoch": 49.0, "grad_norm": 20.483905792236328, "learning_rate": 8.531746031746032e-06, "loss": 38.3416, "step": 2058 }, { "epoch": 49.02388059701492, "grad_norm": 18.845535278320312, "learning_rate": 8.527777777777779e-06, "loss": 44.0003, "step": 2059 }, { "epoch": 49.04776119402985, "grad_norm": 20.018390655517578, "learning_rate": 8.523809523809524e-06, "loss": 45.5951, "step": 2060 }, { "epoch": 49.071641791044776, "grad_norm": 18.276540756225586, "learning_rate": 8.51984126984127e-06, "loss": 45.4302, "step": 2061 }, { "epoch": 49.0955223880597, "grad_norm": 18.592966079711914, "learning_rate": 8.515873015873017e-06, "loss": 44.9415, "step": 2062 }, { "epoch": 49.11940298507463, "grad_norm": NaN, "learning_rate": 8.511904761904762e-06, "loss": 77.195, "step": 2063 }, { "epoch": 49.14328358208955, "grad_norm": 23.695045471191406, "learning_rate": 8.511904761904762e-06, "loss": 45.1853, "step": 2064 }, { "epoch": 49.167164179104475, "grad_norm": 16.90850830078125, "learning_rate": 8.507936507936509e-06, "loss": 44.0122, "step": 2065 }, { "epoch": 49.191044776119405, "grad_norm": 30.50786781311035, "learning_rate": 8.503968253968255e-06, "loss": 44.8398, "step": 2066 }, { "epoch": 49.21492537313433, "grad_norm": 24.35599136352539, "learning_rate": 8.5e-06, "loss": 43.4544, "step": 2067 }, { "epoch": 49.23880597014925, "grad_norm": 29.541887283325195, "learning_rate": 8.496031746031747e-06, "loss": 45.1471, "step": 2068 }, { "epoch": 49.26268656716418, "grad_norm": 20.277528762817383, "learning_rate": 8.492063492063492e-06, "loss": 45.1862, "step": 2069 }, { "epoch": 49.286567164179104, "grad_norm": 33.5463752746582, "learning_rate": 8.488095238095238e-06, "loss": 43.5467, "step": 2070 }, { "epoch": 49.31044776119403, "grad_norm": 23.218936920166016, "learning_rate": 8.484126984126985e-06, "loss": 44.6577, "step": 2071 }, { "epoch": 49.33432835820896, "grad_norm": 36.53571701049805, "learning_rate": 8.48015873015873e-06, "loss": 46.4774, "step": 2072 }, { "epoch": 49.35820895522388, "grad_norm": 32.15842819213867, "learning_rate": 8.476190476190477e-06, "loss": 45.3236, "step": 2073 }, { "epoch": 49.3820895522388, "grad_norm": 29.57740020751953, "learning_rate": 8.472222222222223e-06, "loss": 44.7034, "step": 2074 }, { "epoch": 49.40597014925373, "grad_norm": 28.12784194946289, "learning_rate": 8.468253968253968e-06, "loss": 43.741, "step": 2075 }, { "epoch": 49.429850746268656, "grad_norm": 28.08392906188965, "learning_rate": 8.464285714285715e-06, "loss": 45.326, "step": 2076 }, { "epoch": 49.45373134328358, "grad_norm": 24.909330368041992, "learning_rate": 8.46031746031746e-06, "loss": 45.979, "step": 2077 }, { "epoch": 49.47761194029851, "grad_norm": 26.343902587890625, "learning_rate": 8.456349206349208e-06, "loss": 44.1665, "step": 2078 }, { "epoch": 49.50149253731343, "grad_norm": 30.070533752441406, "learning_rate": 8.452380952380953e-06, "loss": 45.1331, "step": 2079 }, { "epoch": 49.525373134328355, "grad_norm": 26.733827590942383, "learning_rate": 8.448412698412699e-06, "loss": 43.9576, "step": 2080 }, { "epoch": 49.549253731343285, "grad_norm": 31.43610191345215, "learning_rate": 8.444444444444446e-06, "loss": 44.3933, "step": 2081 }, { "epoch": 49.57313432835821, "grad_norm": 24.856496810913086, "learning_rate": 8.440476190476191e-06, "loss": 44.561, "step": 2082 }, { "epoch": 49.59701492537313, "grad_norm": 30.097368240356445, "learning_rate": 8.436507936507936e-06, "loss": 44.617, "step": 2083 }, { "epoch": 49.62089552238806, "grad_norm": 26.63928985595703, "learning_rate": 8.432539682539684e-06, "loss": 45.1091, "step": 2084 }, { "epoch": 49.644776119402984, "grad_norm": 33.428932189941406, "learning_rate": 8.428571428571429e-06, "loss": 45.8576, "step": 2085 }, { "epoch": 49.668656716417914, "grad_norm": 26.33061408996582, "learning_rate": 8.424603174603176e-06, "loss": 46.6266, "step": 2086 }, { "epoch": 49.69253731343284, "grad_norm": 35.67467498779297, "learning_rate": 8.420634920634921e-06, "loss": 43.8886, "step": 2087 }, { "epoch": 49.71641791044776, "grad_norm": 33.62556076049805, "learning_rate": 8.416666666666667e-06, "loss": 44.819, "step": 2088 }, { "epoch": 49.74029850746269, "grad_norm": 29.146684646606445, "learning_rate": 8.412698412698414e-06, "loss": 45.1877, "step": 2089 }, { "epoch": 49.76417910447761, "grad_norm": 29.51055335998535, "learning_rate": 8.40873015873016e-06, "loss": 44.9054, "step": 2090 }, { "epoch": 49.788059701492536, "grad_norm": 31.709413528442383, "learning_rate": 8.404761904761905e-06, "loss": 44.8456, "step": 2091 }, { "epoch": 49.811940298507466, "grad_norm": 26.646390914916992, "learning_rate": 8.400793650793652e-06, "loss": 44.1815, "step": 2092 }, { "epoch": 49.83582089552239, "grad_norm": 35.582496643066406, "learning_rate": 8.396825396825397e-06, "loss": 44.9951, "step": 2093 }, { "epoch": 49.85970149253731, "grad_norm": 25.587371826171875, "learning_rate": 8.392857142857144e-06, "loss": 44.3349, "step": 2094 }, { "epoch": 49.88358208955224, "grad_norm": 29.13399887084961, "learning_rate": 8.38888888888889e-06, "loss": 45.28, "step": 2095 }, { "epoch": 49.907462686567165, "grad_norm": 21.462890625, "learning_rate": 8.384920634920635e-06, "loss": 44.4383, "step": 2096 }, { "epoch": 49.93134328358209, "grad_norm": 31.970626831054688, "learning_rate": 8.380952380952382e-06, "loss": 45.989, "step": 2097 }, { "epoch": 49.95522388059702, "grad_norm": 21.948705673217773, "learning_rate": 8.376984126984128e-06, "loss": 44.0871, "step": 2098 }, { "epoch": 49.97910447761194, "grad_norm": 35.07805252075195, "learning_rate": 8.373015873015875e-06, "loss": 44.709, "step": 2099 }, { "epoch": 50.0, "grad_norm": 21.554956436157227, "learning_rate": 8.36904761904762e-06, "loss": 38.6725, "step": 2100 }, { "epoch": 50.02388059701492, "grad_norm": 35.4162712097168, "learning_rate": 8.365079365079365e-06, "loss": 44.2866, "step": 2101 }, { "epoch": 50.04776119402985, "grad_norm": 31.357215881347656, "learning_rate": 8.361111111111113e-06, "loss": 44.9399, "step": 2102 }, { "epoch": 50.071641791044776, "grad_norm": 28.055850982666016, "learning_rate": 8.357142857142858e-06, "loss": 44.2145, "step": 2103 }, { "epoch": 50.0955223880597, "grad_norm": 27.62700080871582, "learning_rate": 8.353174603174603e-06, "loss": 44.715, "step": 2104 }, { "epoch": 50.11940298507463, "grad_norm": 32.586219787597656, "learning_rate": 8.34920634920635e-06, "loss": 45.6174, "step": 2105 }, { "epoch": 50.14328358208955, "grad_norm": 24.922584533691406, "learning_rate": 8.345238095238096e-06, "loss": 46.0653, "step": 2106 }, { "epoch": 50.167164179104475, "grad_norm": 29.282079696655273, "learning_rate": 8.341269841269843e-06, "loss": 44.8826, "step": 2107 }, { "epoch": 50.191044776119405, "grad_norm": 25.85003089904785, "learning_rate": 8.337301587301588e-06, "loss": 43.7337, "step": 2108 }, { "epoch": 50.21492537313433, "grad_norm": 26.331398010253906, "learning_rate": 8.333333333333334e-06, "loss": 44.9624, "step": 2109 }, { "epoch": 50.23880597014925, "grad_norm": 19.595951080322266, "learning_rate": 8.32936507936508e-06, "loss": 45.0561, "step": 2110 }, { "epoch": 50.26268656716418, "grad_norm": 18.431438446044922, "learning_rate": 8.325396825396826e-06, "loss": 44.6963, "step": 2111 }, { "epoch": 50.286567164179104, "grad_norm": 20.670730590820312, "learning_rate": 8.321428571428573e-06, "loss": 44.6057, "step": 2112 }, { "epoch": 50.31044776119403, "grad_norm": 20.497106552124023, "learning_rate": 8.317460317460319e-06, "loss": 45.6219, "step": 2113 }, { "epoch": 50.33432835820896, "grad_norm": 21.33808708190918, "learning_rate": 8.313492063492064e-06, "loss": 43.6802, "step": 2114 }, { "epoch": 50.35820895522388, "grad_norm": 17.015180587768555, "learning_rate": 8.309523809523811e-06, "loss": 45.6156, "step": 2115 }, { "epoch": 50.3820895522388, "grad_norm": 25.82108497619629, "learning_rate": 8.305555555555557e-06, "loss": 45.529, "step": 2116 }, { "epoch": 50.40597014925373, "grad_norm": 20.37699317932129, "learning_rate": 8.301587301587302e-06, "loss": 44.4007, "step": 2117 }, { "epoch": 50.429850746268656, "grad_norm": 24.1844482421875, "learning_rate": 8.297619047619049e-06, "loss": 45.0155, "step": 2118 }, { "epoch": 50.45373134328358, "grad_norm": 21.229581832885742, "learning_rate": 8.293650793650794e-06, "loss": 44.8109, "step": 2119 }, { "epoch": 50.47761194029851, "grad_norm": 23.752500534057617, "learning_rate": 8.289682539682542e-06, "loss": 45.1129, "step": 2120 }, { "epoch": 50.50149253731343, "grad_norm": 19.724092483520508, "learning_rate": 8.285714285714287e-06, "loss": 44.1519, "step": 2121 }, { "epoch": 50.525373134328355, "grad_norm": 21.154827117919922, "learning_rate": 8.281746031746032e-06, "loss": 43.8136, "step": 2122 }, { "epoch": 50.549253731343285, "grad_norm": 21.17751121520996, "learning_rate": 8.277777777777778e-06, "loss": 44.7593, "step": 2123 }, { "epoch": 50.57313432835821, "grad_norm": 24.729738235473633, "learning_rate": 8.273809523809523e-06, "loss": 44.7794, "step": 2124 }, { "epoch": 50.59701492537313, "grad_norm": 18.432241439819336, "learning_rate": 8.26984126984127e-06, "loss": 44.0237, "step": 2125 }, { "epoch": 50.62089552238806, "grad_norm": 26.357515335083008, "learning_rate": 8.265873015873016e-06, "loss": 45.2566, "step": 2126 }, { "epoch": 50.644776119402984, "grad_norm": 24.270259857177734, "learning_rate": 8.261904761904763e-06, "loss": 44.1182, "step": 2127 }, { "epoch": 50.668656716417914, "grad_norm": 20.756067276000977, "learning_rate": 8.257936507936508e-06, "loss": 46.2374, "step": 2128 }, { "epoch": 50.69253731343284, "grad_norm": 23.159393310546875, "learning_rate": 8.253968253968254e-06, "loss": 44.1878, "step": 2129 }, { "epoch": 50.71641791044776, "grad_norm": 22.44221305847168, "learning_rate": 8.25e-06, "loss": 45.3746, "step": 2130 }, { "epoch": 50.74029850746269, "grad_norm": 20.27827262878418, "learning_rate": 8.246031746031746e-06, "loss": 44.1278, "step": 2131 }, { "epoch": 50.76417910447761, "grad_norm": 21.407669067382812, "learning_rate": 8.242063492063493e-06, "loss": 44.8487, "step": 2132 }, { "epoch": 50.788059701492536, "grad_norm": 24.570688247680664, "learning_rate": 8.238095238095239e-06, "loss": 44.2913, "step": 2133 }, { "epoch": 50.811940298507466, "grad_norm": 23.73247528076172, "learning_rate": 8.234126984126984e-06, "loss": 45.4539, "step": 2134 }, { "epoch": 50.83582089552239, "grad_norm": 20.265886306762695, "learning_rate": 8.230158730158731e-06, "loss": 43.1901, "step": 2135 }, { "epoch": 50.85970149253731, "grad_norm": 16.51488494873047, "learning_rate": 8.226190476190476e-06, "loss": 45.0321, "step": 2136 }, { "epoch": 50.88358208955224, "grad_norm": 19.107425689697266, "learning_rate": 8.222222222222222e-06, "loss": 44.3746, "step": 2137 }, { "epoch": 50.907462686567165, "grad_norm": 19.300790786743164, "learning_rate": 8.218253968253969e-06, "loss": 45.1466, "step": 2138 }, { "epoch": 50.93134328358209, "grad_norm": 19.817272186279297, "learning_rate": 8.214285714285714e-06, "loss": 44.9703, "step": 2139 }, { "epoch": 50.95522388059702, "grad_norm": 22.794174194335938, "learning_rate": 8.210317460317461e-06, "loss": 43.917, "step": 2140 }, { "epoch": 50.97910447761194, "grad_norm": 18.948871612548828, "learning_rate": 8.206349206349207e-06, "loss": 44.4099, "step": 2141 }, { "epoch": 51.0, "grad_norm": 13.966577529907227, "learning_rate": 8.202380952380952e-06, "loss": 38.9733, "step": 2142 }, { "epoch": 51.02388059701492, "grad_norm": 29.5616397857666, "learning_rate": 8.1984126984127e-06, "loss": 44.8355, "step": 2143 }, { "epoch": 51.04776119402985, "grad_norm": 22.391014099121094, "learning_rate": 8.194444444444445e-06, "loss": 44.6835, "step": 2144 }, { "epoch": 51.071641791044776, "grad_norm": 28.830854415893555, "learning_rate": 8.190476190476192e-06, "loss": 43.3011, "step": 2145 }, { "epoch": 51.0955223880597, "grad_norm": 21.114011764526367, "learning_rate": 8.186507936507937e-06, "loss": 44.4223, "step": 2146 }, { "epoch": 51.11940298507463, "grad_norm": 28.902416229248047, "learning_rate": 8.182539682539683e-06, "loss": 44.0485, "step": 2147 }, { "epoch": 51.14328358208955, "grad_norm": 21.923168182373047, "learning_rate": 8.17857142857143e-06, "loss": 45.3272, "step": 2148 }, { "epoch": 51.167164179104475, "grad_norm": 28.772884368896484, "learning_rate": 8.174603174603175e-06, "loss": 45.6205, "step": 2149 }, { "epoch": 51.191044776119405, "grad_norm": 23.949098587036133, "learning_rate": 8.17063492063492e-06, "loss": 45.0204, "step": 2150 }, { "epoch": 51.21492537313433, "grad_norm": 26.735624313354492, "learning_rate": 8.166666666666668e-06, "loss": 45.6338, "step": 2151 }, { "epoch": 51.23880597014925, "grad_norm": 28.049888610839844, "learning_rate": 8.162698412698413e-06, "loss": 44.2502, "step": 2152 }, { "epoch": 51.26268656716418, "grad_norm": 23.256439208984375, "learning_rate": 8.15873015873016e-06, "loss": 44.1981, "step": 2153 }, { "epoch": 51.286567164179104, "grad_norm": 32.3640022277832, "learning_rate": 8.154761904761905e-06, "loss": 43.6928, "step": 2154 }, { "epoch": 51.31044776119403, "grad_norm": 23.900907516479492, "learning_rate": 8.15079365079365e-06, "loss": 45.3594, "step": 2155 }, { "epoch": 51.33432835820896, "grad_norm": 39.41314697265625, "learning_rate": 8.146825396825398e-06, "loss": 44.5862, "step": 2156 }, { "epoch": 51.35820895522388, "grad_norm": 31.826566696166992, "learning_rate": 8.142857142857143e-06, "loss": 44.6213, "step": 2157 }, { "epoch": 51.3820895522388, "grad_norm": 35.3351936340332, "learning_rate": 8.138888888888889e-06, "loss": 44.9952, "step": 2158 }, { "epoch": 51.40597014925373, "grad_norm": 33.0169677734375, "learning_rate": 8.134920634920636e-06, "loss": 44.7576, "step": 2159 }, { "epoch": 51.429850746268656, "grad_norm": 32.347251892089844, "learning_rate": 8.130952380952381e-06, "loss": 45.0997, "step": 2160 }, { "epoch": 51.45373134328358, "grad_norm": 25.79857635498047, "learning_rate": 8.126984126984128e-06, "loss": 45.8578, "step": 2161 }, { "epoch": 51.47761194029851, "grad_norm": 33.378108978271484, "learning_rate": 8.123015873015874e-06, "loss": 44.6084, "step": 2162 }, { "epoch": 51.50149253731343, "grad_norm": 27.625028610229492, "learning_rate": 8.119047619047619e-06, "loss": 45.1928, "step": 2163 }, { "epoch": 51.525373134328355, "grad_norm": 32.47718811035156, "learning_rate": 8.115079365079366e-06, "loss": 44.38, "step": 2164 }, { "epoch": 51.549253731343285, "grad_norm": 31.10133934020996, "learning_rate": 8.111111111111112e-06, "loss": 44.1878, "step": 2165 }, { "epoch": 51.57313432835821, "grad_norm": 33.062007904052734, "learning_rate": 8.107142857142859e-06, "loss": 44.6587, "step": 2166 }, { "epoch": 51.59701492537313, "grad_norm": 31.35774803161621, "learning_rate": 8.103174603174604e-06, "loss": 44.0408, "step": 2167 }, { "epoch": 51.62089552238806, "grad_norm": 35.262237548828125, "learning_rate": 8.09920634920635e-06, "loss": 45.3717, "step": 2168 }, { "epoch": 51.644776119402984, "grad_norm": 32.77524948120117, "learning_rate": 8.095238095238097e-06, "loss": 44.8105, "step": 2169 }, { "epoch": 51.668656716417914, "grad_norm": 28.838821411132812, "learning_rate": 8.091269841269842e-06, "loss": 44.3364, "step": 2170 }, { "epoch": 51.69253731343284, "grad_norm": 26.18807029724121, "learning_rate": 8.087301587301587e-06, "loss": 44.5054, "step": 2171 }, { "epoch": 51.71641791044776, "grad_norm": 31.639286041259766, "learning_rate": 8.083333333333334e-06, "loss": 45.4023, "step": 2172 }, { "epoch": 51.74029850746269, "grad_norm": 27.998628616333008, "learning_rate": 8.07936507936508e-06, "loss": 44.8306, "step": 2173 }, { "epoch": 51.76417910447761, "grad_norm": 30.69230079650879, "learning_rate": 8.075396825396827e-06, "loss": 45.1802, "step": 2174 }, { "epoch": 51.788059701492536, "grad_norm": 23.640962600708008, "learning_rate": 8.071428571428572e-06, "loss": 43.7667, "step": 2175 }, { "epoch": 51.811940298507466, "grad_norm": 29.017114639282227, "learning_rate": 8.067460317460318e-06, "loss": 43.9821, "step": 2176 }, { "epoch": 51.83582089552239, "grad_norm": 21.79175567626953, "learning_rate": 8.063492063492065e-06, "loss": 45.0959, "step": 2177 }, { "epoch": 51.85970149253731, "grad_norm": 25.505756378173828, "learning_rate": 8.05952380952381e-06, "loss": 44.1622, "step": 2178 }, { "epoch": 51.88358208955224, "grad_norm": 19.43979263305664, "learning_rate": 8.055555555555557e-06, "loss": 43.4959, "step": 2179 }, { "epoch": 51.907462686567165, "grad_norm": 32.855037689208984, "learning_rate": 8.051587301587303e-06, "loss": 44.3206, "step": 2180 }, { "epoch": 51.93134328358209, "grad_norm": 23.80797576904297, "learning_rate": 8.047619047619048e-06, "loss": 43.6716, "step": 2181 }, { "epoch": 51.95522388059702, "grad_norm": 37.09321594238281, "learning_rate": 8.043650793650795e-06, "loss": 45.3091, "step": 2182 }, { "epoch": 51.97910447761194, "grad_norm": 25.76487922668457, "learning_rate": 8.03968253968254e-06, "loss": 44.5829, "step": 2183 }, { "epoch": 52.0, "grad_norm": 24.34773063659668, "learning_rate": 8.035714285714286e-06, "loss": 39.637, "step": 2184 }, { "epoch": 52.02388059701492, "grad_norm": 24.28459358215332, "learning_rate": 8.031746031746033e-06, "loss": 42.8823, "step": 2185 }, { "epoch": 52.04776119402985, "grad_norm": 31.015172958374023, "learning_rate": 8.027777777777778e-06, "loss": 43.6859, "step": 2186 }, { "epoch": 52.071641791044776, "grad_norm": 27.413232803344727, "learning_rate": 8.023809523809526e-06, "loss": 44.0734, "step": 2187 }, { "epoch": 52.0955223880597, "grad_norm": 34.3042106628418, "learning_rate": 8.019841269841271e-06, "loss": 44.4303, "step": 2188 }, { "epoch": 52.11940298507463, "grad_norm": 25.737226486206055, "learning_rate": 8.015873015873016e-06, "loss": 45.6858, "step": 2189 }, { "epoch": 52.14328358208955, "grad_norm": 33.09044647216797, "learning_rate": 8.011904761904763e-06, "loss": 44.0591, "step": 2190 }, { "epoch": 52.167164179104475, "grad_norm": 26.903594970703125, "learning_rate": 8.007936507936509e-06, "loss": 44.4434, "step": 2191 }, { "epoch": 52.191044776119405, "grad_norm": 32.05507278442383, "learning_rate": 8.003968253968254e-06, "loss": 44.1334, "step": 2192 }, { "epoch": 52.21492537313433, "grad_norm": 23.954050064086914, "learning_rate": 8.000000000000001e-06, "loss": 45.4077, "step": 2193 }, { "epoch": 52.23880597014925, "grad_norm": 25.273069381713867, "learning_rate": 7.996031746031747e-06, "loss": 44.4704, "step": 2194 }, { "epoch": 52.26268656716418, "grad_norm": 24.762975692749023, "learning_rate": 7.992063492063494e-06, "loss": 44.9846, "step": 2195 }, { "epoch": 52.286567164179104, "grad_norm": 31.624853134155273, "learning_rate": 7.98809523809524e-06, "loss": 44.6678, "step": 2196 }, { "epoch": 52.31044776119403, "grad_norm": 20.407798767089844, "learning_rate": 7.984126984126985e-06, "loss": 44.5191, "step": 2197 }, { "epoch": 52.33432835820896, "grad_norm": 35.610721588134766, "learning_rate": 7.980158730158732e-06, "loss": 43.797, "step": 2198 }, { "epoch": 52.35820895522388, "grad_norm": 23.916271209716797, "learning_rate": 7.976190476190477e-06, "loss": 44.5035, "step": 2199 }, { "epoch": 52.3820895522388, "grad_norm": 30.07246971130371, "learning_rate": 7.972222222222224e-06, "loss": 44.8658, "step": 2200 }, { "epoch": 52.40597014925373, "grad_norm": 26.69670295715332, "learning_rate": 7.968253968253968e-06, "loss": 43.1086, "step": 2201 }, { "epoch": 52.429850746268656, "grad_norm": 35.99201583862305, "learning_rate": 7.964285714285715e-06, "loss": 43.8965, "step": 2202 }, { "epoch": 52.45373134328358, "grad_norm": 26.909433364868164, "learning_rate": 7.96031746031746e-06, "loss": 44.3023, "step": 2203 }, { "epoch": 52.47761194029851, "grad_norm": 31.2402286529541, "learning_rate": 7.956349206349206e-06, "loss": 43.8009, "step": 2204 }, { "epoch": 52.50149253731343, "grad_norm": 28.230714797973633, "learning_rate": 7.952380952380953e-06, "loss": 45.6781, "step": 2205 }, { "epoch": 52.525373134328355, "grad_norm": 32.47516632080078, "learning_rate": 7.948412698412698e-06, "loss": 46.0123, "step": 2206 }, { "epoch": 52.549253731343285, "grad_norm": 29.042253494262695, "learning_rate": 7.944444444444445e-06, "loss": 46.4036, "step": 2207 }, { "epoch": 52.57313432835821, "grad_norm": 24.23044776916504, "learning_rate": 7.94047619047619e-06, "loss": 44.0722, "step": 2208 }, { "epoch": 52.59701492537313, "grad_norm": 25.844972610473633, "learning_rate": 7.936507936507936e-06, "loss": 44.1403, "step": 2209 }, { "epoch": 52.62089552238806, "grad_norm": 25.40447235107422, "learning_rate": 7.932539682539683e-06, "loss": 43.699, "step": 2210 }, { "epoch": 52.644776119402984, "grad_norm": 24.027687072753906, "learning_rate": 7.928571428571429e-06, "loss": 45.1803, "step": 2211 }, { "epoch": 52.668656716417914, "grad_norm": 22.707393646240234, "learning_rate": 7.924603174603174e-06, "loss": 43.7808, "step": 2212 }, { "epoch": 52.69253731343284, "grad_norm": 17.410104751586914, "learning_rate": 7.920634920634921e-06, "loss": 44.7556, "step": 2213 }, { "epoch": 52.71641791044776, "grad_norm": 19.376863479614258, "learning_rate": 7.916666666666667e-06, "loss": 45.3176, "step": 2214 }, { "epoch": 52.74029850746269, "grad_norm": 21.29641342163086, "learning_rate": 7.912698412698414e-06, "loss": 44.8597, "step": 2215 }, { "epoch": 52.76417910447761, "grad_norm": 21.937013626098633, "learning_rate": 7.908730158730159e-06, "loss": 44.3548, "step": 2216 }, { "epoch": 52.788059701492536, "grad_norm": 27.38592529296875, "learning_rate": 7.904761904761904e-06, "loss": 45.204, "step": 2217 }, { "epoch": 52.811940298507466, "grad_norm": 21.232566833496094, "learning_rate": 7.900793650793652e-06, "loss": 43.9788, "step": 2218 }, { "epoch": 52.83582089552239, "grad_norm": 22.52651023864746, "learning_rate": 7.896825396825397e-06, "loss": 44.161, "step": 2219 }, { "epoch": 52.85970149253731, "grad_norm": 23.06977081298828, "learning_rate": 7.892857142857144e-06, "loss": 44.5394, "step": 2220 }, { "epoch": 52.88358208955224, "grad_norm": 19.71670150756836, "learning_rate": 7.88888888888889e-06, "loss": 44.4384, "step": 2221 }, { "epoch": 52.907462686567165, "grad_norm": 19.651142120361328, "learning_rate": 7.884920634920635e-06, "loss": 45.3143, "step": 2222 }, { "epoch": 52.93134328358209, "grad_norm": 23.386962890625, "learning_rate": 7.880952380952382e-06, "loss": 44.4246, "step": 2223 }, { "epoch": 52.95522388059702, "grad_norm": 19.763513565063477, "learning_rate": 7.876984126984127e-06, "loss": 45.6001, "step": 2224 }, { "epoch": 52.97910447761194, "grad_norm": 21.81069564819336, "learning_rate": 7.873015873015873e-06, "loss": 45.6176, "step": 2225 }, { "epoch": 53.0, "grad_norm": 18.15079689025879, "learning_rate": 7.86904761904762e-06, "loss": 39.4819, "step": 2226 }, { "epoch": 53.02388059701492, "grad_norm": 17.333694458007812, "learning_rate": 7.865079365079365e-06, "loss": 45.612, "step": 2227 }, { "epoch": 53.04776119402985, "grad_norm": 18.72818946838379, "learning_rate": 7.861111111111112e-06, "loss": 43.5423, "step": 2228 }, { "epoch": 53.071641791044776, "grad_norm": 18.34732437133789, "learning_rate": 7.857142857142858e-06, "loss": 43.972, "step": 2229 }, { "epoch": 53.0955223880597, "grad_norm": 21.568077087402344, "learning_rate": 7.853174603174603e-06, "loss": 44.8122, "step": 2230 }, { "epoch": 53.11940298507463, "grad_norm": 20.801836013793945, "learning_rate": 7.84920634920635e-06, "loss": 43.8729, "step": 2231 }, { "epoch": 53.14328358208955, "grad_norm": 23.20212745666504, "learning_rate": 7.845238095238096e-06, "loss": 45.3738, "step": 2232 }, { "epoch": 53.167164179104475, "grad_norm": 24.016311645507812, "learning_rate": 7.841269841269843e-06, "loss": 45.0862, "step": 2233 }, { "epoch": 53.191044776119405, "grad_norm": 18.800554275512695, "learning_rate": 7.837301587301588e-06, "loss": 43.2166, "step": 2234 }, { "epoch": 53.21492537313433, "grad_norm": 20.73765754699707, "learning_rate": 7.833333333333333e-06, "loss": 43.3917, "step": 2235 }, { "epoch": 53.23880597014925, "grad_norm": 23.1943302154541, "learning_rate": 7.82936507936508e-06, "loss": 44.0957, "step": 2236 }, { "epoch": 53.26268656716418, "grad_norm": 18.450380325317383, "learning_rate": 7.825396825396826e-06, "loss": 44.1782, "step": 2237 }, { "epoch": 53.286567164179104, "grad_norm": 24.16314697265625, "learning_rate": 7.821428571428571e-06, "loss": 45.0735, "step": 2238 }, { "epoch": 53.31044776119403, "grad_norm": 25.004743576049805, "learning_rate": 7.817460317460318e-06, "loss": 44.4628, "step": 2239 }, { "epoch": 53.33432835820896, "grad_norm": 17.29636573791504, "learning_rate": 7.813492063492064e-06, "loss": 45.2476, "step": 2240 }, { "epoch": 53.35820895522388, "grad_norm": 24.759471893310547, "learning_rate": 7.809523809523811e-06, "loss": 44.71, "step": 2241 }, { "epoch": 53.3820895522388, "grad_norm": 21.52720832824707, "learning_rate": 7.805555555555556e-06, "loss": 44.9929, "step": 2242 }, { "epoch": 53.40597014925373, "grad_norm": 22.760278701782227, "learning_rate": 7.801587301587302e-06, "loss": 43.6639, "step": 2243 }, { "epoch": 53.429850746268656, "grad_norm": 19.5325927734375, "learning_rate": 7.797619047619049e-06, "loss": 44.2974, "step": 2244 }, { "epoch": 53.45373134328358, "grad_norm": 25.756797790527344, "learning_rate": 7.793650793650794e-06, "loss": 45.401, "step": 2245 }, { "epoch": 53.47761194029851, "grad_norm": 19.75324058532715, "learning_rate": 7.78968253968254e-06, "loss": 44.6426, "step": 2246 }, { "epoch": 53.50149253731343, "grad_norm": 25.47930145263672, "learning_rate": 7.785714285714287e-06, "loss": 42.2875, "step": 2247 }, { "epoch": 53.525373134328355, "grad_norm": 21.61121368408203, "learning_rate": 7.781746031746032e-06, "loss": 45.7982, "step": 2248 }, { "epoch": 53.549253731343285, "grad_norm": 24.11342430114746, "learning_rate": 7.77777777777778e-06, "loss": 43.6397, "step": 2249 }, { "epoch": 53.57313432835821, "grad_norm": 25.151281356811523, "learning_rate": 7.773809523809525e-06, "loss": 44.0536, "step": 2250 }, { "epoch": 53.59701492537313, "grad_norm": 21.925559997558594, "learning_rate": 7.76984126984127e-06, "loss": 45.2035, "step": 2251 }, { "epoch": 53.62089552238806, "grad_norm": 22.38170623779297, "learning_rate": 7.765873015873017e-06, "loss": 44.3272, "step": 2252 }, { "epoch": 53.644776119402984, "grad_norm": 24.35360336303711, "learning_rate": 7.761904761904762e-06, "loss": 45.687, "step": 2253 }, { "epoch": 53.668656716417914, "grad_norm": 20.127119064331055, "learning_rate": 7.75793650793651e-06, "loss": 44.001, "step": 2254 }, { "epoch": 53.69253731343284, "grad_norm": 20.66204833984375, "learning_rate": 7.753968253968255e-06, "loss": 45.1368, "step": 2255 }, { "epoch": 53.71641791044776, "grad_norm": 22.565038681030273, "learning_rate": 7.75e-06, "loss": 43.7021, "step": 2256 }, { "epoch": 53.74029850746269, "grad_norm": 20.893674850463867, "learning_rate": 7.746031746031747e-06, "loss": 44.381, "step": 2257 }, { "epoch": 53.76417910447761, "grad_norm": 21.53620147705078, "learning_rate": 7.742063492063493e-06, "loss": 45.2511, "step": 2258 }, { "epoch": 53.788059701492536, "grad_norm": 20.66484832763672, "learning_rate": 7.738095238095238e-06, "loss": 45.167, "step": 2259 }, { "epoch": 53.811940298507466, "grad_norm": 24.964414596557617, "learning_rate": 7.734126984126985e-06, "loss": 44.6754, "step": 2260 }, { "epoch": 53.83582089552239, "grad_norm": 22.17997169494629, "learning_rate": 7.73015873015873e-06, "loss": 44.1696, "step": 2261 }, { "epoch": 53.85970149253731, "grad_norm": 19.715208053588867, "learning_rate": 7.726190476190478e-06, "loss": 43.8961, "step": 2262 }, { "epoch": 53.88358208955224, "grad_norm": 27.562166213989258, "learning_rate": 7.722222222222223e-06, "loss": 43.9035, "step": 2263 }, { "epoch": 53.907462686567165, "grad_norm": 21.021081924438477, "learning_rate": 7.718253968253969e-06, "loss": 45.5108, "step": 2264 }, { "epoch": 53.93134328358209, "grad_norm": 25.675813674926758, "learning_rate": 7.714285714285716e-06, "loss": 44.5437, "step": 2265 }, { "epoch": 53.95522388059702, "grad_norm": 25.80996322631836, "learning_rate": 7.710317460317461e-06, "loss": 44.7417, "step": 2266 }, { "epoch": 53.97910447761194, "grad_norm": 23.055217742919922, "learning_rate": 7.706349206349208e-06, "loss": 44.3122, "step": 2267 }, { "epoch": 54.0, "grad_norm": 14.958904266357422, "learning_rate": 7.702380952380954e-06, "loss": 40.1174, "step": 2268 }, { "epoch": 54.02388059701492, "grad_norm": 23.825021743774414, "learning_rate": 7.698412698412699e-06, "loss": 43.6857, "step": 2269 }, { "epoch": 54.04776119402985, "grad_norm": 20.29824447631836, "learning_rate": 7.694444444444446e-06, "loss": 43.9979, "step": 2270 }, { "epoch": 54.071641791044776, "grad_norm": 19.905017852783203, "learning_rate": 7.690476190476191e-06, "loss": 44.1637, "step": 2271 }, { "epoch": 54.0955223880597, "grad_norm": 16.342378616333008, "learning_rate": 7.686507936507937e-06, "loss": 43.9998, "step": 2272 }, { "epoch": 54.11940298507463, "grad_norm": 22.551780700683594, "learning_rate": 7.682539682539684e-06, "loss": 44.6528, "step": 2273 }, { "epoch": 54.14328358208955, "grad_norm": 16.87897491455078, "learning_rate": 7.67857142857143e-06, "loss": 44.4259, "step": 2274 }, { "epoch": 54.167164179104475, "grad_norm": 27.494592666625977, "learning_rate": 7.674603174603176e-06, "loss": 45.7648, "step": 2275 }, { "epoch": 54.191044776119405, "grad_norm": 22.326452255249023, "learning_rate": 7.670634920634922e-06, "loss": 44.1077, "step": 2276 }, { "epoch": 54.21492537313433, "grad_norm": 30.2500057220459, "learning_rate": 7.666666666666667e-06, "loss": 44.5322, "step": 2277 }, { "epoch": 54.23880597014925, "grad_norm": 28.212095260620117, "learning_rate": 7.662698412698414e-06, "loss": 43.6225, "step": 2278 }, { "epoch": 54.26268656716418, "grad_norm": 23.524145126342773, "learning_rate": 7.65873015873016e-06, "loss": 44.9014, "step": 2279 }, { "epoch": 54.286567164179104, "grad_norm": 29.799076080322266, "learning_rate": 7.654761904761905e-06, "loss": 44.6654, "step": 2280 }, { "epoch": 54.31044776119403, "grad_norm": 20.350683212280273, "learning_rate": 7.65079365079365e-06, "loss": 44.254, "step": 2281 }, { "epoch": 54.33432835820896, "grad_norm": 25.748899459838867, "learning_rate": 7.646825396825398e-06, "loss": 44.4278, "step": 2282 }, { "epoch": 54.35820895522388, "grad_norm": 25.086284637451172, "learning_rate": 7.642857142857143e-06, "loss": 43.8974, "step": 2283 }, { "epoch": 54.3820895522388, "grad_norm": 24.49972915649414, "learning_rate": 7.638888888888888e-06, "loss": 44.4423, "step": 2284 }, { "epoch": 54.40597014925373, "grad_norm": 18.78260612487793, "learning_rate": 7.634920634920635e-06, "loss": 44.5666, "step": 2285 }, { "epoch": 54.429850746268656, "grad_norm": 20.125263214111328, "learning_rate": 7.630952380952381e-06, "loss": 44.4853, "step": 2286 }, { "epoch": 54.45373134328358, "grad_norm": 20.763385772705078, "learning_rate": 7.626984126984127e-06, "loss": 43.6951, "step": 2287 }, { "epoch": 54.47761194029851, "grad_norm": 22.267620086669922, "learning_rate": 7.623015873015873e-06, "loss": 45.0328, "step": 2288 }, { "epoch": 54.50149253731343, "grad_norm": 24.786283493041992, "learning_rate": 7.61904761904762e-06, "loss": 45.0047, "step": 2289 }, { "epoch": 54.525373134328355, "grad_norm": 18.581987380981445, "learning_rate": 7.615079365079365e-06, "loss": 45.0172, "step": 2290 }, { "epoch": 54.549253731343285, "grad_norm": 27.262859344482422, "learning_rate": 7.611111111111111e-06, "loss": 44.1982, "step": 2291 }, { "epoch": 54.57313432835821, "grad_norm": 25.255537033081055, "learning_rate": 7.6071428571428575e-06, "loss": 43.8743, "step": 2292 }, { "epoch": 54.59701492537313, "grad_norm": 17.982698440551758, "learning_rate": 7.603174603174604e-06, "loss": 43.4799, "step": 2293 }, { "epoch": 54.62089552238806, "grad_norm": 25.1834716796875, "learning_rate": 7.599206349206349e-06, "loss": 44.8257, "step": 2294 }, { "epoch": 54.644776119402984, "grad_norm": 19.047700881958008, "learning_rate": 7.595238095238095e-06, "loss": 45.2678, "step": 2295 }, { "epoch": 54.668656716417914, "grad_norm": 26.404882431030273, "learning_rate": 7.591269841269842e-06, "loss": 43.3948, "step": 2296 }, { "epoch": 54.69253731343284, "grad_norm": 19.84337615966797, "learning_rate": 7.587301587301588e-06, "loss": 45.7629, "step": 2297 }, { "epoch": 54.71641791044776, "grad_norm": 19.9034481048584, "learning_rate": 7.583333333333333e-06, "loss": 44.4071, "step": 2298 }, { "epoch": 54.74029850746269, "grad_norm": NaN, "learning_rate": 7.5793650793650795e-06, "loss": 62.5737, "step": 2299 }, { "epoch": 54.76417910447761, "grad_norm": 21.210494995117188, "learning_rate": 7.5793650793650795e-06, "loss": 44.7093, "step": 2300 }, { "epoch": 54.788059701492536, "grad_norm": 21.403833389282227, "learning_rate": 7.575396825396826e-06, "loss": 44.6465, "step": 2301 }, { "epoch": 54.811940298507466, "grad_norm": 21.515085220336914, "learning_rate": 7.571428571428572e-06, "loss": 44.3846, "step": 2302 }, { "epoch": 54.83582089552239, "grad_norm": 25.024738311767578, "learning_rate": 7.567460317460317e-06, "loss": 44.7177, "step": 2303 }, { "epoch": 54.85970149253731, "grad_norm": 18.742982864379883, "learning_rate": 7.563492063492064e-06, "loss": 44.0076, "step": 2304 }, { "epoch": 54.88358208955224, "grad_norm": 20.333248138427734, "learning_rate": 7.55952380952381e-06, "loss": 43.6154, "step": 2305 }, { "epoch": 54.907462686567165, "grad_norm": 21.4791316986084, "learning_rate": 7.555555555555556e-06, "loss": 44.2151, "step": 2306 }, { "epoch": 54.93134328358209, "grad_norm": 26.272165298461914, "learning_rate": 7.551587301587302e-06, "loss": 45.5681, "step": 2307 }, { "epoch": 54.95522388059702, "grad_norm": 24.541637420654297, "learning_rate": 7.547619047619048e-06, "loss": 42.8248, "step": 2308 }, { "epoch": 54.97910447761194, "grad_norm": 23.563505172729492, "learning_rate": 7.543650793650794e-06, "loss": 45.1566, "step": 2309 }, { "epoch": 55.0, "grad_norm": 18.819583892822266, "learning_rate": 7.53968253968254e-06, "loss": 39.9674, "step": 2310 }, { "epoch": 55.02388059701492, "grad_norm": 21.50135040283203, "learning_rate": 7.5357142857142865e-06, "loss": 44.7008, "step": 2311 }, { "epoch": 55.04776119402985, "grad_norm": 20.28955078125, "learning_rate": 7.531746031746032e-06, "loss": 45.7705, "step": 2312 }, { "epoch": 55.071641791044776, "grad_norm": 19.94631004333496, "learning_rate": 7.527777777777778e-06, "loss": 44.3223, "step": 2313 }, { "epoch": 55.0955223880597, "grad_norm": 22.5699520111084, "learning_rate": 7.523809523809524e-06, "loss": 45.2447, "step": 2314 }, { "epoch": 55.11940298507463, "grad_norm": 17.17287254333496, "learning_rate": 7.519841269841271e-06, "loss": 44.4756, "step": 2315 }, { "epoch": 55.14328358208955, "grad_norm": 17.727617263793945, "learning_rate": 7.515873015873016e-06, "loss": 44.3907, "step": 2316 }, { "epoch": 55.167164179104475, "grad_norm": 18.320112228393555, "learning_rate": 7.511904761904762e-06, "loss": 44.3684, "step": 2317 }, { "epoch": 55.191044776119405, "grad_norm": 14.576784133911133, "learning_rate": 7.5079365079365085e-06, "loss": 43.8936, "step": 2318 }, { "epoch": 55.21492537313433, "grad_norm": 18.880218505859375, "learning_rate": 7.503968253968255e-06, "loss": 44.5556, "step": 2319 }, { "epoch": 55.23880597014925, "grad_norm": 19.764198303222656, "learning_rate": 7.500000000000001e-06, "loss": 43.8549, "step": 2320 }, { "epoch": 55.26268656716418, "grad_norm": NaN, "learning_rate": 7.4960317460317464e-06, "loss": 44.1908, "step": 2321 }, { "epoch": 55.286567164179104, "grad_norm": 19.977001190185547, "learning_rate": 7.4960317460317464e-06, "loss": 44.3724, "step": 2322 }, { "epoch": 55.31044776119403, "grad_norm": 21.85407066345215, "learning_rate": 7.492063492063493e-06, "loss": 44.2829, "step": 2323 }, { "epoch": 55.33432835820896, "grad_norm": 22.168100357055664, "learning_rate": 7.488095238095239e-06, "loss": 44.4898, "step": 2324 }, { "epoch": 55.35820895522388, "grad_norm": 18.234298706054688, "learning_rate": 7.484126984126985e-06, "loss": 44.3711, "step": 2325 }, { "epoch": 55.3820895522388, "grad_norm": 30.803691864013672, "learning_rate": 7.4801587301587306e-06, "loss": 43.825, "step": 2326 }, { "epoch": 55.40597014925373, "grad_norm": 22.18082046508789, "learning_rate": 7.476190476190477e-06, "loss": 44.8057, "step": 2327 }, { "epoch": 55.429850746268656, "grad_norm": 32.34336853027344, "learning_rate": 7.472222222222223e-06, "loss": 45.0128, "step": 2328 }, { "epoch": 55.45373134328358, "grad_norm": 28.040363311767578, "learning_rate": 7.468253968253969e-06, "loss": 45.2428, "step": 2329 }, { "epoch": 55.47761194029851, "grad_norm": 31.505037307739258, "learning_rate": 7.464285714285715e-06, "loss": 43.6846, "step": 2330 }, { "epoch": 55.50149253731343, "grad_norm": 21.62251853942871, "learning_rate": 7.460317460317461e-06, "loss": 44.467, "step": 2331 }, { "epoch": 55.525373134328355, "grad_norm": 29.035741806030273, "learning_rate": 7.456349206349207e-06, "loss": 43.7965, "step": 2332 }, { "epoch": 55.549253731343285, "grad_norm": 17.466440200805664, "learning_rate": 7.4523809523809534e-06, "loss": 44.8456, "step": 2333 }, { "epoch": 55.57313432835821, "grad_norm": 37.01939392089844, "learning_rate": 7.448412698412699e-06, "loss": 44.4359, "step": 2334 }, { "epoch": 55.59701492537313, "grad_norm": 24.612354278564453, "learning_rate": 7.444444444444445e-06, "loss": 43.7534, "step": 2335 }, { "epoch": 55.62089552238806, "grad_norm": 39.57061004638672, "learning_rate": 7.440476190476191e-06, "loss": 45.5826, "step": 2336 }, { "epoch": 55.644776119402984, "grad_norm": 33.687538146972656, "learning_rate": 7.4365079365079376e-06, "loss": 44.3013, "step": 2337 }, { "epoch": 55.668656716417914, "grad_norm": 38.39813995361328, "learning_rate": 7.432539682539684e-06, "loss": 44.7874, "step": 2338 }, { "epoch": 55.69253731343284, "grad_norm": 38.15765380859375, "learning_rate": 7.428571428571429e-06, "loss": 44.6177, "step": 2339 }, { "epoch": 55.71641791044776, "grad_norm": 27.5556640625, "learning_rate": 7.4246031746031754e-06, "loss": 43.1288, "step": 2340 }, { "epoch": 55.74029850746269, "grad_norm": 30.958349227905273, "learning_rate": 7.420634920634922e-06, "loss": 45.4904, "step": 2341 }, { "epoch": 55.76417910447761, "grad_norm": 21.71723747253418, "learning_rate": 7.416666666666668e-06, "loss": 43.0391, "step": 2342 }, { "epoch": 55.788059701492536, "grad_norm": 24.229736328125, "learning_rate": 7.412698412698413e-06, "loss": 44.0514, "step": 2343 }, { "epoch": 55.811940298507466, "grad_norm": 32.11771011352539, "learning_rate": 7.40873015873016e-06, "loss": 44.5521, "step": 2344 }, { "epoch": 55.83582089552239, "grad_norm": 21.694486618041992, "learning_rate": 7.404761904761906e-06, "loss": 43.9067, "step": 2345 }, { "epoch": 55.85970149253731, "grad_norm": 31.469402313232422, "learning_rate": 7.400793650793652e-06, "loss": 43.3506, "step": 2346 }, { "epoch": 55.88358208955224, "grad_norm": 28.431011199951172, "learning_rate": 7.3968253968253975e-06, "loss": 43.7056, "step": 2347 }, { "epoch": 55.907462686567165, "grad_norm": 34.616065979003906, "learning_rate": 7.392857142857144e-06, "loss": 44.0428, "step": 2348 }, { "epoch": 55.93134328358209, "grad_norm": 27.52882194519043, "learning_rate": 7.38888888888889e-06, "loss": 43.8619, "step": 2349 }, { "epoch": 55.95522388059702, "grad_norm": 36.93627166748047, "learning_rate": 7.384920634920636e-06, "loss": 44.1589, "step": 2350 }, { "epoch": 55.97910447761194, "grad_norm": 37.746578216552734, "learning_rate": 7.380952380952382e-06, "loss": 46.0174, "step": 2351 }, { "epoch": 56.0, "grad_norm": 20.20071029663086, "learning_rate": 7.376984126984128e-06, "loss": 37.7123, "step": 2352 }, { "epoch": 56.02388059701492, "grad_norm": 25.446529388427734, "learning_rate": 7.373015873015874e-06, "loss": 43.4657, "step": 2353 }, { "epoch": 56.04776119402985, "grad_norm": 25.78912353515625, "learning_rate": 7.36904761904762e-06, "loss": 44.7379, "step": 2354 }, { "epoch": 56.071641791044776, "grad_norm": 24.028154373168945, "learning_rate": 7.3650793650793666e-06, "loss": 43.4876, "step": 2355 }, { "epoch": 56.0955223880597, "grad_norm": 30.157793045043945, "learning_rate": 7.361111111111112e-06, "loss": 44.2346, "step": 2356 }, { "epoch": 56.11940298507463, "grad_norm": 29.836891174316406, "learning_rate": 7.357142857142858e-06, "loss": 44.0793, "step": 2357 }, { "epoch": 56.14328358208955, "grad_norm": 30.458818435668945, "learning_rate": 7.3531746031746045e-06, "loss": 44.7587, "step": 2358 }, { "epoch": 56.167164179104475, "grad_norm": 28.30854034423828, "learning_rate": 7.349206349206351e-06, "loss": 44.9153, "step": 2359 }, { "epoch": 56.191044776119405, "grad_norm": 29.497085571289062, "learning_rate": 7.345238095238096e-06, "loss": 44.194, "step": 2360 }, { "epoch": 56.21492537313433, "grad_norm": 21.790063858032227, "learning_rate": 7.3412698412698415e-06, "loss": 44.1507, "step": 2361 }, { "epoch": 56.23880597014925, "grad_norm": 27.489242553710938, "learning_rate": 7.337301587301588e-06, "loss": 44.3505, "step": 2362 }, { "epoch": 56.26268656716418, "grad_norm": 22.906452178955078, "learning_rate": 7.333333333333333e-06, "loss": 44.9329, "step": 2363 }, { "epoch": 56.286567164179104, "grad_norm": 33.46257400512695, "learning_rate": 7.329365079365079e-06, "loss": 43.1323, "step": 2364 }, { "epoch": 56.31044776119403, "grad_norm": 22.410470962524414, "learning_rate": 7.325396825396826e-06, "loss": 43.6305, "step": 2365 }, { "epoch": 56.33432835820896, "grad_norm": 32.74277114868164, "learning_rate": 7.321428571428572e-06, "loss": 43.6165, "step": 2366 }, { "epoch": 56.35820895522388, "grad_norm": 31.368419647216797, "learning_rate": 7.317460317460317e-06, "loss": 44.8837, "step": 2367 }, { "epoch": 56.3820895522388, "grad_norm": 30.941593170166016, "learning_rate": 7.3134920634920635e-06, "loss": 44.2282, "step": 2368 }, { "epoch": 56.40597014925373, "grad_norm": 34.12104415893555, "learning_rate": 7.30952380952381e-06, "loss": 45.2191, "step": 2369 }, { "epoch": 56.429850746268656, "grad_norm": 28.09603500366211, "learning_rate": 7.305555555555556e-06, "loss": 44.9848, "step": 2370 }, { "epoch": 56.45373134328358, "grad_norm": 29.32378578186035, "learning_rate": 7.301587301587301e-06, "loss": 44.2658, "step": 2371 }, { "epoch": 56.47761194029851, "grad_norm": 26.269071578979492, "learning_rate": 7.297619047619048e-06, "loss": 46.2933, "step": 2372 }, { "epoch": 56.50149253731343, "grad_norm": 25.23761558532715, "learning_rate": 7.293650793650794e-06, "loss": 44.4726, "step": 2373 }, { "epoch": 56.525373134328355, "grad_norm": 27.6646728515625, "learning_rate": 7.28968253968254e-06, "loss": 43.9823, "step": 2374 }, { "epoch": 56.549253731343285, "grad_norm": 23.01959228515625, "learning_rate": 7.285714285714286e-06, "loss": 44.5168, "step": 2375 }, { "epoch": 56.57313432835821, "grad_norm": 31.120128631591797, "learning_rate": 7.281746031746032e-06, "loss": 44.6706, "step": 2376 }, { "epoch": 56.59701492537313, "grad_norm": 24.57699203491211, "learning_rate": 7.277777777777778e-06, "loss": 45.1781, "step": 2377 }, { "epoch": 56.62089552238806, "grad_norm": 28.831064224243164, "learning_rate": 7.273809523809524e-06, "loss": 43.7437, "step": 2378 }, { "epoch": 56.644776119402984, "grad_norm": 26.15156364440918, "learning_rate": 7.2698412698412705e-06, "loss": 45.1624, "step": 2379 }, { "epoch": 56.668656716417914, "grad_norm": 28.825542449951172, "learning_rate": 7.265873015873016e-06, "loss": 44.66, "step": 2380 }, { "epoch": 56.69253731343284, "grad_norm": 26.763559341430664, "learning_rate": 7.261904761904762e-06, "loss": 43.6022, "step": 2381 }, { "epoch": 56.71641791044776, "grad_norm": 26.9444522857666, "learning_rate": 7.257936507936508e-06, "loss": 44.025, "step": 2382 }, { "epoch": 56.74029850746269, "grad_norm": 24.93570327758789, "learning_rate": 7.253968253968255e-06, "loss": 45.3596, "step": 2383 }, { "epoch": 56.76417910447761, "grad_norm": 29.15943717956543, "learning_rate": 7.25e-06, "loss": 44.2434, "step": 2384 }, { "epoch": 56.788059701492536, "grad_norm": 25.154356002807617, "learning_rate": 7.246031746031746e-06, "loss": 44.1319, "step": 2385 }, { "epoch": 56.811940298507466, "grad_norm": 24.86849021911621, "learning_rate": 7.2420634920634925e-06, "loss": 44.5886, "step": 2386 }, { "epoch": 56.83582089552239, "grad_norm": 22.996164321899414, "learning_rate": 7.238095238095239e-06, "loss": 43.9457, "step": 2387 }, { "epoch": 56.85970149253731, "grad_norm": 21.19574737548828, "learning_rate": 7.234126984126984e-06, "loss": 42.4004, "step": 2388 }, { "epoch": 56.88358208955224, "grad_norm": 22.6845645904541, "learning_rate": 7.23015873015873e-06, "loss": 43.7031, "step": 2389 }, { "epoch": 56.907462686567165, "grad_norm": 24.80902099609375, "learning_rate": 7.226190476190477e-06, "loss": 45.4586, "step": 2390 }, { "epoch": 56.93134328358209, "grad_norm": 23.157859802246094, "learning_rate": 7.222222222222223e-06, "loss": 43.4031, "step": 2391 }, { "epoch": 56.95522388059702, "grad_norm": 16.62670135498047, "learning_rate": 7.218253968253969e-06, "loss": 43.9552, "step": 2392 }, { "epoch": 56.97910447761194, "grad_norm": 31.313417434692383, "learning_rate": 7.2142857142857145e-06, "loss": 44.5037, "step": 2393 }, { "epoch": 57.0, "grad_norm": 17.58053970336914, "learning_rate": 7.210317460317461e-06, "loss": 38.2111, "step": 2394 }, { "epoch": 57.02388059701492, "grad_norm": 28.399444580078125, "learning_rate": 7.206349206349207e-06, "loss": 43.0462, "step": 2395 }, { "epoch": 57.04776119402985, "grad_norm": 28.965984344482422, "learning_rate": 7.202380952380953e-06, "loss": 45.5986, "step": 2396 }, { "epoch": 57.071641791044776, "grad_norm": 20.43558120727539, "learning_rate": 7.198412698412699e-06, "loss": 43.5525, "step": 2397 }, { "epoch": 57.0955223880597, "grad_norm": 29.135162353515625, "learning_rate": 7.194444444444445e-06, "loss": 44.4881, "step": 2398 }, { "epoch": 57.11940298507463, "grad_norm": 23.580699920654297, "learning_rate": 7.190476190476191e-06, "loss": 43.7271, "step": 2399 }, { "epoch": 57.14328358208955, "grad_norm": 22.67143440246582, "learning_rate": 7.186507936507937e-06, "loss": 42.7561, "step": 2400 }, { "epoch": 57.167164179104475, "grad_norm": 21.757719039916992, "learning_rate": 7.182539682539683e-06, "loss": 44.4426, "step": 2401 }, { "epoch": 57.191044776119405, "grad_norm": 22.878713607788086, "learning_rate": 7.178571428571429e-06, "loss": 43.9361, "step": 2402 }, { "epoch": 57.21492537313433, "grad_norm": 21.213376998901367, "learning_rate": 7.174603174603175e-06, "loss": 44.4992, "step": 2403 }, { "epoch": 57.23880597014925, "grad_norm": 26.92378807067871, "learning_rate": 7.1706349206349215e-06, "loss": 43.3883, "step": 2404 }, { "epoch": 57.26268656716418, "grad_norm": 21.269611358642578, "learning_rate": 7.166666666666667e-06, "loss": 45.1554, "step": 2405 }, { "epoch": 57.286567164179104, "grad_norm": 24.775962829589844, "learning_rate": 7.162698412698413e-06, "loss": 44.6757, "step": 2406 }, { "epoch": 57.31044776119403, "grad_norm": 23.0452880859375, "learning_rate": 7.1587301587301594e-06, "loss": 44.0688, "step": 2407 }, { "epoch": 57.33432835820896, "grad_norm": 25.50167465209961, "learning_rate": 7.154761904761906e-06, "loss": 44.7899, "step": 2408 }, { "epoch": 57.35820895522388, "grad_norm": 21.560483932495117, "learning_rate": 7.150793650793652e-06, "loss": 44.3325, "step": 2409 }, { "epoch": 57.3820895522388, "grad_norm": 22.5815486907959, "learning_rate": 7.146825396825397e-06, "loss": 44.1531, "step": 2410 }, { "epoch": 57.40597014925373, "grad_norm": 22.582660675048828, "learning_rate": 7.1428571428571436e-06, "loss": 45.738, "step": 2411 }, { "epoch": 57.429850746268656, "grad_norm": 21.388498306274414, "learning_rate": 7.13888888888889e-06, "loss": 44.2737, "step": 2412 }, { "epoch": 57.45373134328358, "grad_norm": 25.76162338256836, "learning_rate": 7.134920634920636e-06, "loss": 45.273, "step": 2413 }, { "epoch": 57.47761194029851, "grad_norm": 22.20672035217285, "learning_rate": 7.1309523809523814e-06, "loss": 44.8847, "step": 2414 }, { "epoch": 57.50149253731343, "grad_norm": 22.07602310180664, "learning_rate": 7.126984126984128e-06, "loss": 45.0162, "step": 2415 }, { "epoch": 57.525373134328355, "grad_norm": 23.339630126953125, "learning_rate": 7.123015873015874e-06, "loss": 45.1952, "step": 2416 }, { "epoch": 57.549253731343285, "grad_norm": 21.66541290283203, "learning_rate": 7.11904761904762e-06, "loss": 43.586, "step": 2417 }, { "epoch": 57.57313432835821, "grad_norm": 19.015748977661133, "learning_rate": 7.115079365079366e-06, "loss": 44.391, "step": 2418 }, { "epoch": 57.59701492537313, "grad_norm": 20.501789093017578, "learning_rate": 7.111111111111112e-06, "loss": 43.8462, "step": 2419 }, { "epoch": 57.62089552238806, "grad_norm": 20.447154998779297, "learning_rate": 7.107142857142858e-06, "loss": 44.0195, "step": 2420 }, { "epoch": 57.644776119402984, "grad_norm": 17.749000549316406, "learning_rate": 7.103174603174604e-06, "loss": 43.0026, "step": 2421 }, { "epoch": 57.668656716417914, "grad_norm": 22.757408142089844, "learning_rate": 7.09920634920635e-06, "loss": 44.1692, "step": 2422 }, { "epoch": 57.69253731343284, "grad_norm": 17.98983383178711, "learning_rate": 7.095238095238096e-06, "loss": 43.4101, "step": 2423 }, { "epoch": 57.71641791044776, "grad_norm": 24.20079803466797, "learning_rate": 7.091269841269842e-06, "loss": 44.8966, "step": 2424 }, { "epoch": 57.74029850746269, "grad_norm": 22.89764404296875, "learning_rate": 7.0873015873015884e-06, "loss": 43.2216, "step": 2425 }, { "epoch": 57.76417910447761, "grad_norm": 22.002994537353516, "learning_rate": 7.083333333333335e-06, "loss": 44.6504, "step": 2426 }, { "epoch": 57.788059701492536, "grad_norm": 21.76395606994629, "learning_rate": 7.07936507936508e-06, "loss": 44.2117, "step": 2427 }, { "epoch": 57.811940298507466, "grad_norm": 21.774457931518555, "learning_rate": 7.075396825396826e-06, "loss": 42.8879, "step": 2428 }, { "epoch": 57.83582089552239, "grad_norm": 20.503652572631836, "learning_rate": 7.0714285714285726e-06, "loss": 45.954, "step": 2429 }, { "epoch": 57.85970149253731, "grad_norm": 23.904399871826172, "learning_rate": 7.067460317460319e-06, "loss": 43.356, "step": 2430 }, { "epoch": 57.88358208955224, "grad_norm": 19.87941551208496, "learning_rate": 7.063492063492064e-06, "loss": 43.2895, "step": 2431 }, { "epoch": 57.907462686567165, "grad_norm": 21.451114654541016, "learning_rate": 7.0595238095238105e-06, "loss": 45.5597, "step": 2432 }, { "epoch": 57.93134328358209, "grad_norm": 19.936561584472656, "learning_rate": 7.055555555555557e-06, "loss": 44.9952, "step": 2433 }, { "epoch": 57.95522388059702, "grad_norm": 28.258575439453125, "learning_rate": 7.051587301587303e-06, "loss": 44.6988, "step": 2434 }, { "epoch": 57.97910447761194, "grad_norm": 17.210622787475586, "learning_rate": 7.047619047619048e-06, "loss": 43.6215, "step": 2435 }, { "epoch": 58.0, "grad_norm": 24.241130828857422, "learning_rate": 7.043650793650795e-06, "loss": 38.4149, "step": 2436 }, { "epoch": 58.02388059701492, "grad_norm": 16.801782608032227, "learning_rate": 7.039682539682541e-06, "loss": 43.5018, "step": 2437 }, { "epoch": 58.04776119402985, "grad_norm": 29.925647735595703, "learning_rate": 7.035714285714287e-06, "loss": 43.2125, "step": 2438 }, { "epoch": 58.071641791044776, "grad_norm": 18.95758056640625, "learning_rate": 7.0317460317460325e-06, "loss": 44.4259, "step": 2439 }, { "epoch": 58.0955223880597, "grad_norm": 23.302980422973633, "learning_rate": 7.027777777777778e-06, "loss": 44.0357, "step": 2440 }, { "epoch": 58.11940298507463, "grad_norm": 20.59381866455078, "learning_rate": 7.023809523809524e-06, "loss": 43.307, "step": 2441 }, { "epoch": 58.14328358208955, "grad_norm": 22.13384246826172, "learning_rate": 7.0198412698412695e-06, "loss": 44.4651, "step": 2442 }, { "epoch": 58.167164179104475, "grad_norm": 19.862943649291992, "learning_rate": 7.015873015873016e-06, "loss": 44.2492, "step": 2443 }, { "epoch": 58.191044776119405, "grad_norm": 17.979585647583008, "learning_rate": 7.011904761904762e-06, "loss": 44.8333, "step": 2444 }, { "epoch": 58.21492537313433, "grad_norm": 24.35641860961914, "learning_rate": 7.007936507936508e-06, "loss": 43.9583, "step": 2445 }, { "epoch": 58.23880597014925, "grad_norm": 20.068201065063477, "learning_rate": 7.0039682539682545e-06, "loss": 44.2379, "step": 2446 }, { "epoch": 58.26268656716418, "grad_norm": 24.976778030395508, "learning_rate": 7e-06, "loss": 45.3054, "step": 2447 }, { "epoch": 58.286567164179104, "grad_norm": 19.772478103637695, "learning_rate": 6.996031746031746e-06, "loss": 44.7523, "step": 2448 }, { "epoch": 58.31044776119403, "grad_norm": 24.084999084472656, "learning_rate": 6.992063492063492e-06, "loss": 43.7454, "step": 2449 }, { "epoch": 58.33432835820896, "grad_norm": 23.313718795776367, "learning_rate": 6.988095238095239e-06, "loss": 44.8838, "step": 2450 }, { "epoch": 58.35820895522388, "grad_norm": 22.206872940063477, "learning_rate": 6.984126984126984e-06, "loss": 45.3141, "step": 2451 }, { "epoch": 58.3820895522388, "grad_norm": 17.005720138549805, "learning_rate": 6.98015873015873e-06, "loss": 43.8783, "step": 2452 }, { "epoch": 58.40597014925373, "grad_norm": NaN, "learning_rate": 6.9761904761904765e-06, "loss": 43.2019, "step": 2453 }, { "epoch": 58.429850746268656, "grad_norm": 20.627288818359375, "learning_rate": 6.9761904761904765e-06, "loss": 44.6086, "step": 2454 }, { "epoch": 58.45373134328358, "grad_norm": 19.47835350036621, "learning_rate": 6.972222222222223e-06, "loss": 44.6011, "step": 2455 }, { "epoch": 58.47761194029851, "grad_norm": 23.258609771728516, "learning_rate": 6.968253968253968e-06, "loss": 44.0184, "step": 2456 }, { "epoch": 58.50149253731343, "grad_norm": 22.79289436340332, "learning_rate": 6.964285714285714e-06, "loss": 44.1053, "step": 2457 }, { "epoch": 58.525373134328355, "grad_norm": 20.517322540283203, "learning_rate": 6.960317460317461e-06, "loss": 43.8647, "step": 2458 }, { "epoch": 58.549253731343285, "grad_norm": 15.967523574829102, "learning_rate": 6.956349206349207e-06, "loss": 44.9457, "step": 2459 }, { "epoch": 58.57313432835821, "grad_norm": 26.704971313476562, "learning_rate": 6.952380952380952e-06, "loss": 42.5455, "step": 2460 }, { "epoch": 58.59701492537313, "grad_norm": 17.790040969848633, "learning_rate": 6.9484126984126985e-06, "loss": 45.0774, "step": 2461 }, { "epoch": 58.62089552238806, "grad_norm": 24.100841522216797, "learning_rate": 6.944444444444445e-06, "loss": 44.7616, "step": 2462 }, { "epoch": 58.644776119402984, "grad_norm": 17.18956184387207, "learning_rate": 6.940476190476191e-06, "loss": 44.6506, "step": 2463 }, { "epoch": 58.668656716417914, "grad_norm": 25.185026168823242, "learning_rate": 6.936507936507937e-06, "loss": 44.9502, "step": 2464 }, { "epoch": 58.69253731343284, "grad_norm": 22.515111923217773, "learning_rate": 6.932539682539683e-06, "loss": 43.802, "step": 2465 }, { "epoch": 58.71641791044776, "grad_norm": 26.777843475341797, "learning_rate": 6.928571428571429e-06, "loss": 44.0202, "step": 2466 }, { "epoch": 58.74029850746269, "grad_norm": 22.73821449279785, "learning_rate": 6.924603174603175e-06, "loss": 44.1536, "step": 2467 }, { "epoch": 58.76417910447761, "grad_norm": 20.890169143676758, "learning_rate": 6.920634920634921e-06, "loss": 43.6729, "step": 2468 }, { "epoch": 58.788059701492536, "grad_norm": 21.856304168701172, "learning_rate": 6.916666666666667e-06, "loss": 44.5367, "step": 2469 }, { "epoch": 58.811940298507466, "grad_norm": 23.25510025024414, "learning_rate": 6.912698412698413e-06, "loss": 44.8127, "step": 2470 }, { "epoch": 58.83582089552239, "grad_norm": 26.32556915283203, "learning_rate": 6.908730158730159e-06, "loss": 44.8174, "step": 2471 }, { "epoch": 58.85970149253731, "grad_norm": 17.11307716369629, "learning_rate": 6.9047619047619055e-06, "loss": 42.9014, "step": 2472 }, { "epoch": 58.88358208955224, "grad_norm": 22.380199432373047, "learning_rate": 6.900793650793651e-06, "loss": 43.9276, "step": 2473 }, { "epoch": 58.907462686567165, "grad_norm": 17.855484008789062, "learning_rate": 6.896825396825397e-06, "loss": 43.5554, "step": 2474 }, { "epoch": 58.93134328358209, "grad_norm": 18.112934112548828, "learning_rate": 6.892857142857143e-06, "loss": 43.1567, "step": 2475 }, { "epoch": 58.95522388059702, "grad_norm": 21.682844161987305, "learning_rate": 6.88888888888889e-06, "loss": 44.4182, "step": 2476 }, { "epoch": 58.97910447761194, "grad_norm": 22.78960609436035, "learning_rate": 6.884920634920635e-06, "loss": 44.5085, "step": 2477 }, { "epoch": 59.0, "grad_norm": 26.30175018310547, "learning_rate": 6.880952380952381e-06, "loss": 38.3404, "step": 2478 }, { "epoch": 59.02388059701492, "grad_norm": 23.714672088623047, "learning_rate": 6.8769841269841275e-06, "loss": 44.131, "step": 2479 }, { "epoch": 59.04776119402985, "grad_norm": 21.14238929748535, "learning_rate": 6.873015873015874e-06, "loss": 44.0208, "step": 2480 }, { "epoch": 59.071641791044776, "grad_norm": 25.2884464263916, "learning_rate": 6.86904761904762e-06, "loss": 43.4771, "step": 2481 }, { "epoch": 59.0955223880597, "grad_norm": 21.62645149230957, "learning_rate": 6.8650793650793654e-06, "loss": 43.3618, "step": 2482 }, { "epoch": 59.11940298507463, "grad_norm": 21.61750602722168, "learning_rate": 6.861111111111112e-06, "loss": 45.1006, "step": 2483 }, { "epoch": 59.14328358208955, "grad_norm": 25.124187469482422, "learning_rate": 6.857142857142858e-06, "loss": 44.2154, "step": 2484 }, { "epoch": 59.167164179104475, "grad_norm": 21.88067626953125, "learning_rate": 6.853174603174604e-06, "loss": 44.6725, "step": 2485 }, { "epoch": 59.191044776119405, "grad_norm": 26.95779037475586, "learning_rate": 6.8492063492063496e-06, "loss": 43.2419, "step": 2486 }, { "epoch": 59.21492537313433, "grad_norm": 18.35979461669922, "learning_rate": 6.845238095238096e-06, "loss": 42.7172, "step": 2487 }, { "epoch": 59.23880597014925, "grad_norm": 27.409034729003906, "learning_rate": 6.841269841269842e-06, "loss": 44.7929, "step": 2488 }, { "epoch": 59.26268656716418, "grad_norm": 23.257780075073242, "learning_rate": 6.837301587301588e-06, "loss": 45.0742, "step": 2489 }, { "epoch": 59.286567164179104, "grad_norm": 23.64436912536621, "learning_rate": 6.833333333333334e-06, "loss": 45.3608, "step": 2490 }, { "epoch": 59.31044776119403, "grad_norm": 19.836320877075195, "learning_rate": 6.82936507936508e-06, "loss": 43.3152, "step": 2491 }, { "epoch": 59.33432835820896, "grad_norm": 23.7291259765625, "learning_rate": 6.825396825396826e-06, "loss": 45.0224, "step": 2492 }, { "epoch": 59.35820895522388, "grad_norm": 22.776365280151367, "learning_rate": 6.8214285714285724e-06, "loss": 44.0495, "step": 2493 }, { "epoch": 59.3820895522388, "grad_norm": 19.556560516357422, "learning_rate": 6.817460317460318e-06, "loss": 43.0716, "step": 2494 }, { "epoch": 59.40597014925373, "grad_norm": 19.592493057250977, "learning_rate": 6.813492063492064e-06, "loss": 43.8102, "step": 2495 }, { "epoch": 59.429850746268656, "grad_norm": 18.4060001373291, "learning_rate": 6.80952380952381e-06, "loss": 42.5119, "step": 2496 }, { "epoch": 59.45373134328358, "grad_norm": 18.86701202392578, "learning_rate": 6.8055555555555566e-06, "loss": 42.7576, "step": 2497 }, { "epoch": 59.47761194029851, "grad_norm": 19.602235794067383, "learning_rate": 6.801587301587303e-06, "loss": 44.8432, "step": 2498 }, { "epoch": 59.50149253731343, "grad_norm": NaN, "learning_rate": 6.797619047619048e-06, "loss": 77.3141, "step": 2499 }, { "epoch": 59.525373134328355, "grad_norm": 18.09695816040039, "learning_rate": 6.797619047619048e-06, "loss": 44.5263, "step": 2500 }, { "epoch": 59.549253731343285, "grad_norm": 20.728633880615234, "learning_rate": 6.7936507936507944e-06, "loss": 45.2417, "step": 2501 }, { "epoch": 59.57313432835821, "grad_norm": 19.164405822753906, "learning_rate": 6.789682539682541e-06, "loss": 44.1673, "step": 2502 }, { "epoch": 59.59701492537313, "grad_norm": 17.986604690551758, "learning_rate": 6.785714285714287e-06, "loss": 44.9311, "step": 2503 }, { "epoch": 59.62089552238806, "grad_norm": 21.212223052978516, "learning_rate": 6.781746031746032e-06, "loss": 43.9186, "step": 2504 }, { "epoch": 59.644776119402984, "grad_norm": 19.34587287902832, "learning_rate": 6.777777777777779e-06, "loss": 43.9205, "step": 2505 }, { "epoch": 59.668656716417914, "grad_norm": 21.107257843017578, "learning_rate": 6.773809523809525e-06, "loss": 43.7522, "step": 2506 }, { "epoch": 59.69253731343284, "grad_norm": 19.897724151611328, "learning_rate": 6.769841269841271e-06, "loss": 43.7826, "step": 2507 }, { "epoch": 59.71641791044776, "grad_norm": 22.272457122802734, "learning_rate": 6.7658730158730165e-06, "loss": 44.4984, "step": 2508 }, { "epoch": 59.74029850746269, "grad_norm": 20.087844848632812, "learning_rate": 6.761904761904763e-06, "loss": 43.6682, "step": 2509 }, { "epoch": 59.76417910447761, "grad_norm": 22.083215713500977, "learning_rate": 6.757936507936509e-06, "loss": 43.1799, "step": 2510 }, { "epoch": 59.788059701492536, "grad_norm": 18.583234786987305, "learning_rate": 6.753968253968255e-06, "loss": 44.9017, "step": 2511 }, { "epoch": 59.811940298507466, "grad_norm": 20.20134735107422, "learning_rate": 6.750000000000001e-06, "loss": 44.1051, "step": 2512 }, { "epoch": 59.83582089552239, "grad_norm": 20.973419189453125, "learning_rate": 6.746031746031747e-06, "loss": 44.138, "step": 2513 }, { "epoch": 59.85970149253731, "grad_norm": 15.97662353515625, "learning_rate": 6.742063492063493e-06, "loss": 44.9675, "step": 2514 }, { "epoch": 59.88358208955224, "grad_norm": 23.126541137695312, "learning_rate": 6.738095238095239e-06, "loss": 44.4417, "step": 2515 }, { "epoch": 59.907462686567165, "grad_norm": 19.715782165527344, "learning_rate": 6.7341269841269856e-06, "loss": 44.768, "step": 2516 }, { "epoch": 59.93134328358209, "grad_norm": 27.000070571899414, "learning_rate": 6.730158730158731e-06, "loss": 45.0485, "step": 2517 }, { "epoch": 59.95522388059702, "grad_norm": 21.746152877807617, "learning_rate": 6.726190476190477e-06, "loss": 44.5848, "step": 2518 }, { "epoch": 59.97910447761194, "grad_norm": 17.86555290222168, "learning_rate": 6.7222222222222235e-06, "loss": 44.0213, "step": 2519 }, { "epoch": 60.0, "grad_norm": 20.369977951049805, "learning_rate": 6.718253968253968e-06, "loss": 38.2947, "step": 2520 }, { "epoch": 60.0, "step": 2520, "total_flos": 1.2389502240404026e+17, "train_loss": 14.973776844569615, "train_runtime": 26580.6785, "train_samples_per_second": 12.081, "train_steps_per_second": 0.095 }, { "epoch": 60.02388059701492, "grad_norm": 22.268356323242188, "learning_rate": 1e-05, "loss": 43.4551, "step": 2521 }, { "epoch": 60.04776119402985, "grad_norm": Infinity, "learning_rate": 9.99702380952381e-06, "loss": 49.1255, "step": 2522 }, { "epoch": 60.071641791044776, "grad_norm": 202.42481994628906, "learning_rate": 9.99702380952381e-06, "loss": 48.2461, "step": 2523 }, { "epoch": 60.0955223880597, "grad_norm": 190.5668182373047, "learning_rate": 9.99404761904762e-06, "loss": 49.3017, "step": 2524 }, { "epoch": 60.11940298507463, "grad_norm": 69.69708251953125, "learning_rate": 9.99107142857143e-06, "loss": 47.4179, "step": 2525 }, { "epoch": 60.14328358208955, "grad_norm": 94.14574432373047, "learning_rate": 9.988095238095239e-06, "loss": 46.2633, "step": 2526 }, { "epoch": 60.167164179104475, "grad_norm": 59.666481018066406, "learning_rate": 9.985119047619048e-06, "loss": 45.9356, "step": 2527 }, { "epoch": 60.191044776119405, "grad_norm": 66.49242401123047, "learning_rate": 9.982142857142858e-06, "loss": 46.0376, "step": 2528 }, { "epoch": 60.21492537313433, "grad_norm": 46.52798080444336, "learning_rate": 9.979166666666668e-06, "loss": 42.4879, "step": 2529 }, { "epoch": 60.23880597014925, "grad_norm": 41.28635025024414, "learning_rate": 9.976190476190477e-06, "loss": 43.5567, "step": 2530 }, { "epoch": 60.26268656716418, "grad_norm": 48.749210357666016, "learning_rate": 9.973214285714287e-06, "loss": 44.6395, "step": 2531 }, { "epoch": 60.286567164179104, "grad_norm": 40.452842712402344, "learning_rate": 9.970238095238096e-06, "loss": 44.65, "step": 2532 }, { "epoch": 60.31044776119403, "grad_norm": 41.35492706298828, "learning_rate": 9.967261904761905e-06, "loss": 44.8214, "step": 2533 }, { "epoch": 60.33432835820896, "grad_norm": 32.109527587890625, "learning_rate": 9.964285714285714e-06, "loss": 46.1763, "step": 2534 }, { "epoch": 60.35820895522388, "grad_norm": 28.22223472595215, "learning_rate": 9.961309523809525e-06, "loss": 45.4326, "step": 2535 }, { "epoch": 60.3820895522388, "grad_norm": 42.460872650146484, "learning_rate": 9.958333333333334e-06, "loss": 44.2028, "step": 2536 }, { "epoch": 60.40597014925373, "grad_norm": 24.426565170288086, "learning_rate": 9.955357142857143e-06, "loss": 44.141, "step": 2537 }, { "epoch": 60.429850746268656, "grad_norm": 30.63353729248047, "learning_rate": 9.952380952380954e-06, "loss": 44.1465, "step": 2538 }, { "epoch": 60.45373134328358, "grad_norm": 24.15091323852539, "learning_rate": 9.949404761904763e-06, "loss": 44.4579, "step": 2539 }, { "epoch": 60.47761194029851, "grad_norm": 30.757854461669922, "learning_rate": 9.946428571428572e-06, "loss": 44.251, "step": 2540 }, { "epoch": 60.50149253731343, "grad_norm": 24.651243209838867, "learning_rate": 9.943452380952381e-06, "loss": 45.6042, "step": 2541 }, { "epoch": 60.525373134328355, "grad_norm": 23.414180755615234, "learning_rate": 9.940476190476192e-06, "loss": 43.8686, "step": 2542 }, { "epoch": 60.549253731343285, "grad_norm": 29.41202735900879, "learning_rate": 9.937500000000001e-06, "loss": 45.111, "step": 2543 }, { "epoch": 60.57313432835821, "grad_norm": 25.947559356689453, "learning_rate": 9.93452380952381e-06, "loss": 43.6131, "step": 2544 }, { "epoch": 60.59701492537313, "grad_norm": 30.613439559936523, "learning_rate": 9.93154761904762e-06, "loss": 44.9413, "step": 2545 }, { "epoch": 60.62089552238806, "grad_norm": 28.363725662231445, "learning_rate": 9.92857142857143e-06, "loss": 43.3202, "step": 2546 }, { "epoch": 60.644776119402984, "grad_norm": 24.05493927001953, "learning_rate": 9.925595238095239e-06, "loss": 45.434, "step": 2547 }, { "epoch": 60.668656716417914, "grad_norm": 30.285140991210938, "learning_rate": 9.922619047619048e-06, "loss": 44.2144, "step": 2548 }, { "epoch": 60.69253731343284, "grad_norm": 24.850799560546875, "learning_rate": 9.919642857142859e-06, "loss": 43.4151, "step": 2549 }, { "epoch": 60.71641791044776, "grad_norm": 23.70008087158203, "learning_rate": 9.916666666666668e-06, "loss": 44.0035, "step": 2550 }, { "epoch": 60.74029850746269, "grad_norm": 29.343976974487305, "learning_rate": 9.913690476190477e-06, "loss": 44.9651, "step": 2551 }, { "epoch": 60.76417910447761, "grad_norm": 19.914142608642578, "learning_rate": 9.910714285714288e-06, "loss": 44.3699, "step": 2552 }, { "epoch": 60.788059701492536, "grad_norm": 23.31308937072754, "learning_rate": 9.907738095238097e-06, "loss": 45.7724, "step": 2553 }, { "epoch": 60.811940298507466, "grad_norm": 26.089832305908203, "learning_rate": 9.904761904761906e-06, "loss": 44.9611, "step": 2554 }, { "epoch": 60.83582089552239, "grad_norm": 22.15082550048828, "learning_rate": 9.901785714285715e-06, "loss": 43.1386, "step": 2555 }, { "epoch": 60.85970149253731, "grad_norm": 30.4031925201416, "learning_rate": 9.898809523809525e-06, "loss": 44.324, "step": 2556 }, { "epoch": 60.88358208955224, "grad_norm": 22.16672134399414, "learning_rate": 9.895833333333334e-06, "loss": 43.6286, "step": 2557 }, { "epoch": 60.907462686567165, "grad_norm": 21.490468978881836, "learning_rate": 9.892857142857143e-06, "loss": 42.8494, "step": 2558 }, { "epoch": 60.93134328358209, "grad_norm": 28.823944091796875, "learning_rate": 9.889880952380954e-06, "loss": 42.9944, "step": 2559 }, { "epoch": 60.95522388059702, "grad_norm": 19.950031280517578, "learning_rate": 9.886904761904763e-06, "loss": 44.7105, "step": 2560 }, { "epoch": 60.97910447761194, "grad_norm": 31.069194793701172, "learning_rate": 9.883928571428572e-06, "loss": 43.6064, "step": 2561 }, { "epoch": 61.0, "grad_norm": 19.411388397216797, "learning_rate": 9.880952380952381e-06, "loss": 38.8114, "step": 2562 }, { "epoch": 61.02388059701492, "grad_norm": 26.66782569885254, "learning_rate": 9.877976190476192e-06, "loss": 43.0697, "step": 2563 }, { "epoch": 61.04776119402985, "grad_norm": 25.78309440612793, "learning_rate": 9.875000000000001e-06, "loss": 43.8682, "step": 2564 }, { "epoch": 61.071641791044776, "grad_norm": 19.312572479248047, "learning_rate": 9.87202380952381e-06, "loss": 44.7347, "step": 2565 }, { "epoch": 61.0955223880597, "grad_norm": 33.189598083496094, "learning_rate": 9.869047619047621e-06, "loss": 43.6253, "step": 2566 }, { "epoch": 61.11940298507463, "grad_norm": 27.351518630981445, "learning_rate": 9.86607142857143e-06, "loss": 43.6716, "step": 2567 }, { "epoch": 61.14328358208955, "grad_norm": 21.69624900817871, "learning_rate": 9.863095238095239e-06, "loss": 44.2277, "step": 2568 }, { "epoch": 61.167164179104475, "grad_norm": 27.58108901977539, "learning_rate": 9.860119047619048e-06, "loss": 44.3117, "step": 2569 }, { "epoch": 61.191044776119405, "grad_norm": 18.802303314208984, "learning_rate": 9.857142857142859e-06, "loss": 44.4119, "step": 2570 }, { "epoch": 61.21492537313433, "grad_norm": 21.36333656311035, "learning_rate": 9.854166666666668e-06, "loss": 45.0113, "step": 2571 }, { "epoch": 61.23880597014925, "grad_norm": 23.86919593811035, "learning_rate": 9.851190476190477e-06, "loss": 43.8577, "step": 2572 }, { "epoch": 61.26268656716418, "grad_norm": 20.259685516357422, "learning_rate": 9.848214285714288e-06, "loss": 43.921, "step": 2573 }, { "epoch": 61.286567164179104, "grad_norm": 29.262182235717773, "learning_rate": 9.845238095238097e-06, "loss": 44.1589, "step": 2574 }, { "epoch": 61.31044776119403, "grad_norm": 21.13313102722168, "learning_rate": 9.842261904761906e-06, "loss": 44.3854, "step": 2575 }, { "epoch": 61.33432835820896, "grad_norm": 23.83458137512207, "learning_rate": 9.839285714285715e-06, "loss": 43.5571, "step": 2576 }, { "epoch": 61.35820895522388, "grad_norm": 18.778934478759766, "learning_rate": 9.836309523809524e-06, "loss": 43.827, "step": 2577 }, { "epoch": 61.3820895522388, "grad_norm": 22.37734031677246, "learning_rate": 9.833333333333333e-06, "loss": 45.207, "step": 2578 }, { "epoch": 61.40597014925373, "grad_norm": 25.046817779541016, "learning_rate": 9.830357142857144e-06, "loss": 43.7649, "step": 2579 }, { "epoch": 61.429850746268656, "grad_norm": 21.867618560791016, "learning_rate": 9.827380952380953e-06, "loss": 43.7025, "step": 2580 }, { "epoch": 61.45373134328358, "grad_norm": 22.729969024658203, "learning_rate": 9.824404761904762e-06, "loss": 44.3454, "step": 2581 }, { "epoch": 61.47761194029851, "grad_norm": 23.03755760192871, "learning_rate": 9.821428571428573e-06, "loss": 43.6942, "step": 2582 }, { "epoch": 61.50149253731343, "grad_norm": 20.736244201660156, "learning_rate": 9.818452380952382e-06, "loss": 44.6758, "step": 2583 }, { "epoch": 61.525373134328355, "grad_norm": 17.63121795654297, "learning_rate": 9.81547619047619e-06, "loss": 43.3541, "step": 2584 }, { "epoch": 61.549253731343285, "grad_norm": 18.32825469970703, "learning_rate": 9.8125e-06, "loss": 43.8749, "step": 2585 }, { "epoch": 61.57313432835821, "grad_norm": 19.190811157226562, "learning_rate": 9.80952380952381e-06, "loss": 44.1191, "step": 2586 }, { "epoch": 61.59701492537313, "grad_norm": 17.29913330078125, "learning_rate": 9.80654761904762e-06, "loss": 44.5885, "step": 2587 }, { "epoch": 61.62089552238806, "grad_norm": 21.12825584411621, "learning_rate": 9.803571428571428e-06, "loss": 43.9206, "step": 2588 }, { "epoch": 61.644776119402984, "grad_norm": 26.83173942565918, "learning_rate": 9.80059523809524e-06, "loss": 44.7224, "step": 2589 }, { "epoch": 61.668656716417914, "grad_norm": 18.34907341003418, "learning_rate": 9.797619047619048e-06, "loss": 44.3625, "step": 2590 }, { "epoch": 61.69253731343284, "grad_norm": 18.709016799926758, "learning_rate": 9.794642857142857e-06, "loss": 43.2573, "step": 2591 }, { "epoch": 61.71641791044776, "grad_norm": 24.946210861206055, "learning_rate": 9.791666666666666e-06, "loss": 43.4754, "step": 2592 }, { "epoch": 61.74029850746269, "grad_norm": 22.253814697265625, "learning_rate": 9.788690476190477e-06, "loss": 43.7911, "step": 2593 }, { "epoch": 61.76417910447761, "grad_norm": 21.44365692138672, "learning_rate": 9.785714285714286e-06, "loss": 44.9724, "step": 2594 }, { "epoch": 61.788059701492536, "grad_norm": 19.002973556518555, "learning_rate": 9.782738095238095e-06, "loss": 44.5918, "step": 2595 }, { "epoch": 61.811940298507466, "grad_norm": 23.515329360961914, "learning_rate": 9.779761904761906e-06, "loss": 43.7888, "step": 2596 }, { "epoch": 61.83582089552239, "grad_norm": 22.531938552856445, "learning_rate": 9.776785714285715e-06, "loss": 43.3796, "step": 2597 }, { "epoch": 61.85970149253731, "grad_norm": 21.187646865844727, "learning_rate": 9.773809523809524e-06, "loss": 44.5443, "step": 2598 }, { "epoch": 61.88358208955224, "grad_norm": 16.221466064453125, "learning_rate": 9.770833333333333e-06, "loss": 43.3915, "step": 2599 }, { "epoch": 61.907462686567165, "grad_norm": 26.778657913208008, "learning_rate": 9.767857142857144e-06, "loss": 43.5025, "step": 2600 }, { "epoch": 61.93134328358209, "grad_norm": 22.768152236938477, "learning_rate": 9.764880952380953e-06, "loss": 44.5111, "step": 2601 }, { "epoch": 61.95522388059702, "grad_norm": 27.74826431274414, "learning_rate": 9.761904761904762e-06, "loss": 44.5974, "step": 2602 }, { "epoch": 61.97910447761194, "grad_norm": 22.05244255065918, "learning_rate": 9.758928571428573e-06, "loss": 43.5759, "step": 2603 }, { "epoch": 62.0, "grad_norm": 19.424118041992188, "learning_rate": 9.755952380952382e-06, "loss": 39.0078, "step": 2604 }, { "epoch": 62.02388059701492, "grad_norm": 26.683176040649414, "learning_rate": 9.75297619047619e-06, "loss": 43.6656, "step": 2605 }, { "epoch": 62.04776119402985, "grad_norm": 20.35538673400879, "learning_rate": 9.75e-06, "loss": 45.2448, "step": 2606 }, { "epoch": 62.071641791044776, "grad_norm": 20.601633071899414, "learning_rate": 9.74702380952381e-06, "loss": 45.3538, "step": 2607 }, { "epoch": 62.0955223880597, "grad_norm": 17.472143173217773, "learning_rate": 9.74404761904762e-06, "loss": 44.3077, "step": 2608 }, { "epoch": 62.11940298507463, "grad_norm": 21.365177154541016, "learning_rate": 9.741071428571429e-06, "loss": 43.7227, "step": 2609 }, { "epoch": 62.14328358208955, "grad_norm": 25.560060501098633, "learning_rate": 9.73809523809524e-06, "loss": 44.6434, "step": 2610 }, { "epoch": 62.167164179104475, "grad_norm": 20.327442169189453, "learning_rate": 9.735119047619048e-06, "loss": 43.5369, "step": 2611 }, { "epoch": 62.191044776119405, "grad_norm": 19.99593734741211, "learning_rate": 9.732142857142858e-06, "loss": 44.1446, "step": 2612 }, { "epoch": 62.21492537313433, "grad_norm": 19.648574829101562, "learning_rate": 9.729166666666667e-06, "loss": 43.4092, "step": 2613 }, { "epoch": 62.23880597014925, "grad_norm": 26.606019973754883, "learning_rate": 9.726190476190477e-06, "loss": 44.3531, "step": 2614 }, { "epoch": 62.26268656716418, "grad_norm": 27.989334106445312, "learning_rate": 9.723214285714286e-06, "loss": 44.3962, "step": 2615 }, { "epoch": 62.286567164179104, "grad_norm": 22.545223236083984, "learning_rate": 9.720238095238095e-06, "loss": 42.8718, "step": 2616 }, { "epoch": 62.31044776119403, "grad_norm": 26.62592315673828, "learning_rate": 9.717261904761906e-06, "loss": 43.004, "step": 2617 }, { "epoch": 62.33432835820896, "grad_norm": 26.52608299255371, "learning_rate": 9.714285714285715e-06, "loss": 43.2152, "step": 2618 }, { "epoch": 62.35820895522388, "grad_norm": 20.17901611328125, "learning_rate": 9.711309523809524e-06, "loss": 43.2228, "step": 2619 }, { "epoch": 62.3820895522388, "grad_norm": 30.335596084594727, "learning_rate": 9.708333333333333e-06, "loss": 45.0079, "step": 2620 }, { "epoch": 62.40597014925373, "grad_norm": 26.02729606628418, "learning_rate": 9.705357142857144e-06, "loss": 43.3155, "step": 2621 }, { "epoch": 62.429850746268656, "grad_norm": 25.07903480529785, "learning_rate": 9.702380952380953e-06, "loss": 45.2378, "step": 2622 }, { "epoch": 62.45373134328358, "grad_norm": 28.89750099182129, "learning_rate": 9.699404761904762e-06, "loss": 42.2074, "step": 2623 }, { "epoch": 62.47761194029851, "grad_norm": 20.475128173828125, "learning_rate": 9.696428571428573e-06, "loss": 43.8792, "step": 2624 }, { "epoch": 62.50149253731343, "grad_norm": 26.103612899780273, "learning_rate": 9.693452380952382e-06, "loss": 44.961, "step": 2625 }, { "epoch": 62.525373134328355, "grad_norm": 21.666906356811523, "learning_rate": 9.690476190476191e-06, "loss": 43.4776, "step": 2626 }, { "epoch": 62.549253731343285, "grad_norm": 22.16206932067871, "learning_rate": 9.6875e-06, "loss": 43.1076, "step": 2627 }, { "epoch": 62.57313432835821, "grad_norm": 18.17336082458496, "learning_rate": 9.68452380952381e-06, "loss": 43.7046, "step": 2628 }, { "epoch": 62.59701492537313, "grad_norm": 25.36472511291504, "learning_rate": 9.68154761904762e-06, "loss": 44.8569, "step": 2629 }, { "epoch": 62.62089552238806, "grad_norm": 18.750511169433594, "learning_rate": 9.678571428571429e-06, "loss": 43.73, "step": 2630 }, { "epoch": 62.644776119402984, "grad_norm": 18.174638748168945, "learning_rate": 9.67559523809524e-06, "loss": 44.4229, "step": 2631 }, { "epoch": 62.668656716417914, "grad_norm": 17.8627872467041, "learning_rate": 9.672619047619049e-06, "loss": 44.9343, "step": 2632 }, { "epoch": 62.69253731343284, "grad_norm": 28.161239624023438, "learning_rate": 9.669642857142858e-06, "loss": 43.7649, "step": 2633 }, { "epoch": 62.71641791044776, "grad_norm": 27.701793670654297, "learning_rate": 9.666666666666667e-06, "loss": 43.7122, "step": 2634 }, { "epoch": 62.74029850746269, "grad_norm": 18.659244537353516, "learning_rate": 9.663690476190477e-06, "loss": 43.5179, "step": 2635 }, { "epoch": 62.76417910447761, "grad_norm": 35.163169860839844, "learning_rate": 9.660714285714287e-06, "loss": 44.4568, "step": 2636 }, { "epoch": 62.788059701492536, "grad_norm": NaN, "learning_rate": 9.657738095238096e-06, "loss": 42.4459, "step": 2637 }, { "epoch": 62.811940298507466, "grad_norm": 26.87259292602539, "learning_rate": 9.657738095238096e-06, "loss": 42.7683, "step": 2638 }, { "epoch": 62.83582089552239, "grad_norm": 31.837942123413086, "learning_rate": 9.654761904761906e-06, "loss": 44.2405, "step": 2639 }, { "epoch": 62.85970149253731, "grad_norm": 24.40672492980957, "learning_rate": 9.651785714285715e-06, "loss": 44.4058, "step": 2640 }, { "epoch": 62.88358208955224, "grad_norm": 29.0338134765625, "learning_rate": 9.648809523809524e-06, "loss": 43.405, "step": 2641 }, { "epoch": 62.907462686567165, "grad_norm": 28.022174835205078, "learning_rate": 9.645833333333333e-06, "loss": 43.6733, "step": 2642 }, { "epoch": 62.93134328358209, "grad_norm": 19.517061233520508, "learning_rate": 9.642857142857144e-06, "loss": 44.2386, "step": 2643 }, { "epoch": 62.95522388059702, "grad_norm": 22.2393741607666, "learning_rate": 9.639880952380953e-06, "loss": 45.1874, "step": 2644 }, { "epoch": 62.97910447761194, "grad_norm": 27.02622413635254, "learning_rate": 9.636904761904762e-06, "loss": 43.8541, "step": 2645 }, { "epoch": 63.0, "grad_norm": 20.51211929321289, "learning_rate": 9.633928571428573e-06, "loss": 39.6487, "step": 2646 }, { "epoch": 63.02388059701492, "grad_norm": 24.02116584777832, "learning_rate": 9.630952380952382e-06, "loss": 44.5685, "step": 2647 }, { "epoch": 63.04776119402985, "grad_norm": 30.00434112548828, "learning_rate": 9.627976190476191e-06, "loss": 43.2549, "step": 2648 }, { "epoch": 63.071641791044776, "grad_norm": 23.16147804260254, "learning_rate": 9.625e-06, "loss": 44.1254, "step": 2649 }, { "epoch": 63.0955223880597, "grad_norm": 30.86275291442871, "learning_rate": 9.622023809523811e-06, "loss": 43.4804, "step": 2650 }, { "epoch": 63.11940298507463, "grad_norm": 27.942575454711914, "learning_rate": 9.61904761904762e-06, "loss": 44.4437, "step": 2651 }, { "epoch": 63.14328358208955, "grad_norm": 22.330169677734375, "learning_rate": 9.616071428571429e-06, "loss": 44.1067, "step": 2652 }, { "epoch": 63.167164179104475, "grad_norm": 27.878795623779297, "learning_rate": 9.61309523809524e-06, "loss": 42.5768, "step": 2653 }, { "epoch": 63.191044776119405, "grad_norm": 23.200098037719727, "learning_rate": 9.610119047619049e-06, "loss": 43.5906, "step": 2654 }, { "epoch": 63.21492537313433, "grad_norm": 23.872238159179688, "learning_rate": 9.607142857142858e-06, "loss": 43.8177, "step": 2655 }, { "epoch": 63.23880597014925, "grad_norm": 31.89397430419922, "learning_rate": 9.604166666666669e-06, "loss": 43.5719, "step": 2656 }, { "epoch": 63.26268656716418, "grad_norm": 24.745256423950195, "learning_rate": 9.601190476190478e-06, "loss": 43.1085, "step": 2657 }, { "epoch": 63.286567164179104, "grad_norm": 31.859682083129883, "learning_rate": 9.598214285714287e-06, "loss": 42.8871, "step": 2658 }, { "epoch": 63.31044776119403, "grad_norm": 25.792551040649414, "learning_rate": 9.595238095238096e-06, "loss": 42.7027, "step": 2659 }, { "epoch": 63.33432835820896, "grad_norm": 25.225967407226562, "learning_rate": 9.592261904761906e-06, "loss": 43.0075, "step": 2660 }, { "epoch": 63.35820895522388, "grad_norm": 27.146207809448242, "learning_rate": 9.589285714285716e-06, "loss": 44.3992, "step": 2661 }, { "epoch": 63.3820895522388, "grad_norm": 16.27069664001465, "learning_rate": 9.586309523809525e-06, "loss": 44.1708, "step": 2662 }, { "epoch": 63.40597014925373, "grad_norm": 34.79555892944336, "learning_rate": 9.583333333333335e-06, "loss": 44.4863, "step": 2663 }, { "epoch": 63.429850746268656, "grad_norm": 23.31925392150879, "learning_rate": 9.580357142857144e-06, "loss": 44.3615, "step": 2664 }, { "epoch": 63.45373134328358, "grad_norm": 28.239566802978516, "learning_rate": 9.577380952380953e-06, "loss": 45.042, "step": 2665 }, { "epoch": 63.47761194029851, "grad_norm": 25.107566833496094, "learning_rate": 9.574404761904762e-06, "loss": 44.4372, "step": 2666 }, { "epoch": 63.50149253731343, "grad_norm": 20.601322174072266, "learning_rate": 9.571428571428573e-06, "loss": 43.5807, "step": 2667 }, { "epoch": 63.525373134328355, "grad_norm": 34.94065475463867, "learning_rate": 9.568452380952382e-06, "loss": 42.8904, "step": 2668 }, { "epoch": 63.549253731343285, "grad_norm": 26.336591720581055, "learning_rate": 9.565476190476191e-06, "loss": 44.1117, "step": 2669 }, { "epoch": 63.57313432835821, "grad_norm": 35.81476974487305, "learning_rate": 9.562500000000002e-06, "loss": 44.847, "step": 2670 }, { "epoch": 63.59701492537313, "grad_norm": 21.011463165283203, "learning_rate": 9.559523809523811e-06, "loss": 43.9457, "step": 2671 }, { "epoch": 63.62089552238806, "grad_norm": 36.32665252685547, "learning_rate": 9.55654761904762e-06, "loss": 43.4279, "step": 2672 }, { "epoch": 63.644776119402984, "grad_norm": 21.384214401245117, "learning_rate": 9.55357142857143e-06, "loss": 45.3947, "step": 2673 }, { "epoch": 63.668656716417914, "grad_norm": 38.893680572509766, "learning_rate": 9.55059523809524e-06, "loss": 43.9397, "step": 2674 }, { "epoch": 63.69253731343284, "grad_norm": 26.085948944091797, "learning_rate": 9.547619047619049e-06, "loss": 43.118, "step": 2675 }, { "epoch": 63.71641791044776, "grad_norm": 35.67828369140625, "learning_rate": 9.544642857142858e-06, "loss": 44.8236, "step": 2676 }, { "epoch": 63.74029850746269, "grad_norm": 25.065685272216797, "learning_rate": 9.541666666666669e-06, "loss": 43.8344, "step": 2677 }, { "epoch": 63.76417910447761, "grad_norm": 26.32991600036621, "learning_rate": 9.538690476190478e-06, "loss": 45.5309, "step": 2678 }, { "epoch": 63.788059701492536, "grad_norm": 26.833250045776367, "learning_rate": 9.535714285714287e-06, "loss": 43.7626, "step": 2679 }, { "epoch": 63.811940298507466, "grad_norm": 23.64604949951172, "learning_rate": 9.532738095238096e-06, "loss": 43.7545, "step": 2680 }, { "epoch": 63.83582089552239, "grad_norm": 24.57122230529785, "learning_rate": 9.529761904761905e-06, "loss": 43.5666, "step": 2681 }, { "epoch": 63.85970149253731, "grad_norm": 21.429603576660156, "learning_rate": 9.526785714285714e-06, "loss": 44.8421, "step": 2682 }, { "epoch": 63.88358208955224, "grad_norm": NaN, "learning_rate": 9.523809523809525e-06, "loss": 77.6969, "step": 2683 }, { "epoch": 63.907462686567165, "grad_norm": 25.843442916870117, "learning_rate": 9.523809523809525e-06, "loss": 42.4353, "step": 2684 }, { "epoch": 63.93134328358209, "grad_norm": 26.74856185913086, "learning_rate": 9.520833333333334e-06, "loss": 45.0478, "step": 2685 }, { "epoch": 63.95522388059702, "grad_norm": 22.9956111907959, "learning_rate": 9.517857142857143e-06, "loss": 44.3383, "step": 2686 }, { "epoch": 63.97910447761194, "grad_norm": 17.62372589111328, "learning_rate": 9.514880952380952e-06, "loss": 43.2765, "step": 2687 }, { "epoch": 64.0, "grad_norm": 21.372318267822266, "learning_rate": 9.511904761904763e-06, "loss": 37.8049, "step": 2688 }, { "epoch": 64.02388059701492, "grad_norm": 23.412595748901367, "learning_rate": 9.508928571428572e-06, "loss": 45.446, "step": 2689 }, { "epoch": 64.04776119402985, "grad_norm": 21.825000762939453, "learning_rate": 9.50595238095238e-06, "loss": 42.871, "step": 2690 }, { "epoch": 64.07164179104478, "grad_norm": 18.50835418701172, "learning_rate": 9.502976190476191e-06, "loss": 43.1485, "step": 2691 }, { "epoch": 64.0955223880597, "grad_norm": 22.428272247314453, "learning_rate": 9.5e-06, "loss": 42.7172, "step": 2692 }, { "epoch": 64.11940298507463, "grad_norm": 19.58050537109375, "learning_rate": 9.49702380952381e-06, "loss": 43.4599, "step": 2693 }, { "epoch": 64.14328358208955, "grad_norm": 21.66231346130371, "learning_rate": 9.494047619047619e-06, "loss": 43.601, "step": 2694 }, { "epoch": 64.16716417910447, "grad_norm": 29.5888729095459, "learning_rate": 9.49107142857143e-06, "loss": 44.8395, "step": 2695 }, { "epoch": 64.1910447761194, "grad_norm": 16.3875675201416, "learning_rate": 9.488095238095238e-06, "loss": 43.8201, "step": 2696 }, { "epoch": 64.21492537313434, "grad_norm": 28.326553344726562, "learning_rate": 9.485119047619047e-06, "loss": 45.1189, "step": 2697 }, { "epoch": 64.23880597014926, "grad_norm": 20.549386978149414, "learning_rate": 9.482142857142858e-06, "loss": 44.0127, "step": 2698 }, { "epoch": 64.26268656716418, "grad_norm": 25.79012680053711, "learning_rate": 9.479166666666667e-06, "loss": 43.0571, "step": 2699 }, { "epoch": 64.2865671641791, "grad_norm": 31.000024795532227, "learning_rate": 9.476190476190476e-06, "loss": 42.4615, "step": 2700 }, { "epoch": 64.31044776119403, "grad_norm": 19.49623680114746, "learning_rate": 9.473214285714285e-06, "loss": 45.6714, "step": 2701 }, { "epoch": 64.33432835820895, "grad_norm": 23.13125991821289, "learning_rate": 9.470238095238096e-06, "loss": 44.1373, "step": 2702 }, { "epoch": 64.35820895522389, "grad_norm": 32.59320068359375, "learning_rate": 9.467261904761905e-06, "loss": 42.5976, "step": 2703 }, { "epoch": 64.38208955223881, "grad_norm": 19.396995544433594, "learning_rate": 9.464285714285714e-06, "loss": 43.9782, "step": 2704 }, { "epoch": 64.40597014925373, "grad_norm": 28.275136947631836, "learning_rate": 9.461309523809525e-06, "loss": 44.3116, "step": 2705 }, { "epoch": 64.42985074626866, "grad_norm": 25.157663345336914, "learning_rate": 9.458333333333334e-06, "loss": 45.07, "step": 2706 }, { "epoch": 64.45373134328358, "grad_norm": 22.684513092041016, "learning_rate": 9.455357142857143e-06, "loss": 44.1489, "step": 2707 }, { "epoch": 64.4776119402985, "grad_norm": 19.41883659362793, "learning_rate": 9.452380952380952e-06, "loss": 43.5031, "step": 2708 }, { "epoch": 64.50149253731344, "grad_norm": 27.202308654785156, "learning_rate": 9.449404761904763e-06, "loss": 44.4811, "step": 2709 }, { "epoch": 64.52537313432836, "grad_norm": 17.535995483398438, "learning_rate": 9.446428571428572e-06, "loss": 43.7648, "step": 2710 }, { "epoch": 64.54925373134328, "grad_norm": 21.47702980041504, "learning_rate": 9.443452380952381e-06, "loss": 42.7421, "step": 2711 }, { "epoch": 64.57313432835821, "grad_norm": 20.23499870300293, "learning_rate": 9.440476190476192e-06, "loss": 43.8339, "step": 2712 }, { "epoch": 64.59701492537313, "grad_norm": 19.41843032836914, "learning_rate": 9.4375e-06, "loss": 44.182, "step": 2713 }, { "epoch": 64.62089552238805, "grad_norm": 22.892518997192383, "learning_rate": 9.43452380952381e-06, "loss": 42.7459, "step": 2714 }, { "epoch": 64.64477611940299, "grad_norm": 25.601083755493164, "learning_rate": 9.431547619047619e-06, "loss": 44.316, "step": 2715 }, { "epoch": 64.66865671641791, "grad_norm": 20.23451042175293, "learning_rate": 9.42857142857143e-06, "loss": 44.6613, "step": 2716 }, { "epoch": 64.69253731343284, "grad_norm": 16.326499938964844, "learning_rate": 9.425595238095239e-06, "loss": 43.9092, "step": 2717 }, { "epoch": 64.71641791044776, "grad_norm": 29.170324325561523, "learning_rate": 9.422619047619048e-06, "loss": 42.957, "step": 2718 }, { "epoch": 64.74029850746268, "grad_norm": 24.257295608520508, "learning_rate": 9.419642857142858e-06, "loss": 44.119, "step": 2719 }, { "epoch": 64.7641791044776, "grad_norm": 21.303083419799805, "learning_rate": 9.416666666666667e-06, "loss": 43.4882, "step": 2720 }, { "epoch": 64.78805970149254, "grad_norm": 20.77082633972168, "learning_rate": 9.413690476190476e-06, "loss": 43.9079, "step": 2721 }, { "epoch": 64.81194029850747, "grad_norm": 24.470279693603516, "learning_rate": 9.410714285714286e-06, "loss": 45.0313, "step": 2722 }, { "epoch": 64.83582089552239, "grad_norm": 22.445308685302734, "learning_rate": 9.407738095238096e-06, "loss": 43.0798, "step": 2723 }, { "epoch": 64.85970149253731, "grad_norm": 27.02490234375, "learning_rate": 9.404761904761905e-06, "loss": 43.1318, "step": 2724 }, { "epoch": 64.88358208955223, "grad_norm": 22.678592681884766, "learning_rate": 9.401785714285714e-06, "loss": 44.1473, "step": 2725 }, { "epoch": 64.90746268656716, "grad_norm": 26.62460708618164, "learning_rate": 9.398809523809525e-06, "loss": 43.2439, "step": 2726 }, { "epoch": 64.9313432835821, "grad_norm": 24.331209182739258, "learning_rate": 9.395833333333334e-06, "loss": 43.517, "step": 2727 }, { "epoch": 64.95522388059702, "grad_norm": 20.00579071044922, "learning_rate": 9.392857142857143e-06, "loss": 44.7679, "step": 2728 }, { "epoch": 64.97910447761194, "grad_norm": 28.423246383666992, "learning_rate": 9.389880952380954e-06, "loss": 43.5171, "step": 2729 }, { "epoch": 65.0, "grad_norm": 22.855792999267578, "learning_rate": 9.386904761904763e-06, "loss": 38.3847, "step": 2730 }, { "epoch": 65.02388059701492, "grad_norm": 22.053749084472656, "learning_rate": 9.383928571428572e-06, "loss": 44.0032, "step": 2731 }, { "epoch": 65.04776119402985, "grad_norm": 24.45530891418457, "learning_rate": 9.380952380952381e-06, "loss": 43.4024, "step": 2732 }, { "epoch": 65.07164179104478, "grad_norm": 24.508438110351562, "learning_rate": 9.377976190476192e-06, "loss": 43.4435, "step": 2733 }, { "epoch": 65.0955223880597, "grad_norm": 22.03391456604004, "learning_rate": 9.375000000000001e-06, "loss": 43.413, "step": 2734 }, { "epoch": 65.11940298507463, "grad_norm": 23.95793342590332, "learning_rate": 9.37202380952381e-06, "loss": 44.1908, "step": 2735 }, { "epoch": 65.14328358208955, "grad_norm": 23.00299072265625, "learning_rate": 9.36904761904762e-06, "loss": 43.9526, "step": 2736 }, { "epoch": 65.16716417910447, "grad_norm": 21.019451141357422, "learning_rate": 9.36607142857143e-06, "loss": 44.114, "step": 2737 }, { "epoch": 65.1910447761194, "grad_norm": 21.974138259887695, "learning_rate": 9.363095238095239e-06, "loss": 43.8516, "step": 2738 }, { "epoch": 65.21492537313434, "grad_norm": 17.860519409179688, "learning_rate": 9.360119047619048e-06, "loss": 44.2829, "step": 2739 }, { "epoch": 65.23880597014926, "grad_norm": 18.76349639892578, "learning_rate": 9.357142857142859e-06, "loss": 42.7555, "step": 2740 }, { "epoch": 65.26268656716418, "grad_norm": 22.4278621673584, "learning_rate": 9.354166666666668e-06, "loss": 43.807, "step": 2741 }, { "epoch": 65.2865671641791, "grad_norm": 17.867431640625, "learning_rate": 9.351190476190477e-06, "loss": 43.474, "step": 2742 }, { "epoch": 65.31044776119403, "grad_norm": 16.060117721557617, "learning_rate": 9.348214285714287e-06, "loss": 44.2362, "step": 2743 }, { "epoch": 65.33432835820895, "grad_norm": 27.756179809570312, "learning_rate": 9.345238095238096e-06, "loss": 43.9566, "step": 2744 }, { "epoch": 65.35820895522389, "grad_norm": 18.937381744384766, "learning_rate": 9.342261904761905e-06, "loss": 42.2936, "step": 2745 }, { "epoch": 65.38208955223881, "grad_norm": 23.91965675354004, "learning_rate": 9.339285714285715e-06, "loss": 43.1194, "step": 2746 }, { "epoch": 65.40597014925373, "grad_norm": 28.585317611694336, "learning_rate": 9.336309523809525e-06, "loss": 43.7419, "step": 2747 }, { "epoch": 65.42985074626866, "grad_norm": 18.788578033447266, "learning_rate": 9.333333333333334e-06, "loss": 43.7811, "step": 2748 }, { "epoch": 65.45373134328358, "grad_norm": 24.84532928466797, "learning_rate": 9.330357142857143e-06, "loss": 44.4898, "step": 2749 }, { "epoch": 65.4776119402985, "grad_norm": 20.880659103393555, "learning_rate": 9.327380952380954e-06, "loss": 44.5627, "step": 2750 }, { "epoch": 65.50149253731344, "grad_norm": 18.502254486083984, "learning_rate": 9.324404761904763e-06, "loss": 43.621, "step": 2751 }, { "epoch": 65.52537313432836, "grad_norm": 23.150991439819336, "learning_rate": 9.321428571428572e-06, "loss": 43.9683, "step": 2752 }, { "epoch": 65.54925373134328, "grad_norm": 20.03653907775879, "learning_rate": 9.318452380952381e-06, "loss": 42.4545, "step": 2753 }, { "epoch": 65.57313432835821, "grad_norm": 24.8642635345459, "learning_rate": 9.315476190476192e-06, "loss": 43.1368, "step": 2754 }, { "epoch": 65.59701492537313, "grad_norm": 19.812273025512695, "learning_rate": 9.312500000000001e-06, "loss": 44.5991, "step": 2755 }, { "epoch": 65.62089552238805, "grad_norm": 20.746320724487305, "learning_rate": 9.30952380952381e-06, "loss": 42.3573, "step": 2756 }, { "epoch": 65.64477611940299, "grad_norm": 28.684810638427734, "learning_rate": 9.30654761904762e-06, "loss": 43.798, "step": 2757 }, { "epoch": 65.66865671641791, "grad_norm": 17.441326141357422, "learning_rate": 9.30357142857143e-06, "loss": 44.0526, "step": 2758 }, { "epoch": 65.69253731343284, "grad_norm": 27.091472625732422, "learning_rate": 9.300595238095239e-06, "loss": 43.5748, "step": 2759 }, { "epoch": 65.71641791044776, "grad_norm": 23.270544052124023, "learning_rate": 9.297619047619048e-06, "loss": 44.0027, "step": 2760 }, { "epoch": 65.74029850746268, "grad_norm": 28.322011947631836, "learning_rate": 9.294642857142859e-06, "loss": 44.4845, "step": 2761 }, { "epoch": 65.7641791044776, "grad_norm": 22.097503662109375, "learning_rate": 9.291666666666668e-06, "loss": 42.6655, "step": 2762 }, { "epoch": 65.78805970149254, "grad_norm": 28.492340087890625, "learning_rate": 9.288690476190477e-06, "loss": 44.669, "step": 2763 }, { "epoch": 65.81194029850747, "grad_norm": 18.208921432495117, "learning_rate": 9.285714285714288e-06, "loss": 44.1797, "step": 2764 }, { "epoch": 65.83582089552239, "grad_norm": 32.15492248535156, "learning_rate": 9.282738095238097e-06, "loss": 43.1981, "step": 2765 }, { "epoch": 65.85970149253731, "grad_norm": 22.335176467895508, "learning_rate": 9.279761904761906e-06, "loss": 43.0925, "step": 2766 }, { "epoch": 65.88358208955223, "grad_norm": 26.412460327148438, "learning_rate": 9.276785714285715e-06, "loss": 43.3175, "step": 2767 }, { "epoch": 65.90746268656716, "grad_norm": 21.380569458007812, "learning_rate": 9.273809523809525e-06, "loss": 45.7111, "step": 2768 }, { "epoch": 65.9313432835821, "grad_norm": 26.568763732910156, "learning_rate": 9.270833333333334e-06, "loss": 44.4841, "step": 2769 }, { "epoch": 65.95522388059702, "grad_norm": 22.947973251342773, "learning_rate": 9.267857142857144e-06, "loss": 44.0597, "step": 2770 }, { "epoch": 65.97910447761194, "grad_norm": 28.732847213745117, "learning_rate": 9.264880952380954e-06, "loss": 43.9232, "step": 2771 }, { "epoch": 66.0, "grad_norm": 19.51029396057129, "learning_rate": 9.261904761904763e-06, "loss": 38.3696, "step": 2772 }, { "epoch": 66.02388059701492, "grad_norm": 26.772396087646484, "learning_rate": 9.258928571428572e-06, "loss": 44.29, "step": 2773 }, { "epoch": 66.04776119402985, "grad_norm": 28.08762550354004, "learning_rate": 9.255952380952381e-06, "loss": 43.5123, "step": 2774 }, { "epoch": 66.07164179104478, "grad_norm": 23.839458465576172, "learning_rate": 9.252976190476192e-06, "loss": 42.9248, "step": 2775 }, { "epoch": 66.0955223880597, "grad_norm": 34.98361587524414, "learning_rate": 9.250000000000001e-06, "loss": 44.5183, "step": 2776 }, { "epoch": 66.11940298507463, "grad_norm": 18.406028747558594, "learning_rate": 9.24702380952381e-06, "loss": 43.6267, "step": 2777 }, { "epoch": 66.14328358208955, "grad_norm": 24.17736053466797, "learning_rate": 9.244047619047621e-06, "loss": 43.9814, "step": 2778 }, { "epoch": 66.16716417910447, "grad_norm": 25.904033660888672, "learning_rate": 9.24107142857143e-06, "loss": 44.2089, "step": 2779 }, { "epoch": 66.1910447761194, "grad_norm": 18.518312454223633, "learning_rate": 9.238095238095239e-06, "loss": 43.8829, "step": 2780 }, { "epoch": 66.21492537313434, "grad_norm": 13.93060302734375, "learning_rate": 9.235119047619048e-06, "loss": 43.0088, "step": 2781 }, { "epoch": 66.23880597014926, "grad_norm": 18.91266632080078, "learning_rate": 9.232142857142859e-06, "loss": 43.9835, "step": 2782 }, { "epoch": 66.26268656716418, "grad_norm": 22.63692283630371, "learning_rate": 9.229166666666668e-06, "loss": 43.8378, "step": 2783 }, { "epoch": 66.2865671641791, "grad_norm": 19.935054779052734, "learning_rate": 9.226190476190477e-06, "loss": 43.5139, "step": 2784 }, { "epoch": 66.31044776119403, "grad_norm": 20.09627342224121, "learning_rate": 9.223214285714288e-06, "loss": 42.9882, "step": 2785 }, { "epoch": 66.33432835820895, "grad_norm": 16.47371482849121, "learning_rate": 9.220238095238097e-06, "loss": 44.0665, "step": 2786 }, { "epoch": 66.35820895522389, "grad_norm": 25.363866806030273, "learning_rate": 9.217261904761904e-06, "loss": 44.696, "step": 2787 }, { "epoch": 66.38208955223881, "grad_norm": 19.95291519165039, "learning_rate": 9.214285714285715e-06, "loss": 44.1116, "step": 2788 }, { "epoch": 66.40597014925373, "grad_norm": NaN, "learning_rate": 9.211309523809524e-06, "loss": 76.4785, "step": 2789 }, { "epoch": 66.42985074626866, "grad_norm": 19.490074157714844, "learning_rate": 9.211309523809524e-06, "loss": 44.0432, "step": 2790 }, { "epoch": 66.45373134328358, "grad_norm": 17.4990234375, "learning_rate": 9.208333333333333e-06, "loss": 43.2972, "step": 2791 }, { "epoch": 66.4776119402985, "grad_norm": 18.9461727142334, "learning_rate": 9.205357142857144e-06, "loss": 43.6698, "step": 2792 }, { "epoch": 66.50149253731344, "grad_norm": 27.035369873046875, "learning_rate": 9.202380952380953e-06, "loss": 43.0748, "step": 2793 }, { "epoch": 66.52537313432836, "grad_norm": 18.747451782226562, "learning_rate": 9.199404761904762e-06, "loss": 43.4684, "step": 2794 }, { "epoch": 66.54925373134328, "grad_norm": 22.31947135925293, "learning_rate": 9.196428571428571e-06, "loss": 43.224, "step": 2795 }, { "epoch": 66.57313432835821, "grad_norm": 20.444355010986328, "learning_rate": 9.193452380952382e-06, "loss": 44.7154, "step": 2796 }, { "epoch": 66.59701492537313, "grad_norm": 20.574586868286133, "learning_rate": 9.19047619047619e-06, "loss": 42.8251, "step": 2797 }, { "epoch": 66.62089552238805, "grad_norm": 21.91870880126953, "learning_rate": 9.1875e-06, "loss": 42.1616, "step": 2798 }, { "epoch": 66.64477611940299, "grad_norm": 20.03777503967285, "learning_rate": 9.18452380952381e-06, "loss": 43.9713, "step": 2799 }, { "epoch": 66.66865671641791, "grad_norm": 25.128442764282227, "learning_rate": 9.18154761904762e-06, "loss": 43.1631, "step": 2800 }, { "epoch": 66.69253731343284, "grad_norm": 21.742931365966797, "learning_rate": 9.178571428571429e-06, "loss": 43.8442, "step": 2801 }, { "epoch": 66.71641791044776, "grad_norm": 25.366992950439453, "learning_rate": 9.17559523809524e-06, "loss": 42.6068, "step": 2802 }, { "epoch": 66.74029850746268, "grad_norm": 22.109886169433594, "learning_rate": 9.172619047619048e-06, "loss": 43.0879, "step": 2803 }, { "epoch": 66.7641791044776, "grad_norm": 26.36429786682129, "learning_rate": 9.169642857142857e-06, "loss": 43.9465, "step": 2804 }, { "epoch": 66.78805970149254, "grad_norm": 20.30027198791504, "learning_rate": 9.166666666666666e-06, "loss": 44.1096, "step": 2805 }, { "epoch": 66.81194029850747, "grad_norm": 25.123811721801758, "learning_rate": 9.163690476190477e-06, "loss": 44.2981, "step": 2806 }, { "epoch": 66.83582089552239, "grad_norm": 23.127016067504883, "learning_rate": 9.160714285714286e-06, "loss": 42.5751, "step": 2807 }, { "epoch": 66.85970149253731, "grad_norm": NaN, "learning_rate": 9.157738095238095e-06, "loss": 66.1901, "step": 2808 }, { "epoch": 66.88358208955223, "grad_norm": 26.10099220275879, "learning_rate": 9.157738095238095e-06, "loss": 44.763, "step": 2809 }, { "epoch": 66.90746268656716, "grad_norm": 23.104337692260742, "learning_rate": 9.154761904761906e-06, "loss": 43.0964, "step": 2810 }, { "epoch": 66.9313432835821, "grad_norm": 25.94508934020996, "learning_rate": 9.151785714285715e-06, "loss": 44.2004, "step": 2811 }, { "epoch": 66.95522388059702, "grad_norm": 19.599328994750977, "learning_rate": 9.148809523809524e-06, "loss": 43.9027, "step": 2812 }, { "epoch": 66.97910447761194, "grad_norm": NaN, "learning_rate": 9.145833333333333e-06, "loss": 53.695, "step": 2813 }, { "epoch": 67.0, "grad_norm": 26.630434036254883, "learning_rate": 9.145833333333333e-06, "loss": 39.2172, "step": 2814 }, { "epoch": 67.02388059701492, "grad_norm": 20.954557418823242, "learning_rate": 9.142857142857144e-06, "loss": 45.022, "step": 2815 }, { "epoch": 67.04776119402985, "grad_norm": 34.23554611206055, "learning_rate": 9.139880952380953e-06, "loss": 44.5962, "step": 2816 }, { "epoch": 67.07164179104478, "grad_norm": 23.212488174438477, "learning_rate": 9.136904761904762e-06, "loss": 43.3898, "step": 2817 }, { "epoch": 67.0955223880597, "grad_norm": 28.811594009399414, "learning_rate": 9.133928571428573e-06, "loss": 43.0525, "step": 2818 }, { "epoch": 67.11940298507463, "grad_norm": 25.612319946289062, "learning_rate": 9.130952380952382e-06, "loss": 45.0229, "step": 2819 }, { "epoch": 67.14328358208955, "grad_norm": 19.928409576416016, "learning_rate": 9.12797619047619e-06, "loss": 42.2313, "step": 2820 }, { "epoch": 67.16716417910447, "grad_norm": 21.425382614135742, "learning_rate": 9.125e-06, "loss": 43.8085, "step": 2821 }, { "epoch": 67.1910447761194, "grad_norm": 24.726892471313477, "learning_rate": 9.12202380952381e-06, "loss": 42.5952, "step": 2822 }, { "epoch": 67.21492537313434, "grad_norm": 21.010210037231445, "learning_rate": 9.11904761904762e-06, "loss": 44.5508, "step": 2823 }, { "epoch": 67.23880597014926, "grad_norm": 17.506437301635742, "learning_rate": 9.116071428571429e-06, "loss": 42.7668, "step": 2824 }, { "epoch": 67.26268656716418, "grad_norm": 20.494918823242188, "learning_rate": 9.11309523809524e-06, "loss": 42.8061, "step": 2825 }, { "epoch": 67.2865671641791, "grad_norm": 20.985994338989258, "learning_rate": 9.110119047619049e-06, "loss": 44.8666, "step": 2826 }, { "epoch": 67.31044776119403, "grad_norm": 22.91364097595215, "learning_rate": 9.107142857142858e-06, "loss": 44.1208, "step": 2827 }, { "epoch": 67.33432835820895, "grad_norm": 19.81299591064453, "learning_rate": 9.104166666666667e-06, "loss": 43.939, "step": 2828 }, { "epoch": 67.35820895522389, "grad_norm": 23.234989166259766, "learning_rate": 9.101190476190477e-06, "loss": 42.0411, "step": 2829 }, { "epoch": 67.38208955223881, "grad_norm": 22.17540168762207, "learning_rate": 9.098214285714286e-06, "loss": 43.5693, "step": 2830 }, { "epoch": 67.40597014925373, "grad_norm": 21.292728424072266, "learning_rate": 9.095238095238095e-06, "loss": 44.0742, "step": 2831 }, { "epoch": 67.42985074626866, "grad_norm": 28.952625274658203, "learning_rate": 9.092261904761906e-06, "loss": 42.8393, "step": 2832 }, { "epoch": 67.45373134328358, "grad_norm": 19.387126922607422, "learning_rate": 9.089285714285715e-06, "loss": 41.7117, "step": 2833 }, { "epoch": 67.4776119402985, "grad_norm": 23.430946350097656, "learning_rate": 9.086309523809524e-06, "loss": 42.7006, "step": 2834 }, { "epoch": 67.50149253731344, "grad_norm": 28.108659744262695, "learning_rate": 9.083333333333333e-06, "loss": 45.417, "step": 2835 }, { "epoch": 67.52537313432836, "grad_norm": 22.115737915039062, "learning_rate": 9.080357142857144e-06, "loss": 44.4444, "step": 2836 }, { "epoch": 67.54925373134328, "grad_norm": 29.82137107849121, "learning_rate": 9.077380952380953e-06, "loss": 43.4888, "step": 2837 }, { "epoch": 67.57313432835821, "grad_norm": 25.010299682617188, "learning_rate": 9.074404761904762e-06, "loss": 43.9609, "step": 2838 }, { "epoch": 67.59701492537313, "grad_norm": 21.027952194213867, "learning_rate": 9.071428571428573e-06, "loss": 44.2489, "step": 2839 }, { "epoch": 67.62089552238805, "grad_norm": 27.009239196777344, "learning_rate": 9.068452380952382e-06, "loss": 43.6564, "step": 2840 }, { "epoch": 67.64477611940299, "grad_norm": 19.743545532226562, "learning_rate": 9.065476190476191e-06, "loss": 43.9997, "step": 2841 }, { "epoch": 67.66865671641791, "grad_norm": 28.90030288696289, "learning_rate": 9.0625e-06, "loss": 42.6926, "step": 2842 }, { "epoch": 67.69253731343284, "grad_norm": 25.418079376220703, "learning_rate": 9.05952380952381e-06, "loss": 43.2036, "step": 2843 }, { "epoch": 67.71641791044776, "grad_norm": 18.400348663330078, "learning_rate": 9.05654761904762e-06, "loss": 44.4565, "step": 2844 }, { "epoch": 67.74029850746268, "grad_norm": 26.924072265625, "learning_rate": 9.053571428571429e-06, "loss": 44.4893, "step": 2845 }, { "epoch": 67.7641791044776, "grad_norm": 25.352108001708984, "learning_rate": 9.05059523809524e-06, "loss": 43.0946, "step": 2846 }, { "epoch": 67.78805970149254, "grad_norm": 19.23507308959961, "learning_rate": 9.047619047619049e-06, "loss": 43.0507, "step": 2847 }, { "epoch": 67.81194029850747, "grad_norm": 28.143768310546875, "learning_rate": 9.044642857142858e-06, "loss": 43.2408, "step": 2848 }, { "epoch": 67.83582089552239, "grad_norm": 20.298084259033203, "learning_rate": 9.041666666666667e-06, "loss": 44.288, "step": 2849 }, { "epoch": 67.85970149253731, "grad_norm": 21.65508460998535, "learning_rate": 9.038690476190478e-06, "loss": 43.4648, "step": 2850 }, { "epoch": 67.88358208955223, "grad_norm": 23.92845916748047, "learning_rate": 9.035714285714287e-06, "loss": 42.1811, "step": 2851 }, { "epoch": 67.90746268656716, "grad_norm": 20.545076370239258, "learning_rate": 9.032738095238096e-06, "loss": 42.9022, "step": 2852 }, { "epoch": 67.9313432835821, "grad_norm": 32.36678695678711, "learning_rate": 9.029761904761906e-06, "loss": 43.4491, "step": 2853 }, { "epoch": 67.95522388059702, "grad_norm": 23.14188003540039, "learning_rate": 9.026785714285715e-06, "loss": 44.4911, "step": 2854 }, { "epoch": 67.97910447761194, "grad_norm": 31.488239288330078, "learning_rate": 9.023809523809524e-06, "loss": 44.5784, "step": 2855 }, { "epoch": 68.0, "grad_norm": 24.96849250793457, "learning_rate": 9.020833333333334e-06, "loss": 38.8154, "step": 2856 }, { "epoch": 68.02388059701492, "grad_norm": 26.00245475769043, "learning_rate": 9.017857142857144e-06, "loss": 43.6339, "step": 2857 }, { "epoch": 68.04776119402985, "grad_norm": 33.977596282958984, "learning_rate": 9.014880952380953e-06, "loss": 43.7634, "step": 2858 }, { "epoch": 68.07164179104478, "grad_norm": 21.762340545654297, "learning_rate": 9.011904761904762e-06, "loss": 43.8865, "step": 2859 }, { "epoch": 68.0955223880597, "grad_norm": 38.268455505371094, "learning_rate": 9.008928571428573e-06, "loss": 43.8947, "step": 2860 }, { "epoch": 68.11940298507463, "grad_norm": 26.789215087890625, "learning_rate": 9.005952380952382e-06, "loss": 42.0072, "step": 2861 }, { "epoch": 68.14328358208955, "grad_norm": 44.15632629394531, "learning_rate": 9.002976190476191e-06, "loss": 43.1045, "step": 2862 }, { "epoch": 68.16716417910447, "grad_norm": 36.71260070800781, "learning_rate": 9e-06, "loss": 43.7232, "step": 2863 }, { "epoch": 68.1910447761194, "grad_norm": 38.94734191894531, "learning_rate": 8.997023809523811e-06, "loss": 43.4934, "step": 2864 }, { "epoch": 68.21492537313434, "grad_norm": 38.295501708984375, "learning_rate": 8.99404761904762e-06, "loss": 43.3372, "step": 2865 }, { "epoch": 68.23880597014926, "grad_norm": 30.496740341186523, "learning_rate": 8.991071428571429e-06, "loss": 43.4905, "step": 2866 }, { "epoch": 68.26268656716418, "grad_norm": 30.001113891601562, "learning_rate": 8.98809523809524e-06, "loss": 44.0486, "step": 2867 }, { "epoch": 68.2865671641791, "grad_norm": 39.6522216796875, "learning_rate": 8.985119047619049e-06, "loss": 43.4828, "step": 2868 }, { "epoch": 68.31044776119403, "grad_norm": 31.28143310546875, "learning_rate": 8.982142857142858e-06, "loss": 43.7613, "step": 2869 }, { "epoch": 68.33432835820895, "grad_norm": 31.950016021728516, "learning_rate": 8.979166666666667e-06, "loss": 43.4209, "step": 2870 }, { "epoch": 68.35820895522389, "grad_norm": 30.61543083190918, "learning_rate": 8.976190476190478e-06, "loss": 43.2437, "step": 2871 }, { "epoch": 68.38208955223881, "grad_norm": 39.3588752746582, "learning_rate": 8.973214285714287e-06, "loss": 43.1893, "step": 2872 }, { "epoch": 68.40597014925373, "grad_norm": 29.70042610168457, "learning_rate": 8.970238095238096e-06, "loss": 42.1193, "step": 2873 }, { "epoch": 68.42985074626866, "grad_norm": 40.6136474609375, "learning_rate": 8.967261904761907e-06, "loss": 41.7532, "step": 2874 }, { "epoch": 68.45373134328358, "grad_norm": 36.44509506225586, "learning_rate": 8.964285714285716e-06, "loss": 44.5191, "step": 2875 }, { "epoch": 68.4776119402985, "grad_norm": NaN, "learning_rate": 8.961309523809525e-06, "loss": 70.4286, "step": 2876 }, { "epoch": 68.50149253731344, "grad_norm": 27.74057960510254, "learning_rate": 8.961309523809525e-06, "loss": 43.254, "step": 2877 }, { "epoch": 68.52537313432836, "grad_norm": 29.346860885620117, "learning_rate": 8.958333333333334e-06, "loss": 43.5863, "step": 2878 }, { "epoch": 68.54925373134328, "grad_norm": 36.642398834228516, "learning_rate": 8.955357142857144e-06, "loss": 43.3733, "step": 2879 }, { "epoch": 68.57313432835821, "grad_norm": 33.670162200927734, "learning_rate": 8.952380952380953e-06, "loss": 43.7232, "step": 2880 }, { "epoch": 68.59701492537313, "grad_norm": 33.738712310791016, "learning_rate": 8.949404761904763e-06, "loss": 42.704, "step": 2881 }, { "epoch": 68.62089552238805, "grad_norm": 31.452713012695312, "learning_rate": 8.946428571428573e-06, "loss": 43.8197, "step": 2882 }, { "epoch": 68.64477611940299, "grad_norm": 35.86618423461914, "learning_rate": 8.943452380952382e-06, "loss": 44.9871, "step": 2883 }, { "epoch": 68.66865671641791, "grad_norm": 30.94584083557129, "learning_rate": 8.940476190476191e-06, "loss": 43.8781, "step": 2884 }, { "epoch": 68.69253731343284, "grad_norm": 30.81380271911621, "learning_rate": 8.9375e-06, "loss": 44.2209, "step": 2885 }, { "epoch": 68.71641791044776, "grad_norm": 28.51966094970703, "learning_rate": 8.934523809523811e-06, "loss": 44.16, "step": 2886 }, { "epoch": 68.74029850746268, "grad_norm": NaN, "learning_rate": 8.93154761904762e-06, "loss": 77.0854, "step": 2887 }, { "epoch": 68.7641791044776, "grad_norm": 34.56461715698242, "learning_rate": 8.93154761904762e-06, "loss": 42.7519, "step": 2888 }, { "epoch": 68.78805970149254, "grad_norm": 27.781518936157227, "learning_rate": 8.92857142857143e-06, "loss": 44.0645, "step": 2889 }, { "epoch": 68.81194029850747, "grad_norm": 33.2479133605957, "learning_rate": 8.92559523809524e-06, "loss": 44.109, "step": 2890 }, { "epoch": 68.83582089552239, "grad_norm": 30.329626083374023, "learning_rate": 8.922619047619049e-06, "loss": 42.8678, "step": 2891 }, { "epoch": 68.85970149253731, "grad_norm": 32.120269775390625, "learning_rate": 8.919642857142858e-06, "loss": 44.2325, "step": 2892 }, { "epoch": 68.88358208955223, "grad_norm": 27.283164978027344, "learning_rate": 8.916666666666667e-06, "loss": 43.7788, "step": 2893 }, { "epoch": 68.90746268656716, "grad_norm": 31.86570930480957, "learning_rate": 8.913690476190478e-06, "loss": 44.3469, "step": 2894 }, { "epoch": 68.9313432835821, "grad_norm": 22.55097007751465, "learning_rate": 8.910714285714287e-06, "loss": 43.85, "step": 2895 }, { "epoch": 68.95522388059702, "grad_norm": 34.648773193359375, "learning_rate": 8.907738095238096e-06, "loss": 44.526, "step": 2896 }, { "epoch": 68.97910447761194, "grad_norm": 26.3565731048584, "learning_rate": 8.904761904761905e-06, "loss": 44.2, "step": 2897 }, { "epoch": 69.0, "grad_norm": 34.48598098754883, "learning_rate": 8.901785714285714e-06, "loss": 37.5511, "step": 2898 }, { "epoch": 69.02388059701492, "grad_norm": 36.6775016784668, "learning_rate": 8.898809523809525e-06, "loss": 42.6231, "step": 2899 }, { "epoch": 69.04776119402985, "grad_norm": 33.529296875, "learning_rate": 8.895833333333334e-06, "loss": 43.2504, "step": 2900 }, { "epoch": 69.07164179104478, "grad_norm": 31.762542724609375, "learning_rate": 8.892857142857143e-06, "loss": 43.752, "step": 2901 }, { "epoch": 69.0955223880597, "grad_norm": 28.147245407104492, "learning_rate": 8.889880952380952e-06, "loss": 43.8319, "step": 2902 }, { "epoch": 69.11940298507463, "grad_norm": 27.896669387817383, "learning_rate": 8.886904761904763e-06, "loss": 42.1947, "step": 2903 }, { "epoch": 69.14328358208955, "grad_norm": 28.625850677490234, "learning_rate": 8.883928571428572e-06, "loss": 43.3085, "step": 2904 }, { "epoch": 69.16716417910447, "grad_norm": 28.545974731445312, "learning_rate": 8.88095238095238e-06, "loss": 42.9532, "step": 2905 }, { "epoch": 69.1910447761194, "grad_norm": 34.869781494140625, "learning_rate": 8.877976190476192e-06, "loss": 44.2308, "step": 2906 }, { "epoch": 69.21492537313434, "grad_norm": 30.4566650390625, "learning_rate": 8.875e-06, "loss": 43.3219, "step": 2907 }, { "epoch": 69.23880597014926, "grad_norm": 29.15296745300293, "learning_rate": 8.87202380952381e-06, "loss": 44.0736, "step": 2908 }, { "epoch": 69.26268656716418, "grad_norm": 29.97230339050293, "learning_rate": 8.869047619047619e-06, "loss": 43.3511, "step": 2909 }, { "epoch": 69.2865671641791, "grad_norm": 30.087413787841797, "learning_rate": 8.86607142857143e-06, "loss": 43.8738, "step": 2910 }, { "epoch": 69.31044776119403, "grad_norm": 23.660356521606445, "learning_rate": 8.863095238095238e-06, "loss": 43.5448, "step": 2911 }, { "epoch": 69.33432835820895, "grad_norm": 28.7690372467041, "learning_rate": 8.860119047619048e-06, "loss": 42.9617, "step": 2912 }, { "epoch": 69.35820895522389, "grad_norm": 25.60896110534668, "learning_rate": 8.857142857142858e-06, "loss": 45.3785, "step": 2913 }, { "epoch": 69.38208955223881, "grad_norm": 31.27063751220703, "learning_rate": 8.854166666666667e-06, "loss": 43.376, "step": 2914 }, { "epoch": 69.40597014925373, "grad_norm": 18.817829132080078, "learning_rate": 8.851190476190476e-06, "loss": 44.0999, "step": 2915 }, { "epoch": 69.42985074626866, "grad_norm": 36.125919342041016, "learning_rate": 8.848214285714285e-06, "loss": 43.8083, "step": 2916 }, { "epoch": 69.45373134328358, "grad_norm": 25.558866500854492, "learning_rate": 8.845238095238096e-06, "loss": 45.1369, "step": 2917 }, { "epoch": 69.4776119402985, "grad_norm": 38.35983657836914, "learning_rate": 8.842261904761905e-06, "loss": 43.587, "step": 2918 }, { "epoch": 69.50149253731344, "grad_norm": 30.0064754486084, "learning_rate": 8.839285714285714e-06, "loss": 43.1645, "step": 2919 }, { "epoch": 69.52537313432836, "grad_norm": 31.177242279052734, "learning_rate": 8.836309523809525e-06, "loss": 42.2305, "step": 2920 }, { "epoch": 69.54925373134328, "grad_norm": 29.04176139831543, "learning_rate": 8.833333333333334e-06, "loss": 45.1403, "step": 2921 }, { "epoch": 69.57313432835821, "grad_norm": 35.95783233642578, "learning_rate": 8.830357142857143e-06, "loss": 43.4081, "step": 2922 }, { "epoch": 69.59701492537313, "grad_norm": 27.838382720947266, "learning_rate": 8.827380952380952e-06, "loss": 44.7195, "step": 2923 }, { "epoch": 69.62089552238805, "grad_norm": 30.860624313354492, "learning_rate": 8.824404761904763e-06, "loss": 42.7175, "step": 2924 }, { "epoch": 69.64477611940299, "grad_norm": 21.701316833496094, "learning_rate": 8.821428571428572e-06, "loss": 43.0401, "step": 2925 }, { "epoch": 69.66865671641791, "grad_norm": 27.270732879638672, "learning_rate": 8.818452380952381e-06, "loss": 43.686, "step": 2926 }, { "epoch": 69.69253731343284, "grad_norm": 25.814538955688477, "learning_rate": 8.815476190476192e-06, "loss": 44.3424, "step": 2927 }, { "epoch": 69.71641791044776, "grad_norm": 26.155197143554688, "learning_rate": 8.8125e-06, "loss": 43.6455, "step": 2928 }, { "epoch": 69.74029850746268, "grad_norm": 20.438846588134766, "learning_rate": 8.80952380952381e-06, "loss": 44.3784, "step": 2929 }, { "epoch": 69.7641791044776, "grad_norm": 26.45317268371582, "learning_rate": 8.806547619047619e-06, "loss": 42.6501, "step": 2930 }, { "epoch": 69.78805970149254, "grad_norm": 22.06026840209961, "learning_rate": 8.80357142857143e-06, "loss": 42.4144, "step": 2931 }, { "epoch": 69.81194029850747, "grad_norm": 24.45191764831543, "learning_rate": 8.800595238095239e-06, "loss": 43.7415, "step": 2932 }, { "epoch": 69.83582089552239, "grad_norm": 26.77782440185547, "learning_rate": 8.797619047619048e-06, "loss": 43.2565, "step": 2933 }, { "epoch": 69.85970149253731, "grad_norm": 22.350242614746094, "learning_rate": 8.794642857142858e-06, "loss": 43.5869, "step": 2934 }, { "epoch": 69.88358208955223, "grad_norm": 23.063016891479492, "learning_rate": 8.791666666666667e-06, "loss": 43.3821, "step": 2935 }, { "epoch": 69.90746268656716, "grad_norm": 18.864139556884766, "learning_rate": 8.788690476190477e-06, "loss": 42.8421, "step": 2936 }, { "epoch": 69.9313432835821, "grad_norm": 19.763843536376953, "learning_rate": 8.785714285714286e-06, "loss": 43.3783, "step": 2937 }, { "epoch": 69.95522388059702, "grad_norm": 19.347801208496094, "learning_rate": 8.782738095238096e-06, "loss": 42.8249, "step": 2938 }, { "epoch": 69.97910447761194, "grad_norm": 16.3013858795166, "learning_rate": 8.779761904761905e-06, "loss": 42.9306, "step": 2939 }, { "epoch": 70.0, "grad_norm": 15.056166648864746, "learning_rate": 8.776785714285714e-06, "loss": 38.2541, "step": 2940 }, { "epoch": 70.02388059701492, "grad_norm": 14.691337585449219, "learning_rate": 8.773809523809525e-06, "loss": 42.8378, "step": 2941 }, { "epoch": 70.04776119402985, "grad_norm": 24.51978874206543, "learning_rate": 8.770833333333334e-06, "loss": 43.2568, "step": 2942 }, { "epoch": 70.07164179104478, "grad_norm": 16.552833557128906, "learning_rate": 8.767857142857143e-06, "loss": 44.9251, "step": 2943 }, { "epoch": 70.0955223880597, "grad_norm": 26.28189468383789, "learning_rate": 8.764880952380952e-06, "loss": 43.8141, "step": 2944 }, { "epoch": 70.11940298507463, "grad_norm": 21.29142951965332, "learning_rate": 8.761904761904763e-06, "loss": 43.8724, "step": 2945 }, { "epoch": 70.14328358208955, "grad_norm": 23.159542083740234, "learning_rate": 8.758928571428572e-06, "loss": 44.4016, "step": 2946 }, { "epoch": 70.16716417910447, "grad_norm": 21.08184051513672, "learning_rate": 8.755952380952381e-06, "loss": 44.1115, "step": 2947 }, { "epoch": 70.1910447761194, "grad_norm": 18.838504791259766, "learning_rate": 8.752976190476192e-06, "loss": 43.2276, "step": 2948 }, { "epoch": 70.21492537313434, "grad_norm": 21.613079071044922, "learning_rate": 8.750000000000001e-06, "loss": 42.679, "step": 2949 }, { "epoch": 70.23880597014926, "grad_norm": 21.29805564880371, "learning_rate": 8.74702380952381e-06, "loss": 43.1822, "step": 2950 }, { "epoch": 70.26268656716418, "grad_norm": NaN, "learning_rate": 8.744047619047619e-06, "loss": 43.3802, "step": 2951 }, { "epoch": 70.2865671641791, "grad_norm": 22.393659591674805, "learning_rate": 8.744047619047619e-06, "loss": 43.3569, "step": 2952 }, { "epoch": 70.31044776119403, "grad_norm": 17.94029998779297, "learning_rate": 8.74107142857143e-06, "loss": 42.4398, "step": 2953 }, { "epoch": 70.33432835820895, "grad_norm": 17.575550079345703, "learning_rate": 8.738095238095239e-06, "loss": 42.3488, "step": 2954 }, { "epoch": 70.35820895522389, "grad_norm": 18.268203735351562, "learning_rate": 8.735119047619048e-06, "loss": 42.6199, "step": 2955 }, { "epoch": 70.38208955223881, "grad_norm": 25.415603637695312, "learning_rate": 8.732142857142859e-06, "loss": 43.803, "step": 2956 }, { "epoch": 70.40597014925373, "grad_norm": 23.37176513671875, "learning_rate": 8.729166666666668e-06, "loss": 44.5072, "step": 2957 }, { "epoch": 70.42985074626866, "grad_norm": 24.91670036315918, "learning_rate": 8.726190476190477e-06, "loss": 44.1411, "step": 2958 }, { "epoch": 70.45373134328358, "grad_norm": 20.50780487060547, "learning_rate": 8.723214285714286e-06, "loss": 45.4114, "step": 2959 }, { "epoch": 70.4776119402985, "grad_norm": 21.885364532470703, "learning_rate": 8.720238095238096e-06, "loss": 43.1786, "step": 2960 }, { "epoch": 70.50149253731344, "grad_norm": 18.620540618896484, "learning_rate": 8.717261904761906e-06, "loss": 42.5272, "step": 2961 }, { "epoch": 70.52537313432836, "grad_norm": 27.28016471862793, "learning_rate": 8.714285714285715e-06, "loss": 44.0531, "step": 2962 }, { "epoch": 70.54925373134328, "grad_norm": 22.124799728393555, "learning_rate": 8.711309523809525e-06, "loss": 43.445, "step": 2963 }, { "epoch": 70.57313432835821, "grad_norm": 25.905492782592773, "learning_rate": 8.708333333333334e-06, "loss": 43.619, "step": 2964 }, { "epoch": 70.59701492537313, "grad_norm": 23.890172958374023, "learning_rate": 8.705357142857143e-06, "loss": 43.1365, "step": 2965 }, { "epoch": 70.62089552238805, "grad_norm": 20.158838272094727, "learning_rate": 8.702380952380952e-06, "loss": 41.9394, "step": 2966 }, { "epoch": 70.64477611940299, "grad_norm": 24.878849029541016, "learning_rate": 8.699404761904763e-06, "loss": 43.7568, "step": 2967 }, { "epoch": 70.66865671641791, "grad_norm": 20.08368492126465, "learning_rate": 8.696428571428572e-06, "loss": 43.7444, "step": 2968 }, { "epoch": 70.69253731343284, "grad_norm": 24.59374237060547, "learning_rate": 8.693452380952381e-06, "loss": 43.6659, "step": 2969 }, { "epoch": 70.71641791044776, "grad_norm": 21.96346664428711, "learning_rate": 8.690476190476192e-06, "loss": 42.8718, "step": 2970 }, { "epoch": 70.74029850746268, "grad_norm": 20.61510467529297, "learning_rate": 8.687500000000001e-06, "loss": 43.8264, "step": 2971 }, { "epoch": 70.7641791044776, "grad_norm": 25.367786407470703, "learning_rate": 8.68452380952381e-06, "loss": 42.2802, "step": 2972 }, { "epoch": 70.78805970149254, "grad_norm": 21.911298751831055, "learning_rate": 8.68154761904762e-06, "loss": 44.4695, "step": 2973 }, { "epoch": 70.81194029850747, "grad_norm": 26.7462100982666, "learning_rate": 8.67857142857143e-06, "loss": 43.4564, "step": 2974 }, { "epoch": 70.83582089552239, "grad_norm": 23.370485305786133, "learning_rate": 8.675595238095239e-06, "loss": 45.0502, "step": 2975 }, { "epoch": 70.85970149253731, "grad_norm": 26.052675247192383, "learning_rate": 8.672619047619048e-06, "loss": 42.6782, "step": 2976 }, { "epoch": 70.88358208955223, "grad_norm": 21.637617111206055, "learning_rate": 8.669642857142859e-06, "loss": 44.426, "step": 2977 }, { "epoch": 70.90746268656716, "grad_norm": 26.575313568115234, "learning_rate": 8.666666666666668e-06, "loss": 43.6968, "step": 2978 }, { "epoch": 70.9313432835821, "grad_norm": 23.814599990844727, "learning_rate": 8.663690476190477e-06, "loss": 43.3269, "step": 2979 }, { "epoch": 70.95522388059702, "grad_norm": 21.367717742919922, "learning_rate": 8.660714285714286e-06, "loss": 43.1399, "step": 2980 }, { "epoch": 70.97910447761194, "grad_norm": 19.98285484313965, "learning_rate": 8.657738095238097e-06, "loss": 42.9342, "step": 2981 }, { "epoch": 71.0, "grad_norm": 22.52842140197754, "learning_rate": 8.654761904761906e-06, "loss": 36.6415, "step": 2982 }, { "epoch": 71.02388059701492, "grad_norm": 22.04327392578125, "learning_rate": 8.651785714285715e-06, "loss": 43.0825, "step": 2983 }, { "epoch": 71.04776119402985, "grad_norm": 21.24346351623535, "learning_rate": 8.648809523809526e-06, "loss": 43.511, "step": 2984 }, { "epoch": 71.07164179104478, "grad_norm": 23.4123592376709, "learning_rate": 8.645833333333335e-06, "loss": 43.1464, "step": 2985 }, { "epoch": 71.0955223880597, "grad_norm": 23.918460845947266, "learning_rate": 8.642857142857144e-06, "loss": 44.1223, "step": 2986 }, { "epoch": 71.11940298507463, "grad_norm": 16.164955139160156, "learning_rate": 8.639880952380953e-06, "loss": 43.2759, "step": 2987 }, { "epoch": 71.14328358208955, "grad_norm": 22.15060043334961, "learning_rate": 8.636904761904763e-06, "loss": 43.1227, "step": 2988 }, { "epoch": 71.16716417910447, "grad_norm": 19.598203659057617, "learning_rate": 8.633928571428572e-06, "loss": 41.9802, "step": 2989 }, { "epoch": 71.1910447761194, "grad_norm": 16.25682830810547, "learning_rate": 8.630952380952381e-06, "loss": 42.1285, "step": 2990 }, { "epoch": 71.21492537313434, "grad_norm": 20.54530143737793, "learning_rate": 8.627976190476192e-06, "loss": 43.4601, "step": 2991 }, { "epoch": 71.23880597014926, "grad_norm": 25.911041259765625, "learning_rate": 8.625000000000001e-06, "loss": 42.6006, "step": 2992 }, { "epoch": 71.26268656716418, "grad_norm": 16.15741539001465, "learning_rate": 8.62202380952381e-06, "loss": 42.6621, "step": 2993 }, { "epoch": 71.2865671641791, "grad_norm": 30.334243774414062, "learning_rate": 8.61904761904762e-06, "loss": 43.4206, "step": 2994 }, { "epoch": 71.31044776119403, "grad_norm": 26.023889541625977, "learning_rate": 8.61607142857143e-06, "loss": 43.5777, "step": 2995 }, { "epoch": 71.33432835820895, "grad_norm": 21.3012638092041, "learning_rate": 8.61309523809524e-06, "loss": 42.4823, "step": 2996 }, { "epoch": 71.35820895522389, "grad_norm": 25.109596252441406, "learning_rate": 8.610119047619048e-06, "loss": 44.2666, "step": 2997 }, { "epoch": 71.38208955223881, "grad_norm": 22.26563835144043, "learning_rate": 8.607142857142859e-06, "loss": 40.9261, "step": 2998 }, { "epoch": 71.40597014925373, "grad_norm": 30.94297218322754, "learning_rate": 8.604166666666668e-06, "loss": 43.5651, "step": 2999 }, { "epoch": 71.42985074626866, "grad_norm": 24.670034408569336, "learning_rate": 8.601190476190477e-06, "loss": 43.6695, "step": 3000 }, { "epoch": 71.45373134328358, "grad_norm": 29.290430068969727, "learning_rate": 8.598214285714288e-06, "loss": 43.6725, "step": 3001 }, { "epoch": 71.4776119402985, "grad_norm": 23.058176040649414, "learning_rate": 8.595238095238097e-06, "loss": 43.695, "step": 3002 }, { "epoch": 71.50149253731344, "grad_norm": 21.41179084777832, "learning_rate": 8.592261904761904e-06, "loss": 43.1715, "step": 3003 }, { "epoch": 71.52537313432836, "grad_norm": 22.226594924926758, "learning_rate": 8.589285714285715e-06, "loss": 43.1411, "step": 3004 }, { "epoch": 71.54925373134328, "grad_norm": 19.892719268798828, "learning_rate": 8.586309523809524e-06, "loss": 44.4913, "step": 3005 }, { "epoch": 71.57313432835821, "grad_norm": 18.263708114624023, "learning_rate": 8.583333333333333e-06, "loss": 43.2348, "step": 3006 }, { "epoch": 71.59701492537313, "grad_norm": 22.065439224243164, "learning_rate": 8.580357142857144e-06, "loss": 44.3296, "step": 3007 }, { "epoch": 71.62089552238805, "grad_norm": 19.95087432861328, "learning_rate": 8.577380952380953e-06, "loss": 44.827, "step": 3008 }, { "epoch": 71.64477611940299, "grad_norm": 19.371231079101562, "learning_rate": 8.574404761904762e-06, "loss": 43.9034, "step": 3009 }, { "epoch": 71.66865671641791, "grad_norm": 22.265600204467773, "learning_rate": 8.571428571428571e-06, "loss": 42.659, "step": 3010 }, { "epoch": 71.69253731343284, "grad_norm": 18.449695587158203, "learning_rate": 8.568452380952382e-06, "loss": 43.674, "step": 3011 }, { "epoch": 71.71641791044776, "grad_norm": 25.14525604248047, "learning_rate": 8.56547619047619e-06, "loss": 43.5625, "step": 3012 }, { "epoch": 71.74029850746268, "grad_norm": 23.78099822998047, "learning_rate": 8.5625e-06, "loss": 44.3134, "step": 3013 }, { "epoch": 71.7641791044776, "grad_norm": 18.84084129333496, "learning_rate": 8.55952380952381e-06, "loss": 44.8436, "step": 3014 }, { "epoch": 71.78805970149254, "grad_norm": 28.59735107421875, "learning_rate": 8.55654761904762e-06, "loss": 43.3521, "step": 3015 }, { "epoch": 71.81194029850747, "grad_norm": 22.86484718322754, "learning_rate": 8.553571428571429e-06, "loss": 44.0742, "step": 3016 }, { "epoch": 71.83582089552239, "grad_norm": 17.34327507019043, "learning_rate": 8.550595238095238e-06, "loss": 43.5721, "step": 3017 }, { "epoch": 71.85970149253731, "grad_norm": 32.2520637512207, "learning_rate": 8.547619047619048e-06, "loss": 42.3465, "step": 3018 }, { "epoch": 71.88358208955223, "grad_norm": 23.380569458007812, "learning_rate": 8.544642857142857e-06, "loss": 43.2287, "step": 3019 }, { "epoch": 71.90746268656716, "grad_norm": 31.07112693786621, "learning_rate": 8.541666666666666e-06, "loss": 44.3177, "step": 3020 }, { "epoch": 71.9313432835821, "grad_norm": 24.860567092895508, "learning_rate": 8.538690476190477e-06, "loss": 43.6361, "step": 3021 }, { "epoch": 71.95522388059702, "grad_norm": 22.43517303466797, "learning_rate": 8.535714285714286e-06, "loss": 43.5824, "step": 3022 }, { "epoch": 71.97910447761194, "grad_norm": 27.975297927856445, "learning_rate": 8.532738095238095e-06, "loss": 43.4829, "step": 3023 }, { "epoch": 72.0, "grad_norm": 17.978660583496094, "learning_rate": 8.529761904761904e-06, "loss": 38.115, "step": 3024 }, { "epoch": 72.02388059701492, "grad_norm": 31.69437599182129, "learning_rate": 8.526785714285715e-06, "loss": 43.5237, "step": 3025 }, { "epoch": 72.04776119402985, "grad_norm": 27.577686309814453, "learning_rate": 8.523809523809524e-06, "loss": 43.1406, "step": 3026 }, { "epoch": 72.07164179104478, "grad_norm": 28.320255279541016, "learning_rate": 8.520833333333333e-06, "loss": 44.5784, "step": 3027 }, { "epoch": 72.0955223880597, "grad_norm": 26.59323501586914, "learning_rate": 8.517857142857144e-06, "loss": 42.2067, "step": 3028 }, { "epoch": 72.11940298507463, "grad_norm": 21.94460105895996, "learning_rate": 8.514880952380953e-06, "loss": 43.4262, "step": 3029 }, { "epoch": 72.14328358208955, "grad_norm": 23.56421661376953, "learning_rate": 8.511904761904762e-06, "loss": 41.1196, "step": 3030 }, { "epoch": 72.16716417910447, "grad_norm": 19.21329689025879, "learning_rate": 8.508928571428571e-06, "loss": 42.5441, "step": 3031 }, { "epoch": 72.1910447761194, "grad_norm": 23.377782821655273, "learning_rate": 8.505952380952382e-06, "loss": 43.0296, "step": 3032 }, { "epoch": 72.21492537313434, "grad_norm": 24.402435302734375, "learning_rate": 8.502976190476191e-06, "loss": 44.2474, "step": 3033 }, { "epoch": 72.23880597014926, "grad_norm": 18.61969566345215, "learning_rate": 8.5e-06, "loss": 43.6984, "step": 3034 }, { "epoch": 72.26268656716418, "grad_norm": 30.627338409423828, "learning_rate": 8.49702380952381e-06, "loss": 42.5441, "step": 3035 }, { "epoch": 72.2865671641791, "grad_norm": 26.115427017211914, "learning_rate": 8.49404761904762e-06, "loss": 41.8235, "step": 3036 }, { "epoch": 72.31044776119403, "grad_norm": 24.971904754638672, "learning_rate": 8.491071428571429e-06, "loss": 43.9344, "step": 3037 }, { "epoch": 72.33432835820895, "grad_norm": 26.42667007446289, "learning_rate": 8.488095238095238e-06, "loss": 43.2757, "step": 3038 }, { "epoch": 72.35820895522389, "grad_norm": 23.19200897216797, "learning_rate": 8.485119047619049e-06, "loss": 42.9536, "step": 3039 }, { "epoch": 72.38208955223881, "grad_norm": 31.263626098632812, "learning_rate": 8.482142857142858e-06, "loss": 42.8037, "step": 3040 }, { "epoch": 72.40597014925373, "grad_norm": 25.049270629882812, "learning_rate": 8.479166666666667e-06, "loss": 42.8005, "step": 3041 }, { "epoch": 72.42985074626866, "grad_norm": 20.71118927001953, "learning_rate": 8.476190476190477e-06, "loss": 43.106, "step": 3042 }, { "epoch": 72.45373134328358, "grad_norm": 22.156679153442383, "learning_rate": 8.473214285714286e-06, "loss": 42.6742, "step": 3043 }, { "epoch": 72.4776119402985, "grad_norm": 22.091957092285156, "learning_rate": 8.470238095238095e-06, "loss": 43.6855, "step": 3044 }, { "epoch": 72.50149253731344, "grad_norm": 21.12959861755371, "learning_rate": 8.467261904761905e-06, "loss": 42.9416, "step": 3045 }, { "epoch": 72.52537313432836, "grad_norm": 20.53251075744629, "learning_rate": 8.464285714285715e-06, "loss": 44.3919, "step": 3046 }, { "epoch": 72.54925373134328, "grad_norm": 19.188758850097656, "learning_rate": 8.461309523809524e-06, "loss": 43.3152, "step": 3047 }, { "epoch": 72.57313432835821, "grad_norm": 26.149826049804688, "learning_rate": 8.458333333333333e-06, "loss": 44.7382, "step": 3048 }, { "epoch": 72.59701492537313, "grad_norm": 18.40545082092285, "learning_rate": 8.455357142857144e-06, "loss": 44.0886, "step": 3049 }, { "epoch": 72.62089552238805, "grad_norm": 21.535911560058594, "learning_rate": 8.452380952380953e-06, "loss": 43.2625, "step": 3050 }, { "epoch": 72.64477611940299, "grad_norm": 17.798324584960938, "learning_rate": 8.449404761904762e-06, "loss": 43.5087, "step": 3051 }, { "epoch": 72.66865671641791, "grad_norm": 22.086271286010742, "learning_rate": 8.446428571428571e-06, "loss": 43.7427, "step": 3052 }, { "epoch": 72.69253731343284, "grad_norm": 20.795154571533203, "learning_rate": 8.443452380952382e-06, "loss": 43.6492, "step": 3053 }, { "epoch": 72.71641791044776, "grad_norm": 23.004671096801758, "learning_rate": 8.440476190476191e-06, "loss": 43.2841, "step": 3054 }, { "epoch": 72.74029850746268, "grad_norm": 19.808507919311523, "learning_rate": 8.4375e-06, "loss": 43.2447, "step": 3055 }, { "epoch": 72.7641791044776, "grad_norm": 25.06849479675293, "learning_rate": 8.434523809523811e-06, "loss": 42.7637, "step": 3056 }, { "epoch": 72.78805970149254, "grad_norm": 25.014245986938477, "learning_rate": 8.43154761904762e-06, "loss": 43.4822, "step": 3057 }, { "epoch": 72.81194029850747, "grad_norm": 22.324596405029297, "learning_rate": 8.428571428571429e-06, "loss": 43.3555, "step": 3058 }, { "epoch": 72.83582089552239, "grad_norm": 28.37264060974121, "learning_rate": 8.425595238095238e-06, "loss": 45.1914, "step": 3059 }, { "epoch": 72.85970149253731, "grad_norm": 20.218700408935547, "learning_rate": 8.422619047619049e-06, "loss": 43.7534, "step": 3060 }, { "epoch": 72.88358208955223, "grad_norm": 23.96106719970703, "learning_rate": 8.419642857142858e-06, "loss": 43.3726, "step": 3061 }, { "epoch": 72.90746268656716, "grad_norm": 24.620227813720703, "learning_rate": 8.416666666666667e-06, "loss": 43.2246, "step": 3062 }, { "epoch": 72.9313432835821, "grad_norm": 17.006282806396484, "learning_rate": 8.413690476190478e-06, "loss": 43.0239, "step": 3063 }, { "epoch": 72.95522388059702, "grad_norm": 32.321250915527344, "learning_rate": 8.410714285714287e-06, "loss": 43.8265, "step": 3064 }, { "epoch": 72.97910447761194, "grad_norm": 26.541305541992188, "learning_rate": 8.407738095238096e-06, "loss": 43.715, "step": 3065 }, { "epoch": 73.0, "grad_norm": 20.71360969543457, "learning_rate": 8.404761904761905e-06, "loss": 38.4916, "step": 3066 }, { "epoch": 73.02388059701492, "grad_norm": 25.500295639038086, "learning_rate": 8.401785714285715e-06, "loss": 43.3955, "step": 3067 }, { "epoch": 73.04776119402985, "grad_norm": 26.59987449645996, "learning_rate": 8.398809523809525e-06, "loss": 43.3811, "step": 3068 }, { "epoch": 73.07164179104478, "grad_norm": 22.731945037841797, "learning_rate": 8.395833333333334e-06, "loss": 43.2902, "step": 3069 }, { "epoch": 73.0955223880597, "grad_norm": 20.676626205444336, "learning_rate": 8.392857142857144e-06, "loss": 44.4288, "step": 3070 }, { "epoch": 73.11940298507463, "grad_norm": 24.257009506225586, "learning_rate": 8.389880952380953e-06, "loss": 42.6346, "step": 3071 }, { "epoch": 73.14328358208955, "grad_norm": 20.27753448486328, "learning_rate": 8.386904761904762e-06, "loss": 43.195, "step": 3072 }, { "epoch": 73.16716417910447, "grad_norm": 22.37655258178711, "learning_rate": 8.383928571428573e-06, "loss": 43.7297, "step": 3073 }, { "epoch": 73.1910447761194, "grad_norm": 22.078298568725586, "learning_rate": 8.380952380952382e-06, "loss": 45.0908, "step": 3074 }, { "epoch": 73.21492537313434, "grad_norm": 22.645662307739258, "learning_rate": 8.377976190476191e-06, "loss": 43.4577, "step": 3075 }, { "epoch": 73.23880597014926, "grad_norm": 18.159029006958008, "learning_rate": 8.375e-06, "loss": 42.8618, "step": 3076 }, { "epoch": 73.26268656716418, "grad_norm": 22.44676399230957, "learning_rate": 8.372023809523811e-06, "loss": 41.7892, "step": 3077 }, { "epoch": 73.2865671641791, "grad_norm": 21.480403900146484, "learning_rate": 8.36904761904762e-06, "loss": 44.0939, "step": 3078 }, { "epoch": 73.31044776119403, "grad_norm": 19.49287986755371, "learning_rate": 8.366071428571429e-06, "loss": 44.0851, "step": 3079 }, { "epoch": 73.33432835820895, "grad_norm": 18.453174591064453, "learning_rate": 8.36309523809524e-06, "loss": 42.5673, "step": 3080 }, { "epoch": 73.35820895522389, "grad_norm": NaN, "learning_rate": 8.360119047619049e-06, "loss": 71.053, "step": 3081 }, { "epoch": 73.38208955223881, "grad_norm": 20.119003295898438, "learning_rate": 8.360119047619049e-06, "loss": 42.981, "step": 3082 }, { "epoch": 73.40597014925373, "grad_norm": 18.897857666015625, "learning_rate": 8.357142857142858e-06, "loss": 42.5696, "step": 3083 }, { "epoch": 73.42985074626866, "grad_norm": 26.755035400390625, "learning_rate": 8.354166666666667e-06, "loss": 43.2951, "step": 3084 }, { "epoch": 73.45373134328358, "grad_norm": 19.104629516601562, "learning_rate": 8.351190476190478e-06, "loss": 42.5016, "step": 3085 }, { "epoch": 73.4776119402985, "grad_norm": 25.36631965637207, "learning_rate": 8.348214285714287e-06, "loss": 42.6552, "step": 3086 }, { "epoch": 73.50149253731344, "grad_norm": 27.23288345336914, "learning_rate": 8.345238095238096e-06, "loss": 42.6917, "step": 3087 }, { "epoch": 73.52537313432836, "grad_norm": 16.930316925048828, "learning_rate": 8.342261904761907e-06, "loss": 43.1315, "step": 3088 }, { "epoch": 73.54925373134328, "grad_norm": 26.30918312072754, "learning_rate": 8.339285714285716e-06, "loss": 42.7197, "step": 3089 }, { "epoch": 73.57313432835821, "grad_norm": 24.781511306762695, "learning_rate": 8.336309523809525e-06, "loss": 42.5099, "step": 3090 }, { "epoch": 73.59701492537313, "grad_norm": 19.516469955444336, "learning_rate": 8.333333333333334e-06, "loss": 43.0713, "step": 3091 }, { "epoch": 73.62089552238805, "grad_norm": 22.657184600830078, "learning_rate": 8.330357142857144e-06, "loss": 43.3808, "step": 3092 }, { "epoch": 73.64477611940299, "grad_norm": 18.468502044677734, "learning_rate": 8.327380952380954e-06, "loss": 43.7249, "step": 3093 }, { "epoch": 73.66865671641791, "grad_norm": 17.16704750061035, "learning_rate": 8.324404761904763e-06, "loss": 43.8457, "step": 3094 }, { "epoch": 73.69253731343284, "grad_norm": 21.254226684570312, "learning_rate": 8.321428571428573e-06, "loss": 43.5131, "step": 3095 }, { "epoch": 73.71641791044776, "grad_norm": 24.988006591796875, "learning_rate": 8.318452380952382e-06, "loss": 43.419, "step": 3096 }, { "epoch": 73.74029850746268, "grad_norm": 18.345117568969727, "learning_rate": 8.315476190476191e-06, "loss": 43.89, "step": 3097 }, { "epoch": 73.7641791044776, "grad_norm": 19.947589874267578, "learning_rate": 8.3125e-06, "loss": 41.9095, "step": 3098 }, { "epoch": 73.78805970149254, "grad_norm": 21.689882278442383, "learning_rate": 8.309523809523811e-06, "loss": 43.5629, "step": 3099 }, { "epoch": 73.81194029850747, "grad_norm": 18.021583557128906, "learning_rate": 8.30654761904762e-06, "loss": 45.2045, "step": 3100 }, { "epoch": 73.83582089552239, "grad_norm": 21.016939163208008, "learning_rate": 8.30357142857143e-06, "loss": 42.9508, "step": 3101 }, { "epoch": 73.85970149253731, "grad_norm": 19.921489715576172, "learning_rate": 8.30059523809524e-06, "loss": 45.0384, "step": 3102 }, { "epoch": 73.88358208955223, "grad_norm": 17.989734649658203, "learning_rate": 8.297619047619049e-06, "loss": 43.6752, "step": 3103 }, { "epoch": 73.90746268656716, "grad_norm": 19.126956939697266, "learning_rate": 8.294642857142858e-06, "loss": 42.4258, "step": 3104 }, { "epoch": 73.9313432835821, "grad_norm": 18.107421875, "learning_rate": 8.291666666666667e-06, "loss": 42.0089, "step": 3105 }, { "epoch": 73.95522388059702, "grad_norm": 22.599328994750977, "learning_rate": 8.288690476190478e-06, "loss": 43.1967, "step": 3106 }, { "epoch": 73.97910447761194, "grad_norm": 17.103744506835938, "learning_rate": 8.285714285714287e-06, "loss": 42.932, "step": 3107 }, { "epoch": 74.0, "grad_norm": 16.514545440673828, "learning_rate": 8.282738095238096e-06, "loss": 38.5601, "step": 3108 }, { "epoch": 74.02388059701492, "grad_norm": 19.938108444213867, "learning_rate": 8.279761904761905e-06, "loss": 43.1656, "step": 3109 }, { "epoch": 74.04776119402985, "grad_norm": 23.691556930541992, "learning_rate": 8.276785714285714e-06, "loss": 43.248, "step": 3110 }, { "epoch": 74.07164179104478, "grad_norm": 24.84130859375, "learning_rate": 8.273809523809523e-06, "loss": 43.0973, "step": 3111 }, { "epoch": 74.0955223880597, "grad_norm": 16.541378021240234, "learning_rate": 8.270833333333334e-06, "loss": 43.8453, "step": 3112 }, { "epoch": 74.11940298507463, "grad_norm": 34.161293029785156, "learning_rate": 8.267857142857143e-06, "loss": 41.7, "step": 3113 }, { "epoch": 74.14328358208955, "grad_norm": 26.104328155517578, "learning_rate": 8.264880952380952e-06, "loss": 43.6119, "step": 3114 }, { "epoch": 74.16716417910447, "grad_norm": 26.31689453125, "learning_rate": 8.261904761904763e-06, "loss": 41.5545, "step": 3115 }, { "epoch": 74.1910447761194, "grad_norm": 23.808761596679688, "learning_rate": 8.258928571428572e-06, "loss": 44.5862, "step": 3116 }, { "epoch": 74.21492537313434, "grad_norm": 24.158493041992188, "learning_rate": 8.25595238095238e-06, "loss": 42.9814, "step": 3117 }, { "epoch": 74.23880597014926, "grad_norm": 25.35089874267578, "learning_rate": 8.25297619047619e-06, "loss": 42.4484, "step": 3118 }, { "epoch": 74.26268656716418, "grad_norm": 24.48615264892578, "learning_rate": 8.25e-06, "loss": 42.7431, "step": 3119 }, { "epoch": 74.2865671641791, "grad_norm": 24.813716888427734, "learning_rate": 8.24702380952381e-06, "loss": 43.1515, "step": 3120 }, { "epoch": 74.31044776119403, "grad_norm": 18.43018341064453, "learning_rate": 8.244047619047619e-06, "loss": 43.5142, "step": 3121 }, { "epoch": 74.33432835820895, "grad_norm": 25.593732833862305, "learning_rate": 8.24107142857143e-06, "loss": 44.4342, "step": 3122 }, { "epoch": 74.35820895522389, "grad_norm": 21.224576950073242, "learning_rate": 8.238095238095239e-06, "loss": 44.1973, "step": 3123 }, { "epoch": 74.38208955223881, "grad_norm": 17.604145050048828, "learning_rate": 8.235119047619048e-06, "loss": 43.4662, "step": 3124 }, { "epoch": 74.40597014925373, "grad_norm": 30.535215377807617, "learning_rate": 8.232142857142857e-06, "loss": 42.8872, "step": 3125 }, { "epoch": 74.42985074626866, "grad_norm": 22.767736434936523, "learning_rate": 8.229166666666667e-06, "loss": 43.4187, "step": 3126 }, { "epoch": 74.45373134328358, "grad_norm": 33.97389221191406, "learning_rate": 8.226190476190476e-06, "loss": 43.8719, "step": 3127 }, { "epoch": 74.4776119402985, "grad_norm": 26.33451271057129, "learning_rate": 8.223214285714285e-06, "loss": 43.6458, "step": 3128 }, { "epoch": 74.50149253731344, "grad_norm": 35.393733978271484, "learning_rate": 8.220238095238096e-06, "loss": 44.2996, "step": 3129 }, { "epoch": 74.52537313432836, "grad_norm": 27.903955459594727, "learning_rate": 8.217261904761905e-06, "loss": 43.5218, "step": 3130 }, { "epoch": 74.54925373134328, "grad_norm": 27.946807861328125, "learning_rate": 8.214285714285714e-06, "loss": 43.8993, "step": 3131 }, { "epoch": 74.57313432835821, "grad_norm": 21.519737243652344, "learning_rate": 8.211309523809523e-06, "loss": 42.9369, "step": 3132 }, { "epoch": 74.59701492537313, "grad_norm": 27.311965942382812, "learning_rate": 8.208333333333334e-06, "loss": 43.4193, "step": 3133 }, { "epoch": 74.62089552238805, "grad_norm": 23.38337516784668, "learning_rate": 8.205357142857143e-06, "loss": 43.5582, "step": 3134 }, { "epoch": 74.64477611940299, "grad_norm": 19.415571212768555, "learning_rate": 8.202380952380952e-06, "loss": 42.8066, "step": 3135 }, { "epoch": 74.66865671641791, "grad_norm": 25.44513511657715, "learning_rate": 8.199404761904763e-06, "loss": 42.8859, "step": 3136 }, { "epoch": 74.69253731343284, "grad_norm": 23.1788330078125, "learning_rate": 8.196428571428572e-06, "loss": 42.1339, "step": 3137 }, { "epoch": 74.71641791044776, "grad_norm": 14.436179161071777, "learning_rate": 8.193452380952381e-06, "loss": 42.6687, "step": 3138 }, { "epoch": 74.74029850746268, "grad_norm": 30.928714752197266, "learning_rate": 8.190476190476192e-06, "loss": 44.5744, "step": 3139 }, { "epoch": 74.7641791044776, "grad_norm": 23.915878295898438, "learning_rate": 8.1875e-06, "loss": 44.3435, "step": 3140 }, { "epoch": 74.78805970149254, "grad_norm": 27.95979881286621, "learning_rate": 8.18452380952381e-06, "loss": 42.4667, "step": 3141 }, { "epoch": 74.81194029850747, "grad_norm": 22.4390811920166, "learning_rate": 8.181547619047619e-06, "loss": 42.6036, "step": 3142 }, { "epoch": 74.83582089552239, "grad_norm": 22.94829750061035, "learning_rate": 8.17857142857143e-06, "loss": 42.4304, "step": 3143 }, { "epoch": 74.85970149253731, "grad_norm": 20.711339950561523, "learning_rate": 8.175595238095239e-06, "loss": 43.1806, "step": 3144 }, { "epoch": 74.88358208955223, "grad_norm": 21.30629539489746, "learning_rate": 8.172619047619048e-06, "loss": 42.7325, "step": 3145 }, { "epoch": 74.90746268656716, "grad_norm": 20.381263732910156, "learning_rate": 8.169642857142858e-06, "loss": 43.0491, "step": 3146 }, { "epoch": 74.9313432835821, "grad_norm": 21.54926300048828, "learning_rate": 8.166666666666668e-06, "loss": 44.8298, "step": 3147 }, { "epoch": 74.95522388059702, "grad_norm": 15.518889427185059, "learning_rate": 8.163690476190477e-06, "loss": 42.6821, "step": 3148 }, { "epoch": 74.97910447761194, "grad_norm": 24.487192153930664, "learning_rate": 8.160714285714286e-06, "loss": 43.0891, "step": 3149 }, { "epoch": 75.0, "grad_norm": 15.607013702392578, "learning_rate": 8.157738095238096e-06, "loss": 37.212, "step": 3150 }, { "epoch": 75.02388059701492, "grad_norm": 31.642353057861328, "learning_rate": 8.154761904761905e-06, "loss": 43.9061, "step": 3151 }, { "epoch": 75.04776119402985, "grad_norm": 23.92624855041504, "learning_rate": 8.151785714285714e-06, "loss": 44.0244, "step": 3152 }, { "epoch": 75.07164179104478, "grad_norm": 23.756420135498047, "learning_rate": 8.148809523809525e-06, "loss": 44.7597, "step": 3153 }, { "epoch": 75.0955223880597, "grad_norm": 26.027414321899414, "learning_rate": 8.145833333333334e-06, "loss": 42.3933, "step": 3154 }, { "epoch": 75.11940298507463, "grad_norm": 18.252239227294922, "learning_rate": 8.142857142857143e-06, "loss": 43.1075, "step": 3155 }, { "epoch": 75.14328358208955, "grad_norm": 25.58303451538086, "learning_rate": 8.139880952380952e-06, "loss": 43.3715, "step": 3156 }, { "epoch": 75.16716417910447, "grad_norm": 24.198566436767578, "learning_rate": 8.136904761904763e-06, "loss": 42.042, "step": 3157 }, { "epoch": 75.1910447761194, "grad_norm": 21.632183074951172, "learning_rate": 8.133928571428572e-06, "loss": 42.4693, "step": 3158 }, { "epoch": 75.21492537313434, "grad_norm": 27.104801177978516, "learning_rate": 8.130952380952381e-06, "loss": 42.597, "step": 3159 }, { "epoch": 75.23880597014926, "grad_norm": 21.614917755126953, "learning_rate": 8.127976190476192e-06, "loss": 42.9729, "step": 3160 }, { "epoch": 75.26268656716418, "grad_norm": 27.62027359008789, "learning_rate": 8.125000000000001e-06, "loss": 43.3302, "step": 3161 }, { "epoch": 75.2865671641791, "grad_norm": 24.087974548339844, "learning_rate": 8.12202380952381e-06, "loss": 44.1364, "step": 3162 }, { "epoch": 75.31044776119403, "grad_norm": 21.590192794799805, "learning_rate": 8.119047619047619e-06, "loss": 42.7373, "step": 3163 }, { "epoch": 75.33432835820895, "grad_norm": 27.612075805664062, "learning_rate": 8.11607142857143e-06, "loss": 43.5758, "step": 3164 }, { "epoch": 75.35820895522389, "grad_norm": 18.209209442138672, "learning_rate": 8.113095238095239e-06, "loss": 43.309, "step": 3165 }, { "epoch": 75.38208955223881, "grad_norm": 28.845134735107422, "learning_rate": 8.110119047619048e-06, "loss": 43.3125, "step": 3166 }, { "epoch": 75.40597014925373, "grad_norm": 20.03913116455078, "learning_rate": 8.107142857142859e-06, "loss": 44.666, "step": 3167 }, { "epoch": 75.42985074626866, "grad_norm": 29.69953155517578, "learning_rate": 8.104166666666668e-06, "loss": 43.3558, "step": 3168 }, { "epoch": 75.45373134328358, "grad_norm": 22.189376831054688, "learning_rate": 8.101190476190477e-06, "loss": 43.6229, "step": 3169 }, { "epoch": 75.4776119402985, "grad_norm": 23.93678092956543, "learning_rate": 8.098214285714286e-06, "loss": 42.9279, "step": 3170 }, { "epoch": 75.50149253731344, "grad_norm": 21.489761352539062, "learning_rate": 8.095238095238097e-06, "loss": 43.4537, "step": 3171 }, { "epoch": 75.52537313432836, "grad_norm": 18.95380210876465, "learning_rate": 8.092261904761906e-06, "loss": 42.9752, "step": 3172 }, { "epoch": 75.54925373134328, "grad_norm": 26.20965576171875, "learning_rate": 8.089285714285715e-06, "loss": 42.7511, "step": 3173 }, { "epoch": 75.57313432835821, "grad_norm": 19.629926681518555, "learning_rate": 8.086309523809525e-06, "loss": 43.7784, "step": 3174 }, { "epoch": 75.59701492537313, "grad_norm": 25.866622924804688, "learning_rate": 8.083333333333334e-06, "loss": 42.7349, "step": 3175 }, { "epoch": 75.62089552238805, "grad_norm": 24.383323669433594, "learning_rate": 8.080357142857143e-06, "loss": 42.5395, "step": 3176 }, { "epoch": 75.64477611940299, "grad_norm": 19.74950408935547, "learning_rate": 8.077380952380953e-06, "loss": 43.1058, "step": 3177 }, { "epoch": 75.66865671641791, "grad_norm": 28.67831039428711, "learning_rate": 8.074404761904763e-06, "loss": 43.5871, "step": 3178 }, { "epoch": 75.69253731343284, "grad_norm": 23.102951049804688, "learning_rate": 8.071428571428572e-06, "loss": 42.76, "step": 3179 }, { "epoch": 75.71641791044776, "grad_norm": 35.02995681762695, "learning_rate": 8.068452380952381e-06, "loss": 43.8252, "step": 3180 }, { "epoch": 75.74029850746268, "grad_norm": 24.358551025390625, "learning_rate": 8.065476190476192e-06, "loss": 43.1074, "step": 3181 }, { "epoch": 75.7641791044776, "grad_norm": 30.14754295349121, "learning_rate": 8.062500000000001e-06, "loss": 43.8415, "step": 3182 }, { "epoch": 75.78805970149254, "grad_norm": 24.45053482055664, "learning_rate": 8.05952380952381e-06, "loss": 43.0215, "step": 3183 }, { "epoch": 75.81194029850747, "grad_norm": 37.40525436401367, "learning_rate": 8.05654761904762e-06, "loss": 42.961, "step": 3184 }, { "epoch": 75.83582089552239, "grad_norm": 24.555240631103516, "learning_rate": 8.05357142857143e-06, "loss": 44.2708, "step": 3185 }, { "epoch": 75.85970149253731, "grad_norm": 37.460670471191406, "learning_rate": 8.050595238095239e-06, "loss": 43.5956, "step": 3186 }, { "epoch": 75.88358208955223, "grad_norm": 32.54770278930664, "learning_rate": 8.047619047619048e-06, "loss": 42.3289, "step": 3187 }, { "epoch": 75.90746268656716, "grad_norm": 38.01876449584961, "learning_rate": 8.044642857142859e-06, "loss": 42.9972, "step": 3188 }, { "epoch": 75.9313432835821, "grad_norm": 30.63246726989746, "learning_rate": 8.041666666666668e-06, "loss": 42.5397, "step": 3189 }, { "epoch": 75.95522388059702, "grad_norm": 27.40627670288086, "learning_rate": 8.038690476190477e-06, "loss": 41.377, "step": 3190 }, { "epoch": 75.97910447761194, "grad_norm": 26.620893478393555, "learning_rate": 8.035714285714286e-06, "loss": 42.9367, "step": 3191 }, { "epoch": 76.0, "grad_norm": 31.36514663696289, "learning_rate": 8.032738095238097e-06, "loss": 37.8523, "step": 3192 }, { "epoch": 76.02388059701492, "grad_norm": 27.55282974243164, "learning_rate": 8.029761904761906e-06, "loss": 43.2614, "step": 3193 }, { "epoch": 76.04776119402985, "grad_norm": 36.373634338378906, "learning_rate": 8.026785714285715e-06, "loss": 42.1966, "step": 3194 }, { "epoch": 76.07164179104478, "grad_norm": 29.89250373840332, "learning_rate": 8.023809523809526e-06, "loss": 43.0278, "step": 3195 }, { "epoch": 76.0955223880597, "grad_norm": 28.84893226623535, "learning_rate": 8.020833333333335e-06, "loss": 44.8331, "step": 3196 }, { "epoch": 76.11940298507463, "grad_norm": 27.258445739746094, "learning_rate": 8.017857142857144e-06, "loss": 42.5176, "step": 3197 }, { "epoch": 76.14328358208955, "grad_norm": 30.8077449798584, "learning_rate": 8.014880952380953e-06, "loss": 43.3045, "step": 3198 }, { "epoch": 76.16716417910447, "grad_norm": 28.528837203979492, "learning_rate": 8.011904761904763e-06, "loss": 43.1302, "step": 3199 }, { "epoch": 76.1910447761194, "grad_norm": 30.751039505004883, "learning_rate": 8.008928571428572e-06, "loss": 43.7287, "step": 3200 }, { "epoch": 76.21492537313434, "grad_norm": 27.781261444091797, "learning_rate": 8.005952380952382e-06, "loss": 43.3939, "step": 3201 }, { "epoch": 76.23880597014926, "grad_norm": 27.45984649658203, "learning_rate": 8.002976190476192e-06, "loss": 43.6411, "step": 3202 }, { "epoch": 76.26268656716418, "grad_norm": 26.628419876098633, "learning_rate": 8.000000000000001e-06, "loss": 42.6454, "step": 3203 }, { "epoch": 76.2865671641791, "grad_norm": 36.02729034423828, "learning_rate": 7.99702380952381e-06, "loss": 43.2459, "step": 3204 }, { "epoch": 76.31044776119403, "grad_norm": 28.480478286743164, "learning_rate": 7.99404761904762e-06, "loss": 42.7675, "step": 3205 }, { "epoch": 76.33432835820895, "grad_norm": 31.36353874206543, "learning_rate": 7.99107142857143e-06, "loss": 42.8571, "step": 3206 }, { "epoch": 76.35820895522389, "grad_norm": 29.178728103637695, "learning_rate": 7.98809523809524e-06, "loss": 42.7477, "step": 3207 }, { "epoch": 76.38208955223881, "grad_norm": 28.539457321166992, "learning_rate": 7.985119047619048e-06, "loss": 44.1444, "step": 3208 }, { "epoch": 76.40597014925373, "grad_norm": 26.178895950317383, "learning_rate": 7.982142857142859e-06, "loss": 42.7187, "step": 3209 }, { "epoch": 76.42985074626866, "grad_norm": 30.825010299682617, "learning_rate": 7.979166666666668e-06, "loss": 43.478, "step": 3210 }, { "epoch": 76.45373134328358, "grad_norm": 27.317245483398438, "learning_rate": 7.976190476190477e-06, "loss": 43.479, "step": 3211 }, { "epoch": 76.4776119402985, "grad_norm": 31.42888641357422, "learning_rate": 7.973214285714286e-06, "loss": 43.7278, "step": 3212 }, { "epoch": 76.50149253731344, "grad_norm": 28.949392318725586, "learning_rate": 7.970238095238097e-06, "loss": 43.6134, "step": 3213 }, { "epoch": 76.52537313432836, "grad_norm": 36.61643981933594, "learning_rate": 7.967261904761904e-06, "loss": 44.1841, "step": 3214 }, { "epoch": 76.54925373134328, "grad_norm": 31.78457260131836, "learning_rate": 7.964285714285715e-06, "loss": 43.9995, "step": 3215 }, { "epoch": 76.57313432835821, "grad_norm": 29.883163452148438, "learning_rate": 7.961309523809524e-06, "loss": 42.596, "step": 3216 }, { "epoch": 76.59701492537313, "grad_norm": 27.458534240722656, "learning_rate": 7.958333333333333e-06, "loss": 43.7156, "step": 3217 }, { "epoch": 76.62089552238805, "grad_norm": 26.423311233520508, "learning_rate": 7.955357142857144e-06, "loss": 42.2925, "step": 3218 }, { "epoch": 76.64477611940299, "grad_norm": 22.850927352905273, "learning_rate": 7.952380952380953e-06, "loss": 43.3146, "step": 3219 }, { "epoch": 76.66865671641791, "grad_norm": 32.23415756225586, "learning_rate": 7.949404761904762e-06, "loss": 43.4622, "step": 3220 }, { "epoch": 76.69253731343284, "grad_norm": 25.596759796142578, "learning_rate": 7.946428571428571e-06, "loss": 42.6238, "step": 3221 }, { "epoch": 76.71641791044776, "grad_norm": 28.371593475341797, "learning_rate": 7.943452380952382e-06, "loss": 41.2267, "step": 3222 }, { "epoch": 76.74029850746268, "grad_norm": 24.369253158569336, "learning_rate": 7.94047619047619e-06, "loss": 43.24, "step": 3223 }, { "epoch": 76.7641791044776, "grad_norm": 34.42658996582031, "learning_rate": 7.9375e-06, "loss": 42.8095, "step": 3224 }, { "epoch": 76.78805970149254, "grad_norm": 26.35492515563965, "learning_rate": 7.93452380952381e-06, "loss": 42.0312, "step": 3225 }, { "epoch": 76.81194029850747, "grad_norm": 33.34773254394531, "learning_rate": 7.93154761904762e-06, "loss": 43.4483, "step": 3226 }, { "epoch": 76.83582089552239, "grad_norm": 31.470170974731445, "learning_rate": 7.928571428571429e-06, "loss": 43.9896, "step": 3227 }, { "epoch": 76.85970149253731, "grad_norm": 28.38050651550293, "learning_rate": 7.925595238095238e-06, "loss": 43.9711, "step": 3228 }, { "epoch": 76.88358208955223, "grad_norm": NaN, "learning_rate": 7.922619047619048e-06, "loss": 75.7577, "step": 3229 }, { "epoch": 76.90746268656716, "grad_norm": 21.927776336669922, "learning_rate": 7.922619047619048e-06, "loss": 42.1852, "step": 3230 }, { "epoch": 76.9313432835821, "grad_norm": 28.636518478393555, "learning_rate": 7.919642857142857e-06, "loss": 43.103, "step": 3231 }, { "epoch": 76.95522388059702, "grad_norm": 25.48936653137207, "learning_rate": 7.916666666666667e-06, "loss": 43.1688, "step": 3232 }, { "epoch": 76.97910447761194, "grad_norm": 29.641143798828125, "learning_rate": 7.913690476190477e-06, "loss": 41.7518, "step": 3233 }, { "epoch": 77.0, "grad_norm": 22.023099899291992, "learning_rate": 7.910714285714286e-06, "loss": 38.1447, "step": 3234 }, { "epoch": 77.02388059701492, "grad_norm": 35.88689041137695, "learning_rate": 7.907738095238095e-06, "loss": 43.1578, "step": 3235 }, { "epoch": 77.04776119402985, "grad_norm": 34.37343978881836, "learning_rate": 7.904761904761904e-06, "loss": 43.0582, "step": 3236 }, { "epoch": 77.07164179104478, "grad_norm": 18.577016830444336, "learning_rate": 7.901785714285715e-06, "loss": 42.1815, "step": 3237 }, { "epoch": 77.0955223880597, "grad_norm": 23.373125076293945, "learning_rate": 7.898809523809524e-06, "loss": 44.491, "step": 3238 }, { "epoch": 77.11940298507463, "grad_norm": 28.848159790039062, "learning_rate": 7.895833333333333e-06, "loss": 43.0114, "step": 3239 }, { "epoch": 77.14328358208955, "grad_norm": 19.10719108581543, "learning_rate": 7.892857142857144e-06, "loss": 42.862, "step": 3240 }, { "epoch": 77.16716417910447, "grad_norm": 34.79095458984375, "learning_rate": 7.889880952380953e-06, "loss": 43.7736, "step": 3241 }, { "epoch": 77.1910447761194, "grad_norm": 28.950021743774414, "learning_rate": 7.886904761904762e-06, "loss": 44.5221, "step": 3242 }, { "epoch": 77.21492537313434, "grad_norm": 29.437536239624023, "learning_rate": 7.883928571428571e-06, "loss": 43.0498, "step": 3243 }, { "epoch": 77.23880597014926, "grad_norm": 26.087984085083008, "learning_rate": 7.880952380952382e-06, "loss": 42.1991, "step": 3244 }, { "epoch": 77.26268656716418, "grad_norm": 30.868637084960938, "learning_rate": 7.877976190476191e-06, "loss": 43.1896, "step": 3245 }, { "epoch": 77.2865671641791, "grad_norm": 26.28648567199707, "learning_rate": 7.875e-06, "loss": 41.9695, "step": 3246 }, { "epoch": 77.31044776119403, "grad_norm": 27.738021850585938, "learning_rate": 7.87202380952381e-06, "loss": 42.9537, "step": 3247 }, { "epoch": 77.33432835820895, "grad_norm": 23.0654296875, "learning_rate": 7.86904761904762e-06, "loss": 43.1053, "step": 3248 }, { "epoch": 77.35820895522389, "grad_norm": 31.976926803588867, "learning_rate": 7.866071428571429e-06, "loss": 42.0648, "step": 3249 }, { "epoch": 77.38208955223881, "grad_norm": 28.690933227539062, "learning_rate": 7.863095238095238e-06, "loss": 43.0786, "step": 3250 }, { "epoch": 77.40597014925373, "grad_norm": 29.870180130004883, "learning_rate": 7.860119047619049e-06, "loss": 44.1362, "step": 3251 }, { "epoch": 77.42985074626866, "grad_norm": 29.524002075195312, "learning_rate": 7.857142857142858e-06, "loss": 42.635, "step": 3252 }, { "epoch": 77.45373134328358, "grad_norm": 24.833131790161133, "learning_rate": 7.854166666666667e-06, "loss": 43.1208, "step": 3253 }, { "epoch": 77.4776119402985, "grad_norm": 24.424755096435547, "learning_rate": 7.851190476190477e-06, "loss": 44.4682, "step": 3254 }, { "epoch": 77.50149253731344, "grad_norm": 30.417823791503906, "learning_rate": 7.848214285714287e-06, "loss": 45.1353, "step": 3255 }, { "epoch": 77.52537313432836, "grad_norm": 23.12209701538086, "learning_rate": 7.845238095238096e-06, "loss": 41.7736, "step": 3256 }, { "epoch": 77.54925373134328, "grad_norm": 30.454221725463867, "learning_rate": 7.842261904761905e-06, "loss": 42.6765, "step": 3257 }, { "epoch": 77.57313432835821, "grad_norm": 30.55715560913086, "learning_rate": 7.839285714285715e-06, "loss": 43.4168, "step": 3258 }, { "epoch": 77.59701492537313, "grad_norm": 26.72547149658203, "learning_rate": 7.836309523809524e-06, "loss": 42.5388, "step": 3259 }, { "epoch": 77.62089552238805, "grad_norm": 25.03418731689453, "learning_rate": 7.833333333333333e-06, "loss": 43.3748, "step": 3260 }, { "epoch": 77.64477611940299, "grad_norm": 28.706029891967773, "learning_rate": 7.830357142857144e-06, "loss": 43.7717, "step": 3261 }, { "epoch": 77.66865671641791, "grad_norm": 30.39940643310547, "learning_rate": 7.827380952380953e-06, "loss": 42.952, "step": 3262 }, { "epoch": 77.69253731343284, "grad_norm": 25.622882843017578, "learning_rate": 7.824404761904762e-06, "loss": 42.7133, "step": 3263 }, { "epoch": 77.71641791044776, "grad_norm": 25.120025634765625, "learning_rate": 7.821428571428571e-06, "loss": 42.2453, "step": 3264 }, { "epoch": 77.74029850746268, "grad_norm": 27.227832794189453, "learning_rate": 7.818452380952382e-06, "loss": 42.4094, "step": 3265 }, { "epoch": 77.7641791044776, "grad_norm": 23.663406372070312, "learning_rate": 7.815476190476191e-06, "loss": 43.7332, "step": 3266 }, { "epoch": 77.78805970149254, "grad_norm": 28.738086700439453, "learning_rate": 7.8125e-06, "loss": 43.7881, "step": 3267 }, { "epoch": 77.81194029850747, "grad_norm": 27.955598831176758, "learning_rate": 7.809523809523811e-06, "loss": 43.4782, "step": 3268 }, { "epoch": 77.83582089552239, "grad_norm": 24.79859161376953, "learning_rate": 7.80654761904762e-06, "loss": 41.0554, "step": 3269 }, { "epoch": 77.85970149253731, "grad_norm": 25.531471252441406, "learning_rate": 7.803571428571429e-06, "loss": 43.0072, "step": 3270 }, { "epoch": 77.88358208955223, "grad_norm": 27.746000289916992, "learning_rate": 7.800595238095238e-06, "loss": 43.8641, "step": 3271 }, { "epoch": 77.90746268656716, "grad_norm": 25.056262969970703, "learning_rate": 7.797619047619049e-06, "loss": 43.1316, "step": 3272 }, { "epoch": 77.9313432835821, "grad_norm": 30.888355255126953, "learning_rate": 7.794642857142858e-06, "loss": 43.482, "step": 3273 }, { "epoch": 77.95522388059702, "grad_norm": 22.501649856567383, "learning_rate": 7.791666666666667e-06, "loss": 43.4869, "step": 3274 }, { "epoch": 77.97910447761194, "grad_norm": 31.175397872924805, "learning_rate": 7.788690476190478e-06, "loss": 43.5349, "step": 3275 }, { "epoch": 78.0, "grad_norm": 20.901432037353516, "learning_rate": 7.785714285714287e-06, "loss": 36.2874, "step": 3276 }, { "epoch": 78.02388059701492, "grad_norm": 30.319852828979492, "learning_rate": 7.782738095238096e-06, "loss": 41.102, "step": 3277 }, { "epoch": 78.04776119402985, "grad_norm": 28.31625747680664, "learning_rate": 7.779761904761905e-06, "loss": 42.4304, "step": 3278 }, { "epoch": 78.07164179104478, "grad_norm": 26.445859909057617, "learning_rate": 7.776785714285716e-06, "loss": 42.8755, "step": 3279 }, { "epoch": 78.0955223880597, "grad_norm": 20.42568588256836, "learning_rate": 7.773809523809525e-06, "loss": 44.5072, "step": 3280 }, { "epoch": 78.11940298507463, "grad_norm": 28.535858154296875, "learning_rate": 7.770833333333334e-06, "loss": 42.325, "step": 3281 }, { "epoch": 78.14328358208955, "grad_norm": 21.800678253173828, "learning_rate": 7.767857142857144e-06, "loss": 44.3283, "step": 3282 }, { "epoch": 78.16716417910447, "grad_norm": 26.74295997619629, "learning_rate": 7.764880952380953e-06, "loss": 44.3208, "step": 3283 }, { "epoch": 78.1910447761194, "grad_norm": 28.9124755859375, "learning_rate": 7.761904761904762e-06, "loss": 42.8112, "step": 3284 }, { "epoch": 78.21492537313434, "grad_norm": 24.948265075683594, "learning_rate": 7.758928571428571e-06, "loss": 42.6617, "step": 3285 }, { "epoch": 78.23880597014926, "grad_norm": 25.038854598999023, "learning_rate": 7.755952380952382e-06, "loss": 42.6089, "step": 3286 }, { "epoch": 78.26268656716418, "grad_norm": 24.622905731201172, "learning_rate": 7.752976190476191e-06, "loss": 42.9201, "step": 3287 }, { "epoch": 78.2865671641791, "grad_norm": 22.999900817871094, "learning_rate": 7.75e-06, "loss": 43.2141, "step": 3288 }, { "epoch": 78.31044776119403, "grad_norm": 22.848161697387695, "learning_rate": 7.747023809523811e-06, "loss": 44.1053, "step": 3289 }, { "epoch": 78.33432835820895, "grad_norm": 16.15705108642578, "learning_rate": 7.74404761904762e-06, "loss": 43.7009, "step": 3290 }, { "epoch": 78.35820895522389, "grad_norm": 29.3355655670166, "learning_rate": 7.74107142857143e-06, "loss": 42.3037, "step": 3291 }, { "epoch": 78.38208955223881, "grad_norm": 19.516281127929688, "learning_rate": 7.738095238095238e-06, "loss": 42.6299, "step": 3292 }, { "epoch": 78.40597014925373, "grad_norm": 34.26980209350586, "learning_rate": 7.735119047619049e-06, "loss": 43.052, "step": 3293 }, { "epoch": 78.42985074626866, "grad_norm": 32.0604133605957, "learning_rate": 7.732142857142858e-06, "loss": 42.4497, "step": 3294 }, { "epoch": 78.45373134328358, "grad_norm": 23.038795471191406, "learning_rate": 7.729166666666667e-06, "loss": 42.5542, "step": 3295 }, { "epoch": 78.4776119402985, "grad_norm": 27.498064041137695, "learning_rate": 7.726190476190478e-06, "loss": 41.834, "step": 3296 }, { "epoch": 78.50149253731344, "grad_norm": 25.38565444946289, "learning_rate": 7.723214285714287e-06, "loss": 44.7325, "step": 3297 }, { "epoch": 78.52537313432836, "grad_norm": 21.209095001220703, "learning_rate": 7.720238095238096e-06, "loss": 44.6015, "step": 3298 }, { "epoch": 78.54925373134328, "grad_norm": 27.321908950805664, "learning_rate": 7.717261904761905e-06, "loss": 43.3014, "step": 3299 }, { "epoch": 78.57313432835821, "grad_norm": 20.742706298828125, "learning_rate": 7.714285714285716e-06, "loss": 44.1572, "step": 3300 }, { "epoch": 78.59701492537313, "grad_norm": 28.640583038330078, "learning_rate": 7.711309523809525e-06, "loss": 42.1555, "step": 3301 }, { "epoch": 78.62089552238805, "grad_norm": 30.252870559692383, "learning_rate": 7.708333333333334e-06, "loss": 43.4469, "step": 3302 }, { "epoch": 78.64477611940299, "grad_norm": 23.6368350982666, "learning_rate": 7.705357142857145e-06, "loss": 44.3375, "step": 3303 }, { "epoch": 78.66865671641791, "grad_norm": 22.434412002563477, "learning_rate": 7.702380952380954e-06, "loss": 42.8106, "step": 3304 }, { "epoch": 78.69253731343284, "grad_norm": 28.329635620117188, "learning_rate": 7.699404761904763e-06, "loss": 42.8968, "step": 3305 }, { "epoch": 78.71641791044776, "grad_norm": 21.02295684814453, "learning_rate": 7.696428571428572e-06, "loss": 42.1169, "step": 3306 }, { "epoch": 78.74029850746268, "grad_norm": 30.06182861328125, "learning_rate": 7.693452380952382e-06, "loss": 43.0741, "step": 3307 }, { "epoch": 78.7641791044776, "grad_norm": 22.40550994873047, "learning_rate": 7.690476190476191e-06, "loss": 42.8449, "step": 3308 }, { "epoch": 78.78805970149254, "grad_norm": 28.855802536010742, "learning_rate": 7.6875e-06, "loss": 43.0846, "step": 3309 }, { "epoch": 78.81194029850747, "grad_norm": 25.507308959960938, "learning_rate": 7.684523809523811e-06, "loss": 41.954, "step": 3310 }, { "epoch": 78.83582089552239, "grad_norm": 26.092424392700195, "learning_rate": 7.68154761904762e-06, "loss": 42.1684, "step": 3311 }, { "epoch": 78.85970149253731, "grad_norm": 24.099889755249023, "learning_rate": 7.67857142857143e-06, "loss": 43.9231, "step": 3312 }, { "epoch": 78.88358208955223, "grad_norm": 28.72806739807129, "learning_rate": 7.675595238095238e-06, "loss": 42.546, "step": 3313 }, { "epoch": 78.90746268656716, "grad_norm": 26.489227294921875, "learning_rate": 7.672619047619049e-06, "loss": 44.1023, "step": 3314 }, { "epoch": 78.9313432835821, "grad_norm": 29.59152603149414, "learning_rate": 7.669642857142858e-06, "loss": 43.7005, "step": 3315 }, { "epoch": 78.95522388059702, "grad_norm": 23.0878963470459, "learning_rate": 7.666666666666667e-06, "loss": 41.9249, "step": 3316 }, { "epoch": 78.97910447761194, "grad_norm": 29.851896286010742, "learning_rate": 7.663690476190478e-06, "loss": 42.2078, "step": 3317 }, { "epoch": 79.0, "grad_norm": 23.739883422851562, "learning_rate": 7.660714285714287e-06, "loss": 39.1357, "step": 3318 }, { "epoch": 79.02388059701492, "grad_norm": 23.394466400146484, "learning_rate": 7.657738095238096e-06, "loss": 43.7385, "step": 3319 }, { "epoch": 79.04776119402985, "grad_norm": 22.10674285888672, "learning_rate": 7.654761904761905e-06, "loss": 43.253, "step": 3320 }, { "epoch": 79.07164179104478, "grad_norm": 25.71041488647461, "learning_rate": 7.651785714285714e-06, "loss": 43.6012, "step": 3321 }, { "epoch": 79.0955223880597, "grad_norm": 18.054738998413086, "learning_rate": 7.648809523809523e-06, "loss": 42.5356, "step": 3322 }, { "epoch": 79.11940298507463, "grad_norm": 25.66161346435547, "learning_rate": 7.645833333333334e-06, "loss": 43.796, "step": 3323 }, { "epoch": 79.14328358208955, "grad_norm": 15.92872142791748, "learning_rate": 7.642857142857143e-06, "loss": 43.4924, "step": 3324 }, { "epoch": 79.16716417910447, "grad_norm": 26.33378791809082, "learning_rate": 7.639880952380952e-06, "loss": 41.9388, "step": 3325 }, { "epoch": 79.1910447761194, "grad_norm": 18.938690185546875, "learning_rate": 7.636904761904763e-06, "loss": 42.6458, "step": 3326 }, { "epoch": 79.21492537313434, "grad_norm": 21.968505859375, "learning_rate": 7.633928571428572e-06, "loss": 43.2856, "step": 3327 }, { "epoch": 79.23880597014926, "grad_norm": 21.652313232421875, "learning_rate": 7.630952380952381e-06, "loss": 43.1669, "step": 3328 }, { "epoch": 79.26268656716418, "grad_norm": 16.064531326293945, "learning_rate": 7.627976190476191e-06, "loss": 43.09, "step": 3329 }, { "epoch": 79.2865671641791, "grad_norm": 21.19333839416504, "learning_rate": 7.625e-06, "loss": 42.7371, "step": 3330 }, { "epoch": 79.31044776119403, "grad_norm": 19.381980895996094, "learning_rate": 7.62202380952381e-06, "loss": 43.129, "step": 3331 }, { "epoch": 79.33432835820895, "grad_norm": 17.10456085205078, "learning_rate": 7.61904761904762e-06, "loss": 42.795, "step": 3332 }, { "epoch": 79.35820895522389, "grad_norm": 18.57830810546875, "learning_rate": 7.616071428571429e-06, "loss": 43.1172, "step": 3333 }, { "epoch": 79.38208955223881, "grad_norm": 16.343597412109375, "learning_rate": 7.6130952380952386e-06, "loss": 44.1413, "step": 3334 }, { "epoch": 79.40597014925373, "grad_norm": 18.999656677246094, "learning_rate": 7.610119047619048e-06, "loss": 43.154, "step": 3335 }, { "epoch": 79.42985074626866, "grad_norm": 18.70110321044922, "learning_rate": 7.6071428571428575e-06, "loss": 43.2832, "step": 3336 }, { "epoch": 79.45373134328358, "grad_norm": 17.107995986938477, "learning_rate": 7.6041666666666666e-06, "loss": 42.6499, "step": 3337 }, { "epoch": 79.4776119402985, "grad_norm": 20.98540496826172, "learning_rate": 7.6011904761904765e-06, "loss": 42.6728, "step": 3338 }, { "epoch": 79.50149253731344, "grad_norm": 18.264223098754883, "learning_rate": 7.598214285714286e-06, "loss": 42.1924, "step": 3339 }, { "epoch": 79.52537313432836, "grad_norm": 22.478178024291992, "learning_rate": 7.595238095238095e-06, "loss": 43.1835, "step": 3340 }, { "epoch": 79.54925373134328, "grad_norm": 21.464313507080078, "learning_rate": 7.592261904761905e-06, "loss": 42.8992, "step": 3341 }, { "epoch": 79.57313432835821, "grad_norm": 23.627376556396484, "learning_rate": 7.589285714285714e-06, "loss": 43.3444, "step": 3342 }, { "epoch": 79.59701492537313, "grad_norm": 20.699804306030273, "learning_rate": 7.586309523809524e-06, "loss": 43.5294, "step": 3343 }, { "epoch": 79.62089552238805, "grad_norm": 27.1911678314209, "learning_rate": 7.583333333333333e-06, "loss": 42.3842, "step": 3344 }, { "epoch": 79.64477611940299, "grad_norm": 22.591445922851562, "learning_rate": 7.580357142857143e-06, "loss": 43.2132, "step": 3345 }, { "epoch": 79.66865671641791, "grad_norm": 23.79202651977539, "learning_rate": 7.577380952380953e-06, "loss": 42.7603, "step": 3346 }, { "epoch": 79.69253731343284, "grad_norm": 21.520214080810547, "learning_rate": 7.574404761904762e-06, "loss": 42.9868, "step": 3347 }, { "epoch": 79.71641791044776, "grad_norm": 21.92240333557129, "learning_rate": 7.571428571428572e-06, "loss": 42.5148, "step": 3348 }, { "epoch": 79.74029850746268, "grad_norm": 21.808698654174805, "learning_rate": 7.568452380952381e-06, "loss": 42.2734, "step": 3349 }, { "epoch": 79.7641791044776, "grad_norm": 21.703947067260742, "learning_rate": 7.565476190476191e-06, "loss": 43.9589, "step": 3350 }, { "epoch": 79.78805970149254, "grad_norm": 21.56643295288086, "learning_rate": 7.5625e-06, "loss": 42.249, "step": 3351 }, { "epoch": 79.81194029850747, "grad_norm": 20.325498580932617, "learning_rate": 7.55952380952381e-06, "loss": 42.5246, "step": 3352 }, { "epoch": 79.83582089552239, "grad_norm": 20.19651985168457, "learning_rate": 7.55654761904762e-06, "loss": 43.1353, "step": 3353 }, { "epoch": 79.85970149253731, "grad_norm": 15.062832832336426, "learning_rate": 7.553571428571429e-06, "loss": 42.7335, "step": 3354 }, { "epoch": 79.88358208955223, "grad_norm": 21.990650177001953, "learning_rate": 7.550595238095239e-06, "loss": 44.103, "step": 3355 }, { "epoch": 79.90746268656716, "grad_norm": 17.816457748413086, "learning_rate": 7.547619047619048e-06, "loss": 43.592, "step": 3356 }, { "epoch": 79.9313432835821, "grad_norm": 21.62665557861328, "learning_rate": 7.544642857142858e-06, "loss": 44.0372, "step": 3357 }, { "epoch": 79.95522388059702, "grad_norm": 20.444469451904297, "learning_rate": 7.541666666666667e-06, "loss": 42.7547, "step": 3358 }, { "epoch": 79.97910447761194, "grad_norm": 15.230064392089844, "learning_rate": 7.538690476190477e-06, "loss": 42.4287, "step": 3359 }, { "epoch": 80.0, "grad_norm": 18.977619171142578, "learning_rate": 7.5357142857142865e-06, "loss": 36.8674, "step": 3360 }, { "epoch": 80.0, "step": 3360, "total_flos": 1.6516474192825325e+17, "train_loss": 10.921977708453223, "train_runtime": 25778.6818, "train_samples_per_second": 16.609, "train_steps_per_second": 0.13 }, { "epoch": 80.02388059701492, "grad_norm": 20.951553344726562, "learning_rate": 1e-05, "loss": 42.8953, "step": 3361 }, { "epoch": 80.04776119402985, "grad_norm": Infinity, "learning_rate": 9.997354497354498e-06, "loss": 49.2702, "step": 3362 }, { "epoch": 80.07164179104478, "grad_norm": 272.02093505859375, "learning_rate": 9.997354497354498e-06, "loss": 48.7639, "step": 3363 }, { "epoch": 80.0955223880597, "grad_norm": 136.40426635742188, "learning_rate": 9.994708994708996e-06, "loss": 48.2845, "step": 3364 }, { "epoch": 80.11940298507463, "grad_norm": 69.2103500366211, "learning_rate": 9.992063492063493e-06, "loss": 45.905, "step": 3365 }, { "epoch": 80.14328358208955, "grad_norm": 42.27269744873047, "learning_rate": 9.989417989417989e-06, "loss": 44.495, "step": 3366 }, { "epoch": 80.16716417910447, "grad_norm": 78.32905578613281, "learning_rate": 9.986772486772488e-06, "loss": 43.787, "step": 3367 }, { "epoch": 80.1910447761194, "grad_norm": 53.60576248168945, "learning_rate": 9.984126984126986e-06, "loss": 44.9412, "step": 3368 }, { "epoch": 80.21492537313434, "grad_norm": 43.58672332763672, "learning_rate": 9.981481481481482e-06, "loss": 43.5559, "step": 3369 }, { "epoch": 80.23880597014926, "grad_norm": 52.74037170410156, "learning_rate": 9.97883597883598e-06, "loss": 43.7715, "step": 3370 }, { "epoch": 80.26268656716418, "grad_norm": 36.5859260559082, "learning_rate": 9.976190476190477e-06, "loss": 44.8368, "step": 3371 }, { "epoch": 80.2865671641791, "grad_norm": 41.1060676574707, "learning_rate": 9.973544973544974e-06, "loss": 44.2442, "step": 3372 }, { "epoch": 80.31044776119403, "grad_norm": 29.22023582458496, "learning_rate": 9.970899470899472e-06, "loss": 44.9361, "step": 3373 }, { "epoch": 80.33432835820895, "grad_norm": 23.876710891723633, "learning_rate": 9.968253968253969e-06, "loss": 43.0819, "step": 3374 }, { "epoch": 80.35820895522389, "grad_norm": 29.575992584228516, "learning_rate": 9.965608465608467e-06, "loss": 43.4547, "step": 3375 }, { "epoch": 80.38208955223881, "grad_norm": 30.555126190185547, "learning_rate": 9.962962962962964e-06, "loss": 42.7816, "step": 3376 }, { "epoch": 80.40597014925373, "grad_norm": 22.153589248657227, "learning_rate": 9.960317460317462e-06, "loss": 43.225, "step": 3377 }, { "epoch": 80.42985074626866, "grad_norm": 22.4864501953125, "learning_rate": 9.957671957671959e-06, "loss": 44.3476, "step": 3378 }, { "epoch": 80.45373134328358, "grad_norm": 28.664342880249023, "learning_rate": 9.955026455026457e-06, "loss": 43.8263, "step": 3379 }, { "epoch": 80.4776119402985, "grad_norm": 20.183809280395508, "learning_rate": 9.952380952380954e-06, "loss": 43.0054, "step": 3380 }, { "epoch": 80.50149253731344, "grad_norm": 20.122495651245117, "learning_rate": 9.94973544973545e-06, "loss": 42.8467, "step": 3381 }, { "epoch": 80.52537313432836, "grad_norm": 18.21672821044922, "learning_rate": 9.947089947089947e-06, "loss": 43.1002, "step": 3382 }, { "epoch": 80.54925373134328, "grad_norm": 19.279260635375977, "learning_rate": 9.944444444444445e-06, "loss": 43.057, "step": 3383 }, { "epoch": 80.57313432835821, "grad_norm": 16.66730308532715, "learning_rate": 9.941798941798942e-06, "loss": 41.9396, "step": 3384 }, { "epoch": 80.59701492537313, "grad_norm": 23.94289779663086, "learning_rate": 9.93915343915344e-06, "loss": 41.9997, "step": 3385 }, { "epoch": 80.62089552238805, "grad_norm": 19.543209075927734, "learning_rate": 9.936507936507937e-06, "loss": 43.4446, "step": 3386 }, { "epoch": 80.64477611940299, "grad_norm": 16.7114315032959, "learning_rate": 9.933862433862435e-06, "loss": 42.8548, "step": 3387 }, { "epoch": 80.66865671641791, "grad_norm": 14.687740325927734, "learning_rate": 9.931216931216932e-06, "loss": 43.4851, "step": 3388 }, { "epoch": 80.69253731343284, "grad_norm": 20.930234909057617, "learning_rate": 9.92857142857143e-06, "loss": 43.216, "step": 3389 }, { "epoch": 80.71641791044776, "grad_norm": 18.500185012817383, "learning_rate": 9.925925925925927e-06, "loss": 43.32, "step": 3390 }, { "epoch": 80.74029850746268, "grad_norm": 17.255064010620117, "learning_rate": 9.923280423280423e-06, "loss": 41.8527, "step": 3391 }, { "epoch": 80.7641791044776, "grad_norm": 23.286033630371094, "learning_rate": 9.920634920634922e-06, "loss": 42.4732, "step": 3392 }, { "epoch": 80.78805970149254, "grad_norm": 21.66954803466797, "learning_rate": 9.917989417989418e-06, "loss": 43.0689, "step": 3393 }, { "epoch": 80.81194029850747, "grad_norm": 15.510072708129883, "learning_rate": 9.915343915343916e-06, "loss": 42.6028, "step": 3394 }, { "epoch": 80.83582089552239, "grad_norm": 17.338539123535156, "learning_rate": 9.912698412698413e-06, "loss": 43.066, "step": 3395 }, { "epoch": 80.85970149253731, "grad_norm": 28.546316146850586, "learning_rate": 9.91005291005291e-06, "loss": 42.7705, "step": 3396 }, { "epoch": 80.88358208955223, "grad_norm": 21.883974075317383, "learning_rate": 9.907407407407408e-06, "loss": 42.3245, "step": 3397 }, { "epoch": 80.90746268656716, "grad_norm": 23.212677001953125, "learning_rate": 9.904761904761906e-06, "loss": 43.1431, "step": 3398 }, { "epoch": 80.9313432835821, "grad_norm": 19.58159828186035, "learning_rate": 9.902116402116403e-06, "loss": 43.5287, "step": 3399 }, { "epoch": 80.95522388059702, "grad_norm": 26.139862060546875, "learning_rate": 9.8994708994709e-06, "loss": 42.9908, "step": 3400 }, { "epoch": 80.97910447761194, "grad_norm": 16.672977447509766, "learning_rate": 9.896825396825398e-06, "loss": 42.1315, "step": 3401 }, { "epoch": 81.0, "grad_norm": 24.852455139160156, "learning_rate": 9.894179894179896e-06, "loss": 36.8278, "step": 3402 }, { "epoch": 81.02388059701492, "grad_norm": 22.26006317138672, "learning_rate": 9.891534391534391e-06, "loss": 42.4729, "step": 3403 }, { "epoch": 81.04776119402985, "grad_norm": 16.017719268798828, "learning_rate": 9.88888888888889e-06, "loss": 42.9225, "step": 3404 }, { "epoch": 81.07164179104478, "grad_norm": 28.550519943237305, "learning_rate": 9.886243386243386e-06, "loss": 42.6745, "step": 3405 }, { "epoch": 81.0955223880597, "grad_norm": 23.507572174072266, "learning_rate": 9.883597883597884e-06, "loss": 42.0028, "step": 3406 }, { "epoch": 81.11940298507463, "grad_norm": 21.06671905517578, "learning_rate": 9.880952380952381e-06, "loss": 43.0596, "step": 3407 }, { "epoch": 81.14328358208955, "grad_norm": 30.52378273010254, "learning_rate": 9.878306878306879e-06, "loss": 42.6651, "step": 3408 }, { "epoch": 81.16716417910447, "grad_norm": 20.8646183013916, "learning_rate": 9.875661375661376e-06, "loss": 42.5492, "step": 3409 }, { "epoch": 81.1910447761194, "grad_norm": 24.76753044128418, "learning_rate": 9.873015873015874e-06, "loss": 44.1658, "step": 3410 }, { "epoch": 81.21492537313434, "grad_norm": 24.59670066833496, "learning_rate": 9.870370370370371e-06, "loss": 41.993, "step": 3411 }, { "epoch": 81.23880597014926, "grad_norm": 18.1619815826416, "learning_rate": 9.867724867724869e-06, "loss": 41.729, "step": 3412 }, { "epoch": 81.26268656716418, "grad_norm": 25.726171493530273, "learning_rate": 9.865079365079366e-06, "loss": 43.4774, "step": 3413 }, { "epoch": 81.2865671641791, "grad_norm": 19.582408905029297, "learning_rate": 9.862433862433864e-06, "loss": 44.2081, "step": 3414 }, { "epoch": 81.31044776119403, "grad_norm": 19.20425033569336, "learning_rate": 9.85978835978836e-06, "loss": 45.2273, "step": 3415 }, { "epoch": 81.33432835820895, "grad_norm": 24.18745994567871, "learning_rate": 9.857142857142859e-06, "loss": 43.2535, "step": 3416 }, { "epoch": 81.35820895522389, "grad_norm": 20.09618377685547, "learning_rate": 9.854497354497355e-06, "loss": 42.837, "step": 3417 }, { "epoch": 81.38208955223881, "grad_norm": 18.357542037963867, "learning_rate": 9.851851851851852e-06, "loss": 42.3722, "step": 3418 }, { "epoch": 81.40597014925373, "grad_norm": 21.53424644470215, "learning_rate": 9.849206349206351e-06, "loss": 42.6014, "step": 3419 }, { "epoch": 81.42985074626866, "grad_norm": 23.138153076171875, "learning_rate": 9.846560846560847e-06, "loss": 43.1802, "step": 3420 }, { "epoch": 81.45373134328358, "grad_norm": NaN, "learning_rate": 9.843915343915345e-06, "loss": 60.8525, "step": 3421 }, { "epoch": 81.4776119402985, "grad_norm": 16.697940826416016, "learning_rate": 9.843915343915345e-06, "loss": 42.6524, "step": 3422 }, { "epoch": 81.50149253731344, "grad_norm": 21.829591751098633, "learning_rate": 9.841269841269842e-06, "loss": 42.8111, "step": 3423 }, { "epoch": 81.52537313432836, "grad_norm": 24.891218185424805, "learning_rate": 9.83862433862434e-06, "loss": 43.6078, "step": 3424 }, { "epoch": 81.54925373134328, "grad_norm": 21.53104019165039, "learning_rate": 9.835978835978837e-06, "loss": 42.8522, "step": 3425 }, { "epoch": 81.57313432835821, "grad_norm": 24.85852813720703, "learning_rate": 9.833333333333333e-06, "loss": 42.5736, "step": 3426 }, { "epoch": 81.59701492537313, "grad_norm": 25.954561233520508, "learning_rate": 9.830687830687832e-06, "loss": 42.513, "step": 3427 }, { "epoch": 81.62089552238805, "grad_norm": 18.79954719543457, "learning_rate": 9.828042328042328e-06, "loss": 42.4569, "step": 3428 }, { "epoch": 81.64477611940299, "grad_norm": 21.777231216430664, "learning_rate": 9.825396825396825e-06, "loss": 41.9235, "step": 3429 }, { "epoch": 81.66865671641791, "grad_norm": 20.84613037109375, "learning_rate": 9.822751322751325e-06, "loss": 43.7221, "step": 3430 }, { "epoch": 81.69253731343284, "grad_norm": 25.095165252685547, "learning_rate": 9.82010582010582e-06, "loss": 43.7676, "step": 3431 }, { "epoch": 81.71641791044776, "grad_norm": 20.732393264770508, "learning_rate": 9.817460317460318e-06, "loss": 42.3845, "step": 3432 }, { "epoch": 81.74029850746268, "grad_norm": NaN, "learning_rate": 9.814814814814815e-06, "loss": 42.1237, "step": 3433 }, { "epoch": 81.7641791044776, "grad_norm": 33.96809768676758, "learning_rate": 9.814814814814815e-06, "loss": 43.6781, "step": 3434 }, { "epoch": 81.78805970149254, "grad_norm": 20.83742904663086, "learning_rate": 9.812169312169313e-06, "loss": 43.1676, "step": 3435 }, { "epoch": 81.81194029850747, "grad_norm": 37.817081451416016, "learning_rate": 9.80952380952381e-06, "loss": 42.5989, "step": 3436 }, { "epoch": 81.83582089552239, "grad_norm": 26.07498550415039, "learning_rate": 9.806878306878308e-06, "loss": 43.613, "step": 3437 }, { "epoch": 81.85970149253731, "grad_norm": 32.35169982910156, "learning_rate": 9.804232804232805e-06, "loss": 43.4166, "step": 3438 }, { "epoch": 81.88358208955223, "grad_norm": 23.49301528930664, "learning_rate": 9.801587301587301e-06, "loss": 40.9932, "step": 3439 }, { "epoch": 81.90746268656716, "grad_norm": 28.475976943969727, "learning_rate": 9.7989417989418e-06, "loss": 44.0779, "step": 3440 }, { "epoch": 81.9313432835821, "grad_norm": 20.77143669128418, "learning_rate": 9.796296296296298e-06, "loss": 43.0358, "step": 3441 }, { "epoch": 81.95522388059702, "grad_norm": 27.558744430541992, "learning_rate": 9.793650793650794e-06, "loss": 42.6501, "step": 3442 }, { "epoch": 81.97910447761194, "grad_norm": 17.57852554321289, "learning_rate": 9.791005291005293e-06, "loss": 43.0594, "step": 3443 }, { "epoch": 82.0, "grad_norm": 30.414134979248047, "learning_rate": 9.788359788359789e-06, "loss": 37.7772, "step": 3444 }, { "epoch": 82.02388059701492, "grad_norm": 29.184572219848633, "learning_rate": 9.785714285714286e-06, "loss": 43.0878, "step": 3445 }, { "epoch": 82.04776119402985, "grad_norm": 24.36541748046875, "learning_rate": 9.783068783068784e-06, "loss": 43.1851, "step": 3446 }, { "epoch": 82.07164179104478, "grad_norm": 24.232807159423828, "learning_rate": 9.780423280423281e-06, "loss": 43.4104, "step": 3447 }, { "epoch": 82.0955223880597, "grad_norm": 29.002002716064453, "learning_rate": 9.777777777777779e-06, "loss": 44.6274, "step": 3448 }, { "epoch": 82.11940298507463, "grad_norm": 22.997961044311523, "learning_rate": 9.775132275132276e-06, "loss": 43.2128, "step": 3449 }, { "epoch": 82.14328358208955, "grad_norm": 26.34942626953125, "learning_rate": 9.772486772486774e-06, "loss": 42.6116, "step": 3450 }, { "epoch": 82.16716417910447, "grad_norm": 19.555774688720703, "learning_rate": 9.769841269841271e-06, "loss": 42.9207, "step": 3451 }, { "epoch": 82.1910447761194, "grad_norm": 25.108083724975586, "learning_rate": 9.767195767195769e-06, "loss": 41.7188, "step": 3452 }, { "epoch": 82.21492537313434, "grad_norm": 20.387653350830078, "learning_rate": 9.764550264550266e-06, "loss": 42.8712, "step": 3453 }, { "epoch": 82.23880597014926, "grad_norm": 24.493921279907227, "learning_rate": 9.761904761904762e-06, "loss": 43.6475, "step": 3454 }, { "epoch": 82.26268656716418, "grad_norm": 23.366165161132812, "learning_rate": 9.759259259259261e-06, "loss": 42.5025, "step": 3455 }, { "epoch": 82.2865671641791, "grad_norm": 25.831466674804688, "learning_rate": 9.756613756613757e-06, "loss": 44.1183, "step": 3456 }, { "epoch": 82.31044776119403, "grad_norm": 20.5382137298584, "learning_rate": 9.753968253968254e-06, "loss": 42.0874, "step": 3457 }, { "epoch": 82.33432835820895, "grad_norm": 23.923063278198242, "learning_rate": 9.751322751322752e-06, "loss": 44.2198, "step": 3458 }, { "epoch": 82.35820895522389, "grad_norm": 21.77039909362793, "learning_rate": 9.74867724867725e-06, "loss": 42.8486, "step": 3459 }, { "epoch": 82.38208955223881, "grad_norm": 19.86173439025879, "learning_rate": 9.746031746031747e-06, "loss": 43.104, "step": 3460 }, { "epoch": 82.40597014925373, "grad_norm": 20.714754104614258, "learning_rate": 9.743386243386244e-06, "loss": 41.789, "step": 3461 }, { "epoch": 82.42985074626866, "grad_norm": 24.748607635498047, "learning_rate": 9.740740740740742e-06, "loss": 41.7835, "step": 3462 }, { "epoch": 82.45373134328358, "grad_norm": 19.247220993041992, "learning_rate": 9.73809523809524e-06, "loss": 42.3253, "step": 3463 }, { "epoch": 82.4776119402985, "grad_norm": 21.964488983154297, "learning_rate": 9.735449735449735e-06, "loss": 40.6579, "step": 3464 }, { "epoch": 82.50149253731344, "grad_norm": 19.75965118408203, "learning_rate": 9.732804232804234e-06, "loss": 42.2777, "step": 3465 }, { "epoch": 82.52537313432836, "grad_norm": 19.871715545654297, "learning_rate": 9.73015873015873e-06, "loss": 41.8654, "step": 3466 }, { "epoch": 82.54925373134328, "grad_norm": 17.353679656982422, "learning_rate": 9.727513227513228e-06, "loss": 43.1572, "step": 3467 }, { "epoch": 82.57313432835821, "grad_norm": 22.952226638793945, "learning_rate": 9.724867724867725e-06, "loss": 42.2348, "step": 3468 }, { "epoch": 82.59701492537313, "grad_norm": 19.62160873413086, "learning_rate": 9.722222222222223e-06, "loss": 43.7133, "step": 3469 }, { "epoch": 82.62089552238805, "grad_norm": NaN, "learning_rate": 9.71957671957672e-06, "loss": 44.3913, "step": 3470 }, { "epoch": 82.64477611940299, "grad_norm": 22.301387786865234, "learning_rate": 9.71957671957672e-06, "loss": 42.7776, "step": 3471 }, { "epoch": 82.66865671641791, "grad_norm": 23.42523956298828, "learning_rate": 9.716931216931218e-06, "loss": 43.9875, "step": 3472 }, { "epoch": 82.69253731343284, "grad_norm": 19.187870025634766, "learning_rate": 9.714285714285715e-06, "loss": 43.6333, "step": 3473 }, { "epoch": 82.71641791044776, "grad_norm": 17.408340454101562, "learning_rate": 9.711640211640213e-06, "loss": 42.3257, "step": 3474 }, { "epoch": 82.74029850746268, "grad_norm": 17.102418899536133, "learning_rate": 9.70899470899471e-06, "loss": 41.7486, "step": 3475 }, { "epoch": 82.7641791044776, "grad_norm": 17.715524673461914, "learning_rate": 9.706349206349208e-06, "loss": 43.9781, "step": 3476 }, { "epoch": 82.78805970149254, "grad_norm": 22.915067672729492, "learning_rate": 9.703703703703703e-06, "loss": 43.0049, "step": 3477 }, { "epoch": 82.81194029850747, "grad_norm": 18.104154586791992, "learning_rate": 9.701058201058203e-06, "loss": 43.0062, "step": 3478 }, { "epoch": 82.83582089552239, "grad_norm": 14.81946086883545, "learning_rate": 9.698412698412698e-06, "loss": 42.0968, "step": 3479 }, { "epoch": 82.85970149253731, "grad_norm": 19.58578872680664, "learning_rate": 9.695767195767196e-06, "loss": 43.6563, "step": 3480 }, { "epoch": 82.88358208955223, "grad_norm": 17.979524612426758, "learning_rate": 9.693121693121693e-06, "loss": 41.9954, "step": 3481 }, { "epoch": 82.90746268656716, "grad_norm": 17.92389488220215, "learning_rate": 9.690476190476191e-06, "loss": 42.0242, "step": 3482 }, { "epoch": 82.9313432835821, "grad_norm": 22.026195526123047, "learning_rate": 9.687830687830688e-06, "loss": 43.2985, "step": 3483 }, { "epoch": 82.95522388059702, "grad_norm": 15.080731391906738, "learning_rate": 9.685185185185186e-06, "loss": 42.8814, "step": 3484 }, { "epoch": 82.97910447761194, "grad_norm": 23.170284271240234, "learning_rate": 9.682539682539683e-06, "loss": 42.4875, "step": 3485 }, { "epoch": 83.0, "grad_norm": 15.19926929473877, "learning_rate": 9.679894179894181e-06, "loss": 38.3047, "step": 3486 }, { "epoch": 83.02388059701492, "grad_norm": 20.842618942260742, "learning_rate": 9.677248677248678e-06, "loss": 41.9214, "step": 3487 }, { "epoch": 83.04776119402985, "grad_norm": 19.11284637451172, "learning_rate": 9.674603174603176e-06, "loss": 43.2375, "step": 3488 }, { "epoch": 83.07164179104478, "grad_norm": 19.39193344116211, "learning_rate": 9.671957671957672e-06, "loss": 43.5418, "step": 3489 }, { "epoch": 83.0955223880597, "grad_norm": 19.154869079589844, "learning_rate": 9.669312169312171e-06, "loss": 42.4917, "step": 3490 }, { "epoch": 83.11940298507463, "grad_norm": 27.682418823242188, "learning_rate": 9.666666666666667e-06, "loss": 43.22, "step": 3491 }, { "epoch": 83.14328358208955, "grad_norm": 19.741304397583008, "learning_rate": 9.664021164021164e-06, "loss": 42.6503, "step": 3492 }, { "epoch": 83.16716417910447, "grad_norm": 23.25188446044922, "learning_rate": 9.661375661375663e-06, "loss": 42.7449, "step": 3493 }, { "epoch": 83.1910447761194, "grad_norm": 25.500925064086914, "learning_rate": 9.65873015873016e-06, "loss": 43.8239, "step": 3494 }, { "epoch": 83.21492537313434, "grad_norm": 22.653488159179688, "learning_rate": 9.656084656084657e-06, "loss": 42.4962, "step": 3495 }, { "epoch": 83.23880597014926, "grad_norm": 21.660871505737305, "learning_rate": 9.653439153439154e-06, "loss": 44.1403, "step": 3496 }, { "epoch": 83.26268656716418, "grad_norm": 24.922666549682617, "learning_rate": 9.650793650793652e-06, "loss": 42.4295, "step": 3497 }, { "epoch": 83.2865671641791, "grad_norm": 20.24859619140625, "learning_rate": 9.64814814814815e-06, "loss": 41.7125, "step": 3498 }, { "epoch": 83.31044776119403, "grad_norm": 16.770278930664062, "learning_rate": 9.645502645502647e-06, "loss": 43.0386, "step": 3499 }, { "epoch": 83.33432835820895, "grad_norm": 20.553585052490234, "learning_rate": 9.642857142857144e-06, "loss": 43.2005, "step": 3500 }, { "epoch": 83.35820895522389, "grad_norm": 22.309749603271484, "learning_rate": 9.64021164021164e-06, "loss": 43.9736, "step": 3501 }, { "epoch": 83.38208955223881, "grad_norm": 16.99924659729004, "learning_rate": 9.63756613756614e-06, "loss": 42.9804, "step": 3502 }, { "epoch": 83.40597014925373, "grad_norm": 17.541120529174805, "learning_rate": 9.634920634920637e-06, "loss": 41.9332, "step": 3503 }, { "epoch": 83.42985074626866, "grad_norm": 19.222923278808594, "learning_rate": 9.632275132275132e-06, "loss": 43.163, "step": 3504 }, { "epoch": 83.45373134328358, "grad_norm": 23.178749084472656, "learning_rate": 9.62962962962963e-06, "loss": 41.4791, "step": 3505 }, { "epoch": 83.4776119402985, "grad_norm": 24.103410720825195, "learning_rate": 9.626984126984127e-06, "loss": 43.5107, "step": 3506 }, { "epoch": 83.50149253731344, "grad_norm": 16.439075469970703, "learning_rate": 9.624338624338625e-06, "loss": 43.6286, "step": 3507 }, { "epoch": 83.52537313432836, "grad_norm": 29.148473739624023, "learning_rate": 9.621693121693122e-06, "loss": 44.0076, "step": 3508 }, { "epoch": 83.54925373134328, "grad_norm": 23.33673667907715, "learning_rate": 9.61904761904762e-06, "loss": 42.0299, "step": 3509 }, { "epoch": 83.57313432835821, "grad_norm": 20.69951820373535, "learning_rate": 9.616402116402117e-06, "loss": 41.9305, "step": 3510 }, { "epoch": 83.59701492537313, "grad_norm": 28.55817413330078, "learning_rate": 9.613756613756613e-06, "loss": 42.112, "step": 3511 }, { "epoch": 83.62089552238805, "grad_norm": 20.63089942932129, "learning_rate": 9.611111111111112e-06, "loss": 42.5737, "step": 3512 }, { "epoch": 83.64477611940299, "grad_norm": 18.186328887939453, "learning_rate": 9.60846560846561e-06, "loss": 42.6654, "step": 3513 }, { "epoch": 83.66865671641791, "grad_norm": 30.312583923339844, "learning_rate": 9.605820105820106e-06, "loss": 41.6198, "step": 3514 }, { "epoch": 83.69253731343284, "grad_norm": 22.397600173950195, "learning_rate": 9.603174603174605e-06, "loss": 43.7027, "step": 3515 }, { "epoch": 83.71641791044776, "grad_norm": 22.637603759765625, "learning_rate": 9.6005291005291e-06, "loss": 43.3998, "step": 3516 }, { "epoch": 83.74029850746268, "grad_norm": 24.366125106811523, "learning_rate": 9.597883597883598e-06, "loss": 42.6906, "step": 3517 }, { "epoch": 83.7641791044776, "grad_norm": 21.425613403320312, "learning_rate": 9.595238095238096e-06, "loss": 42.7173, "step": 3518 }, { "epoch": 83.78805970149254, "grad_norm": 18.075485229492188, "learning_rate": 9.592592592592593e-06, "loss": 42.9601, "step": 3519 }, { "epoch": 83.81194029850747, "grad_norm": 19.24690818786621, "learning_rate": 9.58994708994709e-06, "loss": 41.9579, "step": 3520 }, { "epoch": 83.83582089552239, "grad_norm": 21.10234832763672, "learning_rate": 9.587301587301588e-06, "loss": 42.6078, "step": 3521 }, { "epoch": 83.85970149253731, "grad_norm": 21.595741271972656, "learning_rate": 9.584656084656086e-06, "loss": 43.3926, "step": 3522 }, { "epoch": 83.88358208955223, "grad_norm": 14.618033409118652, "learning_rate": 9.582010582010583e-06, "loss": 42.7237, "step": 3523 }, { "epoch": 83.90746268656716, "grad_norm": 18.805774688720703, "learning_rate": 9.57936507936508e-06, "loss": 43.6884, "step": 3524 }, { "epoch": 83.9313432835821, "grad_norm": 17.666229248046875, "learning_rate": 9.576719576719578e-06, "loss": 43.3069, "step": 3525 }, { "epoch": 83.95522388059702, "grad_norm": NaN, "learning_rate": 9.574074074074074e-06, "loss": 47.9701, "step": 3526 }, { "epoch": 83.97910447761194, "grad_norm": 18.41876792907715, "learning_rate": 9.574074074074074e-06, "loss": 42.0814, "step": 3527 }, { "epoch": 84.0, "grad_norm": 14.344976425170898, "learning_rate": 9.571428571428573e-06, "loss": 36.702, "step": 3528 }, { "epoch": 84.02388059701492, "grad_norm": 19.47123146057129, "learning_rate": 9.568783068783069e-06, "loss": 43.0682, "step": 3529 }, { "epoch": 84.04776119402985, "grad_norm": 18.708087921142578, "learning_rate": 9.566137566137567e-06, "loss": 43.4093, "step": 3530 }, { "epoch": 84.07164179104478, "grad_norm": 21.98741340637207, "learning_rate": 9.563492063492064e-06, "loss": 42.619, "step": 3531 }, { "epoch": 84.0955223880597, "grad_norm": 21.4478816986084, "learning_rate": 9.560846560846561e-06, "loss": 43.3145, "step": 3532 }, { "epoch": 84.11940298507463, "grad_norm": 21.093963623046875, "learning_rate": 9.558201058201059e-06, "loss": 43.1162, "step": 3533 }, { "epoch": 84.14328358208955, "grad_norm": 18.37552833557129, "learning_rate": 9.555555555555556e-06, "loss": 42.4734, "step": 3534 }, { "epoch": 84.16716417910447, "grad_norm": 13.956351280212402, "learning_rate": 9.552910052910054e-06, "loss": 42.4351, "step": 3535 }, { "epoch": 84.1910447761194, "grad_norm": 20.104270935058594, "learning_rate": 9.550264550264551e-06, "loss": 43.2507, "step": 3536 }, { "epoch": 84.21492537313434, "grad_norm": 20.69384002685547, "learning_rate": 9.547619047619049e-06, "loss": 42.8764, "step": 3537 }, { "epoch": 84.23880597014926, "grad_norm": 26.53329086303711, "learning_rate": 9.544973544973546e-06, "loss": 42.4139, "step": 3538 }, { "epoch": 84.26268656716418, "grad_norm": 11.859530448913574, "learning_rate": 9.542328042328042e-06, "loss": 42.4525, "step": 3539 }, { "epoch": 84.2865671641791, "grad_norm": 22.784093856811523, "learning_rate": 9.539682539682541e-06, "loss": 42.6754, "step": 3540 }, { "epoch": 84.31044776119403, "grad_norm": 22.02342987060547, "learning_rate": 9.537037037037037e-06, "loss": 42.8119, "step": 3541 }, { "epoch": 84.33432835820895, "grad_norm": 16.383922576904297, "learning_rate": 9.534391534391535e-06, "loss": 41.9982, "step": 3542 }, { "epoch": 84.35820895522389, "grad_norm": 18.745128631591797, "learning_rate": 9.531746031746032e-06, "loss": 43.0496, "step": 3543 }, { "epoch": 84.38208955223881, "grad_norm": 33.664764404296875, "learning_rate": 9.52910052910053e-06, "loss": 42.5116, "step": 3544 }, { "epoch": 84.40597014925373, "grad_norm": 18.74268341064453, "learning_rate": 9.526455026455027e-06, "loss": 43.642, "step": 3545 }, { "epoch": 84.42985074626866, "grad_norm": 30.136598587036133, "learning_rate": 9.523809523809525e-06, "loss": 42.8695, "step": 3546 }, { "epoch": 84.45373134328358, "grad_norm": 22.268802642822266, "learning_rate": 9.521164021164022e-06, "loss": 42.6697, "step": 3547 }, { "epoch": 84.4776119402985, "grad_norm": 22.149734497070312, "learning_rate": 9.51851851851852e-06, "loss": 43.0171, "step": 3548 }, { "epoch": 84.50149253731344, "grad_norm": 33.4512825012207, "learning_rate": 9.515873015873016e-06, "loss": 43.2588, "step": 3549 }, { "epoch": 84.52537313432836, "grad_norm": 22.5905704498291, "learning_rate": 9.513227513227515e-06, "loss": 43.2581, "step": 3550 }, { "epoch": 84.54925373134328, "grad_norm": 38.85606384277344, "learning_rate": 9.51058201058201e-06, "loss": 42.2418, "step": 3551 }, { "epoch": 84.57313432835821, "grad_norm": 27.77952003479004, "learning_rate": 9.507936507936508e-06, "loss": 43.4077, "step": 3552 }, { "epoch": 84.59701492537313, "grad_norm": 46.536651611328125, "learning_rate": 9.505291005291006e-06, "loss": 42.1365, "step": 3553 }, { "epoch": 84.62089552238805, "grad_norm": 32.448482513427734, "learning_rate": 9.502645502645503e-06, "loss": 43.362, "step": 3554 }, { "epoch": 84.64477611940299, "grad_norm": 43.40568161010742, "learning_rate": 9.5e-06, "loss": 42.4134, "step": 3555 }, { "epoch": 84.66865671641791, "grad_norm": 44.625125885009766, "learning_rate": 9.497354497354498e-06, "loss": 42.3841, "step": 3556 }, { "epoch": 84.69253731343284, "grad_norm": 30.825876235961914, "learning_rate": 9.494708994708996e-06, "loss": 43.0973, "step": 3557 }, { "epoch": 84.71641791044776, "grad_norm": 32.886775970458984, "learning_rate": 9.492063492063493e-06, "loss": 42.6478, "step": 3558 }, { "epoch": 84.74029850746268, "grad_norm": 35.800621032714844, "learning_rate": 9.48941798941799e-06, "loss": 42.1319, "step": 3559 }, { "epoch": 84.7641791044776, "grad_norm": 27.23737907409668, "learning_rate": 9.486772486772488e-06, "loss": 42.0883, "step": 3560 }, { "epoch": 84.78805970149254, "grad_norm": 40.162166595458984, "learning_rate": 9.484126984126984e-06, "loss": 42.5786, "step": 3561 }, { "epoch": 84.81194029850747, "grad_norm": 32.1665153503418, "learning_rate": 9.481481481481483e-06, "loss": 41.7711, "step": 3562 }, { "epoch": 84.83582089552239, "grad_norm": 34.32803726196289, "learning_rate": 9.478835978835979e-06, "loss": 43.3193, "step": 3563 }, { "epoch": 84.85970149253731, "grad_norm": 34.155452728271484, "learning_rate": 9.476190476190476e-06, "loss": 43.2305, "step": 3564 }, { "epoch": 84.88358208955223, "grad_norm": 31.642534255981445, "learning_rate": 9.473544973544975e-06, "loss": 44.1911, "step": 3565 }, { "epoch": 84.90746268656716, "grad_norm": 33.413291931152344, "learning_rate": 9.470899470899471e-06, "loss": 41.0447, "step": 3566 }, { "epoch": 84.9313432835821, "grad_norm": 35.05025100708008, "learning_rate": 9.468253968253969e-06, "loss": 43.3446, "step": 3567 }, { "epoch": 84.95522388059702, "grad_norm": 28.946184158325195, "learning_rate": 9.465608465608466e-06, "loss": 42.4865, "step": 3568 }, { "epoch": 84.97910447761194, "grad_norm": 38.28304672241211, "learning_rate": 9.462962962962964e-06, "loss": 42.6287, "step": 3569 }, { "epoch": 85.0, "grad_norm": 27.42157745361328, "learning_rate": 9.460317460317461e-06, "loss": 37.3853, "step": 3570 }, { "epoch": 85.02388059701492, "grad_norm": 40.84228515625, "learning_rate": 9.457671957671959e-06, "loss": 43.8201, "step": 3571 }, { "epoch": 85.04776119402985, "grad_norm": 36.39906692504883, "learning_rate": 9.455026455026456e-06, "loss": 41.5254, "step": 3572 }, { "epoch": 85.07164179104478, "grad_norm": 29.795923233032227, "learning_rate": 9.452380952380952e-06, "loss": 42.4395, "step": 3573 }, { "epoch": 85.0955223880597, "grad_norm": 25.486753463745117, "learning_rate": 9.449735449735451e-06, "loss": 42.8189, "step": 3574 }, { "epoch": 85.11940298507463, "grad_norm": 37.790260314941406, "learning_rate": 9.447089947089949e-06, "loss": 42.8718, "step": 3575 }, { "epoch": 85.14328358208955, "grad_norm": 29.528602600097656, "learning_rate": 9.444444444444445e-06, "loss": 43.3714, "step": 3576 }, { "epoch": 85.16716417910447, "grad_norm": 37.71443176269531, "learning_rate": 9.441798941798944e-06, "loss": 42.4381, "step": 3577 }, { "epoch": 85.1910447761194, "grad_norm": 36.625, "learning_rate": 9.43915343915344e-06, "loss": 41.7229, "step": 3578 }, { "epoch": 85.21492537313434, "grad_norm": 30.335342407226562, "learning_rate": 9.436507936507937e-06, "loss": 41.8887, "step": 3579 }, { "epoch": 85.23880597014926, "grad_norm": 24.375329971313477, "learning_rate": 9.433862433862435e-06, "loss": 42.6894, "step": 3580 }, { "epoch": 85.26268656716418, "grad_norm": 34.27681350708008, "learning_rate": 9.431216931216932e-06, "loss": 42.4825, "step": 3581 }, { "epoch": 85.2865671641791, "grad_norm": 27.515474319458008, "learning_rate": 9.42857142857143e-06, "loss": 41.3011, "step": 3582 }, { "epoch": 85.31044776119403, "grad_norm": 39.355350494384766, "learning_rate": 9.425925925925925e-06, "loss": 42.1456, "step": 3583 }, { "epoch": 85.33432835820895, "grad_norm": 34.957523345947266, "learning_rate": 9.423280423280425e-06, "loss": 42.9466, "step": 3584 }, { "epoch": 85.35820895522389, "grad_norm": 30.264474868774414, "learning_rate": 9.420634920634922e-06, "loss": 42.5819, "step": 3585 }, { "epoch": 85.38208955223881, "grad_norm": 27.88845443725586, "learning_rate": 9.417989417989418e-06, "loss": 41.4037, "step": 3586 }, { "epoch": 85.40597014925373, "grad_norm": 30.240957260131836, "learning_rate": 9.415343915343917e-06, "loss": 44.7681, "step": 3587 }, { "epoch": 85.42985074626866, "grad_norm": 23.867399215698242, "learning_rate": 9.412698412698413e-06, "loss": 41.2699, "step": 3588 }, { "epoch": 85.45373134328358, "grad_norm": 39.2992057800293, "learning_rate": 9.41005291005291e-06, "loss": 42.2639, "step": 3589 }, { "epoch": 85.4776119402985, "grad_norm": 32.746673583984375, "learning_rate": 9.407407407407408e-06, "loss": 43.3612, "step": 3590 }, { "epoch": 85.50149253731344, "grad_norm": 33.791748046875, "learning_rate": 9.404761904761905e-06, "loss": 43.1554, "step": 3591 }, { "epoch": 85.52537313432836, "grad_norm": 35.11564254760742, "learning_rate": 9.402116402116403e-06, "loss": 43.4265, "step": 3592 }, { "epoch": 85.54925373134328, "grad_norm": 27.411352157592773, "learning_rate": 9.3994708994709e-06, "loss": 42.7959, "step": 3593 }, { "epoch": 85.57313432835821, "grad_norm": 27.369596481323242, "learning_rate": 9.396825396825398e-06, "loss": 44.1557, "step": 3594 }, { "epoch": 85.59701492537313, "grad_norm": 30.399707794189453, "learning_rate": 9.394179894179895e-06, "loss": 42.5034, "step": 3595 }, { "epoch": 85.62089552238805, "grad_norm": 24.180538177490234, "learning_rate": 9.391534391534393e-06, "loss": 42.256, "step": 3596 }, { "epoch": 85.64477611940299, "grad_norm": 35.2861328125, "learning_rate": 9.38888888888889e-06, "loss": 43.6244, "step": 3597 }, { "epoch": 85.66865671641791, "grad_norm": 28.855852127075195, "learning_rate": 9.386243386243386e-06, "loss": 43.046, "step": 3598 }, { "epoch": 85.69253731343284, "grad_norm": 33.648170471191406, "learning_rate": 9.383597883597885e-06, "loss": 42.0113, "step": 3599 }, { "epoch": 85.71641791044776, "grad_norm": 30.42345428466797, "learning_rate": 9.380952380952381e-06, "loss": 42.4, "step": 3600 }, { "epoch": 85.74029850746268, "grad_norm": 34.80357360839844, "learning_rate": 9.378306878306879e-06, "loss": 41.5381, "step": 3601 }, { "epoch": 85.7641791044776, "grad_norm": 29.07464027404785, "learning_rate": 9.375661375661376e-06, "loss": 43.5597, "step": 3602 }, { "epoch": 85.78805970149254, "grad_norm": 35.02674865722656, "learning_rate": 9.373015873015874e-06, "loss": 42.0479, "step": 3603 }, { "epoch": 85.81194029850747, "grad_norm": 29.9696044921875, "learning_rate": 9.370370370370371e-06, "loss": 42.6829, "step": 3604 }, { "epoch": 85.83582089552239, "grad_norm": 31.754671096801758, "learning_rate": 9.367724867724869e-06, "loss": 42.824, "step": 3605 }, { "epoch": 85.85970149253731, "grad_norm": 30.765913009643555, "learning_rate": 9.365079365079366e-06, "loss": 42.8912, "step": 3606 }, { "epoch": 85.88358208955223, "grad_norm": 33.485015869140625, "learning_rate": 9.362433862433864e-06, "loss": 42.7802, "step": 3607 }, { "epoch": 85.90746268656716, "grad_norm": 27.535614013671875, "learning_rate": 9.359788359788361e-06, "loss": 43.0182, "step": 3608 }, { "epoch": 85.9313432835821, "grad_norm": 28.8901309967041, "learning_rate": 9.357142857142859e-06, "loss": 43.1223, "step": 3609 }, { "epoch": 85.95522388059702, "grad_norm": 27.991809844970703, "learning_rate": 9.354497354497354e-06, "loss": 43.4701, "step": 3610 }, { "epoch": 85.97910447761194, "grad_norm": 33.3857536315918, "learning_rate": 9.351851851851854e-06, "loss": 43.115, "step": 3611 }, { "epoch": 86.0, "grad_norm": 25.59542465209961, "learning_rate": 9.34920634920635e-06, "loss": 37.2275, "step": 3612 }, { "epoch": 86.02388059701492, "grad_norm": 32.959251403808594, "learning_rate": 9.346560846560847e-06, "loss": 41.951, "step": 3613 }, { "epoch": 86.04776119402985, "grad_norm": 26.431245803833008, "learning_rate": 9.343915343915344e-06, "loss": 40.553, "step": 3614 }, { "epoch": 86.07164179104478, "grad_norm": 31.518957138061523, "learning_rate": 9.341269841269842e-06, "loss": 42.7644, "step": 3615 }, { "epoch": 86.0955223880597, "grad_norm": 30.063220977783203, "learning_rate": 9.33862433862434e-06, "loss": 41.5891, "step": 3616 }, { "epoch": 86.11940298507463, "grad_norm": 32.649227142333984, "learning_rate": 9.335978835978837e-06, "loss": 43.2927, "step": 3617 }, { "epoch": 86.14328358208955, "grad_norm": 28.61098861694336, "learning_rate": 9.333333333333334e-06, "loss": 42.4467, "step": 3618 }, { "epoch": 86.16716417910447, "grad_norm": 30.715784072875977, "learning_rate": 9.330687830687832e-06, "loss": 41.0933, "step": 3619 }, { "epoch": 86.1910447761194, "grad_norm": 27.99184799194336, "learning_rate": 9.32804232804233e-06, "loss": 42.218, "step": 3620 }, { "epoch": 86.21492537313434, "grad_norm": 32.13215255737305, "learning_rate": 9.325396825396827e-06, "loss": 42.4495, "step": 3621 }, { "epoch": 86.23880597014926, "grad_norm": 28.051559448242188, "learning_rate": 9.322751322751323e-06, "loss": 43.2451, "step": 3622 }, { "epoch": 86.26268656716418, "grad_norm": 33.464115142822266, "learning_rate": 9.32010582010582e-06, "loss": 43.6584, "step": 3623 }, { "epoch": 86.2865671641791, "grad_norm": 28.151817321777344, "learning_rate": 9.317460317460318e-06, "loss": 42.722, "step": 3624 }, { "epoch": 86.31044776119403, "grad_norm": NaN, "learning_rate": 9.314814814814815e-06, "loss": 37.1565, "step": 3625 }, { "epoch": 86.33432835820895, "grad_norm": 29.83131217956543, "learning_rate": 9.314814814814815e-06, "loss": 43.1632, "step": 3626 }, { "epoch": 86.35820895522389, "grad_norm": 24.83383560180664, "learning_rate": 9.312169312169313e-06, "loss": 43.5542, "step": 3627 }, { "epoch": 86.38208955223881, "grad_norm": 33.05693817138672, "learning_rate": 9.30952380952381e-06, "loss": 42.7432, "step": 3628 }, { "epoch": 86.40597014925373, "grad_norm": 24.293209075927734, "learning_rate": 9.306878306878308e-06, "loss": 42.9506, "step": 3629 }, { "epoch": 86.42985074626866, "grad_norm": 33.47346496582031, "learning_rate": 9.304232804232805e-06, "loss": 42.2723, "step": 3630 }, { "epoch": 86.45373134328358, "grad_norm": 28.47313117980957, "learning_rate": 9.301587301587303e-06, "loss": 43.7464, "step": 3631 }, { "epoch": 86.4776119402985, "grad_norm": 32.237510681152344, "learning_rate": 9.2989417989418e-06, "loss": 42.8704, "step": 3632 }, { "epoch": 86.50149253731344, "grad_norm": 25.842601776123047, "learning_rate": 9.296296296296296e-06, "loss": 41.6084, "step": 3633 }, { "epoch": 86.52537313432836, "grad_norm": 27.513307571411133, "learning_rate": 9.293650793650795e-06, "loss": 42.7922, "step": 3634 }, { "epoch": 86.54925373134328, "grad_norm": 25.676212310791016, "learning_rate": 9.291005291005291e-06, "loss": 42.0415, "step": 3635 }, { "epoch": 86.57313432835821, "grad_norm": 29.911081314086914, "learning_rate": 9.288359788359788e-06, "loss": 43.0526, "step": 3636 }, { "epoch": 86.59701492537313, "grad_norm": 21.788707733154297, "learning_rate": 9.285714285714288e-06, "loss": 42.2228, "step": 3637 }, { "epoch": 86.62089552238805, "grad_norm": 34.92530822753906, "learning_rate": 9.283068783068783e-06, "loss": 42.6756, "step": 3638 }, { "epoch": 86.64477611940299, "grad_norm": 31.41309928894043, "learning_rate": 9.280423280423281e-06, "loss": 43.3258, "step": 3639 }, { "epoch": 86.66865671641791, "grad_norm": 27.432342529296875, "learning_rate": 9.277777777777778e-06, "loss": 43.0612, "step": 3640 }, { "epoch": 86.69253731343284, "grad_norm": 25.92644691467285, "learning_rate": 9.275132275132276e-06, "loss": 41.7141, "step": 3641 }, { "epoch": 86.71641791044776, "grad_norm": 27.26793098449707, "learning_rate": 9.272486772486773e-06, "loss": 42.8127, "step": 3642 }, { "epoch": 86.74029850746268, "grad_norm": 22.45132827758789, "learning_rate": 9.26984126984127e-06, "loss": 43.4623, "step": 3643 }, { "epoch": 86.7641791044776, "grad_norm": 29.31770896911621, "learning_rate": 9.267195767195768e-06, "loss": 43.428, "step": 3644 }, { "epoch": 86.78805970149254, "grad_norm": 26.000110626220703, "learning_rate": 9.264550264550264e-06, "loss": 43.7773, "step": 3645 }, { "epoch": 86.81194029850747, "grad_norm": 30.51299476623535, "learning_rate": 9.261904761904763e-06, "loss": 43.2915, "step": 3646 }, { "epoch": 86.83582089552239, "grad_norm": 25.712812423706055, "learning_rate": 9.25925925925926e-06, "loss": 42.6203, "step": 3647 }, { "epoch": 86.85970149253731, "grad_norm": 32.85362243652344, "learning_rate": 9.256613756613757e-06, "loss": 42.1768, "step": 3648 }, { "epoch": 86.88358208955223, "grad_norm": 30.07919692993164, "learning_rate": 9.253968253968256e-06, "loss": 42.4139, "step": 3649 }, { "epoch": 86.90746268656716, "grad_norm": 31.38039779663086, "learning_rate": 9.251322751322752e-06, "loss": 42.23, "step": 3650 }, { "epoch": 86.9313432835821, "grad_norm": 26.601993560791016, "learning_rate": 9.248677248677249e-06, "loss": 42.2522, "step": 3651 }, { "epoch": 86.95522388059702, "grad_norm": 31.616823196411133, "learning_rate": 9.246031746031747e-06, "loss": 43.3183, "step": 3652 }, { "epoch": 86.97910447761194, "grad_norm": 25.606231689453125, "learning_rate": 9.243386243386244e-06, "loss": 42.7862, "step": 3653 }, { "epoch": 87.0, "grad_norm": 22.20980453491211, "learning_rate": 9.240740740740742e-06, "loss": 37.7077, "step": 3654 }, { "epoch": 87.02388059701492, "grad_norm": 24.519224166870117, "learning_rate": 9.238095238095239e-06, "loss": 42.4255, "step": 3655 }, { "epoch": 87.04776119402985, "grad_norm": 27.409582138061523, "learning_rate": 9.235449735449737e-06, "loss": 42.0198, "step": 3656 }, { "epoch": 87.07164179104478, "grad_norm": 20.307886123657227, "learning_rate": 9.232804232804234e-06, "loss": 41.6037, "step": 3657 }, { "epoch": 87.0955223880597, "grad_norm": 24.046375274658203, "learning_rate": 9.230158730158732e-06, "loss": 43.9297, "step": 3658 }, { "epoch": 87.11940298507463, "grad_norm": 23.58251953125, "learning_rate": 9.227513227513229e-06, "loss": 43.4211, "step": 3659 }, { "epoch": 87.14328358208955, "grad_norm": 20.67659568786621, "learning_rate": 9.224867724867725e-06, "loss": 42.7205, "step": 3660 }, { "epoch": 87.16716417910447, "grad_norm": 18.82547950744629, "learning_rate": 9.222222222222224e-06, "loss": 42.8921, "step": 3661 }, { "epoch": 87.1910447761194, "grad_norm": 21.20027732849121, "learning_rate": 9.21957671957672e-06, "loss": 41.0809, "step": 3662 }, { "epoch": 87.21492537313434, "grad_norm": 20.002410888671875, "learning_rate": 9.216931216931217e-06, "loss": 42.0559, "step": 3663 }, { "epoch": 87.23880597014926, "grad_norm": 16.792434692382812, "learning_rate": 9.214285714285715e-06, "loss": 40.659, "step": 3664 }, { "epoch": 87.26268656716418, "grad_norm": 20.209190368652344, "learning_rate": 9.211640211640212e-06, "loss": 42.387, "step": 3665 }, { "epoch": 87.2865671641791, "grad_norm": 17.87749481201172, "learning_rate": 9.20899470899471e-06, "loss": 41.6863, "step": 3666 }, { "epoch": 87.31044776119403, "grad_norm": 16.422809600830078, "learning_rate": 9.206349206349207e-06, "loss": 43.5165, "step": 3667 }, { "epoch": 87.33432835820895, "grad_norm": 17.762025833129883, "learning_rate": 9.203703703703705e-06, "loss": 41.3489, "step": 3668 }, { "epoch": 87.35820895522389, "grad_norm": 18.185434341430664, "learning_rate": 9.201058201058202e-06, "loss": 42.9896, "step": 3669 }, { "epoch": 87.38208955223881, "grad_norm": 15.573823928833008, "learning_rate": 9.198412698412698e-06, "loss": 42.5428, "step": 3670 }, { "epoch": 87.40597014925373, "grad_norm": 21.007041931152344, "learning_rate": 9.195767195767197e-06, "loss": 41.6825, "step": 3671 }, { "epoch": 87.42985074626866, "grad_norm": 21.610292434692383, "learning_rate": 9.193121693121693e-06, "loss": 42.8643, "step": 3672 }, { "epoch": 87.45373134328358, "grad_norm": 16.124156951904297, "learning_rate": 9.19047619047619e-06, "loss": 42.5377, "step": 3673 }, { "epoch": 87.4776119402985, "grad_norm": 22.14504051208496, "learning_rate": 9.187830687830688e-06, "loss": 42.878, "step": 3674 }, { "epoch": 87.50149253731344, "grad_norm": 17.33942222595215, "learning_rate": 9.185185185185186e-06, "loss": 44.3817, "step": 3675 }, { "epoch": 87.52537313432836, "grad_norm": 21.361644744873047, "learning_rate": 9.182539682539683e-06, "loss": 42.913, "step": 3676 }, { "epoch": 87.54925373134328, "grad_norm": 18.6135196685791, "learning_rate": 9.17989417989418e-06, "loss": 42.8328, "step": 3677 }, { "epoch": 87.57313432835821, "grad_norm": 23.618101119995117, "learning_rate": 9.177248677248678e-06, "loss": 42.4581, "step": 3678 }, { "epoch": 87.59701492537313, "grad_norm": 18.788637161254883, "learning_rate": 9.174603174603176e-06, "loss": 43.5344, "step": 3679 }, { "epoch": 87.62089552238805, "grad_norm": 17.69763946533203, "learning_rate": 9.171957671957673e-06, "loss": 42.8437, "step": 3680 }, { "epoch": 87.64477611940299, "grad_norm": 19.06989097595215, "learning_rate": 9.16931216931217e-06, "loss": 42.3788, "step": 3681 }, { "epoch": 87.66865671641791, "grad_norm": 18.462968826293945, "learning_rate": 9.166666666666666e-06, "loss": 42.759, "step": 3682 }, { "epoch": 87.69253731343284, "grad_norm": 21.524621963500977, "learning_rate": 9.164021164021166e-06, "loss": 43.1027, "step": 3683 }, { "epoch": 87.71641791044776, "grad_norm": 18.747453689575195, "learning_rate": 9.161375661375661e-06, "loss": 43.0803, "step": 3684 }, { "epoch": 87.74029850746268, "grad_norm": 21.170255661010742, "learning_rate": 9.158730158730159e-06, "loss": 42.641, "step": 3685 }, { "epoch": 87.7641791044776, "grad_norm": 19.89739990234375, "learning_rate": 9.156084656084656e-06, "loss": 42.5469, "step": 3686 }, { "epoch": 87.78805970149254, "grad_norm": 22.9807071685791, "learning_rate": 9.153439153439154e-06, "loss": 42.5137, "step": 3687 }, { "epoch": 87.81194029850747, "grad_norm": 19.036230087280273, "learning_rate": 9.150793650793651e-06, "loss": 42.8328, "step": 3688 }, { "epoch": 87.83582089552239, "grad_norm": 23.97933006286621, "learning_rate": 9.148148148148149e-06, "loss": 42.9784, "step": 3689 }, { "epoch": 87.85970149253731, "grad_norm": 18.06254768371582, "learning_rate": 9.145502645502646e-06, "loss": 41.7068, "step": 3690 }, { "epoch": 87.88358208955223, "grad_norm": 19.88326072692871, "learning_rate": 9.142857142857144e-06, "loss": 43.8, "step": 3691 }, { "epoch": 87.90746268656716, "grad_norm": 20.145050048828125, "learning_rate": 9.140211640211641e-06, "loss": 43.2459, "step": 3692 }, { "epoch": 87.9313432835821, "grad_norm": 16.824399948120117, "learning_rate": 9.137566137566139e-06, "loss": 42.4406, "step": 3693 }, { "epoch": 87.95522388059702, "grad_norm": 20.99275779724121, "learning_rate": 9.134920634920635e-06, "loss": 42.2506, "step": 3694 }, { "epoch": 87.97910447761194, "grad_norm": 23.64455223083496, "learning_rate": 9.132275132275134e-06, "loss": 43.1451, "step": 3695 }, { "epoch": 88.0, "grad_norm": 17.736629486083984, "learning_rate": 9.12962962962963e-06, "loss": 36.9082, "step": 3696 }, { "epoch": 88.02388059701492, "grad_norm": 20.58110809326172, "learning_rate": 9.126984126984127e-06, "loss": 41.6838, "step": 3697 }, { "epoch": 88.04776119402985, "grad_norm": 21.2742977142334, "learning_rate": 9.124338624338626e-06, "loss": 43.8259, "step": 3698 }, { "epoch": 88.07164179104478, "grad_norm": 18.40839958190918, "learning_rate": 9.121693121693122e-06, "loss": 41.6561, "step": 3699 }, { "epoch": 88.0955223880597, "grad_norm": 25.24982261657715, "learning_rate": 9.11904761904762e-06, "loss": 43.4407, "step": 3700 }, { "epoch": 88.11940298507463, "grad_norm": 16.522397994995117, "learning_rate": 9.116402116402117e-06, "loss": 42.3175, "step": 3701 }, { "epoch": 88.14328358208955, "grad_norm": 23.80354881286621, "learning_rate": 9.113756613756615e-06, "loss": 41.8656, "step": 3702 }, { "epoch": 88.16716417910447, "grad_norm": 17.915058135986328, "learning_rate": 9.111111111111112e-06, "loss": 43.4793, "step": 3703 }, { "epoch": 88.1910447761194, "grad_norm": 24.271337509155273, "learning_rate": 9.108465608465608e-06, "loss": 42.3917, "step": 3704 }, { "epoch": 88.21492537313434, "grad_norm": 21.696147918701172, "learning_rate": 9.105820105820107e-06, "loss": 42.3141, "step": 3705 }, { "epoch": 88.23880597014926, "grad_norm": 23.576507568359375, "learning_rate": 9.103174603174603e-06, "loss": 42.9454, "step": 3706 }, { "epoch": 88.26268656716418, "grad_norm": 25.030128479003906, "learning_rate": 9.1005291005291e-06, "loss": 42.8441, "step": 3707 }, { "epoch": 88.2865671641791, "grad_norm": 21.148405075073242, "learning_rate": 9.0978835978836e-06, "loss": 43.0314, "step": 3708 }, { "epoch": 88.31044776119403, "grad_norm": 25.8000431060791, "learning_rate": 9.095238095238095e-06, "loss": 42.5864, "step": 3709 }, { "epoch": 88.33432835820895, "grad_norm": 15.713743209838867, "learning_rate": 9.092592592592593e-06, "loss": 42.8121, "step": 3710 }, { "epoch": 88.35820895522389, "grad_norm": 23.208627700805664, "learning_rate": 9.08994708994709e-06, "loss": 42.9846, "step": 3711 }, { "epoch": 88.38208955223881, "grad_norm": 17.478639602661133, "learning_rate": 9.087301587301588e-06, "loss": 42.2004, "step": 3712 }, { "epoch": 88.40597014925373, "grad_norm": 21.487903594970703, "learning_rate": 9.084656084656085e-06, "loss": 41.7275, "step": 3713 }, { "epoch": 88.42985074626866, "grad_norm": 27.780941009521484, "learning_rate": 9.082010582010583e-06, "loss": 42.1269, "step": 3714 }, { "epoch": 88.45373134328358, "grad_norm": 14.19015884399414, "learning_rate": 9.07936507936508e-06, "loss": 43.512, "step": 3715 }, { "epoch": 88.4776119402985, "grad_norm": 27.63198471069336, "learning_rate": 9.076719576719576e-06, "loss": 42.4196, "step": 3716 }, { "epoch": 88.50149253731344, "grad_norm": 21.5277099609375, "learning_rate": 9.074074074074075e-06, "loss": 41.8393, "step": 3717 }, { "epoch": 88.52537313432836, "grad_norm": 20.19924545288086, "learning_rate": 9.071428571428573e-06, "loss": 41.6486, "step": 3718 }, { "epoch": 88.54925373134328, "grad_norm": 22.75286865234375, "learning_rate": 9.068783068783069e-06, "loss": 43.3116, "step": 3719 }, { "epoch": 88.57313432835821, "grad_norm": 16.763381958007812, "learning_rate": 9.066137566137568e-06, "loss": 43.0704, "step": 3720 }, { "epoch": 88.59701492537313, "grad_norm": 23.842023849487305, "learning_rate": 9.063492063492064e-06, "loss": 43.7468, "step": 3721 }, { "epoch": 88.62089552238805, "grad_norm": 20.88597297668457, "learning_rate": 9.060846560846561e-06, "loss": 42.2398, "step": 3722 }, { "epoch": 88.64477611940299, "grad_norm": 19.333271026611328, "learning_rate": 9.058201058201059e-06, "loss": 41.7667, "step": 3723 }, { "epoch": 88.66865671641791, "grad_norm": 22.313888549804688, "learning_rate": 9.055555555555556e-06, "loss": 42.3198, "step": 3724 }, { "epoch": 88.69253731343284, "grad_norm": 20.26089096069336, "learning_rate": 9.052910052910054e-06, "loss": 42.9191, "step": 3725 }, { "epoch": 88.71641791044776, "grad_norm": 17.900373458862305, "learning_rate": 9.050264550264551e-06, "loss": 41.7498, "step": 3726 }, { "epoch": 88.74029850746268, "grad_norm": 22.735700607299805, "learning_rate": 9.047619047619049e-06, "loss": 41.4744, "step": 3727 }, { "epoch": 88.7641791044776, "grad_norm": 22.933048248291016, "learning_rate": 9.044973544973546e-06, "loss": 43.4595, "step": 3728 }, { "epoch": 88.78805970149254, "grad_norm": 15.648778915405273, "learning_rate": 9.042328042328044e-06, "loss": 43.4811, "step": 3729 }, { "epoch": 88.81194029850747, "grad_norm": 35.44391632080078, "learning_rate": 9.039682539682541e-06, "loss": 42.0879, "step": 3730 }, { "epoch": 88.83582089552239, "grad_norm": 26.575231552124023, "learning_rate": 9.037037037037037e-06, "loss": 41.6883, "step": 3731 }, { "epoch": 88.85970149253731, "grad_norm": 33.38102340698242, "learning_rate": 9.034391534391536e-06, "loss": 43.2903, "step": 3732 }, { "epoch": 88.88358208955223, "grad_norm": 26.297910690307617, "learning_rate": 9.031746031746032e-06, "loss": 42.744, "step": 3733 }, { "epoch": 88.90746268656716, "grad_norm": 25.057889938354492, "learning_rate": 9.02910052910053e-06, "loss": 42.076, "step": 3734 }, { "epoch": 88.9313432835821, "grad_norm": 21.162078857421875, "learning_rate": 9.026455026455027e-06, "loss": 42.397, "step": 3735 }, { "epoch": 88.95522388059702, "grad_norm": 21.846647262573242, "learning_rate": 9.023809523809524e-06, "loss": 42.7379, "step": 3736 }, { "epoch": 88.97910447761194, "grad_norm": 19.74768829345703, "learning_rate": 9.021164021164022e-06, "loss": 42.0906, "step": 3737 }, { "epoch": 89.0, "grad_norm": 18.839765548706055, "learning_rate": 9.01851851851852e-06, "loss": 37.881, "step": 3738 }, { "epoch": 89.02388059701492, "grad_norm": 22.15633201599121, "learning_rate": 9.015873015873017e-06, "loss": 42.5544, "step": 3739 }, { "epoch": 89.04776119402985, "grad_norm": 18.709840774536133, "learning_rate": 9.013227513227514e-06, "loss": 43.2949, "step": 3740 }, { "epoch": 89.07164179104478, "grad_norm": 22.922399520874023, "learning_rate": 9.010582010582012e-06, "loss": 41.9215, "step": 3741 }, { "epoch": 89.0955223880597, "grad_norm": 18.445695877075195, "learning_rate": 9.00793650793651e-06, "loss": 42.994, "step": 3742 }, { "epoch": 89.11940298507463, "grad_norm": 22.694503784179688, "learning_rate": 9.005291005291005e-06, "loss": 42.4024, "step": 3743 }, { "epoch": 89.14328358208955, "grad_norm": 23.259532928466797, "learning_rate": 9.002645502645503e-06, "loss": 41.9366, "step": 3744 }, { "epoch": 89.16716417910447, "grad_norm": 24.131465911865234, "learning_rate": 9e-06, "loss": 42.9172, "step": 3745 }, { "epoch": 89.1910447761194, "grad_norm": 21.01772117614746, "learning_rate": 8.997354497354498e-06, "loss": 42.0505, "step": 3746 }, { "epoch": 89.21492537313434, "grad_norm": 20.675086975097656, "learning_rate": 8.994708994708995e-06, "loss": 42.7076, "step": 3747 }, { "epoch": 89.23880597014926, "grad_norm": 22.289649963378906, "learning_rate": 8.992063492063493e-06, "loss": 42.4533, "step": 3748 }, { "epoch": 89.26268656716418, "grad_norm": 22.76655387878418, "learning_rate": 8.98941798941799e-06, "loss": 42.0269, "step": 3749 }, { "epoch": 89.2865671641791, "grad_norm": 19.732887268066406, "learning_rate": 8.986772486772488e-06, "loss": 44.2783, "step": 3750 }, { "epoch": 89.31044776119403, "grad_norm": 22.45815658569336, "learning_rate": 8.984126984126985e-06, "loss": 40.1901, "step": 3751 }, { "epoch": 89.33432835820895, "grad_norm": 24.511625289916992, "learning_rate": 8.981481481481483e-06, "loss": 43.0842, "step": 3752 }, { "epoch": 89.35820895522389, "grad_norm": 19.739845275878906, "learning_rate": 8.978835978835979e-06, "loss": 43.7219, "step": 3753 }, { "epoch": 89.38208955223881, "grad_norm": 26.18813133239746, "learning_rate": 8.976190476190478e-06, "loss": 43.5427, "step": 3754 }, { "epoch": 89.40597014925373, "grad_norm": 21.95644760131836, "learning_rate": 8.973544973544973e-06, "loss": 42.9161, "step": 3755 }, { "epoch": 89.42985074626866, "grad_norm": 22.270849227905273, "learning_rate": 8.970899470899471e-06, "loss": 42.6121, "step": 3756 }, { "epoch": 89.45373134328358, "grad_norm": 18.48128318786621, "learning_rate": 8.968253968253968e-06, "loss": 42.044, "step": 3757 }, { "epoch": 89.4776119402985, "grad_norm": 22.865985870361328, "learning_rate": 8.965608465608466e-06, "loss": 42.1096, "step": 3758 }, { "epoch": 89.50149253731344, "grad_norm": 19.26102066040039, "learning_rate": 8.962962962962963e-06, "loss": 42.5147, "step": 3759 }, { "epoch": 89.52537313432836, "grad_norm": 27.352407455444336, "learning_rate": 8.960317460317461e-06, "loss": 41.7614, "step": 3760 }, { "epoch": 89.54925373134328, "grad_norm": 21.059770584106445, "learning_rate": 8.957671957671958e-06, "loss": 41.5053, "step": 3761 }, { "epoch": 89.57313432835821, "grad_norm": 23.909198760986328, "learning_rate": 8.955026455026456e-06, "loss": 43.4126, "step": 3762 }, { "epoch": 89.59701492537313, "grad_norm": 28.529970169067383, "learning_rate": 8.952380952380953e-06, "loss": 43.489, "step": 3763 }, { "epoch": 89.62089552238805, "grad_norm": 22.008472442626953, "learning_rate": 8.949735449735451e-06, "loss": 42.5781, "step": 3764 }, { "epoch": 89.64477611940299, "grad_norm": NaN, "learning_rate": 8.947089947089947e-06, "loss": 37.0211, "step": 3765 }, { "epoch": 89.66865671641791, "grad_norm": 29.881391525268555, "learning_rate": 8.947089947089947e-06, "loss": 42.6102, "step": 3766 }, { "epoch": 89.69253731343284, "grad_norm": 24.919992446899414, "learning_rate": 8.944444444444446e-06, "loss": 42.7878, "step": 3767 }, { "epoch": 89.71641791044776, "grad_norm": 29.473249435424805, "learning_rate": 8.941798941798942e-06, "loss": 41.9105, "step": 3768 }, { "epoch": 89.74029850746268, "grad_norm": 20.71428871154785, "learning_rate": 8.93915343915344e-06, "loss": 42.0715, "step": 3769 }, { "epoch": 89.7641791044776, "grad_norm": 29.31629180908203, "learning_rate": 8.936507936507938e-06, "loss": 41.3888, "step": 3770 }, { "epoch": 89.78805970149254, "grad_norm": 22.29326057434082, "learning_rate": 8.933862433862434e-06, "loss": 43.0029, "step": 3771 }, { "epoch": 89.81194029850747, "grad_norm": NaN, "learning_rate": 8.931216931216932e-06, "loss": 49.4483, "step": 3772 }, { "epoch": 89.83582089552239, "grad_norm": 23.31702423095703, "learning_rate": 8.931216931216932e-06, "loss": 42.8926, "step": 3773 }, { "epoch": 89.85970149253731, "grad_norm": 26.894012451171875, "learning_rate": 8.92857142857143e-06, "loss": 41.3476, "step": 3774 }, { "epoch": 89.88358208955223, "grad_norm": 19.226701736450195, "learning_rate": 8.925925925925927e-06, "loss": 42.9396, "step": 3775 }, { "epoch": 89.90746268656716, "grad_norm": 26.918243408203125, "learning_rate": 8.923280423280424e-06, "loss": 41.7109, "step": 3776 }, { "epoch": 89.9313432835821, "grad_norm": 22.435697555541992, "learning_rate": 8.920634920634922e-06, "loss": 42.5026, "step": 3777 }, { "epoch": 89.95522388059702, "grad_norm": 19.455547332763672, "learning_rate": 8.91798941798942e-06, "loss": 42.4964, "step": 3778 }, { "epoch": 89.97910447761194, "grad_norm": 24.792171478271484, "learning_rate": 8.915343915343915e-06, "loss": 41.6366, "step": 3779 }, { "epoch": 90.0, "grad_norm": 14.4516019821167, "learning_rate": 8.912698412698414e-06, "loss": 36.7873, "step": 3780 }, { "epoch": 90.0, "step": 3780, "total_flos": 1.857999472723437e+17, "train_loss": 4.747417533713043, "train_runtime": 12850.2933, "train_samples_per_second": 37.484, "train_steps_per_second": 0.294 }, { "epoch": 90.02388059701492, "grad_norm": 26.03937339782715, "learning_rate": 1e-05, "loss": 42.4337, "step": 3781 }, { "epoch": 90.04776119402985, "grad_norm": Infinity, "learning_rate": 9.997835497835499e-06, "loss": 51.1491, "step": 3782 }, { "epoch": 90.07164179104478, "grad_norm": Infinity, "learning_rate": 9.997835497835499e-06, "loss": 53.2051, "step": 3783 }, { "epoch": 90.0955223880597, "grad_norm": 446.1357421875, "learning_rate": 9.997835497835499e-06, "loss": 51.5745, "step": 3784 }, { "epoch": 90.11940298507463, "grad_norm": 229.35903930664062, "learning_rate": 9.995670995670996e-06, "loss": 49.5899, "step": 3785 }, { "epoch": 90.14328358208955, "grad_norm": 109.18777465820312, "learning_rate": 9.993506493506494e-06, "loss": 45.889, "step": 3786 }, { "epoch": 90.16716417910447, "grad_norm": 79.82958221435547, "learning_rate": 9.991341991341992e-06, "loss": 44.3638, "step": 3787 }, { "epoch": 90.1910447761194, "grad_norm": 69.46668243408203, "learning_rate": 9.98917748917749e-06, "loss": 43.6641, "step": 3788 }, { "epoch": 90.21492537313434, "grad_norm": 56.4055290222168, "learning_rate": 9.987012987012988e-06, "loss": 45.0336, "step": 3789 }, { "epoch": 90.23880597014926, "grad_norm": 53.48906326293945, "learning_rate": 9.984848484848485e-06, "loss": 42.9807, "step": 3790 }, { "epoch": 90.26268656716418, "grad_norm": 38.25556564331055, "learning_rate": 9.982683982683983e-06, "loss": 44.1306, "step": 3791 }, { "epoch": 90.2865671641791, "grad_norm": 41.42750549316406, "learning_rate": 9.980519480519481e-06, "loss": 42.1205, "step": 3792 }, { "epoch": 90.31044776119403, "grad_norm": 34.52850341796875, "learning_rate": 9.978354978354979e-06, "loss": 43.3744, "step": 3793 }, { "epoch": 90.33432835820895, "grad_norm": 28.61484146118164, "learning_rate": 9.976190476190477e-06, "loss": 43.487, "step": 3794 }, { "epoch": 90.35820895522389, "grad_norm": 27.961273193359375, "learning_rate": 9.974025974025974e-06, "loss": 43.9663, "step": 3795 }, { "epoch": 90.38208955223881, "grad_norm": 27.92458152770996, "learning_rate": 9.971861471861472e-06, "loss": 43.2716, "step": 3796 }, { "epoch": 90.40597014925373, "grad_norm": 21.93165397644043, "learning_rate": 9.96969696969697e-06, "loss": 43.3704, "step": 3797 }, { "epoch": 90.42985074626866, "grad_norm": 27.053754806518555, "learning_rate": 9.967532467532468e-06, "loss": 42.7038, "step": 3798 }, { "epoch": 90.45373134328358, "grad_norm": 31.030607223510742, "learning_rate": 9.965367965367966e-06, "loss": 43.1343, "step": 3799 }, { "epoch": 90.4776119402985, "grad_norm": 24.048316955566406, "learning_rate": 9.963203463203463e-06, "loss": 42.1113, "step": 3800 }, { "epoch": 90.50149253731344, "grad_norm": 17.98249053955078, "learning_rate": 9.961038961038963e-06, "loss": 42.6117, "step": 3801 }, { "epoch": 90.52537313432836, "grad_norm": 20.080669403076172, "learning_rate": 9.95887445887446e-06, "loss": 42.4281, "step": 3802 }, { "epoch": 90.54925373134328, "grad_norm": 19.842525482177734, "learning_rate": 9.956709956709958e-06, "loss": 40.8022, "step": 3803 }, { "epoch": 90.57313432835821, "grad_norm": 20.453306198120117, "learning_rate": 9.954545454545456e-06, "loss": 42.8288, "step": 3804 }, { "epoch": 90.59701492537313, "grad_norm": 19.955123901367188, "learning_rate": 9.952380952380954e-06, "loss": 40.2546, "step": 3805 }, { "epoch": 90.62089552238805, "grad_norm": 17.246713638305664, "learning_rate": 9.950216450216452e-06, "loss": 42.0433, "step": 3806 }, { "epoch": 90.64477611940299, "grad_norm": 20.76253890991211, "learning_rate": 9.94805194805195e-06, "loss": 42.7741, "step": 3807 }, { "epoch": 90.66865671641791, "grad_norm": 21.001201629638672, "learning_rate": 9.945887445887446e-06, "loss": 43.6741, "step": 3808 }, { "epoch": 90.69253731343284, "grad_norm": 20.765684127807617, "learning_rate": 9.943722943722944e-06, "loss": 41.8182, "step": 3809 }, { "epoch": 90.71641791044776, "grad_norm": 16.794981002807617, "learning_rate": 9.941558441558441e-06, "loss": 42.6478, "step": 3810 }, { "epoch": 90.74029850746268, "grad_norm": 23.377695083618164, "learning_rate": 9.939393939393939e-06, "loss": 42.0878, "step": 3811 }, { "epoch": 90.7641791044776, "grad_norm": 23.543071746826172, "learning_rate": 9.937229437229437e-06, "loss": 42.4977, "step": 3812 }, { "epoch": 90.78805970149254, "grad_norm": 18.546525955200195, "learning_rate": 9.935064935064936e-06, "loss": 42.4457, "step": 3813 }, { "epoch": 90.81194029850747, "grad_norm": 25.244186401367188, "learning_rate": 9.932900432900434e-06, "loss": 42.4906, "step": 3814 }, { "epoch": 90.83582089552239, "grad_norm": 21.267963409423828, "learning_rate": 9.930735930735932e-06, "loss": 41.7433, "step": 3815 }, { "epoch": 90.85970149253731, "grad_norm": 19.291160583496094, "learning_rate": 9.92857142857143e-06, "loss": 41.7054, "step": 3816 }, { "epoch": 90.88358208955223, "grad_norm": 21.301227569580078, "learning_rate": 9.926406926406928e-06, "loss": 42.5566, "step": 3817 }, { "epoch": 90.90746268656716, "grad_norm": 19.511821746826172, "learning_rate": 9.924242424242425e-06, "loss": 41.5064, "step": 3818 }, { "epoch": 90.9313432835821, "grad_norm": 18.419504165649414, "learning_rate": 9.922077922077923e-06, "loss": 41.4675, "step": 3819 }, { "epoch": 90.95522388059702, "grad_norm": 19.577409744262695, "learning_rate": 9.919913419913421e-06, "loss": 43.4705, "step": 3820 }, { "epoch": 90.97910447761194, "grad_norm": 23.015262603759766, "learning_rate": 9.917748917748919e-06, "loss": 42.0356, "step": 3821 }, { "epoch": 91.0, "grad_norm": 17.785385131835938, "learning_rate": 9.915584415584417e-06, "loss": 37.6509, "step": 3822 }, { "epoch": 91.02388059701492, "grad_norm": 16.111051559448242, "learning_rate": 9.913419913419914e-06, "loss": 41.7977, "step": 3823 }, { "epoch": 91.04776119402985, "grad_norm": 22.09601593017578, "learning_rate": 9.911255411255412e-06, "loss": 42.5569, "step": 3824 }, { "epoch": 91.07164179104478, "grad_norm": 18.80573081970215, "learning_rate": 9.90909090909091e-06, "loss": 41.773, "step": 3825 }, { "epoch": 91.0955223880597, "grad_norm": 14.442939758300781, "learning_rate": 9.906926406926408e-06, "loss": 42.0426, "step": 3826 }, { "epoch": 91.11940298507463, "grad_norm": 21.839468002319336, "learning_rate": 9.904761904761906e-06, "loss": 41.9993, "step": 3827 }, { "epoch": 91.14328358208955, "grad_norm": 17.792217254638672, "learning_rate": 9.902597402597403e-06, "loss": 42.1515, "step": 3828 }, { "epoch": 91.16716417910447, "grad_norm": 15.722336769104004, "learning_rate": 9.900432900432901e-06, "loss": 42.2694, "step": 3829 }, { "epoch": 91.1910447761194, "grad_norm": 20.94297218322754, "learning_rate": 9.898268398268399e-06, "loss": 42.7043, "step": 3830 }, { "epoch": 91.21492537313434, "grad_norm": 16.2196044921875, "learning_rate": 9.896103896103897e-06, "loss": 42.4405, "step": 3831 }, { "epoch": 91.23880597014926, "grad_norm": 20.381193161010742, "learning_rate": 9.893939393939395e-06, "loss": 43.424, "step": 3832 }, { "epoch": 91.26268656716418, "grad_norm": 14.948447227478027, "learning_rate": 9.891774891774892e-06, "loss": 42.7289, "step": 3833 }, { "epoch": 91.2865671641791, "grad_norm": 17.548126220703125, "learning_rate": 9.88961038961039e-06, "loss": 41.9656, "step": 3834 }, { "epoch": 91.31044776119403, "grad_norm": 20.301937103271484, "learning_rate": 9.887445887445888e-06, "loss": 42.9516, "step": 3835 }, { "epoch": 91.33432835820895, "grad_norm": 18.3472900390625, "learning_rate": 9.885281385281386e-06, "loss": 42.281, "step": 3836 }, { "epoch": 91.35820895522389, "grad_norm": 15.503434181213379, "learning_rate": 9.883116883116885e-06, "loss": 42.502, "step": 3837 }, { "epoch": 91.38208955223881, "grad_norm": 21.448226928710938, "learning_rate": 9.880952380952381e-06, "loss": 43.0384, "step": 3838 }, { "epoch": 91.40597014925373, "grad_norm": 16.685815811157227, "learning_rate": 9.87878787878788e-06, "loss": 41.798, "step": 3839 }, { "epoch": 91.42985074626866, "grad_norm": 18.722484588623047, "learning_rate": 9.876623376623377e-06, "loss": 43.4082, "step": 3840 }, { "epoch": 91.45373134328358, "grad_norm": 19.54647445678711, "learning_rate": 9.874458874458875e-06, "loss": 42.2679, "step": 3841 }, { "epoch": 91.4776119402985, "grad_norm": 18.793495178222656, "learning_rate": 9.872294372294373e-06, "loss": 42.2962, "step": 3842 }, { "epoch": 91.50149253731344, "grad_norm": 16.687400817871094, "learning_rate": 9.87012987012987e-06, "loss": 44.2949, "step": 3843 }, { "epoch": 91.52537313432836, "grad_norm": 16.13211441040039, "learning_rate": 9.867965367965368e-06, "loss": 42.602, "step": 3844 }, { "epoch": 91.54925373134328, "grad_norm": 16.72748565673828, "learning_rate": 9.865800865800866e-06, "loss": 42.3636, "step": 3845 }, { "epoch": 91.57313432835821, "grad_norm": 22.206905364990234, "learning_rate": 9.863636363636364e-06, "loss": 43.1925, "step": 3846 }, { "epoch": 91.59701492537313, "grad_norm": 19.21588134765625, "learning_rate": 9.861471861471862e-06, "loss": 43.1342, "step": 3847 }, { "epoch": 91.62089552238805, "grad_norm": 19.708059310913086, "learning_rate": 9.85930735930736e-06, "loss": 42.7964, "step": 3848 }, { "epoch": 91.64477611940299, "grad_norm": 22.789594650268555, "learning_rate": 9.857142857142859e-06, "loss": 42.7767, "step": 3849 }, { "epoch": 91.66865671641791, "grad_norm": 17.048229217529297, "learning_rate": 9.854978354978357e-06, "loss": 42.6642, "step": 3850 }, { "epoch": 91.69253731343284, "grad_norm": 21.39427375793457, "learning_rate": 9.852813852813854e-06, "loss": 42.8962, "step": 3851 }, { "epoch": 91.71641791044776, "grad_norm": 25.67850112915039, "learning_rate": 9.850649350649352e-06, "loss": 42.4072, "step": 3852 }, { "epoch": 91.74029850746268, "grad_norm": 20.17367935180664, "learning_rate": 9.84848484848485e-06, "loss": 42.3302, "step": 3853 }, { "epoch": 91.7641791044776, "grad_norm": 16.018030166625977, "learning_rate": 9.846320346320348e-06, "loss": 42.6877, "step": 3854 }, { "epoch": 91.78805970149254, "grad_norm": 18.5965576171875, "learning_rate": 9.844155844155846e-06, "loss": 41.4104, "step": 3855 }, { "epoch": 91.81194029850747, "grad_norm": 17.651378631591797, "learning_rate": 9.841991341991343e-06, "loss": 42.1591, "step": 3856 }, { "epoch": 91.83582089552239, "grad_norm": 15.912792205810547, "learning_rate": 9.839826839826841e-06, "loss": 41.0675, "step": 3857 }, { "epoch": 91.85970149253731, "grad_norm": 20.338071823120117, "learning_rate": 9.837662337662337e-06, "loss": 43.0971, "step": 3858 }, { "epoch": 91.88358208955223, "grad_norm": 19.422807693481445, "learning_rate": 9.835497835497835e-06, "loss": 41.022, "step": 3859 }, { "epoch": 91.90746268656716, "grad_norm": 18.216012954711914, "learning_rate": 9.833333333333333e-06, "loss": 42.0068, "step": 3860 }, { "epoch": 91.9313432835821, "grad_norm": 17.68181610107422, "learning_rate": 9.831168831168832e-06, "loss": 42.778, "step": 3861 }, { "epoch": 91.95522388059702, "grad_norm": 20.660480499267578, "learning_rate": 9.82900432900433e-06, "loss": 42.8923, "step": 3862 }, { "epoch": 91.97910447761194, "grad_norm": 22.78632926940918, "learning_rate": 9.826839826839828e-06, "loss": 41.5412, "step": 3863 }, { "epoch": 92.0, "grad_norm": 17.660106658935547, "learning_rate": 9.824675324675326e-06, "loss": 36.8816, "step": 3864 }, { "epoch": 92.02388059701492, "grad_norm": 19.257198333740234, "learning_rate": 9.822510822510824e-06, "loss": 41.3789, "step": 3865 }, { "epoch": 92.04776119402985, "grad_norm": 17.690038681030273, "learning_rate": 9.820346320346321e-06, "loss": 41.8596, "step": 3866 }, { "epoch": 92.07164179104478, "grad_norm": 25.88194465637207, "learning_rate": 9.81818181818182e-06, "loss": 42.1967, "step": 3867 }, { "epoch": 92.0955223880597, "grad_norm": 18.971637725830078, "learning_rate": 9.816017316017317e-06, "loss": 41.5025, "step": 3868 }, { "epoch": 92.11940298507463, "grad_norm": 18.14025115966797, "learning_rate": 9.813852813852815e-06, "loss": 42.7121, "step": 3869 }, { "epoch": 92.14328358208955, "grad_norm": 24.20391845703125, "learning_rate": 9.811688311688313e-06, "loss": 42.9952, "step": 3870 }, { "epoch": 92.16716417910447, "grad_norm": 18.484018325805664, "learning_rate": 9.80952380952381e-06, "loss": 44.174, "step": 3871 }, { "epoch": 92.1910447761194, "grad_norm": 24.238615036010742, "learning_rate": 9.807359307359308e-06, "loss": 42.933, "step": 3872 }, { "epoch": 92.21492537313434, "grad_norm": 21.95537757873535, "learning_rate": 9.805194805194806e-06, "loss": 42.5797, "step": 3873 }, { "epoch": 92.23880597014926, "grad_norm": 16.300167083740234, "learning_rate": 9.803030303030304e-06, "loss": 41.8871, "step": 3874 }, { "epoch": 92.26268656716418, "grad_norm": 31.398351669311523, "learning_rate": 9.800865800865802e-06, "loss": 42.8308, "step": 3875 }, { "epoch": 92.2865671641791, "grad_norm": 21.76424789428711, "learning_rate": 9.7987012987013e-06, "loss": 42.1119, "step": 3876 }, { "epoch": 92.31044776119403, "grad_norm": 26.037975311279297, "learning_rate": 9.796536796536797e-06, "loss": 42.0092, "step": 3877 }, { "epoch": 92.33432835820895, "grad_norm": 26.393800735473633, "learning_rate": 9.794372294372295e-06, "loss": 43.9124, "step": 3878 }, { "epoch": 92.35820895522389, "grad_norm": 21.763713836669922, "learning_rate": 9.792207792207793e-06, "loss": 42.6169, "step": 3879 }, { "epoch": 92.38208955223881, "grad_norm": 28.867443084716797, "learning_rate": 9.79004329004329e-06, "loss": 43.093, "step": 3880 }, { "epoch": 92.40597014925373, "grad_norm": 20.59787940979004, "learning_rate": 9.787878787878788e-06, "loss": 43.4976, "step": 3881 }, { "epoch": 92.42985074626866, "grad_norm": 32.58126449584961, "learning_rate": 9.785714285714286e-06, "loss": 42.2799, "step": 3882 }, { "epoch": 92.45373134328358, "grad_norm": 18.00343132019043, "learning_rate": 9.783549783549784e-06, "loss": 42.9497, "step": 3883 }, { "epoch": 92.4776119402985, "grad_norm": 31.740930557250977, "learning_rate": 9.781385281385282e-06, "loss": 42.7341, "step": 3884 }, { "epoch": 92.50149253731344, "grad_norm": 24.078405380249023, "learning_rate": 9.779220779220781e-06, "loss": 43.1077, "step": 3885 }, { "epoch": 92.52537313432836, "grad_norm": 21.194313049316406, "learning_rate": 9.777056277056279e-06, "loss": 41.9059, "step": 3886 }, { "epoch": 92.54925373134328, "grad_norm": 30.298595428466797, "learning_rate": 9.774891774891775e-06, "loss": 41.5753, "step": 3887 }, { "epoch": 92.57313432835821, "grad_norm": 21.55902099609375, "learning_rate": 9.772727272727273e-06, "loss": 41.659, "step": 3888 }, { "epoch": 92.59701492537313, "grad_norm": 27.879924774169922, "learning_rate": 9.77056277056277e-06, "loss": 42.4026, "step": 3889 }, { "epoch": 92.62089552238805, "grad_norm": 20.100893020629883, "learning_rate": 9.768398268398269e-06, "loss": 42.3196, "step": 3890 }, { "epoch": 92.64477611940299, "grad_norm": 24.352115631103516, "learning_rate": 9.766233766233766e-06, "loss": 42.4063, "step": 3891 }, { "epoch": 92.66865671641791, "grad_norm": 24.65276336669922, "learning_rate": 9.764069264069264e-06, "loss": 41.6774, "step": 3892 }, { "epoch": 92.69253731343284, "grad_norm": 18.95211410522461, "learning_rate": 9.761904761904762e-06, "loss": 40.4774, "step": 3893 }, { "epoch": 92.71641791044776, "grad_norm": 37.48885726928711, "learning_rate": 9.75974025974026e-06, "loss": 42.0188, "step": 3894 }, { "epoch": 92.74029850746268, "grad_norm": 27.999391555786133, "learning_rate": 9.757575757575758e-06, "loss": 41.9417, "step": 3895 }, { "epoch": 92.7641791044776, "grad_norm": 41.38749694824219, "learning_rate": 9.755411255411255e-06, "loss": 42.3823, "step": 3896 }, { "epoch": 92.78805970149254, "grad_norm": 30.16627311706543, "learning_rate": 9.753246753246755e-06, "loss": 42.6722, "step": 3897 }, { "epoch": 92.81194029850747, "grad_norm": 42.71925735473633, "learning_rate": 9.751082251082253e-06, "loss": 42.7932, "step": 3898 }, { "epoch": 92.83582089552239, "grad_norm": 42.11480712890625, "learning_rate": 9.74891774891775e-06, "loss": 42.3812, "step": 3899 }, { "epoch": 92.85970149253731, "grad_norm": 23.51568031311035, "learning_rate": 9.746753246753248e-06, "loss": 42.0872, "step": 3900 }, { "epoch": 92.88358208955223, "grad_norm": 29.64082145690918, "learning_rate": 9.744588744588746e-06, "loss": 42.7743, "step": 3901 }, { "epoch": 92.90746268656716, "grad_norm": 24.687829971313477, "learning_rate": 9.742424242424244e-06, "loss": 42.151, "step": 3902 }, { "epoch": 92.9313432835821, "grad_norm": 23.673076629638672, "learning_rate": 9.740259740259742e-06, "loss": 42.949, "step": 3903 }, { "epoch": 92.95522388059702, "grad_norm": 29.738771438598633, "learning_rate": 9.73809523809524e-06, "loss": 41.3754, "step": 3904 }, { "epoch": 92.97910447761194, "grad_norm": 23.26430320739746, "learning_rate": 9.735930735930737e-06, "loss": 42.2649, "step": 3905 }, { "epoch": 93.0, "grad_norm": 33.02578353881836, "learning_rate": 9.733766233766235e-06, "loss": 36.7133, "step": 3906 }, { "epoch": 93.02388059701492, "grad_norm": 29.762083053588867, "learning_rate": 9.731601731601731e-06, "loss": 42.1617, "step": 3907 }, { "epoch": 93.04776119402985, "grad_norm": 42.29904556274414, "learning_rate": 9.729437229437229e-06, "loss": 41.4727, "step": 3908 }, { "epoch": 93.07164179104478, "grad_norm": 35.2297477722168, "learning_rate": 9.727272727272728e-06, "loss": 41.8486, "step": 3909 }, { "epoch": 93.0955223880597, "grad_norm": 31.90110206604004, "learning_rate": 9.725108225108226e-06, "loss": 41.3951, "step": 3910 }, { "epoch": 93.11940298507463, "grad_norm": 33.118011474609375, "learning_rate": 9.722943722943724e-06, "loss": 42.8038, "step": 3911 }, { "epoch": 93.14328358208955, "grad_norm": 28.162616729736328, "learning_rate": 9.720779220779222e-06, "loss": 42.2424, "step": 3912 }, { "epoch": 93.16716417910447, "grad_norm": 26.799827575683594, "learning_rate": 9.71861471861472e-06, "loss": 41.9939, "step": 3913 }, { "epoch": 93.1910447761194, "grad_norm": 36.02149200439453, "learning_rate": 9.716450216450217e-06, "loss": 43.0555, "step": 3914 }, { "epoch": 93.21492537313434, "grad_norm": 30.073331832885742, "learning_rate": 9.714285714285715e-06, "loss": 40.7799, "step": 3915 }, { "epoch": 93.23880597014926, "grad_norm": 32.572547912597656, "learning_rate": 9.712121212121213e-06, "loss": 42.139, "step": 3916 }, { "epoch": 93.26268656716418, "grad_norm": 30.6304988861084, "learning_rate": 9.70995670995671e-06, "loss": 42.702, "step": 3917 }, { "epoch": 93.2865671641791, "grad_norm": 33.230812072753906, "learning_rate": 9.707792207792209e-06, "loss": 42.4281, "step": 3918 }, { "epoch": 93.31044776119403, "grad_norm": 29.524002075195312, "learning_rate": 9.705627705627706e-06, "loss": 42.5262, "step": 3919 }, { "epoch": 93.33432835820895, "grad_norm": 29.51606559753418, "learning_rate": 9.703463203463204e-06, "loss": 41.8173, "step": 3920 }, { "epoch": 93.35820895522389, "grad_norm": 22.32621192932129, "learning_rate": 9.701298701298702e-06, "loss": 43.059, "step": 3921 }, { "epoch": 93.38208955223881, "grad_norm": 36.80875778198242, "learning_rate": 9.6991341991342e-06, "loss": 41.8935, "step": 3922 }, { "epoch": 93.40597014925373, "grad_norm": 30.580604553222656, "learning_rate": 9.696969696969698e-06, "loss": 43.2128, "step": 3923 }, { "epoch": 93.42985074626866, "grad_norm": 29.170934677124023, "learning_rate": 9.694805194805195e-06, "loss": 41.6993, "step": 3924 }, { "epoch": 93.45373134328358, "grad_norm": 28.69053840637207, "learning_rate": 9.692640692640693e-06, "loss": 43.051, "step": 3925 }, { "epoch": 93.4776119402985, "grad_norm": 29.881338119506836, "learning_rate": 9.690476190476191e-06, "loss": 41.1923, "step": 3926 }, { "epoch": 93.50149253731344, "grad_norm": 25.122774124145508, "learning_rate": 9.688311688311689e-06, "loss": 42.4061, "step": 3927 }, { "epoch": 93.52537313432836, "grad_norm": 34.054847717285156, "learning_rate": 9.686147186147187e-06, "loss": 42.4354, "step": 3928 }, { "epoch": 93.54925373134328, "grad_norm": 29.546493530273438, "learning_rate": 9.683982683982684e-06, "loss": 41.9759, "step": 3929 }, { "epoch": 93.57313432835821, "grad_norm": 32.49911880493164, "learning_rate": 9.681818181818182e-06, "loss": 43.3769, "step": 3930 }, { "epoch": 93.59701492537313, "grad_norm": 28.943012237548828, "learning_rate": 9.67965367965368e-06, "loss": 41.6171, "step": 3931 }, { "epoch": 93.62089552238805, "grad_norm": 32.4178466796875, "learning_rate": 9.67748917748918e-06, "loss": 42.6111, "step": 3932 }, { "epoch": 93.64477611940299, "grad_norm": 30.295703887939453, "learning_rate": 9.675324675324677e-06, "loss": 41.6904, "step": 3933 }, { "epoch": 93.66865671641791, "grad_norm": 31.419668197631836, "learning_rate": 9.673160173160175e-06, "loss": 42.3374, "step": 3934 }, { "epoch": 93.69253731343284, "grad_norm": 29.994272232055664, "learning_rate": 9.670995670995673e-06, "loss": 42.1553, "step": 3935 }, { "epoch": 93.71641791044776, "grad_norm": 30.031116485595703, "learning_rate": 9.66883116883117e-06, "loss": 42.1101, "step": 3936 }, { "epoch": 93.74029850746268, "grad_norm": 28.21011734008789, "learning_rate": 9.666666666666667e-06, "loss": 42.0604, "step": 3937 }, { "epoch": 93.7641791044776, "grad_norm": 32.34469985961914, "learning_rate": 9.664502164502165e-06, "loss": 42.4025, "step": 3938 }, { "epoch": 93.78805970149254, "grad_norm": 25.2736759185791, "learning_rate": 9.662337662337662e-06, "loss": 42.7677, "step": 3939 }, { "epoch": 93.81194029850747, "grad_norm": 35.72128677368164, "learning_rate": 9.66017316017316e-06, "loss": 43.4687, "step": 3940 }, { "epoch": 93.83582089552239, "grad_norm": 30.39203453063965, "learning_rate": 9.658008658008658e-06, "loss": 41.7504, "step": 3941 }, { "epoch": 93.85970149253731, "grad_norm": 26.031253814697266, "learning_rate": 9.655844155844156e-06, "loss": 41.6092, "step": 3942 }, { "epoch": 93.88358208955223, "grad_norm": 23.05304718017578, "learning_rate": 9.653679653679654e-06, "loss": 42.4116, "step": 3943 }, { "epoch": 93.90746268656716, "grad_norm": 27.849210739135742, "learning_rate": 9.651515151515153e-06, "loss": 43.2295, "step": 3944 }, { "epoch": 93.9313432835821, "grad_norm": 25.089933395385742, "learning_rate": 9.64935064935065e-06, "loss": 42.6244, "step": 3945 }, { "epoch": 93.95522388059702, "grad_norm": 32.90645217895508, "learning_rate": 9.647186147186149e-06, "loss": 42.7992, "step": 3946 }, { "epoch": 93.97910447761194, "grad_norm": 28.58262825012207, "learning_rate": 9.645021645021646e-06, "loss": 43.0072, "step": 3947 }, { "epoch": 94.0, "grad_norm": 23.826631546020508, "learning_rate": 9.642857142857144e-06, "loss": 37.1225, "step": 3948 }, { "epoch": 94.02388059701492, "grad_norm": 28.149904251098633, "learning_rate": 9.640692640692642e-06, "loss": 42.374, "step": 3949 }, { "epoch": 94.04776119402985, "grad_norm": 28.40786361694336, "learning_rate": 9.63852813852814e-06, "loss": 41.6844, "step": 3950 }, { "epoch": 94.07164179104478, "grad_norm": 25.789466857910156, "learning_rate": 9.636363636363638e-06, "loss": 41.9359, "step": 3951 }, { "epoch": 94.0955223880597, "grad_norm": 31.53352928161621, "learning_rate": 9.634199134199135e-06, "loss": 41.4059, "step": 3952 }, { "epoch": 94.11940298507463, "grad_norm": 25.65757179260254, "learning_rate": 9.632034632034633e-06, "loss": 42.8445, "step": 3953 }, { "epoch": 94.14328358208955, "grad_norm": 35.67771911621094, "learning_rate": 9.629870129870131e-06, "loss": 43.0635, "step": 3954 }, { "epoch": 94.16716417910447, "grad_norm": 31.19240951538086, "learning_rate": 9.627705627705629e-06, "loss": 42.4725, "step": 3955 }, { "epoch": 94.1910447761194, "grad_norm": 31.1099853515625, "learning_rate": 9.625541125541127e-06, "loss": 42.6572, "step": 3956 }, { "epoch": 94.21492537313434, "grad_norm": 28.18238639831543, "learning_rate": 9.623376623376624e-06, "loss": 40.6298, "step": 3957 }, { "epoch": 94.23880597014926, "grad_norm": 25.916431427001953, "learning_rate": 9.621212121212122e-06, "loss": 42.1036, "step": 3958 }, { "epoch": 94.26268656716418, "grad_norm": 25.19932746887207, "learning_rate": 9.61904761904762e-06, "loss": 42.7877, "step": 3959 }, { "epoch": 94.2865671641791, "grad_norm": 31.23909568786621, "learning_rate": 9.616883116883118e-06, "loss": 42.3302, "step": 3960 }, { "epoch": 94.31044776119403, "grad_norm": 27.547996520996094, "learning_rate": 9.614718614718616e-06, "loss": 42.9115, "step": 3961 }, { "epoch": 94.33432835820895, "grad_norm": 33.331939697265625, "learning_rate": 9.612554112554113e-06, "loss": 42.9594, "step": 3962 }, { "epoch": 94.35820895522389, "grad_norm": 26.780292510986328, "learning_rate": 9.610389610389611e-06, "loss": 43.8544, "step": 3963 }, { "epoch": 94.38208955223881, "grad_norm": 25.683496475219727, "learning_rate": 9.608225108225109e-06, "loss": 41.3053, "step": 3964 }, { "epoch": 94.40597014925373, "grad_norm": 22.268705368041992, "learning_rate": 9.606060606060607e-06, "loss": 41.5663, "step": 3965 }, { "epoch": 94.42985074626866, "grad_norm": 26.915376663208008, "learning_rate": 9.603896103896105e-06, "loss": 42.8438, "step": 3966 }, { "epoch": 94.45373134328358, "grad_norm": 18.383493423461914, "learning_rate": 9.601731601731602e-06, "loss": 42.499, "step": 3967 }, { "epoch": 94.4776119402985, "grad_norm": 36.09028244018555, "learning_rate": 9.5995670995671e-06, "loss": 42.8744, "step": 3968 }, { "epoch": 94.50149253731344, "grad_norm": 27.188034057617188, "learning_rate": 9.597402597402598e-06, "loss": 41.8915, "step": 3969 }, { "epoch": 94.52537313432836, "grad_norm": 30.428661346435547, "learning_rate": 9.595238095238096e-06, "loss": 41.9762, "step": 3970 }, { "epoch": 94.54925373134328, "grad_norm": 25.777450561523438, "learning_rate": 9.593073593073594e-06, "loss": 43.0853, "step": 3971 }, { "epoch": 94.57313432835821, "grad_norm": 28.07237434387207, "learning_rate": 9.590909090909091e-06, "loss": 42.7039, "step": 3972 }, { "epoch": 94.59701492537313, "grad_norm": 22.956628799438477, "learning_rate": 9.588744588744589e-06, "loss": 42.8252, "step": 3973 }, { "epoch": 94.62089552238805, "grad_norm": 30.87279510498047, "learning_rate": 9.586580086580087e-06, "loss": 42.8992, "step": 3974 }, { "epoch": 94.64477611940299, "grad_norm": 24.29635238647461, "learning_rate": 9.584415584415585e-06, "loss": 41.1669, "step": 3975 }, { "epoch": 94.66865671641791, "grad_norm": 31.342975616455078, "learning_rate": 9.582251082251083e-06, "loss": 42.9206, "step": 3976 }, { "epoch": 94.69253731343284, "grad_norm": 32.55195999145508, "learning_rate": 9.58008658008658e-06, "loss": 41.6277, "step": 3977 }, { "epoch": 94.71641791044776, "grad_norm": 29.774578094482422, "learning_rate": 9.577922077922078e-06, "loss": 42.2551, "step": 3978 }, { "epoch": 94.74029850746268, "grad_norm": 25.456302642822266, "learning_rate": 9.575757575757576e-06, "loss": 40.1482, "step": 3979 }, { "epoch": 94.7641791044776, "grad_norm": 25.847124099731445, "learning_rate": 9.573593073593075e-06, "loss": 42.2201, "step": 3980 }, { "epoch": 94.78805970149254, "grad_norm": 27.12795066833496, "learning_rate": 9.571428571428573e-06, "loss": 41.8479, "step": 3981 }, { "epoch": 94.81194029850747, "grad_norm": 24.278888702392578, "learning_rate": 9.569264069264071e-06, "loss": 42.6692, "step": 3982 }, { "epoch": 94.83582089552239, "grad_norm": 22.567380905151367, "learning_rate": 9.567099567099569e-06, "loss": 42.3215, "step": 3983 }, { "epoch": 94.85970149253731, "grad_norm": 23.813114166259766, "learning_rate": 9.564935064935067e-06, "loss": 42.6284, "step": 3984 }, { "epoch": 94.88358208955223, "grad_norm": 19.152956008911133, "learning_rate": 9.562770562770564e-06, "loss": 41.7055, "step": 3985 }, { "epoch": 94.90746268656716, "grad_norm": 25.253353118896484, "learning_rate": 9.56060606060606e-06, "loss": 42.5487, "step": 3986 }, { "epoch": 94.9313432835821, "grad_norm": 21.04471206665039, "learning_rate": 9.558441558441558e-06, "loss": 44.019, "step": 3987 }, { "epoch": 94.95522388059702, "grad_norm": NaN, "learning_rate": 9.556277056277056e-06, "loss": 47.5805, "step": 3988 }, { "epoch": 94.97910447761194, "grad_norm": 20.38011932373047, "learning_rate": 9.556277056277056e-06, "loss": 40.8306, "step": 3989 }, { "epoch": 95.0, "grad_norm": 20.988080978393555, "learning_rate": 9.554112554112554e-06, "loss": 35.8475, "step": 3990 }, { "epoch": 95.02388059701492, "grad_norm": 25.182218551635742, "learning_rate": 9.551948051948052e-06, "loss": 42.7702, "step": 3991 }, { "epoch": 95.04776119402985, "grad_norm": 18.022729873657227, "learning_rate": 9.54978354978355e-06, "loss": 41.3642, "step": 3992 }, { "epoch": 95.07164179104478, "grad_norm": 28.234127044677734, "learning_rate": 9.547619047619049e-06, "loss": 41.819, "step": 3993 }, { "epoch": 95.0955223880597, "grad_norm": 22.71247100830078, "learning_rate": 9.545454545454547e-06, "loss": 43.0423, "step": 3994 }, { "epoch": 95.11940298507463, "grad_norm": 26.776891708374023, "learning_rate": 9.543290043290045e-06, "loss": 42.4988, "step": 3995 }, { "epoch": 95.14328358208955, "grad_norm": 21.445236206054688, "learning_rate": 9.541125541125542e-06, "loss": 41.4199, "step": 3996 }, { "epoch": 95.16716417910447, "grad_norm": 23.514680862426758, "learning_rate": 9.53896103896104e-06, "loss": 42.3048, "step": 3997 }, { "epoch": 95.1910447761194, "grad_norm": 19.648818969726562, "learning_rate": 9.536796536796538e-06, "loss": 41.8681, "step": 3998 }, { "epoch": 95.21492537313434, "grad_norm": 21.146074295043945, "learning_rate": 9.534632034632036e-06, "loss": 42.3901, "step": 3999 }, { "epoch": 95.23880597014926, "grad_norm": 17.257108688354492, "learning_rate": 9.532467532467534e-06, "loss": 42.5485, "step": 4000 }, { "epoch": 95.26268656716418, "grad_norm": 20.980907440185547, "learning_rate": 9.530303030303031e-06, "loss": 42.5298, "step": 4001 }, { "epoch": 95.2865671641791, "grad_norm": 22.18124771118164, "learning_rate": 9.52813852813853e-06, "loss": 42.7032, "step": 4002 }, { "epoch": 95.31044776119403, "grad_norm": 20.432281494140625, "learning_rate": 9.525974025974027e-06, "loss": 43.449, "step": 4003 }, { "epoch": 95.33432835820895, "grad_norm": 19.2701473236084, "learning_rate": 9.523809523809525e-06, "loss": 40.555, "step": 4004 }, { "epoch": 95.35820895522389, "grad_norm": 19.681455612182617, "learning_rate": 9.521645021645023e-06, "loss": 41.2141, "step": 4005 }, { "epoch": 95.38208955223881, "grad_norm": 18.39265251159668, "learning_rate": 9.51948051948052e-06, "loss": 42.6937, "step": 4006 }, { "epoch": 95.40597014925373, "grad_norm": 19.818313598632812, "learning_rate": 9.517316017316018e-06, "loss": 43.3448, "step": 4007 }, { "epoch": 95.42985074626866, "grad_norm": 22.540481567382812, "learning_rate": 9.515151515151516e-06, "loss": 42.9516, "step": 4008 }, { "epoch": 95.45373134328358, "grad_norm": 19.422515869140625, "learning_rate": 9.512987012987014e-06, "loss": 42.7121, "step": 4009 }, { "epoch": 95.4776119402985, "grad_norm": 22.789037704467773, "learning_rate": 9.510822510822512e-06, "loss": 42.5243, "step": 4010 }, { "epoch": 95.50149253731344, "grad_norm": 18.70187759399414, "learning_rate": 9.50865800865801e-06, "loss": 40.3263, "step": 4011 }, { "epoch": 95.52537313432836, "grad_norm": 24.231351852416992, "learning_rate": 9.506493506493507e-06, "loss": 42.1699, "step": 4012 }, { "epoch": 95.54925373134328, "grad_norm": 26.356748580932617, "learning_rate": 9.504329004329005e-06, "loss": 42.181, "step": 4013 }, { "epoch": 95.57313432835821, "grad_norm": 18.702556610107422, "learning_rate": 9.502164502164503e-06, "loss": 42.4881, "step": 4014 }, { "epoch": 95.59701492537313, "grad_norm": 27.878799438476562, "learning_rate": 9.5e-06, "loss": 42.2801, "step": 4015 }, { "epoch": 95.62089552238805, "grad_norm": 20.791034698486328, "learning_rate": 9.497835497835498e-06, "loss": 41.909, "step": 4016 }, { "epoch": 95.64477611940299, "grad_norm": 24.874574661254883, "learning_rate": 9.495670995670996e-06, "loss": 42.2108, "step": 4017 }, { "epoch": 95.66865671641791, "grad_norm": 18.562255859375, "learning_rate": 9.493506493506494e-06, "loss": 41.687, "step": 4018 }, { "epoch": 95.69253731343284, "grad_norm": 27.460060119628906, "learning_rate": 9.491341991341992e-06, "loss": 42.3688, "step": 4019 }, { "epoch": 95.71641791044776, "grad_norm": 21.485797882080078, "learning_rate": 9.48917748917749e-06, "loss": 42.6037, "step": 4020 }, { "epoch": 95.74029850746268, "grad_norm": 29.475221633911133, "learning_rate": 9.487012987012987e-06, "loss": 39.9582, "step": 4021 }, { "epoch": 95.7641791044776, "grad_norm": 24.83645248413086, "learning_rate": 9.484848484848485e-06, "loss": 42.7876, "step": 4022 }, { "epoch": 95.78805970149254, "grad_norm": 29.321386337280273, "learning_rate": 9.482683982683983e-06, "loss": 42.1032, "step": 4023 }, { "epoch": 95.81194029850747, "grad_norm": 26.891469955444336, "learning_rate": 9.48051948051948e-06, "loss": 42.557, "step": 4024 }, { "epoch": 95.83582089552239, "grad_norm": 27.05336570739746, "learning_rate": 9.478354978354978e-06, "loss": 42.9743, "step": 4025 }, { "epoch": 95.85970149253731, "grad_norm": 25.014963150024414, "learning_rate": 9.476190476190476e-06, "loss": 43.1592, "step": 4026 }, { "epoch": 95.88358208955223, "grad_norm": 25.66219711303711, "learning_rate": 9.474025974025974e-06, "loss": 41.8458, "step": 4027 }, { "epoch": 95.90746268656716, "grad_norm": 22.460660934448242, "learning_rate": 9.471861471861472e-06, "loss": 42.1439, "step": 4028 }, { "epoch": 95.9313432835821, "grad_norm": 19.01448631286621, "learning_rate": 9.469696969696971e-06, "loss": 42.2933, "step": 4029 }, { "epoch": 95.95522388059702, "grad_norm": 21.85147476196289, "learning_rate": 9.46753246753247e-06, "loss": 42.1108, "step": 4030 }, { "epoch": 95.97910447761194, "grad_norm": 18.99871826171875, "learning_rate": 9.465367965367967e-06, "loss": 42.5071, "step": 4031 }, { "epoch": 96.0, "grad_norm": 16.825069427490234, "learning_rate": 9.463203463203465e-06, "loss": 37.1366, "step": 4032 }, { "epoch": 96.02388059701492, "grad_norm": 19.010360717773438, "learning_rate": 9.461038961038963e-06, "loss": 42.3766, "step": 4033 }, { "epoch": 96.04776119402985, "grad_norm": 22.50554656982422, "learning_rate": 9.45887445887446e-06, "loss": 42.5748, "step": 4034 }, { "epoch": 96.07164179104478, "grad_norm": 16.554548263549805, "learning_rate": 9.456709956709958e-06, "loss": 41.9278, "step": 4035 }, { "epoch": 96.0955223880597, "grad_norm": 23.447858810424805, "learning_rate": 9.454545454545456e-06, "loss": 42.3679, "step": 4036 }, { "epoch": 96.11940298507463, "grad_norm": 23.394611358642578, "learning_rate": 9.452380952380952e-06, "loss": 42.4519, "step": 4037 }, { "epoch": 96.14328358208955, "grad_norm": 17.726774215698242, "learning_rate": 9.45021645021645e-06, "loss": 41.8001, "step": 4038 }, { "epoch": 96.16716417910447, "grad_norm": 19.8607177734375, "learning_rate": 9.448051948051948e-06, "loss": 42.2731, "step": 4039 }, { "epoch": 96.1910447761194, "grad_norm": 24.878158569335938, "learning_rate": 9.445887445887445e-06, "loss": 42.4626, "step": 4040 }, { "epoch": 96.21492537313434, "grad_norm": 18.564037322998047, "learning_rate": 9.443722943722945e-06, "loss": 42.4094, "step": 4041 }, { "epoch": 96.23880597014926, "grad_norm": 29.672882080078125, "learning_rate": 9.441558441558443e-06, "loss": 41.8399, "step": 4042 }, { "epoch": 96.26268656716418, "grad_norm": 21.15955924987793, "learning_rate": 9.43939393939394e-06, "loss": 41.8022, "step": 4043 }, { "epoch": 96.2865671641791, "grad_norm": 19.90737533569336, "learning_rate": 9.437229437229438e-06, "loss": 41.5356, "step": 4044 }, { "epoch": 96.31044776119403, "grad_norm": 27.035198211669922, "learning_rate": 9.435064935064936e-06, "loss": 42.3891, "step": 4045 }, { "epoch": 96.33432835820895, "grad_norm": 19.44938850402832, "learning_rate": 9.432900432900434e-06, "loss": 41.7612, "step": 4046 }, { "epoch": 96.35820895522389, "grad_norm": 32.34653091430664, "learning_rate": 9.430735930735932e-06, "loss": 42.9741, "step": 4047 }, { "epoch": 96.38208955223881, "grad_norm": 23.551259994506836, "learning_rate": 9.42857142857143e-06, "loss": 41.3423, "step": 4048 }, { "epoch": 96.40597014925373, "grad_norm": 36.44496536254883, "learning_rate": 9.426406926406927e-06, "loss": 42.8346, "step": 4049 }, { "epoch": 96.42985074626866, "grad_norm": 28.864904403686523, "learning_rate": 9.424242424242425e-06, "loss": 41.8315, "step": 4050 }, { "epoch": 96.45373134328358, "grad_norm": 35.26904296875, "learning_rate": 9.422077922077923e-06, "loss": 41.5353, "step": 4051 }, { "epoch": 96.4776119402985, "grad_norm": 32.65912628173828, "learning_rate": 9.41991341991342e-06, "loss": 41.5928, "step": 4052 }, { "epoch": 96.50149253731344, "grad_norm": 31.7542667388916, "learning_rate": 9.417748917748919e-06, "loss": 41.4377, "step": 4053 }, { "epoch": 96.52537313432836, "grad_norm": 31.60584259033203, "learning_rate": 9.415584415584416e-06, "loss": 42.9119, "step": 4054 }, { "epoch": 96.54925373134328, "grad_norm": 31.597043991088867, "learning_rate": 9.413419913419914e-06, "loss": 42.2946, "step": 4055 }, { "epoch": 96.57313432835821, "grad_norm": 25.871496200561523, "learning_rate": 9.411255411255412e-06, "loss": 42.3518, "step": 4056 }, { "epoch": 96.59701492537313, "grad_norm": 38.121971130371094, "learning_rate": 9.40909090909091e-06, "loss": 41.3268, "step": 4057 }, { "epoch": 96.62089552238805, "grad_norm": 31.4708309173584, "learning_rate": 9.406926406926408e-06, "loss": 42.001, "step": 4058 }, { "epoch": 96.64477611940299, "grad_norm": 32.240604400634766, "learning_rate": 9.404761904761905e-06, "loss": 43.7004, "step": 4059 }, { "epoch": 96.66865671641791, "grad_norm": 29.972900390625, "learning_rate": 9.402597402597403e-06, "loss": 40.8066, "step": 4060 }, { "epoch": 96.69253731343284, "grad_norm": 28.71061897277832, "learning_rate": 9.400432900432901e-06, "loss": 43.052, "step": 4061 }, { "epoch": 96.71641791044776, "grad_norm": 23.861024856567383, "learning_rate": 9.398268398268399e-06, "loss": 42.5682, "step": 4062 }, { "epoch": 96.74029850746268, "grad_norm": 34.21725845336914, "learning_rate": 9.396103896103896e-06, "loss": 42.418, "step": 4063 }, { "epoch": 96.7641791044776, "grad_norm": 22.93166732788086, "learning_rate": 9.393939393939396e-06, "loss": 42.3199, "step": 4064 }, { "epoch": 96.78805970149254, "grad_norm": 35.91544723510742, "learning_rate": 9.391774891774894e-06, "loss": 40.5579, "step": 4065 }, { "epoch": 96.81194029850747, "grad_norm": 29.065799713134766, "learning_rate": 9.38961038961039e-06, "loss": 40.6409, "step": 4066 }, { "epoch": 96.83582089552239, "grad_norm": 33.4009895324707, "learning_rate": 9.387445887445888e-06, "loss": 42.934, "step": 4067 }, { "epoch": 96.85970149253731, "grad_norm": 32.16798782348633, "learning_rate": 9.385281385281385e-06, "loss": 42.4209, "step": 4068 }, { "epoch": 96.88358208955223, "grad_norm": 27.158573150634766, "learning_rate": 9.383116883116883e-06, "loss": 42.3285, "step": 4069 }, { "epoch": 96.90746268656716, "grad_norm": 28.05286407470703, "learning_rate": 9.380952380952381e-06, "loss": 43.6253, "step": 4070 }, { "epoch": 96.9313432835821, "grad_norm": 31.17296028137207, "learning_rate": 9.378787878787879e-06, "loss": 43.8199, "step": 4071 }, { "epoch": 96.95522388059702, "grad_norm": 25.182817459106445, "learning_rate": 9.376623376623377e-06, "loss": 41.0505, "step": 4072 }, { "epoch": 96.97910447761194, "grad_norm": 35.5045166015625, "learning_rate": 9.374458874458874e-06, "loss": 42.9265, "step": 4073 }, { "epoch": 97.0, "grad_norm": 23.445880889892578, "learning_rate": 9.372294372294372e-06, "loss": 36.5814, "step": 4074 }, { "epoch": 97.02388059701492, "grad_norm": 28.6851806640625, "learning_rate": 9.37012987012987e-06, "loss": 41.6689, "step": 4075 }, { "epoch": 97.04776119402985, "grad_norm": 22.152568817138672, "learning_rate": 9.36796536796537e-06, "loss": 41.6459, "step": 4076 }, { "epoch": 97.07164179104478, "grad_norm": 35.39872360229492, "learning_rate": 9.365800865800867e-06, "loss": 41.9915, "step": 4077 }, { "epoch": 97.0955223880597, "grad_norm": 27.264184951782227, "learning_rate": 9.363636363636365e-06, "loss": 42.6117, "step": 4078 }, { "epoch": 97.11940298507463, "grad_norm": 36.01545715332031, "learning_rate": 9.361471861471863e-06, "loss": 43.7312, "step": 4079 }, { "epoch": 97.14328358208955, "grad_norm": 34.436134338378906, "learning_rate": 9.35930735930736e-06, "loss": 42.597, "step": 4080 }, { "epoch": 97.16716417910447, "grad_norm": 24.796520233154297, "learning_rate": 9.357142857142859e-06, "loss": 42.432, "step": 4081 }, { "epoch": 97.1910447761194, "grad_norm": 26.330299377441406, "learning_rate": 9.354978354978356e-06, "loss": 42.1124, "step": 4082 }, { "epoch": 97.21492537313434, "grad_norm": 27.518465042114258, "learning_rate": 9.352813852813854e-06, "loss": 41.3868, "step": 4083 }, { "epoch": 97.23880597014926, "grad_norm": 25.9599552154541, "learning_rate": 9.350649350649352e-06, "loss": 40.6964, "step": 4084 }, { "epoch": 97.26268656716418, "grad_norm": 33.074974060058594, "learning_rate": 9.34848484848485e-06, "loss": 42.1326, "step": 4085 }, { "epoch": 97.2865671641791, "grad_norm": 29.895139694213867, "learning_rate": 9.346320346320346e-06, "loss": 42.1873, "step": 4086 }, { "epoch": 97.31044776119403, "grad_norm": 32.33000946044922, "learning_rate": 9.344155844155844e-06, "loss": 42.5366, "step": 4087 }, { "epoch": 97.33432835820895, "grad_norm": 28.283353805541992, "learning_rate": 9.341991341991343e-06, "loss": 41.8857, "step": 4088 }, { "epoch": 97.35820895522389, "grad_norm": 27.200963973999023, "learning_rate": 9.339826839826841e-06, "loss": 41.4329, "step": 4089 }, { "epoch": 97.38208955223881, "grad_norm": 27.918405532836914, "learning_rate": 9.337662337662339e-06, "loss": 41.4236, "step": 4090 }, { "epoch": 97.40597014925373, "grad_norm": 24.885950088500977, "learning_rate": 9.335497835497837e-06, "loss": 41.8926, "step": 4091 }, { "epoch": 97.42985074626866, "grad_norm": 24.703994750976562, "learning_rate": 9.333333333333334e-06, "loss": 42.3685, "step": 4092 }, { "epoch": 97.45373134328358, "grad_norm": 32.68978500366211, "learning_rate": 9.331168831168832e-06, "loss": 41.5668, "step": 4093 }, { "epoch": 97.4776119402985, "grad_norm": 27.5683536529541, "learning_rate": 9.32900432900433e-06, "loss": 42.4125, "step": 4094 }, { "epoch": 97.50149253731344, "grad_norm": 30.541976928710938, "learning_rate": 9.326839826839828e-06, "loss": 40.7424, "step": 4095 }, { "epoch": 97.52537313432836, "grad_norm": 28.704875946044922, "learning_rate": 9.324675324675326e-06, "loss": 42.0617, "step": 4096 }, { "epoch": 97.54925373134328, "grad_norm": 29.45570945739746, "learning_rate": 9.322510822510823e-06, "loss": 42.2572, "step": 4097 }, { "epoch": 97.57313432835821, "grad_norm": 29.299041748046875, "learning_rate": 9.320346320346321e-06, "loss": 42.5461, "step": 4098 }, { "epoch": 97.59701492537313, "grad_norm": 28.30889320373535, "learning_rate": 9.318181818181819e-06, "loss": 41.9226, "step": 4099 }, { "epoch": 97.62089552238805, "grad_norm": 23.587907791137695, "learning_rate": 9.316017316017317e-06, "loss": 42.0195, "step": 4100 }, { "epoch": 97.64477611940299, "grad_norm": 31.324934005737305, "learning_rate": 9.313852813852815e-06, "loss": 41.731, "step": 4101 }, { "epoch": 97.66865671641791, "grad_norm": 25.146387100219727, "learning_rate": 9.311688311688312e-06, "loss": 41.8452, "step": 4102 }, { "epoch": 97.69253731343284, "grad_norm": NaN, "learning_rate": 9.30952380952381e-06, "loss": 73.1578, "step": 4103 }, { "epoch": 97.71641791044776, "grad_norm": 33.619197845458984, "learning_rate": 9.30952380952381e-06, "loss": 42.6151, "step": 4104 }, { "epoch": 97.74029850746268, "grad_norm": 30.636676788330078, "learning_rate": 9.307359307359308e-06, "loss": 43.1022, "step": 4105 }, { "epoch": 97.7641791044776, "grad_norm": 30.259347915649414, "learning_rate": 9.305194805194806e-06, "loss": 42.0399, "step": 4106 }, { "epoch": 97.78805970149254, "grad_norm": 28.927536010742188, "learning_rate": 9.303030303030303e-06, "loss": 42.5658, "step": 4107 }, { "epoch": 97.81194029850747, "grad_norm": 27.93010139465332, "learning_rate": 9.300865800865801e-06, "loss": 41.5662, "step": 4108 }, { "epoch": 97.83582089552239, "grad_norm": 25.34616470336914, "learning_rate": 9.298701298701299e-06, "loss": 43.0076, "step": 4109 }, { "epoch": 97.85970149253731, "grad_norm": 28.407508850097656, "learning_rate": 9.296536796536797e-06, "loss": 43.035, "step": 4110 }, { "epoch": 97.88358208955223, "grad_norm": 22.58799934387207, "learning_rate": 9.294372294372295e-06, "loss": 42.5904, "step": 4111 }, { "epoch": 97.90746268656716, "grad_norm": 30.51255989074707, "learning_rate": 9.292207792207792e-06, "loss": 40.6314, "step": 4112 }, { "epoch": 97.9313432835821, "grad_norm": NaN, "learning_rate": 9.290043290043292e-06, "loss": 47.9418, "step": 4113 }, { "epoch": 97.95522388059702, "grad_norm": 24.9912166595459, "learning_rate": 9.290043290043292e-06, "loss": 42.5057, "step": 4114 }, { "epoch": 97.97910447761194, "grad_norm": 29.492568969726562, "learning_rate": 9.28787878787879e-06, "loss": 42.4723, "step": 4115 }, { "epoch": 98.0, "grad_norm": 22.984312057495117, "learning_rate": 9.285714285714288e-06, "loss": 36.1324, "step": 4116 }, { "epoch": 98.02388059701492, "grad_norm": 26.956518173217773, "learning_rate": 9.283549783549785e-06, "loss": 42.6798, "step": 4117 }, { "epoch": 98.04776119402985, "grad_norm": 23.24462890625, "learning_rate": 9.281385281385281e-06, "loss": 42.5043, "step": 4118 }, { "epoch": 98.07164179104478, "grad_norm": 32.33470153808594, "learning_rate": 9.27922077922078e-06, "loss": 42.0607, "step": 4119 }, { "epoch": 98.0955223880597, "grad_norm": 30.606536865234375, "learning_rate": 9.277056277056277e-06, "loss": 42.3543, "step": 4120 }, { "epoch": 98.11940298507463, "grad_norm": 26.795475006103516, "learning_rate": 9.274891774891775e-06, "loss": 41.33, "step": 4121 }, { "epoch": 98.14328358208955, "grad_norm": 23.049283981323242, "learning_rate": 9.272727272727273e-06, "loss": 41.2262, "step": 4122 }, { "epoch": 98.16716417910447, "grad_norm": 30.961490631103516, "learning_rate": 9.27056277056277e-06, "loss": 42.3126, "step": 4123 }, { "epoch": 98.1910447761194, "grad_norm": 25.457870483398438, "learning_rate": 9.268398268398268e-06, "loss": 43.0498, "step": 4124 }, { "epoch": 98.21492537313434, "grad_norm": 28.787675857543945, "learning_rate": 9.266233766233766e-06, "loss": 41.5441, "step": 4125 }, { "epoch": 98.23880597014926, "grad_norm": 23.33895492553711, "learning_rate": 9.264069264069266e-06, "loss": 41.2298, "step": 4126 }, { "epoch": 98.26268656716418, "grad_norm": 28.43191146850586, "learning_rate": 9.261904761904763e-06, "loss": 43.8188, "step": 4127 }, { "epoch": 98.2865671641791, "grad_norm": 22.150148391723633, "learning_rate": 9.259740259740261e-06, "loss": 41.9418, "step": 4128 }, { "epoch": 98.31044776119403, "grad_norm": 32.84375762939453, "learning_rate": 9.257575757575759e-06, "loss": 42.181, "step": 4129 }, { "epoch": 98.33432835820895, "grad_norm": 27.58066177368164, "learning_rate": 9.255411255411257e-06, "loss": 41.9053, "step": 4130 }, { "epoch": 98.35820895522389, "grad_norm": 26.275638580322266, "learning_rate": 9.253246753246755e-06, "loss": 42.643, "step": 4131 }, { "epoch": 98.38208955223881, "grad_norm": 26.407045364379883, "learning_rate": 9.251082251082252e-06, "loss": 41.2759, "step": 4132 }, { "epoch": 98.40597014925373, "grad_norm": 28.262874603271484, "learning_rate": 9.24891774891775e-06, "loss": 41.2746, "step": 4133 }, { "epoch": 98.42985074626866, "grad_norm": 25.495405197143555, "learning_rate": 9.246753246753248e-06, "loss": 41.17, "step": 4134 }, { "epoch": 98.45373134328358, "grad_norm": 30.302942276000977, "learning_rate": 9.244588744588746e-06, "loss": 40.8692, "step": 4135 }, { "epoch": 98.4776119402985, "grad_norm": 26.874711990356445, "learning_rate": 9.242424242424244e-06, "loss": 42.9695, "step": 4136 }, { "epoch": 98.50149253731344, "grad_norm": 27.96731948852539, "learning_rate": 9.240259740259741e-06, "loss": 41.0995, "step": 4137 }, { "epoch": 98.52537313432836, "grad_norm": 26.49541664123535, "learning_rate": 9.238095238095239e-06, "loss": 42.3258, "step": 4138 }, { "epoch": 98.54925373134328, "grad_norm": 24.790346145629883, "learning_rate": 9.235930735930737e-06, "loss": 42.5989, "step": 4139 }, { "epoch": 98.57313432835821, "grad_norm": 22.83180809020996, "learning_rate": 9.233766233766235e-06, "loss": 41.4101, "step": 4140 }, { "epoch": 98.59701492537313, "grad_norm": 27.18695640563965, "learning_rate": 9.231601731601733e-06, "loss": 42.1914, "step": 4141 }, { "epoch": 98.62089552238805, "grad_norm": 23.35308074951172, "learning_rate": 9.22943722943723e-06, "loss": 42.3357, "step": 4142 }, { "epoch": 98.64477611940299, "grad_norm": 32.9411735534668, "learning_rate": 9.227272727272728e-06, "loss": 42.2151, "step": 4143 }, { "epoch": 98.66865671641791, "grad_norm": 28.968116760253906, "learning_rate": 9.225108225108226e-06, "loss": 42.5766, "step": 4144 }, { "epoch": 98.69253731343284, "grad_norm": 26.254579544067383, "learning_rate": 9.222943722943724e-06, "loss": 42.5968, "step": 4145 }, { "epoch": 98.71641791044776, "grad_norm": 27.665916442871094, "learning_rate": 9.220779220779221e-06, "loss": 41.0831, "step": 4146 }, { "epoch": 98.74029850746268, "grad_norm": 29.594675064086914, "learning_rate": 9.21861471861472e-06, "loss": 42.1963, "step": 4147 }, { "epoch": 98.7641791044776, "grad_norm": 23.506603240966797, "learning_rate": 9.216450216450217e-06, "loss": 41.9209, "step": 4148 }, { "epoch": 98.78805970149254, "grad_norm": 32.939395904541016, "learning_rate": 9.214285714285715e-06, "loss": 42.0637, "step": 4149 }, { "epoch": 98.81194029850747, "grad_norm": 27.35706901550293, "learning_rate": 9.212121212121213e-06, "loss": 42.4936, "step": 4150 }, { "epoch": 98.83582089552239, "grad_norm": 31.6049861907959, "learning_rate": 9.20995670995671e-06, "loss": 43.5351, "step": 4151 }, { "epoch": 98.85970149253731, "grad_norm": 26.57269287109375, "learning_rate": 9.207792207792208e-06, "loss": 42.2598, "step": 4152 }, { "epoch": 98.88358208955223, "grad_norm": 30.60957908630371, "learning_rate": 9.205627705627706e-06, "loss": 42.3751, "step": 4153 }, { "epoch": 98.90746268656716, "grad_norm": 28.574939727783203, "learning_rate": 9.203463203463204e-06, "loss": 41.8665, "step": 4154 }, { "epoch": 98.9313432835821, "grad_norm": 24.66292953491211, "learning_rate": 9.201298701298702e-06, "loss": 42.2066, "step": 4155 }, { "epoch": 98.95522388059702, "grad_norm": 23.727333068847656, "learning_rate": 9.1991341991342e-06, "loss": 41.3947, "step": 4156 }, { "epoch": 98.97910447761194, "grad_norm": 27.1662654876709, "learning_rate": 9.196969696969697e-06, "loss": 42.752, "step": 4157 }, { "epoch": 99.0, "grad_norm": 19.463891983032227, "learning_rate": 9.194805194805195e-06, "loss": 35.6173, "step": 4158 }, { "epoch": 99.02388059701492, "grad_norm": 31.107654571533203, "learning_rate": 9.192640692640693e-06, "loss": 42.7329, "step": 4159 }, { "epoch": 99.04776119402985, "grad_norm": 26.082523345947266, "learning_rate": 9.19047619047619e-06, "loss": 43.3724, "step": 4160 }, { "epoch": 99.07164179104478, "grad_norm": 23.824567794799805, "learning_rate": 9.188311688311688e-06, "loss": 42.6574, "step": 4161 }, { "epoch": 99.0955223880597, "grad_norm": 23.710350036621094, "learning_rate": 9.186147186147188e-06, "loss": 41.6831, "step": 4162 }, { "epoch": 99.11940298507463, "grad_norm": 28.668537139892578, "learning_rate": 9.183982683982686e-06, "loss": 41.099, "step": 4163 }, { "epoch": 99.14328358208955, "grad_norm": 21.060327529907227, "learning_rate": 9.181818181818184e-06, "loss": 43.0679, "step": 4164 }, { "epoch": 99.16716417910447, "grad_norm": 25.86065673828125, "learning_rate": 9.179653679653681e-06, "loss": 42.248, "step": 4165 }, { "epoch": 99.1910447761194, "grad_norm": 20.043672561645508, "learning_rate": 9.177489177489179e-06, "loss": 41.114, "step": 4166 }, { "epoch": 99.21492537313434, "grad_norm": 25.1352481842041, "learning_rate": 9.175324675324675e-06, "loss": 40.9968, "step": 4167 }, { "epoch": 99.23880597014926, "grad_norm": 20.042200088500977, "learning_rate": 9.173160173160173e-06, "loss": 41.9535, "step": 4168 }, { "epoch": 99.26268656716418, "grad_norm": 27.261369705200195, "learning_rate": 9.17099567099567e-06, "loss": 42.6293, "step": 4169 }, { "epoch": 99.2865671641791, "grad_norm": 23.163576126098633, "learning_rate": 9.168831168831169e-06, "loss": 41.9948, "step": 4170 }, { "epoch": 99.31044776119403, "grad_norm": 27.297080993652344, "learning_rate": 9.166666666666666e-06, "loss": 41.4716, "step": 4171 }, { "epoch": 99.33432835820895, "grad_norm": 22.44979476928711, "learning_rate": 9.164502164502164e-06, "loss": 42.406, "step": 4172 }, { "epoch": 99.35820895522389, "grad_norm": 23.482084274291992, "learning_rate": 9.162337662337664e-06, "loss": 41.5008, "step": 4173 }, { "epoch": 99.38208955223881, "grad_norm": 22.505319595336914, "learning_rate": 9.160173160173162e-06, "loss": 40.9368, "step": 4174 }, { "epoch": 99.40597014925373, "grad_norm": 24.250532150268555, "learning_rate": 9.15800865800866e-06, "loss": 40.7122, "step": 4175 }, { "epoch": 99.42985074626866, "grad_norm": 23.2113037109375, "learning_rate": 9.155844155844157e-06, "loss": 41.7559, "step": 4176 }, { "epoch": 99.45373134328358, "grad_norm": 18.7581787109375, "learning_rate": 9.153679653679655e-06, "loss": 41.661, "step": 4177 }, { "epoch": 99.4776119402985, "grad_norm": 17.8604793548584, "learning_rate": 9.151515151515153e-06, "loss": 41.51, "step": 4178 }, { "epoch": 99.50149253731344, "grad_norm": 16.258312225341797, "learning_rate": 9.14935064935065e-06, "loss": 41.2024, "step": 4179 }, { "epoch": 99.52537313432836, "grad_norm": 16.66613006591797, "learning_rate": 9.147186147186148e-06, "loss": 42.5017, "step": 4180 }, { "epoch": 99.54925373134328, "grad_norm": 15.366393089294434, "learning_rate": 9.145021645021646e-06, "loss": 41.6167, "step": 4181 }, { "epoch": 99.57313432835821, "grad_norm": 23.028663635253906, "learning_rate": 9.142857142857144e-06, "loss": 42.308, "step": 4182 }, { "epoch": 99.59701492537313, "grad_norm": 16.91287612915039, "learning_rate": 9.140692640692642e-06, "loss": 43.1037, "step": 4183 }, { "epoch": 99.62089552238805, "grad_norm": 19.781919479370117, "learning_rate": 9.13852813852814e-06, "loss": 42.3187, "step": 4184 }, { "epoch": 99.64477611940299, "grad_norm": 18.985305786132812, "learning_rate": 9.136363636363637e-06, "loss": 41.971, "step": 4185 }, { "epoch": 99.66865671641791, "grad_norm": 17.393688201904297, "learning_rate": 9.134199134199135e-06, "loss": 41.1467, "step": 4186 }, { "epoch": 99.69253731343284, "grad_norm": 19.685924530029297, "learning_rate": 9.132034632034633e-06, "loss": 41.822, "step": 4187 }, { "epoch": 99.71641791044776, "grad_norm": 19.761327743530273, "learning_rate": 9.12987012987013e-06, "loss": 42.1768, "step": 4188 }, { "epoch": 99.74029850746268, "grad_norm": 16.2159423828125, "learning_rate": 9.127705627705628e-06, "loss": 42.9327, "step": 4189 }, { "epoch": 99.7641791044776, "grad_norm": 21.257530212402344, "learning_rate": 9.125541125541126e-06, "loss": 42.556, "step": 4190 }, { "epoch": 99.78805970149254, "grad_norm": NaN, "learning_rate": 9.123376623376624e-06, "loss": 53.9793, "step": 4191 }, { "epoch": 99.81194029850747, "grad_norm": 19.869991302490234, "learning_rate": 9.123376623376624e-06, "loss": 41.4833, "step": 4192 }, { "epoch": 99.83582089552239, "grad_norm": 17.66855239868164, "learning_rate": 9.121212121212122e-06, "loss": 41.6514, "step": 4193 }, { "epoch": 99.85970149253731, "grad_norm": 19.992225646972656, "learning_rate": 9.11904761904762e-06, "loss": 43.4129, "step": 4194 }, { "epoch": 99.88358208955223, "grad_norm": 23.21436882019043, "learning_rate": 9.116883116883117e-06, "loss": 43.2426, "step": 4195 }, { "epoch": 99.90746268656716, "grad_norm": 18.16109848022461, "learning_rate": 9.114718614718615e-06, "loss": 41.9741, "step": 4196 }, { "epoch": 99.9313432835821, "grad_norm": 22.761810302734375, "learning_rate": 9.112554112554113e-06, "loss": 41.4668, "step": 4197 }, { "epoch": 99.95522388059702, "grad_norm": 21.3942928314209, "learning_rate": 9.110389610389611e-06, "loss": 41.6686, "step": 4198 }, { "epoch": 99.97910447761194, "grad_norm": 17.734172821044922, "learning_rate": 9.108225108225109e-06, "loss": 41.746, "step": 4199 }, { "epoch": 100.0, "grad_norm": 22.795557022094727, "learning_rate": 9.106060606060606e-06, "loss": 37.4113, "step": 4200 }, { "epoch": 100.02388059701492, "grad_norm": 18.693927764892578, "learning_rate": 9.103896103896104e-06, "loss": 41.1692, "step": 4201 }, { "epoch": 100.04776119402985, "grad_norm": 15.947311401367188, "learning_rate": 9.101731601731602e-06, "loss": 43.5011, "step": 4202 }, { "epoch": 100.07164179104478, "grad_norm": 24.349090576171875, "learning_rate": 9.0995670995671e-06, "loss": 41.954, "step": 4203 }, { "epoch": 100.0955223880597, "grad_norm": 18.305612564086914, "learning_rate": 9.097402597402598e-06, "loss": 41.7676, "step": 4204 }, { "epoch": 100.11940298507463, "grad_norm": 29.68235206604004, "learning_rate": 9.095238095238095e-06, "loss": 40.8579, "step": 4205 }, { "epoch": 100.14328358208955, "grad_norm": 24.512508392333984, "learning_rate": 9.093073593073593e-06, "loss": 40.7238, "step": 4206 }, { "epoch": 100.16716417910447, "grad_norm": 24.545705795288086, "learning_rate": 9.090909090909091e-06, "loss": 42.7197, "step": 4207 }, { "epoch": 100.1910447761194, "grad_norm": 18.792917251586914, "learning_rate": 9.088744588744589e-06, "loss": 40.8385, "step": 4208 }, { "epoch": 100.21492537313434, "grad_norm": 21.766145706176758, "learning_rate": 9.086580086580087e-06, "loss": 41.3234, "step": 4209 }, { "epoch": 100.23880597014926, "grad_norm": 17.32309341430664, "learning_rate": 9.084415584415586e-06, "loss": 40.6989, "step": 4210 }, { "epoch": 100.26268656716418, "grad_norm": 17.80112648010254, "learning_rate": 9.082251082251084e-06, "loss": 41.0043, "step": 4211 }, { "epoch": 100.2865671641791, "grad_norm": 15.762267112731934, "learning_rate": 9.080086580086582e-06, "loss": 42.5453, "step": 4212 }, { "epoch": 100.31044776119403, "grad_norm": 15.99219036102295, "learning_rate": 9.07792207792208e-06, "loss": 41.9223, "step": 4213 }, { "epoch": 100.33432835820895, "grad_norm": 21.16149139404297, "learning_rate": 9.075757575757577e-06, "loss": 41.7332, "step": 4214 }, { "epoch": 100.35820895522389, "grad_norm": 16.26340675354004, "learning_rate": 9.073593073593075e-06, "loss": 41.9333, "step": 4215 }, { "epoch": 100.38208955223881, "grad_norm": 22.789945602416992, "learning_rate": 9.071428571428573e-06, "loss": 41.5922, "step": 4216 }, { "epoch": 100.40597014925373, "grad_norm": 20.777421951293945, "learning_rate": 9.06926406926407e-06, "loss": 42.4934, "step": 4217 }, { "epoch": 100.42985074626866, "grad_norm": 20.417619705200195, "learning_rate": 9.067099567099567e-06, "loss": 42.0611, "step": 4218 }, { "epoch": 100.45373134328358, "grad_norm": 17.323135375976562, "learning_rate": 9.064935064935065e-06, "loss": 41.4595, "step": 4219 }, { "epoch": 100.4776119402985, "grad_norm": 17.62958335876465, "learning_rate": 9.062770562770562e-06, "loss": 42.1578, "step": 4220 }, { "epoch": 100.50149253731344, "grad_norm": 19.73848533630371, "learning_rate": 9.06060606060606e-06, "loss": 40.6611, "step": 4221 }, { "epoch": 100.52537313432836, "grad_norm": 15.945398330688477, "learning_rate": 9.05844155844156e-06, "loss": 41.9703, "step": 4222 }, { "epoch": 100.54925373134328, "grad_norm": 31.24019432067871, "learning_rate": 9.056277056277057e-06, "loss": 42.1433, "step": 4223 }, { "epoch": 100.57313432835821, "grad_norm": 21.933677673339844, "learning_rate": 9.054112554112555e-06, "loss": 41.873, "step": 4224 }, { "epoch": 100.59701492537313, "grad_norm": 31.41733741760254, "learning_rate": 9.051948051948053e-06, "loss": 42.7139, "step": 4225 }, { "epoch": 100.62089552238805, "grad_norm": 21.998600006103516, "learning_rate": 9.049783549783551e-06, "loss": 42.7483, "step": 4226 }, { "epoch": 100.64477611940299, "grad_norm": 34.37179183959961, "learning_rate": 9.047619047619049e-06, "loss": 41.3319, "step": 4227 }, { "epoch": 100.66865671641791, "grad_norm": 27.14617156982422, "learning_rate": 9.045454545454546e-06, "loss": 42.022, "step": 4228 }, { "epoch": 100.69253731343284, "grad_norm": 37.454708099365234, "learning_rate": 9.043290043290044e-06, "loss": 41.9875, "step": 4229 }, { "epoch": 100.71641791044776, "grad_norm": 32.32929229736328, "learning_rate": 9.041125541125542e-06, "loss": 43.1461, "step": 4230 }, { "epoch": 100.74029850746268, "grad_norm": 33.369842529296875, "learning_rate": 9.03896103896104e-06, "loss": 42.1309, "step": 4231 }, { "epoch": 100.7641791044776, "grad_norm": 26.55228042602539, "learning_rate": 9.036796536796538e-06, "loss": 42.6242, "step": 4232 }, { "epoch": 100.78805970149254, "grad_norm": 30.329452514648438, "learning_rate": 9.034632034632035e-06, "loss": 41.174, "step": 4233 }, { "epoch": 100.81194029850747, "grad_norm": 32.0432014465332, "learning_rate": 9.032467532467533e-06, "loss": 43.1256, "step": 4234 }, { "epoch": 100.83582089552239, "grad_norm": 29.122236251831055, "learning_rate": 9.030303030303031e-06, "loss": 41.3778, "step": 4235 }, { "epoch": 100.85970149253731, "grad_norm": 24.6899471282959, "learning_rate": 9.028138528138529e-06, "loss": 42.2167, "step": 4236 }, { "epoch": 100.88358208955223, "grad_norm": 31.051576614379883, "learning_rate": 9.025974025974027e-06, "loss": 42.5137, "step": 4237 }, { "epoch": 100.90746268656716, "grad_norm": 27.56793785095215, "learning_rate": 9.023809523809524e-06, "loss": 42.2763, "step": 4238 }, { "epoch": 100.9313432835821, "grad_norm": 35.045108795166016, "learning_rate": 9.021645021645022e-06, "loss": 43.3116, "step": 4239 }, { "epoch": 100.95522388059702, "grad_norm": 28.35376739501953, "learning_rate": 9.01948051948052e-06, "loss": 42.4737, "step": 4240 }, { "epoch": 100.97910447761194, "grad_norm": 29.537580490112305, "learning_rate": 9.017316017316018e-06, "loss": 42.2073, "step": 4241 }, { "epoch": 101.0, "grad_norm": 24.736759185791016, "learning_rate": 9.015151515151516e-06, "loss": 37.5375, "step": 4242 }, { "epoch": 101.02388059701492, "grad_norm": 27.93048667907715, "learning_rate": 9.012987012987013e-06, "loss": 42.1642, "step": 4243 }, { "epoch": 101.04776119402985, "grad_norm": 24.460664749145508, "learning_rate": 9.010822510822511e-06, "loss": 42.1769, "step": 4244 }, { "epoch": 101.07164179104478, "grad_norm": 22.52399253845215, "learning_rate": 9.008658008658009e-06, "loss": 41.99, "step": 4245 }, { "epoch": 101.0955223880597, "grad_norm": 19.33254623413086, "learning_rate": 9.006493506493509e-06, "loss": 40.422, "step": 4246 }, { "epoch": 101.11940298507463, "grad_norm": 22.645910263061523, "learning_rate": 9.004329004329005e-06, "loss": 42.8041, "step": 4247 }, { "epoch": 101.14328358208955, "grad_norm": 20.89433479309082, "learning_rate": 9.002164502164502e-06, "loss": 43.0258, "step": 4248 }, { "epoch": 101.16716417910447, "grad_norm": 19.612567901611328, "learning_rate": 9e-06, "loss": 41.4478, "step": 4249 }, { "epoch": 101.1910447761194, "grad_norm": 19.565265655517578, "learning_rate": 8.997835497835498e-06, "loss": 42.6328, "step": 4250 }, { "epoch": 101.21492537313434, "grad_norm": 20.93030548095703, "learning_rate": 8.995670995670996e-06, "loss": 42.7268, "step": 4251 }, { "epoch": 101.23880597014926, "grad_norm": 18.67580795288086, "learning_rate": 8.993506493506494e-06, "loss": 43.3658, "step": 4252 }, { "epoch": 101.26268656716418, "grad_norm": 26.36067008972168, "learning_rate": 8.991341991341991e-06, "loss": 42.2089, "step": 4253 }, { "epoch": 101.2865671641791, "grad_norm": 19.841224670410156, "learning_rate": 8.98917748917749e-06, "loss": 40.842, "step": 4254 }, { "epoch": 101.31044776119403, "grad_norm": 26.14617156982422, "learning_rate": 8.987012987012987e-06, "loss": 43.1673, "step": 4255 }, { "epoch": 101.33432835820895, "grad_norm": 21.286962509155273, "learning_rate": 8.984848484848485e-06, "loss": 42.0463, "step": 4256 }, { "epoch": 101.35820895522389, "grad_norm": 26.335676193237305, "learning_rate": 8.982683982683983e-06, "loss": 41.8856, "step": 4257 }, { "epoch": 101.38208955223881, "grad_norm": 23.881567001342773, "learning_rate": 8.980519480519482e-06, "loss": 41.6253, "step": 4258 }, { "epoch": 101.40597014925373, "grad_norm": 21.65298843383789, "learning_rate": 8.97835497835498e-06, "loss": 42.0994, "step": 4259 }, { "epoch": 101.42985074626866, "grad_norm": 27.039722442626953, "learning_rate": 8.976190476190478e-06, "loss": 41.8836, "step": 4260 }, { "epoch": 101.45373134328358, "grad_norm": 20.1751766204834, "learning_rate": 8.974025974025975e-06, "loss": 41.1007, "step": 4261 }, { "epoch": 101.4776119402985, "grad_norm": 31.58852767944336, "learning_rate": 8.971861471861473e-06, "loss": 41.9793, "step": 4262 }, { "epoch": 101.50149253731344, "grad_norm": 21.907556533813477, "learning_rate": 8.969696969696971e-06, "loss": 41.509, "step": 4263 }, { "epoch": 101.52537313432836, "grad_norm": 32.310272216796875, "learning_rate": 8.967532467532469e-06, "loss": 41.1805, "step": 4264 }, { "epoch": 101.54925373134328, "grad_norm": 25.363170623779297, "learning_rate": 8.965367965367967e-06, "loss": 42.3668, "step": 4265 }, { "epoch": 101.57313432835821, "grad_norm": 29.320520401000977, "learning_rate": 8.963203463203464e-06, "loss": 41.7248, "step": 4266 }, { "epoch": 101.59701492537313, "grad_norm": 24.637983322143555, "learning_rate": 8.96103896103896e-06, "loss": 40.1595, "step": 4267 }, { "epoch": 101.62089552238805, "grad_norm": 32.69458770751953, "learning_rate": 8.958874458874458e-06, "loss": 41.6096, "step": 4268 }, { "epoch": 101.64477611940299, "grad_norm": 24.87364959716797, "learning_rate": 8.956709956709956e-06, "loss": 41.3295, "step": 4269 }, { "epoch": 101.66865671641791, "grad_norm": 31.5223445892334, "learning_rate": 8.954545454545456e-06, "loss": 42.1731, "step": 4270 }, { "epoch": 101.69253731343284, "grad_norm": 29.047664642333984, "learning_rate": 8.952380952380953e-06, "loss": 41.8301, "step": 4271 }, { "epoch": 101.71641791044776, "grad_norm": 31.420434951782227, "learning_rate": 8.950216450216451e-06, "loss": 41.6502, "step": 4272 }, { "epoch": 101.74029850746268, "grad_norm": 28.40896224975586, "learning_rate": 8.948051948051949e-06, "loss": 41.7585, "step": 4273 }, { "epoch": 101.7641791044776, "grad_norm": 32.256263732910156, "learning_rate": 8.945887445887447e-06, "loss": 41.8508, "step": 4274 }, { "epoch": 101.78805970149254, "grad_norm": 30.496904373168945, "learning_rate": 8.943722943722945e-06, "loss": 41.7192, "step": 4275 }, { "epoch": 101.81194029850747, "grad_norm": 31.20074462890625, "learning_rate": 8.941558441558442e-06, "loss": 42.687, "step": 4276 }, { "epoch": 101.83582089552239, "grad_norm": 27.639835357666016, "learning_rate": 8.93939393939394e-06, "loss": 41.7068, "step": 4277 }, { "epoch": 101.85970149253731, "grad_norm": 31.692638397216797, "learning_rate": 8.937229437229438e-06, "loss": 42.8243, "step": 4278 }, { "epoch": 101.88358208955223, "grad_norm": 28.27922248840332, "learning_rate": 8.935064935064936e-06, "loss": 41.8772, "step": 4279 }, { "epoch": 101.90746268656716, "grad_norm": 28.70676040649414, "learning_rate": 8.932900432900434e-06, "loss": 41.682, "step": 4280 }, { "epoch": 101.9313432835821, "grad_norm": 27.140151977539062, "learning_rate": 8.930735930735931e-06, "loss": 42.078, "step": 4281 }, { "epoch": 101.95522388059702, "grad_norm": 25.135448455810547, "learning_rate": 8.92857142857143e-06, "loss": 42.2035, "step": 4282 }, { "epoch": 101.97910447761194, "grad_norm": 22.988903045654297, "learning_rate": 8.926406926406927e-06, "loss": 41.2573, "step": 4283 }, { "epoch": 102.0, "grad_norm": 25.694786071777344, "learning_rate": 8.924242424242425e-06, "loss": 36.1049, "step": 4284 }, { "epoch": 102.02388059701492, "grad_norm": 24.528118133544922, "learning_rate": 8.922077922077923e-06, "loss": 42.162, "step": 4285 }, { "epoch": 102.04776119402985, "grad_norm": 27.563627243041992, "learning_rate": 8.91991341991342e-06, "loss": 41.7018, "step": 4286 }, { "epoch": 102.07164179104478, "grad_norm": 23.374286651611328, "learning_rate": 8.917748917748918e-06, "loss": 42.4075, "step": 4287 }, { "epoch": 102.0955223880597, "grad_norm": 28.673614501953125, "learning_rate": 8.915584415584416e-06, "loss": 41.8272, "step": 4288 }, { "epoch": 102.11940298507463, "grad_norm": 24.432859420776367, "learning_rate": 8.913419913419914e-06, "loss": 41.7054, "step": 4289 }, { "epoch": 102.14328358208955, "grad_norm": 26.83321189880371, "learning_rate": 8.911255411255412e-06, "loss": 42.2169, "step": 4290 }, { "epoch": 102.16716417910447, "grad_norm": 21.222537994384766, "learning_rate": 8.90909090909091e-06, "loss": 42.164, "step": 4291 }, { "epoch": 102.1910447761194, "grad_norm": 32.05888748168945, "learning_rate": 8.906926406926407e-06, "loss": 42.0759, "step": 4292 }, { "epoch": 102.21492537313434, "grad_norm": 22.959369659423828, "learning_rate": 8.904761904761905e-06, "loss": 43.0785, "step": 4293 }, { "epoch": 102.23880597014926, "grad_norm": 37.53632736206055, "learning_rate": 8.902597402597405e-06, "loss": 42.1665, "step": 4294 }, { "epoch": 102.26268656716418, "grad_norm": 29.86913299560547, "learning_rate": 8.900432900432902e-06, "loss": 41.3932, "step": 4295 }, { "epoch": 102.2865671641791, "grad_norm": 31.11789894104004, "learning_rate": 8.8982683982684e-06, "loss": 43.0771, "step": 4296 }, { "epoch": 102.31044776119403, "grad_norm": 27.745323181152344, "learning_rate": 8.896103896103896e-06, "loss": 41.1395, "step": 4297 }, { "epoch": 102.33432835820895, "grad_norm": 25.368127822875977, "learning_rate": 8.893939393939394e-06, "loss": 42.7978, "step": 4298 }, { "epoch": 102.35820895522389, "grad_norm": 24.081409454345703, "learning_rate": 8.891774891774892e-06, "loss": 41.4698, "step": 4299 }, { "epoch": 102.38208955223881, "grad_norm": 24.39154815673828, "learning_rate": 8.88961038961039e-06, "loss": 41.6765, "step": 4300 }, { "epoch": 102.40597014925373, "grad_norm": 21.794816970825195, "learning_rate": 8.887445887445887e-06, "loss": 40.9793, "step": 4301 }, { "epoch": 102.42985074626866, "grad_norm": 24.50321388244629, "learning_rate": 8.885281385281385e-06, "loss": 41.3914, "step": 4302 }, { "epoch": 102.45373134328358, "grad_norm": 21.492965698242188, "learning_rate": 8.883116883116883e-06, "loss": 42.1772, "step": 4303 }, { "epoch": 102.4776119402985, "grad_norm": 25.231094360351562, "learning_rate": 8.88095238095238e-06, "loss": 41.6758, "step": 4304 }, { "epoch": 102.50149253731344, "grad_norm": 21.51530647277832, "learning_rate": 8.87878787878788e-06, "loss": 41.0819, "step": 4305 }, { "epoch": 102.52537313432836, "grad_norm": 21.023269653320312, "learning_rate": 8.876623376623378e-06, "loss": 41.446, "step": 4306 }, { "epoch": 102.54925373134328, "grad_norm": 25.81951904296875, "learning_rate": 8.874458874458876e-06, "loss": 41.3221, "step": 4307 }, { "epoch": 102.57313432835821, "grad_norm": 19.7045841217041, "learning_rate": 8.872294372294374e-06, "loss": 42.5273, "step": 4308 }, { "epoch": 102.59701492537313, "grad_norm": 30.536680221557617, "learning_rate": 8.870129870129871e-06, "loss": 40.7574, "step": 4309 }, { "epoch": 102.62089552238805, "grad_norm": 22.61910629272461, "learning_rate": 8.86796536796537e-06, "loss": 42.1551, "step": 4310 }, { "epoch": 102.64477611940299, "grad_norm": 31.215150833129883, "learning_rate": 8.865800865800867e-06, "loss": 42.3013, "step": 4311 }, { "epoch": 102.66865671641791, "grad_norm": 29.22039794921875, "learning_rate": 8.863636363636365e-06, "loss": 42.3447, "step": 4312 }, { "epoch": 102.69253731343284, "grad_norm": 31.03571128845215, "learning_rate": 8.861471861471863e-06, "loss": 41.9643, "step": 4313 }, { "epoch": 102.71641791044776, "grad_norm": 26.90915298461914, "learning_rate": 8.85930735930736e-06, "loss": 42.8879, "step": 4314 }, { "epoch": 102.74029850746268, "grad_norm": 31.34430503845215, "learning_rate": 8.857142857142858e-06, "loss": 41.6856, "step": 4315 }, { "epoch": 102.7641791044776, "grad_norm": 26.868675231933594, "learning_rate": 8.854978354978356e-06, "loss": 41.1538, "step": 4316 }, { "epoch": 102.78805970149254, "grad_norm": 26.82084846496582, "learning_rate": 8.852813852813854e-06, "loss": 42.6873, "step": 4317 }, { "epoch": 102.81194029850747, "grad_norm": 24.742094039916992, "learning_rate": 8.850649350649352e-06, "loss": 43.168, "step": 4318 }, { "epoch": 102.83582089552239, "grad_norm": 23.871686935424805, "learning_rate": 8.84848484848485e-06, "loss": 42.0424, "step": 4319 }, { "epoch": 102.85970149253731, "grad_norm": 21.681507110595703, "learning_rate": 8.846320346320347e-06, "loss": 42.1547, "step": 4320 }, { "epoch": 102.88358208955223, "grad_norm": 28.63477325439453, "learning_rate": 8.844155844155845e-06, "loss": 41.2654, "step": 4321 }, { "epoch": 102.90746268656716, "grad_norm": 19.495147705078125, "learning_rate": 8.841991341991343e-06, "loss": 41.5641, "step": 4322 }, { "epoch": 102.9313432835821, "grad_norm": 33.34874725341797, "learning_rate": 8.83982683982684e-06, "loss": 41.7787, "step": 4323 }, { "epoch": 102.95522388059702, "grad_norm": 27.586767196655273, "learning_rate": 8.837662337662338e-06, "loss": 40.4204, "step": 4324 }, { "epoch": 102.97910447761194, "grad_norm": 28.708871841430664, "learning_rate": 8.835497835497836e-06, "loss": 41.7225, "step": 4325 }, { "epoch": 103.0, "grad_norm": 22.439306259155273, "learning_rate": 8.833333333333334e-06, "loss": 35.3291, "step": 4326 }, { "epoch": 103.02388059701492, "grad_norm": 25.760793685913086, "learning_rate": 8.831168831168832e-06, "loss": 42.0465, "step": 4327 }, { "epoch": 103.04776119402985, "grad_norm": 22.56456756591797, "learning_rate": 8.82900432900433e-06, "loss": 41.6094, "step": 4328 }, { "epoch": 103.07164179104478, "grad_norm": 30.912078857421875, "learning_rate": 8.826839826839827e-06, "loss": 43.0196, "step": 4329 }, { "epoch": 103.0955223880597, "grad_norm": 23.01909065246582, "learning_rate": 8.824675324675325e-06, "loss": 42.7305, "step": 4330 }, { "epoch": 103.11940298507463, "grad_norm": 29.197927474975586, "learning_rate": 8.822510822510823e-06, "loss": 41.0641, "step": 4331 }, { "epoch": 103.14328358208955, "grad_norm": 27.894495010375977, "learning_rate": 8.82034632034632e-06, "loss": 40.9656, "step": 4332 }, { "epoch": 103.16716417910447, "grad_norm": 27.135541915893555, "learning_rate": 8.818181818181819e-06, "loss": 41.7715, "step": 4333 }, { "epoch": 103.1910447761194, "grad_norm": 24.774351119995117, "learning_rate": 8.816017316017316e-06, "loss": 40.5809, "step": 4334 }, { "epoch": 103.21492537313434, "grad_norm": 27.74059295654297, "learning_rate": 8.813852813852814e-06, "loss": 40.9501, "step": 4335 }, { "epoch": 103.23880597014926, "grad_norm": 24.502626419067383, "learning_rate": 8.811688311688312e-06, "loss": 41.3341, "step": 4336 }, { "epoch": 103.26268656716418, "grad_norm": 29.406909942626953, "learning_rate": 8.80952380952381e-06, "loss": 42.8791, "step": 4337 }, { "epoch": 103.2865671641791, "grad_norm": 24.162965774536133, "learning_rate": 8.807359307359308e-06, "loss": 41.3314, "step": 4338 }, { "epoch": 103.31044776119403, "grad_norm": 27.782527923583984, "learning_rate": 8.805194805194805e-06, "loss": 41.0151, "step": 4339 }, { "epoch": 103.33432835820895, "grad_norm": 25.89789390563965, "learning_rate": 8.803030303030303e-06, "loss": 41.0182, "step": 4340 }, { "epoch": 103.35820895522389, "grad_norm": 31.413692474365234, "learning_rate": 8.800865800865803e-06, "loss": 41.1635, "step": 4341 }, { "epoch": 103.38208955223881, "grad_norm": 23.838945388793945, "learning_rate": 8.7987012987013e-06, "loss": 42.2695, "step": 4342 }, { "epoch": 103.40597014925373, "grad_norm": 27.55811309814453, "learning_rate": 8.796536796536798e-06, "loss": 42.6491, "step": 4343 }, { "epoch": 103.42985074626866, "grad_norm": 24.99410629272461, "learning_rate": 8.794372294372296e-06, "loss": 40.49, "step": 4344 }, { "epoch": 103.45373134328358, "grad_norm": 32.69471740722656, "learning_rate": 8.792207792207794e-06, "loss": 41.0334, "step": 4345 }, { "epoch": 103.4776119402985, "grad_norm": 25.661212921142578, "learning_rate": 8.79004329004329e-06, "loss": 42.0111, "step": 4346 }, { "epoch": 103.50149253731344, "grad_norm": 32.33528518676758, "learning_rate": 8.787878787878788e-06, "loss": 41.451, "step": 4347 }, { "epoch": 103.52537313432836, "grad_norm": 30.863183975219727, "learning_rate": 8.785714285714286e-06, "loss": 43.0101, "step": 4348 }, { "epoch": 103.54925373134328, "grad_norm": 27.80331802368164, "learning_rate": 8.783549783549783e-06, "loss": 41.8201, "step": 4349 }, { "epoch": 103.57313432835821, "grad_norm": 25.65656089782715, "learning_rate": 8.781385281385281e-06, "loss": 42.3188, "step": 4350 }, { "epoch": 103.59701492537313, "grad_norm": 27.477493286132812, "learning_rate": 8.779220779220779e-06, "loss": 42.4443, "step": 4351 }, { "epoch": 103.62089552238805, "grad_norm": 19.195556640625, "learning_rate": 8.777056277056277e-06, "loss": 41.6902, "step": 4352 }, { "epoch": 103.64477611940299, "grad_norm": 31.54138946533203, "learning_rate": 8.774891774891776e-06, "loss": 41.7891, "step": 4353 }, { "epoch": 103.66865671641791, "grad_norm": 24.392765045166016, "learning_rate": 8.772727272727274e-06, "loss": 43.201, "step": 4354 }, { "epoch": 103.69253731343284, "grad_norm": 31.868196487426758, "learning_rate": 8.770562770562772e-06, "loss": 42.0864, "step": 4355 }, { "epoch": 103.71641791044776, "grad_norm": 28.33005142211914, "learning_rate": 8.76839826839827e-06, "loss": 40.8061, "step": 4356 }, { "epoch": 103.74029850746268, "grad_norm": 29.663543701171875, "learning_rate": 8.766233766233767e-06, "loss": 41.195, "step": 4357 }, { "epoch": 103.7641791044776, "grad_norm": 24.99871826171875, "learning_rate": 8.764069264069265e-06, "loss": 42.0865, "step": 4358 }, { "epoch": 103.78805970149254, "grad_norm": 26.281768798828125, "learning_rate": 8.761904761904763e-06, "loss": 42.2214, "step": 4359 }, { "epoch": 103.81194029850747, "grad_norm": 25.848814010620117, "learning_rate": 8.75974025974026e-06, "loss": 41.752, "step": 4360 }, { "epoch": 103.83582089552239, "grad_norm": 25.99828338623047, "learning_rate": 8.757575757575759e-06, "loss": 41.3675, "step": 4361 }, { "epoch": 103.85970149253731, "grad_norm": 24.577255249023438, "learning_rate": 8.755411255411256e-06, "loss": 41.3633, "step": 4362 }, { "epoch": 103.88358208955223, "grad_norm": 28.189889907836914, "learning_rate": 8.753246753246754e-06, "loss": 41.1397, "step": 4363 }, { "epoch": 103.90746268656716, "grad_norm": 21.285263061523438, "learning_rate": 8.751082251082252e-06, "loss": 42.9034, "step": 4364 }, { "epoch": 103.9313432835821, "grad_norm": 26.459442138671875, "learning_rate": 8.74891774891775e-06, "loss": 42.1868, "step": 4365 }, { "epoch": 103.95522388059702, "grad_norm": 23.833219528198242, "learning_rate": 8.746753246753248e-06, "loss": 43.8222, "step": 4366 }, { "epoch": 103.97910447761194, "grad_norm": 28.269039154052734, "learning_rate": 8.744588744588745e-06, "loss": 41.7916, "step": 4367 }, { "epoch": 104.0, "grad_norm": 21.251577377319336, "learning_rate": 8.742424242424243e-06, "loss": 36.7322, "step": 4368 }, { "epoch": 104.02388059701492, "grad_norm": 24.385892868041992, "learning_rate": 8.740259740259741e-06, "loss": 41.1758, "step": 4369 }, { "epoch": 104.04776119402985, "grad_norm": 24.85951805114746, "learning_rate": 8.738095238095239e-06, "loss": 41.1797, "step": 4370 }, { "epoch": 104.07164179104478, "grad_norm": 22.94902229309082, "learning_rate": 8.735930735930737e-06, "loss": 42.0245, "step": 4371 }, { "epoch": 104.0955223880597, "grad_norm": 22.89316749572754, "learning_rate": 8.733766233766234e-06, "loss": 40.8802, "step": 4372 }, { "epoch": 104.11940298507463, "grad_norm": 17.931550979614258, "learning_rate": 8.731601731601732e-06, "loss": 41.7585, "step": 4373 }, { "epoch": 104.14328358208955, "grad_norm": 25.272066116333008, "learning_rate": 8.72943722943723e-06, "loss": 41.9595, "step": 4374 }, { "epoch": 104.16716417910447, "grad_norm": 18.83379364013672, "learning_rate": 8.727272727272728e-06, "loss": 42.0377, "step": 4375 }, { "epoch": 104.1910447761194, "grad_norm": 26.816553115844727, "learning_rate": 8.725108225108226e-06, "loss": 42.3945, "step": 4376 }, { "epoch": 104.21492537313434, "grad_norm": 21.217594146728516, "learning_rate": 8.722943722943723e-06, "loss": 41.0879, "step": 4377 }, { "epoch": 104.23880597014926, "grad_norm": 26.040369033813477, "learning_rate": 8.720779220779221e-06, "loss": 41.9009, "step": 4378 }, { "epoch": 104.26268656716418, "grad_norm": 21.120927810668945, "learning_rate": 8.718614718614719e-06, "loss": 41.5876, "step": 4379 }, { "epoch": 104.2865671641791, "grad_norm": 24.789485931396484, "learning_rate": 8.716450216450217e-06, "loss": 42.4683, "step": 4380 }, { "epoch": 104.31044776119403, "grad_norm": 20.2288761138916, "learning_rate": 8.714285714285715e-06, "loss": 41.7415, "step": 4381 }, { "epoch": 104.33432835820895, "grad_norm": 23.13172721862793, "learning_rate": 8.712121212121212e-06, "loss": 41.8696, "step": 4382 }, { "epoch": 104.35820895522389, "grad_norm": 21.838037490844727, "learning_rate": 8.70995670995671e-06, "loss": 43.1081, "step": 4383 }, { "epoch": 104.38208955223881, "grad_norm": 18.31660270690918, "learning_rate": 8.707792207792208e-06, "loss": 41.6609, "step": 4384 }, { "epoch": 104.40597014925373, "grad_norm": 20.596466064453125, "learning_rate": 8.705627705627706e-06, "loss": 41.9226, "step": 4385 }, { "epoch": 104.42985074626866, "grad_norm": 19.209354400634766, "learning_rate": 8.703463203463204e-06, "loss": 41.8937, "step": 4386 }, { "epoch": 104.45373134328358, "grad_norm": 21.35397720336914, "learning_rate": 8.701298701298701e-06, "loss": 41.222, "step": 4387 }, { "epoch": 104.4776119402985, "grad_norm": 16.040178298950195, "learning_rate": 8.6991341991342e-06, "loss": 40.8326, "step": 4388 }, { "epoch": 104.50149253731344, "grad_norm": 26.846803665161133, "learning_rate": 8.696969696969699e-06, "loss": 42.0748, "step": 4389 }, { "epoch": 104.52537313432836, "grad_norm": 19.368515014648438, "learning_rate": 8.694805194805196e-06, "loss": 41.4322, "step": 4390 }, { "epoch": 104.54925373134328, "grad_norm": 30.950580596923828, "learning_rate": 8.692640692640694e-06, "loss": 41.2695, "step": 4391 }, { "epoch": 104.57313432835821, "grad_norm": 23.07410430908203, "learning_rate": 8.690476190476192e-06, "loss": 41.8303, "step": 4392 }, { "epoch": 104.59701492537313, "grad_norm": 27.158117294311523, "learning_rate": 8.68831168831169e-06, "loss": 42.4952, "step": 4393 }, { "epoch": 104.62089552238805, "grad_norm": 25.001056671142578, "learning_rate": 8.686147186147188e-06, "loss": 41.4797, "step": 4394 }, { "epoch": 104.64477611940299, "grad_norm": 27.168846130371094, "learning_rate": 8.683982683982685e-06, "loss": 41.8096, "step": 4395 }, { "epoch": 104.66865671641791, "grad_norm": 21.596757888793945, "learning_rate": 8.681818181818182e-06, "loss": 41.9243, "step": 4396 }, { "epoch": 104.69253731343284, "grad_norm": 27.944332122802734, "learning_rate": 8.67965367965368e-06, "loss": 42.5102, "step": 4397 }, { "epoch": 104.71641791044776, "grad_norm": 24.295595169067383, "learning_rate": 8.677489177489177e-06, "loss": 42.5514, "step": 4398 }, { "epoch": 104.74029850746268, "grad_norm": 27.505474090576172, "learning_rate": 8.675324675324675e-06, "loss": 42.017, "step": 4399 }, { "epoch": 104.7641791044776, "grad_norm": 24.030363082885742, "learning_rate": 8.673160173160173e-06, "loss": 42.5318, "step": 4400 }, { "epoch": 104.78805970149254, "grad_norm": 26.74481964111328, "learning_rate": 8.670995670995672e-06, "loss": 42.4153, "step": 4401 }, { "epoch": 104.81194029850747, "grad_norm": 25.275205612182617, "learning_rate": 8.66883116883117e-06, "loss": 40.9114, "step": 4402 }, { "epoch": 104.83582089552239, "grad_norm": 19.21797752380371, "learning_rate": 8.666666666666668e-06, "loss": 41.4621, "step": 4403 }, { "epoch": 104.85970149253731, "grad_norm": 21.647167205810547, "learning_rate": 8.664502164502166e-06, "loss": 42.0579, "step": 4404 }, { "epoch": 104.88358208955223, "grad_norm": 18.133159637451172, "learning_rate": 8.662337662337663e-06, "loss": 41.3995, "step": 4405 }, { "epoch": 104.90746268656716, "grad_norm": 17.7130069732666, "learning_rate": 8.660173160173161e-06, "loss": 42.2021, "step": 4406 }, { "epoch": 104.9313432835821, "grad_norm": 17.646291732788086, "learning_rate": 8.658008658008659e-06, "loss": 41.4231, "step": 4407 }, { "epoch": 104.95522388059702, "grad_norm": 20.67991065979004, "learning_rate": 8.655844155844157e-06, "loss": 40.8638, "step": 4408 }, { "epoch": 104.97910447761194, "grad_norm": 19.140832901000977, "learning_rate": 8.653679653679655e-06, "loss": 42.5387, "step": 4409 }, { "epoch": 105.0, "grad_norm": 13.847710609436035, "learning_rate": 8.651515151515152e-06, "loss": 35.4038, "step": 4410 }, { "epoch": 105.02388059701492, "grad_norm": 16.923620223999023, "learning_rate": 8.64935064935065e-06, "loss": 43.0403, "step": 4411 }, { "epoch": 105.04776119402985, "grad_norm": 17.983060836791992, "learning_rate": 8.647186147186148e-06, "loss": 42.2899, "step": 4412 }, { "epoch": 105.07164179104478, "grad_norm": 16.440452575683594, "learning_rate": 8.645021645021646e-06, "loss": 41.3221, "step": 4413 }, { "epoch": 105.0955223880597, "grad_norm": 20.931194305419922, "learning_rate": 8.642857142857144e-06, "loss": 41.6118, "step": 4414 }, { "epoch": 105.11940298507463, "grad_norm": 16.63971710205078, "learning_rate": 8.640692640692641e-06, "loss": 41.56, "step": 4415 }, { "epoch": 105.14328358208955, "grad_norm": 19.395835876464844, "learning_rate": 8.63852813852814e-06, "loss": 40.3512, "step": 4416 }, { "epoch": 105.16716417910447, "grad_norm": 23.419681549072266, "learning_rate": 8.636363636363637e-06, "loss": 41.588, "step": 4417 }, { "epoch": 105.1910447761194, "grad_norm": 18.330759048461914, "learning_rate": 8.634199134199135e-06, "loss": 41.5903, "step": 4418 }, { "epoch": 105.21492537313434, "grad_norm": 32.92748260498047, "learning_rate": 8.632034632034633e-06, "loss": 41.7201, "step": 4419 }, { "epoch": 105.23880597014926, "grad_norm": 23.43516731262207, "learning_rate": 8.62987012987013e-06, "loss": 42.0367, "step": 4420 }, { "epoch": 105.26268656716418, "grad_norm": 31.077037811279297, "learning_rate": 8.627705627705628e-06, "loss": 41.8229, "step": 4421 }, { "epoch": 105.2865671641791, "grad_norm": 24.310850143432617, "learning_rate": 8.625541125541126e-06, "loss": 41.869, "step": 4422 }, { "epoch": 105.31044776119403, "grad_norm": 29.064128875732422, "learning_rate": 8.623376623376624e-06, "loss": 41.3312, "step": 4423 }, { "epoch": 105.33432835820895, "grad_norm": 27.2437686920166, "learning_rate": 8.621212121212122e-06, "loss": 41.4347, "step": 4424 }, { "epoch": 105.35820895522389, "grad_norm": 26.48787498474121, "learning_rate": 8.61904761904762e-06, "loss": 41.9868, "step": 4425 }, { "epoch": 105.38208955223881, "grad_norm": 23.06917953491211, "learning_rate": 8.616883116883117e-06, "loss": 40.7182, "step": 4426 }, { "epoch": 105.40597014925373, "grad_norm": 25.888072967529297, "learning_rate": 8.614718614718615e-06, "loss": 43.4227, "step": 4427 }, { "epoch": 105.42985074626866, "grad_norm": 21.196561813354492, "learning_rate": 8.612554112554113e-06, "loss": 42.1541, "step": 4428 }, { "epoch": 105.45373134328358, "grad_norm": 23.897281646728516, "learning_rate": 8.61038961038961e-06, "loss": 42.3009, "step": 4429 }, { "epoch": 105.4776119402985, "grad_norm": 21.39472770690918, "learning_rate": 8.608225108225108e-06, "loss": 41.9873, "step": 4430 }, { "epoch": 105.50149253731344, "grad_norm": 22.932235717773438, "learning_rate": 8.606060606060606e-06, "loss": 42.9503, "step": 4431 }, { "epoch": 105.52537313432836, "grad_norm": 19.643224716186523, "learning_rate": 8.603896103896104e-06, "loss": 41.1197, "step": 4432 }, { "epoch": 105.54925373134328, "grad_norm": 22.474496841430664, "learning_rate": 8.601731601731602e-06, "loss": 41.472, "step": 4433 }, { "epoch": 105.57313432835821, "grad_norm": 18.618505477905273, "learning_rate": 8.5995670995671e-06, "loss": 42.0385, "step": 4434 }, { "epoch": 105.59701492537313, "grad_norm": 22.780241012573242, "learning_rate": 8.597402597402597e-06, "loss": 42.2941, "step": 4435 }, { "epoch": 105.62089552238805, "grad_norm": 18.00736427307129, "learning_rate": 8.595238095238097e-06, "loss": 41.086, "step": 4436 }, { "epoch": 105.64477611940299, "grad_norm": 26.372411727905273, "learning_rate": 8.593073593073595e-06, "loss": 43.5092, "step": 4437 }, { "epoch": 105.66865671641791, "grad_norm": 20.315715789794922, "learning_rate": 8.590909090909092e-06, "loss": 42.6546, "step": 4438 }, { "epoch": 105.69253731343284, "grad_norm": 25.5256404876709, "learning_rate": 8.58874458874459e-06, "loss": 41.5332, "step": 4439 }, { "epoch": 105.71641791044776, "grad_norm": 23.848834991455078, "learning_rate": 8.586580086580088e-06, "loss": 41.1996, "step": 4440 }, { "epoch": 105.74029850746268, "grad_norm": 22.64993667602539, "learning_rate": 8.584415584415586e-06, "loss": 41.0973, "step": 4441 }, { "epoch": 105.7641791044776, "grad_norm": 25.26251792907715, "learning_rate": 8.582251082251084e-06, "loss": 40.5223, "step": 4442 }, { "epoch": 105.78805970149254, "grad_norm": 18.45581817626953, "learning_rate": 8.580086580086581e-06, "loss": 40.3545, "step": 4443 }, { "epoch": 105.81194029850747, "grad_norm": 20.561473846435547, "learning_rate": 8.57792207792208e-06, "loss": 41.3425, "step": 4444 }, { "epoch": 105.83582089552239, "grad_norm": 19.369930267333984, "learning_rate": 8.575757575757575e-06, "loss": 41.9595, "step": 4445 }, { "epoch": 105.85970149253731, "grad_norm": 16.14900779724121, "learning_rate": 8.573593073593073e-06, "loss": 41.1797, "step": 4446 }, { "epoch": 105.88358208955223, "grad_norm": 21.74477195739746, "learning_rate": 8.571428571428571e-06, "loss": 40.7879, "step": 4447 }, { "epoch": 105.90746268656716, "grad_norm": 16.549848556518555, "learning_rate": 8.56926406926407e-06, "loss": 41.5383, "step": 4448 }, { "epoch": 105.9313432835821, "grad_norm": 18.291797637939453, "learning_rate": 8.567099567099568e-06, "loss": 42.818, "step": 4449 }, { "epoch": 105.95522388059702, "grad_norm": 21.389198303222656, "learning_rate": 8.564935064935066e-06, "loss": 41.8181, "step": 4450 }, { "epoch": 105.97910447761194, "grad_norm": 18.4671630859375, "learning_rate": 8.562770562770564e-06, "loss": 40.5471, "step": 4451 }, { "epoch": 106.0, "grad_norm": 12.387261390686035, "learning_rate": 8.560606060606062e-06, "loss": 37.1723, "step": 4452 }, { "epoch": 106.02388059701492, "grad_norm": 25.35000991821289, "learning_rate": 8.55844155844156e-06, "loss": 41.526, "step": 4453 }, { "epoch": 106.04776119402985, "grad_norm": 18.196853637695312, "learning_rate": 8.556277056277057e-06, "loss": 42.4372, "step": 4454 }, { "epoch": 106.07164179104478, "grad_norm": 17.911649703979492, "learning_rate": 8.554112554112555e-06, "loss": 41.6807, "step": 4455 }, { "epoch": 106.0955223880597, "grad_norm": 18.82575798034668, "learning_rate": 8.551948051948053e-06, "loss": 41.5713, "step": 4456 }, { "epoch": 106.11940298507463, "grad_norm": 17.8409423828125, "learning_rate": 8.54978354978355e-06, "loss": 41.243, "step": 4457 }, { "epoch": 106.14328358208955, "grad_norm": 14.669032096862793, "learning_rate": 8.547619047619048e-06, "loss": 41.3578, "step": 4458 }, { "epoch": 106.16716417910447, "grad_norm": 18.624805450439453, "learning_rate": 8.545454545454546e-06, "loss": 42.5552, "step": 4459 }, { "epoch": 106.1910447761194, "grad_norm": 15.485766410827637, "learning_rate": 8.543290043290044e-06, "loss": 42.2293, "step": 4460 }, { "epoch": 106.21492537313434, "grad_norm": 19.794565200805664, "learning_rate": 8.541125541125542e-06, "loss": 41.1415, "step": 4461 }, { "epoch": 106.23880597014926, "grad_norm": 18.35716438293457, "learning_rate": 8.53896103896104e-06, "loss": 41.6452, "step": 4462 }, { "epoch": 106.26268656716418, "grad_norm": 20.6253719329834, "learning_rate": 8.536796536796537e-06, "loss": 41.2028, "step": 4463 }, { "epoch": 106.2865671641791, "grad_norm": 17.438785552978516, "learning_rate": 8.534632034632035e-06, "loss": 42.4732, "step": 4464 }, { "epoch": 106.31044776119403, "grad_norm": 22.83930778503418, "learning_rate": 8.532467532467533e-06, "loss": 40.1875, "step": 4465 }, { "epoch": 106.33432835820895, "grad_norm": 19.77629852294922, "learning_rate": 8.53030303030303e-06, "loss": 42.7191, "step": 4466 }, { "epoch": 106.35820895522389, "grad_norm": 24.823516845703125, "learning_rate": 8.528138528138529e-06, "loss": 41.8532, "step": 4467 }, { "epoch": 106.38208955223881, "grad_norm": 25.804109573364258, "learning_rate": 8.525974025974026e-06, "loss": 41.3039, "step": 4468 }, { "epoch": 106.40597014925373, "grad_norm": 18.37181854248047, "learning_rate": 8.523809523809524e-06, "loss": 42.5937, "step": 4469 }, { "epoch": 106.42985074626866, "grad_norm": 21.761140823364258, "learning_rate": 8.521645021645022e-06, "loss": 41.5739, "step": 4470 }, { "epoch": 106.45373134328358, "grad_norm": 18.34234619140625, "learning_rate": 8.51948051948052e-06, "loss": 42.0672, "step": 4471 }, { "epoch": 106.4776119402985, "grad_norm": 21.598434448242188, "learning_rate": 8.51731601731602e-06, "loss": 43.3303, "step": 4472 }, { "epoch": 106.50149253731344, "grad_norm": 19.549448013305664, "learning_rate": 8.515151515151517e-06, "loss": 41.4708, "step": 4473 }, { "epoch": 106.52537313432836, "grad_norm": 20.763225555419922, "learning_rate": 8.512987012987015e-06, "loss": 41.8263, "step": 4474 }, { "epoch": 106.54925373134328, "grad_norm": 19.644168853759766, "learning_rate": 8.510822510822511e-06, "loss": 42.1677, "step": 4475 }, { "epoch": 106.57313432835821, "grad_norm": 23.9834041595459, "learning_rate": 8.508658008658009e-06, "loss": 42.5743, "step": 4476 }, { "epoch": 106.59701492537313, "grad_norm": 19.93153953552246, "learning_rate": 8.506493506493507e-06, "loss": 41.2167, "step": 4477 }, { "epoch": 106.62089552238805, "grad_norm": 22.8863525390625, "learning_rate": 8.504329004329004e-06, "loss": 41.1281, "step": 4478 }, { "epoch": 106.64477611940299, "grad_norm": 20.42034149169922, "learning_rate": 8.502164502164502e-06, "loss": 41.0462, "step": 4479 }, { "epoch": 106.66865671641791, "grad_norm": 21.096284866333008, "learning_rate": 8.5e-06, "loss": 41.1694, "step": 4480 }, { "epoch": 106.69253731343284, "grad_norm": 21.80982780456543, "learning_rate": 8.497835497835498e-06, "loss": 41.3229, "step": 4481 }, { "epoch": 106.71641791044776, "grad_norm": 19.85307502746582, "learning_rate": 8.495670995670996e-06, "loss": 41.6649, "step": 4482 }, { "epoch": 106.74029850746268, "grad_norm": 15.509448051452637, "learning_rate": 8.493506493506493e-06, "loss": 41.1427, "step": 4483 }, { "epoch": 106.7641791044776, "grad_norm": 19.124879837036133, "learning_rate": 8.491341991341993e-06, "loss": 41.2628, "step": 4484 }, { "epoch": 106.78805970149254, "grad_norm": 16.123470306396484, "learning_rate": 8.48917748917749e-06, "loss": 41.9344, "step": 4485 }, { "epoch": 106.81194029850747, "grad_norm": 16.704002380371094, "learning_rate": 8.487012987012988e-06, "loss": 41.3242, "step": 4486 }, { "epoch": 106.83582089552239, "grad_norm": 17.679168701171875, "learning_rate": 8.484848484848486e-06, "loss": 42.4921, "step": 4487 }, { "epoch": 106.85970149253731, "grad_norm": 16.334306716918945, "learning_rate": 8.482683982683984e-06, "loss": 41.7613, "step": 4488 }, { "epoch": 106.88358208955223, "grad_norm": 21.279388427734375, "learning_rate": 8.480519480519482e-06, "loss": 40.7258, "step": 4489 }, { "epoch": 106.90746268656716, "grad_norm": 18.096824645996094, "learning_rate": 8.47835497835498e-06, "loss": 41.4365, "step": 4490 }, { "epoch": 106.9313432835821, "grad_norm": 14.148079872131348, "learning_rate": 8.476190476190477e-06, "loss": 41.9096, "step": 4491 }, { "epoch": 106.95522388059702, "grad_norm": 18.99448013305664, "learning_rate": 8.474025974025975e-06, "loss": 41.1249, "step": 4492 }, { "epoch": 106.97910447761194, "grad_norm": 19.877487182617188, "learning_rate": 8.471861471861473e-06, "loss": 41.6588, "step": 4493 }, { "epoch": 107.0, "grad_norm": 17.858646392822266, "learning_rate": 8.46969696969697e-06, "loss": 35.8561, "step": 4494 }, { "epoch": 107.02388059701492, "grad_norm": 15.608851432800293, "learning_rate": 8.467532467532467e-06, "loss": 41.4418, "step": 4495 }, { "epoch": 107.04776119402985, "grad_norm": 22.582759857177734, "learning_rate": 8.465367965367966e-06, "loss": 41.0498, "step": 4496 }, { "epoch": 107.07164179104478, "grad_norm": 21.779876708984375, "learning_rate": 8.463203463203464e-06, "loss": 41.6588, "step": 4497 }, { "epoch": 107.0955223880597, "grad_norm": 20.698528289794922, "learning_rate": 8.461038961038962e-06, "loss": 43.0142, "step": 4498 }, { "epoch": 107.11940298507463, "grad_norm": 16.091886520385742, "learning_rate": 8.45887445887446e-06, "loss": 41.3033, "step": 4499 }, { "epoch": 107.14328358208955, "grad_norm": 28.291919708251953, "learning_rate": 8.456709956709958e-06, "loss": 41.3949, "step": 4500 }, { "epoch": 107.16716417910447, "grad_norm": 19.51844596862793, "learning_rate": 8.454545454545455e-06, "loss": 42.2322, "step": 4501 }, { "epoch": 107.1910447761194, "grad_norm": 27.817554473876953, "learning_rate": 8.452380952380953e-06, "loss": 41.1951, "step": 4502 }, { "epoch": 107.21492537313434, "grad_norm": 20.286903381347656, "learning_rate": 8.450216450216451e-06, "loss": 42.1971, "step": 4503 }, { "epoch": 107.23880597014926, "grad_norm": 26.34720230102539, "learning_rate": 8.448051948051949e-06, "loss": 40.7248, "step": 4504 }, { "epoch": 107.26268656716418, "grad_norm": 21.989835739135742, "learning_rate": 8.445887445887447e-06, "loss": 42.4462, "step": 4505 }, { "epoch": 107.2865671641791, "grad_norm": 22.28291893005371, "learning_rate": 8.443722943722944e-06, "loss": 41.103, "step": 4506 }, { "epoch": 107.31044776119403, "grad_norm": 21.63711166381836, "learning_rate": 8.441558441558442e-06, "loss": 42.685, "step": 4507 }, { "epoch": 107.33432835820895, "grad_norm": 16.82655143737793, "learning_rate": 8.43939393939394e-06, "loss": 42.0045, "step": 4508 }, { "epoch": 107.35820895522389, "grad_norm": 24.85128402709961, "learning_rate": 8.437229437229438e-06, "loss": 41.6018, "step": 4509 }, { "epoch": 107.38208955223881, "grad_norm": 18.015731811523438, "learning_rate": 8.435064935064936e-06, "loss": 40.7281, "step": 4510 }, { "epoch": 107.40597014925373, "grad_norm": 26.402570724487305, "learning_rate": 8.432900432900433e-06, "loss": 42.5324, "step": 4511 }, { "epoch": 107.42985074626866, "grad_norm": 21.223861694335938, "learning_rate": 8.430735930735931e-06, "loss": 40.7112, "step": 4512 }, { "epoch": 107.45373134328358, "grad_norm": 19.461315155029297, "learning_rate": 8.428571428571429e-06, "loss": 40.8781, "step": 4513 }, { "epoch": 107.4776119402985, "grad_norm": 23.075971603393555, "learning_rate": 8.426406926406927e-06, "loss": 41.2487, "step": 4514 }, { "epoch": 107.50149253731344, "grad_norm": 22.154701232910156, "learning_rate": 8.424242424242425e-06, "loss": 41.175, "step": 4515 }, { "epoch": 107.52537313432836, "grad_norm": 29.775875091552734, "learning_rate": 8.422077922077922e-06, "loss": 42.0601, "step": 4516 }, { "epoch": 107.54925373134328, "grad_norm": 23.231462478637695, "learning_rate": 8.41991341991342e-06, "loss": 41.6765, "step": 4517 }, { "epoch": 107.57313432835821, "grad_norm": 28.446731567382812, "learning_rate": 8.417748917748918e-06, "loss": 43.3939, "step": 4518 }, { "epoch": 107.59701492537313, "grad_norm": 17.26323890686035, "learning_rate": 8.415584415584416e-06, "loss": 41.2887, "step": 4519 }, { "epoch": 107.62089552238805, "grad_norm": 20.49373435974121, "learning_rate": 8.413419913419915e-06, "loss": 41.6722, "step": 4520 }, { "epoch": 107.64477611940299, "grad_norm": 16.85104751586914, "learning_rate": 8.411255411255413e-06, "loss": 40.5418, "step": 4521 }, { "epoch": 107.66865671641791, "grad_norm": 16.146242141723633, "learning_rate": 8.40909090909091e-06, "loss": 40.9689, "step": 4522 }, { "epoch": 107.69253731343284, "grad_norm": 17.153108596801758, "learning_rate": 8.406926406926409e-06, "loss": 41.6464, "step": 4523 }, { "epoch": 107.71641791044776, "grad_norm": 20.580894470214844, "learning_rate": 8.404761904761905e-06, "loss": 42.4474, "step": 4524 }, { "epoch": 107.74029850746268, "grad_norm": 15.058161735534668, "learning_rate": 8.402597402597403e-06, "loss": 41.7216, "step": 4525 }, { "epoch": 107.7641791044776, "grad_norm": 20.474285125732422, "learning_rate": 8.4004329004329e-06, "loss": 41.7553, "step": 4526 }, { "epoch": 107.78805970149254, "grad_norm": 20.75484275817871, "learning_rate": 8.398268398268398e-06, "loss": 41.6698, "step": 4527 }, { "epoch": 107.81194029850747, "grad_norm": 14.600532531738281, "learning_rate": 8.396103896103896e-06, "loss": 41.6121, "step": 4528 }, { "epoch": 107.83582089552239, "grad_norm": 18.704586029052734, "learning_rate": 8.393939393939394e-06, "loss": 40.5424, "step": 4529 }, { "epoch": 107.85970149253731, "grad_norm": 15.7553129196167, "learning_rate": 8.391774891774892e-06, "loss": 41.0951, "step": 4530 }, { "epoch": 107.88358208955223, "grad_norm": 20.587574005126953, "learning_rate": 8.38961038961039e-06, "loss": 42.1185, "step": 4531 }, { "epoch": 107.90746268656716, "grad_norm": 16.545307159423828, "learning_rate": 8.387445887445889e-06, "loss": 40.7692, "step": 4532 }, { "epoch": 107.9313432835821, "grad_norm": 19.46141815185547, "learning_rate": 8.385281385281387e-06, "loss": 41.6013, "step": 4533 }, { "epoch": 107.95522388059702, "grad_norm": 16.87144660949707, "learning_rate": 8.383116883116884e-06, "loss": 41.6794, "step": 4534 }, { "epoch": 107.97910447761194, "grad_norm": 16.42438316345215, "learning_rate": 8.380952380952382e-06, "loss": 42.3932, "step": 4535 }, { "epoch": 108.0, "grad_norm": 24.72748374938965, "learning_rate": 8.37878787878788e-06, "loss": 36.3065, "step": 4536 }, { "epoch": 108.02388059701492, "grad_norm": 21.551437377929688, "learning_rate": 8.376623376623378e-06, "loss": 41.1883, "step": 4537 }, { "epoch": 108.04776119402985, "grad_norm": 31.447101593017578, "learning_rate": 8.374458874458876e-06, "loss": 41.2846, "step": 4538 }, { "epoch": 108.07164179104478, "grad_norm": 23.090343475341797, "learning_rate": 8.372294372294373e-06, "loss": 41.7488, "step": 4539 }, { "epoch": 108.0955223880597, "grad_norm": 29.949562072753906, "learning_rate": 8.370129870129871e-06, "loss": 42.462, "step": 4540 }, { "epoch": 108.11940298507463, "grad_norm": 21.09743309020996, "learning_rate": 8.367965367965369e-06, "loss": 41.4602, "step": 4541 }, { "epoch": 108.14328358208955, "grad_norm": 34.469139099121094, "learning_rate": 8.365800865800867e-06, "loss": 41.5088, "step": 4542 }, { "epoch": 108.16716417910447, "grad_norm": 25.73923110961914, "learning_rate": 8.363636363636365e-06, "loss": 42.9585, "step": 4543 }, { "epoch": 108.1910447761194, "grad_norm": 30.073488235473633, "learning_rate": 8.361471861471862e-06, "loss": 41.5492, "step": 4544 }, { "epoch": 108.21492537313434, "grad_norm": 26.56512451171875, "learning_rate": 8.35930735930736e-06, "loss": 41.2259, "step": 4545 }, { "epoch": 108.23880597014926, "grad_norm": 32.05238723754883, "learning_rate": 8.357142857142858e-06, "loss": 41.8249, "step": 4546 }, { "epoch": 108.26268656716418, "grad_norm": 25.487403869628906, "learning_rate": 8.354978354978356e-06, "loss": 41.2949, "step": 4547 }, { "epoch": 108.2865671641791, "grad_norm": 26.391586303710938, "learning_rate": 8.352813852813854e-06, "loss": 41.9133, "step": 4548 }, { "epoch": 108.31044776119403, "grad_norm": 23.400354385375977, "learning_rate": 8.350649350649351e-06, "loss": 41.2916, "step": 4549 }, { "epoch": 108.33432835820895, "grad_norm": 32.927467346191406, "learning_rate": 8.348484848484849e-06, "loss": 41.0151, "step": 4550 }, { "epoch": 108.35820895522389, "grad_norm": NaN, "learning_rate": 8.346320346320347e-06, "loss": 41.9067, "step": 4551 }, { "epoch": 108.38208955223881, "grad_norm": 27.199121475219727, "learning_rate": 8.346320346320347e-06, "loss": 41.8715, "step": 4552 }, { "epoch": 108.40597014925373, "grad_norm": 30.17380142211914, "learning_rate": 8.344155844155845e-06, "loss": 41.9553, "step": 4553 }, { "epoch": 108.42985074626866, "grad_norm": 28.72991943359375, "learning_rate": 8.341991341991343e-06, "loss": 41.9683, "step": 4554 }, { "epoch": 108.45373134328358, "grad_norm": 26.323143005371094, "learning_rate": 8.33982683982684e-06, "loss": 41.1456, "step": 4555 }, { "epoch": 108.4776119402985, "grad_norm": 23.400619506835938, "learning_rate": 8.337662337662338e-06, "loss": 41.7313, "step": 4556 }, { "epoch": 108.50149253731344, "grad_norm": 23.805021286010742, "learning_rate": 8.335497835497836e-06, "loss": 40.5904, "step": 4557 }, { "epoch": 108.52537313432836, "grad_norm": 21.17874526977539, "learning_rate": 8.333333333333334e-06, "loss": 41.5941, "step": 4558 }, { "epoch": 108.54925373134328, "grad_norm": 26.89427375793457, "learning_rate": 8.331168831168832e-06, "loss": 40.6515, "step": 4559 }, { "epoch": 108.57313432835821, "grad_norm": 22.102890014648438, "learning_rate": 8.32900432900433e-06, "loss": 41.6195, "step": 4560 }, { "epoch": 108.59701492537313, "grad_norm": 28.349239349365234, "learning_rate": 8.326839826839827e-06, "loss": 41.7613, "step": 4561 }, { "epoch": 108.62089552238805, "grad_norm": 24.95227813720703, "learning_rate": 8.324675324675325e-06, "loss": 42.2593, "step": 4562 }, { "epoch": 108.64477611940299, "grad_norm": 29.643531799316406, "learning_rate": 8.322510822510823e-06, "loss": 42.5247, "step": 4563 }, { "epoch": 108.66865671641791, "grad_norm": 24.321622848510742, "learning_rate": 8.32034632034632e-06, "loss": 41.5149, "step": 4564 }, { "epoch": 108.69253731343284, "grad_norm": 27.7292537689209, "learning_rate": 8.318181818181818e-06, "loss": 41.6252, "step": 4565 }, { "epoch": 108.71641791044776, "grad_norm": 23.14917755126953, "learning_rate": 8.316017316017316e-06, "loss": 41.8188, "step": 4566 }, { "epoch": 108.74029850746268, "grad_norm": 31.897857666015625, "learning_rate": 8.313852813852814e-06, "loss": 41.2639, "step": 4567 }, { "epoch": 108.7641791044776, "grad_norm": 22.20448112487793, "learning_rate": 8.311688311688313e-06, "loss": 40.8366, "step": 4568 }, { "epoch": 108.78805970149254, "grad_norm": 33.260982513427734, "learning_rate": 8.309523809523811e-06, "loss": 42.1547, "step": 4569 }, { "epoch": 108.81194029850747, "grad_norm": 28.327970504760742, "learning_rate": 8.307359307359309e-06, "loss": 41.0261, "step": 4570 }, { "epoch": 108.83582089552239, "grad_norm": 30.421405792236328, "learning_rate": 8.305194805194807e-06, "loss": 42.6333, "step": 4571 }, { "epoch": 108.85970149253731, "grad_norm": 27.54227066040039, "learning_rate": 8.303030303030305e-06, "loss": 41.5392, "step": 4572 }, { "epoch": 108.88358208955223, "grad_norm": 27.083431243896484, "learning_rate": 8.300865800865802e-06, "loss": 40.5557, "step": 4573 }, { "epoch": 108.90746268656716, "grad_norm": 22.203136444091797, "learning_rate": 8.2987012987013e-06, "loss": 40.2079, "step": 4574 }, { "epoch": 108.9313432835821, "grad_norm": 30.871158599853516, "learning_rate": 8.296536796536796e-06, "loss": 40.74, "step": 4575 }, { "epoch": 108.95522388059702, "grad_norm": 26.300838470458984, "learning_rate": 8.294372294372294e-06, "loss": 41.3107, "step": 4576 }, { "epoch": 108.97910447761194, "grad_norm": 31.482698440551758, "learning_rate": 8.292207792207792e-06, "loss": 42.54, "step": 4577 }, { "epoch": 109.0, "grad_norm": 21.353776931762695, "learning_rate": 8.29004329004329e-06, "loss": 36.6851, "step": 4578 }, { "epoch": 109.02388059701492, "grad_norm": 27.117504119873047, "learning_rate": 8.287878787878787e-06, "loss": 41.3884, "step": 4579 }, { "epoch": 109.04776119402985, "grad_norm": 23.497106552124023, "learning_rate": 8.285714285714287e-06, "loss": 41.0846, "step": 4580 }, { "epoch": 109.07164179104478, "grad_norm": 27.996051788330078, "learning_rate": 8.283549783549785e-06, "loss": 41.214, "step": 4581 }, { "epoch": 109.0955223880597, "grad_norm": 24.364675521850586, "learning_rate": 8.281385281385283e-06, "loss": 42.1255, "step": 4582 }, { "epoch": 109.11940298507463, "grad_norm": 31.155681610107422, "learning_rate": 8.27922077922078e-06, "loss": 42.1774, "step": 4583 }, { "epoch": 109.14328358208955, "grad_norm": 27.193376541137695, "learning_rate": 8.277056277056278e-06, "loss": 42.4321, "step": 4584 }, { "epoch": 109.16716417910447, "grad_norm": 30.398059844970703, "learning_rate": 8.274891774891776e-06, "loss": 42.0286, "step": 4585 }, { "epoch": 109.1910447761194, "grad_norm": 27.1219425201416, "learning_rate": 8.272727272727274e-06, "loss": 40.9508, "step": 4586 }, { "epoch": 109.21492537313434, "grad_norm": 29.481327056884766, "learning_rate": 8.270562770562772e-06, "loss": 42.1899, "step": 4587 }, { "epoch": 109.23880597014926, "grad_norm": 29.547292709350586, "learning_rate": 8.26839826839827e-06, "loss": 40.2919, "step": 4588 }, { "epoch": 109.26268656716418, "grad_norm": 26.99224281311035, "learning_rate": 8.266233766233767e-06, "loss": 41.1843, "step": 4589 }, { "epoch": 109.2865671641791, "grad_norm": 25.45054054260254, "learning_rate": 8.264069264069265e-06, "loss": 41.6843, "step": 4590 }, { "epoch": 109.31044776119403, "grad_norm": 27.529739379882812, "learning_rate": 8.261904761904763e-06, "loss": 39.7442, "step": 4591 }, { "epoch": 109.33432835820895, "grad_norm": 23.54625129699707, "learning_rate": 8.25974025974026e-06, "loss": 40.9662, "step": 4592 }, { "epoch": 109.35820895522389, "grad_norm": 26.74515151977539, "learning_rate": 8.257575757575758e-06, "loss": 42.1445, "step": 4593 }, { "epoch": 109.38208955223881, "grad_norm": 24.591623306274414, "learning_rate": 8.255411255411256e-06, "loss": 41.4631, "step": 4594 }, { "epoch": 109.40597014925373, "grad_norm": 27.32378578186035, "learning_rate": 8.253246753246754e-06, "loss": 43.223, "step": 4595 }, { "epoch": 109.42985074626866, "grad_norm": 24.74321937561035, "learning_rate": 8.251082251082252e-06, "loss": 42.1151, "step": 4596 }, { "epoch": 109.45373134328358, "grad_norm": 24.764156341552734, "learning_rate": 8.24891774891775e-06, "loss": 42.0353, "step": 4597 }, { "epoch": 109.4776119402985, "grad_norm": 22.001508712768555, "learning_rate": 8.246753246753247e-06, "loss": 41.5189, "step": 4598 }, { "epoch": 109.50149253731344, "grad_norm": 27.916759490966797, "learning_rate": 8.244588744588745e-06, "loss": 42.8372, "step": 4599 }, { "epoch": 109.52537313432836, "grad_norm": 23.65235137939453, "learning_rate": 8.242424242424243e-06, "loss": 40.9737, "step": 4600 }, { "epoch": 109.54925373134328, "grad_norm": 25.150957107543945, "learning_rate": 8.24025974025974e-06, "loss": 41.279, "step": 4601 }, { "epoch": 109.57313432835821, "grad_norm": 24.193187713623047, "learning_rate": 8.238095238095239e-06, "loss": 42.029, "step": 4602 }, { "epoch": 109.59701492537313, "grad_norm": 26.186813354492188, "learning_rate": 8.235930735930736e-06, "loss": 40.3791, "step": 4603 }, { "epoch": 109.62089552238805, "grad_norm": 24.97614097595215, "learning_rate": 8.233766233766236e-06, "loss": 40.8347, "step": 4604 }, { "epoch": 109.64477611940299, "grad_norm": 27.51297950744629, "learning_rate": 8.231601731601732e-06, "loss": 40.6329, "step": 4605 }, { "epoch": 109.66865671641791, "grad_norm": 24.866369247436523, "learning_rate": 8.22943722943723e-06, "loss": 41.5968, "step": 4606 }, { "epoch": 109.69253731343284, "grad_norm": 28.864290237426758, "learning_rate": 8.227272727272728e-06, "loss": 41.3671, "step": 4607 }, { "epoch": 109.71641791044776, "grad_norm": 29.503835678100586, "learning_rate": 8.225108225108225e-06, "loss": 41.2315, "step": 4608 }, { "epoch": 109.74029850746268, "grad_norm": 26.039966583251953, "learning_rate": 8.222943722943723e-06, "loss": 41.0179, "step": 4609 }, { "epoch": 109.7641791044776, "grad_norm": 22.42831039428711, "learning_rate": 8.220779220779221e-06, "loss": 41.3331, "step": 4610 }, { "epoch": 109.78805970149254, "grad_norm": 25.328296661376953, "learning_rate": 8.218614718614719e-06, "loss": 39.9773, "step": 4611 }, { "epoch": 109.81194029850747, "grad_norm": 18.524499893188477, "learning_rate": 8.216450216450216e-06, "loss": 42.5481, "step": 4612 }, { "epoch": 109.83582089552239, "grad_norm": 26.1571102142334, "learning_rate": 8.214285714285714e-06, "loss": 41.759, "step": 4613 }, { "epoch": 109.85970149253731, "grad_norm": 22.46668243408203, "learning_rate": 8.212121212121212e-06, "loss": 42.0987, "step": 4614 }, { "epoch": 109.88358208955223, "grad_norm": 29.418230056762695, "learning_rate": 8.20995670995671e-06, "loss": 42.8083, "step": 4615 }, { "epoch": 109.90746268656716, "grad_norm": 23.00196647644043, "learning_rate": 8.20779220779221e-06, "loss": 40.488, "step": 4616 }, { "epoch": 109.9313432835821, "grad_norm": 27.977956771850586, "learning_rate": 8.205627705627707e-06, "loss": 41.4731, "step": 4617 }, { "epoch": 109.95522388059702, "grad_norm": 24.776628494262695, "learning_rate": 8.203463203463205e-06, "loss": 42.6836, "step": 4618 }, { "epoch": 109.97910447761194, "grad_norm": 27.11109733581543, "learning_rate": 8.201298701298703e-06, "loss": 40.7662, "step": 4619 }, { "epoch": 110.0, "grad_norm": 20.246700286865234, "learning_rate": 8.1991341991342e-06, "loss": 36.1303, "step": 4620 }, { "epoch": 110.0, "step": 4620, "total_flos": 2.2713564637226506e+17, "train_loss": 7.6340307194433175, "train_runtime": 25635.1587, "train_samples_per_second": 22.965, "train_steps_per_second": 0.18 }, { "epoch": 110.02388059701492, "grad_norm": 21.282230377197266, "learning_rate": 1e-05, "loss": 42.4757, "step": 4621 }, { "epoch": 110.04776119402985, "grad_norm": Infinity, "learning_rate": 9.998015873015874e-06, "loss": 48.912, "step": 4622 }, { "epoch": 110.07164179104478, "grad_norm": Infinity, "learning_rate": 9.998015873015874e-06, "loss": 49.0673, "step": 4623 }, { "epoch": 110.0955223880597, "grad_norm": 445.232177734375, "learning_rate": 9.998015873015874e-06, "loss": 48.7345, "step": 4624 }, { "epoch": 110.11940298507463, "grad_norm": 224.98858642578125, "learning_rate": 9.996031746031746e-06, "loss": 45.5376, "step": 4625 }, { "epoch": 110.14328358208955, "grad_norm": 92.86235046386719, "learning_rate": 9.99404761904762e-06, "loss": 44.0537, "step": 4626 }, { "epoch": 110.16716417910447, "grad_norm": 67.79006958007812, "learning_rate": 9.992063492063493e-06, "loss": 42.4137, "step": 4627 }, { "epoch": 110.1910447761194, "grad_norm": 52.0079345703125, "learning_rate": 9.990079365079366e-06, "loss": 42.7133, "step": 4628 }, { "epoch": 110.21492537313434, "grad_norm": 40.780120849609375, "learning_rate": 9.988095238095239e-06, "loss": 42.7036, "step": 4629 }, { "epoch": 110.23880597014926, "grad_norm": 45.80341339111328, "learning_rate": 9.986111111111111e-06, "loss": 42.5543, "step": 4630 }, { "epoch": 110.26268656716418, "grad_norm": NaN, "learning_rate": 9.984126984126986e-06, "loss": 67.9585, "step": 4631 }, { "epoch": 110.2865671641791, "grad_norm": 37.52305603027344, "learning_rate": 9.984126984126986e-06, "loss": 42.0859, "step": 4632 }, { "epoch": 110.31044776119403, "grad_norm": 37.10969543457031, "learning_rate": 9.982142857142858e-06, "loss": 42.8517, "step": 4633 }, { "epoch": 110.33432835820895, "grad_norm": 33.601905822753906, "learning_rate": 9.980158730158731e-06, "loss": 41.4451, "step": 4634 }, { "epoch": 110.35820895522389, "grad_norm": 25.348403930664062, "learning_rate": 9.978174603174604e-06, "loss": 41.7698, "step": 4635 }, { "epoch": 110.38208955223881, "grad_norm": 28.69048309326172, "learning_rate": 9.976190476190477e-06, "loss": 41.8287, "step": 4636 }, { "epoch": 110.40597014925373, "grad_norm": 30.578548431396484, "learning_rate": 9.97420634920635e-06, "loss": 42.4165, "step": 4637 }, { "epoch": 110.42985074626866, "grad_norm": 19.63727569580078, "learning_rate": 9.972222222222224e-06, "loss": 42.37, "step": 4638 }, { "epoch": 110.45373134328358, "grad_norm": 34.42063522338867, "learning_rate": 9.970238095238096e-06, "loss": 42.7996, "step": 4639 }, { "epoch": 110.4776119402985, "grad_norm": NaN, "learning_rate": 9.968253968253969e-06, "loss": 62.9577, "step": 4640 }, { "epoch": 110.50149253731344, "grad_norm": 23.131494522094727, "learning_rate": 9.968253968253969e-06, "loss": 40.8106, "step": 4641 }, { "epoch": 110.52537313432836, "grad_norm": 22.294376373291016, "learning_rate": 9.966269841269842e-06, "loss": 42.7557, "step": 4642 }, { "epoch": 110.54925373134328, "grad_norm": 30.476016998291016, "learning_rate": 9.964285714285714e-06, "loss": 42.1925, "step": 4643 }, { "epoch": 110.57313432835821, "grad_norm": 20.84010887145996, "learning_rate": 9.962301587301589e-06, "loss": 41.9241, "step": 4644 }, { "epoch": 110.59701492537313, "grad_norm": 23.944196701049805, "learning_rate": 9.960317460317462e-06, "loss": 41.8911, "step": 4645 }, { "epoch": 110.62089552238805, "grad_norm": 28.773279190063477, "learning_rate": 9.958333333333334e-06, "loss": 41.5711, "step": 4646 }, { "epoch": 110.64477611940299, "grad_norm": 22.82482147216797, "learning_rate": 9.956349206349207e-06, "loss": 42.1915, "step": 4647 }, { "epoch": 110.66865671641791, "grad_norm": 24.0530948638916, "learning_rate": 9.95436507936508e-06, "loss": 40.6648, "step": 4648 }, { "epoch": 110.69253731343284, "grad_norm": 21.640682220458984, "learning_rate": 9.952380952380954e-06, "loss": 41.5682, "step": 4649 }, { "epoch": 110.71641791044776, "grad_norm": 21.377979278564453, "learning_rate": 9.950396825396827e-06, "loss": 41.6034, "step": 4650 }, { "epoch": 110.74029850746268, "grad_norm": 19.04741668701172, "learning_rate": 9.9484126984127e-06, "loss": 41.8165, "step": 4651 }, { "epoch": 110.7641791044776, "grad_norm": 22.74652099609375, "learning_rate": 9.946428571428572e-06, "loss": 40.4093, "step": 4652 }, { "epoch": 110.78805970149254, "grad_norm": 16.828824996948242, "learning_rate": 9.944444444444445e-06, "loss": 42.181, "step": 4653 }, { "epoch": 110.81194029850747, "grad_norm": 20.226478576660156, "learning_rate": 9.94246031746032e-06, "loss": 41.774, "step": 4654 }, { "epoch": 110.83582089552239, "grad_norm": 19.935068130493164, "learning_rate": 9.940476190476192e-06, "loss": 41.6547, "step": 4655 }, { "epoch": 110.85970149253731, "grad_norm": 18.150102615356445, "learning_rate": 9.938492063492065e-06, "loss": 39.962, "step": 4656 }, { "epoch": 110.88358208955223, "grad_norm": 27.12464141845703, "learning_rate": 9.936507936507937e-06, "loss": 41.2807, "step": 4657 }, { "epoch": 110.90746268656716, "grad_norm": 18.194360733032227, "learning_rate": 9.93452380952381e-06, "loss": 40.8381, "step": 4658 }, { "epoch": 110.9313432835821, "grad_norm": 25.638107299804688, "learning_rate": 9.932539682539684e-06, "loss": 41.2385, "step": 4659 }, { "epoch": 110.95522388059702, "grad_norm": 21.1163387298584, "learning_rate": 9.930555555555557e-06, "loss": 41.0065, "step": 4660 }, { "epoch": 110.97910447761194, "grad_norm": 17.089710235595703, "learning_rate": 9.92857142857143e-06, "loss": 41.8835, "step": 4661 }, { "epoch": 111.0, "grad_norm": 19.484764099121094, "learning_rate": 9.926587301587303e-06, "loss": 37.1289, "step": 4662 }, { "epoch": 111.02388059701492, "grad_norm": 20.73271942138672, "learning_rate": 9.924603174603175e-06, "loss": 40.8035, "step": 4663 }, { "epoch": 111.04776119402985, "grad_norm": 14.759368896484375, "learning_rate": 9.922619047619048e-06, "loss": 42.164, "step": 4664 }, { "epoch": 111.07164179104478, "grad_norm": 20.654579162597656, "learning_rate": 9.920634920634922e-06, "loss": 41.7302, "step": 4665 }, { "epoch": 111.0955223880597, "grad_norm": 17.05702781677246, "learning_rate": 9.918650793650795e-06, "loss": 41.3278, "step": 4666 }, { "epoch": 111.11940298507463, "grad_norm": 15.701156616210938, "learning_rate": 9.916666666666668e-06, "loss": 40.7933, "step": 4667 }, { "epoch": 111.14328358208955, "grad_norm": 17.04022216796875, "learning_rate": 9.91468253968254e-06, "loss": 41.58, "step": 4668 }, { "epoch": 111.16716417910447, "grad_norm": 16.614116668701172, "learning_rate": 9.912698412698413e-06, "loss": 40.8149, "step": 4669 }, { "epoch": 111.1910447761194, "grad_norm": 17.664091110229492, "learning_rate": 9.910714285714288e-06, "loss": 40.459, "step": 4670 }, { "epoch": 111.21492537313434, "grad_norm": 17.730188369750977, "learning_rate": 9.90873015873016e-06, "loss": 42.3291, "step": 4671 }, { "epoch": 111.23880597014926, "grad_norm": 14.860199928283691, "learning_rate": 9.906746031746033e-06, "loss": 42.314, "step": 4672 }, { "epoch": 111.26268656716418, "grad_norm": 18.45416259765625, "learning_rate": 9.904761904761906e-06, "loss": 41.2486, "step": 4673 }, { "epoch": 111.2865671641791, "grad_norm": 15.178065299987793, "learning_rate": 9.902777777777778e-06, "loss": 42.0394, "step": 4674 }, { "epoch": 111.31044776119403, "grad_norm": 16.214420318603516, "learning_rate": 9.900793650793653e-06, "loss": 41.6074, "step": 4675 }, { "epoch": 111.33432835820895, "grad_norm": 19.241151809692383, "learning_rate": 9.898809523809525e-06, "loss": 42.0125, "step": 4676 }, { "epoch": 111.35820895522389, "grad_norm": 16.019407272338867, "learning_rate": 9.896825396825398e-06, "loss": 41.5867, "step": 4677 }, { "epoch": 111.38208955223881, "grad_norm": 18.017990112304688, "learning_rate": 9.89484126984127e-06, "loss": 40.4534, "step": 4678 }, { "epoch": 111.40597014925373, "grad_norm": 18.37062644958496, "learning_rate": 9.892857142857143e-06, "loss": 41.4307, "step": 4679 }, { "epoch": 111.42985074626866, "grad_norm": 18.12076187133789, "learning_rate": 9.890873015873018e-06, "loss": 42.1702, "step": 4680 }, { "epoch": 111.45373134328358, "grad_norm": 21.935218811035156, "learning_rate": 9.88888888888889e-06, "loss": 40.5205, "step": 4681 }, { "epoch": 111.4776119402985, "grad_norm": 19.747133255004883, "learning_rate": 9.886904761904763e-06, "loss": 41.7721, "step": 4682 }, { "epoch": 111.50149253731344, "grad_norm": 17.159732818603516, "learning_rate": 9.884920634920636e-06, "loss": 41.285, "step": 4683 }, { "epoch": 111.52537313432836, "grad_norm": 15.736952781677246, "learning_rate": 9.882936507936509e-06, "loss": 40.8216, "step": 4684 }, { "epoch": 111.54925373134328, "grad_norm": 17.591854095458984, "learning_rate": 9.880952380952381e-06, "loss": 40.0516, "step": 4685 }, { "epoch": 111.57313432835821, "grad_norm": 17.530582427978516, "learning_rate": 9.878968253968256e-06, "loss": 41.4235, "step": 4686 }, { "epoch": 111.59701492537313, "grad_norm": 18.394372940063477, "learning_rate": 9.876984126984128e-06, "loss": 41.7204, "step": 4687 }, { "epoch": 111.62089552238805, "grad_norm": 17.80558967590332, "learning_rate": 9.875000000000001e-06, "loss": 41.6861, "step": 4688 }, { "epoch": 111.64477611940299, "grad_norm": 19.939964294433594, "learning_rate": 9.873015873015874e-06, "loss": 41.3087, "step": 4689 }, { "epoch": 111.66865671641791, "grad_norm": 14.58205509185791, "learning_rate": 9.871031746031747e-06, "loss": 41.7955, "step": 4690 }, { "epoch": 111.69253731343284, "grad_norm": 19.98933982849121, "learning_rate": 9.869047619047621e-06, "loss": 42.3174, "step": 4691 }, { "epoch": 111.71641791044776, "grad_norm": 20.377466201782227, "learning_rate": 9.867063492063494e-06, "loss": 42.1654, "step": 4692 }, { "epoch": 111.74029850746268, "grad_norm": 19.26752471923828, "learning_rate": 9.865079365079366e-06, "loss": 41.0597, "step": 4693 }, { "epoch": 111.7641791044776, "grad_norm": 16.435440063476562, "learning_rate": 9.863095238095239e-06, "loss": 42.1122, "step": 4694 }, { "epoch": 111.78805970149254, "grad_norm": 17.955474853515625, "learning_rate": 9.861111111111112e-06, "loss": 41.0326, "step": 4695 }, { "epoch": 111.81194029850747, "grad_norm": 21.791505813598633, "learning_rate": 9.859126984126986e-06, "loss": 42.4256, "step": 4696 }, { "epoch": 111.83582089552239, "grad_norm": 17.081600189208984, "learning_rate": 9.857142857142859e-06, "loss": 41.7548, "step": 4697 }, { "epoch": 111.85970149253731, "grad_norm": 21.21491241455078, "learning_rate": 9.855158730158732e-06, "loss": 41.1434, "step": 4698 }, { "epoch": 111.88358208955223, "grad_norm": 25.082992553710938, "learning_rate": 9.853174603174604e-06, "loss": 41.2857, "step": 4699 }, { "epoch": 111.90746268656716, "grad_norm": 19.19919204711914, "learning_rate": 9.851190476190477e-06, "loss": 41.5529, "step": 4700 }, { "epoch": 111.9313432835821, "grad_norm": 32.29753494262695, "learning_rate": 9.849206349206351e-06, "loss": 42.4376, "step": 4701 }, { "epoch": 111.95522388059702, "grad_norm": 20.654430389404297, "learning_rate": 9.847222222222224e-06, "loss": 41.3052, "step": 4702 }, { "epoch": 111.97910447761194, "grad_norm": 32.98462677001953, "learning_rate": 9.845238095238097e-06, "loss": 41.1561, "step": 4703 }, { "epoch": 112.0, "grad_norm": 18.214174270629883, "learning_rate": 9.843253968253968e-06, "loss": 35.3902, "step": 4704 }, { "epoch": 112.02388059701492, "grad_norm": 25.639781951904297, "learning_rate": 9.841269841269842e-06, "loss": 40.7291, "step": 4705 }, { "epoch": 112.04776119402985, "grad_norm": 19.745450973510742, "learning_rate": 9.839285714285715e-06, "loss": 41.6564, "step": 4706 }, { "epoch": 112.07164179104478, "grad_norm": 24.907617568969727, "learning_rate": 9.837301587301588e-06, "loss": 41.4856, "step": 4707 }, { "epoch": 112.0955223880597, "grad_norm": 24.20347023010254, "learning_rate": 9.83531746031746e-06, "loss": 40.6423, "step": 4708 }, { "epoch": 112.11940298507463, "grad_norm": 16.246206283569336, "learning_rate": 9.833333333333333e-06, "loss": 40.5309, "step": 4709 }, { "epoch": 112.14328358208955, "grad_norm": 28.89447784423828, "learning_rate": 9.831349206349207e-06, "loss": 41.173, "step": 4710 }, { "epoch": 112.16716417910447, "grad_norm": 18.989233016967773, "learning_rate": 9.82936507936508e-06, "loss": 42.2629, "step": 4711 }, { "epoch": 112.1910447761194, "grad_norm": 22.261035919189453, "learning_rate": 9.827380952380953e-06, "loss": 41.9901, "step": 4712 }, { "epoch": 112.21492537313434, "grad_norm": 21.082855224609375, "learning_rate": 9.825396825396825e-06, "loss": 40.9817, "step": 4713 }, { "epoch": 112.23880597014926, "grad_norm": 15.739337921142578, "learning_rate": 9.823412698412698e-06, "loss": 42.0745, "step": 4714 }, { "epoch": 112.26268656716418, "grad_norm": 25.604066848754883, "learning_rate": 9.821428571428573e-06, "loss": 40.9371, "step": 4715 }, { "epoch": 112.2865671641791, "grad_norm": 17.916481018066406, "learning_rate": 9.819444444444445e-06, "loss": 40.9361, "step": 4716 }, { "epoch": 112.31044776119403, "grad_norm": 21.53338050842285, "learning_rate": 9.817460317460318e-06, "loss": 40.2245, "step": 4717 }, { "epoch": 112.33432835820895, "grad_norm": 21.370702743530273, "learning_rate": 9.81547619047619e-06, "loss": 40.7986, "step": 4718 }, { "epoch": 112.35820895522389, "grad_norm": 18.217588424682617, "learning_rate": 9.813492063492063e-06, "loss": 41.5072, "step": 4719 }, { "epoch": 112.38208955223881, "grad_norm": 18.874122619628906, "learning_rate": 9.811507936507938e-06, "loss": 39.7088, "step": 4720 }, { "epoch": 112.40597014925373, "grad_norm": 17.31776237487793, "learning_rate": 9.80952380952381e-06, "loss": 41.6839, "step": 4721 }, { "epoch": 112.42985074626866, "grad_norm": 23.88166046142578, "learning_rate": 9.807539682539683e-06, "loss": 41.7857, "step": 4722 }, { "epoch": 112.45373134328358, "grad_norm": 17.09743881225586, "learning_rate": 9.805555555555556e-06, "loss": 42.2407, "step": 4723 }, { "epoch": 112.4776119402985, "grad_norm": 20.519947052001953, "learning_rate": 9.803571428571428e-06, "loss": 41.8095, "step": 4724 }, { "epoch": 112.50149253731344, "grad_norm": 23.761943817138672, "learning_rate": 9.801587301587301e-06, "loss": 41.371, "step": 4725 }, { "epoch": 112.52537313432836, "grad_norm": 17.033470153808594, "learning_rate": 9.799603174603176e-06, "loss": 41.5687, "step": 4726 }, { "epoch": 112.54925373134328, "grad_norm": 18.175559997558594, "learning_rate": 9.797619047619048e-06, "loss": 42.2144, "step": 4727 }, { "epoch": 112.57313432835821, "grad_norm": 19.10957145690918, "learning_rate": 9.795634920634921e-06, "loss": 40.2305, "step": 4728 }, { "epoch": 112.59701492537313, "grad_norm": 20.52096176147461, "learning_rate": 9.793650793650794e-06, "loss": 42.5612, "step": 4729 }, { "epoch": 112.62089552238805, "grad_norm": 17.42753791809082, "learning_rate": 9.791666666666666e-06, "loss": 43.286, "step": 4730 }, { "epoch": 112.64477611940299, "grad_norm": 25.452363967895508, "learning_rate": 9.78968253968254e-06, "loss": 41.0071, "step": 4731 }, { "epoch": 112.66865671641791, "grad_norm": 21.480247497558594, "learning_rate": 9.787698412698413e-06, "loss": 41.7063, "step": 4732 }, { "epoch": 112.69253731343284, "grad_norm": 18.553220748901367, "learning_rate": 9.785714285714286e-06, "loss": 41.4099, "step": 4733 }, { "epoch": 112.71641791044776, "grad_norm": 25.513225555419922, "learning_rate": 9.783730158730159e-06, "loss": 41.5696, "step": 4734 }, { "epoch": 112.74029850746268, "grad_norm": 16.76629638671875, "learning_rate": 9.781746031746032e-06, "loss": 41.6305, "step": 4735 }, { "epoch": 112.7641791044776, "grad_norm": 19.330625534057617, "learning_rate": 9.779761904761906e-06, "loss": 40.7885, "step": 4736 }, { "epoch": 112.78805970149254, "grad_norm": 24.649667739868164, "learning_rate": 9.777777777777779e-06, "loss": 41.5939, "step": 4737 }, { "epoch": 112.81194029850747, "grad_norm": 15.628157615661621, "learning_rate": 9.775793650793651e-06, "loss": 40.9676, "step": 4738 }, { "epoch": 112.83582089552239, "grad_norm": 18.18578338623047, "learning_rate": 9.773809523809524e-06, "loss": 40.0681, "step": 4739 }, { "epoch": 112.85970149253731, "grad_norm": 16.768980026245117, "learning_rate": 9.771825396825397e-06, "loss": 42.2564, "step": 4740 }, { "epoch": 112.88358208955223, "grad_norm": 18.52190399169922, "learning_rate": 9.769841269841271e-06, "loss": 42.4806, "step": 4741 }, { "epoch": 112.90746268656716, "grad_norm": 20.884937286376953, "learning_rate": 9.767857142857144e-06, "loss": 41.2333, "step": 4742 }, { "epoch": 112.9313432835821, "grad_norm": 20.760377883911133, "learning_rate": 9.765873015873017e-06, "loss": 41.3071, "step": 4743 }, { "epoch": 112.95522388059702, "grad_norm": 19.27536392211914, "learning_rate": 9.76388888888889e-06, "loss": 42.3135, "step": 4744 }, { "epoch": 112.97910447761194, "grad_norm": 16.836727142333984, "learning_rate": 9.761904761904762e-06, "loss": 40.9553, "step": 4745 }, { "epoch": 113.0, "grad_norm": 15.910188674926758, "learning_rate": 9.759920634920635e-06, "loss": 35.1574, "step": 4746 }, { "epoch": 113.02388059701492, "grad_norm": 25.05491828918457, "learning_rate": 9.757936507936509e-06, "loss": 40.585, "step": 4747 }, { "epoch": 113.04776119402985, "grad_norm": NaN, "learning_rate": 9.755952380952382e-06, "loss": 62.2866, "step": 4748 }, { "epoch": 113.07164179104478, "grad_norm": 15.88016414642334, "learning_rate": 9.755952380952382e-06, "loss": 41.1309, "step": 4749 }, { "epoch": 113.0955223880597, "grad_norm": NaN, "learning_rate": 9.753968253968254e-06, "loss": 48.2293, "step": 4750 }, { "epoch": 113.11940298507463, "grad_norm": 24.244104385375977, "learning_rate": 9.753968253968254e-06, "loss": 42.1546, "step": 4751 }, { "epoch": 113.14328358208955, "grad_norm": 24.652694702148438, "learning_rate": 9.751984126984127e-06, "loss": 41.6784, "step": 4752 }, { "epoch": 113.16716417910447, "grad_norm": 17.30400276184082, "learning_rate": 9.75e-06, "loss": 41.3338, "step": 4753 }, { "epoch": 113.1910447761194, "grad_norm": 22.837020874023438, "learning_rate": 9.748015873015874e-06, "loss": 39.9112, "step": 4754 }, { "epoch": 113.21492537313434, "grad_norm": NaN, "learning_rate": 9.746031746031747e-06, "loss": 51.4889, "step": 4755 }, { "epoch": 113.23880597014926, "grad_norm": 19.977386474609375, "learning_rate": 9.746031746031747e-06, "loss": 40.8136, "step": 4756 }, { "epoch": 113.26268656716418, "grad_norm": 17.338441848754883, "learning_rate": 9.74404761904762e-06, "loss": 41.41, "step": 4757 }, { "epoch": 113.2865671641791, "grad_norm": 17.25606346130371, "learning_rate": 9.742063492063492e-06, "loss": 42.0376, "step": 4758 }, { "epoch": 113.31044776119403, "grad_norm": 18.690338134765625, "learning_rate": 9.740079365079365e-06, "loss": 39.8714, "step": 4759 }, { "epoch": 113.33432835820895, "grad_norm": 20.5388240814209, "learning_rate": 9.73809523809524e-06, "loss": 40.7767, "step": 4760 }, { "epoch": 113.35820895522389, "grad_norm": 20.36353302001953, "learning_rate": 9.736111111111112e-06, "loss": 42.7652, "step": 4761 }, { "epoch": 113.38208955223881, "grad_norm": 17.473264694213867, "learning_rate": 9.734126984126985e-06, "loss": 41.3501, "step": 4762 }, { "epoch": 113.40597014925373, "grad_norm": 16.665048599243164, "learning_rate": 9.732142857142858e-06, "loss": 40.8948, "step": 4763 }, { "epoch": 113.42985074626866, "grad_norm": 18.917985916137695, "learning_rate": 9.73015873015873e-06, "loss": 42.4226, "step": 4764 }, { "epoch": 113.45373134328358, "grad_norm": 15.448834419250488, "learning_rate": 9.728174603174605e-06, "loss": 41.5246, "step": 4765 }, { "epoch": 113.4776119402985, "grad_norm": 16.92607879638672, "learning_rate": 9.726190476190477e-06, "loss": 41.1058, "step": 4766 }, { "epoch": 113.50149253731344, "grad_norm": 16.17359161376953, "learning_rate": 9.72420634920635e-06, "loss": 41.4232, "step": 4767 }, { "epoch": 113.52537313432836, "grad_norm": 16.6822452545166, "learning_rate": 9.722222222222223e-06, "loss": 41.9703, "step": 4768 }, { "epoch": 113.54925373134328, "grad_norm": 16.724811553955078, "learning_rate": 9.720238095238095e-06, "loss": 41.4117, "step": 4769 }, { "epoch": 113.57313432835821, "grad_norm": 16.85785484313965, "learning_rate": 9.71825396825397e-06, "loss": 41.4467, "step": 4770 }, { "epoch": 113.59701492537313, "grad_norm": 19.173654556274414, "learning_rate": 9.716269841269842e-06, "loss": 40.871, "step": 4771 }, { "epoch": 113.62089552238805, "grad_norm": 16.131881713867188, "learning_rate": 9.714285714285715e-06, "loss": 42.595, "step": 4772 }, { "epoch": 113.64477611940299, "grad_norm": 15.41543960571289, "learning_rate": 9.712301587301588e-06, "loss": 41.7077, "step": 4773 }, { "epoch": 113.66865671641791, "grad_norm": 19.808330535888672, "learning_rate": 9.71031746031746e-06, "loss": 40.8761, "step": 4774 }, { "epoch": 113.69253731343284, "grad_norm": 16.406370162963867, "learning_rate": 9.708333333333333e-06, "loss": 41.1769, "step": 4775 }, { "epoch": 113.71641791044776, "grad_norm": 20.239530563354492, "learning_rate": 9.706349206349208e-06, "loss": 40.8274, "step": 4776 }, { "epoch": 113.74029850746268, "grad_norm": 18.771743774414062, "learning_rate": 9.70436507936508e-06, "loss": 41.4099, "step": 4777 }, { "epoch": 113.7641791044776, "grad_norm": 18.418540954589844, "learning_rate": 9.702380952380953e-06, "loss": 39.6443, "step": 4778 }, { "epoch": 113.78805970149254, "grad_norm": 21.50214958190918, "learning_rate": 9.700396825396826e-06, "loss": 41.6937, "step": 4779 }, { "epoch": 113.81194029850747, "grad_norm": 22.449935913085938, "learning_rate": 9.698412698412698e-06, "loss": 41.7069, "step": 4780 }, { "epoch": 113.83582089552239, "grad_norm": 15.33384895324707, "learning_rate": 9.696428571428573e-06, "loss": 40.6666, "step": 4781 }, { "epoch": 113.85970149253731, "grad_norm": 21.013437271118164, "learning_rate": 9.694444444444446e-06, "loss": 40.5768, "step": 4782 }, { "epoch": 113.88358208955223, "grad_norm": 19.128190994262695, "learning_rate": 9.692460317460318e-06, "loss": 41.4668, "step": 4783 }, { "epoch": 113.90746268656716, "grad_norm": 23.851394653320312, "learning_rate": 9.690476190476191e-06, "loss": 41.1051, "step": 4784 }, { "epoch": 113.9313432835821, "grad_norm": 21.990671157836914, "learning_rate": 9.688492063492064e-06, "loss": 41.6264, "step": 4785 }, { "epoch": 113.95522388059702, "grad_norm": 16.185327529907227, "learning_rate": 9.686507936507938e-06, "loss": 41.8408, "step": 4786 }, { "epoch": 113.97910447761194, "grad_norm": 30.063560485839844, "learning_rate": 9.68452380952381e-06, "loss": 41.2658, "step": 4787 }, { "epoch": 114.0, "grad_norm": 19.5380916595459, "learning_rate": 9.682539682539683e-06, "loss": 36.7106, "step": 4788 }, { "epoch": 114.02388059701492, "grad_norm": 26.1965389251709, "learning_rate": 9.680555555555556e-06, "loss": 42.3092, "step": 4789 }, { "epoch": 114.04776119402985, "grad_norm": 19.98543930053711, "learning_rate": 9.678571428571429e-06, "loss": 41.2309, "step": 4790 }, { "epoch": 114.07164179104478, "grad_norm": 26.361085891723633, "learning_rate": 9.676587301587303e-06, "loss": 41.9058, "step": 4791 }, { "epoch": 114.0955223880597, "grad_norm": 23.132400512695312, "learning_rate": 9.674603174603176e-06, "loss": 43.0372, "step": 4792 }, { "epoch": 114.11940298507463, "grad_norm": 25.199525833129883, "learning_rate": 9.672619047619049e-06, "loss": 41.5403, "step": 4793 }, { "epoch": 114.14328358208955, "grad_norm": 23.17612075805664, "learning_rate": 9.670634920634921e-06, "loss": 41.0863, "step": 4794 }, { "epoch": 114.16716417910447, "grad_norm": 23.930667877197266, "learning_rate": 9.668650793650794e-06, "loss": 40.8035, "step": 4795 }, { "epoch": 114.1910447761194, "grad_norm": 23.487939834594727, "learning_rate": 9.666666666666667e-06, "loss": 39.6217, "step": 4796 }, { "epoch": 114.21492537313434, "grad_norm": 23.342439651489258, "learning_rate": 9.664682539682541e-06, "loss": 42.0502, "step": 4797 }, { "epoch": 114.23880597014926, "grad_norm": 25.328317642211914, "learning_rate": 9.662698412698414e-06, "loss": 40.3101, "step": 4798 }, { "epoch": 114.26268656716418, "grad_norm": 18.363313674926758, "learning_rate": 9.660714285714287e-06, "loss": 40.5746, "step": 4799 }, { "epoch": 114.2865671641791, "grad_norm": 24.081649780273438, "learning_rate": 9.65873015873016e-06, "loss": 42.0376, "step": 4800 }, { "epoch": 114.31044776119403, "grad_norm": 20.24997329711914, "learning_rate": 9.656746031746032e-06, "loss": 40.5347, "step": 4801 }, { "epoch": 114.33432835820895, "grad_norm": 14.942011833190918, "learning_rate": 9.654761904761906e-06, "loss": 41.7814, "step": 4802 }, { "epoch": 114.35820895522389, "grad_norm": 22.662822723388672, "learning_rate": 9.652777777777779e-06, "loss": 41.767, "step": 4803 }, { "epoch": 114.38208955223881, "grad_norm": 19.27354621887207, "learning_rate": 9.650793650793652e-06, "loss": 40.7947, "step": 4804 }, { "epoch": 114.40597014925373, "grad_norm": 14.431193351745605, "learning_rate": 9.648809523809524e-06, "loss": 42.3785, "step": 4805 }, { "epoch": 114.42985074626866, "grad_norm": 15.706212043762207, "learning_rate": 9.646825396825397e-06, "loss": 42.0003, "step": 4806 }, { "epoch": 114.45373134328358, "grad_norm": 17.65169906616211, "learning_rate": 9.644841269841271e-06, "loss": 41.968, "step": 4807 }, { "epoch": 114.4776119402985, "grad_norm": 16.792739868164062, "learning_rate": 9.642857142857144e-06, "loss": 41.1987, "step": 4808 }, { "epoch": 114.50149253731344, "grad_norm": 20.06905746459961, "learning_rate": 9.640873015873017e-06, "loss": 41.0098, "step": 4809 }, { "epoch": 114.52537313432836, "grad_norm": 24.13865852355957, "learning_rate": 9.63888888888889e-06, "loss": 41.5633, "step": 4810 }, { "epoch": 114.54925373134328, "grad_norm": 16.85896873474121, "learning_rate": 9.636904761904762e-06, "loss": 41.7772, "step": 4811 }, { "epoch": 114.57313432835821, "grad_norm": 15.44628620147705, "learning_rate": 9.634920634920637e-06, "loss": 40.0732, "step": 4812 }, { "epoch": 114.59701492537313, "grad_norm": 18.970260620117188, "learning_rate": 9.63293650793651e-06, "loss": 42.318, "step": 4813 }, { "epoch": 114.62089552238805, "grad_norm": 16.574501037597656, "learning_rate": 9.630952380952382e-06, "loss": 40.0387, "step": 4814 }, { "epoch": 114.64477611940299, "grad_norm": 18.372955322265625, "learning_rate": 9.628968253968255e-06, "loss": 41.5759, "step": 4815 }, { "epoch": 114.66865671641791, "grad_norm": 21.253253936767578, "learning_rate": 9.626984126984127e-06, "loss": 40.2675, "step": 4816 }, { "epoch": 114.69253731343284, "grad_norm": 19.223817825317383, "learning_rate": 9.625e-06, "loss": 41.1779, "step": 4817 }, { "epoch": 114.71641791044776, "grad_norm": 17.391407012939453, "learning_rate": 9.623015873015875e-06, "loss": 40.9899, "step": 4818 }, { "epoch": 114.74029850746268, "grad_norm": 21.367889404296875, "learning_rate": 9.621031746031747e-06, "loss": 40.1854, "step": 4819 }, { "epoch": 114.7641791044776, "grad_norm": 21.202396392822266, "learning_rate": 9.61904761904762e-06, "loss": 41.5819, "step": 4820 }, { "epoch": 114.78805970149254, "grad_norm": 14.345793724060059, "learning_rate": 9.617063492063493e-06, "loss": 41.7843, "step": 4821 }, { "epoch": 114.81194029850747, "grad_norm": 16.483112335205078, "learning_rate": 9.615079365079365e-06, "loss": 40.9715, "step": 4822 }, { "epoch": 114.83582089552239, "grad_norm": 16.397315979003906, "learning_rate": 9.61309523809524e-06, "loss": 40.8702, "step": 4823 }, { "epoch": 114.85970149253731, "grad_norm": 14.784750938415527, "learning_rate": 9.611111111111112e-06, "loss": 40.5076, "step": 4824 }, { "epoch": 114.88358208955223, "grad_norm": 21.29036521911621, "learning_rate": 9.609126984126985e-06, "loss": 41.0657, "step": 4825 }, { "epoch": 114.90746268656716, "grad_norm": 19.237743377685547, "learning_rate": 9.607142857142858e-06, "loss": 40.7839, "step": 4826 }, { "epoch": 114.9313432835821, "grad_norm": 17.527833938598633, "learning_rate": 9.60515873015873e-06, "loss": 41.3853, "step": 4827 }, { "epoch": 114.95522388059702, "grad_norm": 16.477439880371094, "learning_rate": 9.603174603174605e-06, "loss": 41.3862, "step": 4828 }, { "epoch": 114.97910447761194, "grad_norm": 16.46197509765625, "learning_rate": 9.601190476190478e-06, "loss": 41.9143, "step": 4829 }, { "epoch": 115.0, "grad_norm": 18.8862361907959, "learning_rate": 9.59920634920635e-06, "loss": 36.444, "step": 4830 }, { "epoch": 115.02388059701492, "grad_norm": 22.985044479370117, "learning_rate": 9.597222222222223e-06, "loss": 41.3098, "step": 4831 }, { "epoch": 115.04776119402985, "grad_norm": 17.263700485229492, "learning_rate": 9.595238095238096e-06, "loss": 41.2013, "step": 4832 }, { "epoch": 115.07164179104478, "grad_norm": 21.497802734375, "learning_rate": 9.59325396825397e-06, "loss": 40.4798, "step": 4833 }, { "epoch": 115.0955223880597, "grad_norm": 20.014450073242188, "learning_rate": 9.591269841269843e-06, "loss": 41.2098, "step": 4834 }, { "epoch": 115.11940298507463, "grad_norm": 18.972618103027344, "learning_rate": 9.589285714285716e-06, "loss": 41.7606, "step": 4835 }, { "epoch": 115.14328358208955, "grad_norm": 14.9144287109375, "learning_rate": 9.587301587301588e-06, "loss": 40.7529, "step": 4836 }, { "epoch": 115.16716417910447, "grad_norm": 24.37519073486328, "learning_rate": 9.585317460317461e-06, "loss": 41.7598, "step": 4837 }, { "epoch": 115.1910447761194, "grad_norm": 23.033283233642578, "learning_rate": 9.583333333333335e-06, "loss": 41.4316, "step": 4838 }, { "epoch": 115.21492537313434, "grad_norm": 20.98251724243164, "learning_rate": 9.581349206349208e-06, "loss": 40.3066, "step": 4839 }, { "epoch": 115.23880597014926, "grad_norm": 21.950714111328125, "learning_rate": 9.57936507936508e-06, "loss": 40.1732, "step": 4840 }, { "epoch": 115.26268656716418, "grad_norm": 22.479713439941406, "learning_rate": 9.577380952380953e-06, "loss": 41.586, "step": 4841 }, { "epoch": 115.2865671641791, "grad_norm": 16.739639282226562, "learning_rate": 9.575396825396826e-06, "loss": 42.143, "step": 4842 }, { "epoch": 115.31044776119403, "grad_norm": 23.182594299316406, "learning_rate": 9.573412698412699e-06, "loss": 42.4852, "step": 4843 }, { "epoch": 115.33432835820895, "grad_norm": 23.18885040283203, "learning_rate": 9.571428571428573e-06, "loss": 40.3618, "step": 4844 }, { "epoch": 115.35820895522389, "grad_norm": 15.238030433654785, "learning_rate": 9.569444444444446e-06, "loss": 41.3859, "step": 4845 }, { "epoch": 115.38208955223881, "grad_norm": 28.07355308532715, "learning_rate": 9.567460317460319e-06, "loss": 41.1147, "step": 4846 }, { "epoch": 115.40597014925373, "grad_norm": 21.76200294494629, "learning_rate": 9.565476190476191e-06, "loss": 41.6603, "step": 4847 }, { "epoch": 115.42985074626866, "grad_norm": 32.459312438964844, "learning_rate": 9.563492063492064e-06, "loss": 40.7283, "step": 4848 }, { "epoch": 115.45373134328358, "grad_norm": 22.368288040161133, "learning_rate": 9.561507936507938e-06, "loss": 40.4951, "step": 4849 }, { "epoch": 115.4776119402985, "grad_norm": 22.91469955444336, "learning_rate": 9.559523809523811e-06, "loss": 41.117, "step": 4850 }, { "epoch": 115.50149253731344, "grad_norm": 20.357376098632812, "learning_rate": 9.557539682539684e-06, "loss": 41.753, "step": 4851 }, { "epoch": 115.52537313432836, "grad_norm": 21.377849578857422, "learning_rate": 9.555555555555556e-06, "loss": 41.8999, "step": 4852 }, { "epoch": 115.54925373134328, "grad_norm": 33.38006591796875, "learning_rate": 9.55357142857143e-06, "loss": 41.1317, "step": 4853 }, { "epoch": 115.57313432835821, "grad_norm": 21.435209274291992, "learning_rate": 9.551587301587304e-06, "loss": 40.1686, "step": 4854 }, { "epoch": 115.59701492537313, "grad_norm": 31.958423614501953, "learning_rate": 9.549603174603176e-06, "loss": 42.572, "step": 4855 }, { "epoch": 115.62089552238805, "grad_norm": 21.460599899291992, "learning_rate": 9.547619047619049e-06, "loss": 40.5071, "step": 4856 }, { "epoch": 115.64477611940299, "grad_norm": 33.65336227416992, "learning_rate": 9.545634920634922e-06, "loss": 41.7753, "step": 4857 }, { "epoch": 115.66865671641791, "grad_norm": 23.594022750854492, "learning_rate": 9.543650793650794e-06, "loss": 41.4436, "step": 4858 }, { "epoch": 115.69253731343284, "grad_norm": 23.563594818115234, "learning_rate": 9.541666666666669e-06, "loss": 39.9414, "step": 4859 }, { "epoch": 115.71641791044776, "grad_norm": 24.98297882080078, "learning_rate": 9.539682539682541e-06, "loss": 40.8619, "step": 4860 }, { "epoch": 115.74029850746268, "grad_norm": 22.393163681030273, "learning_rate": 9.537698412698414e-06, "loss": 42.8338, "step": 4861 }, { "epoch": 115.7641791044776, "grad_norm": 30.07286834716797, "learning_rate": 9.535714285714287e-06, "loss": 41.2226, "step": 4862 }, { "epoch": 115.78805970149254, "grad_norm": 22.388198852539062, "learning_rate": 9.53373015873016e-06, "loss": 41.1935, "step": 4863 }, { "epoch": 115.81194029850747, "grad_norm": 33.4913215637207, "learning_rate": 9.531746031746032e-06, "loss": 42.5784, "step": 4864 }, { "epoch": 115.83582089552239, "grad_norm": 25.117082595825195, "learning_rate": 9.529761904761905e-06, "loss": 39.364, "step": 4865 }, { "epoch": 115.85970149253731, "grad_norm": 37.31660079956055, "learning_rate": 9.527777777777778e-06, "loss": 41.5319, "step": 4866 }, { "epoch": 115.88358208955223, "grad_norm": 28.936159133911133, "learning_rate": 9.52579365079365e-06, "loss": 41.757, "step": 4867 }, { "epoch": 115.90746268656716, "grad_norm": 34.599647521972656, "learning_rate": 9.523809523809525e-06, "loss": 41.6518, "step": 4868 }, { "epoch": 115.9313432835821, "grad_norm": 27.539873123168945, "learning_rate": 9.521825396825397e-06, "loss": 40.9794, "step": 4869 }, { "epoch": 115.95522388059702, "grad_norm": 37.74484634399414, "learning_rate": 9.51984126984127e-06, "loss": 40.8585, "step": 4870 }, { "epoch": 115.97910447761194, "grad_norm": 32.444847106933594, "learning_rate": 9.517857142857143e-06, "loss": 41.7152, "step": 4871 }, { "epoch": 116.0, "grad_norm": 32.239253997802734, "learning_rate": 9.515873015873016e-06, "loss": 35.2825, "step": 4872 }, { "epoch": 116.02388059701492, "grad_norm": 35.12287521362305, "learning_rate": 9.51388888888889e-06, "loss": 41.7451, "step": 4873 }, { "epoch": 116.04776119402985, "grad_norm": 28.03133773803711, "learning_rate": 9.511904761904763e-06, "loss": 40.8461, "step": 4874 }, { "epoch": 116.07164179104478, "grad_norm": 25.59912872314453, "learning_rate": 9.509920634920635e-06, "loss": 41.5307, "step": 4875 }, { "epoch": 116.0955223880597, "grad_norm": 31.361936569213867, "learning_rate": 9.507936507936508e-06, "loss": 41.9054, "step": 4876 }, { "epoch": 116.11940298507463, "grad_norm": 21.869449615478516, "learning_rate": 9.50595238095238e-06, "loss": 40.38, "step": 4877 }, { "epoch": 116.14328358208955, "grad_norm": 38.86557388305664, "learning_rate": 9.503968253968255e-06, "loss": 42.0518, "step": 4878 }, { "epoch": 116.16716417910447, "grad_norm": 31.712495803833008, "learning_rate": 9.501984126984128e-06, "loss": 40.2141, "step": 4879 }, { "epoch": 116.1910447761194, "grad_norm": 34.77455520629883, "learning_rate": 9.5e-06, "loss": 41.5116, "step": 4880 }, { "epoch": 116.21492537313434, "grad_norm": 28.530269622802734, "learning_rate": 9.498015873015873e-06, "loss": 40.6907, "step": 4881 }, { "epoch": 116.23880597014926, "grad_norm": 28.550081253051758, "learning_rate": 9.496031746031746e-06, "loss": 41.0168, "step": 4882 }, { "epoch": 116.26268656716418, "grad_norm": 28.081035614013672, "learning_rate": 9.494047619047619e-06, "loss": 42.3482, "step": 4883 }, { "epoch": 116.2865671641791, "grad_norm": 39.402713775634766, "learning_rate": 9.492063492063493e-06, "loss": 41.3423, "step": 4884 }, { "epoch": 116.31044776119403, "grad_norm": 30.37664794921875, "learning_rate": 9.490079365079366e-06, "loss": 41.0571, "step": 4885 }, { "epoch": 116.33432835820895, "grad_norm": 33.314979553222656, "learning_rate": 9.488095238095238e-06, "loss": 41.7844, "step": 4886 }, { "epoch": 116.35820895522389, "grad_norm": 31.91356658935547, "learning_rate": 9.486111111111111e-06, "loss": 42.6115, "step": 4887 }, { "epoch": 116.38208955223881, "grad_norm": 33.23076629638672, "learning_rate": 9.484126984126984e-06, "loss": 42.9912, "step": 4888 }, { "epoch": 116.40597014925373, "grad_norm": 33.23727798461914, "learning_rate": 9.482142857142858e-06, "loss": 40.2839, "step": 4889 }, { "epoch": 116.42985074626866, "grad_norm": 34.349090576171875, "learning_rate": 9.480158730158731e-06, "loss": 41.3853, "step": 4890 }, { "epoch": 116.45373134328358, "grad_norm": 28.603391647338867, "learning_rate": 9.478174603174604e-06, "loss": 41.8607, "step": 4891 }, { "epoch": 116.4776119402985, "grad_norm": 30.6513671875, "learning_rate": 9.476190476190476e-06, "loss": 40.6123, "step": 4892 }, { "epoch": 116.50149253731344, "grad_norm": 26.542037963867188, "learning_rate": 9.474206349206349e-06, "loss": 40.7056, "step": 4893 }, { "epoch": 116.52537313432836, "grad_norm": 33.709774017333984, "learning_rate": 9.472222222222223e-06, "loss": 41.8717, "step": 4894 }, { "epoch": 116.54925373134328, "grad_norm": 29.847158432006836, "learning_rate": 9.470238095238096e-06, "loss": 39.7896, "step": 4895 }, { "epoch": 116.57313432835821, "grad_norm": 29.366252899169922, "learning_rate": 9.468253968253969e-06, "loss": 40.6317, "step": 4896 }, { "epoch": 116.59701492537313, "grad_norm": 27.17310905456543, "learning_rate": 9.466269841269841e-06, "loss": 41.57, "step": 4897 }, { "epoch": 116.62089552238805, "grad_norm": 29.52984619140625, "learning_rate": 9.464285714285714e-06, "loss": 41.313, "step": 4898 }, { "epoch": 116.64477611940299, "grad_norm": 25.72901725769043, "learning_rate": 9.462301587301589e-06, "loss": 39.4479, "step": 4899 }, { "epoch": 116.66865671641791, "grad_norm": 36.030372619628906, "learning_rate": 9.460317460317461e-06, "loss": 41.6829, "step": 4900 }, { "epoch": 116.69253731343284, "grad_norm": 30.29513168334961, "learning_rate": 9.458333333333334e-06, "loss": 41.8183, "step": 4901 }, { "epoch": 116.71641791044776, "grad_norm": 28.564956665039062, "learning_rate": 9.456349206349207e-06, "loss": 41.1474, "step": 4902 }, { "epoch": 116.74029850746268, "grad_norm": 24.22428321838379, "learning_rate": 9.45436507936508e-06, "loss": 41.2769, "step": 4903 }, { "epoch": 116.7641791044776, "grad_norm": 27.916051864624023, "learning_rate": 9.452380952380952e-06, "loss": 40.8082, "step": 4904 }, { "epoch": 116.78805970149254, "grad_norm": 20.302335739135742, "learning_rate": 9.450396825396826e-06, "loss": 41.0273, "step": 4905 }, { "epoch": 116.81194029850747, "grad_norm": 32.881134033203125, "learning_rate": 9.4484126984127e-06, "loss": 41.9168, "step": 4906 }, { "epoch": 116.83582089552239, "grad_norm": 26.058923721313477, "learning_rate": 9.446428571428572e-06, "loss": 41.0683, "step": 4907 }, { "epoch": 116.85970149253731, "grad_norm": 34.14630889892578, "learning_rate": 9.444444444444445e-06, "loss": 40.9509, "step": 4908 }, { "epoch": 116.88358208955223, "grad_norm": 31.35688018798828, "learning_rate": 9.442460317460317e-06, "loss": 40.551, "step": 4909 }, { "epoch": 116.90746268656716, "grad_norm": 24.473339080810547, "learning_rate": 9.440476190476192e-06, "loss": 39.3649, "step": 4910 }, { "epoch": 116.9313432835821, "grad_norm": 21.814205169677734, "learning_rate": 9.438492063492064e-06, "loss": 40.4577, "step": 4911 }, { "epoch": 116.95522388059702, "grad_norm": 29.724409103393555, "learning_rate": 9.436507936507937e-06, "loss": 40.6152, "step": 4912 }, { "epoch": 116.97910447761194, "grad_norm": 24.086170196533203, "learning_rate": 9.43452380952381e-06, "loss": 41.106, "step": 4913 }, { "epoch": 117.0, "grad_norm": 28.476037979125977, "learning_rate": 9.432539682539682e-06, "loss": 36.447, "step": 4914 }, { "epoch": 117.02388059701492, "grad_norm": 27.55150032043457, "learning_rate": 9.430555555555557e-06, "loss": 41.9582, "step": 4915 }, { "epoch": 117.04776119402985, "grad_norm": 28.565845489501953, "learning_rate": 9.42857142857143e-06, "loss": 40.9572, "step": 4916 }, { "epoch": 117.07164179104478, "grad_norm": 24.59885025024414, "learning_rate": 9.426587301587302e-06, "loss": 41.2797, "step": 4917 }, { "epoch": 117.0955223880597, "grad_norm": 21.83265495300293, "learning_rate": 9.424603174603175e-06, "loss": 41.1726, "step": 4918 }, { "epoch": 117.11940298507463, "grad_norm": 21.117053985595703, "learning_rate": 9.422619047619048e-06, "loss": 42.4423, "step": 4919 }, { "epoch": 117.14328358208955, "grad_norm": 26.478992462158203, "learning_rate": 9.420634920634922e-06, "loss": 40.2709, "step": 4920 }, { "epoch": 117.16716417910447, "grad_norm": 20.61237335205078, "learning_rate": 9.418650793650795e-06, "loss": 40.8788, "step": 4921 }, { "epoch": 117.1910447761194, "grad_norm": 32.1706657409668, "learning_rate": 9.416666666666667e-06, "loss": 41.6381, "step": 4922 }, { "epoch": 117.21492537313434, "grad_norm": 26.040164947509766, "learning_rate": 9.41468253968254e-06, "loss": 40.662, "step": 4923 }, { "epoch": 117.23880597014926, "grad_norm": 27.465307235717773, "learning_rate": 9.412698412698413e-06, "loss": 39.2348, "step": 4924 }, { "epoch": 117.26268656716418, "grad_norm": 28.407739639282227, "learning_rate": 9.410714285714286e-06, "loss": 40.981, "step": 4925 }, { "epoch": 117.2865671641791, "grad_norm": 26.080398559570312, "learning_rate": 9.40873015873016e-06, "loss": 39.726, "step": 4926 }, { "epoch": 117.31044776119403, "grad_norm": 23.23761749267578, "learning_rate": 9.406746031746033e-06, "loss": 41.9898, "step": 4927 }, { "epoch": 117.33432835820895, "grad_norm": 25.763086318969727, "learning_rate": 9.404761904761905e-06, "loss": 41.6503, "step": 4928 }, { "epoch": 117.35820895522389, "grad_norm": 25.27565574645996, "learning_rate": 9.402777777777778e-06, "loss": 41.5848, "step": 4929 }, { "epoch": 117.38208955223881, "grad_norm": 21.535991668701172, "learning_rate": 9.40079365079365e-06, "loss": 41.4816, "step": 4930 }, { "epoch": 117.40597014925373, "grad_norm": 20.212120056152344, "learning_rate": 9.398809523809525e-06, "loss": 40.8427, "step": 4931 }, { "epoch": 117.42985074626866, "grad_norm": 24.479822158813477, "learning_rate": 9.396825396825398e-06, "loss": 41.3141, "step": 4932 }, { "epoch": 117.45373134328358, "grad_norm": 14.332042694091797, "learning_rate": 9.39484126984127e-06, "loss": 41.4974, "step": 4933 }, { "epoch": 117.4776119402985, "grad_norm": 22.84208869934082, "learning_rate": 9.392857142857143e-06, "loss": 41.8713, "step": 4934 }, { "epoch": 117.50149253731344, "grad_norm": 18.916187286376953, "learning_rate": 9.390873015873016e-06, "loss": 41.2954, "step": 4935 }, { "epoch": 117.52537313432836, "grad_norm": 22.096107482910156, "learning_rate": 9.38888888888889e-06, "loss": 40.7045, "step": 4936 }, { "epoch": 117.54925373134328, "grad_norm": 20.42098045349121, "learning_rate": 9.386904761904763e-06, "loss": 42.4039, "step": 4937 }, { "epoch": 117.57313432835821, "grad_norm": 19.17930793762207, "learning_rate": 9.384920634920636e-06, "loss": 41.2849, "step": 4938 }, { "epoch": 117.59701492537313, "grad_norm": 18.003908157348633, "learning_rate": 9.382936507936508e-06, "loss": 41.2694, "step": 4939 }, { "epoch": 117.62089552238805, "grad_norm": 21.67378044128418, "learning_rate": 9.380952380952381e-06, "loss": 41.4086, "step": 4940 }, { "epoch": 117.64477611940299, "grad_norm": 14.220067024230957, "learning_rate": 9.378968253968255e-06, "loss": 40.5293, "step": 4941 }, { "epoch": 117.66865671641791, "grad_norm": 17.12972640991211, "learning_rate": 9.376984126984128e-06, "loss": 40.7469, "step": 4942 }, { "epoch": 117.69253731343284, "grad_norm": 21.055694580078125, "learning_rate": 9.375000000000001e-06, "loss": 39.6643, "step": 4943 }, { "epoch": 117.71641791044776, "grad_norm": 17.032026290893555, "learning_rate": 9.373015873015874e-06, "loss": 39.6835, "step": 4944 }, { "epoch": 117.74029850746268, "grad_norm": 22.909225463867188, "learning_rate": 9.371031746031746e-06, "loss": 41.411, "step": 4945 }, { "epoch": 117.7641791044776, "grad_norm": 15.6399564743042, "learning_rate": 9.36904761904762e-06, "loss": 41.0838, "step": 4946 }, { "epoch": 117.78805970149254, "grad_norm": 22.99868392944336, "learning_rate": 9.367063492063493e-06, "loss": 41.1988, "step": 4947 }, { "epoch": 117.81194029850747, "grad_norm": 19.78955841064453, "learning_rate": 9.365079365079366e-06, "loss": 41.5181, "step": 4948 }, { "epoch": 117.83582089552239, "grad_norm": 21.281328201293945, "learning_rate": 9.363095238095239e-06, "loss": 40.5115, "step": 4949 }, { "epoch": 117.85970149253731, "grad_norm": 19.100648880004883, "learning_rate": 9.361111111111111e-06, "loss": 40.3604, "step": 4950 }, { "epoch": 117.88358208955223, "grad_norm": 24.486183166503906, "learning_rate": 9.359126984126984e-06, "loss": 42.065, "step": 4951 }, { "epoch": 117.90746268656716, "grad_norm": 20.265453338623047, "learning_rate": 9.357142857142859e-06, "loss": 42.1137, "step": 4952 }, { "epoch": 117.9313432835821, "grad_norm": 21.281848907470703, "learning_rate": 9.355158730158731e-06, "loss": 42.0899, "step": 4953 }, { "epoch": 117.95522388059702, "grad_norm": 21.65452766418457, "learning_rate": 9.353174603174604e-06, "loss": 41.4076, "step": 4954 }, { "epoch": 117.97910447761194, "grad_norm": 19.85662841796875, "learning_rate": 9.351190476190477e-06, "loss": 40.9143, "step": 4955 }, { "epoch": 118.0, "grad_norm": 16.60548210144043, "learning_rate": 9.34920634920635e-06, "loss": 35.2268, "step": 4956 }, { "epoch": 118.02388059701492, "grad_norm": 19.02985382080078, "learning_rate": 9.347222222222224e-06, "loss": 41.6227, "step": 4957 }, { "epoch": 118.04776119402985, "grad_norm": 20.057069778442383, "learning_rate": 9.345238095238096e-06, "loss": 39.6729, "step": 4958 }, { "epoch": 118.07164179104478, "grad_norm": 16.330196380615234, "learning_rate": 9.343253968253969e-06, "loss": 41.2542, "step": 4959 }, { "epoch": 118.0955223880597, "grad_norm": 18.172393798828125, "learning_rate": 9.341269841269842e-06, "loss": 40.0607, "step": 4960 }, { "epoch": 118.11940298507463, "grad_norm": 20.96540069580078, "learning_rate": 9.339285714285715e-06, "loss": 40.585, "step": 4961 }, { "epoch": 118.14328358208955, "grad_norm": 14.967394828796387, "learning_rate": 9.337301587301589e-06, "loss": 40.2613, "step": 4962 }, { "epoch": 118.16716417910447, "grad_norm": 18.953601837158203, "learning_rate": 9.335317460317462e-06, "loss": 39.38, "step": 4963 }, { "epoch": 118.1910447761194, "grad_norm": 15.904739379882812, "learning_rate": 9.333333333333334e-06, "loss": 41.6314, "step": 4964 }, { "epoch": 118.21492537313434, "grad_norm": 24.293170928955078, "learning_rate": 9.331349206349207e-06, "loss": 40.5077, "step": 4965 }, { "epoch": 118.23880597014926, "grad_norm": 20.04494857788086, "learning_rate": 9.32936507936508e-06, "loss": 40.8951, "step": 4966 }, { "epoch": 118.26268656716418, "grad_norm": 23.613727569580078, "learning_rate": 9.327380952380954e-06, "loss": 42.0233, "step": 4967 }, { "epoch": 118.2865671641791, "grad_norm": 23.967741012573242, "learning_rate": 9.325396825396827e-06, "loss": 41.0547, "step": 4968 }, { "epoch": 118.31044776119403, "grad_norm": 19.54030418395996, "learning_rate": 9.3234126984127e-06, "loss": 41.2887, "step": 4969 }, { "epoch": 118.33432835820895, "grad_norm": 23.12442398071289, "learning_rate": 9.321428571428572e-06, "loss": 40.5083, "step": 4970 }, { "epoch": 118.35820895522389, "grad_norm": 21.34069061279297, "learning_rate": 9.319444444444445e-06, "loss": 41.3474, "step": 4971 }, { "epoch": 118.38208955223881, "grad_norm": 20.411256790161133, "learning_rate": 9.317460317460318e-06, "loss": 40.3927, "step": 4972 }, { "epoch": 118.40597014925373, "grad_norm": 21.702983856201172, "learning_rate": 9.315476190476192e-06, "loss": 41.2522, "step": 4973 }, { "epoch": 118.42985074626866, "grad_norm": 20.09593963623047, "learning_rate": 9.313492063492065e-06, "loss": 40.8607, "step": 4974 }, { "epoch": 118.45373134328358, "grad_norm": 16.693893432617188, "learning_rate": 9.311507936507937e-06, "loss": 41.9847, "step": 4975 }, { "epoch": 118.4776119402985, "grad_norm": 16.682085037231445, "learning_rate": 9.30952380952381e-06, "loss": 41.3428, "step": 4976 }, { "epoch": 118.50149253731344, "grad_norm": 16.73056983947754, "learning_rate": 9.307539682539683e-06, "loss": 40.8279, "step": 4977 }, { "epoch": 118.52537313432836, "grad_norm": 16.317480087280273, "learning_rate": 9.305555555555557e-06, "loss": 40.4602, "step": 4978 }, { "epoch": 118.54925373134328, "grad_norm": 15.660470008850098, "learning_rate": 9.30357142857143e-06, "loss": 40.7565, "step": 4979 }, { "epoch": 118.57313432835821, "grad_norm": 21.601036071777344, "learning_rate": 9.301587301587303e-06, "loss": 41.7317, "step": 4980 }, { "epoch": 118.59701492537313, "grad_norm": 16.545438766479492, "learning_rate": 9.299603174603175e-06, "loss": 42.1659, "step": 4981 }, { "epoch": 118.62089552238805, "grad_norm": 20.3563175201416, "learning_rate": 9.297619047619048e-06, "loss": 39.8948, "step": 4982 }, { "epoch": 118.64477611940299, "grad_norm": 19.03108024597168, "learning_rate": 9.295634920634922e-06, "loss": 40.6225, "step": 4983 }, { "epoch": 118.66865671641791, "grad_norm": 18.866544723510742, "learning_rate": 9.293650793650795e-06, "loss": 40.781, "step": 4984 }, { "epoch": 118.69253731343284, "grad_norm": 18.367883682250977, "learning_rate": 9.291666666666668e-06, "loss": 42.1775, "step": 4985 }, { "epoch": 118.71641791044776, "grad_norm": 17.574983596801758, "learning_rate": 9.28968253968254e-06, "loss": 40.7228, "step": 4986 }, { "epoch": 118.74029850746268, "grad_norm": 17.931612014770508, "learning_rate": 9.287698412698413e-06, "loss": 41.352, "step": 4987 }, { "epoch": 118.7641791044776, "grad_norm": NaN, "learning_rate": 9.285714285714288e-06, "loss": 37.2747, "step": 4988 }, { "epoch": 118.78805970149254, "grad_norm": 19.131587982177734, "learning_rate": 9.285714285714288e-06, "loss": 41.442, "step": 4989 }, { "epoch": 118.81194029850747, "grad_norm": 19.01002311706543, "learning_rate": 9.28373015873016e-06, "loss": 40.1583, "step": 4990 }, { "epoch": 118.83582089552239, "grad_norm": 20.718921661376953, "learning_rate": 9.281746031746033e-06, "loss": 42.1721, "step": 4991 }, { "epoch": 118.85970149253731, "grad_norm": 24.149545669555664, "learning_rate": 9.279761904761906e-06, "loss": 39.6434, "step": 4992 }, { "epoch": 118.88358208955223, "grad_norm": 19.575162887573242, "learning_rate": 9.277777777777778e-06, "loss": 41.7524, "step": 4993 }, { "epoch": 118.90746268656716, "grad_norm": 21.472047805786133, "learning_rate": 9.275793650793653e-06, "loss": 41.5381, "step": 4994 }, { "epoch": 118.9313432835821, "grad_norm": 18.96376609802246, "learning_rate": 9.273809523809525e-06, "loss": 41.8712, "step": 4995 }, { "epoch": 118.95522388059702, "grad_norm": 20.816585540771484, "learning_rate": 9.271825396825398e-06, "loss": 42.7263, "step": 4996 }, { "epoch": 118.97910447761194, "grad_norm": 18.856704711914062, "learning_rate": 9.26984126984127e-06, "loss": 42.2396, "step": 4997 }, { "epoch": 119.0, "grad_norm": 17.700910568237305, "learning_rate": 9.267857142857144e-06, "loss": 35.0377, "step": 4998 }, { "epoch": 119.02388059701492, "grad_norm": 18.852880477905273, "learning_rate": 9.265873015873016e-06, "loss": 40.1171, "step": 4999 }, { "epoch": 119.04776119402985, "grad_norm": 17.4823055267334, "learning_rate": 9.26388888888889e-06, "loss": 39.7783, "step": 5000 }, { "epoch": 119.07164179104478, "grad_norm": 22.45401954650879, "learning_rate": 9.261904761904763e-06, "loss": 41.6926, "step": 5001 }, { "epoch": 119.0955223880597, "grad_norm": 19.38802719116211, "learning_rate": 9.259920634920636e-06, "loss": 41.0149, "step": 5002 }, { "epoch": 119.11940298507463, "grad_norm": 18.921022415161133, "learning_rate": 9.257936507936509e-06, "loss": 41.2486, "step": 5003 }, { "epoch": 119.14328358208955, "grad_norm": 22.00980567932129, "learning_rate": 9.255952380952381e-06, "loss": 40.8794, "step": 5004 }, { "epoch": 119.16716417910447, "grad_norm": 13.831929206848145, "learning_rate": 9.253968253968256e-06, "loss": 40.3292, "step": 5005 }, { "epoch": 119.1910447761194, "grad_norm": 20.504989624023438, "learning_rate": 9.251984126984129e-06, "loss": 41.5119, "step": 5006 }, { "epoch": 119.21492537313434, "grad_norm": 15.127291679382324, "learning_rate": 9.250000000000001e-06, "loss": 40.214, "step": 5007 }, { "epoch": 119.23880597014926, "grad_norm": 18.562606811523438, "learning_rate": 9.248015873015874e-06, "loss": 41.0757, "step": 5008 }, { "epoch": 119.26268656716418, "grad_norm": 20.99079132080078, "learning_rate": 9.246031746031747e-06, "loss": 41.3658, "step": 5009 }, { "epoch": 119.2865671641791, "grad_norm": 17.714588165283203, "learning_rate": 9.244047619047621e-06, "loss": 41.8379, "step": 5010 }, { "epoch": 119.31044776119403, "grad_norm": 20.95669174194336, "learning_rate": 9.242063492063494e-06, "loss": 40.6619, "step": 5011 }, { "epoch": 119.33432835820895, "grad_norm": 18.291975021362305, "learning_rate": 9.240079365079366e-06, "loss": 38.9992, "step": 5012 }, { "epoch": 119.35820895522389, "grad_norm": 14.831878662109375, "learning_rate": 9.238095238095239e-06, "loss": 41.5072, "step": 5013 }, { "epoch": 119.38208955223881, "grad_norm": 17.76835823059082, "learning_rate": 9.236111111111112e-06, "loss": 41.0227, "step": 5014 }, { "epoch": 119.40597014925373, "grad_norm": 15.433774948120117, "learning_rate": 9.234126984126986e-06, "loss": 40.4539, "step": 5015 }, { "epoch": 119.42985074626866, "grad_norm": 23.18012237548828, "learning_rate": 9.232142857142859e-06, "loss": 41.8991, "step": 5016 }, { "epoch": 119.45373134328358, "grad_norm": 17.35015106201172, "learning_rate": 9.230158730158732e-06, "loss": 40.189, "step": 5017 }, { "epoch": 119.4776119402985, "grad_norm": 19.60420036315918, "learning_rate": 9.228174603174604e-06, "loss": 41.602, "step": 5018 }, { "epoch": 119.50149253731344, "grad_norm": 20.470211029052734, "learning_rate": 9.226190476190477e-06, "loss": 42.1062, "step": 5019 }, { "epoch": 119.52537313432836, "grad_norm": 16.949901580810547, "learning_rate": 9.22420634920635e-06, "loss": 41.6508, "step": 5020 }, { "epoch": 119.54925373134328, "grad_norm": 22.598966598510742, "learning_rate": 9.222222222222224e-06, "loss": 39.9819, "step": 5021 }, { "epoch": 119.57313432835821, "grad_norm": 16.502370834350586, "learning_rate": 9.220238095238097e-06, "loss": 40.1142, "step": 5022 }, { "epoch": 119.59701492537313, "grad_norm": 20.456647872924805, "learning_rate": 9.218253968253968e-06, "loss": 41.6525, "step": 5023 }, { "epoch": 119.62089552238805, "grad_norm": 18.311965942382812, "learning_rate": 9.216269841269842e-06, "loss": 41.1592, "step": 5024 }, { "epoch": 119.64477611940299, "grad_norm": 19.683259963989258, "learning_rate": 9.214285714285715e-06, "loss": 40.853, "step": 5025 }, { "epoch": 119.66865671641791, "grad_norm": 20.134082794189453, "learning_rate": 9.212301587301588e-06, "loss": 40.3045, "step": 5026 }, { "epoch": 119.69253731343284, "grad_norm": 28.281267166137695, "learning_rate": 9.21031746031746e-06, "loss": 41.6703, "step": 5027 }, { "epoch": 119.71641791044776, "grad_norm": 22.25422477722168, "learning_rate": 9.208333333333333e-06, "loss": 41.0012, "step": 5028 }, { "epoch": 119.74029850746268, "grad_norm": 15.698911666870117, "learning_rate": 9.206349206349207e-06, "loss": 39.3874, "step": 5029 }, { "epoch": 119.7641791044776, "grad_norm": 22.822614669799805, "learning_rate": 9.20436507936508e-06, "loss": 42.7782, "step": 5030 }, { "epoch": 119.78805970149254, "grad_norm": 18.489330291748047, "learning_rate": 9.202380952380953e-06, "loss": 42.2175, "step": 5031 }, { "epoch": 119.81194029850747, "grad_norm": 23.18742561340332, "learning_rate": 9.200396825396825e-06, "loss": 42.1583, "step": 5032 }, { "epoch": 119.83582089552239, "grad_norm": 24.11537742614746, "learning_rate": 9.198412698412698e-06, "loss": 40.783, "step": 5033 }, { "epoch": 119.85970149253731, "grad_norm": 16.897441864013672, "learning_rate": 9.196428571428571e-06, "loss": 40.3459, "step": 5034 }, { "epoch": 119.88358208955223, "grad_norm": 20.22298812866211, "learning_rate": 9.194444444444445e-06, "loss": 40.9984, "step": 5035 }, { "epoch": 119.90746268656716, "grad_norm": 19.373756408691406, "learning_rate": 9.192460317460318e-06, "loss": 41.8363, "step": 5036 }, { "epoch": 119.9313432835821, "grad_norm": 16.265701293945312, "learning_rate": 9.19047619047619e-06, "loss": 40.9217, "step": 5037 }, { "epoch": 119.95522388059702, "grad_norm": 28.902698516845703, "learning_rate": 9.188492063492063e-06, "loss": 41.7966, "step": 5038 }, { "epoch": 119.97910447761194, "grad_norm": 19.491430282592773, "learning_rate": 9.186507936507936e-06, "loss": 41.2973, "step": 5039 }, { "epoch": 120.0, "grad_norm": 25.749500274658203, "learning_rate": 9.18452380952381e-06, "loss": 35.3125, "step": 5040 }, { "epoch": 120.0, "step": 5040, "total_flos": 2.4776207925060864e+17, "train_loss": 3.4518184624021013, "train_runtime": 12809.9419, "train_samples_per_second": 50.136, "train_steps_per_second": 0.393 }, { "epoch": 120.02388059701492, "grad_norm": 24.0944766998291, "learning_rate": 1e-05, "loss": 41.0597, "step": 5041 }, { "epoch": 120.04776119402985, "grad_norm": Infinity, "learning_rate": 9.998168498168499e-06, "loss": 46.3783, "step": 5042 }, { "epoch": 120.07164179104478, "grad_norm": 259.0445861816406, "learning_rate": 9.998168498168499e-06, "loss": 46.5108, "step": 5043 }, { "epoch": 120.0955223880597, "grad_norm": 128.19775390625, "learning_rate": 9.996336996336997e-06, "loss": 45.0948, "step": 5044 }, { "epoch": 120.11940298507463, "grad_norm": 58.83436584472656, "learning_rate": 9.994505494505496e-06, "loss": 43.1635, "step": 5045 }, { "epoch": 120.14328358208955, "grad_norm": 58.79975891113281, "learning_rate": 9.992673992673994e-06, "loss": 41.6829, "step": 5046 }, { "epoch": 120.16716417910447, "grad_norm": 50.534278869628906, "learning_rate": 9.990842490842492e-06, "loss": 42.3871, "step": 5047 }, { "epoch": 120.1910447761194, "grad_norm": 38.682125091552734, "learning_rate": 9.98901098901099e-06, "loss": 40.9709, "step": 5048 }, { "epoch": 120.21492537313434, "grad_norm": 35.06442642211914, "learning_rate": 9.987179487179488e-06, "loss": 41.0217, "step": 5049 }, { "epoch": 120.23880597014926, "grad_norm": 59.00712585449219, "learning_rate": 9.985347985347986e-06, "loss": 41.7985, "step": 5050 }, { "epoch": 120.26268656716418, "grad_norm": 36.52231216430664, "learning_rate": 9.983516483516485e-06, "loss": 41.6886, "step": 5051 }, { "epoch": 120.2865671641791, "grad_norm": 35.213436126708984, "learning_rate": 9.981684981684983e-06, "loss": 40.9909, "step": 5052 }, { "epoch": 120.31044776119403, "grad_norm": 40.0443000793457, "learning_rate": 9.97985347985348e-06, "loss": 41.1657, "step": 5053 }, { "epoch": 120.33432835820895, "grad_norm": 27.66771697998047, "learning_rate": 9.978021978021979e-06, "loss": 41.327, "step": 5054 }, { "epoch": 120.35820895522389, "grad_norm": 34.4952507019043, "learning_rate": 9.976190476190477e-06, "loss": 40.8086, "step": 5055 }, { "epoch": 120.38208955223881, "grad_norm": 26.404708862304688, "learning_rate": 9.974358974358974e-06, "loss": 41.0862, "step": 5056 }, { "epoch": 120.40597014925373, "grad_norm": 24.669050216674805, "learning_rate": 9.972527472527474e-06, "loss": 40.6639, "step": 5057 }, { "epoch": 120.42985074626866, "grad_norm": 29.60878562927246, "learning_rate": 9.970695970695972e-06, "loss": 40.8127, "step": 5058 }, { "epoch": 120.45373134328358, "grad_norm": 17.245283126831055, "learning_rate": 9.96886446886447e-06, "loss": 41.6983, "step": 5059 }, { "epoch": 120.4776119402985, "grad_norm": 26.338546752929688, "learning_rate": 9.967032967032968e-06, "loss": 40.5917, "step": 5060 }, { "epoch": 120.50149253731344, "grad_norm": 25.838808059692383, "learning_rate": 9.965201465201466e-06, "loss": 41.6386, "step": 5061 }, { "epoch": 120.52537313432836, "grad_norm": 17.583539962768555, "learning_rate": 9.963369963369965e-06, "loss": 39.5372, "step": 5062 }, { "epoch": 120.54925373134328, "grad_norm": 29.433382034301758, "learning_rate": 9.961538461538463e-06, "loss": 41.2372, "step": 5063 }, { "epoch": 120.57313432835821, "grad_norm": 19.41893768310547, "learning_rate": 9.959706959706961e-06, "loss": 41.2464, "step": 5064 }, { "epoch": 120.59701492537313, "grad_norm": 20.060937881469727, "learning_rate": 9.957875457875459e-06, "loss": 41.1316, "step": 5065 }, { "epoch": 120.62089552238805, "grad_norm": 21.93149185180664, "learning_rate": 9.956043956043957e-06, "loss": 40.6738, "step": 5066 }, { "epoch": 120.64477611940299, "grad_norm": 20.02782440185547, "learning_rate": 9.954212454212454e-06, "loss": 41.0332, "step": 5067 }, { "epoch": 120.66865671641791, "grad_norm": 16.836517333984375, "learning_rate": 9.952380952380954e-06, "loss": 41.8322, "step": 5068 }, { "epoch": 120.69253731343284, "grad_norm": 19.467927932739258, "learning_rate": 9.950549450549452e-06, "loss": 42.0419, "step": 5069 }, { "epoch": 120.71641791044776, "grad_norm": 20.398895263671875, "learning_rate": 9.94871794871795e-06, "loss": 40.1522, "step": 5070 }, { "epoch": 120.74029850746268, "grad_norm": 17.445634841918945, "learning_rate": 9.946886446886448e-06, "loss": 41.1946, "step": 5071 }, { "epoch": 120.7641791044776, "grad_norm": 17.94610595703125, "learning_rate": 9.945054945054946e-06, "loss": 41.4025, "step": 5072 }, { "epoch": 120.78805970149254, "grad_norm": 25.02172088623047, "learning_rate": 9.943223443223443e-06, "loss": 42.0855, "step": 5073 }, { "epoch": 120.81194029850747, "grad_norm": 16.557662963867188, "learning_rate": 9.941391941391943e-06, "loss": 39.8862, "step": 5074 }, { "epoch": 120.83582089552239, "grad_norm": 19.688400268554688, "learning_rate": 9.939560439560441e-06, "loss": 40.9361, "step": 5075 }, { "epoch": 120.85970149253731, "grad_norm": 29.196117401123047, "learning_rate": 9.937728937728939e-06, "loss": 42.8812, "step": 5076 }, { "epoch": 120.88358208955223, "grad_norm": 17.111480712890625, "learning_rate": 9.935897435897437e-06, "loss": 41.4032, "step": 5077 }, { "epoch": 120.90746268656716, "grad_norm": 29.072128295898438, "learning_rate": 9.934065934065935e-06, "loss": 42.2839, "step": 5078 }, { "epoch": 120.9313432835821, "grad_norm": 24.953367233276367, "learning_rate": 9.932234432234434e-06, "loss": 41.5165, "step": 5079 }, { "epoch": 120.95522388059702, "grad_norm": 19.515911102294922, "learning_rate": 9.930402930402932e-06, "loss": 40.4111, "step": 5080 }, { "epoch": 120.97910447761194, "grad_norm": 23.281414031982422, "learning_rate": 9.92857142857143e-06, "loss": 40.3576, "step": 5081 }, { "epoch": 121.0, "grad_norm": 16.75458335876465, "learning_rate": 9.926739926739928e-06, "loss": 36.3203, "step": 5082 }, { "epoch": 121.02388059701492, "grad_norm": 29.20741844177246, "learning_rate": 9.924908424908426e-06, "loss": 39.9303, "step": 5083 }, { "epoch": 121.04776119402985, "grad_norm": 21.79246711730957, "learning_rate": 9.923076923076923e-06, "loss": 41.4785, "step": 5084 }, { "epoch": 121.07164179104478, "grad_norm": 29.117504119873047, "learning_rate": 9.921245421245423e-06, "loss": 41.8695, "step": 5085 }, { "epoch": 121.0955223880597, "grad_norm": 17.819120407104492, "learning_rate": 9.919413919413921e-06, "loss": 39.2762, "step": 5086 }, { "epoch": 121.11940298507463, "grad_norm": 24.556377410888672, "learning_rate": 9.917582417582419e-06, "loss": 41.5134, "step": 5087 }, { "epoch": 121.14328358208955, "grad_norm": 19.049671173095703, "learning_rate": 9.915750915750917e-06, "loss": 40.8369, "step": 5088 }, { "epoch": 121.16716417910447, "grad_norm": 20.745899200439453, "learning_rate": 9.913919413919415e-06, "loss": 41.4137, "step": 5089 }, { "epoch": 121.1910447761194, "grad_norm": 21.53566551208496, "learning_rate": 9.912087912087912e-06, "loss": 40.3688, "step": 5090 }, { "epoch": 121.21492537313434, "grad_norm": 23.52694320678711, "learning_rate": 9.910256410256412e-06, "loss": 41.1741, "step": 5091 }, { "epoch": 121.23880597014926, "grad_norm": 19.23663330078125, "learning_rate": 9.90842490842491e-06, "loss": 41.2629, "step": 5092 }, { "epoch": 121.26268656716418, "grad_norm": 20.38791847229004, "learning_rate": 9.906593406593408e-06, "loss": 40.6994, "step": 5093 }, { "epoch": 121.2865671641791, "grad_norm": 29.10164451599121, "learning_rate": 9.904761904761906e-06, "loss": 41.7159, "step": 5094 }, { "epoch": 121.31044776119403, "grad_norm": 18.191295623779297, "learning_rate": 9.902930402930403e-06, "loss": 40.0695, "step": 5095 }, { "epoch": 121.33432835820895, "grad_norm": 34.14667510986328, "learning_rate": 9.901098901098903e-06, "loss": 40.7836, "step": 5096 }, { "epoch": 121.35820895522389, "grad_norm": 25.464981079101562, "learning_rate": 9.899267399267401e-06, "loss": 40.5731, "step": 5097 }, { "epoch": 121.38208955223881, "grad_norm": 34.738773345947266, "learning_rate": 9.897435897435899e-06, "loss": 42.5079, "step": 5098 }, { "epoch": 121.40597014925373, "grad_norm": 24.047697067260742, "learning_rate": 9.895604395604397e-06, "loss": 41.9274, "step": 5099 }, { "epoch": 121.42985074626866, "grad_norm": 36.788326263427734, "learning_rate": 9.893772893772895e-06, "loss": 41.1378, "step": 5100 }, { "epoch": 121.45373134328358, "grad_norm": 26.662019729614258, "learning_rate": 9.891941391941392e-06, "loss": 41.4065, "step": 5101 }, { "epoch": 121.4776119402985, "grad_norm": 35.20701217651367, "learning_rate": 9.890109890109892e-06, "loss": 39.1299, "step": 5102 }, { "epoch": 121.50149253731344, "grad_norm": 29.675378799438477, "learning_rate": 9.88827838827839e-06, "loss": 41.0234, "step": 5103 }, { "epoch": 121.52537313432836, "grad_norm": 34.06852722167969, "learning_rate": 9.886446886446888e-06, "loss": 41.8632, "step": 5104 }, { "epoch": 121.54925373134328, "grad_norm": 25.621753692626953, "learning_rate": 9.884615384615386e-06, "loss": 40.9295, "step": 5105 }, { "epoch": 121.57313432835821, "grad_norm": 27.804433822631836, "learning_rate": 9.882783882783884e-06, "loss": 40.0458, "step": 5106 }, { "epoch": 121.59701492537313, "grad_norm": 26.332223892211914, "learning_rate": 9.880952380952381e-06, "loss": 39.7798, "step": 5107 }, { "epoch": 121.62089552238805, "grad_norm": 29.49053192138672, "learning_rate": 9.879120879120881e-06, "loss": 42.0289, "step": 5108 }, { "epoch": 121.64477611940299, "grad_norm": 24.052976608276367, "learning_rate": 9.877289377289379e-06, "loss": 40.5861, "step": 5109 }, { "epoch": 121.66865671641791, "grad_norm": 23.03173828125, "learning_rate": 9.875457875457877e-06, "loss": 40.9261, "step": 5110 }, { "epoch": 121.69253731343284, "grad_norm": 24.134889602661133, "learning_rate": 9.873626373626375e-06, "loss": 41.0466, "step": 5111 }, { "epoch": 121.71641791044776, "grad_norm": 19.443124771118164, "learning_rate": 9.871794871794872e-06, "loss": 40.4331, "step": 5112 }, { "epoch": 121.74029850746268, "grad_norm": 31.88178825378418, "learning_rate": 9.869963369963372e-06, "loss": 40.6991, "step": 5113 }, { "epoch": 121.7641791044776, "grad_norm": 21.850631713867188, "learning_rate": 9.86813186813187e-06, "loss": 41.4331, "step": 5114 }, { "epoch": 121.78805970149254, "grad_norm": 37.39925765991211, "learning_rate": 9.866300366300368e-06, "loss": 40.9437, "step": 5115 }, { "epoch": 121.81194029850747, "grad_norm": 31.58283042907715, "learning_rate": 9.864468864468866e-06, "loss": 41.0558, "step": 5116 }, { "epoch": 121.83582089552239, "grad_norm": 29.965499877929688, "learning_rate": 9.862637362637364e-06, "loss": 39.5632, "step": 5117 }, { "epoch": 121.85970149253731, "grad_norm": 25.50206756591797, "learning_rate": 9.860805860805861e-06, "loss": 41.287, "step": 5118 }, { "epoch": 121.88358208955223, "grad_norm": 34.806034088134766, "learning_rate": 9.858974358974361e-06, "loss": 41.0144, "step": 5119 }, { "epoch": 121.90746268656716, "grad_norm": 21.66145133972168, "learning_rate": 9.857142857142859e-06, "loss": 41.2587, "step": 5120 }, { "epoch": 121.9313432835821, "grad_norm": 37.883094787597656, "learning_rate": 9.855311355311357e-06, "loss": 40.7321, "step": 5121 }, { "epoch": 121.95522388059702, "grad_norm": 28.472124099731445, "learning_rate": 9.853479853479855e-06, "loss": 41.5554, "step": 5122 }, { "epoch": 121.97910447761194, "grad_norm": 35.33477783203125, "learning_rate": 9.851648351648352e-06, "loss": 42.0246, "step": 5123 }, { "epoch": 122.0, "grad_norm": 27.911645889282227, "learning_rate": 9.84981684981685e-06, "loss": 35.3824, "step": 5124 }, { "epoch": 122.02388059701492, "grad_norm": 33.792213439941406, "learning_rate": 9.84798534798535e-06, "loss": 40.2451, "step": 5125 }, { "epoch": 122.04776119402985, "grad_norm": 33.73054885864258, "learning_rate": 9.846153846153848e-06, "loss": 41.5777, "step": 5126 }, { "epoch": 122.07164179104478, "grad_norm": 29.55936622619629, "learning_rate": 9.844322344322346e-06, "loss": 40.5313, "step": 5127 }, { "epoch": 122.0955223880597, "grad_norm": 21.786413192749023, "learning_rate": 9.842490842490844e-06, "loss": 41.795, "step": 5128 }, { "epoch": 122.11940298507463, "grad_norm": 38.503475189208984, "learning_rate": 9.840659340659341e-06, "loss": 40.2868, "step": 5129 }, { "epoch": 122.14328358208955, "grad_norm": 27.126779556274414, "learning_rate": 9.83882783882784e-06, "loss": 40.5464, "step": 5130 }, { "epoch": 122.16716417910447, "grad_norm": 34.76428985595703, "learning_rate": 9.836996336996337e-06, "loss": 40.2589, "step": 5131 }, { "epoch": 122.1910447761194, "grad_norm": 37.37604522705078, "learning_rate": 9.835164835164835e-06, "loss": 40.8401, "step": 5132 }, { "epoch": 122.21492537313434, "grad_norm": 29.67528533935547, "learning_rate": 9.833333333333333e-06, "loss": 40.9921, "step": 5133 }, { "epoch": 122.23880597014926, "grad_norm": 27.43715476989746, "learning_rate": 9.831501831501832e-06, "loss": 40.4038, "step": 5134 }, { "epoch": 122.26268656716418, "grad_norm": 30.960216522216797, "learning_rate": 9.82967032967033e-06, "loss": 39.8886, "step": 5135 }, { "epoch": 122.2865671641791, "grad_norm": 27.186513900756836, "learning_rate": 9.827838827838828e-06, "loss": 42.1122, "step": 5136 }, { "epoch": 122.31044776119403, "grad_norm": 32.01823806762695, "learning_rate": 9.826007326007326e-06, "loss": 40.7854, "step": 5137 }, { "epoch": 122.33432835820895, "grad_norm": 26.988773345947266, "learning_rate": 9.824175824175824e-06, "loss": 40.5902, "step": 5138 }, { "epoch": 122.35820895522389, "grad_norm": 29.70166778564453, "learning_rate": 9.822344322344322e-06, "loss": 41.4538, "step": 5139 }, { "epoch": 122.38208955223881, "grad_norm": 25.9971981048584, "learning_rate": 9.820512820512821e-06, "loss": 39.6575, "step": 5140 }, { "epoch": 122.40597014925373, "grad_norm": 33.1441535949707, "learning_rate": 9.81868131868132e-06, "loss": 40.0902, "step": 5141 }, { "epoch": 122.42985074626866, "grad_norm": 27.196630477905273, "learning_rate": 9.816849816849817e-06, "loss": 40.0376, "step": 5142 }, { "epoch": 122.45373134328358, "grad_norm": 34.561798095703125, "learning_rate": 9.815018315018315e-06, "loss": 41.6209, "step": 5143 }, { "epoch": 122.4776119402985, "grad_norm": 33.98078155517578, "learning_rate": 9.813186813186813e-06, "loss": 40.8931, "step": 5144 }, { "epoch": 122.50149253731344, "grad_norm": 29.115427017211914, "learning_rate": 9.811355311355313e-06, "loss": 41.4718, "step": 5145 }, { "epoch": 122.52537313432836, "grad_norm": 24.698219299316406, "learning_rate": 9.80952380952381e-06, "loss": 40.2337, "step": 5146 }, { "epoch": 122.54925373134328, "grad_norm": 32.09329605102539, "learning_rate": 9.807692307692308e-06, "loss": 40.1893, "step": 5147 }, { "epoch": 122.57313432835821, "grad_norm": 28.50708770751953, "learning_rate": 9.805860805860806e-06, "loss": 41.2457, "step": 5148 }, { "epoch": 122.59701492537313, "grad_norm": 34.65631103515625, "learning_rate": 9.804029304029304e-06, "loss": 40.8311, "step": 5149 }, { "epoch": 122.62089552238805, "grad_norm": 27.82625961303711, "learning_rate": 9.802197802197802e-06, "loss": 40.3574, "step": 5150 }, { "epoch": 122.64477611940299, "grad_norm": 31.24656105041504, "learning_rate": 9.800366300366301e-06, "loss": 40.999, "step": 5151 }, { "epoch": 122.66865671641791, "grad_norm": 26.075342178344727, "learning_rate": 9.7985347985348e-06, "loss": 41.6763, "step": 5152 }, { "epoch": 122.69253731343284, "grad_norm": 28.61420440673828, "learning_rate": 9.796703296703297e-06, "loss": 41.1096, "step": 5153 }, { "epoch": 122.71641791044776, "grad_norm": 24.201374053955078, "learning_rate": 9.794871794871795e-06, "loss": 41.7294, "step": 5154 }, { "epoch": 122.74029850746268, "grad_norm": 33.25908660888672, "learning_rate": 9.793040293040293e-06, "loss": 41.0633, "step": 5155 }, { "epoch": 122.7641791044776, "grad_norm": 28.24220848083496, "learning_rate": 9.79120879120879e-06, "loss": 42.0281, "step": 5156 }, { "epoch": 122.78805970149254, "grad_norm": 34.96881103515625, "learning_rate": 9.78937728937729e-06, "loss": 40.648, "step": 5157 }, { "epoch": 122.81194029850747, "grad_norm": 29.03910255432129, "learning_rate": 9.787545787545788e-06, "loss": 41.1215, "step": 5158 }, { "epoch": 122.83582089552239, "grad_norm": 30.120044708251953, "learning_rate": 9.785714285714286e-06, "loss": 41.7353, "step": 5159 }, { "epoch": 122.85970149253731, "grad_norm": 30.23310661315918, "learning_rate": 9.783882783882784e-06, "loss": 40.7885, "step": 5160 }, { "epoch": 122.88358208955223, "grad_norm": 29.74199104309082, "learning_rate": 9.782051282051282e-06, "loss": 41.4646, "step": 5161 }, { "epoch": 122.90746268656716, "grad_norm": 27.558090209960938, "learning_rate": 9.780219780219781e-06, "loss": 41.0687, "step": 5162 }, { "epoch": 122.9313432835821, "grad_norm": 29.82993507385254, "learning_rate": 9.77838827838828e-06, "loss": 41.5666, "step": 5163 }, { "epoch": 122.95522388059702, "grad_norm": 24.96250343322754, "learning_rate": 9.776556776556777e-06, "loss": 41.1099, "step": 5164 }, { "epoch": 122.97910447761194, "grad_norm": 34.85405731201172, "learning_rate": 9.774725274725275e-06, "loss": 38.6541, "step": 5165 }, { "epoch": 123.0, "grad_norm": 28.839818954467773, "learning_rate": 9.772893772893773e-06, "loss": 35.9493, "step": 5166 }, { "epoch": 123.02388059701492, "grad_norm": 32.29933547973633, "learning_rate": 9.771062271062271e-06, "loss": 40.9199, "step": 5167 }, { "epoch": 123.04776119402985, "grad_norm": 26.617511749267578, "learning_rate": 9.76923076923077e-06, "loss": 39.6813, "step": 5168 }, { "epoch": 123.07164179104478, "grad_norm": 29.118209838867188, "learning_rate": 9.767399267399268e-06, "loss": 40.971, "step": 5169 }, { "epoch": 123.0955223880597, "grad_norm": 26.295345306396484, "learning_rate": 9.765567765567766e-06, "loss": 40.8982, "step": 5170 }, { "epoch": 123.11940298507463, "grad_norm": 33.3271369934082, "learning_rate": 9.763736263736264e-06, "loss": 41.9198, "step": 5171 }, { "epoch": 123.14328358208955, "grad_norm": 25.857398986816406, "learning_rate": 9.761904761904762e-06, "loss": 39.5357, "step": 5172 }, { "epoch": 123.16716417910447, "grad_norm": 32.97218704223633, "learning_rate": 9.76007326007326e-06, "loss": 41.1038, "step": 5173 }, { "epoch": 123.1910447761194, "grad_norm": 28.88793182373047, "learning_rate": 9.75824175824176e-06, "loss": 40.8065, "step": 5174 }, { "epoch": 123.21492537313434, "grad_norm": 24.024185180664062, "learning_rate": 9.756410256410257e-06, "loss": 39.8969, "step": 5175 }, { "epoch": 123.23880597014926, "grad_norm": 23.380300521850586, "learning_rate": 9.754578754578755e-06, "loss": 40.962, "step": 5176 }, { "epoch": 123.26268656716418, "grad_norm": 28.82596778869629, "learning_rate": 9.752747252747253e-06, "loss": 40.4959, "step": 5177 }, { "epoch": 123.2865671641791, "grad_norm": 19.895410537719727, "learning_rate": 9.750915750915751e-06, "loss": 41.0015, "step": 5178 }, { "epoch": 123.31044776119403, "grad_norm": 28.44173812866211, "learning_rate": 9.74908424908425e-06, "loss": 40.7281, "step": 5179 }, { "epoch": 123.33432835820895, "grad_norm": 22.277742385864258, "learning_rate": 9.747252747252748e-06, "loss": 40.0391, "step": 5180 }, { "epoch": 123.35820895522389, "grad_norm": 27.770545959472656, "learning_rate": 9.745421245421246e-06, "loss": 41.0649, "step": 5181 }, { "epoch": 123.38208955223881, "grad_norm": 22.383668899536133, "learning_rate": 9.743589743589744e-06, "loss": 40.735, "step": 5182 }, { "epoch": 123.40597014925373, "grad_norm": 31.16164779663086, "learning_rate": 9.741758241758242e-06, "loss": 41.1004, "step": 5183 }, { "epoch": 123.42985074626866, "grad_norm": 25.458309173583984, "learning_rate": 9.73992673992674e-06, "loss": 40.4399, "step": 5184 }, { "epoch": 123.45373134328358, "grad_norm": 37.73893356323242, "learning_rate": 9.73809523809524e-06, "loss": 42.08, "step": 5185 }, { "epoch": 123.4776119402985, "grad_norm": 28.723541259765625, "learning_rate": 9.736263736263737e-06, "loss": 40.479, "step": 5186 }, { "epoch": 123.50149253731344, "grad_norm": 30.29216194152832, "learning_rate": 9.734432234432235e-06, "loss": 40.76, "step": 5187 }, { "epoch": 123.52537313432836, "grad_norm": 25.559480667114258, "learning_rate": 9.732600732600733e-06, "loss": 39.7645, "step": 5188 }, { "epoch": 123.54925373134328, "grad_norm": 30.328344345092773, "learning_rate": 9.730769230769231e-06, "loss": 42.2182, "step": 5189 }, { "epoch": 123.57313432835821, "grad_norm": 24.075218200683594, "learning_rate": 9.728937728937729e-06, "loss": 39.7574, "step": 5190 }, { "epoch": 123.59701492537313, "grad_norm": 29.823719024658203, "learning_rate": 9.727106227106228e-06, "loss": 41.1253, "step": 5191 }, { "epoch": 123.62089552238805, "grad_norm": 21.241701126098633, "learning_rate": 9.725274725274726e-06, "loss": 41.0588, "step": 5192 }, { "epoch": 123.64477611940299, "grad_norm": 34.10343933105469, "learning_rate": 9.723443223443224e-06, "loss": 40.7287, "step": 5193 }, { "epoch": 123.66865671641791, "grad_norm": 24.037466049194336, "learning_rate": 9.721611721611722e-06, "loss": 41.1033, "step": 5194 }, { "epoch": 123.69253731343284, "grad_norm": 26.837879180908203, "learning_rate": 9.71978021978022e-06, "loss": 41.1869, "step": 5195 }, { "epoch": 123.71641791044776, "grad_norm": 22.90353012084961, "learning_rate": 9.71794871794872e-06, "loss": 41.4571, "step": 5196 }, { "epoch": 123.74029850746268, "grad_norm": 31.232582092285156, "learning_rate": 9.716117216117217e-06, "loss": 40.6233, "step": 5197 }, { "epoch": 123.7641791044776, "grad_norm": 24.480405807495117, "learning_rate": 9.714285714285715e-06, "loss": 40.8121, "step": 5198 }, { "epoch": 123.78805970149254, "grad_norm": 33.86972427368164, "learning_rate": 9.712454212454213e-06, "loss": 41.596, "step": 5199 }, { "epoch": 123.81194029850747, "grad_norm": 23.727428436279297, "learning_rate": 9.710622710622711e-06, "loss": 41.1944, "step": 5200 }, { "epoch": 123.83582089552239, "grad_norm": 32.29154586791992, "learning_rate": 9.708791208791209e-06, "loss": 40.361, "step": 5201 }, { "epoch": 123.85970149253731, "grad_norm": 22.611989974975586, "learning_rate": 9.706959706959708e-06, "loss": 39.6001, "step": 5202 }, { "epoch": 123.88358208955223, "grad_norm": 33.92005157470703, "learning_rate": 9.705128205128206e-06, "loss": 42.1112, "step": 5203 }, { "epoch": 123.90746268656716, "grad_norm": 28.993995666503906, "learning_rate": 9.703296703296704e-06, "loss": 41.436, "step": 5204 }, { "epoch": 123.9313432835821, "grad_norm": 27.87895393371582, "learning_rate": 9.701465201465202e-06, "loss": 40.7799, "step": 5205 }, { "epoch": 123.95522388059702, "grad_norm": 29.898271560668945, "learning_rate": 9.6996336996337e-06, "loss": 41.2078, "step": 5206 }, { "epoch": 123.97910447761194, "grad_norm": 24.88825798034668, "learning_rate": 9.697802197802198e-06, "loss": 40.1142, "step": 5207 }, { "epoch": 124.0, "grad_norm": 20.064050674438477, "learning_rate": 9.695970695970697e-06, "loss": 35.74, "step": 5208 }, { "epoch": 124.02388059701492, "grad_norm": 30.132843017578125, "learning_rate": 9.694139194139195e-06, "loss": 41.1361, "step": 5209 }, { "epoch": 124.04776119402985, "grad_norm": 21.139568328857422, "learning_rate": 9.692307692307693e-06, "loss": 41.4147, "step": 5210 }, { "epoch": 124.07164179104478, "grad_norm": 29.36510467529297, "learning_rate": 9.690476190476191e-06, "loss": 40.6358, "step": 5211 }, { "epoch": 124.0955223880597, "grad_norm": 27.090465545654297, "learning_rate": 9.688644688644689e-06, "loss": 41.6791, "step": 5212 }, { "epoch": 124.11940298507463, "grad_norm": 24.170644760131836, "learning_rate": 9.686813186813188e-06, "loss": 40.7704, "step": 5213 }, { "epoch": 124.14328358208955, "grad_norm": 26.26068115234375, "learning_rate": 9.684981684981686e-06, "loss": 39.3922, "step": 5214 }, { "epoch": 124.16716417910447, "grad_norm": 22.155975341796875, "learning_rate": 9.683150183150184e-06, "loss": 40.9681, "step": 5215 }, { "epoch": 124.1910447761194, "grad_norm": 25.21603012084961, "learning_rate": 9.681318681318682e-06, "loss": 42.1241, "step": 5216 }, { "epoch": 124.21492537313434, "grad_norm": 16.18509292602539, "learning_rate": 9.67948717948718e-06, "loss": 40.9757, "step": 5217 }, { "epoch": 124.23880597014926, "grad_norm": 21.46571159362793, "learning_rate": 9.677655677655678e-06, "loss": 41.6829, "step": 5218 }, { "epoch": 124.26268656716418, "grad_norm": 18.195297241210938, "learning_rate": 9.675824175824177e-06, "loss": 40.194, "step": 5219 }, { "epoch": 124.2865671641791, "grad_norm": 18.00617790222168, "learning_rate": 9.673992673992675e-06, "loss": 39.3802, "step": 5220 }, { "epoch": 124.31044776119403, "grad_norm": 18.236934661865234, "learning_rate": 9.672161172161173e-06, "loss": 41.0138, "step": 5221 }, { "epoch": 124.33432835820895, "grad_norm": 16.526309967041016, "learning_rate": 9.670329670329671e-06, "loss": 40.2031, "step": 5222 }, { "epoch": 124.35820895522389, "grad_norm": 20.008708953857422, "learning_rate": 9.668498168498169e-06, "loss": 40.9772, "step": 5223 }, { "epoch": 124.38208955223881, "grad_norm": 14.738056182861328, "learning_rate": 9.666666666666667e-06, "loss": 40.5985, "step": 5224 }, { "epoch": 124.40597014925373, "grad_norm": 19.540645599365234, "learning_rate": 9.664835164835166e-06, "loss": 41.1823, "step": 5225 }, { "epoch": 124.42985074626866, "grad_norm": 17.26000213623047, "learning_rate": 9.663003663003664e-06, "loss": 40.2975, "step": 5226 }, { "epoch": 124.45373134328358, "grad_norm": 19.984989166259766, "learning_rate": 9.661172161172162e-06, "loss": 40.6366, "step": 5227 }, { "epoch": 124.4776119402985, "grad_norm": 24.717369079589844, "learning_rate": 9.65934065934066e-06, "loss": 40.665, "step": 5228 }, { "epoch": 124.50149253731344, "grad_norm": 16.406538009643555, "learning_rate": 9.657509157509158e-06, "loss": 40.4751, "step": 5229 }, { "epoch": 124.52537313432836, "grad_norm": 23.191200256347656, "learning_rate": 9.655677655677657e-06, "loss": 40.6781, "step": 5230 }, { "epoch": 124.54925373134328, "grad_norm": 18.91063690185547, "learning_rate": 9.653846153846155e-06, "loss": 40.9245, "step": 5231 }, { "epoch": 124.57313432835821, "grad_norm": 23.012889862060547, "learning_rate": 9.652014652014653e-06, "loss": 41.7688, "step": 5232 }, { "epoch": 124.59701492537313, "grad_norm": 20.35813331604004, "learning_rate": 9.650183150183151e-06, "loss": 40.9169, "step": 5233 }, { "epoch": 124.62089552238805, "grad_norm": 22.06452751159668, "learning_rate": 9.648351648351649e-06, "loss": 41.0061, "step": 5234 }, { "epoch": 124.64477611940299, "grad_norm": 23.17784309387207, "learning_rate": 9.646520146520147e-06, "loss": 40.7811, "step": 5235 }, { "epoch": 124.66865671641791, "grad_norm": 19.43151092529297, "learning_rate": 9.644688644688646e-06, "loss": 40.3725, "step": 5236 }, { "epoch": 124.69253731343284, "grad_norm": 23.144960403442383, "learning_rate": 9.642857142857144e-06, "loss": 42.382, "step": 5237 }, { "epoch": 124.71641791044776, "grad_norm": 18.223936080932617, "learning_rate": 9.641025641025642e-06, "loss": 40.3686, "step": 5238 }, { "epoch": 124.74029850746268, "grad_norm": 21.855030059814453, "learning_rate": 9.63919413919414e-06, "loss": 41.8939, "step": 5239 }, { "epoch": 124.7641791044776, "grad_norm": 21.252012252807617, "learning_rate": 9.637362637362638e-06, "loss": 40.5954, "step": 5240 }, { "epoch": 124.78805970149254, "grad_norm": 20.320215225219727, "learning_rate": 9.635531135531136e-06, "loss": 40.2941, "step": 5241 }, { "epoch": 124.81194029850747, "grad_norm": 20.874921798706055, "learning_rate": 9.633699633699635e-06, "loss": 39.8523, "step": 5242 }, { "epoch": 124.83582089552239, "grad_norm": 20.560138702392578, "learning_rate": 9.631868131868133e-06, "loss": 40.1141, "step": 5243 }, { "epoch": 124.85970149253731, "grad_norm": 16.412206649780273, "learning_rate": 9.630036630036631e-06, "loss": 41.1038, "step": 5244 }, { "epoch": 124.88358208955223, "grad_norm": 20.39592170715332, "learning_rate": 9.628205128205129e-06, "loss": 39.8884, "step": 5245 }, { "epoch": 124.90746268656716, "grad_norm": 16.07599639892578, "learning_rate": 9.626373626373627e-06, "loss": 40.7929, "step": 5246 }, { "epoch": 124.9313432835821, "grad_norm": 14.183424949645996, "learning_rate": 9.624542124542126e-06, "loss": 41.5261, "step": 5247 }, { "epoch": 124.95522388059702, "grad_norm": 17.80473518371582, "learning_rate": 9.622710622710624e-06, "loss": 39.6694, "step": 5248 }, { "epoch": 124.97910447761194, "grad_norm": 16.59119987487793, "learning_rate": 9.620879120879122e-06, "loss": 40.9024, "step": 5249 }, { "epoch": 125.0, "grad_norm": 15.37125301361084, "learning_rate": 9.61904761904762e-06, "loss": 35.3595, "step": 5250 }, { "epoch": 125.02388059701492, "grad_norm": 18.345430374145508, "learning_rate": 9.617216117216118e-06, "loss": 40.2401, "step": 5251 }, { "epoch": 125.04776119402985, "grad_norm": 17.491918563842773, "learning_rate": 9.615384615384616e-06, "loss": 39.8787, "step": 5252 }, { "epoch": 125.07164179104478, "grad_norm": 16.483713150024414, "learning_rate": 9.613553113553115e-06, "loss": 41.3826, "step": 5253 }, { "epoch": 125.0955223880597, "grad_norm": 15.222822189331055, "learning_rate": 9.611721611721613e-06, "loss": 41.1321, "step": 5254 }, { "epoch": 125.11940298507463, "grad_norm": 16.675804138183594, "learning_rate": 9.609890109890111e-06, "loss": 41.0334, "step": 5255 }, { "epoch": 125.14328358208955, "grad_norm": 17.025266647338867, "learning_rate": 9.608058608058609e-06, "loss": 40.6213, "step": 5256 }, { "epoch": 125.16716417910447, "grad_norm": 15.499921798706055, "learning_rate": 9.606227106227107e-06, "loss": 39.8817, "step": 5257 }, { "epoch": 125.1910447761194, "grad_norm": 14.926107406616211, "learning_rate": 9.604395604395605e-06, "loss": 42.1196, "step": 5258 }, { "epoch": 125.21492537313434, "grad_norm": 17.896583557128906, "learning_rate": 9.602564102564104e-06, "loss": 40.1941, "step": 5259 }, { "epoch": 125.23880597014926, "grad_norm": 21.413713455200195, "learning_rate": 9.600732600732602e-06, "loss": 39.9496, "step": 5260 }, { "epoch": 125.26268656716418, "grad_norm": 20.262035369873047, "learning_rate": 9.5989010989011e-06, "loss": 40.8554, "step": 5261 }, { "epoch": 125.2865671641791, "grad_norm": 17.94382095336914, "learning_rate": 9.597069597069598e-06, "loss": 40.618, "step": 5262 }, { "epoch": 125.31044776119403, "grad_norm": 14.720929145812988, "learning_rate": 9.595238095238096e-06, "loss": 41.6634, "step": 5263 }, { "epoch": 125.33432835820895, "grad_norm": 21.567907333374023, "learning_rate": 9.593406593406595e-06, "loss": 41.6142, "step": 5264 }, { "epoch": 125.35820895522389, "grad_norm": 23.717586517333984, "learning_rate": 9.591575091575093e-06, "loss": 41.458, "step": 5265 }, { "epoch": 125.38208955223881, "grad_norm": 13.948038101196289, "learning_rate": 9.589743589743591e-06, "loss": 41.2909, "step": 5266 }, { "epoch": 125.40597014925373, "grad_norm": 27.392465591430664, "learning_rate": 9.587912087912089e-06, "loss": 41.3308, "step": 5267 }, { "epoch": 125.42985074626866, "grad_norm": 20.557374954223633, "learning_rate": 9.586080586080587e-06, "loss": 41.7717, "step": 5268 }, { "epoch": 125.45373134328358, "grad_norm": 21.076601028442383, "learning_rate": 9.584249084249085e-06, "loss": 39.8928, "step": 5269 }, { "epoch": 125.4776119402985, "grad_norm": 22.425079345703125, "learning_rate": 9.582417582417584e-06, "loss": 41.5856, "step": 5270 }, { "epoch": 125.50149253731344, "grad_norm": 19.16175079345703, "learning_rate": 9.580586080586082e-06, "loss": 40.7785, "step": 5271 }, { "epoch": 125.52537313432836, "grad_norm": 18.763565063476562, "learning_rate": 9.57875457875458e-06, "loss": 39.8277, "step": 5272 }, { "epoch": 125.54925373134328, "grad_norm": 18.3720645904541, "learning_rate": 9.576923076923078e-06, "loss": 40.9783, "step": 5273 }, { "epoch": 125.57313432835821, "grad_norm": 18.62623405456543, "learning_rate": 9.575091575091576e-06, "loss": 40.5206, "step": 5274 }, { "epoch": 125.59701492537313, "grad_norm": 20.078596115112305, "learning_rate": 9.573260073260074e-06, "loss": 40.2231, "step": 5275 }, { "epoch": 125.62089552238805, "grad_norm": 28.77025032043457, "learning_rate": 9.571428571428573e-06, "loss": 40.4628, "step": 5276 }, { "epoch": 125.64477611940299, "grad_norm": 17.48457145690918, "learning_rate": 9.569597069597071e-06, "loss": 38.3776, "step": 5277 }, { "epoch": 125.66865671641791, "grad_norm": 29.077014923095703, "learning_rate": 9.567765567765569e-06, "loss": 40.7467, "step": 5278 }, { "epoch": 125.69253731343284, "grad_norm": 22.95465660095215, "learning_rate": 9.565934065934067e-06, "loss": 40.831, "step": 5279 }, { "epoch": 125.71641791044776, "grad_norm": 26.317485809326172, "learning_rate": 9.564102564102565e-06, "loss": 40.4036, "step": 5280 }, { "epoch": 125.74029850746268, "grad_norm": 20.771020889282227, "learning_rate": 9.562271062271064e-06, "loss": 40.7238, "step": 5281 }, { "epoch": 125.7641791044776, "grad_norm": 21.856155395507812, "learning_rate": 9.560439560439562e-06, "loss": 40.1953, "step": 5282 }, { "epoch": 125.78805970149254, "grad_norm": 24.748037338256836, "learning_rate": 9.55860805860806e-06, "loss": 39.5605, "step": 5283 }, { "epoch": 125.81194029850747, "grad_norm": NaN, "learning_rate": 9.556776556776558e-06, "loss": 60.4649, "step": 5284 }, { "epoch": 125.83582089552239, "grad_norm": 18.565261840820312, "learning_rate": 9.556776556776558e-06, "loss": 41.9694, "step": 5285 }, { "epoch": 125.85970149253731, "grad_norm": 29.6701717376709, "learning_rate": 9.554945054945056e-06, "loss": 41.4843, "step": 5286 }, { "epoch": 125.88358208955223, "grad_norm": 20.192317962646484, "learning_rate": 9.553113553113554e-06, "loss": 39.8961, "step": 5287 }, { "epoch": 125.90746268656716, "grad_norm": 25.22960662841797, "learning_rate": 9.551282051282053e-06, "loss": 39.7754, "step": 5288 }, { "epoch": 125.9313432835821, "grad_norm": 19.892139434814453, "learning_rate": 9.549450549450551e-06, "loss": 40.257, "step": 5289 }, { "epoch": 125.95522388059702, "grad_norm": 18.426124572753906, "learning_rate": 9.547619047619049e-06, "loss": 41.252, "step": 5290 }, { "epoch": 125.97910447761194, "grad_norm": 24.085840225219727, "learning_rate": 9.545787545787547e-06, "loss": 41.3266, "step": 5291 }, { "epoch": 126.0, "grad_norm": 14.462137222290039, "learning_rate": 9.543956043956045e-06, "loss": 36.235, "step": 5292 }, { "epoch": 126.02388059701492, "grad_norm": 21.527910232543945, "learning_rate": 9.542124542124543e-06, "loss": 41.1059, "step": 5293 }, { "epoch": 126.04776119402985, "grad_norm": 19.539413452148438, "learning_rate": 9.540293040293042e-06, "loss": 41.8102, "step": 5294 }, { "epoch": 126.07164179104478, "grad_norm": 16.535566329956055, "learning_rate": 9.53846153846154e-06, "loss": 40.4373, "step": 5295 }, { "epoch": 126.0955223880597, "grad_norm": 30.60129737854004, "learning_rate": 9.536630036630038e-06, "loss": 40.3107, "step": 5296 }, { "epoch": 126.11940298507463, "grad_norm": 19.504737854003906, "learning_rate": 9.534798534798536e-06, "loss": 39.7933, "step": 5297 }, { "epoch": 126.14328358208955, "grad_norm": 40.68082809448242, "learning_rate": 9.532967032967034e-06, "loss": 40.4788, "step": 5298 }, { "epoch": 126.16716417910447, "grad_norm": 29.288623809814453, "learning_rate": 9.531135531135532e-06, "loss": 40.3154, "step": 5299 }, { "epoch": 126.1910447761194, "grad_norm": 39.86507797241211, "learning_rate": 9.52930402930403e-06, "loss": 40.9565, "step": 5300 }, { "epoch": 126.21492537313434, "grad_norm": 37.94214630126953, "learning_rate": 9.527472527472527e-06, "loss": 39.7678, "step": 5301 }, { "epoch": 126.23880597014926, "grad_norm": 30.128881454467773, "learning_rate": 9.525641025641025e-06, "loss": 40.9812, "step": 5302 }, { "epoch": 126.26268656716418, "grad_norm": 32.11579895019531, "learning_rate": 9.523809523809525e-06, "loss": 40.7709, "step": 5303 }, { "epoch": 126.2865671641791, "grad_norm": 30.610383987426758, "learning_rate": 9.521978021978023e-06, "loss": 39.3623, "step": 5304 }, { "epoch": 126.31044776119403, "grad_norm": 25.186908721923828, "learning_rate": 9.52014652014652e-06, "loss": 40.5833, "step": 5305 }, { "epoch": 126.33432835820895, "grad_norm": 35.33464050292969, "learning_rate": 9.518315018315018e-06, "loss": 40.4571, "step": 5306 }, { "epoch": 126.35820895522389, "grad_norm": 30.900115966796875, "learning_rate": 9.516483516483516e-06, "loss": 41.5089, "step": 5307 }, { "epoch": 126.38208955223881, "grad_norm": 30.170385360717773, "learning_rate": 9.514652014652014e-06, "loss": 40.4776, "step": 5308 }, { "epoch": 126.40597014925373, "grad_norm": 25.576396942138672, "learning_rate": 9.512820512820514e-06, "loss": 40.5452, "step": 5309 }, { "epoch": 126.42985074626866, "grad_norm": 31.52381706237793, "learning_rate": 9.510989010989012e-06, "loss": 41.0569, "step": 5310 }, { "epoch": 126.45373134328358, "grad_norm": 28.613876342773438, "learning_rate": 9.50915750915751e-06, "loss": 40.4693, "step": 5311 }, { "epoch": 126.4776119402985, "grad_norm": 34.052391052246094, "learning_rate": 9.507326007326007e-06, "loss": 39.9473, "step": 5312 }, { "epoch": 126.50149253731344, "grad_norm": 28.65314292907715, "learning_rate": 9.505494505494505e-06, "loss": 39.012, "step": 5313 }, { "epoch": 126.52537313432836, "grad_norm": 28.400449752807617, "learning_rate": 9.503663003663005e-06, "loss": 40.3811, "step": 5314 }, { "epoch": 126.54925373134328, "grad_norm": 26.874284744262695, "learning_rate": 9.501831501831503e-06, "loss": 39.8546, "step": 5315 }, { "epoch": 126.57313432835821, "grad_norm": 29.638126373291016, "learning_rate": 9.5e-06, "loss": 41.6115, "step": 5316 }, { "epoch": 126.59701492537313, "grad_norm": 27.8295841217041, "learning_rate": 9.498168498168498e-06, "loss": 40.7197, "step": 5317 }, { "epoch": 126.62089552238805, "grad_norm": 34.33130645751953, "learning_rate": 9.496336996336996e-06, "loss": 41.2572, "step": 5318 }, { "epoch": 126.64477611940299, "grad_norm": 28.334978103637695, "learning_rate": 9.494505494505494e-06, "loss": 41.4411, "step": 5319 }, { "epoch": 126.66865671641791, "grad_norm": 28.492050170898438, "learning_rate": 9.492673992673994e-06, "loss": 39.6498, "step": 5320 }, { "epoch": 126.69253731343284, "grad_norm": 26.12206268310547, "learning_rate": 9.490842490842492e-06, "loss": 41.3662, "step": 5321 }, { "epoch": 126.71641791044776, "grad_norm": 31.722883224487305, "learning_rate": 9.48901098901099e-06, "loss": 41.3887, "step": 5322 }, { "epoch": 126.74029850746268, "grad_norm": 26.18402862548828, "learning_rate": 9.487179487179487e-06, "loss": 41.5819, "step": 5323 }, { "epoch": 126.7641791044776, "grad_norm": 29.256437301635742, "learning_rate": 9.485347985347985e-06, "loss": 41.0093, "step": 5324 }, { "epoch": 126.78805970149254, "grad_norm": 26.79650115966797, "learning_rate": 9.483516483516483e-06, "loss": 39.9968, "step": 5325 }, { "epoch": 126.81194029850747, "grad_norm": 27.056190490722656, "learning_rate": 9.481684981684983e-06, "loss": 39.1216, "step": 5326 }, { "epoch": 126.83582089552239, "grad_norm": 22.335859298706055, "learning_rate": 9.47985347985348e-06, "loss": 40.7778, "step": 5327 }, { "epoch": 126.85970149253731, "grad_norm": NaN, "learning_rate": 9.478021978021978e-06, "loss": 58.1092, "step": 5328 }, { "epoch": 126.88358208955223, "grad_norm": 24.419767379760742, "learning_rate": 9.478021978021978e-06, "loss": 40.4599, "step": 5329 }, { "epoch": 126.90746268656716, "grad_norm": 20.041467666625977, "learning_rate": 9.476190476190476e-06, "loss": 40.1727, "step": 5330 }, { "epoch": 126.9313432835821, "grad_norm": 26.40553855895996, "learning_rate": 9.474358974358974e-06, "loss": 41.0726, "step": 5331 }, { "epoch": 126.95522388059702, "grad_norm": 24.665653228759766, "learning_rate": 9.472527472527474e-06, "loss": 42.0728, "step": 5332 }, { "epoch": 126.97910447761194, "grad_norm": 21.605026245117188, "learning_rate": 9.470695970695972e-06, "loss": 40.6338, "step": 5333 }, { "epoch": 127.0, "grad_norm": 20.224733352661133, "learning_rate": 9.46886446886447e-06, "loss": 35.2993, "step": 5334 }, { "epoch": 127.02388059701492, "grad_norm": 21.891176223754883, "learning_rate": 9.467032967032967e-06, "loss": 40.7857, "step": 5335 }, { "epoch": 127.04776119402985, "grad_norm": 24.02487564086914, "learning_rate": 9.465201465201465e-06, "loss": 39.9996, "step": 5336 }, { "epoch": 127.07164179104478, "grad_norm": 26.67331314086914, "learning_rate": 9.463369963369963e-06, "loss": 41.6034, "step": 5337 }, { "epoch": 127.0955223880597, "grad_norm": 18.6497802734375, "learning_rate": 9.461538461538463e-06, "loss": 42.0516, "step": 5338 }, { "epoch": 127.11940298507463, "grad_norm": 31.833471298217773, "learning_rate": 9.45970695970696e-06, "loss": 40.597, "step": 5339 }, { "epoch": 127.14328358208955, "grad_norm": 27.305522918701172, "learning_rate": 9.457875457875458e-06, "loss": 40.7429, "step": 5340 }, { "epoch": 127.16716417910447, "grad_norm": 29.530677795410156, "learning_rate": 9.456043956043956e-06, "loss": 40.4873, "step": 5341 }, { "epoch": 127.1910447761194, "grad_norm": 29.030101776123047, "learning_rate": 9.454212454212454e-06, "loss": 39.0437, "step": 5342 }, { "epoch": 127.21492537313434, "grad_norm": NaN, "learning_rate": 9.452380952380952e-06, "loss": 60.0619, "step": 5343 }, { "epoch": 127.23880597014926, "grad_norm": 31.528074264526367, "learning_rate": 9.452380952380952e-06, "loss": 40.4362, "step": 5344 }, { "epoch": 127.26268656716418, "grad_norm": 28.54173469543457, "learning_rate": 9.450549450549452e-06, "loss": 41.0408, "step": 5345 }, { "epoch": 127.2865671641791, "grad_norm": 33.24728775024414, "learning_rate": 9.44871794871795e-06, "loss": 40.6743, "step": 5346 }, { "epoch": 127.31044776119403, "grad_norm": 29.59555435180664, "learning_rate": 9.446886446886447e-06, "loss": 40.3809, "step": 5347 }, { "epoch": 127.33432835820895, "grad_norm": 32.15523147583008, "learning_rate": 9.445054945054945e-06, "loss": 40.6554, "step": 5348 }, { "epoch": 127.35820895522389, "grad_norm": 31.1282901763916, "learning_rate": 9.443223443223443e-06, "loss": 39.8469, "step": 5349 }, { "epoch": 127.38208955223881, "grad_norm": 30.29323959350586, "learning_rate": 9.441391941391943e-06, "loss": 40.155, "step": 5350 }, { "epoch": 127.40597014925373, "grad_norm": 28.87554931640625, "learning_rate": 9.43956043956044e-06, "loss": 39.2159, "step": 5351 }, { "epoch": 127.42985074626866, "grad_norm": 28.77309226989746, "learning_rate": 9.437728937728938e-06, "loss": 40.7679, "step": 5352 }, { "epoch": 127.45373134328358, "grad_norm": 24.797941207885742, "learning_rate": 9.435897435897436e-06, "loss": 40.2341, "step": 5353 }, { "epoch": 127.4776119402985, "grad_norm": 30.226261138916016, "learning_rate": 9.434065934065934e-06, "loss": 41.5231, "step": 5354 }, { "epoch": 127.50149253731344, "grad_norm": 23.113122940063477, "learning_rate": 9.432234432234432e-06, "loss": 40.5916, "step": 5355 }, { "epoch": 127.52537313432836, "grad_norm": 36.03447341918945, "learning_rate": 9.430402930402932e-06, "loss": 40.8739, "step": 5356 }, { "epoch": 127.54925373134328, "grad_norm": 32.825225830078125, "learning_rate": 9.42857142857143e-06, "loss": 40.2842, "step": 5357 }, { "epoch": 127.57313432835821, "grad_norm": 26.586402893066406, "learning_rate": 9.426739926739927e-06, "loss": 40.3329, "step": 5358 }, { "epoch": 127.59701492537313, "grad_norm": 25.394254684448242, "learning_rate": 9.424908424908425e-06, "loss": 41.4049, "step": 5359 }, { "epoch": 127.62089552238805, "grad_norm": 28.440998077392578, "learning_rate": 9.423076923076923e-06, "loss": 39.6021, "step": 5360 }, { "epoch": 127.64477611940299, "grad_norm": 25.379180908203125, "learning_rate": 9.421245421245421e-06, "loss": 40.6451, "step": 5361 }, { "epoch": 127.66865671641791, "grad_norm": 33.607208251953125, "learning_rate": 9.41941391941392e-06, "loss": 41.685, "step": 5362 }, { "epoch": 127.69253731343284, "grad_norm": 24.925783157348633, "learning_rate": 9.417582417582418e-06, "loss": 41.2479, "step": 5363 }, { "epoch": 127.71641791044776, "grad_norm": 34.97409439086914, "learning_rate": 9.415750915750916e-06, "loss": 40.9911, "step": 5364 }, { "epoch": 127.74029850746268, "grad_norm": 25.85514259338379, "learning_rate": 9.413919413919414e-06, "loss": 39.0472, "step": 5365 }, { "epoch": 127.7641791044776, "grad_norm": 32.1847038269043, "learning_rate": 9.412087912087912e-06, "loss": 40.6535, "step": 5366 }, { "epoch": 127.78805970149254, "grad_norm": 29.844226837158203, "learning_rate": 9.410256410256412e-06, "loss": 39.3755, "step": 5367 }, { "epoch": 127.81194029850747, "grad_norm": 31.974084854125977, "learning_rate": 9.40842490842491e-06, "loss": 41.1137, "step": 5368 }, { "epoch": 127.83582089552239, "grad_norm": 30.38601303100586, "learning_rate": 9.406593406593407e-06, "loss": 40.9944, "step": 5369 }, { "epoch": 127.85970149253731, "grad_norm": 25.314817428588867, "learning_rate": 9.404761904761905e-06, "loss": 40.0512, "step": 5370 }, { "epoch": 127.88358208955223, "grad_norm": 26.56514549255371, "learning_rate": 9.402930402930403e-06, "loss": 40.3742, "step": 5371 }, { "epoch": 127.90746268656716, "grad_norm": 27.275182723999023, "learning_rate": 9.401098901098901e-06, "loss": 41.387, "step": 5372 }, { "epoch": 127.9313432835821, "grad_norm": 24.307111740112305, "learning_rate": 9.3992673992674e-06, "loss": 41.2045, "step": 5373 }, { "epoch": 127.95522388059702, "grad_norm": 34.821327209472656, "learning_rate": 9.397435897435899e-06, "loss": 40.7932, "step": 5374 }, { "epoch": 127.97910447761194, "grad_norm": 31.300153732299805, "learning_rate": 9.395604395604396e-06, "loss": 41.0882, "step": 5375 }, { "epoch": 128.0, "grad_norm": 25.360746383666992, "learning_rate": 9.393772893772894e-06, "loss": 35.6565, "step": 5376 }, { "epoch": 128.02388059701494, "grad_norm": 32.15913772583008, "learning_rate": 9.391941391941392e-06, "loss": 39.5816, "step": 5377 }, { "epoch": 128.04776119402985, "grad_norm": 22.382192611694336, "learning_rate": 9.39010989010989e-06, "loss": 41.6509, "step": 5378 }, { "epoch": 128.07164179104478, "grad_norm": 23.480571746826172, "learning_rate": 9.38827838827839e-06, "loss": 40.7536, "step": 5379 }, { "epoch": 128.0955223880597, "grad_norm": 19.44637107849121, "learning_rate": 9.386446886446887e-06, "loss": 40.499, "step": 5380 }, { "epoch": 128.11940298507463, "grad_norm": 20.136741638183594, "learning_rate": 9.384615384615385e-06, "loss": 41.3551, "step": 5381 }, { "epoch": 128.14328358208957, "grad_norm": 20.516332626342773, "learning_rate": 9.382783882783883e-06, "loss": 40.7676, "step": 5382 }, { "epoch": 128.16716417910447, "grad_norm": 18.942041397094727, "learning_rate": 9.380952380952381e-06, "loss": 40.5404, "step": 5383 }, { "epoch": 128.1910447761194, "grad_norm": 22.05898666381836, "learning_rate": 9.37912087912088e-06, "loss": 40.9921, "step": 5384 }, { "epoch": 128.21492537313432, "grad_norm": 15.969873428344727, "learning_rate": 9.377289377289379e-06, "loss": 40.1066, "step": 5385 }, { "epoch": 128.23880597014926, "grad_norm": 18.438854217529297, "learning_rate": 9.375457875457876e-06, "loss": 39.8564, "step": 5386 }, { "epoch": 128.26268656716417, "grad_norm": 18.526012420654297, "learning_rate": 9.373626373626374e-06, "loss": 40.388, "step": 5387 }, { "epoch": 128.2865671641791, "grad_norm": 13.87939167022705, "learning_rate": 9.371794871794872e-06, "loss": 41.2088, "step": 5388 }, { "epoch": 128.31044776119404, "grad_norm": 19.515592575073242, "learning_rate": 9.36996336996337e-06, "loss": 40.3321, "step": 5389 }, { "epoch": 128.33432835820895, "grad_norm": 17.547893524169922, "learning_rate": 9.36813186813187e-06, "loss": 40.0459, "step": 5390 }, { "epoch": 128.3582089552239, "grad_norm": 24.08388900756836, "learning_rate": 9.366300366300367e-06, "loss": 40.7233, "step": 5391 }, { "epoch": 128.3820895522388, "grad_norm": 25.02381134033203, "learning_rate": 9.364468864468865e-06, "loss": 41.4629, "step": 5392 }, { "epoch": 128.40597014925373, "grad_norm": 17.845233917236328, "learning_rate": 9.362637362637363e-06, "loss": 40.136, "step": 5393 }, { "epoch": 128.42985074626867, "grad_norm": 24.73293685913086, "learning_rate": 9.360805860805861e-06, "loss": 40.1744, "step": 5394 }, { "epoch": 128.45373134328358, "grad_norm": 18.738384246826172, "learning_rate": 9.358974358974359e-06, "loss": 40.9566, "step": 5395 }, { "epoch": 128.47761194029852, "grad_norm": 22.628456115722656, "learning_rate": 9.357142857142859e-06, "loss": 39.9645, "step": 5396 }, { "epoch": 128.50149253731342, "grad_norm": 19.057598114013672, "learning_rate": 9.355311355311356e-06, "loss": 38.6498, "step": 5397 }, { "epoch": 128.52537313432836, "grad_norm": 20.58139419555664, "learning_rate": 9.353479853479854e-06, "loss": 41.7546, "step": 5398 }, { "epoch": 128.54925373134327, "grad_norm": 23.596145629882812, "learning_rate": 9.351648351648352e-06, "loss": 39.7231, "step": 5399 }, { "epoch": 128.5731343283582, "grad_norm": 18.677183151245117, "learning_rate": 9.34981684981685e-06, "loss": 39.6687, "step": 5400 }, { "epoch": 128.59701492537314, "grad_norm": 22.48053550720215, "learning_rate": 9.34798534798535e-06, "loss": 41.1109, "step": 5401 }, { "epoch": 128.62089552238805, "grad_norm": 18.408390045166016, "learning_rate": 9.346153846153847e-06, "loss": 40.0313, "step": 5402 }, { "epoch": 128.644776119403, "grad_norm": 18.866302490234375, "learning_rate": 9.344322344322345e-06, "loss": 41.4068, "step": 5403 }, { "epoch": 128.6686567164179, "grad_norm": 18.15769386291504, "learning_rate": 9.342490842490843e-06, "loss": 40.0289, "step": 5404 }, { "epoch": 128.69253731343284, "grad_norm": 21.213743209838867, "learning_rate": 9.340659340659341e-06, "loss": 41.5406, "step": 5405 }, { "epoch": 128.71641791044777, "grad_norm": 14.050131797790527, "learning_rate": 9.338827838827839e-06, "loss": 40.9447, "step": 5406 }, { "epoch": 128.74029850746268, "grad_norm": 20.822832107543945, "learning_rate": 9.336996336996339e-06, "loss": 40.7616, "step": 5407 }, { "epoch": 128.76417910447762, "grad_norm": 16.915830612182617, "learning_rate": 9.335164835164836e-06, "loss": 39.8529, "step": 5408 }, { "epoch": 128.78805970149253, "grad_norm": 24.053998947143555, "learning_rate": 9.333333333333334e-06, "loss": 41.2986, "step": 5409 }, { "epoch": 128.81194029850747, "grad_norm": 21.357769012451172, "learning_rate": 9.331501831501832e-06, "loss": 41.6824, "step": 5410 }, { "epoch": 128.83582089552237, "grad_norm": 16.31240463256836, "learning_rate": 9.32967032967033e-06, "loss": 40.1068, "step": 5411 }, { "epoch": 128.8597014925373, "grad_norm": 18.027111053466797, "learning_rate": 9.327838827838828e-06, "loss": 39.9807, "step": 5412 }, { "epoch": 128.88358208955225, "grad_norm": 17.471216201782227, "learning_rate": 9.326007326007328e-06, "loss": 40.1997, "step": 5413 }, { "epoch": 128.90746268656716, "grad_norm": 14.707521438598633, "learning_rate": 9.324175824175825e-06, "loss": 40.2096, "step": 5414 }, { "epoch": 128.9313432835821, "grad_norm": 16.55643081665039, "learning_rate": 9.322344322344323e-06, "loss": 41.4804, "step": 5415 }, { "epoch": 128.955223880597, "grad_norm": 17.47356414794922, "learning_rate": 9.320512820512821e-06, "loss": 40.0436, "step": 5416 }, { "epoch": 128.97910447761194, "grad_norm": 18.92135238647461, "learning_rate": 9.318681318681319e-06, "loss": 40.1894, "step": 5417 }, { "epoch": 129.0, "grad_norm": 17.002300262451172, "learning_rate": 9.316849816849819e-06, "loss": 36.4327, "step": 5418 }, { "epoch": 129.02388059701494, "grad_norm": 18.210742950439453, "learning_rate": 9.315018315018316e-06, "loss": 40.8625, "step": 5419 }, { "epoch": 129.04776119402985, "grad_norm": 17.84212875366211, "learning_rate": 9.313186813186814e-06, "loss": 39.4351, "step": 5420 }, { "epoch": 129.07164179104478, "grad_norm": 16.789724349975586, "learning_rate": 9.311355311355312e-06, "loss": 40.7753, "step": 5421 }, { "epoch": 129.0955223880597, "grad_norm": 16.961986541748047, "learning_rate": 9.30952380952381e-06, "loss": 39.5132, "step": 5422 }, { "epoch": 129.11940298507463, "grad_norm": 19.639286041259766, "learning_rate": 9.307692307692308e-06, "loss": 41.1626, "step": 5423 }, { "epoch": 129.14328358208957, "grad_norm": 15.542900085449219, "learning_rate": 9.305860805860808e-06, "loss": 37.8755, "step": 5424 }, { "epoch": 129.16716417910447, "grad_norm": 21.57238006591797, "learning_rate": 9.304029304029305e-06, "loss": 40.1871, "step": 5425 }, { "epoch": 129.1910447761194, "grad_norm": 18.78668785095215, "learning_rate": 9.302197802197803e-06, "loss": 40.0839, "step": 5426 }, { "epoch": 129.21492537313432, "grad_norm": 19.953189849853516, "learning_rate": 9.300366300366301e-06, "loss": 39.1708, "step": 5427 }, { "epoch": 129.23880597014926, "grad_norm": 19.159618377685547, "learning_rate": 9.298534798534799e-06, "loss": 40.0572, "step": 5428 }, { "epoch": 129.26268656716417, "grad_norm": 20.108295440673828, "learning_rate": 9.296703296703297e-06, "loss": 40.5194, "step": 5429 }, { "epoch": 129.2865671641791, "grad_norm": 18.625139236450195, "learning_rate": 9.294871794871796e-06, "loss": 39.8611, "step": 5430 }, { "epoch": 129.31044776119404, "grad_norm": 23.48390007019043, "learning_rate": 9.293040293040294e-06, "loss": 39.9747, "step": 5431 }, { "epoch": 129.33432835820895, "grad_norm": 17.067564010620117, "learning_rate": 9.291208791208792e-06, "loss": 40.5828, "step": 5432 }, { "epoch": 129.3582089552239, "grad_norm": 24.928804397583008, "learning_rate": 9.28937728937729e-06, "loss": 41.1937, "step": 5433 }, { "epoch": 129.3820895522388, "grad_norm": 20.61871910095215, "learning_rate": 9.287545787545788e-06, "loss": 40.7314, "step": 5434 }, { "epoch": 129.40597014925373, "grad_norm": 28.40680694580078, "learning_rate": 9.285714285714288e-06, "loss": 40.6506, "step": 5435 }, { "epoch": 129.42985074626867, "grad_norm": 22.84246253967285, "learning_rate": 9.283882783882785e-06, "loss": 41.2816, "step": 5436 }, { "epoch": 129.45373134328358, "grad_norm": 29.882131576538086, "learning_rate": 9.282051282051283e-06, "loss": 40.7815, "step": 5437 }, { "epoch": 129.47761194029852, "grad_norm": 21.24380111694336, "learning_rate": 9.280219780219781e-06, "loss": 41.2894, "step": 5438 }, { "epoch": 129.50149253731342, "grad_norm": 23.89835548400879, "learning_rate": 9.278388278388279e-06, "loss": 40.1416, "step": 5439 }, { "epoch": 129.52537313432836, "grad_norm": 21.517475128173828, "learning_rate": 9.276556776556777e-06, "loss": 41.05, "step": 5440 }, { "epoch": 129.54925373134327, "grad_norm": 17.225387573242188, "learning_rate": 9.274725274725277e-06, "loss": 41.058, "step": 5441 }, { "epoch": 129.5731343283582, "grad_norm": 17.844186782836914, "learning_rate": 9.272893772893774e-06, "loss": 41.5639, "step": 5442 }, { "epoch": 129.59701492537314, "grad_norm": 18.40740394592285, "learning_rate": 9.271062271062272e-06, "loss": 40.1886, "step": 5443 }, { "epoch": 129.62089552238805, "grad_norm": 16.104562759399414, "learning_rate": 9.26923076923077e-06, "loss": 40.9611, "step": 5444 }, { "epoch": 129.644776119403, "grad_norm": 15.872597694396973, "learning_rate": 9.267399267399268e-06, "loss": 41.0908, "step": 5445 }, { "epoch": 129.6686567164179, "grad_norm": 16.474458694458008, "learning_rate": 9.265567765567766e-06, "loss": 39.5688, "step": 5446 }, { "epoch": 129.69253731343284, "grad_norm": 18.232454299926758, "learning_rate": 9.263736263736265e-06, "loss": 40.8028, "step": 5447 }, { "epoch": 129.71641791044777, "grad_norm": 16.598379135131836, "learning_rate": 9.261904761904763e-06, "loss": 40.5047, "step": 5448 }, { "epoch": 129.74029850746268, "grad_norm": 19.88945770263672, "learning_rate": 9.260073260073261e-06, "loss": 41.5815, "step": 5449 }, { "epoch": 129.76417910447762, "grad_norm": 18.88849449157715, "learning_rate": 9.258241758241759e-06, "loss": 41.1635, "step": 5450 }, { "epoch": 129.78805970149253, "grad_norm": 16.19620704650879, "learning_rate": 9.256410256410257e-06, "loss": 39.988, "step": 5451 }, { "epoch": 129.81194029850747, "grad_norm": 17.755510330200195, "learning_rate": 9.254578754578757e-06, "loss": 39.5852, "step": 5452 }, { "epoch": 129.83582089552237, "grad_norm": 18.566909790039062, "learning_rate": 9.252747252747254e-06, "loss": 40.5909, "step": 5453 }, { "epoch": 129.8597014925373, "grad_norm": NaN, "learning_rate": 9.250915750915752e-06, "loss": 36.0401, "step": 5454 }, { "epoch": 129.88358208955225, "grad_norm": 16.708894729614258, "learning_rate": 9.250915750915752e-06, "loss": 40.8427, "step": 5455 }, { "epoch": 129.90746268656716, "grad_norm": 18.091861724853516, "learning_rate": 9.24908424908425e-06, "loss": 40.9939, "step": 5456 }, { "epoch": 129.9313432835821, "grad_norm": 19.519044876098633, "learning_rate": 9.247252747252748e-06, "loss": 40.0013, "step": 5457 }, { "epoch": 129.955223880597, "grad_norm": 15.587594032287598, "learning_rate": 9.245421245421246e-06, "loss": 40.2778, "step": 5458 }, { "epoch": 129.97910447761194, "grad_norm": 19.010610580444336, "learning_rate": 9.243589743589745e-06, "loss": 40.7324, "step": 5459 }, { "epoch": 130.0, "grad_norm": 16.66925048828125, "learning_rate": 9.241758241758243e-06, "loss": 35.6576, "step": 5460 }, { "epoch": 130.0, "step": 5460, "total_flos": 2.6841554727339034e+17, "train_loss": 3.138686427441272, "train_runtime": 12817.2093, "train_samples_per_second": 54.283, "train_steps_per_second": 0.426 }, { "epoch": 130.02388059701494, "grad_norm": 18.575708389282227, "learning_rate": 1e-05, "loss": 40.4192, "step": 5461 }, { "epoch": 130.04776119402985, "grad_norm": Infinity, "learning_rate": 9.998299319727893e-06, "loss": 45.3289, "step": 5462 }, { "epoch": 130.07164179104478, "grad_norm": 228.45680236816406, "learning_rate": 9.998299319727893e-06, "loss": 44.984, "step": 5463 }, { "epoch": 130.0955223880597, "grad_norm": 129.42657470703125, "learning_rate": 9.996598639455783e-06, "loss": 44.7713, "step": 5464 }, { "epoch": 130.11940298507463, "grad_norm": 55.86802291870117, "learning_rate": 9.994897959183675e-06, "loss": 43.4058, "step": 5465 }, { "epoch": 130.14328358208957, "grad_norm": 93.52910614013672, "learning_rate": 9.993197278911566e-06, "loss": 42.3555, "step": 5466 }, { "epoch": 130.16716417910447, "grad_norm": 49.01381301879883, "learning_rate": 9.991496598639456e-06, "loss": 41.0505, "step": 5467 }, { "epoch": 130.1910447761194, "grad_norm": 54.29282760620117, "learning_rate": 9.989795918367348e-06, "loss": 40.938, "step": 5468 }, { "epoch": 130.21492537313432, "grad_norm": 37.95635223388672, "learning_rate": 9.988095238095239e-06, "loss": 40.3188, "step": 5469 }, { "epoch": 130.23880597014926, "grad_norm": 36.652523040771484, "learning_rate": 9.98639455782313e-06, "loss": 41.5117, "step": 5470 }, { "epoch": 130.26268656716417, "grad_norm": 40.90021896362305, "learning_rate": 9.984693877551021e-06, "loss": 42.5635, "step": 5471 }, { "epoch": 130.2865671641791, "grad_norm": 28.823503494262695, "learning_rate": 9.982993197278913e-06, "loss": 41.404, "step": 5472 }, { "epoch": 130.31044776119404, "grad_norm": 24.62152862548828, "learning_rate": 9.981292517006804e-06, "loss": 40.9372, "step": 5473 }, { "epoch": 130.33432835820895, "grad_norm": 29.644268035888672, "learning_rate": 9.979591836734694e-06, "loss": 40.5455, "step": 5474 }, { "epoch": 130.3582089552239, "grad_norm": 21.859779357910156, "learning_rate": 9.977891156462586e-06, "loss": 41.4169, "step": 5475 }, { "epoch": 130.3820895522388, "grad_norm": 23.489789962768555, "learning_rate": 9.976190476190477e-06, "loss": 39.8592, "step": 5476 }, { "epoch": 130.40597014925373, "grad_norm": 18.39851951599121, "learning_rate": 9.974489795918369e-06, "loss": 39.969, "step": 5477 }, { "epoch": 130.42985074626867, "grad_norm": 25.369873046875, "learning_rate": 9.972789115646259e-06, "loss": 39.7739, "step": 5478 }, { "epoch": 130.45373134328358, "grad_norm": 22.13943862915039, "learning_rate": 9.97108843537415e-06, "loss": 40.0817, "step": 5479 }, { "epoch": 130.47761194029852, "grad_norm": 22.7308292388916, "learning_rate": 9.969387755102042e-06, "loss": 41.4501, "step": 5480 }, { "epoch": 130.50149253731342, "grad_norm": 16.09027671813965, "learning_rate": 9.967687074829932e-06, "loss": 41.1944, "step": 5481 }, { "epoch": 130.52537313432836, "grad_norm": 20.12171745300293, "learning_rate": 9.965986394557824e-06, "loss": 40.5477, "step": 5482 }, { "epoch": 130.54925373134327, "grad_norm": 18.88404655456543, "learning_rate": 9.964285714285714e-06, "loss": 39.402, "step": 5483 }, { "epoch": 130.5731343283582, "grad_norm": 20.32000732421875, "learning_rate": 9.962585034013607e-06, "loss": 40.6678, "step": 5484 }, { "epoch": 130.59701492537314, "grad_norm": 20.351774215698242, "learning_rate": 9.960884353741499e-06, "loss": 40.075, "step": 5485 }, { "epoch": 130.62089552238805, "grad_norm": 14.881600379943848, "learning_rate": 9.959183673469387e-06, "loss": 40.0113, "step": 5486 }, { "epoch": 130.644776119403, "grad_norm": 21.500431060791016, "learning_rate": 9.95748299319728e-06, "loss": 39.5136, "step": 5487 }, { "epoch": 130.6686567164179, "grad_norm": 20.59417152404785, "learning_rate": 9.955782312925172e-06, "loss": 40.3664, "step": 5488 }, { "epoch": 130.69253731343284, "grad_norm": 20.141138076782227, "learning_rate": 9.954081632653062e-06, "loss": 40.8991, "step": 5489 }, { "epoch": 130.71641791044777, "grad_norm": 16.41176986694336, "learning_rate": 9.952380952380954e-06, "loss": 40.3613, "step": 5490 }, { "epoch": 130.74029850746268, "grad_norm": 20.832176208496094, "learning_rate": 9.950680272108844e-06, "loss": 40.709, "step": 5491 }, { "epoch": 130.76417910447762, "grad_norm": 18.355520248413086, "learning_rate": 9.948979591836737e-06, "loss": 41.1332, "step": 5492 }, { "epoch": 130.78805970149253, "grad_norm": 21.0073299407959, "learning_rate": 9.947278911564627e-06, "loss": 40.496, "step": 5493 }, { "epoch": 130.81194029850747, "grad_norm": 18.184412002563477, "learning_rate": 9.945578231292517e-06, "loss": 40.9514, "step": 5494 }, { "epoch": 130.83582089552237, "grad_norm": 23.272981643676758, "learning_rate": 9.94387755102041e-06, "loss": 40.2932, "step": 5495 }, { "epoch": 130.8597014925373, "grad_norm": 16.066865921020508, "learning_rate": 9.9421768707483e-06, "loss": 41.2145, "step": 5496 }, { "epoch": 130.88358208955225, "grad_norm": 19.863813400268555, "learning_rate": 9.940476190476192e-06, "loss": 40.9969, "step": 5497 }, { "epoch": 130.90746268656716, "grad_norm": 20.84225082397461, "learning_rate": 9.938775510204082e-06, "loss": 40.1575, "step": 5498 }, { "epoch": 130.9313432835821, "grad_norm": 16.452865600585938, "learning_rate": 9.937074829931974e-06, "loss": 38.9115, "step": 5499 }, { "epoch": 130.955223880597, "grad_norm": 19.1783390045166, "learning_rate": 9.935374149659865e-06, "loss": 40.7441, "step": 5500 }, { "epoch": 130.97910447761194, "grad_norm": 21.94544219970703, "learning_rate": 9.933673469387755e-06, "loss": 41.8275, "step": 5501 }, { "epoch": 131.0, "grad_norm": 13.472136497497559, "learning_rate": 9.931972789115647e-06, "loss": 34.5508, "step": 5502 }, { "epoch": 131.02388059701494, "grad_norm": 18.82528305053711, "learning_rate": 9.930272108843538e-06, "loss": 41.3352, "step": 5503 }, { "epoch": 131.04776119402985, "grad_norm": 16.815523147583008, "learning_rate": 9.92857142857143e-06, "loss": 39.1606, "step": 5504 }, { "epoch": 131.07164179104478, "grad_norm": 18.014087677001953, "learning_rate": 9.92687074829932e-06, "loss": 41.389, "step": 5505 }, { "epoch": 131.0955223880597, "grad_norm": 18.153976440429688, "learning_rate": 9.92517006802721e-06, "loss": 41.0835, "step": 5506 }, { "epoch": 131.11940298507463, "grad_norm": 16.97452163696289, "learning_rate": 9.923469387755103e-06, "loss": 41.149, "step": 5507 }, { "epoch": 131.14328358208957, "grad_norm": 16.83989143371582, "learning_rate": 9.921768707482993e-06, "loss": 40.9826, "step": 5508 }, { "epoch": 131.16716417910447, "grad_norm": 15.62459659576416, "learning_rate": 9.920068027210885e-06, "loss": 41.0703, "step": 5509 }, { "epoch": 131.1910447761194, "grad_norm": 14.438183784484863, "learning_rate": 9.918367346938776e-06, "loss": 41.3628, "step": 5510 }, { "epoch": 131.21492537313432, "grad_norm": 23.413602828979492, "learning_rate": 9.916666666666668e-06, "loss": 40.0985, "step": 5511 }, { "epoch": 131.23880597014926, "grad_norm": 19.558998107910156, "learning_rate": 9.91496598639456e-06, "loss": 40.2111, "step": 5512 }, { "epoch": 131.26268656716417, "grad_norm": 12.165032386779785, "learning_rate": 9.913265306122449e-06, "loss": 40.6546, "step": 5513 }, { "epoch": 131.2865671641791, "grad_norm": 18.257869720458984, "learning_rate": 9.91156462585034e-06, "loss": 42.2008, "step": 5514 }, { "epoch": 131.31044776119404, "grad_norm": 21.828651428222656, "learning_rate": 9.909863945578233e-06, "loss": 39.9445, "step": 5515 }, { "epoch": 131.33432835820895, "grad_norm": 16.42722511291504, "learning_rate": 9.908163265306123e-06, "loss": 40.4844, "step": 5516 }, { "epoch": 131.3582089552239, "grad_norm": 15.241820335388184, "learning_rate": 9.906462585034015e-06, "loss": 39.5317, "step": 5517 }, { "epoch": 131.3820895522388, "grad_norm": 14.893516540527344, "learning_rate": 9.904761904761906e-06, "loss": 40.9785, "step": 5518 }, { "epoch": 131.40597014925373, "grad_norm": 15.520328521728516, "learning_rate": 9.903061224489798e-06, "loss": 40.2672, "step": 5519 }, { "epoch": 131.42985074626867, "grad_norm": 20.18976402282715, "learning_rate": 9.901360544217688e-06, "loss": 39.9149, "step": 5520 }, { "epoch": 131.45373134328358, "grad_norm": 17.364437103271484, "learning_rate": 9.899659863945579e-06, "loss": 39.2209, "step": 5521 }, { "epoch": 131.47761194029852, "grad_norm": 18.58165168762207, "learning_rate": 9.89795918367347e-06, "loss": 41.0448, "step": 5522 }, { "epoch": 131.50149253731342, "grad_norm": 14.185492515563965, "learning_rate": 9.896258503401361e-06, "loss": 40.0128, "step": 5523 }, { "epoch": 131.52537313432836, "grad_norm": 18.741228103637695, "learning_rate": 9.894557823129253e-06, "loss": 40.4496, "step": 5524 }, { "epoch": 131.54925373134327, "grad_norm": 16.92427635192871, "learning_rate": 9.892857142857143e-06, "loss": 39.8989, "step": 5525 }, { "epoch": 131.5731343283582, "grad_norm": 18.248445510864258, "learning_rate": 9.891156462585036e-06, "loss": 39.7473, "step": 5526 }, { "epoch": 131.59701492537314, "grad_norm": 18.88313865661621, "learning_rate": 9.889455782312926e-06, "loss": 40.1553, "step": 5527 }, { "epoch": 131.62089552238805, "grad_norm": 19.88404655456543, "learning_rate": 9.887755102040816e-06, "loss": 40.9204, "step": 5528 }, { "epoch": 131.644776119403, "grad_norm": 17.367191314697266, "learning_rate": 9.886054421768708e-06, "loss": 39.9127, "step": 5529 }, { "epoch": 131.6686567164179, "grad_norm": 18.826900482177734, "learning_rate": 9.884353741496599e-06, "loss": 40.3812, "step": 5530 }, { "epoch": 131.69253731343284, "grad_norm": 16.4368953704834, "learning_rate": 9.882653061224491e-06, "loss": 40.536, "step": 5531 }, { "epoch": 131.71641791044777, "grad_norm": 17.32594108581543, "learning_rate": 9.880952380952381e-06, "loss": 41.1266, "step": 5532 }, { "epoch": 131.74029850746268, "grad_norm": 25.614294052124023, "learning_rate": 9.879251700680272e-06, "loss": 40.5684, "step": 5533 }, { "epoch": 131.76417910447762, "grad_norm": 18.330366134643555, "learning_rate": 9.877551020408164e-06, "loss": 40.9372, "step": 5534 }, { "epoch": 131.78805970149253, "grad_norm": 17.743070602416992, "learning_rate": 9.875850340136054e-06, "loss": 40.3854, "step": 5535 }, { "epoch": 131.81194029850747, "grad_norm": 20.566457748413086, "learning_rate": 9.874149659863946e-06, "loss": 40.7082, "step": 5536 }, { "epoch": 131.83582089552237, "grad_norm": 24.459489822387695, "learning_rate": 9.872448979591838e-06, "loss": 41.2391, "step": 5537 }, { "epoch": 131.8597014925373, "grad_norm": 15.706886291503906, "learning_rate": 9.870748299319729e-06, "loss": 39.2229, "step": 5538 }, { "epoch": 131.88358208955225, "grad_norm": 18.37516975402832, "learning_rate": 9.869047619047621e-06, "loss": 39.3149, "step": 5539 }, { "epoch": 131.90746268656716, "grad_norm": 18.098176956176758, "learning_rate": 9.867346938775511e-06, "loss": 39.4227, "step": 5540 }, { "epoch": 131.9313432835821, "grad_norm": 14.898340225219727, "learning_rate": 9.865646258503402e-06, "loss": 39.4192, "step": 5541 }, { "epoch": 131.955223880597, "grad_norm": 21.42721176147461, "learning_rate": 9.863945578231294e-06, "loss": 40.879, "step": 5542 }, { "epoch": 131.97910447761194, "grad_norm": 19.07784652709961, "learning_rate": 9.862244897959184e-06, "loss": 40.1433, "step": 5543 }, { "epoch": 132.0, "grad_norm": 16.04290199279785, "learning_rate": 9.860544217687076e-06, "loss": 34.6443, "step": 5544 }, { "epoch": 132.02388059701494, "grad_norm": 17.641210556030273, "learning_rate": 9.858843537414967e-06, "loss": 40.6522, "step": 5545 }, { "epoch": 132.04776119402985, "grad_norm": 22.368637084960938, "learning_rate": 9.857142857142859e-06, "loss": 41.3016, "step": 5546 }, { "epoch": 132.07164179104478, "grad_norm": 19.166706085205078, "learning_rate": 9.85544217687075e-06, "loss": 40.4856, "step": 5547 }, { "epoch": 132.0955223880597, "grad_norm": 20.525333404541016, "learning_rate": 9.85374149659864e-06, "loss": 41.073, "step": 5548 }, { "epoch": 132.11940298507463, "grad_norm": 19.90342140197754, "learning_rate": 9.852040816326532e-06, "loss": 39.3624, "step": 5549 }, { "epoch": 132.14328358208957, "grad_norm": 15.503653526306152, "learning_rate": 9.850340136054422e-06, "loss": 41.8662, "step": 5550 }, { "epoch": 132.16716417910447, "grad_norm": 15.871750831604004, "learning_rate": 9.848639455782314e-06, "loss": 39.9852, "step": 5551 }, { "epoch": 132.1910447761194, "grad_norm": 25.539695739746094, "learning_rate": 9.846938775510205e-06, "loss": 38.6591, "step": 5552 }, { "epoch": 132.21492537313432, "grad_norm": 20.75281524658203, "learning_rate": 9.845238095238097e-06, "loss": 40.6497, "step": 5553 }, { "epoch": 132.23880597014926, "grad_norm": 17.729764938354492, "learning_rate": 9.843537414965987e-06, "loss": 39.8891, "step": 5554 }, { "epoch": 132.26268656716417, "grad_norm": 16.33675765991211, "learning_rate": 9.841836734693878e-06, "loss": 41.0467, "step": 5555 }, { "epoch": 132.2865671641791, "grad_norm": 16.76603126525879, "learning_rate": 9.84013605442177e-06, "loss": 41.4239, "step": 5556 }, { "epoch": 132.31044776119404, "grad_norm": 20.384475708007812, "learning_rate": 9.83843537414966e-06, "loss": 40.4335, "step": 5557 }, { "epoch": 132.33432835820895, "grad_norm": 16.604625701904297, "learning_rate": 9.836734693877552e-06, "loss": 40.5462, "step": 5558 }, { "epoch": 132.3582089552239, "grad_norm": 19.941377639770508, "learning_rate": 9.835034013605444e-06, "loss": 41.7404, "step": 5559 }, { "epoch": 132.3820895522388, "grad_norm": 15.349913597106934, "learning_rate": 9.833333333333333e-06, "loss": 38.5686, "step": 5560 }, { "epoch": 132.40597014925373, "grad_norm": 21.03326988220215, "learning_rate": 9.831632653061225e-06, "loss": 41.3306, "step": 5561 }, { "epoch": 132.42985074626867, "grad_norm": 18.98489761352539, "learning_rate": 9.829931972789115e-06, "loss": 39.5212, "step": 5562 }, { "epoch": 132.45373134328358, "grad_norm": 15.476447105407715, "learning_rate": 9.828231292517008e-06, "loss": 39.31, "step": 5563 }, { "epoch": 132.47761194029852, "grad_norm": 17.050857543945312, "learning_rate": 9.8265306122449e-06, "loss": 40.1559, "step": 5564 }, { "epoch": 132.50149253731342, "grad_norm": 17.630809783935547, "learning_rate": 9.82482993197279e-06, "loss": 39.8047, "step": 5565 }, { "epoch": 132.52537313432836, "grad_norm": NaN, "learning_rate": 9.823129251700682e-06, "loss": 54.2096, "step": 5566 }, { "epoch": 132.54925373134327, "grad_norm": 18.909269332885742, "learning_rate": 9.823129251700682e-06, "loss": 40.1987, "step": 5567 }, { "epoch": 132.5731343283582, "grad_norm": 20.534330368041992, "learning_rate": 9.821428571428573e-06, "loss": 40.7122, "step": 5568 }, { "epoch": 132.59701492537314, "grad_norm": 16.048946380615234, "learning_rate": 9.819727891156463e-06, "loss": 40.2584, "step": 5569 }, { "epoch": 132.62089552238805, "grad_norm": 14.615914344787598, "learning_rate": 9.818027210884355e-06, "loss": 40.1147, "step": 5570 }, { "epoch": 132.644776119403, "grad_norm": 16.347827911376953, "learning_rate": 9.816326530612245e-06, "loss": 41.0445, "step": 5571 }, { "epoch": 132.6686567164179, "grad_norm": 20.02432632446289, "learning_rate": 9.814625850340137e-06, "loss": 40.1337, "step": 5572 }, { "epoch": 132.69253731343284, "grad_norm": 18.087976455688477, "learning_rate": 9.812925170068028e-06, "loss": 39.9454, "step": 5573 }, { "epoch": 132.71641791044777, "grad_norm": 16.806800842285156, "learning_rate": 9.81122448979592e-06, "loss": 40.7469, "step": 5574 }, { "epoch": 132.74029850746268, "grad_norm": 14.957366943359375, "learning_rate": 9.80952380952381e-06, "loss": 41.9708, "step": 5575 }, { "epoch": 132.76417910447762, "grad_norm": 15.429438591003418, "learning_rate": 9.8078231292517e-06, "loss": 40.5727, "step": 5576 }, { "epoch": 132.78805970149253, "grad_norm": 18.437835693359375, "learning_rate": 9.806122448979593e-06, "loss": 39.3392, "step": 5577 }, { "epoch": 132.81194029850747, "grad_norm": 23.49526023864746, "learning_rate": 9.804421768707483e-06, "loss": 40.8007, "step": 5578 }, { "epoch": 132.83582089552237, "grad_norm": 15.580110549926758, "learning_rate": 9.802721088435375e-06, "loss": 40.2113, "step": 5579 }, { "epoch": 132.8597014925373, "grad_norm": 13.494383811950684, "learning_rate": 9.801020408163266e-06, "loss": 39.5501, "step": 5580 }, { "epoch": 132.88358208955225, "grad_norm": 14.148122787475586, "learning_rate": 9.799319727891158e-06, "loss": 39.5385, "step": 5581 }, { "epoch": 132.90746268656716, "grad_norm": 14.981057167053223, "learning_rate": 9.797619047619048e-06, "loss": 40.1832, "step": 5582 }, { "epoch": 132.9313432835821, "grad_norm": 17.651594161987305, "learning_rate": 9.795918367346939e-06, "loss": 40.8822, "step": 5583 }, { "epoch": 132.955223880597, "grad_norm": 23.53675079345703, "learning_rate": 9.79421768707483e-06, "loss": 40.4374, "step": 5584 }, { "epoch": 132.97910447761194, "grad_norm": 14.797532081604004, "learning_rate": 9.792517006802721e-06, "loss": 40.3035, "step": 5585 }, { "epoch": 133.0, "grad_norm": 19.286834716796875, "learning_rate": 9.790816326530613e-06, "loss": 35.0022, "step": 5586 }, { "epoch": 133.02388059701494, "grad_norm": 25.947200775146484, "learning_rate": 9.789115646258505e-06, "loss": 40.0884, "step": 5587 }, { "epoch": 133.04776119402985, "grad_norm": 17.286386489868164, "learning_rate": 9.787414965986394e-06, "loss": 40.6761, "step": 5588 }, { "epoch": 133.07164179104478, "grad_norm": 16.327795028686523, "learning_rate": 9.785714285714286e-06, "loss": 39.5775, "step": 5589 }, { "epoch": 133.0955223880597, "grad_norm": 25.301265716552734, "learning_rate": 9.784013605442178e-06, "loss": 39.6754, "step": 5590 }, { "epoch": 133.11940298507463, "grad_norm": 18.68819236755371, "learning_rate": 9.782312925170069e-06, "loss": 40.444, "step": 5591 }, { "epoch": 133.14328358208957, "grad_norm": 16.839736938476562, "learning_rate": 9.78061224489796e-06, "loss": 39.586, "step": 5592 }, { "epoch": 133.16716417910447, "grad_norm": 27.723005294799805, "learning_rate": 9.778911564625851e-06, "loss": 40.631, "step": 5593 }, { "epoch": 133.1910447761194, "grad_norm": 16.834030151367188, "learning_rate": 9.777210884353743e-06, "loss": 39.9121, "step": 5594 }, { "epoch": 133.21492537313432, "grad_norm": 16.289016723632812, "learning_rate": 9.775510204081634e-06, "loss": 39.8342, "step": 5595 }, { "epoch": 133.23880597014926, "grad_norm": 23.45367431640625, "learning_rate": 9.773809523809524e-06, "loss": 39.8418, "step": 5596 }, { "epoch": 133.26268656716417, "grad_norm": 18.50150489807129, "learning_rate": 9.772108843537416e-06, "loss": 40.8921, "step": 5597 }, { "epoch": 133.2865671641791, "grad_norm": 15.655564308166504, "learning_rate": 9.770408163265307e-06, "loss": 39.9306, "step": 5598 }, { "epoch": 133.31044776119404, "grad_norm": 23.770095825195312, "learning_rate": 9.768707482993199e-06, "loss": 40.686, "step": 5599 }, { "epoch": 133.33432835820895, "grad_norm": 21.083984375, "learning_rate": 9.767006802721089e-06, "loss": 40.5774, "step": 5600 }, { "epoch": 133.3582089552239, "grad_norm": 14.010787010192871, "learning_rate": 9.765306122448981e-06, "loss": 40.3888, "step": 5601 }, { "epoch": 133.3820895522388, "grad_norm": 29.777660369873047, "learning_rate": 9.763605442176872e-06, "loss": 41.4408, "step": 5602 }, { "epoch": 133.40597014925373, "grad_norm": 19.067794799804688, "learning_rate": 9.761904761904762e-06, "loss": 40.1208, "step": 5603 }, { "epoch": 133.42985074626867, "grad_norm": 30.848791122436523, "learning_rate": 9.760204081632654e-06, "loss": 40.7094, "step": 5604 }, { "epoch": 133.45373134328358, "grad_norm": 29.024898529052734, "learning_rate": 9.758503401360544e-06, "loss": 40.7004, "step": 5605 }, { "epoch": 133.47761194029852, "grad_norm": 22.88898468017578, "learning_rate": 9.756802721088437e-06, "loss": 40.257, "step": 5606 }, { "epoch": 133.50149253731342, "grad_norm": 39.7208137512207, "learning_rate": 9.755102040816327e-06, "loss": 38.6114, "step": 5607 }, { "epoch": 133.52537313432836, "grad_norm": 30.543888092041016, "learning_rate": 9.753401360544217e-06, "loss": 40.1867, "step": 5608 }, { "epoch": 133.54925373134327, "grad_norm": 36.19719314575195, "learning_rate": 9.75170068027211e-06, "loss": 40.5948, "step": 5609 }, { "epoch": 133.5731343283582, "grad_norm": 32.90020751953125, "learning_rate": 9.75e-06, "loss": 39.8139, "step": 5610 }, { "epoch": 133.59701492537314, "grad_norm": 34.50712585449219, "learning_rate": 9.748299319727892e-06, "loss": 40.1155, "step": 5611 }, { "epoch": 133.62089552238805, "grad_norm": 32.24649429321289, "learning_rate": 9.746598639455784e-06, "loss": 40.149, "step": 5612 }, { "epoch": 133.644776119403, "grad_norm": 35.7637939453125, "learning_rate": 9.744897959183674e-06, "loss": 39.7303, "step": 5613 }, { "epoch": 133.6686567164179, "grad_norm": 31.09421157836914, "learning_rate": 9.743197278911567e-06, "loss": 41.0925, "step": 5614 }, { "epoch": 133.69253731343284, "grad_norm": 37.82075881958008, "learning_rate": 9.741496598639457e-06, "loss": 39.9909, "step": 5615 }, { "epoch": 133.71641791044777, "grad_norm": 33.92351150512695, "learning_rate": 9.739795918367347e-06, "loss": 40.0986, "step": 5616 }, { "epoch": 133.74029850746268, "grad_norm": 29.645198822021484, "learning_rate": 9.73809523809524e-06, "loss": 41.5591, "step": 5617 }, { "epoch": 133.76417910447762, "grad_norm": 24.506332397460938, "learning_rate": 9.73639455782313e-06, "loss": 41.3366, "step": 5618 }, { "epoch": 133.78805970149253, "grad_norm": 38.3758544921875, "learning_rate": 9.734693877551022e-06, "loss": 41.016, "step": 5619 }, { "epoch": 133.81194029850747, "grad_norm": 33.210044860839844, "learning_rate": 9.732993197278912e-06, "loss": 40.9384, "step": 5620 }, { "epoch": 133.83582089552237, "grad_norm": 33.01791000366211, "learning_rate": 9.731292517006804e-06, "loss": 39.6658, "step": 5621 }, { "epoch": 133.8597014925373, "grad_norm": 34.2905158996582, "learning_rate": 9.729591836734695e-06, "loss": 40.4843, "step": 5622 }, { "epoch": 133.88358208955225, "grad_norm": 29.771053314208984, "learning_rate": 9.727891156462585e-06, "loss": 40.2978, "step": 5623 }, { "epoch": 133.90746268656716, "grad_norm": 30.07183837890625, "learning_rate": 9.726190476190477e-06, "loss": 40.2479, "step": 5624 }, { "epoch": 133.9313432835821, "grad_norm": 30.720661163330078, "learning_rate": 9.724489795918368e-06, "loss": 39.5252, "step": 5625 }, { "epoch": 133.955223880597, "grad_norm": 27.56161117553711, "learning_rate": 9.72278911564626e-06, "loss": 40.4758, "step": 5626 }, { "epoch": 133.97910447761194, "grad_norm": 32.74715805053711, "learning_rate": 9.72108843537415e-06, "loss": 40.6321, "step": 5627 }, { "epoch": 134.0, "grad_norm": 25.854846954345703, "learning_rate": 9.719387755102042e-06, "loss": 34.2593, "step": 5628 }, { "epoch": 134.02388059701494, "grad_norm": 33.82636642456055, "learning_rate": 9.717687074829933e-06, "loss": 40.2388, "step": 5629 }, { "epoch": 134.04776119402985, "grad_norm": 29.441238403320312, "learning_rate": 9.715986394557823e-06, "loss": 40.5805, "step": 5630 }, { "epoch": 134.07164179104478, "grad_norm": 29.590694427490234, "learning_rate": 9.714285714285715e-06, "loss": 38.7185, "step": 5631 }, { "epoch": 134.0955223880597, "grad_norm": 26.878095626831055, "learning_rate": 9.712585034013606e-06, "loss": 41.1294, "step": 5632 }, { "epoch": 134.11940298507463, "grad_norm": 31.240013122558594, "learning_rate": 9.710884353741498e-06, "loss": 40.0814, "step": 5633 }, { "epoch": 134.14328358208957, "grad_norm": 27.573955535888672, "learning_rate": 9.70918367346939e-06, "loss": 40.6451, "step": 5634 }, { "epoch": 134.16716417910447, "grad_norm": 35.54013442993164, "learning_rate": 9.707482993197278e-06, "loss": 41.3382, "step": 5635 }, { "epoch": 134.1910447761194, "grad_norm": 33.757408142089844, "learning_rate": 9.70578231292517e-06, "loss": 39.4768, "step": 5636 }, { "epoch": 134.21492537313432, "grad_norm": 29.37469482421875, "learning_rate": 9.704081632653061e-06, "loss": 39.8421, "step": 5637 }, { "epoch": 134.23880597014926, "grad_norm": 29.495834350585938, "learning_rate": 9.702380952380953e-06, "loss": 39.2846, "step": 5638 }, { "epoch": 134.26268656716417, "grad_norm": 28.723642349243164, "learning_rate": 9.700680272108845e-06, "loss": 39.4364, "step": 5639 }, { "epoch": 134.2865671641791, "grad_norm": 25.51703453063965, "learning_rate": 9.698979591836736e-06, "loss": 39.4578, "step": 5640 }, { "epoch": 134.31044776119404, "grad_norm": 34.16410446166992, "learning_rate": 9.697278911564628e-06, "loss": 40.5937, "step": 5641 }, { "epoch": 134.33432835820895, "grad_norm": 30.546810150146484, "learning_rate": 9.695578231292518e-06, "loss": 39.912, "step": 5642 }, { "epoch": 134.3582089552239, "grad_norm": 30.73379898071289, "learning_rate": 9.693877551020408e-06, "loss": 41.5471, "step": 5643 }, { "epoch": 134.3820895522388, "grad_norm": 30.759567260742188, "learning_rate": 9.6921768707483e-06, "loss": 40.3315, "step": 5644 }, { "epoch": 134.40597014925373, "grad_norm": 28.02313995361328, "learning_rate": 9.690476190476191e-06, "loss": 40.2851, "step": 5645 }, { "epoch": 134.42985074626867, "grad_norm": 24.580036163330078, "learning_rate": 9.688775510204083e-06, "loss": 40.9942, "step": 5646 }, { "epoch": 134.45373134328358, "grad_norm": 32.100738525390625, "learning_rate": 9.687074829931973e-06, "loss": 40.2184, "step": 5647 }, { "epoch": 134.47761194029852, "grad_norm": 30.24114418029785, "learning_rate": 9.685374149659866e-06, "loss": 40.3371, "step": 5648 }, { "epoch": 134.50149253731342, "grad_norm": 32.3997917175293, "learning_rate": 9.683673469387756e-06, "loss": 40.7586, "step": 5649 }, { "epoch": 134.52537313432836, "grad_norm": 25.58622169494629, "learning_rate": 9.681972789115646e-06, "loss": 40.1238, "step": 5650 }, { "epoch": 134.54925373134327, "grad_norm": 32.82097244262695, "learning_rate": 9.680272108843538e-06, "loss": 40.6563, "step": 5651 }, { "epoch": 134.5731343283582, "grad_norm": 27.216670989990234, "learning_rate": 9.678571428571429e-06, "loss": 38.6664, "step": 5652 }, { "epoch": 134.59701492537314, "grad_norm": 30.91448211669922, "learning_rate": 9.676870748299321e-06, "loss": 40.0405, "step": 5653 }, { "epoch": 134.62089552238805, "grad_norm": 27.467674255371094, "learning_rate": 9.675170068027211e-06, "loss": 40.8484, "step": 5654 }, { "epoch": 134.644776119403, "grad_norm": 33.313507080078125, "learning_rate": 9.673469387755103e-06, "loss": 40.5139, "step": 5655 }, { "epoch": 134.6686567164179, "grad_norm": 28.826663970947266, "learning_rate": 9.671768707482994e-06, "loss": 39.9436, "step": 5656 }, { "epoch": 134.69253731343284, "grad_norm": 31.69590950012207, "learning_rate": 9.670068027210884e-06, "loss": 40.458, "step": 5657 }, { "epoch": 134.71641791044777, "grad_norm": 24.371248245239258, "learning_rate": 9.668367346938776e-06, "loss": 40.4455, "step": 5658 }, { "epoch": 134.74029850746268, "grad_norm": 31.334495544433594, "learning_rate": 9.666666666666667e-06, "loss": 40.7902, "step": 5659 }, { "epoch": 134.76417910447762, "grad_norm": 27.586498260498047, "learning_rate": 9.664965986394559e-06, "loss": 40.5867, "step": 5660 }, { "epoch": 134.78805970149253, "grad_norm": 28.80315399169922, "learning_rate": 9.663265306122451e-06, "loss": 39.4688, "step": 5661 }, { "epoch": 134.81194029850747, "grad_norm": 24.875734329223633, "learning_rate": 9.66156462585034e-06, "loss": 39.2296, "step": 5662 }, { "epoch": 134.83582089552237, "grad_norm": 26.77202033996582, "learning_rate": 9.659863945578232e-06, "loss": 41.5271, "step": 5663 }, { "epoch": 134.8597014925373, "grad_norm": 21.632478713989258, "learning_rate": 9.658163265306124e-06, "loss": 39.7494, "step": 5664 }, { "epoch": 134.88358208955225, "grad_norm": 33.85261154174805, "learning_rate": 9.656462585034014e-06, "loss": 39.4471, "step": 5665 }, { "epoch": 134.90746268656716, "grad_norm": 27.42376708984375, "learning_rate": 9.654761904761906e-06, "loss": 40.2511, "step": 5666 }, { "epoch": 134.9313432835821, "grad_norm": 29.52701187133789, "learning_rate": 9.653061224489797e-06, "loss": 39.9535, "step": 5667 }, { "epoch": 134.955223880597, "grad_norm": 25.98667335510254, "learning_rate": 9.651360544217689e-06, "loss": 40.6712, "step": 5668 }, { "epoch": 134.97910447761194, "grad_norm": 26.950590133666992, "learning_rate": 9.64965986394558e-06, "loss": 40.4322, "step": 5669 }, { "epoch": 135.0, "grad_norm": 18.281841278076172, "learning_rate": 9.64795918367347e-06, "loss": 34.9402, "step": 5670 }, { "epoch": 135.02388059701494, "grad_norm": 30.714963912963867, "learning_rate": 9.646258503401362e-06, "loss": 40.2777, "step": 5671 }, { "epoch": 135.04776119402985, "grad_norm": 21.559858322143555, "learning_rate": 9.644557823129252e-06, "loss": 39.8327, "step": 5672 }, { "epoch": 135.07164179104478, "grad_norm": 27.76194953918457, "learning_rate": 9.642857142857144e-06, "loss": 40.0835, "step": 5673 }, { "epoch": 135.0955223880597, "grad_norm": 22.750877380371094, "learning_rate": 9.641156462585035e-06, "loss": 40.3429, "step": 5674 }, { "epoch": 135.11940298507463, "grad_norm": 28.511995315551758, "learning_rate": 9.639455782312927e-06, "loss": 39.3794, "step": 5675 }, { "epoch": 135.14328358208957, "grad_norm": 21.04129409790039, "learning_rate": 9.637755102040817e-06, "loss": 41.34, "step": 5676 }, { "epoch": 135.16716417910447, "grad_norm": 27.8126277923584, "learning_rate": 9.636054421768707e-06, "loss": 40.3671, "step": 5677 }, { "epoch": 135.1910447761194, "grad_norm": 23.500349044799805, "learning_rate": 9.6343537414966e-06, "loss": 40.235, "step": 5678 }, { "epoch": 135.21492537313432, "grad_norm": 25.186744689941406, "learning_rate": 9.63265306122449e-06, "loss": 40.7387, "step": 5679 }, { "epoch": 135.23880597014926, "grad_norm": 21.36899185180664, "learning_rate": 9.630952380952382e-06, "loss": 40.0717, "step": 5680 }, { "epoch": 135.26268656716417, "grad_norm": 23.584760665893555, "learning_rate": 9.629251700680272e-06, "loss": 40.1511, "step": 5681 }, { "epoch": 135.2865671641791, "grad_norm": 22.20633316040039, "learning_rate": 9.627551020408165e-06, "loss": 40.0655, "step": 5682 }, { "epoch": 135.31044776119404, "grad_norm": 19.99517822265625, "learning_rate": 9.625850340136055e-06, "loss": 39.8154, "step": 5683 }, { "epoch": 135.33432835820895, "grad_norm": 22.59499168395996, "learning_rate": 9.624149659863945e-06, "loss": 40.6277, "step": 5684 }, { "epoch": 135.3582089552239, "grad_norm": 17.33830451965332, "learning_rate": 9.622448979591837e-06, "loss": 39.4982, "step": 5685 }, { "epoch": 135.3820895522388, "grad_norm": 22.377470016479492, "learning_rate": 9.62074829931973e-06, "loss": 39.4303, "step": 5686 }, { "epoch": 135.40597014925373, "grad_norm": 22.994571685791016, "learning_rate": 9.61904761904762e-06, "loss": 38.643, "step": 5687 }, { "epoch": 135.42985074626867, "grad_norm": 17.39454460144043, "learning_rate": 9.617346938775512e-06, "loss": 41.7366, "step": 5688 }, { "epoch": 135.45373134328358, "grad_norm": 24.441268920898438, "learning_rate": 9.6156462585034e-06, "loss": 41.1602, "step": 5689 }, { "epoch": 135.47761194029852, "grad_norm": 16.182247161865234, "learning_rate": 9.613945578231293e-06, "loss": 40.4013, "step": 5690 }, { "epoch": 135.50149253731342, "grad_norm": 23.803049087524414, "learning_rate": 9.612244897959185e-06, "loss": 41.3252, "step": 5691 }, { "epoch": 135.52537313432836, "grad_norm": 20.565837860107422, "learning_rate": 9.610544217687075e-06, "loss": 40.3434, "step": 5692 }, { "epoch": 135.54925373134327, "grad_norm": 26.256967544555664, "learning_rate": 9.608843537414967e-06, "loss": 40.2281, "step": 5693 }, { "epoch": 135.5731343283582, "grad_norm": 18.350553512573242, "learning_rate": 9.607142857142858e-06, "loss": 39.1361, "step": 5694 }, { "epoch": 135.59701492537314, "grad_norm": 25.684616088867188, "learning_rate": 9.60544217687075e-06, "loss": 39.7602, "step": 5695 }, { "epoch": 135.62089552238805, "grad_norm": 22.026763916015625, "learning_rate": 9.60374149659864e-06, "loss": 40.2298, "step": 5696 }, { "epoch": 135.644776119403, "grad_norm": 15.483604431152344, "learning_rate": 9.60204081632653e-06, "loss": 39.8388, "step": 5697 }, { "epoch": 135.6686567164179, "grad_norm": 21.13356590270996, "learning_rate": 9.600340136054423e-06, "loss": 39.5239, "step": 5698 }, { "epoch": 135.69253731343284, "grad_norm": 17.695802688598633, "learning_rate": 9.598639455782313e-06, "loss": 40.336, "step": 5699 }, { "epoch": 135.71641791044777, "grad_norm": 16.947023391723633, "learning_rate": 9.596938775510205e-06, "loss": 39.7942, "step": 5700 }, { "epoch": 135.74029850746268, "grad_norm": 18.580827713012695, "learning_rate": 9.595238095238096e-06, "loss": 41.1081, "step": 5701 }, { "epoch": 135.76417910447762, "grad_norm": 19.310028076171875, "learning_rate": 9.593537414965988e-06, "loss": 38.773, "step": 5702 }, { "epoch": 135.78805970149253, "grad_norm": 17.71697235107422, "learning_rate": 9.591836734693878e-06, "loss": 41.1084, "step": 5703 }, { "epoch": 135.81194029850747, "grad_norm": 19.53215217590332, "learning_rate": 9.590136054421769e-06, "loss": 40.7152, "step": 5704 }, { "epoch": 135.83582089552237, "grad_norm": 26.050701141357422, "learning_rate": 9.58843537414966e-06, "loss": 41.2326, "step": 5705 }, { "epoch": 135.8597014925373, "grad_norm": 21.59418296813965, "learning_rate": 9.586734693877551e-06, "loss": 39.7008, "step": 5706 }, { "epoch": 135.88358208955225, "grad_norm": 17.44019889831543, "learning_rate": 9.585034013605443e-06, "loss": 40.0913, "step": 5707 }, { "epoch": 135.90746268656716, "grad_norm": 22.628219604492188, "learning_rate": 9.583333333333335e-06, "loss": 41.0258, "step": 5708 }, { "epoch": 135.9313432835821, "grad_norm": 18.4293155670166, "learning_rate": 9.581632653061226e-06, "loss": 39.4461, "step": 5709 }, { "epoch": 135.955223880597, "grad_norm": 17.186227798461914, "learning_rate": 9.579931972789116e-06, "loss": 38.9269, "step": 5710 }, { "epoch": 135.97910447761194, "grad_norm": 20.301193237304688, "learning_rate": 9.578231292517007e-06, "loss": 39.9266, "step": 5711 }, { "epoch": 136.0, "grad_norm": 18.15862464904785, "learning_rate": 9.576530612244899e-06, "loss": 35.705, "step": 5712 }, { "epoch": 136.02388059701494, "grad_norm": 17.27276611328125, "learning_rate": 9.57482993197279e-06, "loss": 39.9615, "step": 5713 }, { "epoch": 136.04776119402985, "grad_norm": 16.116933822631836, "learning_rate": 9.573129251700681e-06, "loss": 40.3388, "step": 5714 }, { "epoch": 136.07164179104478, "grad_norm": 14.106700897216797, "learning_rate": 9.571428571428573e-06, "loss": 39.8286, "step": 5715 }, { "epoch": 136.0955223880597, "grad_norm": 18.357019424438477, "learning_rate": 9.569727891156464e-06, "loss": 40.6918, "step": 5716 }, { "epoch": 136.11940298507463, "grad_norm": 16.41695213317871, "learning_rate": 9.568027210884354e-06, "loss": 40.1238, "step": 5717 }, { "epoch": 136.14328358208957, "grad_norm": 15.24857234954834, "learning_rate": 9.566326530612246e-06, "loss": 39.5314, "step": 5718 }, { "epoch": 136.16716417910447, "grad_norm": 21.097612380981445, "learning_rate": 9.564625850340137e-06, "loss": 39.4418, "step": 5719 }, { "epoch": 136.1910447761194, "grad_norm": 15.658564567565918, "learning_rate": 9.562925170068029e-06, "loss": 40.4354, "step": 5720 }, { "epoch": 136.21492537313432, "grad_norm": 18.364137649536133, "learning_rate": 9.561224489795919e-06, "loss": 39.4063, "step": 5721 }, { "epoch": 136.23880597014926, "grad_norm": 16.437915802001953, "learning_rate": 9.559523809523811e-06, "loss": 39.2412, "step": 5722 }, { "epoch": 136.26268656716417, "grad_norm": 18.161527633666992, "learning_rate": 9.557823129251701e-06, "loss": 40.1167, "step": 5723 }, { "epoch": 136.2865671641791, "grad_norm": 19.824352264404297, "learning_rate": 9.556122448979592e-06, "loss": 39.5653, "step": 5724 }, { "epoch": 136.31044776119404, "grad_norm": 16.736989974975586, "learning_rate": 9.554421768707484e-06, "loss": 39.4445, "step": 5725 }, { "epoch": 136.33432835820895, "grad_norm": NaN, "learning_rate": 9.552721088435374e-06, "loss": 40.8717, "step": 5726 }, { "epoch": 136.3582089552239, "grad_norm": 16.963516235351562, "learning_rate": 9.552721088435374e-06, "loss": 40.3213, "step": 5727 }, { "epoch": 136.3820895522388, "grad_norm": 18.735271453857422, "learning_rate": 9.551020408163266e-06, "loss": 40.8078, "step": 5728 }, { "epoch": 136.40597014925373, "grad_norm": 19.308032989501953, "learning_rate": 9.549319727891157e-06, "loss": 39.9691, "step": 5729 }, { "epoch": 136.42985074626867, "grad_norm": 14.293987274169922, "learning_rate": 9.547619047619049e-06, "loss": 39.0772, "step": 5730 }, { "epoch": 136.45373134328358, "grad_norm": 21.123519897460938, "learning_rate": 9.54591836734694e-06, "loss": 40.995, "step": 5731 }, { "epoch": 136.47761194029852, "grad_norm": 16.979511260986328, "learning_rate": 9.54421768707483e-06, "loss": 41.6872, "step": 5732 }, { "epoch": 136.50149253731342, "grad_norm": 20.042757034301758, "learning_rate": 9.542517006802722e-06, "loss": 40.1547, "step": 5733 }, { "epoch": 136.52537313432836, "grad_norm": 19.689138412475586, "learning_rate": 9.540816326530612e-06, "loss": 40.4422, "step": 5734 }, { "epoch": 136.54925373134327, "grad_norm": 19.830251693725586, "learning_rate": 9.539115646258504e-06, "loss": 38.6685, "step": 5735 }, { "epoch": 136.5731343283582, "grad_norm": 19.68994903564453, "learning_rate": 9.537414965986396e-06, "loss": 40.3769, "step": 5736 }, { "epoch": 136.59701492537314, "grad_norm": 19.520610809326172, "learning_rate": 9.535714285714287e-06, "loss": 39.8802, "step": 5737 }, { "epoch": 136.62089552238805, "grad_norm": 20.209075927734375, "learning_rate": 9.534013605442177e-06, "loss": 40.5337, "step": 5738 }, { "epoch": 136.644776119403, "grad_norm": 18.009183883666992, "learning_rate": 9.53231292517007e-06, "loss": 40.5237, "step": 5739 }, { "epoch": 136.6686567164179, "grad_norm": 17.618444442749023, "learning_rate": 9.53061224489796e-06, "loss": 39.4263, "step": 5740 }, { "epoch": 136.69253731343284, "grad_norm": 17.066255569458008, "learning_rate": 9.528911564625852e-06, "loss": 39.0451, "step": 5741 }, { "epoch": 136.71641791044777, "grad_norm": 16.11752700805664, "learning_rate": 9.527210884353742e-06, "loss": 40.5889, "step": 5742 }, { "epoch": 136.74029850746268, "grad_norm": 24.23548126220703, "learning_rate": 9.525510204081634e-06, "loss": 40.1915, "step": 5743 }, { "epoch": 136.76417910447762, "grad_norm": 17.77320671081543, "learning_rate": 9.523809523809525e-06, "loss": 40.4259, "step": 5744 }, { "epoch": 136.78805970149253, "grad_norm": 18.783700942993164, "learning_rate": 9.522108843537415e-06, "loss": 40.112, "step": 5745 }, { "epoch": 136.81194029850747, "grad_norm": 19.52975845336914, "learning_rate": 9.520408163265307e-06, "loss": 40.2694, "step": 5746 }, { "epoch": 136.83582089552237, "grad_norm": 22.467615127563477, "learning_rate": 9.518707482993198e-06, "loss": 41.4122, "step": 5747 }, { "epoch": 136.8597014925373, "grad_norm": 16.6851806640625, "learning_rate": 9.51700680272109e-06, "loss": 40.2696, "step": 5748 }, { "epoch": 136.88358208955225, "grad_norm": 19.272367477416992, "learning_rate": 9.51530612244898e-06, "loss": 40.4809, "step": 5749 }, { "epoch": 136.90746268656716, "grad_norm": 25.64748764038086, "learning_rate": 9.513605442176872e-06, "loss": 40.3818, "step": 5750 }, { "epoch": 136.9313432835821, "grad_norm": 17.339828491210938, "learning_rate": 9.511904761904763e-06, "loss": 40.3506, "step": 5751 }, { "epoch": 136.955223880597, "grad_norm": 23.17976188659668, "learning_rate": 9.510204081632653e-06, "loss": 39.2061, "step": 5752 }, { "epoch": 136.97910447761194, "grad_norm": 23.762033462524414, "learning_rate": 9.508503401360545e-06, "loss": 40.4341, "step": 5753 }, { "epoch": 137.0, "grad_norm": 14.065231323242188, "learning_rate": 9.506802721088436e-06, "loss": 35.9299, "step": 5754 }, { "epoch": 137.02388059701494, "grad_norm": 22.709367752075195, "learning_rate": 9.505102040816328e-06, "loss": 39.375, "step": 5755 }, { "epoch": 137.04776119402985, "grad_norm": 20.099899291992188, "learning_rate": 9.503401360544218e-06, "loss": 39.1875, "step": 5756 }, { "epoch": 137.07164179104478, "grad_norm": 16.251981735229492, "learning_rate": 9.50170068027211e-06, "loss": 39.7174, "step": 5757 }, { "epoch": 137.0955223880597, "grad_norm": 17.096813201904297, "learning_rate": 9.5e-06, "loss": 39.1641, "step": 5758 }, { "epoch": 137.11940298507463, "grad_norm": 21.969449996948242, "learning_rate": 9.498299319727891e-06, "loss": 39.6796, "step": 5759 }, { "epoch": 137.14328358208957, "grad_norm": 13.48315715789795, "learning_rate": 9.496598639455783e-06, "loss": 40.1312, "step": 5760 }, { "epoch": 137.16716417910447, "grad_norm": 15.142317771911621, "learning_rate": 9.494897959183675e-06, "loss": 39.0918, "step": 5761 }, { "epoch": 137.1910447761194, "grad_norm": 15.083260536193848, "learning_rate": 9.493197278911566e-06, "loss": 40.3378, "step": 5762 }, { "epoch": 137.21492537313432, "grad_norm": 16.5947208404541, "learning_rate": 9.491496598639458e-06, "loss": 40.0254, "step": 5763 }, { "epoch": 137.23880597014926, "grad_norm": 17.31525421142578, "learning_rate": 9.489795918367348e-06, "loss": 39.7925, "step": 5764 }, { "epoch": 137.26268656716417, "grad_norm": 13.33224105834961, "learning_rate": 9.488095238095238e-06, "loss": 39.1608, "step": 5765 }, { "epoch": 137.2865671641791, "grad_norm": 18.62505340576172, "learning_rate": 9.48639455782313e-06, "loss": 39.6955, "step": 5766 }, { "epoch": 137.31044776119404, "grad_norm": 20.526426315307617, "learning_rate": 9.484693877551021e-06, "loss": 40.8692, "step": 5767 }, { "epoch": 137.33432835820895, "grad_norm": 17.54509162902832, "learning_rate": 9.482993197278913e-06, "loss": 38.9902, "step": 5768 }, { "epoch": 137.3582089552239, "grad_norm": 13.5675048828125, "learning_rate": 9.481292517006803e-06, "loss": 40.2917, "step": 5769 }, { "epoch": 137.3820895522388, "grad_norm": 17.16435432434082, "learning_rate": 9.479591836734695e-06, "loss": 39.8777, "step": 5770 }, { "epoch": 137.40597014925373, "grad_norm": NaN, "learning_rate": 9.477891156462586e-06, "loss": 40.3914, "step": 5771 }, { "epoch": 137.42985074626867, "grad_norm": 18.361515045166016, "learning_rate": 9.477891156462586e-06, "loss": 42.1308, "step": 5772 }, { "epoch": 137.45373134328358, "grad_norm": 15.623734474182129, "learning_rate": 9.476190476190476e-06, "loss": 41.3761, "step": 5773 }, { "epoch": 137.47761194029852, "grad_norm": 16.020898818969727, "learning_rate": 9.474489795918368e-06, "loss": 40.9852, "step": 5774 }, { "epoch": 137.50149253731342, "grad_norm": 24.223079681396484, "learning_rate": 9.472789115646259e-06, "loss": 40.3601, "step": 5775 }, { "epoch": 137.52537313432836, "grad_norm": 16.226585388183594, "learning_rate": 9.471088435374151e-06, "loss": 39.25, "step": 5776 }, { "epoch": 137.54925373134327, "grad_norm": 14.546438217163086, "learning_rate": 9.469387755102041e-06, "loss": 41.5317, "step": 5777 }, { "epoch": 137.5731343283582, "grad_norm": 25.475976943969727, "learning_rate": 9.467687074829933e-06, "loss": 40.7448, "step": 5778 }, { "epoch": 137.59701492537314, "grad_norm": 21.050052642822266, "learning_rate": 9.465986394557824e-06, "loss": 39.494, "step": 5779 }, { "epoch": 137.62089552238805, "grad_norm": 14.88813591003418, "learning_rate": 9.464285714285714e-06, "loss": 40.073, "step": 5780 }, { "epoch": 137.644776119403, "grad_norm": 21.426273345947266, "learning_rate": 9.462585034013606e-06, "loss": 39.7685, "step": 5781 }, { "epoch": 137.6686567164179, "grad_norm": 21.00870704650879, "learning_rate": 9.460884353741497e-06, "loss": 39.4509, "step": 5782 }, { "epoch": 137.69253731343284, "grad_norm": 14.929703712463379, "learning_rate": 9.459183673469389e-06, "loss": 41.4336, "step": 5783 }, { "epoch": 137.71641791044777, "grad_norm": 13.802526473999023, "learning_rate": 9.457482993197281e-06, "loss": 40.6038, "step": 5784 }, { "epoch": 137.74029850746268, "grad_norm": 25.661685943603516, "learning_rate": 9.455782312925171e-06, "loss": 39.2058, "step": 5785 }, { "epoch": 137.76417910447762, "grad_norm": 17.845937728881836, "learning_rate": 9.454081632653062e-06, "loss": 41.1643, "step": 5786 }, { "epoch": 137.78805970149253, "grad_norm": 25.97015953063965, "learning_rate": 9.452380952380952e-06, "loss": 39.7219, "step": 5787 }, { "epoch": 137.81194029850747, "grad_norm": 17.875333786010742, "learning_rate": 9.450680272108844e-06, "loss": 39.7798, "step": 5788 }, { "epoch": 137.83582089552237, "grad_norm": 18.28219223022461, "learning_rate": 9.448979591836736e-06, "loss": 39.3965, "step": 5789 }, { "epoch": 137.8597014925373, "grad_norm": 19.815677642822266, "learning_rate": 9.447278911564627e-06, "loss": 40.5489, "step": 5790 }, { "epoch": 137.88358208955225, "grad_norm": 20.447330474853516, "learning_rate": 9.445578231292519e-06, "loss": 39.633, "step": 5791 }, { "epoch": 137.90746268656716, "grad_norm": 16.50349998474121, "learning_rate": 9.44387755102041e-06, "loss": 39.9416, "step": 5792 }, { "epoch": 137.9313432835821, "grad_norm": 22.41202735900879, "learning_rate": 9.4421768707483e-06, "loss": 41.0672, "step": 5793 }, { "epoch": 137.955223880597, "grad_norm": 17.746328353881836, "learning_rate": 9.440476190476192e-06, "loss": 39.8027, "step": 5794 }, { "epoch": 137.97910447761194, "grad_norm": 18.95381736755371, "learning_rate": 9.438775510204082e-06, "loss": 40.2487, "step": 5795 }, { "epoch": 138.0, "grad_norm": 14.501996994018555, "learning_rate": 9.437074829931974e-06, "loss": 35.2047, "step": 5796 }, { "epoch": 138.02388059701494, "grad_norm": 23.209070205688477, "learning_rate": 9.435374149659865e-06, "loss": 40.6912, "step": 5797 }, { "epoch": 138.04776119402985, "grad_norm": 19.782623291015625, "learning_rate": 9.433673469387757e-06, "loss": 40.41, "step": 5798 }, { "epoch": 138.07164179104478, "grad_norm": 18.50634002685547, "learning_rate": 9.431972789115647e-06, "loss": 40.1066, "step": 5799 }, { "epoch": 138.0955223880597, "grad_norm": 19.37914276123047, "learning_rate": 9.430272108843537e-06, "loss": 39.8479, "step": 5800 }, { "epoch": 138.11940298507463, "grad_norm": NaN, "learning_rate": 9.42857142857143e-06, "loss": 69.6561, "step": 5801 }, { "epoch": 138.14328358208957, "grad_norm": 20.234079360961914, "learning_rate": 9.42857142857143e-06, "loss": 40.7243, "step": 5802 }, { "epoch": 138.16716417910447, "grad_norm": 19.689483642578125, "learning_rate": 9.42687074829932e-06, "loss": 40.2885, "step": 5803 }, { "epoch": 138.1910447761194, "grad_norm": 15.311651229858398, "learning_rate": 9.425170068027212e-06, "loss": 40.0564, "step": 5804 }, { "epoch": 138.21492537313432, "grad_norm": 22.144147872924805, "learning_rate": 9.423469387755102e-06, "loss": 40.0064, "step": 5805 }, { "epoch": 138.23880597014926, "grad_norm": 19.106332778930664, "learning_rate": 9.421768707482995e-06, "loss": 38.9603, "step": 5806 }, { "epoch": 138.26268656716417, "grad_norm": 17.888164520263672, "learning_rate": 9.420068027210885e-06, "loss": 38.8051, "step": 5807 }, { "epoch": 138.2865671641791, "grad_norm": NaN, "learning_rate": 9.418367346938775e-06, "loss": 34.5894, "step": 5808 }, { "epoch": 138.31044776119404, "grad_norm": 16.295089721679688, "learning_rate": 9.418367346938775e-06, "loss": 39.8359, "step": 5809 }, { "epoch": 138.33432835820895, "grad_norm": 17.898618698120117, "learning_rate": 9.416666666666667e-06, "loss": 40.9419, "step": 5810 }, { "epoch": 138.3582089552239, "grad_norm": 15.549861907958984, "learning_rate": 9.414965986394558e-06, "loss": 38.8705, "step": 5811 }, { "epoch": 138.3820895522388, "grad_norm": 21.414033889770508, "learning_rate": 9.41326530612245e-06, "loss": 40.366, "step": 5812 }, { "epoch": 138.40597014925373, "grad_norm": 18.34477996826172, "learning_rate": 9.411564625850342e-06, "loss": 41.358, "step": 5813 }, { "epoch": 138.42985074626867, "grad_norm": 18.43037223815918, "learning_rate": 9.409863945578232e-06, "loss": 41.0623, "step": 5814 }, { "epoch": 138.45373134328358, "grad_norm": 22.278278350830078, "learning_rate": 9.408163265306123e-06, "loss": 39.3361, "step": 5815 }, { "epoch": 138.47761194029852, "grad_norm": NaN, "learning_rate": 9.406462585034015e-06, "loss": 39.6796, "step": 5816 }, { "epoch": 138.50149253731342, "grad_norm": 18.300764083862305, "learning_rate": 9.406462585034015e-06, "loss": 40.625, "step": 5817 }, { "epoch": 138.52537313432836, "grad_norm": 16.382335662841797, "learning_rate": 9.404761904761905e-06, "loss": 40.4843, "step": 5818 }, { "epoch": 138.54925373134327, "grad_norm": 20.629667282104492, "learning_rate": 9.403061224489797e-06, "loss": 39.1476, "step": 5819 }, { "epoch": 138.5731343283582, "grad_norm": 25.73557472229004, "learning_rate": 9.401360544217688e-06, "loss": 41.4348, "step": 5820 }, { "epoch": 138.59701492537314, "grad_norm": 15.648715019226074, "learning_rate": 9.39965986394558e-06, "loss": 40.4888, "step": 5821 }, { "epoch": 138.62089552238805, "grad_norm": 16.803377151489258, "learning_rate": 9.39795918367347e-06, "loss": 40.5578, "step": 5822 }, { "epoch": 138.644776119403, "grad_norm": 29.228322982788086, "learning_rate": 9.39625850340136e-06, "loss": 40.6632, "step": 5823 }, { "epoch": 138.6686567164179, "grad_norm": 15.427154541015625, "learning_rate": 9.394557823129253e-06, "loss": 39.1214, "step": 5824 }, { "epoch": 138.69253731343284, "grad_norm": 28.359830856323242, "learning_rate": 9.392857142857143e-06, "loss": 40.3437, "step": 5825 }, { "epoch": 138.71641791044777, "grad_norm": 18.356201171875, "learning_rate": 9.391156462585035e-06, "loss": 40.6458, "step": 5826 }, { "epoch": 138.74029850746268, "grad_norm": 19.94028091430664, "learning_rate": 9.389455782312926e-06, "loss": 39.7405, "step": 5827 }, { "epoch": 138.76417910447762, "grad_norm": 27.602651596069336, "learning_rate": 9.387755102040818e-06, "loss": 40.1602, "step": 5828 }, { "epoch": 138.78805970149253, "grad_norm": 16.400421142578125, "learning_rate": 9.386054421768708e-06, "loss": 38.7246, "step": 5829 }, { "epoch": 138.81194029850747, "grad_norm": 26.579187393188477, "learning_rate": 9.384353741496599e-06, "loss": 38.5539, "step": 5830 }, { "epoch": 138.83582089552237, "grad_norm": 21.284912109375, "learning_rate": 9.38265306122449e-06, "loss": 40.3989, "step": 5831 }, { "epoch": 138.8597014925373, "grad_norm": 23.772157669067383, "learning_rate": 9.380952380952381e-06, "loss": 39.4185, "step": 5832 }, { "epoch": 138.88358208955225, "grad_norm": 29.053791046142578, "learning_rate": 9.379251700680273e-06, "loss": 41.1253, "step": 5833 }, { "epoch": 138.90746268656716, "grad_norm": 16.733402252197266, "learning_rate": 9.377551020408164e-06, "loss": 40.1286, "step": 5834 }, { "epoch": 138.9313432835821, "grad_norm": 36.72946548461914, "learning_rate": 9.375850340136056e-06, "loss": 38.9963, "step": 5835 }, { "epoch": 138.955223880597, "grad_norm": 26.51390838623047, "learning_rate": 9.374149659863946e-06, "loss": 40.031, "step": 5836 }, { "epoch": 138.97910447761194, "grad_norm": 31.663070678710938, "learning_rate": 9.372448979591836e-06, "loss": 40.099, "step": 5837 }, { "epoch": 139.0, "grad_norm": 21.624252319335938, "learning_rate": 9.370748299319729e-06, "loss": 35.3932, "step": 5838 }, { "epoch": 139.02388059701494, "grad_norm": 33.74135971069336, "learning_rate": 9.36904761904762e-06, "loss": 40.1791, "step": 5839 }, { "epoch": 139.04776119402985, "grad_norm": 21.488868713378906, "learning_rate": 9.367346938775511e-06, "loss": 38.745, "step": 5840 }, { "epoch": 139.07164179104478, "grad_norm": 35.68408203125, "learning_rate": 9.365646258503403e-06, "loss": 40.4261, "step": 5841 }, { "epoch": 139.0955223880597, "grad_norm": 27.531938552856445, "learning_rate": 9.363945578231294e-06, "loss": 39.8502, "step": 5842 }, { "epoch": 139.11940298507463, "grad_norm": 41.84492874145508, "learning_rate": 9.362244897959184e-06, "loss": 40.0804, "step": 5843 }, { "epoch": 139.14328358208957, "grad_norm": 34.03583526611328, "learning_rate": 9.360544217687076e-06, "loss": 40.4201, "step": 5844 }, { "epoch": 139.16716417910447, "grad_norm": 27.924837112426758, "learning_rate": 9.358843537414966e-06, "loss": 40.006, "step": 5845 }, { "epoch": 139.1910447761194, "grad_norm": 29.39901351928711, "learning_rate": 9.357142857142859e-06, "loss": 39.8178, "step": 5846 }, { "epoch": 139.21492537313432, "grad_norm": 31.170534133911133, "learning_rate": 9.355442176870749e-06, "loss": 39.8539, "step": 5847 }, { "epoch": 139.23880597014926, "grad_norm": 24.115842819213867, "learning_rate": 9.353741496598641e-06, "loss": 39.5716, "step": 5848 }, { "epoch": 139.26268656716417, "grad_norm": 30.86311149597168, "learning_rate": 9.352040816326531e-06, "loss": 39.3648, "step": 5849 }, { "epoch": 139.2865671641791, "grad_norm": 23.781522750854492, "learning_rate": 9.350340136054422e-06, "loss": 39.8786, "step": 5850 }, { "epoch": 139.31044776119404, "grad_norm": 38.31922149658203, "learning_rate": 9.348639455782314e-06, "loss": 39.9693, "step": 5851 }, { "epoch": 139.33432835820895, "grad_norm": 26.093704223632812, "learning_rate": 9.346938775510204e-06, "loss": 41.1107, "step": 5852 }, { "epoch": 139.3582089552239, "grad_norm": 35.27671813964844, "learning_rate": 9.345238095238096e-06, "loss": 39.3532, "step": 5853 }, { "epoch": 139.3820895522388, "grad_norm": 28.302349090576172, "learning_rate": 9.343537414965987e-06, "loss": 40.0611, "step": 5854 }, { "epoch": 139.40597014925373, "grad_norm": 34.33140182495117, "learning_rate": 9.341836734693879e-06, "loss": 39.3752, "step": 5855 }, { "epoch": 139.42985074626867, "grad_norm": 30.547216415405273, "learning_rate": 9.34013605442177e-06, "loss": 41.2377, "step": 5856 }, { "epoch": 139.45373134328358, "grad_norm": 30.49541664123535, "learning_rate": 9.33843537414966e-06, "loss": 39.2959, "step": 5857 }, { "epoch": 139.47761194029852, "grad_norm": 25.64105796813965, "learning_rate": 9.336734693877552e-06, "loss": 41.0909, "step": 5858 }, { "epoch": 139.50149253731342, "grad_norm": NaN, "learning_rate": 9.335034013605442e-06, "loss": 60.6154, "step": 5859 }, { "epoch": 139.52537313432836, "grad_norm": 35.060829162597656, "learning_rate": 9.335034013605442e-06, "loss": 39.9802, "step": 5860 }, { "epoch": 139.54925373134327, "grad_norm": 27.54969024658203, "learning_rate": 9.333333333333334e-06, "loss": 39.3133, "step": 5861 }, { "epoch": 139.5731343283582, "grad_norm": 38.9669075012207, "learning_rate": 9.331632653061225e-06, "loss": 41.312, "step": 5862 }, { "epoch": 139.59701492537314, "grad_norm": 34.280067443847656, "learning_rate": 9.329931972789117e-06, "loss": 39.4449, "step": 5863 }, { "epoch": 139.62089552238805, "grad_norm": 29.73419189453125, "learning_rate": 9.328231292517007e-06, "loss": 40.2213, "step": 5864 }, { "epoch": 139.644776119403, "grad_norm": 24.6690673828125, "learning_rate": 9.326530612244898e-06, "loss": 40.6231, "step": 5865 }, { "epoch": 139.6686567164179, "grad_norm": 35.420101165771484, "learning_rate": 9.32482993197279e-06, "loss": 39.6659, "step": 5866 }, { "epoch": 139.69253731343284, "grad_norm": 32.64005661010742, "learning_rate": 9.323129251700682e-06, "loss": 39.3794, "step": 5867 }, { "epoch": 139.71641791044777, "grad_norm": 30.59541893005371, "learning_rate": 9.321428571428572e-06, "loss": 40.0863, "step": 5868 }, { "epoch": 139.74029850746268, "grad_norm": 28.199207305908203, "learning_rate": 9.319727891156464e-06, "loss": 39.7205, "step": 5869 }, { "epoch": 139.76417910447762, "grad_norm": 28.04796600341797, "learning_rate": 9.318027210884355e-06, "loss": 39.0113, "step": 5870 }, { "epoch": 139.78805970149253, "grad_norm": 24.012332916259766, "learning_rate": 9.316326530612245e-06, "loss": 40.5084, "step": 5871 }, { "epoch": 139.81194029850747, "grad_norm": 34.23363494873047, "learning_rate": 9.314625850340137e-06, "loss": 39.5083, "step": 5872 }, { "epoch": 139.83582089552237, "grad_norm": 29.06350326538086, "learning_rate": 9.312925170068028e-06, "loss": 39.3723, "step": 5873 }, { "epoch": 139.8597014925373, "grad_norm": 28.527681350708008, "learning_rate": 9.31122448979592e-06, "loss": 40.6419, "step": 5874 }, { "epoch": 139.88358208955225, "grad_norm": 29.12566375732422, "learning_rate": 9.30952380952381e-06, "loss": 40.6302, "step": 5875 }, { "epoch": 139.90746268656716, "grad_norm": 31.133377075195312, "learning_rate": 9.307823129251702e-06, "loss": 40.5931, "step": 5876 }, { "epoch": 139.9313432835821, "grad_norm": 24.83881378173828, "learning_rate": 9.306122448979593e-06, "loss": 39.411, "step": 5877 }, { "epoch": 139.955223880597, "grad_norm": 31.597652435302734, "learning_rate": 9.304421768707483e-06, "loss": 39.0108, "step": 5878 }, { "epoch": 139.97910447761194, "grad_norm": 29.325529098510742, "learning_rate": 9.302721088435375e-06, "loss": 39.4128, "step": 5879 }, { "epoch": 140.0, "grad_norm": 25.26357078552246, "learning_rate": 9.301020408163265e-06, "loss": 35.0399, "step": 5880 }, { "epoch": 140.0, "step": 5880, "total_flos": 2.890707963725509e+17, "train_loss": 2.877911633861308, "train_runtime": 12847.333, "train_samples_per_second": 58.322, "train_steps_per_second": 0.458 }, { "epoch": 140.02388059701494, "grad_norm": 27.523469924926758, "learning_rate": 1e-05, "loss": 39.3141, "step": 5881 }, { "epoch": 140.04776119402985, "grad_norm": Infinity, "learning_rate": 9.998412698412699e-06, "loss": 48.2819, "step": 5882 }, { "epoch": 140.07164179104478, "grad_norm": Infinity, "learning_rate": 9.998412698412699e-06, "loss": 48.6322, "step": 5883 }, { "epoch": 140.0955223880597, "grad_norm": 486.48309326171875, "learning_rate": 9.998412698412699e-06, "loss": 47.6507, "step": 5884 }, { "epoch": 140.11940298507463, "grad_norm": 283.4151306152344, "learning_rate": 9.996825396825399e-06, "loss": 43.9795, "step": 5885 }, { "epoch": 140.14328358208957, "grad_norm": 100.155517578125, "learning_rate": 9.995238095238095e-06, "loss": 42.0815, "step": 5886 }, { "epoch": 140.16716417910447, "grad_norm": 87.53604888916016, "learning_rate": 9.993650793650793e-06, "loss": 41.1747, "step": 5887 }, { "epoch": 140.1910447761194, "grad_norm": 57.2898063659668, "learning_rate": 9.992063492063493e-06, "loss": 40.5261, "step": 5888 }, { "epoch": 140.21492537313432, "grad_norm": 67.39340209960938, "learning_rate": 9.990476190476191e-06, "loss": 41.0557, "step": 5889 }, { "epoch": 140.23880597014926, "grad_norm": 52.16965103149414, "learning_rate": 9.98888888888889e-06, "loss": 42.013, "step": 5890 }, { "epoch": 140.26268656716417, "grad_norm": 59.99985885620117, "learning_rate": 9.987301587301588e-06, "loss": 40.2188, "step": 5891 }, { "epoch": 140.2865671641791, "grad_norm": 44.25408935546875, "learning_rate": 9.985714285714286e-06, "loss": 40.8221, "step": 5892 }, { "epoch": 140.31044776119404, "grad_norm": 55.65086364746094, "learning_rate": 9.984126984126986e-06, "loss": 40.2852, "step": 5893 }, { "epoch": 140.33432835820895, "grad_norm": 36.05537033081055, "learning_rate": 9.982539682539684e-06, "loss": 40.1435, "step": 5894 }, { "epoch": 140.3582089552239, "grad_norm": 48.20842361450195, "learning_rate": 9.980952380952382e-06, "loss": 40.599, "step": 5895 }, { "epoch": 140.3820895522388, "grad_norm": 32.273136138916016, "learning_rate": 9.97936507936508e-06, "loss": 41.1943, "step": 5896 }, { "epoch": 140.40597014925373, "grad_norm": 45.55663299560547, "learning_rate": 9.977777777777778e-06, "loss": 39.6306, "step": 5897 }, { "epoch": 140.42985074626867, "grad_norm": NaN, "learning_rate": 9.976190476190477e-06, "loss": 39.3811, "step": 5898 }, { "epoch": 140.45373134328358, "grad_norm": 45.397613525390625, "learning_rate": 9.976190476190477e-06, "loss": 41.2507, "step": 5899 }, { "epoch": 140.47761194029852, "grad_norm": 29.530902862548828, "learning_rate": 9.974603174603176e-06, "loss": 41.1053, "step": 5900 }, { "epoch": 140.50149253731342, "grad_norm": 27.80730628967285, "learning_rate": 9.973015873015875e-06, "loss": 40.5605, "step": 5901 }, { "epoch": 140.52537313432836, "grad_norm": 30.194534301757812, "learning_rate": 9.971428571428571e-06, "loss": 40.2929, "step": 5902 }, { "epoch": 140.54925373134327, "grad_norm": 25.78581428527832, "learning_rate": 9.969841269841271e-06, "loss": 40.735, "step": 5903 }, { "epoch": 140.5731343283582, "grad_norm": 33.138694763183594, "learning_rate": 9.968253968253969e-06, "loss": 40.1703, "step": 5904 }, { "epoch": 140.59701492537314, "grad_norm": 28.775943756103516, "learning_rate": 9.966666666666667e-06, "loss": 40.9193, "step": 5905 }, { "epoch": 140.62089552238805, "grad_norm": 32.31502914428711, "learning_rate": 9.965079365079365e-06, "loss": 40.8351, "step": 5906 }, { "epoch": 140.644776119403, "grad_norm": 26.532331466674805, "learning_rate": 9.963492063492064e-06, "loss": 40.9574, "step": 5907 }, { "epoch": 140.6686567164179, "grad_norm": 29.57513427734375, "learning_rate": 9.961904761904763e-06, "loss": 40.6701, "step": 5908 }, { "epoch": 140.69253731343284, "grad_norm": 24.19226837158203, "learning_rate": 9.960317460317462e-06, "loss": 40.8407, "step": 5909 }, { "epoch": 140.71641791044777, "grad_norm": 31.81574058532715, "learning_rate": 9.95873015873016e-06, "loss": 39.5199, "step": 5910 }, { "epoch": 140.74029850746268, "grad_norm": 22.69314956665039, "learning_rate": 9.957142857142858e-06, "loss": 39.5871, "step": 5911 }, { "epoch": 140.76417910447762, "grad_norm": 32.679744720458984, "learning_rate": 9.955555555555556e-06, "loss": 40.8722, "step": 5912 }, { "epoch": 140.78805970149253, "grad_norm": 29.58128547668457, "learning_rate": 9.953968253968254e-06, "loss": 40.1016, "step": 5913 }, { "epoch": 140.81194029850747, "grad_norm": 28.29144287109375, "learning_rate": 9.952380952380954e-06, "loss": 40.1457, "step": 5914 }, { "epoch": 140.83582089552237, "grad_norm": 29.59137725830078, "learning_rate": 9.950793650793652e-06, "loss": 39.9583, "step": 5915 }, { "epoch": 140.8597014925373, "grad_norm": 26.7771053314209, "learning_rate": 9.94920634920635e-06, "loss": 39.75, "step": 5916 }, { "epoch": 140.88358208955225, "grad_norm": 22.15979766845703, "learning_rate": 9.947619047619049e-06, "loss": 40.9388, "step": 5917 }, { "epoch": 140.90746268656716, "grad_norm": 27.87674903869629, "learning_rate": 9.946031746031747e-06, "loss": 39.6195, "step": 5918 }, { "epoch": 140.9313432835821, "grad_norm": 20.211345672607422, "learning_rate": 9.944444444444445e-06, "loss": 38.7449, "step": 5919 }, { "epoch": 140.955223880597, "grad_norm": 18.372730255126953, "learning_rate": 9.942857142857145e-06, "loss": 39.9378, "step": 5920 }, { "epoch": 140.97910447761194, "grad_norm": NaN, "learning_rate": 9.941269841269841e-06, "loss": 34.089, "step": 5921 }, { "epoch": 141.0, "grad_norm": 19.061168670654297, "learning_rate": 9.941269841269841e-06, "loss": 34.6002, "step": 5922 }, { "epoch": 141.02388059701494, "grad_norm": 15.835738182067871, "learning_rate": 9.939682539682541e-06, "loss": 39.6239, "step": 5923 }, { "epoch": 141.04776119402985, "grad_norm": 17.03848648071289, "learning_rate": 9.93809523809524e-06, "loss": 39.4062, "step": 5924 }, { "epoch": 141.07164179104478, "grad_norm": 20.666255950927734, "learning_rate": 9.936507936507937e-06, "loss": 39.2874, "step": 5925 }, { "epoch": 141.0955223880597, "grad_norm": 18.830081939697266, "learning_rate": 9.934920634920636e-06, "loss": 38.9847, "step": 5926 }, { "epoch": 141.11940298507463, "grad_norm": 18.74386215209961, "learning_rate": 9.933333333333334e-06, "loss": 39.0351, "step": 5927 }, { "epoch": 141.14328358208957, "grad_norm": 20.883689880371094, "learning_rate": 9.931746031746032e-06, "loss": 39.7748, "step": 5928 }, { "epoch": 141.16716417910447, "grad_norm": 20.63790512084961, "learning_rate": 9.930158730158732e-06, "loss": 40.9277, "step": 5929 }, { "epoch": 141.1910447761194, "grad_norm": 16.963199615478516, "learning_rate": 9.92857142857143e-06, "loss": 40.1546, "step": 5930 }, { "epoch": 141.21492537313432, "grad_norm": 21.610614776611328, "learning_rate": 9.926984126984128e-06, "loss": 40.1432, "step": 5931 }, { "epoch": 141.23880597014926, "grad_norm": 15.654236793518066, "learning_rate": 9.925396825396826e-06, "loss": 40.1835, "step": 5932 }, { "epoch": 141.26268656716417, "grad_norm": 22.736265182495117, "learning_rate": 9.923809523809524e-06, "loss": 40.5883, "step": 5933 }, { "epoch": 141.2865671641791, "grad_norm": 17.99093246459961, "learning_rate": 9.922222222222222e-06, "loss": 41.2193, "step": 5934 }, { "epoch": 141.31044776119404, "grad_norm": 19.520702362060547, "learning_rate": 9.920634920634922e-06, "loss": 40.4416, "step": 5935 }, { "epoch": 141.33432835820895, "grad_norm": 15.596070289611816, "learning_rate": 9.91904761904762e-06, "loss": 40.2213, "step": 5936 }, { "epoch": 141.3582089552239, "grad_norm": 20.863988876342773, "learning_rate": 9.917460317460319e-06, "loss": 40.0126, "step": 5937 }, { "epoch": 141.3820895522388, "grad_norm": NaN, "learning_rate": 9.915873015873017e-06, "loss": 59.6905, "step": 5938 }, { "epoch": 141.40597014925373, "grad_norm": 16.92725372314453, "learning_rate": 9.915873015873017e-06, "loss": 39.2636, "step": 5939 }, { "epoch": 141.42985074626867, "grad_norm": 20.527759552001953, "learning_rate": 9.914285714285715e-06, "loss": 40.1856, "step": 5940 }, { "epoch": 141.45373134328358, "grad_norm": 18.0676212310791, "learning_rate": 9.912698412698413e-06, "loss": 39.8544, "step": 5941 }, { "epoch": 141.47761194029852, "grad_norm": 18.4247989654541, "learning_rate": 9.911111111111113e-06, "loss": 39.4032, "step": 5942 }, { "epoch": 141.50149253731342, "grad_norm": 16.13834571838379, "learning_rate": 9.90952380952381e-06, "loss": 39.2789, "step": 5943 }, { "epoch": 141.52537313432836, "grad_norm": 20.949169158935547, "learning_rate": 9.90793650793651e-06, "loss": 40.7231, "step": 5944 }, { "epoch": 141.54925373134327, "grad_norm": 20.491546630859375, "learning_rate": 9.906349206349207e-06, "loss": 39.8461, "step": 5945 }, { "epoch": 141.5731343283582, "grad_norm": 15.535492897033691, "learning_rate": 9.904761904761906e-06, "loss": 40.4749, "step": 5946 }, { "epoch": 141.59701492537314, "grad_norm": 20.440784454345703, "learning_rate": 9.903174603174604e-06, "loss": 39.4721, "step": 5947 }, { "epoch": 141.62089552238805, "grad_norm": 15.877060890197754, "learning_rate": 9.901587301587302e-06, "loss": 39.6905, "step": 5948 }, { "epoch": 141.644776119403, "grad_norm": 17.53740692138672, "learning_rate": 9.9e-06, "loss": 40.3163, "step": 5949 }, { "epoch": 141.6686567164179, "grad_norm": 15.804143905639648, "learning_rate": 9.8984126984127e-06, "loss": 39.4714, "step": 5950 }, { "epoch": 141.69253731343284, "grad_norm": 16.576641082763672, "learning_rate": 9.896825396825398e-06, "loss": 39.2348, "step": 5951 }, { "epoch": 141.71641791044777, "grad_norm": 17.831815719604492, "learning_rate": 9.895238095238096e-06, "loss": 39.6573, "step": 5952 }, { "epoch": 141.74029850746268, "grad_norm": 18.616697311401367, "learning_rate": 9.893650793650794e-06, "loss": 38.9384, "step": 5953 }, { "epoch": 141.76417910447762, "grad_norm": 19.397676467895508, "learning_rate": 9.892063492063493e-06, "loss": 39.7287, "step": 5954 }, { "epoch": 141.78805970149253, "grad_norm": 16.5959415435791, "learning_rate": 9.89047619047619e-06, "loss": 40.8669, "step": 5955 }, { "epoch": 141.81194029850747, "grad_norm": 16.951446533203125, "learning_rate": 9.88888888888889e-06, "loss": 39.6911, "step": 5956 }, { "epoch": 141.83582089552237, "grad_norm": 30.49028778076172, "learning_rate": 9.887301587301587e-06, "loss": 40.0326, "step": 5957 }, { "epoch": 141.8597014925373, "grad_norm": 17.2624568939209, "learning_rate": 9.885714285714287e-06, "loss": 39.2385, "step": 5958 }, { "epoch": 141.88358208955225, "grad_norm": 31.927696228027344, "learning_rate": 9.884126984126985e-06, "loss": 40.8017, "step": 5959 }, { "epoch": 141.90746268656716, "grad_norm": 21.58512306213379, "learning_rate": 9.882539682539683e-06, "loss": 39.3581, "step": 5960 }, { "epoch": 141.9313432835821, "grad_norm": 31.855587005615234, "learning_rate": 9.880952380952381e-06, "loss": 40.7001, "step": 5961 }, { "epoch": 141.955223880597, "grad_norm": 20.689455032348633, "learning_rate": 9.87936507936508e-06, "loss": 40.1663, "step": 5962 }, { "epoch": 141.97910447761194, "grad_norm": 30.823978424072266, "learning_rate": 9.877777777777778e-06, "loss": 40.2097, "step": 5963 }, { "epoch": 142.0, "grad_norm": 21.405845642089844, "learning_rate": 9.876190476190478e-06, "loss": 34.6216, "step": 5964 }, { "epoch": 142.02388059701494, "grad_norm": 26.935768127441406, "learning_rate": 9.874603174603176e-06, "loss": 40.7151, "step": 5965 }, { "epoch": 142.04776119402985, "grad_norm": 25.882448196411133, "learning_rate": 9.873015873015874e-06, "loss": 39.7308, "step": 5966 }, { "epoch": 142.07164179104478, "grad_norm": 26.444034576416016, "learning_rate": 9.871428571428572e-06, "loss": 39.9286, "step": 5967 }, { "epoch": 142.0955223880597, "grad_norm": 31.098644256591797, "learning_rate": 9.86984126984127e-06, "loss": 39.8352, "step": 5968 }, { "epoch": 142.11940298507463, "grad_norm": 22.535404205322266, "learning_rate": 9.868253968253968e-06, "loss": 39.7821, "step": 5969 }, { "epoch": 142.14328358208957, "grad_norm": 35.592140197753906, "learning_rate": 9.866666666666668e-06, "loss": 39.095, "step": 5970 }, { "epoch": 142.16716417910447, "grad_norm": 31.111549377441406, "learning_rate": 9.865079365079366e-06, "loss": 39.7111, "step": 5971 }, { "epoch": 142.1910447761194, "grad_norm": 29.2420654296875, "learning_rate": 9.863492063492065e-06, "loss": 39.5971, "step": 5972 }, { "epoch": 142.21492537313432, "grad_norm": 24.482894897460938, "learning_rate": 9.861904761904763e-06, "loss": 40.3702, "step": 5973 }, { "epoch": 142.23880597014926, "grad_norm": 33.55892562866211, "learning_rate": 9.86031746031746e-06, "loss": 40.1212, "step": 5974 }, { "epoch": 142.26268656716417, "grad_norm": 26.067710876464844, "learning_rate": 9.858730158730159e-06, "loss": 38.6022, "step": 5975 }, { "epoch": 142.2865671641791, "grad_norm": 37.217899322509766, "learning_rate": 9.857142857142859e-06, "loss": 39.6364, "step": 5976 }, { "epoch": 142.31044776119404, "grad_norm": 28.208200454711914, "learning_rate": 9.855555555555555e-06, "loss": 39.2563, "step": 5977 }, { "epoch": 142.33432835820895, "grad_norm": 34.90814971923828, "learning_rate": 9.853968253968255e-06, "loss": 41.1621, "step": 5978 }, { "epoch": 142.3582089552239, "grad_norm": 30.80927848815918, "learning_rate": 9.852380952380953e-06, "loss": 40.3969, "step": 5979 }, { "epoch": 142.3820895522388, "grad_norm": 27.33124351501465, "learning_rate": 9.850793650793651e-06, "loss": 38.5036, "step": 5980 }, { "epoch": 142.40597014925373, "grad_norm": 26.730077743530273, "learning_rate": 9.849206349206351e-06, "loss": 40.0659, "step": 5981 }, { "epoch": 142.42985074626867, "grad_norm": 28.404932022094727, "learning_rate": 9.847619047619048e-06, "loss": 40.1738, "step": 5982 }, { "epoch": 142.45373134328358, "grad_norm": 21.64544677734375, "learning_rate": 9.846031746031746e-06, "loss": 39.3644, "step": 5983 }, { "epoch": 142.47761194029852, "grad_norm": 31.69153594970703, "learning_rate": 9.844444444444446e-06, "loss": 40.0543, "step": 5984 }, { "epoch": 142.50149253731342, "grad_norm": 24.971776962280273, "learning_rate": 9.842857142857144e-06, "loss": 40.4007, "step": 5985 }, { "epoch": 142.52537313432836, "grad_norm": 32.081085205078125, "learning_rate": 9.841269841269842e-06, "loss": 39.7853, "step": 5986 }, { "epoch": 142.54925373134327, "grad_norm": 25.004484176635742, "learning_rate": 9.83968253968254e-06, "loss": 41.2327, "step": 5987 }, { "epoch": 142.5731343283582, "grad_norm": 28.96761703491211, "learning_rate": 9.838095238095238e-06, "loss": 39.2866, "step": 5988 }, { "epoch": 142.59701492537314, "grad_norm": 24.388214111328125, "learning_rate": 9.836507936507937e-06, "loss": 39.2277, "step": 5989 }, { "epoch": 142.62089552238805, "grad_norm": 30.253482818603516, "learning_rate": 9.834920634920636e-06, "loss": 39.7101, "step": 5990 }, { "epoch": 142.644776119403, "grad_norm": 26.706071853637695, "learning_rate": 9.833333333333333e-06, "loss": 39.947, "step": 5991 }, { "epoch": 142.6686567164179, "grad_norm": 29.053794860839844, "learning_rate": 9.831746031746033e-06, "loss": 40.5273, "step": 5992 }, { "epoch": 142.69253731343284, "grad_norm": 23.27960968017578, "learning_rate": 9.830158730158731e-06, "loss": 39.6124, "step": 5993 }, { "epoch": 142.71641791044777, "grad_norm": 24.988405227661133, "learning_rate": 9.828571428571429e-06, "loss": 38.7563, "step": 5994 }, { "epoch": 142.74029850746268, "grad_norm": 22.26626205444336, "learning_rate": 9.826984126984129e-06, "loss": 39.4067, "step": 5995 }, { "epoch": 142.76417910447762, "grad_norm": 21.31068992614746, "learning_rate": 9.825396825396825e-06, "loss": 40.6265, "step": 5996 }, { "epoch": 142.78805970149253, "grad_norm": 16.70918083190918, "learning_rate": 9.823809523809524e-06, "loss": 41.1972, "step": 5997 }, { "epoch": 142.81194029850747, "grad_norm": 25.48511505126953, "learning_rate": 9.822222222222223e-06, "loss": 40.3997, "step": 5998 }, { "epoch": 142.83582089552237, "grad_norm": 16.19297218322754, "learning_rate": 9.820634920634922e-06, "loss": 39.2985, "step": 5999 }, { "epoch": 142.8597014925373, "grad_norm": 30.387081146240234, "learning_rate": 9.81904761904762e-06, "loss": 39.6372, "step": 6000 }, { "epoch": 142.88358208955225, "grad_norm": 26.224910736083984, "learning_rate": 9.817460317460318e-06, "loss": 40.123, "step": 6001 }, { "epoch": 142.90746268656716, "grad_norm": 26.33165168762207, "learning_rate": 9.815873015873016e-06, "loss": 38.9846, "step": 6002 }, { "epoch": 142.9313432835821, "grad_norm": 27.266569137573242, "learning_rate": 9.814285714285716e-06, "loss": 39.3959, "step": 6003 }, { "epoch": 142.955223880597, "grad_norm": 22.750720977783203, "learning_rate": 9.812698412698414e-06, "loss": 39.962, "step": 6004 }, { "epoch": 142.97910447761194, "grad_norm": 28.63392448425293, "learning_rate": 9.811111111111112e-06, "loss": 38.9944, "step": 6005 }, { "epoch": 143.0, "grad_norm": 18.615793228149414, "learning_rate": 9.80952380952381e-06, "loss": 35.8858, "step": 6006 }, { "epoch": 143.02388059701494, "grad_norm": 27.762741088867188, "learning_rate": 9.807936507936509e-06, "loss": 40.7509, "step": 6007 }, { "epoch": 143.04776119402985, "grad_norm": 20.943986892700195, "learning_rate": 9.806349206349207e-06, "loss": 39.5899, "step": 6008 }, { "epoch": 143.07164179104478, "grad_norm": 26.191465377807617, "learning_rate": 9.804761904761907e-06, "loss": 40.6444, "step": 6009 }, { "epoch": 143.0955223880597, "grad_norm": 19.90812110900879, "learning_rate": 9.803174603174605e-06, "loss": 40.5871, "step": 6010 }, { "epoch": 143.11940298507463, "grad_norm": 18.703001022338867, "learning_rate": 9.801587301587301e-06, "loss": 39.6964, "step": 6011 }, { "epoch": 143.14328358208957, "grad_norm": 24.919872283935547, "learning_rate": 9.800000000000001e-06, "loss": 39.6181, "step": 6012 }, { "epoch": 143.16716417910447, "grad_norm": 17.14714813232422, "learning_rate": 9.7984126984127e-06, "loss": 38.8468, "step": 6013 }, { "epoch": 143.1910447761194, "grad_norm": 17.806344985961914, "learning_rate": 9.796825396825397e-06, "loss": 37.7955, "step": 6014 }, { "epoch": 143.21492537313432, "grad_norm": 20.45462989807129, "learning_rate": 9.795238095238097e-06, "loss": 39.7501, "step": 6015 }, { "epoch": 143.23880597014926, "grad_norm": 15.431315422058105, "learning_rate": 9.793650793650794e-06, "loss": 40.5709, "step": 6016 }, { "epoch": 143.26268656716417, "grad_norm": 17.685319900512695, "learning_rate": 9.792063492063494e-06, "loss": 39.8803, "step": 6017 }, { "epoch": 143.2865671641791, "grad_norm": 17.555204391479492, "learning_rate": 9.790476190476192e-06, "loss": 40.2518, "step": 6018 }, { "epoch": 143.31044776119404, "grad_norm": 21.020915985107422, "learning_rate": 9.78888888888889e-06, "loss": 39.6208, "step": 6019 }, { "epoch": 143.33432835820895, "grad_norm": 18.242265701293945, "learning_rate": 9.787301587301588e-06, "loss": 39.7219, "step": 6020 }, { "epoch": 143.3582089552239, "grad_norm": 15.243906021118164, "learning_rate": 9.785714285714286e-06, "loss": 41.2384, "step": 6021 }, { "epoch": 143.3820895522388, "grad_norm": 17.748985290527344, "learning_rate": 9.784126984126984e-06, "loss": 40.7005, "step": 6022 }, { "epoch": 143.40597014925373, "grad_norm": 16.003299713134766, "learning_rate": 9.782539682539684e-06, "loss": 39.4961, "step": 6023 }, { "epoch": 143.42985074626867, "grad_norm": NaN, "learning_rate": 9.780952380952382e-06, "loss": 49.4703, "step": 6024 }, { "epoch": 143.45373134328358, "grad_norm": 18.312435150146484, "learning_rate": 9.780952380952382e-06, "loss": 38.8234, "step": 6025 }, { "epoch": 143.47761194029852, "grad_norm": 16.544918060302734, "learning_rate": 9.779365079365079e-06, "loss": 40.889, "step": 6026 }, { "epoch": 143.50149253731342, "grad_norm": 22.112247467041016, "learning_rate": 9.777777777777779e-06, "loss": 40.1041, "step": 6027 }, { "epoch": 143.52537313432836, "grad_norm": 17.069169998168945, "learning_rate": 9.776190476190477e-06, "loss": 39.9842, "step": 6028 }, { "epoch": 143.54925373134327, "grad_norm": 20.36438751220703, "learning_rate": 9.774603174603175e-06, "loss": 39.4701, "step": 6029 }, { "epoch": 143.5731343283582, "grad_norm": 17.346471786499023, "learning_rate": 9.773015873015875e-06, "loss": 39.0406, "step": 6030 }, { "epoch": 143.59701492537314, "grad_norm": 24.81892967224121, "learning_rate": 9.771428571428571e-06, "loss": 39.2244, "step": 6031 }, { "epoch": 143.62089552238805, "grad_norm": 16.587474822998047, "learning_rate": 9.769841269841271e-06, "loss": 40.7776, "step": 6032 }, { "epoch": 143.644776119403, "grad_norm": 26.76886749267578, "learning_rate": 9.76825396825397e-06, "loss": 39.578, "step": 6033 }, { "epoch": 143.6686567164179, "grad_norm": 23.013551712036133, "learning_rate": 9.766666666666667e-06, "loss": 40.1283, "step": 6034 }, { "epoch": 143.69253731343284, "grad_norm": 21.895034790039062, "learning_rate": 9.765079365079366e-06, "loss": 40.0862, "step": 6035 }, { "epoch": 143.71641791044777, "grad_norm": 20.68297004699707, "learning_rate": 9.763492063492064e-06, "loss": 40.8754, "step": 6036 }, { "epoch": 143.74029850746268, "grad_norm": 25.544919967651367, "learning_rate": 9.761904761904762e-06, "loss": 40.3303, "step": 6037 }, { "epoch": 143.76417910447762, "grad_norm": 18.004247665405273, "learning_rate": 9.760317460317462e-06, "loss": 40.8136, "step": 6038 }, { "epoch": 143.78805970149253, "grad_norm": 34.06446838378906, "learning_rate": 9.75873015873016e-06, "loss": 39.3411, "step": 6039 }, { "epoch": 143.81194029850747, "grad_norm": 26.361446380615234, "learning_rate": 9.757142857142858e-06, "loss": 39.6386, "step": 6040 }, { "epoch": 143.83582089552237, "grad_norm": 34.16010284423828, "learning_rate": 9.755555555555556e-06, "loss": 38.8731, "step": 6041 }, { "epoch": 143.8597014925373, "grad_norm": 25.978195190429688, "learning_rate": 9.753968253968254e-06, "loss": 38.3753, "step": 6042 }, { "epoch": 143.88358208955225, "grad_norm": 30.363014221191406, "learning_rate": 9.752380952380953e-06, "loss": 40.2135, "step": 6043 }, { "epoch": 143.90746268656716, "grad_norm": 21.55478858947754, "learning_rate": 9.750793650793652e-06, "loss": 38.6819, "step": 6044 }, { "epoch": 143.9313432835821, "grad_norm": 24.41329574584961, "learning_rate": 9.74920634920635e-06, "loss": 40.9191, "step": 6045 }, { "epoch": 143.955223880597, "grad_norm": 20.223283767700195, "learning_rate": 9.747619047619049e-06, "loss": 37.8833, "step": 6046 }, { "epoch": 143.97910447761194, "grad_norm": 18.49294662475586, "learning_rate": 9.746031746031747e-06, "loss": 39.6541, "step": 6047 }, { "epoch": 144.0, "grad_norm": 21.152721405029297, "learning_rate": 9.744444444444445e-06, "loss": 35.0013, "step": 6048 }, { "epoch": 144.02388059701494, "grad_norm": 18.513105392456055, "learning_rate": 9.742857142857143e-06, "loss": 39.0689, "step": 6049 }, { "epoch": 144.04776119402985, "grad_norm": 19.236661911010742, "learning_rate": 9.741269841269843e-06, "loss": 40.3136, "step": 6050 }, { "epoch": 144.07164179104478, "grad_norm": 20.3817138671875, "learning_rate": 9.73968253968254e-06, "loss": 39.5201, "step": 6051 }, { "epoch": 144.0955223880597, "grad_norm": 16.75079917907715, "learning_rate": 9.73809523809524e-06, "loss": 40.7713, "step": 6052 }, { "epoch": 144.11940298507463, "grad_norm": 18.25192642211914, "learning_rate": 9.736507936507938e-06, "loss": 37.5623, "step": 6053 }, { "epoch": 144.14328358208957, "grad_norm": 20.650714874267578, "learning_rate": 9.734920634920636e-06, "loss": 40.4165, "step": 6054 }, { "epoch": 144.16716417910447, "grad_norm": 17.22085952758789, "learning_rate": 9.733333333333334e-06, "loss": 39.791, "step": 6055 }, { "epoch": 144.1910447761194, "grad_norm": 17.359233856201172, "learning_rate": 9.731746031746032e-06, "loss": 40.0045, "step": 6056 }, { "epoch": 144.21492537313432, "grad_norm": 18.853940963745117, "learning_rate": 9.73015873015873e-06, "loss": 39.8853, "step": 6057 }, { "epoch": 144.23880597014926, "grad_norm": 17.358612060546875, "learning_rate": 9.72857142857143e-06, "loss": 40.4062, "step": 6058 }, { "epoch": 144.26268656716417, "grad_norm": 17.910709381103516, "learning_rate": 9.726984126984128e-06, "loss": 39.096, "step": 6059 }, { "epoch": 144.2865671641791, "grad_norm": 15.399354934692383, "learning_rate": 9.725396825396826e-06, "loss": 40.6485, "step": 6060 }, { "epoch": 144.31044776119404, "grad_norm": 17.468482971191406, "learning_rate": 9.723809523809525e-06, "loss": 40.331, "step": 6061 }, { "epoch": 144.33432835820895, "grad_norm": 13.7400541305542, "learning_rate": 9.722222222222223e-06, "loss": 38.4288, "step": 6062 }, { "epoch": 144.3582089552239, "grad_norm": 17.06818962097168, "learning_rate": 9.720634920634921e-06, "loss": 40.0401, "step": 6063 }, { "epoch": 144.3820895522388, "grad_norm": 20.590816497802734, "learning_rate": 9.71904761904762e-06, "loss": 38.3167, "step": 6064 }, { "epoch": 144.40597014925373, "grad_norm": 17.27370262145996, "learning_rate": 9.717460317460317e-06, "loss": 39.3935, "step": 6065 }, { "epoch": 144.42985074626867, "grad_norm": 18.074583053588867, "learning_rate": 9.715873015873017e-06, "loss": 40.6159, "step": 6066 }, { "epoch": 144.45373134328358, "grad_norm": 20.387073516845703, "learning_rate": 9.714285714285715e-06, "loss": 40.2223, "step": 6067 }, { "epoch": 144.47761194029852, "grad_norm": 24.666194915771484, "learning_rate": 9.712698412698413e-06, "loss": 39.9985, "step": 6068 }, { "epoch": 144.50149253731342, "grad_norm": 17.105199813842773, "learning_rate": 9.711111111111111e-06, "loss": 39.8627, "step": 6069 }, { "epoch": 144.52537313432836, "grad_norm": 21.538379669189453, "learning_rate": 9.70952380952381e-06, "loss": 38.4146, "step": 6070 }, { "epoch": 144.54925373134327, "grad_norm": 20.19131851196289, "learning_rate": 9.707936507936508e-06, "loss": 39.3941, "step": 6071 }, { "epoch": 144.5731343283582, "grad_norm": NaN, "learning_rate": 9.706349206349208e-06, "loss": 60.62, "step": 6072 }, { "epoch": 144.59701492537314, "grad_norm": 22.036714553833008, "learning_rate": 9.706349206349208e-06, "loss": 41.4121, "step": 6073 }, { "epoch": 144.62089552238805, "grad_norm": 18.917593002319336, "learning_rate": 9.704761904761906e-06, "loss": 40.0146, "step": 6074 }, { "epoch": 144.644776119403, "grad_norm": 15.137066841125488, "learning_rate": 9.703174603174604e-06, "loss": 38.8101, "step": 6075 }, { "epoch": 144.6686567164179, "grad_norm": 22.35431671142578, "learning_rate": 9.701587301587302e-06, "loss": 39.9032, "step": 6076 }, { "epoch": 144.69253731343284, "grad_norm": 16.393959045410156, "learning_rate": 9.7e-06, "loss": 39.0491, "step": 6077 }, { "epoch": 144.71641791044777, "grad_norm": 23.372652053833008, "learning_rate": 9.698412698412698e-06, "loss": 38.3159, "step": 6078 }, { "epoch": 144.74029850746268, "grad_norm": 19.953815460205078, "learning_rate": 9.696825396825398e-06, "loss": 40.3867, "step": 6079 }, { "epoch": 144.76417910447762, "grad_norm": 30.77141761779785, "learning_rate": 9.695238095238096e-06, "loss": 40.612, "step": 6080 }, { "epoch": 144.78805970149253, "grad_norm": 25.008193969726562, "learning_rate": 9.693650793650795e-06, "loss": 40.3145, "step": 6081 }, { "epoch": 144.81194029850747, "grad_norm": 24.88791275024414, "learning_rate": 9.692063492063493e-06, "loss": 40.0597, "step": 6082 }, { "epoch": 144.83582089552237, "grad_norm": 23.809860229492188, "learning_rate": 9.690476190476191e-06, "loss": 39.1451, "step": 6083 }, { "epoch": 144.8597014925373, "grad_norm": 18.984691619873047, "learning_rate": 9.688888888888889e-06, "loss": 40.2938, "step": 6084 }, { "epoch": 144.88358208955225, "grad_norm": 19.395414352416992, "learning_rate": 9.687301587301589e-06, "loss": 39.0497, "step": 6085 }, { "epoch": 144.90746268656716, "grad_norm": 22.040620803833008, "learning_rate": 9.685714285714285e-06, "loss": 40.2134, "step": 6086 }, { "epoch": 144.9313432835821, "grad_norm": 18.223390579223633, "learning_rate": 9.684126984126985e-06, "loss": 40.2474, "step": 6087 }, { "epoch": 144.955223880597, "grad_norm": 16.415504455566406, "learning_rate": 9.682539682539683e-06, "loss": 39.446, "step": 6088 }, { "epoch": 144.97910447761194, "grad_norm": 18.761838912963867, "learning_rate": 9.680952380952382e-06, "loss": 40.2324, "step": 6089 }, { "epoch": 145.0, "grad_norm": 14.694183349609375, "learning_rate": 9.679365079365081e-06, "loss": 33.4446, "step": 6090 }, { "epoch": 145.02388059701494, "grad_norm": 15.788558959960938, "learning_rate": 9.677777777777778e-06, "loss": 39.7514, "step": 6091 }, { "epoch": 145.04776119402985, "grad_norm": 19.04860496520996, "learning_rate": 9.676190476190476e-06, "loss": 39.7695, "step": 6092 }, { "epoch": 145.07164179104478, "grad_norm": 18.22698974609375, "learning_rate": 9.674603174603176e-06, "loss": 38.9197, "step": 6093 }, { "epoch": 145.0955223880597, "grad_norm": 15.227489471435547, "learning_rate": 9.673015873015874e-06, "loss": 39.3056, "step": 6094 }, { "epoch": 145.11940298507463, "grad_norm": 16.864139556884766, "learning_rate": 9.671428571428572e-06, "loss": 39.1712, "step": 6095 }, { "epoch": 145.14328358208957, "grad_norm": 19.5656795501709, "learning_rate": 9.66984126984127e-06, "loss": 40.002, "step": 6096 }, { "epoch": 145.16716417910447, "grad_norm": 15.475809097290039, "learning_rate": 9.668253968253969e-06, "loss": 38.7754, "step": 6097 }, { "epoch": 145.1910447761194, "grad_norm": 21.379589080810547, "learning_rate": 9.666666666666667e-06, "loss": 39.2871, "step": 6098 }, { "epoch": 145.21492537313432, "grad_norm": 15.879408836364746, "learning_rate": 9.665079365079367e-06, "loss": 39.6373, "step": 6099 }, { "epoch": 145.23880597014926, "grad_norm": 20.632753372192383, "learning_rate": 9.663492063492065e-06, "loss": 38.4285, "step": 6100 }, { "epoch": 145.26268656716417, "grad_norm": 21.47446632385254, "learning_rate": 9.661904761904763e-06, "loss": 40.8862, "step": 6101 }, { "epoch": 145.2865671641791, "grad_norm": 17.442068099975586, "learning_rate": 9.660317460317461e-06, "loss": 38.6985, "step": 6102 }, { "epoch": 145.31044776119404, "grad_norm": 19.083768844604492, "learning_rate": 9.65873015873016e-06, "loss": 40.7921, "step": 6103 }, { "epoch": 145.33432835820895, "grad_norm": 18.121421813964844, "learning_rate": 9.657142857142859e-06, "loss": 37.8805, "step": 6104 }, { "epoch": 145.3582089552239, "grad_norm": 17.6599063873291, "learning_rate": 9.655555555555556e-06, "loss": 40.5698, "step": 6105 }, { "epoch": 145.3820895522388, "grad_norm": 19.329008102416992, "learning_rate": 9.653968253968254e-06, "loss": 40.4532, "step": 6106 }, { "epoch": 145.40597014925373, "grad_norm": 16.720779418945312, "learning_rate": 9.652380952380954e-06, "loss": 40.1488, "step": 6107 }, { "epoch": 145.42985074626867, "grad_norm": 21.048095703125, "learning_rate": 9.650793650793652e-06, "loss": 39.7219, "step": 6108 }, { "epoch": 145.45373134328358, "grad_norm": 22.04213523864746, "learning_rate": 9.64920634920635e-06, "loss": 39.7511, "step": 6109 }, { "epoch": 145.47761194029852, "grad_norm": 16.699317932128906, "learning_rate": 9.647619047619048e-06, "loss": 40.4039, "step": 6110 }, { "epoch": 145.50149253731342, "grad_norm": 20.744373321533203, "learning_rate": 9.646031746031746e-06, "loss": 39.7408, "step": 6111 }, { "epoch": 145.52537313432836, "grad_norm": 22.63360595703125, "learning_rate": 9.644444444444444e-06, "loss": 39.3722, "step": 6112 }, { "epoch": 145.54925373134327, "grad_norm": 19.45470428466797, "learning_rate": 9.642857142857144e-06, "loss": 38.0393, "step": 6113 }, { "epoch": 145.5731343283582, "grad_norm": 22.894981384277344, "learning_rate": 9.641269841269842e-06, "loss": 40.2133, "step": 6114 }, { "epoch": 145.59701492537314, "grad_norm": 24.637012481689453, "learning_rate": 9.63968253968254e-06, "loss": 40.3819, "step": 6115 }, { "epoch": 145.62089552238805, "grad_norm": 18.199817657470703, "learning_rate": 9.638095238095239e-06, "loss": 39.591, "step": 6116 }, { "epoch": 145.644776119403, "grad_norm": 26.813262939453125, "learning_rate": 9.636507936507937e-06, "loss": 40.4985, "step": 6117 }, { "epoch": 145.6686567164179, "grad_norm": 20.786285400390625, "learning_rate": 9.634920634920637e-06, "loss": 40.7269, "step": 6118 }, { "epoch": 145.69253731343284, "grad_norm": 23.935718536376953, "learning_rate": 9.633333333333335e-06, "loss": 39.3393, "step": 6119 }, { "epoch": 145.71641791044777, "grad_norm": 23.803821563720703, "learning_rate": 9.631746031746031e-06, "loss": 38.7576, "step": 6120 }, { "epoch": 145.74029850746268, "grad_norm": 20.230010986328125, "learning_rate": 9.630158730158731e-06, "loss": 39.0025, "step": 6121 }, { "epoch": 145.76417910447762, "grad_norm": 26.59383773803711, "learning_rate": 9.62857142857143e-06, "loss": 38.8942, "step": 6122 }, { "epoch": 145.78805970149253, "grad_norm": 26.656192779541016, "learning_rate": 9.626984126984127e-06, "loss": 40.5026, "step": 6123 }, { "epoch": 145.81194029850747, "grad_norm": 17.649873733520508, "learning_rate": 9.625396825396827e-06, "loss": 41.5825, "step": 6124 }, { "epoch": 145.83582089552237, "grad_norm": 25.234466552734375, "learning_rate": 9.623809523809524e-06, "loss": 38.4286, "step": 6125 }, { "epoch": 145.8597014925373, "grad_norm": 21.319074630737305, "learning_rate": 9.622222222222222e-06, "loss": 38.8864, "step": 6126 }, { "epoch": 145.88358208955225, "grad_norm": 18.06063461303711, "learning_rate": 9.620634920634922e-06, "loss": 39.6841, "step": 6127 }, { "epoch": 145.90746268656716, "grad_norm": 30.18861961364746, "learning_rate": 9.61904761904762e-06, "loss": 39.9368, "step": 6128 }, { "epoch": 145.9313432835821, "grad_norm": 17.992721557617188, "learning_rate": 9.617460317460318e-06, "loss": 40.3737, "step": 6129 }, { "epoch": 145.955223880597, "grad_norm": 23.077625274658203, "learning_rate": 9.615873015873016e-06, "loss": 40.5656, "step": 6130 }, { "epoch": 145.97910447761194, "grad_norm": 23.092899322509766, "learning_rate": 9.614285714285714e-06, "loss": 40.3125, "step": 6131 }, { "epoch": 146.0, "grad_norm": 15.744735717773438, "learning_rate": 9.612698412698414e-06, "loss": 34.232, "step": 6132 }, { "epoch": 146.02388059701494, "grad_norm": 26.01073455810547, "learning_rate": 9.611111111111112e-06, "loss": 40.3078, "step": 6133 }, { "epoch": 146.04776119402985, "grad_norm": 19.39035987854004, "learning_rate": 9.60952380952381e-06, "loss": 40.5784, "step": 6134 }, { "epoch": 146.07164179104478, "grad_norm": 15.700801849365234, "learning_rate": 9.607936507936509e-06, "loss": 39.5239, "step": 6135 }, { "epoch": 146.0955223880597, "grad_norm": 19.383134841918945, "learning_rate": 9.606349206349207e-06, "loss": 38.7188, "step": 6136 }, { "epoch": 146.11940298507463, "grad_norm": 16.338594436645508, "learning_rate": 9.604761904761905e-06, "loss": 37.9652, "step": 6137 }, { "epoch": 146.14328358208957, "grad_norm": 16.093175888061523, "learning_rate": 9.603174603174605e-06, "loss": 39.2513, "step": 6138 }, { "epoch": 146.16716417910447, "grad_norm": 17.29031753540039, "learning_rate": 9.601587301587303e-06, "loss": 40.0027, "step": 6139 }, { "epoch": 146.1910447761194, "grad_norm": 19.29131317138672, "learning_rate": 9.600000000000001e-06, "loss": 40.0402, "step": 6140 }, { "epoch": 146.21492537313432, "grad_norm": 14.723686218261719, "learning_rate": 9.5984126984127e-06, "loss": 39.6575, "step": 6141 }, { "epoch": 146.23880597014926, "grad_norm": 13.284400939941406, "learning_rate": 9.596825396825398e-06, "loss": 38.2423, "step": 6142 }, { "epoch": 146.26268656716417, "grad_norm": 14.448948860168457, "learning_rate": 9.595238095238096e-06, "loss": 39.5564, "step": 6143 }, { "epoch": 146.2865671641791, "grad_norm": 15.912578582763672, "learning_rate": 9.593650793650794e-06, "loss": 41.1268, "step": 6144 }, { "epoch": 146.31044776119404, "grad_norm": 13.982776641845703, "learning_rate": 9.592063492063492e-06, "loss": 38.4374, "step": 6145 }, { "epoch": 146.33432835820895, "grad_norm": 18.495389938354492, "learning_rate": 9.590476190476192e-06, "loss": 39.379, "step": 6146 }, { "epoch": 146.3582089552239, "grad_norm": 27.54531478881836, "learning_rate": 9.58888888888889e-06, "loss": 40.0996, "step": 6147 }, { "epoch": 146.3820895522388, "grad_norm": 16.7718563079834, "learning_rate": 9.587301587301588e-06, "loss": 40.0521, "step": 6148 }, { "epoch": 146.40597014925373, "grad_norm": 13.470280647277832, "learning_rate": 9.585714285714286e-06, "loss": 39.616, "step": 6149 }, { "epoch": 146.42985074626867, "grad_norm": 18.987812042236328, "learning_rate": 9.584126984126985e-06, "loss": 40.2916, "step": 6150 }, { "epoch": 146.45373134328358, "grad_norm": 17.242666244506836, "learning_rate": 9.582539682539683e-06, "loss": 39.5103, "step": 6151 }, { "epoch": 146.47761194029852, "grad_norm": 19.262651443481445, "learning_rate": 9.580952380952383e-06, "loss": 39.479, "step": 6152 }, { "epoch": 146.50149253731342, "grad_norm": 19.4732608795166, "learning_rate": 9.57936507936508e-06, "loss": 40.6662, "step": 6153 }, { "epoch": 146.52537313432836, "grad_norm": 18.1159610748291, "learning_rate": 9.577777777777779e-06, "loss": 40.4998, "step": 6154 }, { "epoch": 146.54925373134327, "grad_norm": 16.363819122314453, "learning_rate": 9.576190476190477e-06, "loss": 36.286, "step": 6155 }, { "epoch": 146.5731343283582, "grad_norm": NaN, "learning_rate": 9.574603174603175e-06, "loss": 33.9266, "step": 6156 }, { "epoch": 146.59701492537314, "grad_norm": 17.450937271118164, "learning_rate": 9.574603174603175e-06, "loss": 39.6215, "step": 6157 }, { "epoch": 146.62089552238805, "grad_norm": 20.69955062866211, "learning_rate": 9.573015873015873e-06, "loss": 39.0897, "step": 6158 }, { "epoch": 146.644776119403, "grad_norm": 26.637802124023438, "learning_rate": 9.571428571428573e-06, "loss": 40.1247, "step": 6159 }, { "epoch": 146.6686567164179, "grad_norm": 16.69516372680664, "learning_rate": 9.56984126984127e-06, "loss": 40.1396, "step": 6160 }, { "epoch": 146.69253731343284, "grad_norm": 24.776458740234375, "learning_rate": 9.56825396825397e-06, "loss": 40.1209, "step": 6161 }, { "epoch": 146.71641791044777, "grad_norm": 22.97787094116211, "learning_rate": 9.566666666666668e-06, "loss": 40.8353, "step": 6162 }, { "epoch": 146.74029850746268, "grad_norm": 17.57745361328125, "learning_rate": 9.565079365079366e-06, "loss": 39.4058, "step": 6163 }, { "epoch": 146.76417910447762, "grad_norm": 32.396968841552734, "learning_rate": 9.563492063492064e-06, "loss": 39.9972, "step": 6164 }, { "epoch": 146.78805970149253, "grad_norm": 20.528043746948242, "learning_rate": 9.561904761904762e-06, "loss": 39.2347, "step": 6165 }, { "epoch": 146.81194029850747, "grad_norm": 35.68081283569336, "learning_rate": 9.56031746031746e-06, "loss": 39.7637, "step": 6166 }, { "epoch": 146.83582089552237, "grad_norm": 22.645538330078125, "learning_rate": 9.55873015873016e-06, "loss": 39.389, "step": 6167 }, { "epoch": 146.8597014925373, "grad_norm": 40.081722259521484, "learning_rate": 9.557142857142858e-06, "loss": 39.2152, "step": 6168 }, { "epoch": 146.88358208955225, "grad_norm": 30.616613388061523, "learning_rate": 9.555555555555556e-06, "loss": 41.0145, "step": 6169 }, { "epoch": 146.90746268656716, "grad_norm": 40.12171173095703, "learning_rate": 9.553968253968255e-06, "loss": 38.9865, "step": 6170 }, { "epoch": 146.9313432835821, "grad_norm": 35.75667953491211, "learning_rate": 9.552380952380953e-06, "loss": 39.5375, "step": 6171 }, { "epoch": 146.955223880597, "grad_norm": 33.94740676879883, "learning_rate": 9.550793650793651e-06, "loss": 40.1784, "step": 6172 }, { "epoch": 146.97910447761194, "grad_norm": 30.814422607421875, "learning_rate": 9.54920634920635e-06, "loss": 39.5753, "step": 6173 }, { "epoch": 147.0, "grad_norm": 30.17840003967285, "learning_rate": 9.547619047619049e-06, "loss": 35.3718, "step": 6174 }, { "epoch": 147.02388059701494, "grad_norm": 28.895395278930664, "learning_rate": 9.546031746031747e-06, "loss": 38.3438, "step": 6175 }, { "epoch": 147.04776119402985, "grad_norm": 35.01318359375, "learning_rate": 9.544444444444445e-06, "loss": 40.1541, "step": 6176 }, { "epoch": 147.07164179104478, "grad_norm": 26.909517288208008, "learning_rate": 9.542857142857143e-06, "loss": 40.7689, "step": 6177 }, { "epoch": 147.0955223880597, "grad_norm": 34.66661071777344, "learning_rate": 9.541269841269842e-06, "loss": 39.4185, "step": 6178 }, { "epoch": 147.11940298507463, "grad_norm": 26.95038414001465, "learning_rate": 9.539682539682541e-06, "loss": 39.2899, "step": 6179 }, { "epoch": 147.14328358208957, "grad_norm": 35.695613861083984, "learning_rate": 9.538095238095238e-06, "loss": 38.2958, "step": 6180 }, { "epoch": 147.16716417910447, "grad_norm": 33.42219543457031, "learning_rate": 9.536507936507938e-06, "loss": 38.6355, "step": 6181 }, { "epoch": 147.1910447761194, "grad_norm": 32.482948303222656, "learning_rate": 9.534920634920636e-06, "loss": 39.308, "step": 6182 }, { "epoch": 147.21492537313432, "grad_norm": 30.571081161499023, "learning_rate": 9.533333333333334e-06, "loss": 39.6363, "step": 6183 }, { "epoch": 147.23880597014926, "grad_norm": 32.807952880859375, "learning_rate": 9.531746031746032e-06, "loss": 40.0385, "step": 6184 }, { "epoch": 147.26268656716417, "grad_norm": 32.24506378173828, "learning_rate": 9.53015873015873e-06, "loss": 39.7579, "step": 6185 }, { "epoch": 147.2865671641791, "grad_norm": 30.932525634765625, "learning_rate": 9.528571428571429e-06, "loss": 39.03, "step": 6186 }, { "epoch": 147.31044776119404, "grad_norm": 25.31529998779297, "learning_rate": 9.526984126984128e-06, "loss": 39.6358, "step": 6187 }, { "epoch": 147.33432835820895, "grad_norm": 36.5391960144043, "learning_rate": 9.525396825396827e-06, "loss": 39.6683, "step": 6188 }, { "epoch": 147.3582089552239, "grad_norm": 31.384052276611328, "learning_rate": 9.523809523809525e-06, "loss": 40.0828, "step": 6189 }, { "epoch": 147.3820895522388, "grad_norm": 34.303096771240234, "learning_rate": 9.522222222222223e-06, "loss": 39.218, "step": 6190 }, { "epoch": 147.40597014925373, "grad_norm": 33.030216217041016, "learning_rate": 9.520634920634921e-06, "loss": 40.3229, "step": 6191 }, { "epoch": 147.42985074626867, "grad_norm": 26.773529052734375, "learning_rate": 9.51904761904762e-06, "loss": 39.3039, "step": 6192 }, { "epoch": 147.45373134328358, "grad_norm": 22.8935546875, "learning_rate": 9.517460317460319e-06, "loss": 39.6118, "step": 6193 }, { "epoch": 147.47761194029852, "grad_norm": 36.28474044799805, "learning_rate": 9.515873015873016e-06, "loss": 39.2833, "step": 6194 }, { "epoch": 147.50149253731342, "grad_norm": 30.737974166870117, "learning_rate": 9.514285714285715e-06, "loss": 38.1781, "step": 6195 }, { "epoch": 147.52537313432836, "grad_norm": 35.12816619873047, "learning_rate": 9.512698412698414e-06, "loss": 38.8607, "step": 6196 }, { "epoch": 147.54925373134327, "grad_norm": NaN, "learning_rate": 9.511111111111112e-06, "loss": 60.3704, "step": 6197 }, { "epoch": 147.5731343283582, "grad_norm": 32.037559509277344, "learning_rate": 9.511111111111112e-06, "loss": 39.4876, "step": 6198 }, { "epoch": 147.59701492537314, "grad_norm": 26.51078987121582, "learning_rate": 9.50952380952381e-06, "loss": 38.7221, "step": 6199 }, { "epoch": 147.62089552238805, "grad_norm": 23.575544357299805, "learning_rate": 9.507936507936508e-06, "loss": 39.454, "step": 6200 }, { "epoch": 147.644776119403, "grad_norm": 31.265623092651367, "learning_rate": 9.506349206349206e-06, "loss": 39.9971, "step": 6201 }, { "epoch": 147.6686567164179, "grad_norm": 26.96292495727539, "learning_rate": 9.504761904761906e-06, "loss": 40.717, "step": 6202 }, { "epoch": 147.69253731343284, "grad_norm": 35.76007843017578, "learning_rate": 9.503174603174604e-06, "loss": 40.3732, "step": 6203 }, { "epoch": 147.71641791044777, "grad_norm": 32.061424255371094, "learning_rate": 9.501587301587302e-06, "loss": 40.2162, "step": 6204 }, { "epoch": 147.74029850746268, "grad_norm": 30.06816291809082, "learning_rate": 9.5e-06, "loss": 40.1051, "step": 6205 }, { "epoch": 147.76417910447762, "grad_norm": 26.645023345947266, "learning_rate": 9.498412698412699e-06, "loss": 39.4436, "step": 6206 }, { "epoch": 147.78805970149253, "grad_norm": 31.481412887573242, "learning_rate": 9.496825396825397e-06, "loss": 38.5907, "step": 6207 }, { "epoch": 147.81194029850747, "grad_norm": 25.024534225463867, "learning_rate": 9.495238095238097e-06, "loss": 39.9247, "step": 6208 }, { "epoch": 147.83582089552237, "grad_norm": 37.80125427246094, "learning_rate": 9.493650793650795e-06, "loss": 40.4303, "step": 6209 }, { "epoch": 147.8597014925373, "grad_norm": 30.088382720947266, "learning_rate": 9.492063492063493e-06, "loss": 38.2834, "step": 6210 }, { "epoch": 147.88358208955225, "grad_norm": 33.478736877441406, "learning_rate": 9.490476190476191e-06, "loss": 40.277, "step": 6211 }, { "epoch": 147.90746268656716, "grad_norm": 30.780107498168945, "learning_rate": 9.48888888888889e-06, "loss": 39.0267, "step": 6212 }, { "epoch": 147.9313432835821, "grad_norm": 26.820985794067383, "learning_rate": 9.48730158730159e-06, "loss": 39.6983, "step": 6213 }, { "epoch": 147.955223880597, "grad_norm": 28.82769775390625, "learning_rate": 9.485714285714287e-06, "loss": 39.2138, "step": 6214 }, { "epoch": 147.97910447761194, "grad_norm": 31.736270904541016, "learning_rate": 9.484126984126984e-06, "loss": 40.4601, "step": 6215 }, { "epoch": 148.0, "grad_norm": 21.348119735717773, "learning_rate": 9.482539682539684e-06, "loss": 34.7459, "step": 6216 }, { "epoch": 148.02388059701494, "grad_norm": 28.806316375732422, "learning_rate": 9.480952380952382e-06, "loss": 37.9885, "step": 6217 }, { "epoch": 148.04776119402985, "grad_norm": 28.255447387695312, "learning_rate": 9.47936507936508e-06, "loss": 40.8785, "step": 6218 }, { "epoch": 148.07164179104478, "grad_norm": 30.397302627563477, "learning_rate": 9.47777777777778e-06, "loss": 38.6547, "step": 6219 }, { "epoch": 148.0955223880597, "grad_norm": 27.137815475463867, "learning_rate": 9.476190476190476e-06, "loss": 40.4444, "step": 6220 }, { "epoch": 148.11940298507463, "grad_norm": 31.6801815032959, "learning_rate": 9.474603174603174e-06, "loss": 39.4132, "step": 6221 }, { "epoch": 148.14328358208957, "grad_norm": 30.153980255126953, "learning_rate": 9.473015873015874e-06, "loss": 39.8864, "step": 6222 }, { "epoch": 148.16716417910447, "grad_norm": 30.102392196655273, "learning_rate": 9.471428571428572e-06, "loss": 39.0032, "step": 6223 }, { "epoch": 148.1910447761194, "grad_norm": 28.591657638549805, "learning_rate": 9.46984126984127e-06, "loss": 40.2621, "step": 6224 }, { "epoch": 148.21492537313432, "grad_norm": 31.42799949645996, "learning_rate": 9.468253968253969e-06, "loss": 39.3243, "step": 6225 }, { "epoch": 148.23880597014926, "grad_norm": 27.41240692138672, "learning_rate": 9.466666666666667e-06, "loss": 40.58, "step": 6226 }, { "epoch": 148.26268656716417, "grad_norm": 30.882951736450195, "learning_rate": 9.465079365079367e-06, "loss": 40.3588, "step": 6227 }, { "epoch": 148.2865671641791, "grad_norm": 27.335161209106445, "learning_rate": 9.463492063492065e-06, "loss": 39.879, "step": 6228 }, { "epoch": 148.31044776119404, "grad_norm": 30.169204711914062, "learning_rate": 9.461904761904761e-06, "loss": 39.3827, "step": 6229 }, { "epoch": 148.33432835820895, "grad_norm": 27.254920959472656, "learning_rate": 9.460317460317461e-06, "loss": 40.1883, "step": 6230 }, { "epoch": 148.3582089552239, "grad_norm": 26.484397888183594, "learning_rate": 9.45873015873016e-06, "loss": 38.9431, "step": 6231 }, { "epoch": 148.3820895522388, "grad_norm": 23.49915313720703, "learning_rate": 9.457142857142858e-06, "loss": 39.6657, "step": 6232 }, { "epoch": 148.40597014925373, "grad_norm": 31.44021224975586, "learning_rate": 9.455555555555557e-06, "loss": 40.3813, "step": 6233 }, { "epoch": 148.42985074626867, "grad_norm": 24.419384002685547, "learning_rate": 9.453968253968254e-06, "loss": 40.4738, "step": 6234 }, { "epoch": 148.45373134328358, "grad_norm": 33.38460922241211, "learning_rate": 9.452380952380952e-06, "loss": 40.3119, "step": 6235 }, { "epoch": 148.47761194029852, "grad_norm": 27.34727668762207, "learning_rate": 9.450793650793652e-06, "loss": 39.2309, "step": 6236 }, { "epoch": 148.50149253731342, "grad_norm": 32.275970458984375, "learning_rate": 9.44920634920635e-06, "loss": 38.9233, "step": 6237 }, { "epoch": 148.52537313432836, "grad_norm": 29.967649459838867, "learning_rate": 9.447619047619048e-06, "loss": 39.0778, "step": 6238 }, { "epoch": 148.54925373134327, "grad_norm": 27.539777755737305, "learning_rate": 9.446031746031746e-06, "loss": 38.6598, "step": 6239 }, { "epoch": 148.5731343283582, "grad_norm": 23.671335220336914, "learning_rate": 9.444444444444445e-06, "loss": 39.1927, "step": 6240 }, { "epoch": 148.59701492537314, "grad_norm": 33.137210845947266, "learning_rate": 9.442857142857144e-06, "loss": 38.3873, "step": 6241 }, { "epoch": 148.62089552238805, "grad_norm": 27.124778747558594, "learning_rate": 9.441269841269843e-06, "loss": 39.8926, "step": 6242 }, { "epoch": 148.644776119403, "grad_norm": 27.500160217285156, "learning_rate": 9.43968253968254e-06, "loss": 41.0731, "step": 6243 }, { "epoch": 148.6686567164179, "grad_norm": 20.350675582885742, "learning_rate": 9.438095238095239e-06, "loss": 38.183, "step": 6244 }, { "epoch": 148.69253731343284, "grad_norm": 27.957395553588867, "learning_rate": 9.436507936507937e-06, "loss": 39.4262, "step": 6245 }, { "epoch": 148.71641791044777, "grad_norm": 23.82541275024414, "learning_rate": 9.434920634920635e-06, "loss": 39.3012, "step": 6246 }, { "epoch": 148.74029850746268, "grad_norm": 34.506385803222656, "learning_rate": 9.433333333333335e-06, "loss": 38.9767, "step": 6247 }, { "epoch": 148.76417910447762, "grad_norm": 29.38323974609375, "learning_rate": 9.431746031746033e-06, "loss": 39.8884, "step": 6248 }, { "epoch": 148.78805970149253, "grad_norm": 28.864707946777344, "learning_rate": 9.43015873015873e-06, "loss": 38.4068, "step": 6249 }, { "epoch": 148.81194029850747, "grad_norm": 26.762096405029297, "learning_rate": 9.42857142857143e-06, "loss": 40.1152, "step": 6250 }, { "epoch": 148.83582089552237, "grad_norm": 24.719018936157227, "learning_rate": 9.426984126984128e-06, "loss": 38.8501, "step": 6251 }, { "epoch": 148.8597014925373, "grad_norm": 20.85268211364746, "learning_rate": 9.425396825396826e-06, "loss": 39.1373, "step": 6252 }, { "epoch": 148.88358208955225, "grad_norm": 24.713544845581055, "learning_rate": 9.423809523809526e-06, "loss": 38.214, "step": 6253 }, { "epoch": 148.90746268656716, "grad_norm": 19.68970489501953, "learning_rate": 9.422222222222222e-06, "loss": 38.4203, "step": 6254 }, { "epoch": 148.9313432835821, "grad_norm": 28.356327056884766, "learning_rate": 9.420634920634922e-06, "loss": 40.5012, "step": 6255 }, { "epoch": 148.955223880597, "grad_norm": 24.059450149536133, "learning_rate": 9.41904761904762e-06, "loss": 40.1857, "step": 6256 }, { "epoch": 148.97910447761194, "grad_norm": 24.31195640563965, "learning_rate": 9.417460317460318e-06, "loss": 38.768, "step": 6257 }, { "epoch": 149.0, "grad_norm": 21.859241485595703, "learning_rate": 9.415873015873017e-06, "loss": 36.3293, "step": 6258 }, { "epoch": 149.02388059701494, "grad_norm": 21.140018463134766, "learning_rate": 9.414285714285715e-06, "loss": 39.1804, "step": 6259 }, { "epoch": 149.04776119402985, "grad_norm": 19.773056030273438, "learning_rate": 9.412698412698413e-06, "loss": 38.3969, "step": 6260 }, { "epoch": 149.07164179104478, "grad_norm": 16.957059860229492, "learning_rate": 9.411111111111113e-06, "loss": 40.5906, "step": 6261 }, { "epoch": 149.0955223880597, "grad_norm": 22.389060974121094, "learning_rate": 9.40952380952381e-06, "loss": 39.2568, "step": 6262 }, { "epoch": 149.11940298507463, "grad_norm": 19.9559326171875, "learning_rate": 9.407936507936509e-06, "loss": 40.3229, "step": 6263 }, { "epoch": 149.14328358208957, "grad_norm": 15.990846633911133, "learning_rate": 9.406349206349207e-06, "loss": 39.7053, "step": 6264 }, { "epoch": 149.16716417910447, "grad_norm": 20.258081436157227, "learning_rate": 9.404761904761905e-06, "loss": 37.9371, "step": 6265 }, { "epoch": 149.1910447761194, "grad_norm": 17.81937599182129, "learning_rate": 9.403174603174603e-06, "loss": 39.1156, "step": 6266 }, { "epoch": 149.21492537313432, "grad_norm": 17.503934860229492, "learning_rate": 9.401587301587303e-06, "loss": 38.8225, "step": 6267 }, { "epoch": 149.23880597014926, "grad_norm": 24.96233558654785, "learning_rate": 9.4e-06, "loss": 39.4187, "step": 6268 }, { "epoch": 149.26268656716417, "grad_norm": 16.594804763793945, "learning_rate": 9.3984126984127e-06, "loss": 38.9482, "step": 6269 }, { "epoch": 149.2865671641791, "grad_norm": 23.703876495361328, "learning_rate": 9.396825396825398e-06, "loss": 38.9453, "step": 6270 }, { "epoch": 149.31044776119404, "grad_norm": 19.64229393005371, "learning_rate": 9.395238095238096e-06, "loss": 39.8369, "step": 6271 }, { "epoch": 149.33432835820895, "grad_norm": 24.94515609741211, "learning_rate": 9.393650793650794e-06, "loss": 39.2128, "step": 6272 }, { "epoch": 149.3582089552239, "grad_norm": 20.51252555847168, "learning_rate": 9.392063492063492e-06, "loss": 39.5531, "step": 6273 }, { "epoch": 149.3820895522388, "grad_norm": 22.185834884643555, "learning_rate": 9.39047619047619e-06, "loss": 41.1707, "step": 6274 }, { "epoch": 149.40597014925373, "grad_norm": 20.402530670166016, "learning_rate": 9.38888888888889e-06, "loss": 38.2911, "step": 6275 }, { "epoch": 149.42985074626867, "grad_norm": 21.563535690307617, "learning_rate": 9.387301587301588e-06, "loss": 38.7527, "step": 6276 }, { "epoch": 149.45373134328358, "grad_norm": 20.71843719482422, "learning_rate": 9.385714285714287e-06, "loss": 39.2306, "step": 6277 }, { "epoch": 149.47761194029852, "grad_norm": 22.216678619384766, "learning_rate": 9.384126984126985e-06, "loss": 39.1267, "step": 6278 }, { "epoch": 149.50149253731342, "grad_norm": 17.682825088500977, "learning_rate": 9.382539682539683e-06, "loss": 40.6194, "step": 6279 }, { "epoch": 149.52537313432836, "grad_norm": 22.47300910949707, "learning_rate": 9.380952380952381e-06, "loss": 38.7946, "step": 6280 }, { "epoch": 149.54925373134327, "grad_norm": 16.163511276245117, "learning_rate": 9.379365079365081e-06, "loss": 40.6577, "step": 6281 }, { "epoch": 149.5731343283582, "grad_norm": 25.112506866455078, "learning_rate": 9.377777777777779e-06, "loss": 38.9654, "step": 6282 }, { "epoch": 149.59701492537314, "grad_norm": 24.64344596862793, "learning_rate": 9.376190476190477e-06, "loss": 40.4548, "step": 6283 }, { "epoch": 149.62089552238805, "grad_norm": 17.872581481933594, "learning_rate": 9.374603174603175e-06, "loss": 39.0748, "step": 6284 }, { "epoch": 149.644776119403, "grad_norm": 22.106613159179688, "learning_rate": 9.373015873015874e-06, "loss": 40.1216, "step": 6285 }, { "epoch": 149.6686567164179, "grad_norm": 19.284791946411133, "learning_rate": 9.371428571428572e-06, "loss": 39.2773, "step": 6286 }, { "epoch": 149.69253731343284, "grad_norm": 19.2523136138916, "learning_rate": 9.369841269841272e-06, "loss": 40.2495, "step": 6287 }, { "epoch": 149.71641791044777, "grad_norm": 18.848800659179688, "learning_rate": 9.368253968253968e-06, "loss": 39.1348, "step": 6288 }, { "epoch": 149.74029850746268, "grad_norm": 21.043163299560547, "learning_rate": 9.366666666666668e-06, "loss": 40.5966, "step": 6289 }, { "epoch": 149.76417910447762, "grad_norm": 17.63033676147461, "learning_rate": 9.365079365079366e-06, "loss": 40.1343, "step": 6290 }, { "epoch": 149.78805970149253, "grad_norm": 13.709476470947266, "learning_rate": 9.363492063492064e-06, "loss": 38.4333, "step": 6291 }, { "epoch": 149.81194029850747, "grad_norm": 21.13844108581543, "learning_rate": 9.361904761904762e-06, "loss": 39.8698, "step": 6292 }, { "epoch": 149.83582089552237, "grad_norm": 16.18905258178711, "learning_rate": 9.36031746031746e-06, "loss": 38.9989, "step": 6293 }, { "epoch": 149.8597014925373, "grad_norm": 21.491127014160156, "learning_rate": 9.358730158730159e-06, "loss": 39.4642, "step": 6294 }, { "epoch": 149.88358208955225, "grad_norm": 22.262081146240234, "learning_rate": 9.357142857142859e-06, "loss": 40.0517, "step": 6295 }, { "epoch": 149.90746268656716, "grad_norm": 17.686460494995117, "learning_rate": 9.355555555555557e-06, "loss": 40.9072, "step": 6296 }, { "epoch": 149.9313432835821, "grad_norm": 25.11887550354004, "learning_rate": 9.353968253968255e-06, "loss": 38.7171, "step": 6297 }, { "epoch": 149.955223880597, "grad_norm": 17.390230178833008, "learning_rate": 9.352380952380953e-06, "loss": 38.9999, "step": 6298 }, { "epoch": 149.97910447761194, "grad_norm": 26.161745071411133, "learning_rate": 9.350793650793651e-06, "loss": 39.9917, "step": 6299 }, { "epoch": 150.0, "grad_norm": 18.304304122924805, "learning_rate": 9.34920634920635e-06, "loss": 34.2472, "step": 6300 }, { "epoch": 150.0, "step": 6300, "total_flos": 3.0974324482122125e+17, "train_loss": 2.6563439275347998, "train_runtime": 12847.7486, "train_samples_per_second": 62.486, "train_steps_per_second": 0.49 }, { "epoch": 150.02388059701494, "grad_norm": 19.16498374938965, "learning_rate": 1e-05, "loss": 40.1809, "step": 6301 }, { "epoch": 150.04776119402985, "grad_norm": Infinity, "learning_rate": 9.998599439775911e-06, "loss": 45.4434, "step": 6302 }, { "epoch": 150.07164179104478, "grad_norm": 279.3161926269531, "learning_rate": 9.998599439775911e-06, "loss": 45.5818, "step": 6303 }, { "epoch": 150.0955223880597, "grad_norm": 139.1039581298828, "learning_rate": 9.997198879551822e-06, "loss": 43.4873, "step": 6304 }, { "epoch": 150.11940298507463, "grad_norm": 67.4908676147461, "learning_rate": 9.995798319327733e-06, "loss": 40.3046, "step": 6305 }, { "epoch": 150.14328358208957, "grad_norm": 42.27750015258789, "learning_rate": 9.994397759103642e-06, "loss": 40.384, "step": 6306 }, { "epoch": 150.16716417910447, "grad_norm": 50.98027420043945, "learning_rate": 9.992997198879552e-06, "loss": 40.5811, "step": 6307 }, { "epoch": 150.1910447761194, "grad_norm": 45.15121078491211, "learning_rate": 9.991596638655463e-06, "loss": 40.4276, "step": 6308 }, { "epoch": 150.21492537313432, "grad_norm": 35.41339111328125, "learning_rate": 9.990196078431374e-06, "loss": 41.498, "step": 6309 }, { "epoch": 150.23880597014926, "grad_norm": 24.025501251220703, "learning_rate": 9.988795518207284e-06, "loss": 39.3925, "step": 6310 }, { "epoch": 150.26268656716417, "grad_norm": 28.514684677124023, "learning_rate": 9.987394957983195e-06, "loss": 39.7632, "step": 6311 }, { "epoch": 150.2865671641791, "grad_norm": 24.55237579345703, "learning_rate": 9.985994397759104e-06, "loss": 39.7918, "step": 6312 }, { "epoch": 150.31044776119404, "grad_norm": 24.92273712158203, "learning_rate": 9.984593837535014e-06, "loss": 39.4903, "step": 6313 }, { "epoch": 150.33432835820895, "grad_norm": 22.315248489379883, "learning_rate": 9.983193277310925e-06, "loss": 38.6691, "step": 6314 }, { "epoch": 150.3582089552239, "grad_norm": 21.531641006469727, "learning_rate": 9.981792717086836e-06, "loss": 39.9104, "step": 6315 }, { "epoch": 150.3820895522388, "grad_norm": 22.9049072265625, "learning_rate": 9.980392156862746e-06, "loss": 40.5113, "step": 6316 }, { "epoch": 150.40597014925373, "grad_norm": 22.430377960205078, "learning_rate": 9.978991596638657e-06, "loss": 39.4948, "step": 6317 }, { "epoch": 150.42985074626867, "grad_norm": 18.36344337463379, "learning_rate": 9.977591036414566e-06, "loss": 40.5239, "step": 6318 }, { "epoch": 150.45373134328358, "grad_norm": 21.41360855102539, "learning_rate": 9.976190476190477e-06, "loss": 40.053, "step": 6319 }, { "epoch": 150.47761194029852, "grad_norm": 17.077104568481445, "learning_rate": 9.974789915966387e-06, "loss": 39.8097, "step": 6320 }, { "epoch": 150.50149253731342, "grad_norm": 21.632736206054688, "learning_rate": 9.973389355742298e-06, "loss": 39.7021, "step": 6321 }, { "epoch": 150.52537313432836, "grad_norm": 20.047056198120117, "learning_rate": 9.971988795518209e-06, "loss": 40.2315, "step": 6322 }, { "epoch": 150.54925373134327, "grad_norm": 20.613143920898438, "learning_rate": 9.970588235294119e-06, "loss": 40.0007, "step": 6323 }, { "epoch": 150.5731343283582, "grad_norm": 17.636415481567383, "learning_rate": 9.969187675070028e-06, "loss": 40.4111, "step": 6324 }, { "epoch": 150.59701492537314, "grad_norm": 20.692312240600586, "learning_rate": 9.967787114845939e-06, "loss": 39.2482, "step": 6325 }, { "epoch": 150.62089552238805, "grad_norm": 14.423230171203613, "learning_rate": 9.96638655462185e-06, "loss": 38.3858, "step": 6326 }, { "epoch": 150.644776119403, "grad_norm": 15.524177551269531, "learning_rate": 9.96498599439776e-06, "loss": 38.3366, "step": 6327 }, { "epoch": 150.6686567164179, "grad_norm": 18.633893966674805, "learning_rate": 9.96358543417367e-06, "loss": 39.3271, "step": 6328 }, { "epoch": 150.69253731343284, "grad_norm": 19.223590850830078, "learning_rate": 9.962184873949581e-06, "loss": 39.2428, "step": 6329 }, { "epoch": 150.71641791044777, "grad_norm": 18.664216995239258, "learning_rate": 9.960784313725492e-06, "loss": 38.9784, "step": 6330 }, { "epoch": 150.74029850746268, "grad_norm": 16.15790367126465, "learning_rate": 9.959383753501401e-06, "loss": 39.3126, "step": 6331 }, { "epoch": 150.76417910447762, "grad_norm": 14.722516059875488, "learning_rate": 9.957983193277312e-06, "loss": 39.4758, "step": 6332 }, { "epoch": 150.78805970149253, "grad_norm": 18.573301315307617, "learning_rate": 9.956582633053222e-06, "loss": 39.6617, "step": 6333 }, { "epoch": 150.81194029850747, "grad_norm": 21.44211769104004, "learning_rate": 9.955182072829133e-06, "loss": 38.9266, "step": 6334 }, { "epoch": 150.83582089552237, "grad_norm": 19.530872344970703, "learning_rate": 9.953781512605043e-06, "loss": 39.3849, "step": 6335 }, { "epoch": 150.8597014925373, "grad_norm": 14.259345054626465, "learning_rate": 9.952380952380954e-06, "loss": 40.0807, "step": 6336 }, { "epoch": 150.88358208955225, "grad_norm": 18.270769119262695, "learning_rate": 9.950980392156863e-06, "loss": 38.9433, "step": 6337 }, { "epoch": 150.90746268656716, "grad_norm": 23.193754196166992, "learning_rate": 9.949579831932774e-06, "loss": 39.1875, "step": 6338 }, { "epoch": 150.9313432835821, "grad_norm": 16.407913208007812, "learning_rate": 9.948179271708684e-06, "loss": 39.411, "step": 6339 }, { "epoch": 150.955223880597, "grad_norm": 14.759758949279785, "learning_rate": 9.946778711484595e-06, "loss": 39.4402, "step": 6340 }, { "epoch": 150.97910447761194, "grad_norm": 22.55985450744629, "learning_rate": 9.945378151260506e-06, "loss": 38.5775, "step": 6341 }, { "epoch": 151.0, "grad_norm": 17.532306671142578, "learning_rate": 9.943977591036416e-06, "loss": 33.8262, "step": 6342 }, { "epoch": 151.02388059701494, "grad_norm": 19.179887771606445, "learning_rate": 9.942577030812325e-06, "loss": 41.4249, "step": 6343 }, { "epoch": 151.04776119402985, "grad_norm": 15.04033374786377, "learning_rate": 9.941176470588236e-06, "loss": 39.8833, "step": 6344 }, { "epoch": 151.07164179104478, "grad_norm": 16.083768844604492, "learning_rate": 9.939775910364146e-06, "loss": 38.9967, "step": 6345 }, { "epoch": 151.0955223880597, "grad_norm": 16.410978317260742, "learning_rate": 9.938375350140057e-06, "loss": 38.7727, "step": 6346 }, { "epoch": 151.11940298507463, "grad_norm": 21.35173988342285, "learning_rate": 9.936974789915968e-06, "loss": 40.7854, "step": 6347 }, { "epoch": 151.14328358208957, "grad_norm": 19.4251766204834, "learning_rate": 9.935574229691878e-06, "loss": 39.4606, "step": 6348 }, { "epoch": 151.16716417910447, "grad_norm": 18.5143985748291, "learning_rate": 9.934173669467789e-06, "loss": 39.06, "step": 6349 }, { "epoch": 151.1910447761194, "grad_norm": 15.860893249511719, "learning_rate": 9.932773109243698e-06, "loss": 40.4898, "step": 6350 }, { "epoch": 151.21492537313432, "grad_norm": 18.80919075012207, "learning_rate": 9.931372549019609e-06, "loss": 38.6399, "step": 6351 }, { "epoch": 151.23880597014926, "grad_norm": 18.1273250579834, "learning_rate": 9.92997198879552e-06, "loss": 40.0928, "step": 6352 }, { "epoch": 151.26268656716417, "grad_norm": 17.43776512145996, "learning_rate": 9.92857142857143e-06, "loss": 38.6823, "step": 6353 }, { "epoch": 151.2865671641791, "grad_norm": 15.621599197387695, "learning_rate": 9.92717086834734e-06, "loss": 39.3631, "step": 6354 }, { "epoch": 151.31044776119404, "grad_norm": 20.585025787353516, "learning_rate": 9.925770308123251e-06, "loss": 39.4492, "step": 6355 }, { "epoch": 151.33432835820895, "grad_norm": 23.47856330871582, "learning_rate": 9.92436974789916e-06, "loss": 39.4244, "step": 6356 }, { "epoch": 151.3582089552239, "grad_norm": 12.733617782592773, "learning_rate": 9.92296918767507e-06, "loss": 39.6639, "step": 6357 }, { "epoch": 151.3820895522388, "grad_norm": 29.050830841064453, "learning_rate": 9.921568627450981e-06, "loss": 39.0073, "step": 6358 }, { "epoch": 151.40597014925373, "grad_norm": 20.334535598754883, "learning_rate": 9.920168067226892e-06, "loss": 40.0843, "step": 6359 }, { "epoch": 151.42985074626867, "grad_norm": 18.922494888305664, "learning_rate": 9.918767507002803e-06, "loss": 39.7166, "step": 6360 }, { "epoch": 151.45373134328358, "grad_norm": 22.9791259765625, "learning_rate": 9.917366946778713e-06, "loss": 39.1399, "step": 6361 }, { "epoch": 151.47761194029852, "grad_norm": 19.109474182128906, "learning_rate": 9.915966386554622e-06, "loss": 39.8001, "step": 6362 }, { "epoch": 151.50149253731342, "grad_norm": 16.108705520629883, "learning_rate": 9.914565826330533e-06, "loss": 38.9864, "step": 6363 }, { "epoch": 151.52537313432836, "grad_norm": 19.968387603759766, "learning_rate": 9.913165266106443e-06, "loss": 40.3586, "step": 6364 }, { "epoch": 151.54925373134327, "grad_norm": 19.269989013671875, "learning_rate": 9.911764705882354e-06, "loss": 39.6837, "step": 6365 }, { "epoch": 151.5731343283582, "grad_norm": 19.654542922973633, "learning_rate": 9.910364145658265e-06, "loss": 39.6919, "step": 6366 }, { "epoch": 151.59701492537314, "grad_norm": 16.44729232788086, "learning_rate": 9.908963585434175e-06, "loss": 39.3837, "step": 6367 }, { "epoch": 151.62089552238805, "grad_norm": 21.828369140625, "learning_rate": 9.907563025210084e-06, "loss": 38.4021, "step": 6368 }, { "epoch": 151.644776119403, "grad_norm": 18.849733352661133, "learning_rate": 9.906162464985995e-06, "loss": 38.9801, "step": 6369 }, { "epoch": 151.6686567164179, "grad_norm": 15.600545883178711, "learning_rate": 9.904761904761906e-06, "loss": 39.9175, "step": 6370 }, { "epoch": 151.69253731343284, "grad_norm": 27.39472770690918, "learning_rate": 9.903361344537816e-06, "loss": 38.6417, "step": 6371 }, { "epoch": 151.71641791044777, "grad_norm": 18.112911224365234, "learning_rate": 9.901960784313727e-06, "loss": 40.6345, "step": 6372 }, { "epoch": 151.74029850746268, "grad_norm": 16.16978645324707, "learning_rate": 9.900560224089638e-06, "loss": 39.7246, "step": 6373 }, { "epoch": 151.76417910447762, "grad_norm": 23.89118766784668, "learning_rate": 9.899159663865548e-06, "loss": 39.8915, "step": 6374 }, { "epoch": 151.78805970149253, "grad_norm": 17.466960906982422, "learning_rate": 9.897759103641457e-06, "loss": 38.2378, "step": 6375 }, { "epoch": 151.81194029850747, "grad_norm": 22.093915939331055, "learning_rate": 9.896358543417368e-06, "loss": 38.6562, "step": 6376 }, { "epoch": 151.83582089552237, "grad_norm": 24.06990623474121, "learning_rate": 9.894957983193278e-06, "loss": 40.2845, "step": 6377 }, { "epoch": 151.8597014925373, "grad_norm": 18.051422119140625, "learning_rate": 9.893557422969189e-06, "loss": 39.2352, "step": 6378 }, { "epoch": 151.88358208955225, "grad_norm": 21.78950309753418, "learning_rate": 9.8921568627451e-06, "loss": 38.5991, "step": 6379 }, { "epoch": 151.90746268656716, "grad_norm": 18.29522132873535, "learning_rate": 9.89075630252101e-06, "loss": 38.2873, "step": 6380 }, { "epoch": 151.9313432835821, "grad_norm": 24.120948791503906, "learning_rate": 9.88935574229692e-06, "loss": 38.6462, "step": 6381 }, { "epoch": 151.955223880597, "grad_norm": 17.51114845275879, "learning_rate": 9.88795518207283e-06, "loss": 39.1381, "step": 6382 }, { "epoch": 151.97910447761194, "grad_norm": 22.926584243774414, "learning_rate": 9.88655462184874e-06, "loss": 38.8322, "step": 6383 }, { "epoch": 152.0, "grad_norm": 18.463191986083984, "learning_rate": 9.885154061624651e-06, "loss": 34.0882, "step": 6384 }, { "epoch": 152.02388059701494, "grad_norm": 23.521703720092773, "learning_rate": 9.883753501400562e-06, "loss": 41.0516, "step": 6385 }, { "epoch": 152.04776119402985, "grad_norm": 19.793399810791016, "learning_rate": 9.882352941176472e-06, "loss": 40.0187, "step": 6386 }, { "epoch": 152.07164179104478, "grad_norm": 26.50450897216797, "learning_rate": 9.880952380952381e-06, "loss": 38.7966, "step": 6387 }, { "epoch": 152.0955223880597, "grad_norm": 18.84419822692871, "learning_rate": 9.879551820728292e-06, "loss": 39.4662, "step": 6388 }, { "epoch": 152.11940298507463, "grad_norm": 23.414226531982422, "learning_rate": 9.878151260504203e-06, "loss": 39.6741, "step": 6389 }, { "epoch": 152.14328358208957, "grad_norm": 24.03367042541504, "learning_rate": 9.876750700280113e-06, "loss": 39.5593, "step": 6390 }, { "epoch": 152.16716417910447, "grad_norm": 17.131258010864258, "learning_rate": 9.875350140056024e-06, "loss": 39.0036, "step": 6391 }, { "epoch": 152.1910447761194, "grad_norm": 20.72348976135254, "learning_rate": 9.873949579831935e-06, "loss": 38.6815, "step": 6392 }, { "epoch": 152.21492537313432, "grad_norm": 16.66873550415039, "learning_rate": 9.872549019607845e-06, "loss": 39.2612, "step": 6393 }, { "epoch": 152.23880597014926, "grad_norm": 17.73870849609375, "learning_rate": 9.871148459383754e-06, "loss": 39.6521, "step": 6394 }, { "epoch": 152.26268656716417, "grad_norm": 26.47159767150879, "learning_rate": 9.869747899159665e-06, "loss": 40.0595, "step": 6395 }, { "epoch": 152.2865671641791, "grad_norm": 15.393265724182129, "learning_rate": 9.868347338935575e-06, "loss": 39.7537, "step": 6396 }, { "epoch": 152.31044776119404, "grad_norm": NaN, "learning_rate": 9.866946778711486e-06, "loss": 69.9822, "step": 6397 }, { "epoch": 152.33432835820895, "grad_norm": 17.150754928588867, "learning_rate": 9.866946778711486e-06, "loss": 39.7025, "step": 6398 }, { "epoch": 152.3582089552239, "grad_norm": 19.011913299560547, "learning_rate": 9.865546218487397e-06, "loss": 40.6093, "step": 6399 }, { "epoch": 152.3820895522388, "grad_norm": 18.70676612854004, "learning_rate": 9.864145658263307e-06, "loss": 38.6835, "step": 6400 }, { "epoch": 152.40597014925373, "grad_norm": 16.673490524291992, "learning_rate": 9.862745098039216e-06, "loss": 38.8727, "step": 6401 }, { "epoch": 152.42985074626867, "grad_norm": 15.896980285644531, "learning_rate": 9.861344537815127e-06, "loss": 39.4872, "step": 6402 }, { "epoch": 152.45373134328358, "grad_norm": 19.734540939331055, "learning_rate": 9.859943977591038e-06, "loss": 40.6661, "step": 6403 }, { "epoch": 152.47761194029852, "grad_norm": 20.29810333251953, "learning_rate": 9.858543417366948e-06, "loss": 38.7285, "step": 6404 }, { "epoch": 152.50149253731342, "grad_norm": 19.600051879882812, "learning_rate": 9.857142857142859e-06, "loss": 39.5563, "step": 6405 }, { "epoch": 152.52537313432836, "grad_norm": 17.804553985595703, "learning_rate": 9.85574229691877e-06, "loss": 37.6632, "step": 6406 }, { "epoch": 152.54925373134327, "grad_norm": 16.141559600830078, "learning_rate": 9.854341736694678e-06, "loss": 39.5719, "step": 6407 }, { "epoch": 152.5731343283582, "grad_norm": 13.319794654846191, "learning_rate": 9.852941176470589e-06, "loss": 39.1491, "step": 6408 }, { "epoch": 152.59701492537314, "grad_norm": 16.20441246032715, "learning_rate": 9.8515406162465e-06, "loss": 38.9358, "step": 6409 }, { "epoch": 152.62089552238805, "grad_norm": 16.522918701171875, "learning_rate": 9.85014005602241e-06, "loss": 40.0617, "step": 6410 }, { "epoch": 152.644776119403, "grad_norm": 21.760295867919922, "learning_rate": 9.848739495798321e-06, "loss": 39.4102, "step": 6411 }, { "epoch": 152.6686567164179, "grad_norm": 18.06766700744629, "learning_rate": 9.847338935574232e-06, "loss": 39.4243, "step": 6412 }, { "epoch": 152.69253731343284, "grad_norm": 15.790227890014648, "learning_rate": 9.84593837535014e-06, "loss": 39.2864, "step": 6413 }, { "epoch": 152.71641791044777, "grad_norm": 12.733904838562012, "learning_rate": 9.844537815126051e-06, "loss": 38.8609, "step": 6414 }, { "epoch": 152.74029850746268, "grad_norm": 17.441940307617188, "learning_rate": 9.843137254901962e-06, "loss": 39.965, "step": 6415 }, { "epoch": 152.76417910447762, "grad_norm": 18.61382293701172, "learning_rate": 9.84173669467787e-06, "loss": 38.837, "step": 6416 }, { "epoch": 152.78805970149253, "grad_norm": 16.26108169555664, "learning_rate": 9.840336134453781e-06, "loss": 39.2805, "step": 6417 }, { "epoch": 152.81194029850747, "grad_norm": 17.11221694946289, "learning_rate": 9.838935574229692e-06, "loss": 39.2356, "step": 6418 }, { "epoch": 152.83582089552237, "grad_norm": 11.892387390136719, "learning_rate": 9.837535014005603e-06, "loss": 37.877, "step": 6419 }, { "epoch": 152.8597014925373, "grad_norm": 15.409753799438477, "learning_rate": 9.836134453781513e-06, "loss": 38.1532, "step": 6420 }, { "epoch": 152.88358208955225, "grad_norm": 16.553173065185547, "learning_rate": 9.834733893557424e-06, "loss": 39.9671, "step": 6421 }, { "epoch": 152.90746268656716, "grad_norm": 15.028477668762207, "learning_rate": 9.833333333333333e-06, "loss": 38.5096, "step": 6422 }, { "epoch": 152.9313432835821, "grad_norm": 15.761276245117188, "learning_rate": 9.831932773109244e-06, "loss": 39.7583, "step": 6423 }, { "epoch": 152.955223880597, "grad_norm": 17.00655174255371, "learning_rate": 9.830532212885154e-06, "loss": 38.7234, "step": 6424 }, { "epoch": 152.97910447761194, "grad_norm": 15.511736869812012, "learning_rate": 9.829131652661065e-06, "loss": 39.256, "step": 6425 }, { "epoch": 153.0, "grad_norm": 19.71329116821289, "learning_rate": 9.827731092436975e-06, "loss": 34.6084, "step": 6426 }, { "epoch": 153.02388059701494, "grad_norm": 17.619407653808594, "learning_rate": 9.826330532212886e-06, "loss": 39.3749, "step": 6427 }, { "epoch": 153.04776119402985, "grad_norm": 21.454179763793945, "learning_rate": 9.824929971988795e-06, "loss": 39.189, "step": 6428 }, { "epoch": 153.07164179104478, "grad_norm": 15.592042922973633, "learning_rate": 9.823529411764706e-06, "loss": 40.1688, "step": 6429 }, { "epoch": 153.0955223880597, "grad_norm": 18.97234344482422, "learning_rate": 9.822128851540616e-06, "loss": 38.9257, "step": 6430 }, { "epoch": 153.11940298507463, "grad_norm": 22.144588470458984, "learning_rate": 9.820728291316527e-06, "loss": 39.408, "step": 6431 }, { "epoch": 153.14328358208957, "grad_norm": 22.970861434936523, "learning_rate": 9.819327731092438e-06, "loss": 39.0693, "step": 6432 }, { "epoch": 153.16716417910447, "grad_norm": 14.850386619567871, "learning_rate": 9.817927170868348e-06, "loss": 38.848, "step": 6433 }, { "epoch": 153.1910447761194, "grad_norm": 23.122474670410156, "learning_rate": 9.816526610644259e-06, "loss": 38.69, "step": 6434 }, { "epoch": 153.21492537313432, "grad_norm": 20.157529830932617, "learning_rate": 9.815126050420168e-06, "loss": 38.522, "step": 6435 }, { "epoch": 153.23880597014926, "grad_norm": 16.902263641357422, "learning_rate": 9.813725490196078e-06, "loss": 39.1863, "step": 6436 }, { "epoch": 153.26268656716417, "grad_norm": 20.05976676940918, "learning_rate": 9.812324929971989e-06, "loss": 38.8603, "step": 6437 }, { "epoch": 153.2865671641791, "grad_norm": 21.53315544128418, "learning_rate": 9.8109243697479e-06, "loss": 40.4414, "step": 6438 }, { "epoch": 153.31044776119404, "grad_norm": 17.6829891204834, "learning_rate": 9.80952380952381e-06, "loss": 39.3266, "step": 6439 }, { "epoch": 153.33432835820895, "grad_norm": 18.332361221313477, "learning_rate": 9.808123249299721e-06, "loss": 39.7485, "step": 6440 }, { "epoch": 153.3582089552239, "grad_norm": 13.068806648254395, "learning_rate": 9.80672268907563e-06, "loss": 40.4783, "step": 6441 }, { "epoch": 153.3820895522388, "grad_norm": 24.55851173400879, "learning_rate": 9.80532212885154e-06, "loss": 38.5473, "step": 6442 }, { "epoch": 153.40597014925373, "grad_norm": 20.670440673828125, "learning_rate": 9.803921568627451e-06, "loss": 39.1716, "step": 6443 }, { "epoch": 153.42985074626867, "grad_norm": 16.37117576599121, "learning_rate": 9.802521008403362e-06, "loss": 39.5912, "step": 6444 }, { "epoch": 153.45373134328358, "grad_norm": 22.2893009185791, "learning_rate": 9.801120448179273e-06, "loss": 39.633, "step": 6445 }, { "epoch": 153.47761194029852, "grad_norm": 17.918243408203125, "learning_rate": 9.799719887955183e-06, "loss": 40.6878, "step": 6446 }, { "epoch": 153.50149253731342, "grad_norm": 15.346482276916504, "learning_rate": 9.798319327731092e-06, "loss": 38.6619, "step": 6447 }, { "epoch": 153.52537313432836, "grad_norm": 14.18155574798584, "learning_rate": 9.796918767507003e-06, "loss": 38.8895, "step": 6448 }, { "epoch": 153.54925373134327, "grad_norm": 17.546628952026367, "learning_rate": 9.795518207282913e-06, "loss": 40.0357, "step": 6449 }, { "epoch": 153.5731343283582, "grad_norm": 20.402925491333008, "learning_rate": 9.794117647058824e-06, "loss": 39.3683, "step": 6450 }, { "epoch": 153.59701492537314, "grad_norm": 19.142513275146484, "learning_rate": 9.792717086834735e-06, "loss": 39.1011, "step": 6451 }, { "epoch": 153.62089552238805, "grad_norm": 22.166929244995117, "learning_rate": 9.791316526610645e-06, "loss": 38.6932, "step": 6452 }, { "epoch": 153.644776119403, "grad_norm": 16.393999099731445, "learning_rate": 9.789915966386554e-06, "loss": 39.9522, "step": 6453 }, { "epoch": 153.6686567164179, "grad_norm": 20.74897003173828, "learning_rate": 9.788515406162465e-06, "loss": 39.4043, "step": 6454 }, { "epoch": 153.69253731343284, "grad_norm": 18.162193298339844, "learning_rate": 9.787114845938376e-06, "loss": 38.9868, "step": 6455 }, { "epoch": 153.71641791044777, "grad_norm": 17.61060905456543, "learning_rate": 9.785714285714286e-06, "loss": 39.2859, "step": 6456 }, { "epoch": 153.74029850746268, "grad_norm": 17.78687858581543, "learning_rate": 9.784313725490197e-06, "loss": 39.4746, "step": 6457 }, { "epoch": 153.76417910447762, "grad_norm": 20.72416114807129, "learning_rate": 9.782913165266107e-06, "loss": 39.1889, "step": 6458 }, { "epoch": 153.78805970149253, "grad_norm": 14.441688537597656, "learning_rate": 9.781512605042018e-06, "loss": 38.0719, "step": 6459 }, { "epoch": 153.81194029850747, "grad_norm": 18.633865356445312, "learning_rate": 9.780112044817927e-06, "loss": 40.0062, "step": 6460 }, { "epoch": 153.83582089552237, "grad_norm": 16.08208656311035, "learning_rate": 9.778711484593838e-06, "loss": 39.807, "step": 6461 }, { "epoch": 153.8597014925373, "grad_norm": 21.865211486816406, "learning_rate": 9.777310924369748e-06, "loss": 38.8119, "step": 6462 }, { "epoch": 153.88358208955225, "grad_norm": 18.500150680541992, "learning_rate": 9.775910364145659e-06, "loss": 39.2655, "step": 6463 }, { "epoch": 153.90746268656716, "grad_norm": 17.913738250732422, "learning_rate": 9.77450980392157e-06, "loss": 39.6616, "step": 6464 }, { "epoch": 153.9313432835821, "grad_norm": 19.963655471801758, "learning_rate": 9.77310924369748e-06, "loss": 38.6005, "step": 6465 }, { "epoch": 153.955223880597, "grad_norm": 21.53469467163086, "learning_rate": 9.77170868347339e-06, "loss": 38.5381, "step": 6466 }, { "epoch": 153.97910447761194, "grad_norm": 23.95380973815918, "learning_rate": 9.7703081232493e-06, "loss": 39.2278, "step": 6467 }, { "epoch": 154.0, "grad_norm": NaN, "learning_rate": 9.76890756302521e-06, "loss": 54.3577, "step": 6468 }, { "epoch": 154.02388059701494, "grad_norm": 15.17541217803955, "learning_rate": 9.76890756302521e-06, "loss": 37.9482, "step": 6469 }, { "epoch": 154.04776119402985, "grad_norm": 21.523469924926758, "learning_rate": 9.767507002801121e-06, "loss": 40.279, "step": 6470 }, { "epoch": 154.07164179104478, "grad_norm": 21.303739547729492, "learning_rate": 9.766106442577032e-06, "loss": 38.5729, "step": 6471 }, { "epoch": 154.0955223880597, "grad_norm": 20.074615478515625, "learning_rate": 9.764705882352942e-06, "loss": 40.2056, "step": 6472 }, { "epoch": 154.11940298507463, "grad_norm": 15.129366874694824, "learning_rate": 9.763305322128851e-06, "loss": 39.6186, "step": 6473 }, { "epoch": 154.14328358208957, "grad_norm": 20.436534881591797, "learning_rate": 9.761904761904762e-06, "loss": 38.1005, "step": 6474 }, { "epoch": 154.16716417910447, "grad_norm": 20.725431442260742, "learning_rate": 9.760504201680673e-06, "loss": 40.411, "step": 6475 }, { "epoch": 154.1910447761194, "grad_norm": 19.512540817260742, "learning_rate": 9.759103641456583e-06, "loss": 40.3164, "step": 6476 }, { "epoch": 154.21492537313432, "grad_norm": 17.709447860717773, "learning_rate": 9.757703081232494e-06, "loss": 39.4096, "step": 6477 }, { "epoch": 154.23880597014926, "grad_norm": 18.833778381347656, "learning_rate": 9.756302521008404e-06, "loss": 38.1548, "step": 6478 }, { "epoch": 154.26268656716417, "grad_norm": 16.236936569213867, "learning_rate": 9.754901960784315e-06, "loss": 39.5843, "step": 6479 }, { "epoch": 154.2865671641791, "grad_norm": 18.85942268371582, "learning_rate": 9.753501400560224e-06, "loss": 39.1643, "step": 6480 }, { "epoch": 154.31044776119404, "grad_norm": 14.820978164672852, "learning_rate": 9.752100840336135e-06, "loss": 38.4749, "step": 6481 }, { "epoch": 154.33432835820895, "grad_norm": 18.847061157226562, "learning_rate": 9.750700280112045e-06, "loss": 38.5442, "step": 6482 }, { "epoch": 154.3582089552239, "grad_norm": 16.622020721435547, "learning_rate": 9.749299719887956e-06, "loss": 39.7202, "step": 6483 }, { "epoch": 154.3820895522388, "grad_norm": 17.376924514770508, "learning_rate": 9.747899159663867e-06, "loss": 37.7172, "step": 6484 }, { "epoch": 154.40597014925373, "grad_norm": 14.59231185913086, "learning_rate": 9.746498599439777e-06, "loss": 39.013, "step": 6485 }, { "epoch": 154.42985074626867, "grad_norm": 19.942195892333984, "learning_rate": 9.745098039215686e-06, "loss": 39.7675, "step": 6486 }, { "epoch": 154.45373134328358, "grad_norm": 17.211233139038086, "learning_rate": 9.743697478991597e-06, "loss": 40.5986, "step": 6487 }, { "epoch": 154.47761194029852, "grad_norm": 19.75026512145996, "learning_rate": 9.742296918767507e-06, "loss": 36.8232, "step": 6488 }, { "epoch": 154.50149253731342, "grad_norm": 21.47499656677246, "learning_rate": 9.740896358543418e-06, "loss": 38.4644, "step": 6489 }, { "epoch": 154.52537313432836, "grad_norm": 15.849268913269043, "learning_rate": 9.739495798319329e-06, "loss": 40.2922, "step": 6490 }, { "epoch": 154.54925373134327, "grad_norm": 16.159347534179688, "learning_rate": 9.73809523809524e-06, "loss": 39.124, "step": 6491 }, { "epoch": 154.5731343283582, "grad_norm": 18.291040420532227, "learning_rate": 9.736694677871148e-06, "loss": 38.9641, "step": 6492 }, { "epoch": 154.59701492537314, "grad_norm": 17.57895278930664, "learning_rate": 9.735294117647059e-06, "loss": 40.3195, "step": 6493 }, { "epoch": 154.62089552238805, "grad_norm": 15.273983001708984, "learning_rate": 9.73389355742297e-06, "loss": 39.4168, "step": 6494 }, { "epoch": 154.644776119403, "grad_norm": 18.084733963012695, "learning_rate": 9.73249299719888e-06, "loss": 39.686, "step": 6495 }, { "epoch": 154.6686567164179, "grad_norm": 14.910906791687012, "learning_rate": 9.731092436974791e-06, "loss": 39.4061, "step": 6496 }, { "epoch": 154.69253731343284, "grad_norm": 20.256969451904297, "learning_rate": 9.729691876750702e-06, "loss": 40.062, "step": 6497 }, { "epoch": 154.71641791044777, "grad_norm": 17.579275131225586, "learning_rate": 9.72829131652661e-06, "loss": 38.9151, "step": 6498 }, { "epoch": 154.74029850746268, "grad_norm": 15.15353775024414, "learning_rate": 9.726890756302521e-06, "loss": 37.7658, "step": 6499 }, { "epoch": 154.76417910447762, "grad_norm": 18.60456085205078, "learning_rate": 9.725490196078432e-06, "loss": 39.4208, "step": 6500 }, { "epoch": 154.78805970149253, "grad_norm": 23.808212280273438, "learning_rate": 9.724089635854342e-06, "loss": 39.0267, "step": 6501 }, { "epoch": 154.81194029850747, "grad_norm": 17.379362106323242, "learning_rate": 9.722689075630253e-06, "loss": 40.4376, "step": 6502 }, { "epoch": 154.83582089552237, "grad_norm": 17.4511775970459, "learning_rate": 9.721288515406164e-06, "loss": 39.1269, "step": 6503 }, { "epoch": 154.8597014925373, "grad_norm": 15.80515193939209, "learning_rate": 9.719887955182074e-06, "loss": 38.1002, "step": 6504 }, { "epoch": 154.88358208955225, "grad_norm": 17.451370239257812, "learning_rate": 9.718487394957983e-06, "loss": 40.8626, "step": 6505 }, { "epoch": 154.90746268656716, "grad_norm": 16.299781799316406, "learning_rate": 9.717086834733894e-06, "loss": 38.9524, "step": 6506 }, { "epoch": 154.9313432835821, "grad_norm": 30.887243270874023, "learning_rate": 9.715686274509805e-06, "loss": 38.2993, "step": 6507 }, { "epoch": 154.955223880597, "grad_norm": 17.55278205871582, "learning_rate": 9.714285714285715e-06, "loss": 39.7168, "step": 6508 }, { "epoch": 154.97910447761194, "grad_norm": 20.347652435302734, "learning_rate": 9.712885154061626e-06, "loss": 39.61, "step": 6509 }, { "epoch": 155.0, "grad_norm": 24.018211364746094, "learning_rate": 9.711484593837536e-06, "loss": 34.3992, "step": 6510 }, { "epoch": 155.02388059701494, "grad_norm": 18.324909210205078, "learning_rate": 9.710084033613445e-06, "loss": 38.85, "step": 6511 }, { "epoch": 155.04776119402985, "grad_norm": 36.07960510253906, "learning_rate": 9.708683473389356e-06, "loss": 40.9112, "step": 6512 }, { "epoch": 155.07164179104478, "grad_norm": 21.635616302490234, "learning_rate": 9.707282913165267e-06, "loss": 38.5548, "step": 6513 }, { "epoch": 155.0955223880597, "grad_norm": 34.621952056884766, "learning_rate": 9.705882352941177e-06, "loss": 38.9943, "step": 6514 }, { "epoch": 155.11940298507463, "grad_norm": 21.3360652923584, "learning_rate": 9.704481792717088e-06, "loss": 39.6249, "step": 6515 }, { "epoch": 155.14328358208957, "grad_norm": 29.964282989501953, "learning_rate": 9.703081232492999e-06, "loss": 40.3016, "step": 6516 }, { "epoch": 155.16716417910447, "grad_norm": 21.944597244262695, "learning_rate": 9.701680672268908e-06, "loss": 39.8295, "step": 6517 }, { "epoch": 155.1910447761194, "grad_norm": 22.256132125854492, "learning_rate": 9.700280112044818e-06, "loss": 38.3356, "step": 6518 }, { "epoch": 155.21492537313432, "grad_norm": 30.97159194946289, "learning_rate": 9.698879551820729e-06, "loss": 39.0454, "step": 6519 }, { "epoch": 155.23880597014926, "grad_norm": 19.811222076416016, "learning_rate": 9.69747899159664e-06, "loss": 39.5204, "step": 6520 }, { "epoch": 155.26268656716417, "grad_norm": 40.61001205444336, "learning_rate": 9.69607843137255e-06, "loss": 38.2691, "step": 6521 }, { "epoch": 155.2865671641791, "grad_norm": 30.622100830078125, "learning_rate": 9.69467787114846e-06, "loss": 39.5193, "step": 6522 }, { "epoch": 155.31044776119404, "grad_norm": 39.44752883911133, "learning_rate": 9.693277310924371e-06, "loss": 39.8697, "step": 6523 }, { "epoch": 155.33432835820895, "grad_norm": 31.14112663269043, "learning_rate": 9.69187675070028e-06, "loss": 38.5863, "step": 6524 }, { "epoch": 155.3582089552239, "grad_norm": 41.381629943847656, "learning_rate": 9.690476190476191e-06, "loss": 40.1567, "step": 6525 }, { "epoch": 155.3820895522388, "grad_norm": 36.835693359375, "learning_rate": 9.689075630252102e-06, "loss": 39.1249, "step": 6526 }, { "epoch": 155.40597014925373, "grad_norm": 29.718339920043945, "learning_rate": 9.687675070028012e-06, "loss": 38.2004, "step": 6527 }, { "epoch": 155.42985074626867, "grad_norm": 30.30554962158203, "learning_rate": 9.686274509803923e-06, "loss": 38.4457, "step": 6528 }, { "epoch": 155.45373134328358, "grad_norm": 33.8006591796875, "learning_rate": 9.684873949579834e-06, "loss": 39.0332, "step": 6529 }, { "epoch": 155.47761194029852, "grad_norm": 24.40614128112793, "learning_rate": 9.683473389355742e-06, "loss": 37.3829, "step": 6530 }, { "epoch": 155.50149253731342, "grad_norm": 42.24560546875, "learning_rate": 9.682072829131653e-06, "loss": 39.3422, "step": 6531 }, { "epoch": 155.52537313432836, "grad_norm": 37.031776428222656, "learning_rate": 9.680672268907564e-06, "loss": 38.8629, "step": 6532 }, { "epoch": 155.54925373134327, "grad_norm": 33.795902252197266, "learning_rate": 9.679271708683474e-06, "loss": 39.8452, "step": 6533 }, { "epoch": 155.5731343283582, "grad_norm": 33.857696533203125, "learning_rate": 9.677871148459385e-06, "loss": 39.8327, "step": 6534 }, { "epoch": 155.59701492537314, "grad_norm": 29.091102600097656, "learning_rate": 9.676470588235296e-06, "loss": 40.1465, "step": 6535 }, { "epoch": 155.62089552238805, "grad_norm": 24.67340087890625, "learning_rate": 9.675070028011205e-06, "loss": 39.6256, "step": 6536 }, { "epoch": 155.644776119403, "grad_norm": 34.69873809814453, "learning_rate": 9.673669467787115e-06, "loss": 38.4067, "step": 6537 }, { "epoch": 155.6686567164179, "grad_norm": 29.608654022216797, "learning_rate": 9.672268907563026e-06, "loss": 39.2891, "step": 6538 }, { "epoch": 155.69253731343284, "grad_norm": 36.71940994262695, "learning_rate": 9.670868347338937e-06, "loss": 39.1266, "step": 6539 }, { "epoch": 155.71641791044777, "grad_norm": 33.37260437011719, "learning_rate": 9.669467787114847e-06, "loss": 38.7847, "step": 6540 }, { "epoch": 155.74029850746268, "grad_norm": 31.80318832397461, "learning_rate": 9.668067226890758e-06, "loss": 38.5372, "step": 6541 }, { "epoch": 155.76417910447762, "grad_norm": 30.421329498291016, "learning_rate": 9.666666666666667e-06, "loss": 38.2407, "step": 6542 }, { "epoch": 155.78805970149253, "grad_norm": 34.91036605834961, "learning_rate": 9.665266106442577e-06, "loss": 40.0451, "step": 6543 }, { "epoch": 155.81194029850747, "grad_norm": 29.88376808166504, "learning_rate": 9.663865546218488e-06, "loss": 40.8208, "step": 6544 }, { "epoch": 155.83582089552237, "grad_norm": 33.51005935668945, "learning_rate": 9.662464985994399e-06, "loss": 38.3288, "step": 6545 }, { "epoch": 155.8597014925373, "grad_norm": 28.42050552368164, "learning_rate": 9.66106442577031e-06, "loss": 38.4511, "step": 6546 }, { "epoch": 155.88358208955225, "grad_norm": 28.77273941040039, "learning_rate": 9.65966386554622e-06, "loss": 40.1574, "step": 6547 }, { "epoch": 155.90746268656716, "grad_norm": 24.271469116210938, "learning_rate": 9.65826330532213e-06, "loss": 39.5026, "step": 6548 }, { "epoch": 155.9313432835821, "grad_norm": 34.00471115112305, "learning_rate": 9.65686274509804e-06, "loss": 37.3971, "step": 6549 }, { "epoch": 155.955223880597, "grad_norm": 33.8049430847168, "learning_rate": 9.65546218487395e-06, "loss": 39.0453, "step": 6550 }, { "epoch": 155.97910447761194, "grad_norm": 31.642559051513672, "learning_rate": 9.65406162464986e-06, "loss": 40.4236, "step": 6551 }, { "epoch": 156.0, "grad_norm": 29.527297973632812, "learning_rate": 9.652661064425771e-06, "loss": 34.3201, "step": 6552 }, { "epoch": 156.02388059701494, "grad_norm": 29.68943977355957, "learning_rate": 9.651260504201682e-06, "loss": 38.8877, "step": 6553 }, { "epoch": 156.04776119402985, "grad_norm": 26.39272117614746, "learning_rate": 9.649859943977593e-06, "loss": 37.7011, "step": 6554 }, { "epoch": 156.07164179104478, "grad_norm": 33.61505126953125, "learning_rate": 9.648459383753502e-06, "loss": 39.2487, "step": 6555 }, { "epoch": 156.0955223880597, "grad_norm": 31.842918395996094, "learning_rate": 9.647058823529412e-06, "loss": 38.7661, "step": 6556 }, { "epoch": 156.11940298507463, "grad_norm": 29.886465072631836, "learning_rate": 9.645658263305323e-06, "loss": 39.5789, "step": 6557 }, { "epoch": 156.14328358208957, "grad_norm": 26.030107498168945, "learning_rate": 9.644257703081234e-06, "loss": 38.6281, "step": 6558 }, { "epoch": 156.16716417910447, "grad_norm": 32.856842041015625, "learning_rate": 9.642857142857144e-06, "loss": 39.1047, "step": 6559 }, { "epoch": 156.1910447761194, "grad_norm": 29.918258666992188, "learning_rate": 9.641456582633055e-06, "loss": 37.773, "step": 6560 }, { "epoch": 156.21492537313432, "grad_norm": 33.415077209472656, "learning_rate": 9.640056022408964e-06, "loss": 38.7251, "step": 6561 }, { "epoch": 156.23880597014926, "grad_norm": 29.851547241210938, "learning_rate": 9.638655462184874e-06, "loss": 38.9788, "step": 6562 }, { "epoch": 156.26268656716417, "grad_norm": 32.586849212646484, "learning_rate": 9.637254901960785e-06, "loss": 37.6731, "step": 6563 }, { "epoch": 156.2865671641791, "grad_norm": 27.224334716796875, "learning_rate": 9.635854341736696e-06, "loss": 38.7623, "step": 6564 }, { "epoch": 156.31044776119404, "grad_norm": 37.19491958618164, "learning_rate": 9.634453781512606e-06, "loss": 38.1435, "step": 6565 }, { "epoch": 156.33432835820895, "grad_norm": 33.014869689941406, "learning_rate": 9.633053221288517e-06, "loss": 38.6628, "step": 6566 }, { "epoch": 156.3582089552239, "grad_norm": 30.490785598754883, "learning_rate": 9.631652661064426e-06, "loss": 37.5884, "step": 6567 }, { "epoch": 156.3820895522388, "grad_norm": 26.904870986938477, "learning_rate": 9.630252100840337e-06, "loss": 40.9113, "step": 6568 }, { "epoch": 156.40597014925373, "grad_norm": 33.51700973510742, "learning_rate": 9.628851540616247e-06, "loss": 39.6933, "step": 6569 }, { "epoch": 156.42985074626867, "grad_norm": 27.145734786987305, "learning_rate": 9.627450980392158e-06, "loss": 39.7145, "step": 6570 }, { "epoch": 156.45373134328358, "grad_norm": 34.20743179321289, "learning_rate": 9.626050420168068e-06, "loss": 39.9307, "step": 6571 }, { "epoch": 156.47761194029852, "grad_norm": 28.43290138244629, "learning_rate": 9.624649859943979e-06, "loss": 39.7442, "step": 6572 }, { "epoch": 156.50149253731342, "grad_norm": 30.41081428527832, "learning_rate": 9.62324929971989e-06, "loss": 39.0533, "step": 6573 }, { "epoch": 156.52537313432836, "grad_norm": 24.903308868408203, "learning_rate": 9.621848739495799e-06, "loss": 38.7191, "step": 6574 }, { "epoch": 156.54925373134327, "grad_norm": 32.21714401245117, "learning_rate": 9.62044817927171e-06, "loss": 39.7623, "step": 6575 }, { "epoch": 156.5731343283582, "grad_norm": 27.931760787963867, "learning_rate": 9.61904761904762e-06, "loss": 39.1339, "step": 6576 }, { "epoch": 156.59701492537314, "grad_norm": 32.47041702270508, "learning_rate": 9.61764705882353e-06, "loss": 39.9239, "step": 6577 }, { "epoch": 156.62089552238805, "grad_norm": 26.025028228759766, "learning_rate": 9.616246498599441e-06, "loss": 38.3327, "step": 6578 }, { "epoch": 156.644776119403, "grad_norm": 35.96200942993164, "learning_rate": 9.614845938375352e-06, "loss": 39.9313, "step": 6579 }, { "epoch": 156.6686567164179, "grad_norm": 32.426944732666016, "learning_rate": 9.61344537815126e-06, "loss": 39.4115, "step": 6580 }, { "epoch": 156.69253731343284, "grad_norm": 33.29541015625, "learning_rate": 9.612044817927171e-06, "loss": 39.8026, "step": 6581 }, { "epoch": 156.71641791044777, "grad_norm": 26.586023330688477, "learning_rate": 9.610644257703082e-06, "loss": 39.1864, "step": 6582 }, { "epoch": 156.74029850746268, "grad_norm": 31.9718074798584, "learning_rate": 9.609243697478993e-06, "loss": 39.4903, "step": 6583 }, { "epoch": 156.76417910447762, "grad_norm": 31.3813419342041, "learning_rate": 9.607843137254903e-06, "loss": 38.8532, "step": 6584 }, { "epoch": 156.78805970149253, "grad_norm": 34.09993362426758, "learning_rate": 9.606442577030814e-06, "loss": 39.671, "step": 6585 }, { "epoch": 156.81194029850747, "grad_norm": 30.085002899169922, "learning_rate": 9.605042016806723e-06, "loss": 39.0096, "step": 6586 }, { "epoch": 156.83582089552237, "grad_norm": 28.415733337402344, "learning_rate": 9.603641456582634e-06, "loss": 38.4657, "step": 6587 }, { "epoch": 156.8597014925373, "grad_norm": 27.193313598632812, "learning_rate": 9.602240896358544e-06, "loss": 38.1315, "step": 6588 }, { "epoch": 156.88358208955225, "grad_norm": 30.765335083007812, "learning_rate": 9.600840336134455e-06, "loss": 39.3368, "step": 6589 }, { "epoch": 156.90746268656716, "grad_norm": 23.282987594604492, "learning_rate": 9.599439775910366e-06, "loss": 40.039, "step": 6590 }, { "epoch": 156.9313432835821, "grad_norm": 32.54345703125, "learning_rate": 9.598039215686276e-06, "loss": 38.9747, "step": 6591 }, { "epoch": 156.955223880597, "grad_norm": 30.983205795288086, "learning_rate": 9.596638655462187e-06, "loss": 39.2737, "step": 6592 }, { "epoch": 156.97910447761194, "grad_norm": 28.03235626220703, "learning_rate": 9.595238095238096e-06, "loss": 40.9064, "step": 6593 }, { "epoch": 157.0, "grad_norm": 24.515148162841797, "learning_rate": 9.593837535014006e-06, "loss": 34.6439, "step": 6594 }, { "epoch": 157.02388059701494, "grad_norm": 28.694761276245117, "learning_rate": 9.592436974789917e-06, "loss": 38.4482, "step": 6595 }, { "epoch": 157.04776119402985, "grad_norm": 23.540157318115234, "learning_rate": 9.591036414565828e-06, "loss": 38.1782, "step": 6596 }, { "epoch": 157.07164179104478, "grad_norm": 32.222991943359375, "learning_rate": 9.589635854341738e-06, "loss": 38.2852, "step": 6597 }, { "epoch": 157.0955223880597, "grad_norm": 28.29923439025879, "learning_rate": 9.588235294117649e-06, "loss": 39.1217, "step": 6598 }, { "epoch": 157.11940298507463, "grad_norm": 30.50798988342285, "learning_rate": 9.586834733893558e-06, "loss": 37.9906, "step": 6599 }, { "epoch": 157.14328358208957, "grad_norm": 25.299413681030273, "learning_rate": 9.585434173669469e-06, "loss": 40.1151, "step": 6600 }, { "epoch": 157.16716417910447, "grad_norm": 26.846481323242188, "learning_rate": 9.584033613445379e-06, "loss": 39.1387, "step": 6601 }, { "epoch": 157.1910447761194, "grad_norm": 24.088130950927734, "learning_rate": 9.58263305322129e-06, "loss": 39.9527, "step": 6602 }, { "epoch": 157.21492537313432, "grad_norm": 32.9058723449707, "learning_rate": 9.5812324929972e-06, "loss": 39.6031, "step": 6603 }, { "epoch": 157.23880597014926, "grad_norm": 28.707300186157227, "learning_rate": 9.579831932773111e-06, "loss": 38.1865, "step": 6604 }, { "epoch": 157.26268656716417, "grad_norm": 34.44000244140625, "learning_rate": 9.57843137254902e-06, "loss": 40.0412, "step": 6605 }, { "epoch": 157.2865671641791, "grad_norm": 30.934423446655273, "learning_rate": 9.57703081232493e-06, "loss": 39.241, "step": 6606 }, { "epoch": 157.31044776119404, "grad_norm": 29.025680541992188, "learning_rate": 9.575630252100841e-06, "loss": 39.4661, "step": 6607 }, { "epoch": 157.33432835820895, "grad_norm": 24.268417358398438, "learning_rate": 9.574229691876752e-06, "loss": 39.2482, "step": 6608 }, { "epoch": 157.3582089552239, "grad_norm": 32.32265090942383, "learning_rate": 9.572829131652663e-06, "loss": 39.2594, "step": 6609 }, { "epoch": 157.3820895522388, "grad_norm": 28.000333786010742, "learning_rate": 9.571428571428573e-06, "loss": 38.2507, "step": 6610 }, { "epoch": 157.40597014925373, "grad_norm": 31.72787094116211, "learning_rate": 9.570028011204482e-06, "loss": 40.5381, "step": 6611 }, { "epoch": 157.42985074626867, "grad_norm": 31.978450775146484, "learning_rate": 9.568627450980393e-06, "loss": 39.7319, "step": 6612 }, { "epoch": 157.45373134328358, "grad_norm": 30.192304611206055, "learning_rate": 9.567226890756303e-06, "loss": 40.951, "step": 6613 }, { "epoch": 157.47761194029852, "grad_norm": 25.521732330322266, "learning_rate": 9.565826330532214e-06, "loss": 38.8767, "step": 6614 }, { "epoch": 157.50149253731342, "grad_norm": 30.813644409179688, "learning_rate": 9.564425770308125e-06, "loss": 39.302, "step": 6615 }, { "epoch": 157.52537313432836, "grad_norm": 25.71561622619629, "learning_rate": 9.563025210084035e-06, "loss": 38.4408, "step": 6616 }, { "epoch": 157.54925373134327, "grad_norm": 36.52140426635742, "learning_rate": 9.561624649859946e-06, "loss": 38.4121, "step": 6617 }, { "epoch": 157.5731343283582, "grad_norm": 33.191593170166016, "learning_rate": 9.560224089635855e-06, "loss": 38.1299, "step": 6618 }, { "epoch": 157.59701492537314, "grad_norm": 27.148662567138672, "learning_rate": 9.558823529411766e-06, "loss": 39.3005, "step": 6619 }, { "epoch": 157.62089552238805, "grad_norm": 28.000900268554688, "learning_rate": 9.557422969187676e-06, "loss": 40.1389, "step": 6620 }, { "epoch": 157.644776119403, "grad_norm": 32.106048583984375, "learning_rate": 9.556022408963587e-06, "loss": 39.0995, "step": 6621 }, { "epoch": 157.6686567164179, "grad_norm": 28.147275924682617, "learning_rate": 9.554621848739497e-06, "loss": 38.8633, "step": 6622 }, { "epoch": 157.69253731343284, "grad_norm": 31.858997344970703, "learning_rate": 9.553221288515408e-06, "loss": 38.8929, "step": 6623 }, { "epoch": 157.71641791044777, "grad_norm": 27.966960906982422, "learning_rate": 9.551820728291317e-06, "loss": 38.5249, "step": 6624 }, { "epoch": 157.74029850746268, "grad_norm": 29.7442684173584, "learning_rate": 9.550420168067228e-06, "loss": 39.7258, "step": 6625 }, { "epoch": 157.76417910447762, "grad_norm": 25.475791931152344, "learning_rate": 9.549019607843138e-06, "loss": 38.6937, "step": 6626 }, { "epoch": 157.78805970149253, "grad_norm": 31.998165130615234, "learning_rate": 9.547619047619049e-06, "loss": 39.0858, "step": 6627 }, { "epoch": 157.81194029850747, "grad_norm": 27.826169967651367, "learning_rate": 9.54621848739496e-06, "loss": 39.3365, "step": 6628 }, { "epoch": 157.83582089552237, "grad_norm": 30.259963989257812, "learning_rate": 9.54481792717087e-06, "loss": 38.8625, "step": 6629 }, { "epoch": 157.8597014925373, "grad_norm": 25.748193740844727, "learning_rate": 9.54341736694678e-06, "loss": 39.2983, "step": 6630 }, { "epoch": 157.88358208955225, "grad_norm": 29.5993709564209, "learning_rate": 9.54201680672269e-06, "loss": 38.8356, "step": 6631 }, { "epoch": 157.90746268656716, "grad_norm": 24.60677719116211, "learning_rate": 9.5406162464986e-06, "loss": 39.4259, "step": 6632 }, { "epoch": 157.9313432835821, "grad_norm": 34.27214431762695, "learning_rate": 9.539215686274511e-06, "loss": 39.5947, "step": 6633 }, { "epoch": 157.955223880597, "grad_norm": 28.4605655670166, "learning_rate": 9.537815126050422e-06, "loss": 38.9525, "step": 6634 }, { "epoch": 157.97910447761194, "grad_norm": 29.947551727294922, "learning_rate": 9.536414565826332e-06, "loss": 37.9835, "step": 6635 }, { "epoch": 158.0, "grad_norm": 22.99721908569336, "learning_rate": 9.535014005602243e-06, "loss": 33.7123, "step": 6636 }, { "epoch": 158.02388059701494, "grad_norm": 31.333166122436523, "learning_rate": 9.533613445378152e-06, "loss": 39.6887, "step": 6637 }, { "epoch": 158.04776119402985, "grad_norm": 23.77128791809082, "learning_rate": 9.532212885154063e-06, "loss": 39.8605, "step": 6638 }, { "epoch": 158.07164179104478, "grad_norm": 31.62507438659668, "learning_rate": 9.530812324929972e-06, "loss": 38.9037, "step": 6639 }, { "epoch": 158.0955223880597, "grad_norm": 28.015859603881836, "learning_rate": 9.529411764705882e-06, "loss": 39.5294, "step": 6640 }, { "epoch": 158.11940298507463, "grad_norm": 27.46308135986328, "learning_rate": 9.528011204481793e-06, "loss": 39.4117, "step": 6641 }, { "epoch": 158.14328358208957, "grad_norm": 24.952190399169922, "learning_rate": 9.526610644257703e-06, "loss": 40.1572, "step": 6642 }, { "epoch": 158.16716417910447, "grad_norm": 30.921812057495117, "learning_rate": 9.525210084033614e-06, "loss": 38.4166, "step": 6643 }, { "epoch": 158.1910447761194, "grad_norm": 26.232816696166992, "learning_rate": 9.523809523809525e-06, "loss": 39.3362, "step": 6644 }, { "epoch": 158.21492537313432, "grad_norm": 31.015853881835938, "learning_rate": 9.522408963585434e-06, "loss": 38.4154, "step": 6645 }, { "epoch": 158.23880597014926, "grad_norm": 26.870193481445312, "learning_rate": 9.521008403361344e-06, "loss": 37.9386, "step": 6646 }, { "epoch": 158.26268656716417, "grad_norm": 29.11814308166504, "learning_rate": 9.519607843137255e-06, "loss": 39.9507, "step": 6647 }, { "epoch": 158.2865671641791, "grad_norm": 26.853282928466797, "learning_rate": 9.518207282913166e-06, "loss": 38.8224, "step": 6648 }, { "epoch": 158.31044776119404, "grad_norm": 30.874858856201172, "learning_rate": 9.516806722689076e-06, "loss": 40.0089, "step": 6649 }, { "epoch": 158.33432835820895, "grad_norm": 23.252574920654297, "learning_rate": 9.515406162464987e-06, "loss": 38.1102, "step": 6650 }, { "epoch": 158.3582089552239, "grad_norm": 33.164737701416016, "learning_rate": 9.514005602240896e-06, "loss": 40.089, "step": 6651 }, { "epoch": 158.3820895522388, "grad_norm": 25.860107421875, "learning_rate": 9.512605042016806e-06, "loss": 38.4175, "step": 6652 }, { "epoch": 158.40597014925373, "grad_norm": 31.76698875427246, "learning_rate": 9.511204481792717e-06, "loss": 38.516, "step": 6653 }, { "epoch": 158.42985074626867, "grad_norm": 28.989151000976562, "learning_rate": 9.509803921568628e-06, "loss": 38.7914, "step": 6654 }, { "epoch": 158.45373134328358, "grad_norm": 27.455665588378906, "learning_rate": 9.508403361344538e-06, "loss": 39.2981, "step": 6655 }, { "epoch": 158.47761194029852, "grad_norm": 27.4940128326416, "learning_rate": 9.507002801120449e-06, "loss": 39.8574, "step": 6656 }, { "epoch": 158.50149253731342, "grad_norm": 28.886987686157227, "learning_rate": 9.50560224089636e-06, "loss": 38.5423, "step": 6657 }, { "epoch": 158.52537313432836, "grad_norm": 23.92523956298828, "learning_rate": 9.504201680672269e-06, "loss": 38.8931, "step": 6658 }, { "epoch": 158.54925373134327, "grad_norm": 35.76919174194336, "learning_rate": 9.50280112044818e-06, "loss": 39.2734, "step": 6659 }, { "epoch": 158.5731343283582, "grad_norm": 30.03874969482422, "learning_rate": 9.50140056022409e-06, "loss": 39.1651, "step": 6660 }, { "epoch": 158.59701492537314, "grad_norm": NaN, "learning_rate": 9.5e-06, "loss": 53.1676, "step": 6661 }, { "epoch": 158.62089552238805, "grad_norm": 29.244117736816406, "learning_rate": 9.5e-06, "loss": 39.141, "step": 6662 }, { "epoch": 158.644776119403, "grad_norm": 26.69334602355957, "learning_rate": 9.498599439775911e-06, "loss": 38.2187, "step": 6663 }, { "epoch": 158.6686567164179, "grad_norm": 29.462692260742188, "learning_rate": 9.497198879551822e-06, "loss": 39.808, "step": 6664 }, { "epoch": 158.69253731343284, "grad_norm": 25.570072174072266, "learning_rate": 9.49579831932773e-06, "loss": 39.1149, "step": 6665 }, { "epoch": 158.71641791044777, "grad_norm": 29.6662654876709, "learning_rate": 9.494397759103641e-06, "loss": 39.8864, "step": 6666 }, { "epoch": 158.74029850746268, "grad_norm": 26.96592140197754, "learning_rate": 9.492997198879552e-06, "loss": 40.2124, "step": 6667 }, { "epoch": 158.76417910447762, "grad_norm": 28.80194091796875, "learning_rate": 9.491596638655463e-06, "loss": 39.3583, "step": 6668 }, { "epoch": 158.78805970149253, "grad_norm": 25.05339813232422, "learning_rate": 9.490196078431373e-06, "loss": 39.229, "step": 6669 }, { "epoch": 158.81194029850747, "grad_norm": 30.561386108398438, "learning_rate": 9.488795518207284e-06, "loss": 36.3939, "step": 6670 }, { "epoch": 158.83582089552237, "grad_norm": 28.11375617980957, "learning_rate": 9.487394957983193e-06, "loss": 39.0193, "step": 6671 }, { "epoch": 158.8597014925373, "grad_norm": 28.772184371948242, "learning_rate": 9.485994397759104e-06, "loss": 39.018, "step": 6672 }, { "epoch": 158.88358208955225, "grad_norm": 26.138904571533203, "learning_rate": 9.484593837535014e-06, "loss": 38.5065, "step": 6673 }, { "epoch": 158.90746268656716, "grad_norm": 35.049713134765625, "learning_rate": 9.483193277310925e-06, "loss": 39.4819, "step": 6674 }, { "epoch": 158.9313432835821, "grad_norm": 29.197858810424805, "learning_rate": 9.481792717086835e-06, "loss": 39.2848, "step": 6675 }, { "epoch": 158.955223880597, "grad_norm": 28.17118263244629, "learning_rate": 9.480392156862746e-06, "loss": 37.8984, "step": 6676 }, { "epoch": 158.97910447761194, "grad_norm": 25.997028350830078, "learning_rate": 9.478991596638657e-06, "loss": 38.2248, "step": 6677 }, { "epoch": 159.0, "grad_norm": 22.521821975708008, "learning_rate": 9.477591036414566e-06, "loss": 34.6442, "step": 6678 }, { "epoch": 159.02388059701494, "grad_norm": 22.637258529663086, "learning_rate": 9.476190476190476e-06, "loss": 37.9175, "step": 6679 }, { "epoch": 159.04776119402985, "grad_norm": 31.351964950561523, "learning_rate": 9.474789915966387e-06, "loss": 38.5578, "step": 6680 }, { "epoch": 159.07164179104478, "grad_norm": 31.61567497253418, "learning_rate": 9.473389355742298e-06, "loss": 39.0571, "step": 6681 }, { "epoch": 159.0955223880597, "grad_norm": 26.24010467529297, "learning_rate": 9.471988795518208e-06, "loss": 39.5834, "step": 6682 }, { "epoch": 159.11940298507463, "grad_norm": 28.04585838317871, "learning_rate": 9.470588235294119e-06, "loss": 39.3662, "step": 6683 }, { "epoch": 159.14328358208957, "grad_norm": 25.634319305419922, "learning_rate": 9.469187675070028e-06, "loss": 38.7174, "step": 6684 }, { "epoch": 159.16716417910447, "grad_norm": 21.796682357788086, "learning_rate": 9.467787114845938e-06, "loss": 38.982, "step": 6685 }, { "epoch": 159.1910447761194, "grad_norm": 29.920791625976562, "learning_rate": 9.466386554621849e-06, "loss": 38.6116, "step": 6686 }, { "epoch": 159.21492537313432, "grad_norm": 25.356687545776367, "learning_rate": 9.46498599439776e-06, "loss": 39.5341, "step": 6687 }, { "epoch": 159.23880597014926, "grad_norm": 31.633136749267578, "learning_rate": 9.46358543417367e-06, "loss": 38.5544, "step": 6688 }, { "epoch": 159.26268656716417, "grad_norm": 27.863170623779297, "learning_rate": 9.462184873949581e-06, "loss": 39.6266, "step": 6689 }, { "epoch": 159.2865671641791, "grad_norm": 29.66268539428711, "learning_rate": 9.46078431372549e-06, "loss": 39.5999, "step": 6690 }, { "epoch": 159.31044776119404, "grad_norm": 24.517019271850586, "learning_rate": 9.4593837535014e-06, "loss": 38.8792, "step": 6691 }, { "epoch": 159.33432835820895, "grad_norm": 25.91954231262207, "learning_rate": 9.457983193277311e-06, "loss": 39.5027, "step": 6692 }, { "epoch": 159.3582089552239, "grad_norm": 22.744062423706055, "learning_rate": 9.456582633053222e-06, "loss": 39.2578, "step": 6693 }, { "epoch": 159.3820895522388, "grad_norm": 26.101306915283203, "learning_rate": 9.455182072829132e-06, "loss": 39.1526, "step": 6694 }, { "epoch": 159.40597014925373, "grad_norm": 20.470579147338867, "learning_rate": 9.453781512605043e-06, "loss": 39.2099, "step": 6695 }, { "epoch": 159.42985074626867, "grad_norm": 24.81926155090332, "learning_rate": 9.452380952380952e-06, "loss": 38.8535, "step": 6696 }, { "epoch": 159.45373134328358, "grad_norm": 18.473108291625977, "learning_rate": 9.450980392156863e-06, "loss": 39.2421, "step": 6697 }, { "epoch": 159.47761194029852, "grad_norm": 23.971759796142578, "learning_rate": 9.449579831932773e-06, "loss": 38.4099, "step": 6698 }, { "epoch": 159.50149253731342, "grad_norm": 22.947731018066406, "learning_rate": 9.448179271708684e-06, "loss": 38.4733, "step": 6699 }, { "epoch": 159.52537313432836, "grad_norm": 18.715824127197266, "learning_rate": 9.446778711484595e-06, "loss": 39.0546, "step": 6700 }, { "epoch": 159.54925373134327, "grad_norm": 25.644397735595703, "learning_rate": 9.445378151260505e-06, "loss": 39.4029, "step": 6701 }, { "epoch": 159.5731343283582, "grad_norm": 20.00396728515625, "learning_rate": 9.443977591036416e-06, "loss": 39.7051, "step": 6702 }, { "epoch": 159.59701492537314, "grad_norm": 22.225614547729492, "learning_rate": 9.442577030812325e-06, "loss": 39.8751, "step": 6703 }, { "epoch": 159.62089552238805, "grad_norm": 21.25652313232422, "learning_rate": 9.441176470588235e-06, "loss": 38.6573, "step": 6704 }, { "epoch": 159.644776119403, "grad_norm": 20.495485305786133, "learning_rate": 9.439775910364146e-06, "loss": 37.8811, "step": 6705 }, { "epoch": 159.6686567164179, "grad_norm": 18.924156188964844, "learning_rate": 9.438375350140057e-06, "loss": 39.5924, "step": 6706 }, { "epoch": 159.69253731343284, "grad_norm": 21.530715942382812, "learning_rate": 9.436974789915967e-06, "loss": 38.6536, "step": 6707 }, { "epoch": 159.71641791044777, "grad_norm": 20.47351837158203, "learning_rate": 9.435574229691878e-06, "loss": 38.5164, "step": 6708 }, { "epoch": 159.74029850746268, "grad_norm": 18.203227996826172, "learning_rate": 9.434173669467787e-06, "loss": 39.0054, "step": 6709 }, { "epoch": 159.76417910447762, "grad_norm": 17.892553329467773, "learning_rate": 9.432773109243698e-06, "loss": 38.4729, "step": 6710 }, { "epoch": 159.78805970149253, "grad_norm": 19.664037704467773, "learning_rate": 9.431372549019608e-06, "loss": 38.5362, "step": 6711 }, { "epoch": 159.81194029850747, "grad_norm": 14.706823348999023, "learning_rate": 9.429971988795519e-06, "loss": 38.8772, "step": 6712 }, { "epoch": 159.83582089552237, "grad_norm": 23.517322540283203, "learning_rate": 9.42857142857143e-06, "loss": 38.9856, "step": 6713 }, { "epoch": 159.8597014925373, "grad_norm": 18.907791137695312, "learning_rate": 9.42717086834734e-06, "loss": 39.2286, "step": 6714 }, { "epoch": 159.88358208955225, "grad_norm": 18.358661651611328, "learning_rate": 9.425770308123249e-06, "loss": 39.5321, "step": 6715 }, { "epoch": 159.90746268656716, "grad_norm": 21.41598129272461, "learning_rate": 9.42436974789916e-06, "loss": 38.38, "step": 6716 }, { "epoch": 159.9313432835821, "grad_norm": 20.371999740600586, "learning_rate": 9.42296918767507e-06, "loss": 39.1304, "step": 6717 }, { "epoch": 159.955223880597, "grad_norm": 16.01449966430664, "learning_rate": 9.421568627450981e-06, "loss": 39.2435, "step": 6718 }, { "epoch": 159.97910447761194, "grad_norm": 15.86768627166748, "learning_rate": 9.420168067226892e-06, "loss": 40.169, "step": 6719 }, { "epoch": 160.0, "grad_norm": 17.439725875854492, "learning_rate": 9.418767507002802e-06, "loss": 34.4427, "step": 6720 }, { "epoch": 160.02388059701494, "grad_norm": 16.46625518798828, "learning_rate": 9.417366946778713e-06, "loss": 39.5417, "step": 6721 }, { "epoch": 160.04776119402985, "grad_norm": 19.329574584960938, "learning_rate": 9.415966386554622e-06, "loss": 38.4405, "step": 6722 }, { "epoch": 160.07164179104478, "grad_norm": 17.68001937866211, "learning_rate": 9.414565826330533e-06, "loss": 38.8865, "step": 6723 }, { "epoch": 160.0955223880597, "grad_norm": 14.42570686340332, "learning_rate": 9.413165266106443e-06, "loss": 39.4139, "step": 6724 }, { "epoch": 160.11940298507463, "grad_norm": 16.53023338317871, "learning_rate": 9.411764705882354e-06, "loss": 39.0396, "step": 6725 }, { "epoch": 160.14328358208957, "grad_norm": 16.63241958618164, "learning_rate": 9.410364145658264e-06, "loss": 38.6942, "step": 6726 }, { "epoch": 160.16716417910447, "grad_norm": 16.808700561523438, "learning_rate": 9.408963585434175e-06, "loss": 39.2502, "step": 6727 }, { "epoch": 160.1910447761194, "grad_norm": 16.61543083190918, "learning_rate": 9.407563025210084e-06, "loss": 39.3648, "step": 6728 }, { "epoch": 160.21492537313432, "grad_norm": 17.370962142944336, "learning_rate": 9.406162464985995e-06, "loss": 39.7099, "step": 6729 }, { "epoch": 160.23880597014926, "grad_norm": 18.192951202392578, "learning_rate": 9.404761904761905e-06, "loss": 38.4803, "step": 6730 }, { "epoch": 160.26268656716417, "grad_norm": 17.29343032836914, "learning_rate": 9.403361344537816e-06, "loss": 38.9634, "step": 6731 }, { "epoch": 160.2865671641791, "grad_norm": 15.428152084350586, "learning_rate": 9.401960784313727e-06, "loss": 37.3442, "step": 6732 }, { "epoch": 160.31044776119404, "grad_norm": 20.36634635925293, "learning_rate": 9.400560224089637e-06, "loss": 37.5906, "step": 6733 }, { "epoch": 160.33432835820895, "grad_norm": 19.532516479492188, "learning_rate": 9.399159663865546e-06, "loss": 39.7177, "step": 6734 }, { "epoch": 160.3582089552239, "grad_norm": 15.812334060668945, "learning_rate": 9.397759103641457e-06, "loss": 39.041, "step": 6735 }, { "epoch": 160.3820895522388, "grad_norm": 22.160499572753906, "learning_rate": 9.396358543417367e-06, "loss": 39.5028, "step": 6736 }, { "epoch": 160.40597014925373, "grad_norm": 18.124677658081055, "learning_rate": 9.394957983193278e-06, "loss": 37.5503, "step": 6737 }, { "epoch": 160.42985074626867, "grad_norm": 17.192428588867188, "learning_rate": 9.393557422969189e-06, "loss": 40.2257, "step": 6738 }, { "epoch": 160.45373134328358, "grad_norm": 19.324045181274414, "learning_rate": 9.3921568627451e-06, "loss": 39.3236, "step": 6739 }, { "epoch": 160.47761194029852, "grad_norm": 14.983993530273438, "learning_rate": 9.390756302521008e-06, "loss": 39.1351, "step": 6740 }, { "epoch": 160.50149253731342, "grad_norm": 21.260456085205078, "learning_rate": 9.389355742296919e-06, "loss": 40.2213, "step": 6741 }, { "epoch": 160.52537313432836, "grad_norm": 19.486419677734375, "learning_rate": 9.38795518207283e-06, "loss": 39.116, "step": 6742 }, { "epoch": 160.54925373134327, "grad_norm": 19.740835189819336, "learning_rate": 9.38655462184874e-06, "loss": 38.3406, "step": 6743 }, { "epoch": 160.5731343283582, "grad_norm": 14.827152252197266, "learning_rate": 9.385154061624651e-06, "loss": 39.4013, "step": 6744 }, { "epoch": 160.59701492537314, "grad_norm": 18.374135971069336, "learning_rate": 9.383753501400561e-06, "loss": 39.256, "step": 6745 }, { "epoch": 160.62089552238805, "grad_norm": 14.761893272399902, "learning_rate": 9.382352941176472e-06, "loss": 38.3464, "step": 6746 }, { "epoch": 160.644776119403, "grad_norm": 23.56832504272461, "learning_rate": 9.380952380952381e-06, "loss": 38.6375, "step": 6747 }, { "epoch": 160.6686567164179, "grad_norm": 19.972110748291016, "learning_rate": 9.379551820728292e-06, "loss": 39.4843, "step": 6748 }, { "epoch": 160.69253731343284, "grad_norm": 15.896003723144531, "learning_rate": 9.378151260504202e-06, "loss": 38.3614, "step": 6749 }, { "epoch": 160.71641791044777, "grad_norm": 22.914880752563477, "learning_rate": 9.376750700280113e-06, "loss": 40.2098, "step": 6750 }, { "epoch": 160.74029850746268, "grad_norm": 14.334890365600586, "learning_rate": 9.375350140056024e-06, "loss": 39.3033, "step": 6751 }, { "epoch": 160.76417910447762, "grad_norm": 20.54424476623535, "learning_rate": 9.373949579831934e-06, "loss": 39.6488, "step": 6752 }, { "epoch": 160.78805970149253, "grad_norm": 23.600900650024414, "learning_rate": 9.372549019607843e-06, "loss": 36.9016, "step": 6753 }, { "epoch": 160.81194029850747, "grad_norm": 15.01113510131836, "learning_rate": 9.371148459383754e-06, "loss": 38.4658, "step": 6754 }, { "epoch": 160.83582089552237, "grad_norm": 18.84202766418457, "learning_rate": 9.369747899159664e-06, "loss": 38.6521, "step": 6755 }, { "epoch": 160.8597014925373, "grad_norm": 15.818922996520996, "learning_rate": 9.368347338935575e-06, "loss": 40.3014, "step": 6756 }, { "epoch": 160.88358208955225, "grad_norm": 19.53926658630371, "learning_rate": 9.366946778711486e-06, "loss": 39.0312, "step": 6757 }, { "epoch": 160.90746268656716, "grad_norm": 16.320993423461914, "learning_rate": 9.365546218487396e-06, "loss": 38.1269, "step": 6758 }, { "epoch": 160.9313432835821, "grad_norm": 24.081876754760742, "learning_rate": 9.364145658263305e-06, "loss": 39.681, "step": 6759 }, { "epoch": 160.955223880597, "grad_norm": 16.770343780517578, "learning_rate": 9.362745098039216e-06, "loss": 38.7858, "step": 6760 }, { "epoch": 160.97910447761194, "grad_norm": 22.08024024963379, "learning_rate": 9.361344537815127e-06, "loss": 38.8286, "step": 6761 }, { "epoch": 161.0, "grad_norm": 17.7487735748291, "learning_rate": 9.359943977591037e-06, "loss": 34.0391, "step": 6762 }, { "epoch": 161.02388059701494, "grad_norm": 18.156070709228516, "learning_rate": 9.358543417366948e-06, "loss": 39.4564, "step": 6763 }, { "epoch": 161.04776119402985, "grad_norm": 20.877561569213867, "learning_rate": 9.357142857142859e-06, "loss": 38.7863, "step": 6764 }, { "epoch": 161.07164179104478, "grad_norm": 19.127397537231445, "learning_rate": 9.355742296918767e-06, "loss": 38.5402, "step": 6765 }, { "epoch": 161.0955223880597, "grad_norm": 21.27589988708496, "learning_rate": 9.354341736694678e-06, "loss": 38.9504, "step": 6766 }, { "epoch": 161.11940298507463, "grad_norm": 19.405920028686523, "learning_rate": 9.352941176470589e-06, "loss": 39.851, "step": 6767 }, { "epoch": 161.14328358208957, "grad_norm": 17.423633575439453, "learning_rate": 9.3515406162465e-06, "loss": 39.0452, "step": 6768 }, { "epoch": 161.16716417910447, "grad_norm": NaN, "learning_rate": 9.35014005602241e-06, "loss": 49.0405, "step": 6769 }, { "epoch": 161.1910447761194, "grad_norm": 17.809812545776367, "learning_rate": 9.35014005602241e-06, "loss": 37.5939, "step": 6770 }, { "epoch": 161.21492537313432, "grad_norm": 16.22609519958496, "learning_rate": 9.34873949579832e-06, "loss": 39.4706, "step": 6771 }, { "epoch": 161.23880597014926, "grad_norm": 21.230531692504883, "learning_rate": 9.347338935574231e-06, "loss": 39.2319, "step": 6772 }, { "epoch": 161.26268656716417, "grad_norm": 18.550539016723633, "learning_rate": 9.34593837535014e-06, "loss": 39.4455, "step": 6773 }, { "epoch": 161.2865671641791, "grad_norm": 18.481239318847656, "learning_rate": 9.344537815126051e-06, "loss": 40.0004, "step": 6774 }, { "epoch": 161.31044776119404, "grad_norm": 23.4416446685791, "learning_rate": 9.343137254901962e-06, "loss": 37.5972, "step": 6775 }, { "epoch": 161.33432835820895, "grad_norm": 24.323545455932617, "learning_rate": 9.341736694677872e-06, "loss": 37.308, "step": 6776 }, { "epoch": 161.3582089552239, "grad_norm": 15.038119316101074, "learning_rate": 9.340336134453783e-06, "loss": 39.032, "step": 6777 }, { "epoch": 161.3820895522388, "grad_norm": 26.29281234741211, "learning_rate": 9.338935574229693e-06, "loss": 39.4552, "step": 6778 }, { "epoch": 161.40597014925373, "grad_norm": 17.980562210083008, "learning_rate": 9.337535014005602e-06, "loss": 38.6003, "step": 6779 }, { "epoch": 161.42985074626867, "grad_norm": 23.86687660217285, "learning_rate": 9.336134453781513e-06, "loss": 38.2432, "step": 6780 }, { "epoch": 161.45373134328358, "grad_norm": 21.71341896057129, "learning_rate": 9.334733893557424e-06, "loss": 39.3572, "step": 6781 }, { "epoch": 161.47761194029852, "grad_norm": 22.293689727783203, "learning_rate": 9.333333333333334e-06, "loss": 39.5909, "step": 6782 }, { "epoch": 161.50149253731342, "grad_norm": 22.266357421875, "learning_rate": 9.331932773109245e-06, "loss": 39.4302, "step": 6783 }, { "epoch": 161.52537313432836, "grad_norm": 18.248830795288086, "learning_rate": 9.330532212885156e-06, "loss": 38.9019, "step": 6784 }, { "epoch": 161.54925373134327, "grad_norm": 16.384206771850586, "learning_rate": 9.329131652661065e-06, "loss": 38.9382, "step": 6785 }, { "epoch": 161.5731343283582, "grad_norm": 20.268388748168945, "learning_rate": 9.327731092436975e-06, "loss": 38.7883, "step": 6786 }, { "epoch": 161.59701492537314, "grad_norm": 22.8583927154541, "learning_rate": 9.326330532212886e-06, "loss": 37.8416, "step": 6787 }, { "epoch": 161.62089552238805, "grad_norm": 18.193201065063477, "learning_rate": 9.324929971988796e-06, "loss": 38.4221, "step": 6788 }, { "epoch": 161.644776119403, "grad_norm": 26.44017791748047, "learning_rate": 9.323529411764707e-06, "loss": 37.5674, "step": 6789 }, { "epoch": 161.6686567164179, "grad_norm": 21.0838623046875, "learning_rate": 9.322128851540618e-06, "loss": 39.1413, "step": 6790 }, { "epoch": 161.69253731343284, "grad_norm": 17.49184799194336, "learning_rate": 9.320728291316528e-06, "loss": 39.0681, "step": 6791 }, { "epoch": 161.71641791044777, "grad_norm": 26.257192611694336, "learning_rate": 9.319327731092437e-06, "loss": 39.0174, "step": 6792 }, { "epoch": 161.74029850746268, "grad_norm": 14.942763328552246, "learning_rate": 9.317927170868348e-06, "loss": 39.5799, "step": 6793 }, { "epoch": 161.76417910447762, "grad_norm": 35.21318054199219, "learning_rate": 9.316526610644259e-06, "loss": 38.9654, "step": 6794 }, { "epoch": 161.78805970149253, "grad_norm": 26.097381591796875, "learning_rate": 9.31512605042017e-06, "loss": 39.5492, "step": 6795 }, { "epoch": 161.81194029850747, "grad_norm": 28.224538803100586, "learning_rate": 9.31372549019608e-06, "loss": 38.7373, "step": 6796 }, { "epoch": 161.83582089552237, "grad_norm": 21.44225311279297, "learning_rate": 9.31232492997199e-06, "loss": 38.3801, "step": 6797 }, { "epoch": 161.8597014925373, "grad_norm": 26.673877716064453, "learning_rate": 9.3109243697479e-06, "loss": 39.4883, "step": 6798 }, { "epoch": 161.88358208955225, "grad_norm": 21.419830322265625, "learning_rate": 9.30952380952381e-06, "loss": 38.2996, "step": 6799 }, { "epoch": 161.90746268656716, "grad_norm": 21.3859806060791, "learning_rate": 9.30812324929972e-06, "loss": 39.6105, "step": 6800 }, { "epoch": 161.9313432835821, "grad_norm": 24.542308807373047, "learning_rate": 9.306722689075631e-06, "loss": 40.6519, "step": 6801 }, { "epoch": 161.955223880597, "grad_norm": 20.758468627929688, "learning_rate": 9.305322128851542e-06, "loss": 38.8648, "step": 6802 }, { "epoch": 161.97910447761194, "grad_norm": 25.230525970458984, "learning_rate": 9.303921568627453e-06, "loss": 38.2779, "step": 6803 }, { "epoch": 162.0, "grad_norm": 17.33908462524414, "learning_rate": 9.302521008403362e-06, "loss": 34.062, "step": 6804 }, { "epoch": 162.02388059701494, "grad_norm": 21.04616355895996, "learning_rate": 9.301120448179272e-06, "loss": 37.3716, "step": 6805 }, { "epoch": 162.04776119402985, "grad_norm": 20.279638290405273, "learning_rate": 9.299719887955183e-06, "loss": 37.8552, "step": 6806 }, { "epoch": 162.07164179104478, "grad_norm": 21.690444946289062, "learning_rate": 9.298319327731094e-06, "loss": 38.919, "step": 6807 }, { "epoch": 162.0955223880597, "grad_norm": 19.564537048339844, "learning_rate": 9.296918767507004e-06, "loss": 38.8517, "step": 6808 }, { "epoch": 162.11940298507463, "grad_norm": 20.17282485961914, "learning_rate": 9.295518207282915e-06, "loss": 38.5925, "step": 6809 }, { "epoch": 162.14328358208957, "grad_norm": 17.191225051879883, "learning_rate": 9.294117647058824e-06, "loss": 40.0369, "step": 6810 }, { "epoch": 162.16716417910447, "grad_norm": 15.69149112701416, "learning_rate": 9.292717086834734e-06, "loss": 38.9256, "step": 6811 }, { "epoch": 162.1910447761194, "grad_norm": 19.860177993774414, "learning_rate": 9.291316526610645e-06, "loss": 38.4066, "step": 6812 }, { "epoch": 162.21492537313432, "grad_norm": 20.10934066772461, "learning_rate": 9.289915966386556e-06, "loss": 39.9674, "step": 6813 }, { "epoch": 162.23880597014926, "grad_norm": 15.468286514282227, "learning_rate": 9.288515406162466e-06, "loss": 38.8206, "step": 6814 }, { "epoch": 162.26268656716417, "grad_norm": 19.017433166503906, "learning_rate": 9.287114845938377e-06, "loss": 38.5092, "step": 6815 }, { "epoch": 162.2865671641791, "grad_norm": 17.840295791625977, "learning_rate": 9.285714285714288e-06, "loss": 38.9008, "step": 6816 }, { "epoch": 162.31044776119404, "grad_norm": 18.01124382019043, "learning_rate": 9.284313725490197e-06, "loss": 37.5621, "step": 6817 }, { "epoch": 162.33432835820895, "grad_norm": 15.1130952835083, "learning_rate": 9.282913165266107e-06, "loss": 38.2213, "step": 6818 }, { "epoch": 162.3582089552239, "grad_norm": 13.752197265625, "learning_rate": 9.281512605042018e-06, "loss": 38.9823, "step": 6819 }, { "epoch": 162.3820895522388, "grad_norm": 17.562929153442383, "learning_rate": 9.280112044817928e-06, "loss": 39.5778, "step": 6820 }, { "epoch": 162.40597014925373, "grad_norm": 17.960500717163086, "learning_rate": 9.278711484593839e-06, "loss": 38.6016, "step": 6821 }, { "epoch": 162.42985074626867, "grad_norm": 18.7200984954834, "learning_rate": 9.27731092436975e-06, "loss": 38.9038, "step": 6822 }, { "epoch": 162.45373134328358, "grad_norm": 13.515174865722656, "learning_rate": 9.275910364145659e-06, "loss": 38.9758, "step": 6823 }, { "epoch": 162.47761194029852, "grad_norm": 18.944744110107422, "learning_rate": 9.27450980392157e-06, "loss": 38.7721, "step": 6824 }, { "epoch": 162.50149253731342, "grad_norm": 21.34316062927246, "learning_rate": 9.27310924369748e-06, "loss": 38.7627, "step": 6825 }, { "epoch": 162.52537313432836, "grad_norm": 15.169504165649414, "learning_rate": 9.27170868347339e-06, "loss": 39.6574, "step": 6826 }, { "epoch": 162.54925373134327, "grad_norm": 15.559427261352539, "learning_rate": 9.270308123249301e-06, "loss": 38.9596, "step": 6827 }, { "epoch": 162.5731343283582, "grad_norm": 14.636393547058105, "learning_rate": 9.268907563025212e-06, "loss": 37.9376, "step": 6828 }, { "epoch": 162.59701492537314, "grad_norm": 19.879676818847656, "learning_rate": 9.26750700280112e-06, "loss": 38.8639, "step": 6829 }, { "epoch": 162.62089552238805, "grad_norm": 22.679649353027344, "learning_rate": 9.266106442577031e-06, "loss": 39.7803, "step": 6830 }, { "epoch": 162.644776119403, "grad_norm": 14.357563972473145, "learning_rate": 9.264705882352942e-06, "loss": 39.135, "step": 6831 }, { "epoch": 162.6686567164179, "grad_norm": 31.32781219482422, "learning_rate": 9.263305322128853e-06, "loss": 39.2126, "step": 6832 }, { "epoch": 162.69253731343284, "grad_norm": 23.357858657836914, "learning_rate": 9.261904761904763e-06, "loss": 39.1176, "step": 6833 }, { "epoch": 162.71641791044777, "grad_norm": 23.808473587036133, "learning_rate": 9.260504201680674e-06, "loss": 39.5406, "step": 6834 }, { "epoch": 162.74029850746268, "grad_norm": 32.2010498046875, "learning_rate": 9.259103641456585e-06, "loss": 39.0553, "step": 6835 }, { "epoch": 162.76417910447762, "grad_norm": 22.90511703491211, "learning_rate": 9.257703081232494e-06, "loss": 40.5556, "step": 6836 }, { "epoch": 162.78805970149253, "grad_norm": 35.206451416015625, "learning_rate": 9.256302521008404e-06, "loss": 38.774, "step": 6837 }, { "epoch": 162.81194029850747, "grad_norm": 27.09161949157715, "learning_rate": 9.254901960784315e-06, "loss": 39.1619, "step": 6838 }, { "epoch": 162.83582089552237, "grad_norm": 36.54391860961914, "learning_rate": 9.253501400560225e-06, "loss": 38.2451, "step": 6839 }, { "epoch": 162.8597014925373, "grad_norm": 28.31791877746582, "learning_rate": 9.252100840336136e-06, "loss": 39.4128, "step": 6840 }, { "epoch": 162.88358208955225, "grad_norm": 32.99971389770508, "learning_rate": 9.250700280112047e-06, "loss": 38.2235, "step": 6841 }, { "epoch": 162.90746268656716, "grad_norm": 32.75461959838867, "learning_rate": 9.249299719887956e-06, "loss": 38.1497, "step": 6842 }, { "epoch": 162.9313432835821, "grad_norm": 29.004566192626953, "learning_rate": 9.247899159663866e-06, "loss": 39.3836, "step": 6843 }, { "epoch": 162.955223880597, "grad_norm": 25.05014991760254, "learning_rate": 9.246498599439777e-06, "loss": 37.0636, "step": 6844 }, { "epoch": 162.97910447761194, "grad_norm": 31.873991012573242, "learning_rate": 9.245098039215688e-06, "loss": 39.1655, "step": 6845 }, { "epoch": 163.0, "grad_norm": 23.277997970581055, "learning_rate": 9.243697478991598e-06, "loss": 34.8144, "step": 6846 }, { "epoch": 163.02388059701494, "grad_norm": 36.877803802490234, "learning_rate": 9.242296918767509e-06, "loss": 39.8112, "step": 6847 }, { "epoch": 163.04776119402985, "grad_norm": 32.6607551574707, "learning_rate": 9.240896358543418e-06, "loss": 40.7393, "step": 6848 }, { "epoch": 163.07164179104478, "grad_norm": 31.25403594970703, "learning_rate": 9.239495798319328e-06, "loss": 38.3637, "step": 6849 }, { "epoch": 163.0955223880597, "grad_norm": 28.465002059936523, "learning_rate": 9.238095238095239e-06, "loss": 38.7156, "step": 6850 }, { "epoch": 163.11940298507463, "grad_norm": 26.194684982299805, "learning_rate": 9.23669467787115e-06, "loss": 38.4498, "step": 6851 }, { "epoch": 163.14328358208957, "grad_norm": 27.579038619995117, "learning_rate": 9.23529411764706e-06, "loss": 37.5274, "step": 6852 }, { "epoch": 163.16716417910447, "grad_norm": 29.72901725769043, "learning_rate": 9.233893557422971e-06, "loss": 38.1516, "step": 6853 }, { "epoch": 163.1910447761194, "grad_norm": 22.37741470336914, "learning_rate": 9.23249299719888e-06, "loss": 38.5496, "step": 6854 }, { "epoch": 163.21492537313432, "grad_norm": 34.037353515625, "learning_rate": 9.23109243697479e-06, "loss": 40.6135, "step": 6855 }, { "epoch": 163.23880597014926, "grad_norm": 27.039569854736328, "learning_rate": 9.229691876750701e-06, "loss": 38.4322, "step": 6856 }, { "epoch": 163.26268656716417, "grad_norm": 34.39824676513672, "learning_rate": 9.228291316526612e-06, "loss": 37.4896, "step": 6857 }, { "epoch": 163.2865671641791, "grad_norm": 33.6617546081543, "learning_rate": 9.226890756302523e-06, "loss": 39.0515, "step": 6858 }, { "epoch": 163.31044776119404, "grad_norm": 29.275096893310547, "learning_rate": 9.225490196078433e-06, "loss": 38.9624, "step": 6859 }, { "epoch": 163.33432835820895, "grad_norm": 28.370113372802734, "learning_rate": 9.224089635854344e-06, "loss": 39.8506, "step": 6860 }, { "epoch": 163.3582089552239, "grad_norm": 27.253931045532227, "learning_rate": 9.222689075630253e-06, "loss": 38.5453, "step": 6861 }, { "epoch": 163.3820895522388, "grad_norm": 23.24662971496582, "learning_rate": 9.221288515406163e-06, "loss": 38.912, "step": 6862 }, { "epoch": 163.40597014925373, "grad_norm": 32.55588912963867, "learning_rate": 9.219887955182074e-06, "loss": 38.7099, "step": 6863 }, { "epoch": 163.42985074626867, "grad_norm": 27.186988830566406, "learning_rate": 9.218487394957983e-06, "loss": 39.041, "step": 6864 }, { "epoch": 163.45373134328358, "grad_norm": 34.195472717285156, "learning_rate": 9.217086834733894e-06, "loss": 39.3666, "step": 6865 }, { "epoch": 163.47761194029852, "grad_norm": 31.793378829956055, "learning_rate": 9.215686274509804e-06, "loss": 39.8421, "step": 6866 }, { "epoch": 163.50149253731342, "grad_norm": 30.88374137878418, "learning_rate": 9.214285714285715e-06, "loss": 38.6998, "step": 6867 }, { "epoch": 163.52537313432836, "grad_norm": 26.248395919799805, "learning_rate": 9.212885154061626e-06, "loss": 38.059, "step": 6868 }, { "epoch": 163.54925373134327, "grad_norm": 32.238731384277344, "learning_rate": 9.211484593837534e-06, "loss": 38.0208, "step": 6869 }, { "epoch": 163.5731343283582, "grad_norm": 25.677013397216797, "learning_rate": 9.210084033613445e-06, "loss": 37.9852, "step": 6870 }, { "epoch": 163.59701492537314, "grad_norm": 28.29668617248535, "learning_rate": 9.208683473389356e-06, "loss": 40.2626, "step": 6871 }, { "epoch": 163.62089552238805, "grad_norm": 24.860496520996094, "learning_rate": 9.207282913165266e-06, "loss": 37.8531, "step": 6872 }, { "epoch": 163.644776119403, "grad_norm": 32.82472610473633, "learning_rate": 9.205882352941177e-06, "loss": 38.6631, "step": 6873 }, { "epoch": 163.6686567164179, "grad_norm": 27.975828170776367, "learning_rate": 9.204481792717088e-06, "loss": 39.5384, "step": 6874 }, { "epoch": 163.69253731343284, "grad_norm": 30.201351165771484, "learning_rate": 9.203081232492998e-06, "loss": 38.123, "step": 6875 }, { "epoch": 163.71641791044777, "grad_norm": 32.39480972290039, "learning_rate": 9.201680672268907e-06, "loss": 39.7359, "step": 6876 }, { "epoch": 163.74029850746268, "grad_norm": 27.57598304748535, "learning_rate": 9.200280112044818e-06, "loss": 38.0357, "step": 6877 }, { "epoch": 163.76417910447762, "grad_norm": NaN, "learning_rate": 9.198879551820729e-06, "loss": 38.8083, "step": 6878 }, { "epoch": 163.78805970149253, "grad_norm": 22.73164176940918, "learning_rate": 9.198879551820729e-06, "loss": 39.3746, "step": 6879 }, { "epoch": 163.81194029850747, "grad_norm": 32.470794677734375, "learning_rate": 9.19747899159664e-06, "loss": 38.4076, "step": 6880 }, { "epoch": 163.83582089552237, "grad_norm": 26.334978103637695, "learning_rate": 9.19607843137255e-06, "loss": 38.1445, "step": 6881 }, { "epoch": 163.8597014925373, "grad_norm": 32.576622009277344, "learning_rate": 9.19467787114846e-06, "loss": 38.0056, "step": 6882 }, { "epoch": 163.88358208955225, "grad_norm": 29.02097511291504, "learning_rate": 9.19327731092437e-06, "loss": 40.2424, "step": 6883 }, { "epoch": 163.90746268656716, "grad_norm": 29.78497886657715, "learning_rate": 9.19187675070028e-06, "loss": 37.9755, "step": 6884 }, { "epoch": 163.9313432835821, "grad_norm": 28.278854370117188, "learning_rate": 9.19047619047619e-06, "loss": 38.892, "step": 6885 }, { "epoch": 163.955223880597, "grad_norm": 28.68059730529785, "learning_rate": 9.189075630252101e-06, "loss": 37.8199, "step": 6886 }, { "epoch": 163.97910447761194, "grad_norm": 24.857322692871094, "learning_rate": 9.187675070028012e-06, "loss": 39.6917, "step": 6887 }, { "epoch": 164.0, "grad_norm": 27.12505340576172, "learning_rate": 9.186274509803923e-06, "loss": 34.1164, "step": 6888 }, { "epoch": 164.02388059701494, "grad_norm": 26.02627944946289, "learning_rate": 9.184873949579832e-06, "loss": 38.3949, "step": 6889 }, { "epoch": 164.04776119402985, "grad_norm": 33.95978546142578, "learning_rate": 9.183473389355742e-06, "loss": 39.4691, "step": 6890 }, { "epoch": 164.07164179104478, "grad_norm": 27.781116485595703, "learning_rate": 9.182072829131653e-06, "loss": 37.6125, "step": 6891 }, { "epoch": 164.0955223880597, "grad_norm": 27.846071243286133, "learning_rate": 9.180672268907563e-06, "loss": 37.5973, "step": 6892 }, { "epoch": 164.11940298507463, "grad_norm": 23.98561668395996, "learning_rate": 9.179271708683474e-06, "loss": 38.6246, "step": 6893 }, { "epoch": 164.14328358208957, "grad_norm": 28.748355865478516, "learning_rate": 9.177871148459385e-06, "loss": 39.1328, "step": 6894 }, { "epoch": 164.16716417910447, "grad_norm": 22.823392868041992, "learning_rate": 9.176470588235294e-06, "loss": 38.8228, "step": 6895 }, { "epoch": 164.1910447761194, "grad_norm": 31.56593894958496, "learning_rate": 9.175070028011204e-06, "loss": 39.3094, "step": 6896 }, { "epoch": 164.21492537313432, "grad_norm": 27.536691665649414, "learning_rate": 9.173669467787115e-06, "loss": 38.8276, "step": 6897 }, { "epoch": 164.23880597014926, "grad_norm": 28.798383712768555, "learning_rate": 9.172268907563026e-06, "loss": 37.6737, "step": 6898 }, { "epoch": 164.26268656716417, "grad_norm": 26.151948928833008, "learning_rate": 9.170868347338936e-06, "loss": 38.0184, "step": 6899 }, { "epoch": 164.2865671641791, "grad_norm": 32.22966003417969, "learning_rate": 9.169467787114847e-06, "loss": 39.1828, "step": 6900 }, { "epoch": 164.31044776119404, "grad_norm": 25.881643295288086, "learning_rate": 9.168067226890757e-06, "loss": 37.777, "step": 6901 }, { "epoch": 164.33432835820895, "grad_norm": 29.259702682495117, "learning_rate": 9.166666666666666e-06, "loss": 37.7434, "step": 6902 }, { "epoch": 164.3582089552239, "grad_norm": 24.89307403564453, "learning_rate": 9.165266106442577e-06, "loss": 37.8326, "step": 6903 }, { "epoch": 164.3820895522388, "grad_norm": 27.22188377380371, "learning_rate": 9.163865546218488e-06, "loss": 39.1603, "step": 6904 }, { "epoch": 164.40597014925373, "grad_norm": 23.618114471435547, "learning_rate": 9.162464985994398e-06, "loss": 38.3846, "step": 6905 }, { "epoch": 164.42985074626867, "grad_norm": 32.07040786743164, "learning_rate": 9.161064425770309e-06, "loss": 39.0751, "step": 6906 }, { "epoch": 164.45373134328358, "grad_norm": 29.454524993896484, "learning_rate": 9.15966386554622e-06, "loss": 38.2031, "step": 6907 }, { "epoch": 164.47761194029852, "grad_norm": 23.247859954833984, "learning_rate": 9.158263305322129e-06, "loss": 39.196, "step": 6908 }, { "epoch": 164.50149253731342, "grad_norm": 24.405054092407227, "learning_rate": 9.15686274509804e-06, "loss": 39.4669, "step": 6909 }, { "epoch": 164.52537313432836, "grad_norm": 24.326190948486328, "learning_rate": 9.15546218487395e-06, "loss": 39.9453, "step": 6910 }, { "epoch": 164.54925373134327, "grad_norm": 23.48666000366211, "learning_rate": 9.15406162464986e-06, "loss": 37.9608, "step": 6911 }, { "epoch": 164.5731343283582, "grad_norm": 28.095449447631836, "learning_rate": 9.152661064425771e-06, "loss": 38.9568, "step": 6912 }, { "epoch": 164.59701492537314, "grad_norm": 24.124006271362305, "learning_rate": 9.151260504201682e-06, "loss": 40.1764, "step": 6913 }, { "epoch": 164.62089552238805, "grad_norm": 27.309589385986328, "learning_rate": 9.14985994397759e-06, "loss": 40.3751, "step": 6914 }, { "epoch": 164.644776119403, "grad_norm": 22.699621200561523, "learning_rate": 9.148459383753501e-06, "loss": 39.709, "step": 6915 }, { "epoch": 164.6686567164179, "grad_norm": 28.5625, "learning_rate": 9.147058823529412e-06, "loss": 38.6687, "step": 6916 }, { "epoch": 164.69253731343284, "grad_norm": 24.676198959350586, "learning_rate": 9.145658263305323e-06, "loss": 38.8227, "step": 6917 }, { "epoch": 164.71641791044777, "grad_norm": 25.53683853149414, "learning_rate": 9.144257703081233e-06, "loss": 38.3309, "step": 6918 }, { "epoch": 164.74029850746268, "grad_norm": 21.91119384765625, "learning_rate": 9.142857142857144e-06, "loss": 38.1358, "step": 6919 }, { "epoch": 164.76417910447762, "grad_norm": 23.987483978271484, "learning_rate": 9.141456582633055e-06, "loss": 39.3783, "step": 6920 }, { "epoch": 164.78805970149253, "grad_norm": 21.90296173095703, "learning_rate": 9.140056022408963e-06, "loss": 38.4104, "step": 6921 }, { "epoch": 164.81194029850747, "grad_norm": 23.88947868347168, "learning_rate": 9.138655462184874e-06, "loss": 40.9021, "step": 6922 }, { "epoch": 164.83582089552237, "grad_norm": 19.73707389831543, "learning_rate": 9.137254901960785e-06, "loss": 39.4567, "step": 6923 }, { "epoch": 164.8597014925373, "grad_norm": 21.8206787109375, "learning_rate": 9.135854341736695e-06, "loss": 38.7045, "step": 6924 }, { "epoch": 164.88358208955225, "grad_norm": 20.918621063232422, "learning_rate": 9.134453781512606e-06, "loss": 39.1697, "step": 6925 }, { "epoch": 164.90746268656716, "grad_norm": 21.001279830932617, "learning_rate": 9.133053221288517e-06, "loss": 38.2225, "step": 6926 }, { "epoch": 164.9313432835821, "grad_norm": 16.66687774658203, "learning_rate": 9.131652661064426e-06, "loss": 38.0187, "step": 6927 }, { "epoch": 164.955223880597, "grad_norm": 20.349098205566406, "learning_rate": 9.130252100840336e-06, "loss": 38.9978, "step": 6928 }, { "epoch": 164.97910447761194, "grad_norm": 17.241241455078125, "learning_rate": 9.128851540616247e-06, "loss": 38.5722, "step": 6929 }, { "epoch": 165.0, "grad_norm": 20.30120086669922, "learning_rate": 9.127450980392158e-06, "loss": 33.074, "step": 6930 }, { "epoch": 165.02388059701494, "grad_norm": 20.583757400512695, "learning_rate": 9.126050420168068e-06, "loss": 38.28, "step": 6931 }, { "epoch": 165.04776119402985, "grad_norm": 17.925338745117188, "learning_rate": 9.124649859943979e-06, "loss": 39.5699, "step": 6932 }, { "epoch": 165.07164179104478, "grad_norm": 20.660139083862305, "learning_rate": 9.123249299719888e-06, "loss": 39.1526, "step": 6933 }, { "epoch": 165.0955223880597, "grad_norm": 20.247779846191406, "learning_rate": 9.121848739495798e-06, "loss": 38.3042, "step": 6934 }, { "epoch": 165.11940298507463, "grad_norm": 22.91403579711914, "learning_rate": 9.120448179271709e-06, "loss": 38.7246, "step": 6935 }, { "epoch": 165.14328358208957, "grad_norm": 18.240346908569336, "learning_rate": 9.11904761904762e-06, "loss": 40.6094, "step": 6936 }, { "epoch": 165.16716417910447, "grad_norm": 19.83465576171875, "learning_rate": 9.11764705882353e-06, "loss": 39.5535, "step": 6937 }, { "epoch": 165.1910447761194, "grad_norm": 17.511629104614258, "learning_rate": 9.116246498599441e-06, "loss": 39.8103, "step": 6938 }, { "epoch": 165.21492537313432, "grad_norm": 16.819786071777344, "learning_rate": 9.11484593837535e-06, "loss": 37.9015, "step": 6939 }, { "epoch": 165.23880597014926, "grad_norm": 19.494901657104492, "learning_rate": 9.11344537815126e-06, "loss": 37.4988, "step": 6940 }, { "epoch": 165.26268656716417, "grad_norm": 20.577810287475586, "learning_rate": 9.112044817927171e-06, "loss": 38.5511, "step": 6941 }, { "epoch": 165.2865671641791, "grad_norm": 17.430700302124023, "learning_rate": 9.110644257703082e-06, "loss": 39.5467, "step": 6942 }, { "epoch": 165.31044776119404, "grad_norm": 21.67786407470703, "learning_rate": 9.109243697478992e-06, "loss": 37.5904, "step": 6943 }, { "epoch": 165.33432835820895, "grad_norm": 24.708356857299805, "learning_rate": 9.107843137254903e-06, "loss": 38.7451, "step": 6944 }, { "epoch": 165.3582089552239, "grad_norm": 16.916446685791016, "learning_rate": 9.106442577030814e-06, "loss": 37.8958, "step": 6945 }, { "epoch": 165.3820895522388, "grad_norm": 23.504207611083984, "learning_rate": 9.105042016806723e-06, "loss": 39.1934, "step": 6946 }, { "epoch": 165.40597014925373, "grad_norm": 18.756376266479492, "learning_rate": 9.103641456582633e-06, "loss": 36.8792, "step": 6947 }, { "epoch": 165.42985074626867, "grad_norm": 18.65342140197754, "learning_rate": 9.102240896358544e-06, "loss": 37.893, "step": 6948 }, { "epoch": 165.45373134328358, "grad_norm": 19.376798629760742, "learning_rate": 9.100840336134455e-06, "loss": 39.4041, "step": 6949 }, { "epoch": 165.47761194029852, "grad_norm": 14.387222290039062, "learning_rate": 9.099439775910365e-06, "loss": 38.3171, "step": 6950 }, { "epoch": 165.50149253731342, "grad_norm": 22.488039016723633, "learning_rate": 9.098039215686276e-06, "loss": 37.5534, "step": 6951 }, { "epoch": 165.52537313432836, "grad_norm": 16.533842086791992, "learning_rate": 9.096638655462185e-06, "loss": 39.5571, "step": 6952 }, { "epoch": 165.54925373134327, "grad_norm": 21.6029052734375, "learning_rate": 9.095238095238095e-06, "loss": 39.4568, "step": 6953 }, { "epoch": 165.5731343283582, "grad_norm": 23.718917846679688, "learning_rate": 9.093837535014006e-06, "loss": 38.8227, "step": 6954 }, { "epoch": 165.59701492537314, "grad_norm": 15.316898345947266, "learning_rate": 9.092436974789917e-06, "loss": 38.7754, "step": 6955 }, { "epoch": 165.62089552238805, "grad_norm": 21.905000686645508, "learning_rate": 9.091036414565827e-06, "loss": 39.0251, "step": 6956 }, { "epoch": 165.644776119403, "grad_norm": 25.428035736083984, "learning_rate": 9.089635854341738e-06, "loss": 38.984, "step": 6957 }, { "epoch": 165.6686567164179, "grad_norm": 15.639355659484863, "learning_rate": 9.088235294117647e-06, "loss": 38.9392, "step": 6958 }, { "epoch": 165.69253731343284, "grad_norm": 30.744590759277344, "learning_rate": 9.086834733893558e-06, "loss": 38.2226, "step": 6959 }, { "epoch": 165.71641791044777, "grad_norm": 23.297607421875, "learning_rate": 9.085434173669468e-06, "loss": 38.6842, "step": 6960 }, { "epoch": 165.74029850746268, "grad_norm": 27.761877059936523, "learning_rate": 9.084033613445379e-06, "loss": 38.7044, "step": 6961 }, { "epoch": 165.76417910447762, "grad_norm": 20.486059188842773, "learning_rate": 9.08263305322129e-06, "loss": 38.9376, "step": 6962 }, { "epoch": 165.78805970149253, "grad_norm": 25.5533504486084, "learning_rate": 9.0812324929972e-06, "loss": 38.9618, "step": 6963 }, { "epoch": 165.81194029850747, "grad_norm": 19.12059783935547, "learning_rate": 9.07983193277311e-06, "loss": 38.6444, "step": 6964 }, { "epoch": 165.83582089552237, "grad_norm": 20.324451446533203, "learning_rate": 9.07843137254902e-06, "loss": 38.7041, "step": 6965 }, { "epoch": 165.8597014925373, "grad_norm": 19.719558715820312, "learning_rate": 9.07703081232493e-06, "loss": 39.5301, "step": 6966 }, { "epoch": 165.88358208955225, "grad_norm": 18.664348602294922, "learning_rate": 9.075630252100841e-06, "loss": 38.6139, "step": 6967 }, { "epoch": 165.90746268656716, "grad_norm": 22.657773971557617, "learning_rate": 9.074229691876752e-06, "loss": 39.0989, "step": 6968 }, { "epoch": 165.9313432835821, "grad_norm": 16.08565902709961, "learning_rate": 9.072829131652662e-06, "loss": 37.5231, "step": 6969 }, { "epoch": 165.955223880597, "grad_norm": 20.269485473632812, "learning_rate": 9.071428571428573e-06, "loss": 39.146, "step": 6970 }, { "epoch": 165.97910447761194, "grad_norm": 19.0544376373291, "learning_rate": 9.070028011204482e-06, "loss": 38.0774, "step": 6971 }, { "epoch": 166.0, "grad_norm": 17.171775817871094, "learning_rate": 9.068627450980392e-06, "loss": 35.643, "step": 6972 }, { "epoch": 166.02388059701494, "grad_norm": 16.68134307861328, "learning_rate": 9.067226890756303e-06, "loss": 38.4436, "step": 6973 }, { "epoch": 166.04776119402985, "grad_norm": 20.85126495361328, "learning_rate": 9.065826330532214e-06, "loss": 39.3827, "step": 6974 }, { "epoch": 166.07164179104478, "grad_norm": 16.558414459228516, "learning_rate": 9.064425770308124e-06, "loss": 39.2912, "step": 6975 }, { "epoch": 166.0955223880597, "grad_norm": 16.860647201538086, "learning_rate": 9.063025210084035e-06, "loss": 38.5577, "step": 6976 }, { "epoch": 166.11940298507463, "grad_norm": 18.252050399780273, "learning_rate": 9.061624649859944e-06, "loss": 38.5392, "step": 6977 }, { "epoch": 166.14328358208957, "grad_norm": 15.765396118164062, "learning_rate": 9.060224089635855e-06, "loss": 39.5898, "step": 6978 }, { "epoch": 166.16716417910447, "grad_norm": 18.337221145629883, "learning_rate": 9.058823529411765e-06, "loss": 37.3266, "step": 6979 }, { "epoch": 166.1910447761194, "grad_norm": 17.254228591918945, "learning_rate": 9.057422969187676e-06, "loss": 38.7531, "step": 6980 }, { "epoch": 166.21492537313432, "grad_norm": 15.587992668151855, "learning_rate": 9.056022408963587e-06, "loss": 39.1706, "step": 6981 }, { "epoch": 166.23880597014926, "grad_norm": 17.77199935913086, "learning_rate": 9.054621848739497e-06, "loss": 38.2599, "step": 6982 }, { "epoch": 166.26268656716417, "grad_norm": 18.72295570373535, "learning_rate": 9.053221288515406e-06, "loss": 39.0038, "step": 6983 }, { "epoch": 166.2865671641791, "grad_norm": 15.716513633728027, "learning_rate": 9.051820728291317e-06, "loss": 38.4302, "step": 6984 }, { "epoch": 166.31044776119404, "grad_norm": 18.48078727722168, "learning_rate": 9.050420168067227e-06, "loss": 38.8014, "step": 6985 }, { "epoch": 166.33432835820895, "grad_norm": 15.823141098022461, "learning_rate": 9.049019607843138e-06, "loss": 38.6901, "step": 6986 }, { "epoch": 166.3582089552239, "grad_norm": 19.339231491088867, "learning_rate": 9.047619047619049e-06, "loss": 38.4488, "step": 6987 }, { "epoch": 166.3820895522388, "grad_norm": 18.258453369140625, "learning_rate": 9.04621848739496e-06, "loss": 37.8733, "step": 6988 }, { "epoch": 166.40597014925373, "grad_norm": 15.638240814208984, "learning_rate": 9.04481792717087e-06, "loss": 36.9098, "step": 6989 }, { "epoch": 166.42985074626867, "grad_norm": 19.154794692993164, "learning_rate": 9.043417366946779e-06, "loss": 37.4268, "step": 6990 }, { "epoch": 166.45373134328358, "grad_norm": 14.776188850402832, "learning_rate": 9.04201680672269e-06, "loss": 39.6194, "step": 6991 }, { "epoch": 166.47761194029852, "grad_norm": 18.53327751159668, "learning_rate": 9.0406162464986e-06, "loss": 38.2096, "step": 6992 }, { "epoch": 166.50149253731342, "grad_norm": 16.842300415039062, "learning_rate": 9.03921568627451e-06, "loss": 38.5841, "step": 6993 }, { "epoch": 166.52537313432836, "grad_norm": 15.047019004821777, "learning_rate": 9.037815126050421e-06, "loss": 38.4693, "step": 6994 }, { "epoch": 166.54925373134327, "grad_norm": 24.15064811706543, "learning_rate": 9.036414565826332e-06, "loss": 39.4956, "step": 6995 }, { "epoch": 166.5731343283582, "grad_norm": 18.957366943359375, "learning_rate": 9.035014005602241e-06, "loss": 38.1947, "step": 6996 }, { "epoch": 166.59701492537314, "grad_norm": 14.240507125854492, "learning_rate": 9.033613445378152e-06, "loss": 38.1626, "step": 6997 }, { "epoch": 166.62089552238805, "grad_norm": 23.42329978942871, "learning_rate": 9.032212885154062e-06, "loss": 39.1337, "step": 6998 }, { "epoch": 166.644776119403, "grad_norm": 20.22484016418457, "learning_rate": 9.030812324929973e-06, "loss": 38.7054, "step": 6999 }, { "epoch": 166.6686567164179, "grad_norm": 16.616657257080078, "learning_rate": 9.029411764705884e-06, "loss": 39.5361, "step": 7000 }, { "epoch": 166.69253731343284, "grad_norm": 18.058732986450195, "learning_rate": 9.028011204481794e-06, "loss": 39.2417, "step": 7001 }, { "epoch": 166.71641791044777, "grad_norm": 18.702417373657227, "learning_rate": 9.026610644257703e-06, "loss": 39.4343, "step": 7002 }, { "epoch": 166.74029850746268, "grad_norm": NaN, "learning_rate": 9.025210084033614e-06, "loss": 34.1441, "step": 7003 }, { "epoch": 166.76417910447762, "grad_norm": 15.148275375366211, "learning_rate": 9.025210084033614e-06, "loss": 38.7294, "step": 7004 }, { "epoch": 166.78805970149253, "grad_norm": 17.811681747436523, "learning_rate": 9.023809523809524e-06, "loss": 40.2746, "step": 7005 }, { "epoch": 166.81194029850747, "grad_norm": 15.742315292358398, "learning_rate": 9.022408963585435e-06, "loss": 39.8818, "step": 7006 }, { "epoch": 166.83582089552237, "grad_norm": 16.79622459411621, "learning_rate": 9.021008403361346e-06, "loss": 38.1164, "step": 7007 }, { "epoch": 166.8597014925373, "grad_norm": 17.417369842529297, "learning_rate": 9.019607843137256e-06, "loss": 39.9128, "step": 7008 }, { "epoch": 166.88358208955225, "grad_norm": 18.435260772705078, "learning_rate": 9.018207282913165e-06, "loss": 39.2292, "step": 7009 }, { "epoch": 166.90746268656716, "grad_norm": 14.42545223236084, "learning_rate": 9.016806722689076e-06, "loss": 37.3983, "step": 7010 }, { "epoch": 166.9313432835821, "grad_norm": 19.327251434326172, "learning_rate": 9.015406162464987e-06, "loss": 38.8978, "step": 7011 }, { "epoch": 166.955223880597, "grad_norm": 23.709386825561523, "learning_rate": 9.014005602240897e-06, "loss": 37.6597, "step": 7012 }, { "epoch": 166.97910447761194, "grad_norm": 13.446073532104492, "learning_rate": 9.012605042016808e-06, "loss": 38.0333, "step": 7013 }, { "epoch": 167.0, "grad_norm": 17.742656707763672, "learning_rate": 9.011204481792719e-06, "loss": 33.7607, "step": 7014 }, { "epoch": 167.02388059701494, "grad_norm": 24.789691925048828, "learning_rate": 9.009803921568629e-06, "loss": 37.8773, "step": 7015 }, { "epoch": 167.04776119402985, "grad_norm": 15.1881685256958, "learning_rate": 9.008403361344538e-06, "loss": 38.3267, "step": 7016 }, { "epoch": 167.07164179104478, "grad_norm": 26.128843307495117, "learning_rate": 9.007002801120449e-06, "loss": 38.5114, "step": 7017 }, { "epoch": 167.0955223880597, "grad_norm": 18.408493041992188, "learning_rate": 9.00560224089636e-06, "loss": 39.4748, "step": 7018 }, { "epoch": 167.11940298507463, "grad_norm": 30.897546768188477, "learning_rate": 9.00420168067227e-06, "loss": 38.585, "step": 7019 }, { "epoch": 167.14328358208957, "grad_norm": 21.582815170288086, "learning_rate": 9.00280112044818e-06, "loss": 38.478, "step": 7020 }, { "epoch": 167.16716417910447, "grad_norm": 22.242801666259766, "learning_rate": 9.001400560224091e-06, "loss": 38.6737, "step": 7021 }, { "epoch": 167.1910447761194, "grad_norm": 21.05809211730957, "learning_rate": 9e-06, "loss": 37.014, "step": 7022 }, { "epoch": 167.21492537313432, "grad_norm": 17.701534271240234, "learning_rate": 8.998599439775911e-06, "loss": 37.6012, "step": 7023 }, { "epoch": 167.23880597014926, "grad_norm": 14.203514099121094, "learning_rate": 8.997198879551822e-06, "loss": 38.7506, "step": 7024 }, { "epoch": 167.26268656716417, "grad_norm": 21.035812377929688, "learning_rate": 8.995798319327732e-06, "loss": 39.2168, "step": 7025 }, { "epoch": 167.2865671641791, "grad_norm": 16.741291046142578, "learning_rate": 8.994397759103643e-06, "loss": 37.6171, "step": 7026 }, { "epoch": 167.31044776119404, "grad_norm": 20.702959060668945, "learning_rate": 8.992997198879553e-06, "loss": 39.8419, "step": 7027 }, { "epoch": 167.33432835820895, "grad_norm": 15.385114669799805, "learning_rate": 8.991596638655462e-06, "loss": 40.0453, "step": 7028 }, { "epoch": 167.3582089552239, "grad_norm": 20.197725296020508, "learning_rate": 8.990196078431373e-06, "loss": 37.4206, "step": 7029 }, { "epoch": 167.3820895522388, "grad_norm": 16.919689178466797, "learning_rate": 8.988795518207284e-06, "loss": 38.1808, "step": 7030 }, { "epoch": 167.40597014925373, "grad_norm": 18.998701095581055, "learning_rate": 8.987394957983194e-06, "loss": 38.792, "step": 7031 }, { "epoch": 167.42985074626867, "grad_norm": 17.395341873168945, "learning_rate": 8.985994397759105e-06, "loss": 39.3755, "step": 7032 }, { "epoch": 167.45373134328358, "grad_norm": 16.7122745513916, "learning_rate": 8.984593837535016e-06, "loss": 38.39, "step": 7033 }, { "epoch": 167.47761194029852, "grad_norm": 16.960344314575195, "learning_rate": 8.983193277310926e-06, "loss": 38.9375, "step": 7034 }, { "epoch": 167.50149253731342, "grad_norm": 18.20403480529785, "learning_rate": 8.981792717086835e-06, "loss": 38.3068, "step": 7035 }, { "epoch": 167.52537313432836, "grad_norm": 15.392725944519043, "learning_rate": 8.980392156862746e-06, "loss": 37.5274, "step": 7036 }, { "epoch": 167.54925373134327, "grad_norm": 17.684289932250977, "learning_rate": 8.978991596638656e-06, "loss": 38.959, "step": 7037 }, { "epoch": 167.5731343283582, "grad_norm": 15.469087600708008, "learning_rate": 8.977591036414567e-06, "loss": 37.1788, "step": 7038 }, { "epoch": 167.59701492537314, "grad_norm": 19.313865661621094, "learning_rate": 8.976190476190478e-06, "loss": 39.7805, "step": 7039 }, { "epoch": 167.62089552238805, "grad_norm": 17.780128479003906, "learning_rate": 8.974789915966388e-06, "loss": 38.685, "step": 7040 }, { "epoch": 167.644776119403, "grad_norm": 15.168746948242188, "learning_rate": 8.973389355742297e-06, "loss": 39.9769, "step": 7041 }, { "epoch": 167.6686567164179, "grad_norm": 19.67713165283203, "learning_rate": 8.971988795518208e-06, "loss": 38.8518, "step": 7042 }, { "epoch": 167.69253731343284, "grad_norm": 13.875410079956055, "learning_rate": 8.970588235294119e-06, "loss": 38.6862, "step": 7043 }, { "epoch": 167.71641791044777, "grad_norm": 17.410036087036133, "learning_rate": 8.96918767507003e-06, "loss": 37.2191, "step": 7044 }, { "epoch": 167.74029850746268, "grad_norm": 12.649752616882324, "learning_rate": 8.96778711484594e-06, "loss": 38.9077, "step": 7045 }, { "epoch": 167.76417910447762, "grad_norm": 18.6796932220459, "learning_rate": 8.96638655462185e-06, "loss": 39.6732, "step": 7046 }, { "epoch": 167.78805970149253, "grad_norm": 20.211078643798828, "learning_rate": 8.96498599439776e-06, "loss": 39.0417, "step": 7047 }, { "epoch": 167.81194029850747, "grad_norm": 16.24715805053711, "learning_rate": 8.96358543417367e-06, "loss": 40.6287, "step": 7048 }, { "epoch": 167.83582089552237, "grad_norm": 14.665667533874512, "learning_rate": 8.96218487394958e-06, "loss": 38.0857, "step": 7049 }, { "epoch": 167.8597014925373, "grad_norm": 16.91412925720215, "learning_rate": 8.960784313725491e-06, "loss": 37.7777, "step": 7050 }, { "epoch": 167.88358208955225, "grad_norm": 16.80457878112793, "learning_rate": 8.959383753501402e-06, "loss": 38.633, "step": 7051 }, { "epoch": 167.90746268656716, "grad_norm": 17.991159439086914, "learning_rate": 8.957983193277313e-06, "loss": 39.2465, "step": 7052 }, { "epoch": 167.9313432835821, "grad_norm": 16.38360595703125, "learning_rate": 8.956582633053222e-06, "loss": 39.0996, "step": 7053 }, { "epoch": 167.955223880597, "grad_norm": 15.639243125915527, "learning_rate": 8.955182072829132e-06, "loss": 37.9887, "step": 7054 }, { "epoch": 167.97910447761194, "grad_norm": 13.691624641418457, "learning_rate": 8.953781512605043e-06, "loss": 39.3299, "step": 7055 }, { "epoch": 168.0, "grad_norm": 13.173548698425293, "learning_rate": 8.952380952380953e-06, "loss": 33.9401, "step": 7056 }, { "epoch": 168.02388059701494, "grad_norm": 15.656696319580078, "learning_rate": 8.950980392156864e-06, "loss": 39.6606, "step": 7057 }, { "epoch": 168.04776119402985, "grad_norm": 17.121431350708008, "learning_rate": 8.949579831932775e-06, "loss": 40.1573, "step": 7058 }, { "epoch": 168.07164179104478, "grad_norm": 15.972476959228516, "learning_rate": 8.948179271708685e-06, "loss": 38.4846, "step": 7059 }, { "epoch": 168.0955223880597, "grad_norm": 13.958266258239746, "learning_rate": 8.946778711484594e-06, "loss": 39.4852, "step": 7060 }, { "epoch": 168.11940298507463, "grad_norm": 18.454063415527344, "learning_rate": 8.945378151260505e-06, "loss": 38.1036, "step": 7061 }, { "epoch": 168.14328358208957, "grad_norm": 17.495267868041992, "learning_rate": 8.943977591036416e-06, "loss": 37.9424, "step": 7062 }, { "epoch": 168.16716417910447, "grad_norm": 15.519514083862305, "learning_rate": 8.942577030812326e-06, "loss": 38.6599, "step": 7063 }, { "epoch": 168.1910447761194, "grad_norm": 20.250783920288086, "learning_rate": 8.941176470588237e-06, "loss": 37.5316, "step": 7064 }, { "epoch": 168.21492537313432, "grad_norm": 16.71542739868164, "learning_rate": 8.939775910364148e-06, "loss": 39.2917, "step": 7065 }, { "epoch": 168.23880597014926, "grad_norm": 15.245902061462402, "learning_rate": 8.938375350140056e-06, "loss": 39.1659, "step": 7066 }, { "epoch": 168.26268656716417, "grad_norm": 17.60688018798828, "learning_rate": 8.936974789915967e-06, "loss": 39.6018, "step": 7067 }, { "epoch": 168.2865671641791, "grad_norm": 23.204566955566406, "learning_rate": 8.935574229691878e-06, "loss": 39.2939, "step": 7068 }, { "epoch": 168.31044776119404, "grad_norm": 16.920000076293945, "learning_rate": 8.934173669467788e-06, "loss": 37.6469, "step": 7069 }, { "epoch": 168.33432835820895, "grad_norm": 17.549373626708984, "learning_rate": 8.932773109243699e-06, "loss": 38.3607, "step": 7070 }, { "epoch": 168.3582089552239, "grad_norm": 28.0925350189209, "learning_rate": 8.93137254901961e-06, "loss": 38.8907, "step": 7071 }, { "epoch": 168.3820895522388, "grad_norm": 21.194316864013672, "learning_rate": 8.929971988795519e-06, "loss": 38.6106, "step": 7072 }, { "epoch": 168.40597014925373, "grad_norm": 29.696517944335938, "learning_rate": 8.92857142857143e-06, "loss": 38.6381, "step": 7073 }, { "epoch": 168.42985074626867, "grad_norm": 21.88129997253418, "learning_rate": 8.92717086834734e-06, "loss": 37.0661, "step": 7074 }, { "epoch": 168.45373134328358, "grad_norm": 22.167688369750977, "learning_rate": 8.92577030812325e-06, "loss": 38.1369, "step": 7075 }, { "epoch": 168.47761194029852, "grad_norm": 25.323083877563477, "learning_rate": 8.924369747899161e-06, "loss": 37.809, "step": 7076 }, { "epoch": 168.50149253731342, "grad_norm": 15.450112342834473, "learning_rate": 8.922969187675072e-06, "loss": 38.1747, "step": 7077 }, { "epoch": 168.52537313432836, "grad_norm": 22.90835189819336, "learning_rate": 8.921568627450982e-06, "loss": 37.6116, "step": 7078 }, { "epoch": 168.54925373134327, "grad_norm": 17.67413330078125, "learning_rate": 8.920168067226891e-06, "loss": 38.3698, "step": 7079 }, { "epoch": 168.5731343283582, "grad_norm": 20.223190307617188, "learning_rate": 8.918767507002802e-06, "loss": 39.1152, "step": 7080 }, { "epoch": 168.59701492537314, "grad_norm": 23.67002296447754, "learning_rate": 8.917366946778713e-06, "loss": 38.104, "step": 7081 }, { "epoch": 168.62089552238805, "grad_norm": 17.217201232910156, "learning_rate": 8.915966386554623e-06, "loss": 39.2392, "step": 7082 }, { "epoch": 168.644776119403, "grad_norm": 22.82044792175293, "learning_rate": 8.914565826330534e-06, "loss": 38.5469, "step": 7083 }, { "epoch": 168.6686567164179, "grad_norm": 22.710506439208984, "learning_rate": 8.913165266106445e-06, "loss": 38.6132, "step": 7084 }, { "epoch": 168.69253731343284, "grad_norm": 15.312932014465332, "learning_rate": 8.911764705882354e-06, "loss": 39.6502, "step": 7085 }, { "epoch": 168.71641791044777, "grad_norm": 22.852859497070312, "learning_rate": 8.910364145658264e-06, "loss": 38.7171, "step": 7086 }, { "epoch": 168.74029850746268, "grad_norm": 18.29657554626465, "learning_rate": 8.908963585434175e-06, "loss": 38.8535, "step": 7087 }, { "epoch": 168.76417910447762, "grad_norm": 15.349685668945312, "learning_rate": 8.907563025210085e-06, "loss": 37.8596, "step": 7088 }, { "epoch": 168.78805970149253, "grad_norm": 19.419158935546875, "learning_rate": 8.906162464985994e-06, "loss": 38.5777, "step": 7089 }, { "epoch": 168.81194029850747, "grad_norm": 17.963842391967773, "learning_rate": 8.904761904761905e-06, "loss": 38.8288, "step": 7090 }, { "epoch": 168.83582089552237, "grad_norm": 19.185089111328125, "learning_rate": 8.903361344537816e-06, "loss": 38.0491, "step": 7091 }, { "epoch": 168.8597014925373, "grad_norm": 25.85097312927246, "learning_rate": 8.901960784313726e-06, "loss": 37.4236, "step": 7092 }, { "epoch": 168.88358208955225, "grad_norm": 16.209335327148438, "learning_rate": 8.900560224089635e-06, "loss": 39.0322, "step": 7093 }, { "epoch": 168.90746268656716, "grad_norm": 27.616640090942383, "learning_rate": 8.899159663865546e-06, "loss": 38.5832, "step": 7094 }, { "epoch": 168.9313432835821, "grad_norm": 19.664894104003906, "learning_rate": 8.897759103641457e-06, "loss": 37.6454, "step": 7095 }, { "epoch": 168.955223880597, "grad_norm": 20.211137771606445, "learning_rate": 8.896358543417367e-06, "loss": 39.4748, "step": 7096 }, { "epoch": 168.97910447761194, "grad_norm": 23.72620391845703, "learning_rate": 8.894957983193278e-06, "loss": 39.8556, "step": 7097 }, { "epoch": 169.0, "grad_norm": 11.977401733398438, "learning_rate": 8.893557422969188e-06, "loss": 33.6459, "step": 7098 }, { "epoch": 169.02388059701494, "grad_norm": 27.021682739257812, "learning_rate": 8.892156862745099e-06, "loss": 40.1441, "step": 7099 }, { "epoch": 169.04776119402985, "grad_norm": 16.188669204711914, "learning_rate": 8.890756302521008e-06, "loss": 38.0459, "step": 7100 }, { "epoch": 169.07164179104478, "grad_norm": 24.323711395263672, "learning_rate": 8.889355742296919e-06, "loss": 38.386, "step": 7101 }, { "epoch": 169.0955223880597, "grad_norm": 22.40289878845215, "learning_rate": 8.88795518207283e-06, "loss": 37.6409, "step": 7102 }, { "epoch": 169.11940298507463, "grad_norm": 17.63547706604004, "learning_rate": 8.88655462184874e-06, "loss": 37.1451, "step": 7103 }, { "epoch": 169.14328358208957, "grad_norm": 35.601951599121094, "learning_rate": 8.88515406162465e-06, "loss": 39.6787, "step": 7104 }, { "epoch": 169.16716417910447, "grad_norm": 28.64064598083496, "learning_rate": 8.883753501400561e-06, "loss": 39.7639, "step": 7105 }, { "epoch": 169.1910447761194, "grad_norm": 37.155372619628906, "learning_rate": 8.88235294117647e-06, "loss": 37.6808, "step": 7106 }, { "epoch": 169.21492537313432, "grad_norm": 29.988176345825195, "learning_rate": 8.88095238095238e-06, "loss": 38.2382, "step": 7107 }, { "epoch": 169.23880597014926, "grad_norm": 32.40060806274414, "learning_rate": 8.879551820728291e-06, "loss": 38.6155, "step": 7108 }, { "epoch": 169.26268656716417, "grad_norm": 29.709169387817383, "learning_rate": 8.878151260504202e-06, "loss": 38.6187, "step": 7109 }, { "epoch": 169.2865671641791, "grad_norm": 28.023569107055664, "learning_rate": 8.876750700280113e-06, "loss": 39.2048, "step": 7110 }, { "epoch": 169.31044776119404, "grad_norm": 24.473493576049805, "learning_rate": 8.875350140056023e-06, "loss": 38.3527, "step": 7111 }, { "epoch": 169.33432835820895, "grad_norm": 33.315338134765625, "learning_rate": 8.873949579831932e-06, "loss": 37.1304, "step": 7112 }, { "epoch": 169.3582089552239, "grad_norm": 28.781728744506836, "learning_rate": 8.872549019607843e-06, "loss": 38.6562, "step": 7113 }, { "epoch": 169.3820895522388, "grad_norm": 33.044647216796875, "learning_rate": 8.871148459383754e-06, "loss": 38.6907, "step": 7114 }, { "epoch": 169.40597014925373, "grad_norm": 28.969144821166992, "learning_rate": 8.869747899159664e-06, "loss": 39.0103, "step": 7115 }, { "epoch": 169.42985074626867, "grad_norm": 29.890914916992188, "learning_rate": 8.868347338935575e-06, "loss": 36.4496, "step": 7116 }, { "epoch": 169.45373134328358, "grad_norm": 29.558334350585938, "learning_rate": 8.866946778711485e-06, "loss": 38.781, "step": 7117 }, { "epoch": 169.47761194029852, "grad_norm": 28.465272903442383, "learning_rate": 8.865546218487396e-06, "loss": 37.38, "step": 7118 }, { "epoch": 169.50149253731342, "grad_norm": 26.63448143005371, "learning_rate": 8.864145658263305e-06, "loss": 38.3743, "step": 7119 }, { "epoch": 169.52537313432836, "grad_norm": 33.672149658203125, "learning_rate": 8.862745098039216e-06, "loss": 37.1088, "step": 7120 }, { "epoch": 169.54925373134327, "grad_norm": 27.566909790039062, "learning_rate": 8.861344537815126e-06, "loss": 39.3635, "step": 7121 }, { "epoch": 169.5731343283582, "grad_norm": 30.72598648071289, "learning_rate": 8.859943977591037e-06, "loss": 37.5061, "step": 7122 }, { "epoch": 169.59701492537314, "grad_norm": 21.491497039794922, "learning_rate": 8.858543417366948e-06, "loss": 37.4045, "step": 7123 }, { "epoch": 169.62089552238805, "grad_norm": 31.785451889038086, "learning_rate": 8.857142857142858e-06, "loss": 37.2397, "step": 7124 }, { "epoch": 169.644776119403, "grad_norm": 28.889570236206055, "learning_rate": 8.855742296918767e-06, "loss": 38.2749, "step": 7125 }, { "epoch": 169.6686567164179, "grad_norm": 29.61405372619629, "learning_rate": 8.854341736694678e-06, "loss": 39.3158, "step": 7126 }, { "epoch": 169.69253731343284, "grad_norm": 27.74846076965332, "learning_rate": 8.852941176470588e-06, "loss": 39.5199, "step": 7127 }, { "epoch": 169.71641791044777, "grad_norm": 31.886384963989258, "learning_rate": 8.851540616246499e-06, "loss": 37.9009, "step": 7128 }, { "epoch": 169.74029850746268, "grad_norm": 28.313329696655273, "learning_rate": 8.85014005602241e-06, "loss": 39.9385, "step": 7129 }, { "epoch": 169.76417910447762, "grad_norm": 30.50246810913086, "learning_rate": 8.84873949579832e-06, "loss": 38.9029, "step": 7130 }, { "epoch": 169.78805970149253, "grad_norm": 28.30780792236328, "learning_rate": 8.84733893557423e-06, "loss": 39.1536, "step": 7131 }, { "epoch": 169.81194029850747, "grad_norm": 29.161802291870117, "learning_rate": 8.84593837535014e-06, "loss": 39.0592, "step": 7132 }, { "epoch": 169.83582089552237, "grad_norm": 24.86357307434082, "learning_rate": 8.84453781512605e-06, "loss": 39.0002, "step": 7133 }, { "epoch": 169.8597014925373, "grad_norm": 36.86708068847656, "learning_rate": 8.843137254901961e-06, "loss": 38.9471, "step": 7134 }, { "epoch": 169.88358208955225, "grad_norm": 32.14461898803711, "learning_rate": 8.841736694677872e-06, "loss": 38.8807, "step": 7135 }, { "epoch": 169.90746268656716, "grad_norm": 30.886720657348633, "learning_rate": 8.840336134453783e-06, "loss": 39.0135, "step": 7136 }, { "epoch": 169.9313432835821, "grad_norm": 26.017770767211914, "learning_rate": 8.838935574229691e-06, "loss": 39.394, "step": 7137 }, { "epoch": 169.955223880597, "grad_norm": 27.808815002441406, "learning_rate": 8.837535014005602e-06, "loss": 38.4624, "step": 7138 }, { "epoch": 169.97910447761194, "grad_norm": 24.808860778808594, "learning_rate": 8.836134453781513e-06, "loss": 39.5112, "step": 7139 }, { "epoch": 170.0, "grad_norm": 26.195302963256836, "learning_rate": 8.834733893557423e-06, "loss": 34.1301, "step": 7140 }, { "epoch": 170.0, "step": 7140, "total_flos": 3.510199823180317e+17, "train_loss": 4.586799935733571, "train_runtime": 25702.2082, "train_samples_per_second": 35.399, "train_steps_per_second": 0.278 }, { "epoch": 170.02388059701494, "grad_norm": 27.166627883911133, "learning_rate": 1e-05, "loss": 39.5213, "step": 7141 }, { "epoch": 170.04776119402985, "grad_norm": Infinity, "learning_rate": 9.99874686716792e-06, "loss": 44.4595, "step": 7142 }, { "epoch": 170.07164179104478, "grad_norm": Infinity, "learning_rate": 9.99874686716792e-06, "loss": 47.7558, "step": 7143 }, { "epoch": 170.0955223880597, "grad_norm": 454.9642639160156, "learning_rate": 9.99874686716792e-06, "loss": 47.5788, "step": 7144 }, { "epoch": 170.11940298507463, "grad_norm": 259.2172546386719, "learning_rate": 9.99749373433584e-06, "loss": 44.2854, "step": 7145 }, { "epoch": 170.14328358208957, "grad_norm": 93.37568664550781, "learning_rate": 9.996240601503761e-06, "loss": 41.6306, "step": 7146 }, { "epoch": 170.16716417910447, "grad_norm": 83.61749267578125, "learning_rate": 9.99498746867168e-06, "loss": 40.4442, "step": 7147 }, { "epoch": 170.1910447761194, "grad_norm": 66.74041748046875, "learning_rate": 9.9937343358396e-06, "loss": 39.8658, "step": 7148 }, { "epoch": 170.21492537313432, "grad_norm": 60.166690826416016, "learning_rate": 9.992481203007518e-06, "loss": 40.5291, "step": 7149 }, { "epoch": 170.23880597014926, "grad_norm": 49.17573165893555, "learning_rate": 9.99122807017544e-06, "loss": 38.687, "step": 7150 }, { "epoch": 170.26268656716417, "grad_norm": 53.34871292114258, "learning_rate": 9.98997493734336e-06, "loss": 38.0343, "step": 7151 }, { "epoch": 170.2865671641791, "grad_norm": 36.63299560546875, "learning_rate": 9.988721804511279e-06, "loss": 38.4072, "step": 7152 }, { "epoch": 170.31044776119404, "grad_norm": 51.223777770996094, "learning_rate": 9.987468671679199e-06, "loss": 38.6381, "step": 7153 }, { "epoch": 170.33432835820895, "grad_norm": 29.117027282714844, "learning_rate": 9.986215538847118e-06, "loss": 39.8293, "step": 7154 }, { "epoch": 170.3582089552239, "grad_norm": 42.85747146606445, "learning_rate": 9.984962406015038e-06, "loss": 38.4093, "step": 7155 }, { "epoch": 170.3820895522388, "grad_norm": 24.090818405151367, "learning_rate": 9.983709273182957e-06, "loss": 38.7232, "step": 7156 }, { "epoch": 170.40597014925373, "grad_norm": 37.851863861083984, "learning_rate": 9.982456140350879e-06, "loss": 39.2879, "step": 7157 }, { "epoch": 170.42985074626867, "grad_norm": 20.849395751953125, "learning_rate": 9.981203007518798e-06, "loss": 38.5625, "step": 7158 }, { "epoch": 170.45373134328358, "grad_norm": 32.551849365234375, "learning_rate": 9.979949874686718e-06, "loss": 37.3203, "step": 7159 }, { "epoch": 170.47761194029852, "grad_norm": 23.6810359954834, "learning_rate": 9.978696741854637e-06, "loss": 37.7778, "step": 7160 }, { "epoch": 170.50149253731342, "grad_norm": 26.827194213867188, "learning_rate": 9.977443609022557e-06, "loss": 38.5357, "step": 7161 }, { "epoch": 170.52537313432836, "grad_norm": 25.67653465270996, "learning_rate": 9.976190476190477e-06, "loss": 38.2528, "step": 7162 }, { "epoch": 170.54925373134327, "grad_norm": 25.528614044189453, "learning_rate": 9.974937343358396e-06, "loss": 38.9571, "step": 7163 }, { "epoch": 170.5731343283582, "grad_norm": 17.796689987182617, "learning_rate": 9.973684210526316e-06, "loss": 39.4661, "step": 7164 }, { "epoch": 170.59701492537314, "grad_norm": 25.79865264892578, "learning_rate": 9.972431077694237e-06, "loss": 38.3182, "step": 7165 }, { "epoch": 170.62089552238805, "grad_norm": 21.690196990966797, "learning_rate": 9.971177944862157e-06, "loss": 38.4445, "step": 7166 }, { "epoch": 170.644776119403, "grad_norm": 18.10161590576172, "learning_rate": 9.969924812030076e-06, "loss": 39.3527, "step": 7167 }, { "epoch": 170.6686567164179, "grad_norm": 19.185293197631836, "learning_rate": 9.968671679197996e-06, "loss": 38.0544, "step": 7168 }, { "epoch": 170.69253731343284, "grad_norm": 19.368209838867188, "learning_rate": 9.967418546365915e-06, "loss": 39.4563, "step": 7169 }, { "epoch": 170.71641791044777, "grad_norm": 15.173422813415527, "learning_rate": 9.966165413533837e-06, "loss": 38.7524, "step": 7170 }, { "epoch": 170.74029850746268, "grad_norm": 19.56130027770996, "learning_rate": 9.964912280701755e-06, "loss": 38.7178, "step": 7171 }, { "epoch": 170.76417910447762, "grad_norm": 18.328960418701172, "learning_rate": 9.963659147869676e-06, "loss": 39.8653, "step": 7172 }, { "epoch": 170.78805970149253, "grad_norm": 17.148019790649414, "learning_rate": 9.962406015037594e-06, "loss": 39.0803, "step": 7173 }, { "epoch": 170.81194029850747, "grad_norm": 19.625036239624023, "learning_rate": 9.961152882205515e-06, "loss": 39.3432, "step": 7174 }, { "epoch": 170.83582089552237, "grad_norm": 24.38473129272461, "learning_rate": 9.959899749373435e-06, "loss": 38.8941, "step": 7175 }, { "epoch": 170.8597014925373, "grad_norm": 13.859121322631836, "learning_rate": 9.958646616541354e-06, "loss": 37.8674, "step": 7176 }, { "epoch": 170.88358208955225, "grad_norm": 28.161521911621094, "learning_rate": 9.957393483709274e-06, "loss": 39.448, "step": 7177 }, { "epoch": 170.90746268656716, "grad_norm": 20.711326599121094, "learning_rate": 9.956140350877194e-06, "loss": 39.11, "step": 7178 }, { "epoch": 170.9313432835821, "grad_norm": 19.910417556762695, "learning_rate": 9.954887218045113e-06, "loss": 39.1011, "step": 7179 }, { "epoch": 170.955223880597, "grad_norm": 26.991012573242188, "learning_rate": 9.953634085213033e-06, "loss": 37.7999, "step": 7180 }, { "epoch": 170.97910447761194, "grad_norm": 17.512699127197266, "learning_rate": 9.952380952380954e-06, "loss": 37.333, "step": 7181 }, { "epoch": 171.0, "grad_norm": 31.227685928344727, "learning_rate": 9.951127819548872e-06, "loss": 33.6615, "step": 7182 }, { "epoch": 171.02388059701494, "grad_norm": 22.83246421813965, "learning_rate": 9.949874686716793e-06, "loss": 38.4419, "step": 7183 }, { "epoch": 171.04776119402985, "grad_norm": 40.81578063964844, "learning_rate": 9.948621553884713e-06, "loss": 38.3418, "step": 7184 }, { "epoch": 171.07164179104478, "grad_norm": 34.16019821166992, "learning_rate": 9.947368421052632e-06, "loss": 37.5867, "step": 7185 }, { "epoch": 171.0955223880597, "grad_norm": 35.443870544433594, "learning_rate": 9.946115288220552e-06, "loss": 39.525, "step": 7186 }, { "epoch": 171.11940298507463, "grad_norm": 32.02059555053711, "learning_rate": 9.944862155388472e-06, "loss": 38.8342, "step": 7187 }, { "epoch": 171.14328358208957, "grad_norm": 30.18817710876465, "learning_rate": 9.943609022556391e-06, "loss": 37.9161, "step": 7188 }, { "epoch": 171.16716417910447, "grad_norm": 24.910490036010742, "learning_rate": 9.942355889724311e-06, "loss": 39.0131, "step": 7189 }, { "epoch": 171.1910447761194, "grad_norm": 33.26876449584961, "learning_rate": 9.941102756892232e-06, "loss": 38.0236, "step": 7190 }, { "epoch": 171.21492537313432, "grad_norm": 28.529455184936523, "learning_rate": 9.939849624060152e-06, "loss": 39.2241, "step": 7191 }, { "epoch": 171.23880597014926, "grad_norm": 34.611534118652344, "learning_rate": 9.938596491228071e-06, "loss": 38.5791, "step": 7192 }, { "epoch": 171.26268656716417, "grad_norm": 30.867097854614258, "learning_rate": 9.937343358395991e-06, "loss": 38.9934, "step": 7193 }, { "epoch": 171.2865671641791, "grad_norm": 33.581302642822266, "learning_rate": 9.93609022556391e-06, "loss": 39.1015, "step": 7194 }, { "epoch": 171.31044776119404, "grad_norm": 28.50710678100586, "learning_rate": 9.93483709273183e-06, "loss": 38.525, "step": 7195 }, { "epoch": 171.33432835820895, "grad_norm": 26.710535049438477, "learning_rate": 9.93358395989975e-06, "loss": 38.985, "step": 7196 }, { "epoch": 171.3582089552239, "grad_norm": 22.844213485717773, "learning_rate": 9.93233082706767e-06, "loss": 37.3385, "step": 7197 }, { "epoch": 171.3820895522388, "grad_norm": 38.604583740234375, "learning_rate": 9.93107769423559e-06, "loss": 38.3507, "step": 7198 }, { "epoch": 171.40597014925373, "grad_norm": NaN, "learning_rate": 9.929824561403509e-06, "loss": 54.4909, "step": 7199 }, { "epoch": 171.42985074626867, "grad_norm": 30.092470169067383, "learning_rate": 9.929824561403509e-06, "loss": 38.6197, "step": 7200 }, { "epoch": 171.45373134328358, "grad_norm": 36.72769546508789, "learning_rate": 9.92857142857143e-06, "loss": 37.5002, "step": 7201 }, { "epoch": 171.47761194029852, "grad_norm": 33.80430603027344, "learning_rate": 9.92731829573935e-06, "loss": 38.5208, "step": 7202 }, { "epoch": 171.50149253731342, "grad_norm": 28.850698471069336, "learning_rate": 9.926065162907269e-06, "loss": 38.629, "step": 7203 }, { "epoch": 171.52537313432836, "grad_norm": 26.544612884521484, "learning_rate": 9.924812030075189e-06, "loss": 38.3152, "step": 7204 }, { "epoch": 171.54925373134327, "grad_norm": 28.672277450561523, "learning_rate": 9.923558897243108e-06, "loss": 39.0015, "step": 7205 }, { "epoch": 171.5731343283582, "grad_norm": 26.214168548583984, "learning_rate": 9.92230576441103e-06, "loss": 38.8247, "step": 7206 }, { "epoch": 171.59701492537314, "grad_norm": 37.60875701904297, "learning_rate": 9.921052631578947e-06, "loss": 39.3055, "step": 7207 }, { "epoch": 171.62089552238805, "grad_norm": 32.91227340698242, "learning_rate": 9.919799498746869e-06, "loss": 39.0874, "step": 7208 }, { "epoch": 171.644776119403, "grad_norm": 27.47034454345703, "learning_rate": 9.918546365914787e-06, "loss": 37.7921, "step": 7209 }, { "epoch": 171.6686567164179, "grad_norm": 24.67852210998535, "learning_rate": 9.917293233082708e-06, "loss": 37.8243, "step": 7210 }, { "epoch": 171.69253731343284, "grad_norm": 30.638681411743164, "learning_rate": 9.916040100250628e-06, "loss": 38.2425, "step": 7211 }, { "epoch": 171.71641791044777, "grad_norm": 21.866235733032227, "learning_rate": 9.914786967418547e-06, "loss": 39.4408, "step": 7212 }, { "epoch": 171.74029850746268, "grad_norm": 37.21723175048828, "learning_rate": 9.913533834586467e-06, "loss": 39.3199, "step": 7213 }, { "epoch": 171.76417910447762, "grad_norm": 33.222869873046875, "learning_rate": 9.912280701754386e-06, "loss": 39.3108, "step": 7214 }, { "epoch": 171.78805970149253, "grad_norm": 31.65533447265625, "learning_rate": 9.911027568922308e-06, "loss": 38.7455, "step": 7215 }, { "epoch": 171.81194029850747, "grad_norm": 30.97352409362793, "learning_rate": 9.909774436090226e-06, "loss": 37.6917, "step": 7216 }, { "epoch": 171.83582089552237, "grad_norm": 24.84351921081543, "learning_rate": 9.908521303258147e-06, "loss": 38.2327, "step": 7217 }, { "epoch": 171.8597014925373, "grad_norm": 25.861270904541016, "learning_rate": 9.907268170426066e-06, "loss": 37.2067, "step": 7218 }, { "epoch": 171.88358208955225, "grad_norm": 31.17856216430664, "learning_rate": 9.906015037593986e-06, "loss": 38.2829, "step": 7219 }, { "epoch": 171.90746268656716, "grad_norm": 26.58542823791504, "learning_rate": 9.904761904761906e-06, "loss": 39.0652, "step": 7220 }, { "epoch": 171.9313432835821, "grad_norm": 33.54816436767578, "learning_rate": 9.903508771929825e-06, "loss": 38.1871, "step": 7221 }, { "epoch": 171.955223880597, "grad_norm": 30.45197105407715, "learning_rate": 9.902255639097745e-06, "loss": 38.0343, "step": 7222 }, { "epoch": 171.97910447761194, "grad_norm": 28.675378799438477, "learning_rate": 9.901002506265664e-06, "loss": 38.0105, "step": 7223 }, { "epoch": 172.0, "grad_norm": 22.688058853149414, "learning_rate": 9.899749373433584e-06, "loss": 34.1479, "step": 7224 }, { "epoch": 172.02388059701494, "grad_norm": 29.49295425415039, "learning_rate": 9.898496240601505e-06, "loss": 39.1435, "step": 7225 }, { "epoch": 172.04776119402985, "grad_norm": 24.734025955200195, "learning_rate": 9.897243107769425e-06, "loss": 38.322, "step": 7226 }, { "epoch": 172.07164179104478, "grad_norm": 34.65670394897461, "learning_rate": 9.895989974937344e-06, "loss": 37.4019, "step": 7227 }, { "epoch": 172.0955223880597, "grad_norm": 30.98259925842285, "learning_rate": 9.894736842105264e-06, "loss": 38.6403, "step": 7228 }, { "epoch": 172.11940298507463, "grad_norm": 29.755584716796875, "learning_rate": 9.893483709273184e-06, "loss": 39.1218, "step": 7229 }, { "epoch": 172.14328358208957, "grad_norm": 26.55215835571289, "learning_rate": 9.892230576441103e-06, "loss": 38.5852, "step": 7230 }, { "epoch": 172.16716417910447, "grad_norm": 28.36668586730957, "learning_rate": 9.890977443609023e-06, "loss": 38.6966, "step": 7231 }, { "epoch": 172.1910447761194, "grad_norm": 24.79121971130371, "learning_rate": 9.889724310776944e-06, "loss": 38.3481, "step": 7232 }, { "epoch": 172.21492537313432, "grad_norm": 29.209148406982422, "learning_rate": 9.888471177944862e-06, "loss": 37.8566, "step": 7233 }, { "epoch": 172.23880597014926, "grad_norm": 26.701807022094727, "learning_rate": 9.887218045112783e-06, "loss": 39.5109, "step": 7234 }, { "epoch": 172.26268656716417, "grad_norm": 31.177106857299805, "learning_rate": 9.885964912280703e-06, "loss": 37.8092, "step": 7235 }, { "epoch": 172.2865671641791, "grad_norm": 26.01350212097168, "learning_rate": 9.884711779448623e-06, "loss": 38.2528, "step": 7236 }, { "epoch": 172.31044776119404, "grad_norm": 29.5618896484375, "learning_rate": 9.883458646616542e-06, "loss": 37.5082, "step": 7237 }, { "epoch": 172.33432835820895, "grad_norm": 30.10390281677246, "learning_rate": 9.882205513784462e-06, "loss": 37.9635, "step": 7238 }, { "epoch": 172.3582089552239, "grad_norm": 25.675289154052734, "learning_rate": 9.880952380952381e-06, "loss": 38.3451, "step": 7239 }, { "epoch": 172.3820895522388, "grad_norm": 24.46607780456543, "learning_rate": 9.879699248120301e-06, "loss": 38.9061, "step": 7240 }, { "epoch": 172.40597014925373, "grad_norm": 28.36737060546875, "learning_rate": 9.878446115288222e-06, "loss": 37.5352, "step": 7241 }, { "epoch": 172.42985074626867, "grad_norm": 24.840145111083984, "learning_rate": 9.87719298245614e-06, "loss": 38.7872, "step": 7242 }, { "epoch": 172.45373134328358, "grad_norm": 26.529098510742188, "learning_rate": 9.875939849624061e-06, "loss": 38.0806, "step": 7243 }, { "epoch": 172.47761194029852, "grad_norm": 21.085857391357422, "learning_rate": 9.87468671679198e-06, "loss": 38.301, "step": 7244 }, { "epoch": 172.50149253731342, "grad_norm": 26.893800735473633, "learning_rate": 9.8734335839599e-06, "loss": 37.4983, "step": 7245 }, { "epoch": 172.52537313432836, "grad_norm": 20.84930992126465, "learning_rate": 9.87218045112782e-06, "loss": 37.5785, "step": 7246 }, { "epoch": 172.54925373134327, "grad_norm": 23.662948608398438, "learning_rate": 9.87092731829574e-06, "loss": 37.9118, "step": 7247 }, { "epoch": 172.5731343283582, "grad_norm": 20.506759643554688, "learning_rate": 9.86967418546366e-06, "loss": 38.0004, "step": 7248 }, { "epoch": 172.59701492537314, "grad_norm": 20.3808650970459, "learning_rate": 9.868421052631579e-06, "loss": 39.4201, "step": 7249 }, { "epoch": 172.62089552238805, "grad_norm": 19.416587829589844, "learning_rate": 9.8671679197995e-06, "loss": 38.8031, "step": 7250 }, { "epoch": 172.644776119403, "grad_norm": 20.337444305419922, "learning_rate": 9.86591478696742e-06, "loss": 39.5332, "step": 7251 }, { "epoch": 172.6686567164179, "grad_norm": 19.94097328186035, "learning_rate": 9.86466165413534e-06, "loss": 38.9583, "step": 7252 }, { "epoch": 172.69253731343284, "grad_norm": 18.987834930419922, "learning_rate": 9.86340852130326e-06, "loss": 39.4555, "step": 7253 }, { "epoch": 172.71641791044777, "grad_norm": 17.905500411987305, "learning_rate": 9.862155388471179e-06, "loss": 37.0105, "step": 7254 }, { "epoch": 172.74029850746268, "grad_norm": 16.578981399536133, "learning_rate": 9.860902255639098e-06, "loss": 38.764, "step": 7255 }, { "epoch": 172.76417910447762, "grad_norm": 17.216270446777344, "learning_rate": 9.859649122807018e-06, "loss": 37.615, "step": 7256 }, { "epoch": 172.78805970149253, "grad_norm": 19.038070678710938, "learning_rate": 9.858395989974938e-06, "loss": 38.6936, "step": 7257 }, { "epoch": 172.81194029850747, "grad_norm": NaN, "learning_rate": 9.857142857142859e-06, "loss": 33.444, "step": 7258 }, { "epoch": 172.83582089552237, "grad_norm": 17.612396240234375, "learning_rate": 9.857142857142859e-06, "loss": 38.8854, "step": 7259 }, { "epoch": 172.8597014925373, "grad_norm": 25.871450424194336, "learning_rate": 9.855889724310778e-06, "loss": 39.7183, "step": 7260 }, { "epoch": 172.88358208955225, "grad_norm": 20.912675857543945, "learning_rate": 9.854636591478698e-06, "loss": 38.5365, "step": 7261 }, { "epoch": 172.90746268656716, "grad_norm": 19.9591064453125, "learning_rate": 9.853383458646618e-06, "loss": 38.7913, "step": 7262 }, { "epoch": 172.9313432835821, "grad_norm": 20.852313995361328, "learning_rate": 9.852130325814537e-06, "loss": 38.5725, "step": 7263 }, { "epoch": 172.955223880597, "grad_norm": 16.304344177246094, "learning_rate": 9.850877192982457e-06, "loss": 38.8952, "step": 7264 }, { "epoch": 172.97910447761194, "grad_norm": 21.012598037719727, "learning_rate": 9.849624060150376e-06, "loss": 38.2271, "step": 7265 }, { "epoch": 173.0, "grad_norm": 16.632991790771484, "learning_rate": 9.848370927318298e-06, "loss": 34.2444, "step": 7266 }, { "epoch": 173.02388059701494, "grad_norm": 17.801403045654297, "learning_rate": 9.847117794486216e-06, "loss": 38.2125, "step": 7267 }, { "epoch": 173.04776119402985, "grad_norm": 16.416452407836914, "learning_rate": 9.845864661654137e-06, "loss": 38.6538, "step": 7268 }, { "epoch": 173.07164179104478, "grad_norm": 16.36857032775879, "learning_rate": 9.844611528822055e-06, "loss": 38.9644, "step": 7269 }, { "epoch": 173.0955223880597, "grad_norm": 16.667531967163086, "learning_rate": 9.843358395989976e-06, "loss": 37.3438, "step": 7270 }, { "epoch": 173.11940298507463, "grad_norm": 20.268720626831055, "learning_rate": 9.842105263157896e-06, "loss": 38.4221, "step": 7271 }, { "epoch": 173.14328358208957, "grad_norm": 15.569287300109863, "learning_rate": 9.840852130325815e-06, "loss": 39.1168, "step": 7272 }, { "epoch": 173.16716417910447, "grad_norm": 21.108577728271484, "learning_rate": 9.839598997493735e-06, "loss": 38.7105, "step": 7273 }, { "epoch": 173.1910447761194, "grad_norm": 17.720117568969727, "learning_rate": 9.838345864661655e-06, "loss": 38.4833, "step": 7274 }, { "epoch": 173.21492537313432, "grad_norm": 23.795623779296875, "learning_rate": 9.837092731829576e-06, "loss": 38.9659, "step": 7275 }, { "epoch": 173.23880597014926, "grad_norm": 22.483427047729492, "learning_rate": 9.835839598997494e-06, "loss": 39.7438, "step": 7276 }, { "epoch": 173.26268656716417, "grad_norm": 19.345884323120117, "learning_rate": 9.834586466165415e-06, "loss": 37.2718, "step": 7277 }, { "epoch": 173.2865671641791, "grad_norm": 16.387704849243164, "learning_rate": 9.833333333333333e-06, "loss": 37.6755, "step": 7278 }, { "epoch": 173.31044776119404, "grad_norm": 20.114343643188477, "learning_rate": 9.832080200501254e-06, "loss": 39.3259, "step": 7279 }, { "epoch": 173.33432835820895, "grad_norm": 17.888080596923828, "learning_rate": 9.830827067669174e-06, "loss": 37.8181, "step": 7280 }, { "epoch": 173.3582089552239, "grad_norm": 14.599053382873535, "learning_rate": 9.829573934837093e-06, "loss": 38.5021, "step": 7281 }, { "epoch": 173.3820895522388, "grad_norm": 23.150272369384766, "learning_rate": 9.828320802005013e-06, "loss": 38.7287, "step": 7282 }, { "epoch": 173.40597014925373, "grad_norm": 16.990703582763672, "learning_rate": 9.827067669172933e-06, "loss": 37.7563, "step": 7283 }, { "epoch": 173.42985074626867, "grad_norm": 21.03927230834961, "learning_rate": 9.825814536340852e-06, "loss": 38.8247, "step": 7284 }, { "epoch": 173.45373134328358, "grad_norm": 18.88947868347168, "learning_rate": 9.824561403508772e-06, "loss": 38.5369, "step": 7285 }, { "epoch": 173.47761194029852, "grad_norm": 19.675981521606445, "learning_rate": 9.823308270676693e-06, "loss": 37.2397, "step": 7286 }, { "epoch": 173.50149253731342, "grad_norm": 15.254344940185547, "learning_rate": 9.822055137844613e-06, "loss": 38.7559, "step": 7287 }, { "epoch": 173.52537313432836, "grad_norm": 17.197786331176758, "learning_rate": 9.820802005012532e-06, "loss": 38.6583, "step": 7288 }, { "epoch": 173.54925373134327, "grad_norm": 13.433090209960938, "learning_rate": 9.819548872180452e-06, "loss": 38.2371, "step": 7289 }, { "epoch": 173.5731343283582, "grad_norm": 16.729307174682617, "learning_rate": 9.818295739348372e-06, "loss": 39.3977, "step": 7290 }, { "epoch": 173.59701492537314, "grad_norm": 15.629776000976562, "learning_rate": 9.817042606516291e-06, "loss": 37.9545, "step": 7291 }, { "epoch": 173.62089552238805, "grad_norm": 20.348583221435547, "learning_rate": 9.815789473684212e-06, "loss": 38.7693, "step": 7292 }, { "epoch": 173.644776119403, "grad_norm": 18.90270233154297, "learning_rate": 9.81453634085213e-06, "loss": 39.0774, "step": 7293 }, { "epoch": 173.6686567164179, "grad_norm": 14.576498031616211, "learning_rate": 9.813283208020052e-06, "loss": 38.4017, "step": 7294 }, { "epoch": 173.69253731343284, "grad_norm": 19.90891456604004, "learning_rate": 9.812030075187971e-06, "loss": 38.2823, "step": 7295 }, { "epoch": 173.71641791044777, "grad_norm": 18.241924285888672, "learning_rate": 9.81077694235589e-06, "loss": 37.5477, "step": 7296 }, { "epoch": 173.74029850746268, "grad_norm": 15.419953346252441, "learning_rate": 9.80952380952381e-06, "loss": 38.0637, "step": 7297 }, { "epoch": 173.76417910447762, "grad_norm": 21.94540023803711, "learning_rate": 9.80827067669173e-06, "loss": 37.1823, "step": 7298 }, { "epoch": 173.78805970149253, "grad_norm": 17.8468017578125, "learning_rate": 9.80701754385965e-06, "loss": 38.3753, "step": 7299 }, { "epoch": 173.81194029850747, "grad_norm": 27.14240074157715, "learning_rate": 9.80576441102757e-06, "loss": 39.1886, "step": 7300 }, { "epoch": 173.83582089552237, "grad_norm": 18.001319885253906, "learning_rate": 9.80451127819549e-06, "loss": 38.0588, "step": 7301 }, { "epoch": 173.8597014925373, "grad_norm": 29.474332809448242, "learning_rate": 9.803258145363408e-06, "loss": 38.8461, "step": 7302 }, { "epoch": 173.88358208955225, "grad_norm": 19.030712127685547, "learning_rate": 9.80200501253133e-06, "loss": 38.0905, "step": 7303 }, { "epoch": 173.90746268656716, "grad_norm": 26.412479400634766, "learning_rate": 9.80075187969925e-06, "loss": 38.2451, "step": 7304 }, { "epoch": 173.9313432835821, "grad_norm": 20.873828887939453, "learning_rate": 9.799498746867169e-06, "loss": 37.2942, "step": 7305 }, { "epoch": 173.955223880597, "grad_norm": 25.46503448486328, "learning_rate": 9.798245614035088e-06, "loss": 39.4271, "step": 7306 }, { "epoch": 173.97910447761194, "grad_norm": 19.72415542602539, "learning_rate": 9.796992481203008e-06, "loss": 39.1495, "step": 7307 }, { "epoch": 174.0, "grad_norm": 22.775169372558594, "learning_rate": 9.795739348370928e-06, "loss": 34.0273, "step": 7308 }, { "epoch": 174.02388059701494, "grad_norm": 19.259878158569336, "learning_rate": 9.794486215538847e-06, "loss": 37.977, "step": 7309 }, { "epoch": 174.04776119402985, "grad_norm": 23.16216468811035, "learning_rate": 9.793233082706769e-06, "loss": 38.2491, "step": 7310 }, { "epoch": 174.07164179104478, "grad_norm": 19.84416389465332, "learning_rate": 9.791979949874686e-06, "loss": 37.6288, "step": 7311 }, { "epoch": 174.0955223880597, "grad_norm": 23.899057388305664, "learning_rate": 9.790726817042608e-06, "loss": 38.2218, "step": 7312 }, { "epoch": 174.11940298507463, "grad_norm": 21.903470993041992, "learning_rate": 9.789473684210527e-06, "loss": 38.4567, "step": 7313 }, { "epoch": 174.14328358208957, "grad_norm": 26.155000686645508, "learning_rate": 9.788220551378447e-06, "loss": 38.7748, "step": 7314 }, { "epoch": 174.16716417910447, "grad_norm": 21.06147575378418, "learning_rate": 9.786967418546367e-06, "loss": 38.2962, "step": 7315 }, { "epoch": 174.1910447761194, "grad_norm": 25.352506637573242, "learning_rate": 9.785714285714286e-06, "loss": 38.2831, "step": 7316 }, { "epoch": 174.21492537313432, "grad_norm": 23.535900115966797, "learning_rate": 9.784461152882206e-06, "loss": 39.9764, "step": 7317 }, { "epoch": 174.23880597014926, "grad_norm": 22.830669403076172, "learning_rate": 9.783208020050125e-06, "loss": 39.1042, "step": 7318 }, { "epoch": 174.26268656716417, "grad_norm": 21.910917282104492, "learning_rate": 9.781954887218047e-06, "loss": 38.5363, "step": 7319 }, { "epoch": 174.2865671641791, "grad_norm": 17.074180603027344, "learning_rate": 9.780701754385966e-06, "loss": 37.051, "step": 7320 }, { "epoch": 174.31044776119404, "grad_norm": 19.93785858154297, "learning_rate": 9.779448621553886e-06, "loss": 38.4798, "step": 7321 }, { "epoch": 174.33432835820895, "grad_norm": 22.12788963317871, "learning_rate": 9.778195488721805e-06, "loss": 38.0934, "step": 7322 }, { "epoch": 174.3582089552239, "grad_norm": 17.97043800354004, "learning_rate": 9.776942355889725e-06, "loss": 38.3819, "step": 7323 }, { "epoch": 174.3820895522388, "grad_norm": 20.136077880859375, "learning_rate": 9.775689223057645e-06, "loss": 38.7023, "step": 7324 }, { "epoch": 174.40597014925373, "grad_norm": 16.51250648498535, "learning_rate": 9.774436090225564e-06, "loss": 37.9475, "step": 7325 }, { "epoch": 174.42985074626867, "grad_norm": 21.541324615478516, "learning_rate": 9.773182957393484e-06, "loss": 39.0212, "step": 7326 }, { "epoch": 174.45373134328358, "grad_norm": 17.423656463623047, "learning_rate": 9.771929824561405e-06, "loss": 37.9269, "step": 7327 }, { "epoch": 174.47761194029852, "grad_norm": 18.572166442871094, "learning_rate": 9.770676691729323e-06, "loss": 38.3929, "step": 7328 }, { "epoch": 174.50149253731342, "grad_norm": 19.709980010986328, "learning_rate": 9.769423558897244e-06, "loss": 37.9021, "step": 7329 }, { "epoch": 174.52537313432836, "grad_norm": 20.803659439086914, "learning_rate": 9.768170426065164e-06, "loss": 39.1917, "step": 7330 }, { "epoch": 174.54925373134327, "grad_norm": 17.603025436401367, "learning_rate": 9.766917293233084e-06, "loss": 38.5951, "step": 7331 }, { "epoch": 174.5731343283582, "grad_norm": 20.333627700805664, "learning_rate": 9.765664160401003e-06, "loss": 37.4959, "step": 7332 }, { "epoch": 174.59701492537314, "grad_norm": 17.328895568847656, "learning_rate": 9.764411027568923e-06, "loss": 39.1875, "step": 7333 }, { "epoch": 174.62089552238805, "grad_norm": 20.204282760620117, "learning_rate": 9.763157894736844e-06, "loss": 38.0766, "step": 7334 }, { "epoch": 174.644776119403, "grad_norm": 15.856727600097656, "learning_rate": 9.761904761904762e-06, "loss": 38.9756, "step": 7335 }, { "epoch": 174.6686567164179, "grad_norm": 18.967605590820312, "learning_rate": 9.760651629072683e-06, "loss": 38.378, "step": 7336 }, { "epoch": 174.69253731343284, "grad_norm": 22.51470375061035, "learning_rate": 9.759398496240601e-06, "loss": 37.9415, "step": 7337 }, { "epoch": 174.71641791044777, "grad_norm": 20.97652244567871, "learning_rate": 9.758145363408522e-06, "loss": 38.0416, "step": 7338 }, { "epoch": 174.74029850746268, "grad_norm": 19.052473068237305, "learning_rate": 9.756892230576442e-06, "loss": 39.084, "step": 7339 }, { "epoch": 174.76417910447762, "grad_norm": 15.750896453857422, "learning_rate": 9.755639097744362e-06, "loss": 39.6359, "step": 7340 }, { "epoch": 174.78805970149253, "grad_norm": 21.774534225463867, "learning_rate": 9.754385964912281e-06, "loss": 38.4529, "step": 7341 }, { "epoch": 174.81194029850747, "grad_norm": 17.55640411376953, "learning_rate": 9.7531328320802e-06, "loss": 37.3946, "step": 7342 }, { "epoch": 174.83582089552237, "grad_norm": 21.838682174682617, "learning_rate": 9.751879699248122e-06, "loss": 37.731, "step": 7343 }, { "epoch": 174.8597014925373, "grad_norm": 18.15571403503418, "learning_rate": 9.75062656641604e-06, "loss": 37.917, "step": 7344 }, { "epoch": 174.88358208955225, "grad_norm": 20.560977935791016, "learning_rate": 9.749373433583961e-06, "loss": 37.7593, "step": 7345 }, { "epoch": 174.90746268656716, "grad_norm": 21.18572998046875, "learning_rate": 9.748120300751881e-06, "loss": 37.8903, "step": 7346 }, { "epoch": 174.9313432835821, "grad_norm": 18.545352935791016, "learning_rate": 9.7468671679198e-06, "loss": 37.6087, "step": 7347 }, { "epoch": 174.955223880597, "grad_norm": 21.975116729736328, "learning_rate": 9.74561403508772e-06, "loss": 38.9785, "step": 7348 }, { "epoch": 174.97910447761194, "grad_norm": 18.184467315673828, "learning_rate": 9.74436090225564e-06, "loss": 37.7652, "step": 7349 }, { "epoch": 175.0, "grad_norm": 17.978364944458008, "learning_rate": 9.74310776942356e-06, "loss": 35.1204, "step": 7350 }, { "epoch": 175.02388059701494, "grad_norm": 21.585533142089844, "learning_rate": 9.741854636591479e-06, "loss": 37.4162, "step": 7351 }, { "epoch": 175.04776119402985, "grad_norm": 21.419065475463867, "learning_rate": 9.740601503759399e-06, "loss": 38.1579, "step": 7352 }, { "epoch": 175.07164179104478, "grad_norm": 17.175764083862305, "learning_rate": 9.73934837092732e-06, "loss": 37.2658, "step": 7353 }, { "epoch": 175.0955223880597, "grad_norm": 20.27353286743164, "learning_rate": 9.73809523809524e-06, "loss": 38.8461, "step": 7354 }, { "epoch": 175.11940298507463, "grad_norm": 18.640180587768555, "learning_rate": 9.736842105263159e-06, "loss": 37.937, "step": 7355 }, { "epoch": 175.14328358208957, "grad_norm": 17.626445770263672, "learning_rate": 9.735588972431079e-06, "loss": 37.8605, "step": 7356 }, { "epoch": 175.16716417910447, "grad_norm": 21.346338272094727, "learning_rate": 9.734335839598998e-06, "loss": 38.8235, "step": 7357 }, { "epoch": 175.1910447761194, "grad_norm": NaN, "learning_rate": 9.733082706766918e-06, "loss": 34.0548, "step": 7358 }, { "epoch": 175.21492537313432, "grad_norm": 15.183211326599121, "learning_rate": 9.733082706766918e-06, "loss": 38.7547, "step": 7359 }, { "epoch": 175.23880597014926, "grad_norm": 20.86262321472168, "learning_rate": 9.731829573934837e-06, "loss": 37.4753, "step": 7360 }, { "epoch": 175.26268656716417, "grad_norm": 15.405557632446289, "learning_rate": 9.730576441102759e-06, "loss": 37.4482, "step": 7361 }, { "epoch": 175.2865671641791, "grad_norm": 18.813549041748047, "learning_rate": 9.729323308270677e-06, "loss": 38.8293, "step": 7362 }, { "epoch": 175.31044776119404, "grad_norm": 19.621522903442383, "learning_rate": 9.728070175438598e-06, "loss": 39.3186, "step": 7363 }, { "epoch": 175.33432835820895, "grad_norm": 21.80621337890625, "learning_rate": 9.726817042606517e-06, "loss": 38.1934, "step": 7364 }, { "epoch": 175.3582089552239, "grad_norm": 21.302892684936523, "learning_rate": 9.725563909774437e-06, "loss": 37.5196, "step": 7365 }, { "epoch": 175.3820895522388, "grad_norm": 17.530221939086914, "learning_rate": 9.724310776942357e-06, "loss": 38.3165, "step": 7366 }, { "epoch": 175.40597014925373, "grad_norm": 15.90662956237793, "learning_rate": 9.723057644110276e-06, "loss": 38.0947, "step": 7367 }, { "epoch": 175.42985074626867, "grad_norm": NaN, "learning_rate": 9.721804511278196e-06, "loss": 65.2164, "step": 7368 }, { "epoch": 175.45373134328358, "grad_norm": 18.875221252441406, "learning_rate": 9.721804511278196e-06, "loss": 38.7631, "step": 7369 }, { "epoch": 175.47761194029852, "grad_norm": 17.658750534057617, "learning_rate": 9.720551378446115e-06, "loss": 39.7298, "step": 7370 }, { "epoch": 175.50149253731342, "grad_norm": 16.8253173828125, "learning_rate": 9.719298245614037e-06, "loss": 38.7608, "step": 7371 }, { "epoch": 175.52537313432836, "grad_norm": 14.109174728393555, "learning_rate": 9.718045112781955e-06, "loss": 38.0865, "step": 7372 }, { "epoch": 175.54925373134327, "grad_norm": 16.604694366455078, "learning_rate": 9.716791979949876e-06, "loss": 39.2844, "step": 7373 }, { "epoch": 175.5731343283582, "grad_norm": 20.231338500976562, "learning_rate": 9.715538847117796e-06, "loss": 37.8806, "step": 7374 }, { "epoch": 175.59701492537314, "grad_norm": 22.997631072998047, "learning_rate": 9.714285714285715e-06, "loss": 38.8939, "step": 7375 }, { "epoch": 175.62089552238805, "grad_norm": 19.7714900970459, "learning_rate": 9.713032581453635e-06, "loss": 38.0255, "step": 7376 }, { "epoch": 175.644776119403, "grad_norm": 15.104757308959961, "learning_rate": 9.711779448621554e-06, "loss": 37.361, "step": 7377 }, { "epoch": 175.6686567164179, "grad_norm": 16.79823112487793, "learning_rate": 9.710526315789474e-06, "loss": 38.6268, "step": 7378 }, { "epoch": 175.69253731343284, "grad_norm": 15.565764427185059, "learning_rate": 9.709273182957394e-06, "loss": 38.7304, "step": 7379 }, { "epoch": 175.71641791044777, "grad_norm": 14.454784393310547, "learning_rate": 9.708020050125315e-06, "loss": 39.039, "step": 7380 }, { "epoch": 175.74029850746268, "grad_norm": 14.47907543182373, "learning_rate": 9.706766917293234e-06, "loss": 38.231, "step": 7381 }, { "epoch": 175.76417910447762, "grad_norm": 15.941643714904785, "learning_rate": 9.705513784461154e-06, "loss": 38.1181, "step": 7382 }, { "epoch": 175.78805970149253, "grad_norm": 19.6253662109375, "learning_rate": 9.704260651629074e-06, "loss": 38.4808, "step": 7383 }, { "epoch": 175.81194029850747, "grad_norm": 16.49032211303711, "learning_rate": 9.703007518796993e-06, "loss": 38.1848, "step": 7384 }, { "epoch": 175.83582089552237, "grad_norm": 14.712738037109375, "learning_rate": 9.701754385964913e-06, "loss": 37.8768, "step": 7385 }, { "epoch": 175.8597014925373, "grad_norm": 12.555728912353516, "learning_rate": 9.700501253132832e-06, "loss": 38.9321, "step": 7386 }, { "epoch": 175.88358208955225, "grad_norm": 15.138301849365234, "learning_rate": 9.699248120300752e-06, "loss": 39.1631, "step": 7387 }, { "epoch": 175.90746268656716, "grad_norm": 14.10248851776123, "learning_rate": 9.697994987468673e-06, "loss": 36.9886, "step": 7388 }, { "epoch": 175.9313432835821, "grad_norm": 15.674737930297852, "learning_rate": 9.696741854636593e-06, "loss": 38.6095, "step": 7389 }, { "epoch": 175.955223880597, "grad_norm": 17.84684944152832, "learning_rate": 9.695488721804513e-06, "loss": 38.2303, "step": 7390 }, { "epoch": 175.97910447761194, "grad_norm": 20.122066497802734, "learning_rate": 9.694235588972432e-06, "loss": 39.0045, "step": 7391 }, { "epoch": 176.0, "grad_norm": 17.95144271850586, "learning_rate": 9.692982456140352e-06, "loss": 32.8977, "step": 7392 }, { "epoch": 176.02388059701494, "grad_norm": 14.381842613220215, "learning_rate": 9.691729323308271e-06, "loss": 38.9414, "step": 7393 }, { "epoch": 176.04776119402985, "grad_norm": 18.826648712158203, "learning_rate": 9.690476190476191e-06, "loss": 38.05, "step": 7394 }, { "epoch": 176.07164179104478, "grad_norm": 18.625883102416992, "learning_rate": 9.689223057644112e-06, "loss": 39.3167, "step": 7395 }, { "epoch": 176.0955223880597, "grad_norm": 19.133636474609375, "learning_rate": 9.68796992481203e-06, "loss": 38.3765, "step": 7396 }, { "epoch": 176.11940298507463, "grad_norm": 16.876758575439453, "learning_rate": 9.686716791979951e-06, "loss": 38.8189, "step": 7397 }, { "epoch": 176.14328358208957, "grad_norm": 20.237958908081055, "learning_rate": 9.68546365914787e-06, "loss": 37.9682, "step": 7398 }, { "epoch": 176.16716417910447, "grad_norm": 19.831436157226562, "learning_rate": 9.68421052631579e-06, "loss": 38.1386, "step": 7399 }, { "epoch": 176.1910447761194, "grad_norm": 17.909395217895508, "learning_rate": 9.68295739348371e-06, "loss": 37.6457, "step": 7400 }, { "epoch": 176.21492537313432, "grad_norm": 15.805506706237793, "learning_rate": 9.68170426065163e-06, "loss": 37.7815, "step": 7401 }, { "epoch": 176.23880597014926, "grad_norm": 16.30780601501465, "learning_rate": 9.68045112781955e-06, "loss": 38.5857, "step": 7402 }, { "epoch": 176.26268656716417, "grad_norm": 13.730635643005371, "learning_rate": 9.679197994987469e-06, "loss": 38.3596, "step": 7403 }, { "epoch": 176.2865671641791, "grad_norm": 16.07013511657715, "learning_rate": 9.67794486215539e-06, "loss": 37.8236, "step": 7404 }, { "epoch": 176.31044776119404, "grad_norm": 13.732840538024902, "learning_rate": 9.676691729323308e-06, "loss": 38.4655, "step": 7405 }, { "epoch": 176.33432835820895, "grad_norm": 13.604117393493652, "learning_rate": 9.67543859649123e-06, "loss": 39.0092, "step": 7406 }, { "epoch": 176.3582089552239, "grad_norm": 17.90340232849121, "learning_rate": 9.674185463659147e-06, "loss": 38.4551, "step": 7407 }, { "epoch": 176.3820895522388, "grad_norm": 19.416580200195312, "learning_rate": 9.672932330827069e-06, "loss": 37.8589, "step": 7408 }, { "epoch": 176.40597014925373, "grad_norm": 18.84051513671875, "learning_rate": 9.671679197994988e-06, "loss": 37.1731, "step": 7409 }, { "epoch": 176.42985074626867, "grad_norm": 15.683023452758789, "learning_rate": 9.670426065162908e-06, "loss": 38.6623, "step": 7410 }, { "epoch": 176.45373134328358, "grad_norm": 15.627781867980957, "learning_rate": 9.669172932330828e-06, "loss": 39.5563, "step": 7411 }, { "epoch": 176.47761194029852, "grad_norm": 18.245759963989258, "learning_rate": 9.667919799498747e-06, "loss": 38.0547, "step": 7412 }, { "epoch": 176.50149253731342, "grad_norm": 28.857357025146484, "learning_rate": 9.666666666666667e-06, "loss": 38.2534, "step": 7413 }, { "epoch": 176.52537313432836, "grad_norm": 16.545024871826172, "learning_rate": 9.665413533834588e-06, "loss": 37.7573, "step": 7414 }, { "epoch": 176.54925373134327, "grad_norm": 32.44770431518555, "learning_rate": 9.664160401002508e-06, "loss": 39.0261, "step": 7415 }, { "epoch": 176.5731343283582, "grad_norm": 22.43410301208496, "learning_rate": 9.662907268170427e-06, "loss": 38.1903, "step": 7416 }, { "epoch": 176.59701492537314, "grad_norm": 29.712522506713867, "learning_rate": 9.661654135338347e-06, "loss": 38.1701, "step": 7417 }, { "epoch": 176.62089552238805, "grad_norm": 20.179025650024414, "learning_rate": 9.660401002506266e-06, "loss": 38.8206, "step": 7418 }, { "epoch": 176.644776119403, "grad_norm": 23.98577308654785, "learning_rate": 9.659147869674186e-06, "loss": 39.189, "step": 7419 }, { "epoch": 176.6686567164179, "grad_norm": 24.150781631469727, "learning_rate": 9.657894736842106e-06, "loss": 38.2772, "step": 7420 }, { "epoch": 176.69253731343284, "grad_norm": 16.749544143676758, "learning_rate": 9.656641604010027e-06, "loss": 38.9518, "step": 7421 }, { "epoch": 176.71641791044777, "grad_norm": 26.16396141052246, "learning_rate": 9.655388471177945e-06, "loss": 37.7769, "step": 7422 }, { "epoch": 176.74029850746268, "grad_norm": 22.416610717773438, "learning_rate": 9.654135338345866e-06, "loss": 38.0703, "step": 7423 }, { "epoch": 176.76417910447762, "grad_norm": 14.045994758605957, "learning_rate": 9.652882205513786e-06, "loss": 39.5412, "step": 7424 }, { "epoch": 176.78805970149253, "grad_norm": 29.801090240478516, "learning_rate": 9.651629072681705e-06, "loss": 36.6879, "step": 7425 }, { "epoch": 176.81194029850747, "grad_norm": 16.378732681274414, "learning_rate": 9.650375939849625e-06, "loss": 36.7957, "step": 7426 }, { "epoch": 176.83582089552237, "grad_norm": 29.72284507751465, "learning_rate": 9.649122807017545e-06, "loss": 38.8986, "step": 7427 }, { "epoch": 176.8597014925373, "grad_norm": NaN, "learning_rate": 9.647869674185464e-06, "loss": 32.0868, "step": 7428 }, { "epoch": 176.88358208955225, "grad_norm": 22.460494995117188, "learning_rate": 9.647869674185464e-06, "loss": 38.4721, "step": 7429 }, { "epoch": 176.90746268656716, "grad_norm": 22.48520851135254, "learning_rate": 9.646616541353384e-06, "loss": 38.0989, "step": 7430 }, { "epoch": 176.9313432835821, "grad_norm": 24.938936233520508, "learning_rate": 9.645363408521305e-06, "loss": 38.0467, "step": 7431 }, { "epoch": 176.955223880597, "grad_norm": 19.816362380981445, "learning_rate": 9.644110275689223e-06, "loss": 38.3439, "step": 7432 }, { "epoch": 176.97910447761194, "grad_norm": 33.925724029541016, "learning_rate": 9.642857142857144e-06, "loss": 37.6431, "step": 7433 }, { "epoch": 177.0, "grad_norm": 20.18031120300293, "learning_rate": 9.641604010025064e-06, "loss": 33.7928, "step": 7434 }, { "epoch": 177.02388059701494, "grad_norm": 40.42418670654297, "learning_rate": 9.640350877192983e-06, "loss": 38.1969, "step": 7435 }, { "epoch": 177.04776119402985, "grad_norm": 32.65384292602539, "learning_rate": 9.639097744360903e-06, "loss": 38.2825, "step": 7436 }, { "epoch": 177.07164179104478, "grad_norm": 40.55938720703125, "learning_rate": 9.637844611528823e-06, "loss": 38.3454, "step": 7437 }, { "epoch": 177.0955223880597, "grad_norm": 40.250762939453125, "learning_rate": 9.636591478696742e-06, "loss": 36.7613, "step": 7438 }, { "epoch": 177.11940298507463, "grad_norm": 22.756441116333008, "learning_rate": 9.635338345864662e-06, "loss": 38.4782, "step": 7439 }, { "epoch": 177.14328358208957, "grad_norm": 25.255971908569336, "learning_rate": 9.634085213032583e-06, "loss": 38.5564, "step": 7440 }, { "epoch": 177.16716417910447, "grad_norm": 32.509010314941406, "learning_rate": 9.632832080200501e-06, "loss": 37.9028, "step": 7441 }, { "epoch": 177.1910447761194, "grad_norm": 26.76149559020996, "learning_rate": 9.631578947368422e-06, "loss": 39.4661, "step": 7442 }, { "epoch": 177.21492537313432, "grad_norm": 35.867462158203125, "learning_rate": 9.630325814536342e-06, "loss": 38.6241, "step": 7443 }, { "epoch": 177.23880597014926, "grad_norm": 31.468015670776367, "learning_rate": 9.629072681704261e-06, "loss": 37.8266, "step": 7444 }, { "epoch": 177.26268656716417, "grad_norm": 35.157798767089844, "learning_rate": 9.627819548872181e-06, "loss": 38.3938, "step": 7445 }, { "epoch": 177.2865671641791, "grad_norm": 33.04148483276367, "learning_rate": 9.6265664160401e-06, "loss": 37.382, "step": 7446 }, { "epoch": 177.31044776119404, "grad_norm": 29.57913589477539, "learning_rate": 9.62531328320802e-06, "loss": 37.5585, "step": 7447 }, { "epoch": 177.33432835820895, "grad_norm": 27.25524139404297, "learning_rate": 9.62406015037594e-06, "loss": 37.4982, "step": 7448 }, { "epoch": 177.3582089552239, "grad_norm": NaN, "learning_rate": 9.622807017543861e-06, "loss": 34.8127, "step": 7449 }, { "epoch": 177.3820895522388, "grad_norm": 33.32447814941406, "learning_rate": 9.622807017543861e-06, "loss": 37.4434, "step": 7450 }, { "epoch": 177.40597014925373, "grad_norm": 29.68785285949707, "learning_rate": 9.62155388471178e-06, "loss": 37.8705, "step": 7451 }, { "epoch": 177.42985074626867, "grad_norm": NaN, "learning_rate": 9.6203007518797e-06, "loss": 41.4018, "step": 7452 }, { "epoch": 177.45373134328358, "grad_norm": 32.368263244628906, "learning_rate": 9.6203007518797e-06, "loss": 38.2835, "step": 7453 }, { "epoch": 177.47761194029852, "grad_norm": 29.269750595092773, "learning_rate": 9.61904761904762e-06, "loss": 37.714, "step": 7454 }, { "epoch": 177.50149253731342, "grad_norm": 30.023723602294922, "learning_rate": 9.61779448621554e-06, "loss": 37.1855, "step": 7455 }, { "epoch": 177.52537313432836, "grad_norm": 25.97041130065918, "learning_rate": 9.61654135338346e-06, "loss": 38.573, "step": 7456 }, { "epoch": 177.54925373134327, "grad_norm": 32.41938018798828, "learning_rate": 9.61528822055138e-06, "loss": 39.1859, "step": 7457 }, { "epoch": 177.5731343283582, "grad_norm": 30.231359481811523, "learning_rate": 9.614035087719298e-06, "loss": 39.3906, "step": 7458 }, { "epoch": 177.59701492537314, "grad_norm": 34.94846725463867, "learning_rate": 9.61278195488722e-06, "loss": 39.9365, "step": 7459 }, { "epoch": 177.62089552238805, "grad_norm": 34.16421127319336, "learning_rate": 9.611528822055138e-06, "loss": 37.2807, "step": 7460 }, { "epoch": 177.644776119403, "grad_norm": 27.481935501098633, "learning_rate": 9.610275689223059e-06, "loss": 38.0588, "step": 7461 }, { "epoch": 177.6686567164179, "grad_norm": 22.71653938293457, "learning_rate": 9.609022556390978e-06, "loss": 37.773, "step": 7462 }, { "epoch": 177.69253731343284, "grad_norm": 33.87922668457031, "learning_rate": 9.607769423558898e-06, "loss": 37.8048, "step": 7463 }, { "epoch": 177.71641791044777, "grad_norm": 24.821271896362305, "learning_rate": 9.606516290726818e-06, "loss": 37.876, "step": 7464 }, { "epoch": 177.74029850746268, "grad_norm": 37.070491790771484, "learning_rate": 9.605263157894737e-06, "loss": 38.6927, "step": 7465 }, { "epoch": 177.76417910447762, "grad_norm": 31.79026222229004, "learning_rate": 9.604010025062659e-06, "loss": 37.8024, "step": 7466 }, { "epoch": 177.78805970149253, "grad_norm": 29.7656307220459, "learning_rate": 9.602756892230576e-06, "loss": 38.6212, "step": 7467 }, { "epoch": 177.81194029850747, "grad_norm": 26.21623992919922, "learning_rate": 9.601503759398498e-06, "loss": 37.9078, "step": 7468 }, { "epoch": 177.83582089552237, "grad_norm": 34.19346618652344, "learning_rate": 9.600250626566416e-06, "loss": 37.8592, "step": 7469 }, { "epoch": 177.8597014925373, "grad_norm": 31.018447875976562, "learning_rate": 9.598997493734337e-06, "loss": 39.7333, "step": 7470 }, { "epoch": 177.88358208955225, "grad_norm": 33.910614013671875, "learning_rate": 9.597744360902257e-06, "loss": 38.2207, "step": 7471 }, { "epoch": 177.90746268656716, "grad_norm": 29.57449722290039, "learning_rate": 9.596491228070176e-06, "loss": 37.8515, "step": 7472 }, { "epoch": 177.9313432835821, "grad_norm": 29.0955810546875, "learning_rate": 9.595238095238096e-06, "loss": 39.3709, "step": 7473 }, { "epoch": 177.955223880597, "grad_norm": 22.823320388793945, "learning_rate": 9.593984962406015e-06, "loss": 38.6859, "step": 7474 }, { "epoch": 177.97910447761194, "grad_norm": 33.68880844116211, "learning_rate": 9.592731829573937e-06, "loss": 37.424, "step": 7475 }, { "epoch": 178.0, "grad_norm": 22.224315643310547, "learning_rate": 9.591478696741855e-06, "loss": 33.0249, "step": 7476 }, { "epoch": 178.02388059701494, "grad_norm": 34.6712646484375, "learning_rate": 9.590225563909776e-06, "loss": 38.8583, "step": 7477 }, { "epoch": 178.04776119402985, "grad_norm": 32.04248809814453, "learning_rate": 9.588972431077695e-06, "loss": 38.3, "step": 7478 }, { "epoch": 178.07164179104478, "grad_norm": 27.30583381652832, "learning_rate": 9.587719298245615e-06, "loss": 37.78, "step": 7479 }, { "epoch": 178.0955223880597, "grad_norm": 27.105405807495117, "learning_rate": 9.586466165413535e-06, "loss": 38.0345, "step": 7480 }, { "epoch": 178.11940298507463, "grad_norm": 26.92739486694336, "learning_rate": 9.585213032581454e-06, "loss": 37.9945, "step": 7481 }, { "epoch": 178.14328358208957, "grad_norm": 24.58989715576172, "learning_rate": 9.583959899749374e-06, "loss": 37.7868, "step": 7482 }, { "epoch": 178.16716417910447, "grad_norm": 35.88637924194336, "learning_rate": 9.582706766917293e-06, "loss": 37.5245, "step": 7483 }, { "epoch": 178.1910447761194, "grad_norm": 30.281505584716797, "learning_rate": 9.581453634085213e-06, "loss": 37.7936, "step": 7484 }, { "epoch": 178.21492537313432, "grad_norm": 28.63441276550293, "learning_rate": 9.580200501253134e-06, "loss": 39.1822, "step": 7485 }, { "epoch": 178.23880597014926, "grad_norm": 27.02237319946289, "learning_rate": 9.578947368421054e-06, "loss": 38.5349, "step": 7486 }, { "epoch": 178.26268656716417, "grad_norm": 32.959190368652344, "learning_rate": 9.577694235588974e-06, "loss": 37.9876, "step": 7487 }, { "epoch": 178.2865671641791, "grad_norm": 25.708955764770508, "learning_rate": 9.576441102756893e-06, "loss": 36.7052, "step": 7488 }, { "epoch": 178.31044776119404, "grad_norm": 33.02278137207031, "learning_rate": 9.575187969924813e-06, "loss": 37.8834, "step": 7489 }, { "epoch": 178.33432835820895, "grad_norm": 30.28676986694336, "learning_rate": 9.573934837092732e-06, "loss": 38.6412, "step": 7490 }, { "epoch": 178.3582089552239, "grad_norm": 28.039459228515625, "learning_rate": 9.572681704260652e-06, "loss": 37.4207, "step": 7491 }, { "epoch": 178.3820895522388, "grad_norm": 20.65064239501953, "learning_rate": 9.571428571428573e-06, "loss": 37.3706, "step": 7492 }, { "epoch": 178.40597014925373, "grad_norm": 30.815134048461914, "learning_rate": 9.570175438596491e-06, "loss": 38.8579, "step": 7493 }, { "epoch": 178.42985074626867, "grad_norm": 27.219388961791992, "learning_rate": 9.568922305764412e-06, "loss": 38.3453, "step": 7494 }, { "epoch": 178.45373134328358, "grad_norm": 33.38025665283203, "learning_rate": 9.567669172932332e-06, "loss": 36.6813, "step": 7495 }, { "epoch": 178.47761194029852, "grad_norm": 30.232894897460938, "learning_rate": 9.566416040100252e-06, "loss": 37.8682, "step": 7496 }, { "epoch": 178.50149253731342, "grad_norm": 29.54288673400879, "learning_rate": 9.565162907268171e-06, "loss": 39.1899, "step": 7497 }, { "epoch": 178.52537313432836, "grad_norm": 29.446496963500977, "learning_rate": 9.56390977443609e-06, "loss": 38.4056, "step": 7498 }, { "epoch": 178.54925373134327, "grad_norm": 30.845216751098633, "learning_rate": 9.56265664160401e-06, "loss": 38.3574, "step": 7499 }, { "epoch": 178.5731343283582, "grad_norm": 26.717031478881836, "learning_rate": 9.56140350877193e-06, "loss": 37.8946, "step": 7500 }, { "epoch": 178.59701492537314, "grad_norm": 31.20941925048828, "learning_rate": 9.560150375939851e-06, "loss": 38.3069, "step": 7501 }, { "epoch": 178.62089552238805, "grad_norm": 25.0770206451416, "learning_rate": 9.55889724310777e-06, "loss": 38.1639, "step": 7502 }, { "epoch": 178.644776119403, "grad_norm": 30.205888748168945, "learning_rate": 9.55764411027569e-06, "loss": 37.514, "step": 7503 }, { "epoch": 178.6686567164179, "grad_norm": 27.877737045288086, "learning_rate": 9.55639097744361e-06, "loss": 37.3937, "step": 7504 }, { "epoch": 178.69253731343284, "grad_norm": 31.21794319152832, "learning_rate": 9.55513784461153e-06, "loss": 38.6557, "step": 7505 }, { "epoch": 178.71641791044777, "grad_norm": 26.74827766418457, "learning_rate": 9.55388471177945e-06, "loss": 39.0042, "step": 7506 }, { "epoch": 178.74029850746268, "grad_norm": 32.50165939331055, "learning_rate": 9.552631578947369e-06, "loss": 38.5628, "step": 7507 }, { "epoch": 178.76417910447762, "grad_norm": 28.316530227661133, "learning_rate": 9.551378446115288e-06, "loss": 39.1192, "step": 7508 }, { "epoch": 178.78805970149253, "grad_norm": 26.695558547973633, "learning_rate": 9.550125313283208e-06, "loss": 37.2427, "step": 7509 }, { "epoch": 178.81194029850747, "grad_norm": 27.85847282409668, "learning_rate": 9.54887218045113e-06, "loss": 38.7848, "step": 7510 }, { "epoch": 178.83582089552237, "grad_norm": 30.937238693237305, "learning_rate": 9.547619047619049e-06, "loss": 37.194, "step": 7511 }, { "epoch": 178.8597014925373, "grad_norm": 26.466461181640625, "learning_rate": 9.546365914786969e-06, "loss": 38.8251, "step": 7512 }, { "epoch": 178.88358208955225, "grad_norm": 32.745391845703125, "learning_rate": 9.545112781954888e-06, "loss": 38.0742, "step": 7513 }, { "epoch": 178.90746268656716, "grad_norm": 29.391193389892578, "learning_rate": 9.543859649122808e-06, "loss": 38.6374, "step": 7514 }, { "epoch": 178.9313432835821, "grad_norm": 24.619367599487305, "learning_rate": 9.542606516290727e-06, "loss": 38.7896, "step": 7515 }, { "epoch": 178.955223880597, "grad_norm": 23.773025512695312, "learning_rate": 9.541353383458647e-06, "loss": 38.2214, "step": 7516 }, { "epoch": 178.97910447761194, "grad_norm": NaN, "learning_rate": 9.540100250626567e-06, "loss": 66.8795, "step": 7517 }, { "epoch": 179.0, "grad_norm": 26.397310256958008, "learning_rate": 9.540100250626567e-06, "loss": 34.0341, "step": 7518 }, { "epoch": 179.02388059701494, "grad_norm": 23.14042091369629, "learning_rate": 9.538847117794488e-06, "loss": 38.0085, "step": 7519 }, { "epoch": 179.04776119402985, "grad_norm": 34.07661437988281, "learning_rate": 9.537593984962407e-06, "loss": 38.3437, "step": 7520 }, { "epoch": 179.07164179104478, "grad_norm": 31.97378921508789, "learning_rate": 9.536340852130327e-06, "loss": 39.0287, "step": 7521 }, { "epoch": 179.0955223880597, "grad_norm": 26.95208740234375, "learning_rate": 9.535087719298247e-06, "loss": 38.9776, "step": 7522 }, { "epoch": 179.11940298507463, "grad_norm": 25.850631713867188, "learning_rate": 9.533834586466166e-06, "loss": 37.6615, "step": 7523 }, { "epoch": 179.14328358208957, "grad_norm": 29.238176345825195, "learning_rate": 9.532581453634086e-06, "loss": 39.0653, "step": 7524 }, { "epoch": 179.16716417910447, "grad_norm": 25.66439437866211, "learning_rate": 9.531328320802005e-06, "loss": 38.5403, "step": 7525 }, { "epoch": 179.1910447761194, "grad_norm": 27.784435272216797, "learning_rate": 9.530075187969927e-06, "loss": 38.501, "step": 7526 }, { "epoch": 179.21492537313432, "grad_norm": 27.187753677368164, "learning_rate": 9.528822055137845e-06, "loss": 38.6573, "step": 7527 }, { "epoch": 179.23880597014926, "grad_norm": 29.330095291137695, "learning_rate": 9.527568922305766e-06, "loss": 38.9352, "step": 7528 }, { "epoch": 179.26268656716417, "grad_norm": 28.7341365814209, "learning_rate": 9.526315789473684e-06, "loss": 37.9328, "step": 7529 }, { "epoch": 179.2865671641791, "grad_norm": 28.210481643676758, "learning_rate": 9.525062656641605e-06, "loss": 37.826, "step": 7530 }, { "epoch": 179.31044776119404, "grad_norm": 25.721118927001953, "learning_rate": 9.523809523809525e-06, "loss": 36.9992, "step": 7531 }, { "epoch": 179.33432835820895, "grad_norm": 29.90156364440918, "learning_rate": 9.522556390977444e-06, "loss": 38.8308, "step": 7532 }, { "epoch": 179.3582089552239, "grad_norm": 23.75611114501953, "learning_rate": 9.521303258145364e-06, "loss": 37.236, "step": 7533 }, { "epoch": 179.3820895522388, "grad_norm": 28.6463565826416, "learning_rate": 9.520050125313284e-06, "loss": 37.9837, "step": 7534 }, { "epoch": 179.40597014925373, "grad_norm": 22.911027908325195, "learning_rate": 9.518796992481205e-06, "loss": 37.6443, "step": 7535 }, { "epoch": 179.42985074626867, "grad_norm": 32.341163635253906, "learning_rate": 9.517543859649123e-06, "loss": 38.261, "step": 7536 }, { "epoch": 179.45373134328358, "grad_norm": 25.994626998901367, "learning_rate": 9.516290726817044e-06, "loss": 37.7336, "step": 7537 }, { "epoch": 179.47761194029852, "grad_norm": 32.042869567871094, "learning_rate": 9.515037593984964e-06, "loss": 38.5627, "step": 7538 }, { "epoch": 179.50149253731342, "grad_norm": 28.385757446289062, "learning_rate": 9.513784461152883e-06, "loss": 38.0611, "step": 7539 }, { "epoch": 179.52537313432836, "grad_norm": 26.522703170776367, "learning_rate": 9.512531328320803e-06, "loss": 38.3019, "step": 7540 }, { "epoch": 179.54925373134327, "grad_norm": 25.935222625732422, "learning_rate": 9.511278195488722e-06, "loss": 37.8566, "step": 7541 }, { "epoch": 179.5731343283582, "grad_norm": 30.307241439819336, "learning_rate": 9.510025062656642e-06, "loss": 38.7433, "step": 7542 }, { "epoch": 179.59701492537314, "grad_norm": 25.107316970825195, "learning_rate": 9.508771929824562e-06, "loss": 36.395, "step": 7543 }, { "epoch": 179.62089552238805, "grad_norm": 32.13312530517578, "learning_rate": 9.507518796992481e-06, "loss": 38.4197, "step": 7544 }, { "epoch": 179.644776119403, "grad_norm": 28.332002639770508, "learning_rate": 9.506265664160403e-06, "loss": 37.7446, "step": 7545 }, { "epoch": 179.6686567164179, "grad_norm": 28.015735626220703, "learning_rate": 9.505012531328322e-06, "loss": 38.1281, "step": 7546 }, { "epoch": 179.69253731343284, "grad_norm": 26.351720809936523, "learning_rate": 9.503759398496242e-06, "loss": 38.4677, "step": 7547 }, { "epoch": 179.71641791044777, "grad_norm": 28.444782257080078, "learning_rate": 9.502506265664161e-06, "loss": 37.1134, "step": 7548 }, { "epoch": 179.74029850746268, "grad_norm": 23.317214965820312, "learning_rate": 9.501253132832081e-06, "loss": 36.6965, "step": 7549 }, { "epoch": 179.76417910447762, "grad_norm": 35.22730255126953, "learning_rate": 9.5e-06, "loss": 38.5933, "step": 7550 }, { "epoch": 179.78805970149253, "grad_norm": 28.624221801757812, "learning_rate": 9.49874686716792e-06, "loss": 38.145, "step": 7551 }, { "epoch": 179.81194029850747, "grad_norm": 34.93153381347656, "learning_rate": 9.497493734335841e-06, "loss": 38.0996, "step": 7552 }, { "epoch": 179.83582089552237, "grad_norm": 30.9583797454834, "learning_rate": 9.49624060150376e-06, "loss": 38.9154, "step": 7553 }, { "epoch": 179.8597014925373, "grad_norm": NaN, "learning_rate": 9.49498746867168e-06, "loss": 51.9797, "step": 7554 }, { "epoch": 179.88358208955225, "grad_norm": 27.24198341369629, "learning_rate": 9.49498746867168e-06, "loss": 38.1387, "step": 7555 }, { "epoch": 179.90746268656716, "grad_norm": 28.509775161743164, "learning_rate": 9.4937343358396e-06, "loss": 38.1909, "step": 7556 }, { "epoch": 179.9313432835821, "grad_norm": 27.539745330810547, "learning_rate": 9.49248120300752e-06, "loss": 37.5785, "step": 7557 }, { "epoch": 179.955223880597, "grad_norm": 23.719282150268555, "learning_rate": 9.49122807017544e-06, "loss": 38.5756, "step": 7558 }, { "epoch": 179.97910447761194, "grad_norm": 32.51416015625, "learning_rate": 9.489974937343359e-06, "loss": 38.3331, "step": 7559 }, { "epoch": 180.0, "grad_norm": 21.694049835205078, "learning_rate": 9.488721804511279e-06, "loss": 34.0386, "step": 7560 }, { "epoch": 180.02388059701494, "grad_norm": 34.182586669921875, "learning_rate": 9.487468671679198e-06, "loss": 38.373, "step": 7561 }, { "epoch": 180.04776119402985, "grad_norm": 28.496118545532227, "learning_rate": 9.48621553884712e-06, "loss": 37.3118, "step": 7562 }, { "epoch": 180.07164179104478, "grad_norm": 30.934417724609375, "learning_rate": 9.484962406015037e-06, "loss": 37.2624, "step": 7563 }, { "epoch": 180.0955223880597, "grad_norm": 26.485118865966797, "learning_rate": 9.483709273182959e-06, "loss": 37.9195, "step": 7564 }, { "epoch": 180.11940298507463, "grad_norm": 27.35857391357422, "learning_rate": 9.482456140350878e-06, "loss": 39.061, "step": 7565 }, { "epoch": 180.14328358208957, "grad_norm": 24.543331146240234, "learning_rate": 9.481203007518798e-06, "loss": 38.2442, "step": 7566 }, { "epoch": 180.16716417910447, "grad_norm": NaN, "learning_rate": 9.479949874686717e-06, "loss": 62.5496, "step": 7567 }, { "epoch": 180.1910447761194, "grad_norm": 31.08120346069336, "learning_rate": 9.479949874686717e-06, "loss": 38.2847, "step": 7568 }, { "epoch": 180.21492537313432, "grad_norm": 24.63750457763672, "learning_rate": 9.478696741854637e-06, "loss": 37.3386, "step": 7569 }, { "epoch": 180.23880597014926, "grad_norm": 31.72042465209961, "learning_rate": 9.477443609022557e-06, "loss": 38.5061, "step": 7570 }, { "epoch": 180.26268656716417, "grad_norm": 28.289594650268555, "learning_rate": 9.476190476190476e-06, "loss": 39.2517, "step": 7571 }, { "epoch": 180.2865671641791, "grad_norm": 28.50221824645996, "learning_rate": 9.474937343358398e-06, "loss": 36.671, "step": 7572 }, { "epoch": 180.31044776119404, "grad_norm": 25.799354553222656, "learning_rate": 9.473684210526315e-06, "loss": 38.8831, "step": 7573 }, { "epoch": 180.33432835820895, "grad_norm": 32.123512268066406, "learning_rate": 9.472431077694237e-06, "loss": 38.4682, "step": 7574 }, { "epoch": 180.3582089552239, "grad_norm": 27.540674209594727, "learning_rate": 9.471177944862156e-06, "loss": 38.3273, "step": 7575 }, { "epoch": 180.3820895522388, "grad_norm": NaN, "learning_rate": 9.469924812030076e-06, "loss": 42.6944, "step": 7576 }, { "epoch": 180.40597014925373, "grad_norm": 31.53324317932129, "learning_rate": 9.469924812030076e-06, "loss": 37.8429, "step": 7577 }, { "epoch": 180.42985074626867, "grad_norm": 28.76616859436035, "learning_rate": 9.468671679197996e-06, "loss": 38.9031, "step": 7578 }, { "epoch": 180.45373134328358, "grad_norm": 27.742734909057617, "learning_rate": 9.467418546365915e-06, "loss": 38.2788, "step": 7579 }, { "epoch": 180.47761194029852, "grad_norm": 23.84189224243164, "learning_rate": 9.466165413533835e-06, "loss": 37.3147, "step": 7580 }, { "epoch": 180.50149253731342, "grad_norm": 27.617036819458008, "learning_rate": 9.464912280701754e-06, "loss": 38.3207, "step": 7581 }, { "epoch": 180.52537313432836, "grad_norm": 23.339120864868164, "learning_rate": 9.463659147869676e-06, "loss": 37.9027, "step": 7582 }, { "epoch": 180.54925373134327, "grad_norm": 28.931547164916992, "learning_rate": 9.462406015037595e-06, "loss": 37.7769, "step": 7583 }, { "epoch": 180.5731343283582, "grad_norm": 24.628896713256836, "learning_rate": 9.461152882205515e-06, "loss": 38.4154, "step": 7584 }, { "epoch": 180.59701492537314, "grad_norm": 26.340177536010742, "learning_rate": 9.459899749373434e-06, "loss": 36.9641, "step": 7585 }, { "epoch": 180.62089552238805, "grad_norm": 22.31771469116211, "learning_rate": 9.458646616541354e-06, "loss": 37.9069, "step": 7586 }, { "epoch": 180.644776119403, "grad_norm": 24.355815887451172, "learning_rate": 9.457393483709274e-06, "loss": 37.5167, "step": 7587 }, { "epoch": 180.6686567164179, "grad_norm": 22.45478057861328, "learning_rate": 9.456140350877195e-06, "loss": 37.0763, "step": 7588 }, { "epoch": 180.69253731343284, "grad_norm": 21.712766647338867, "learning_rate": 9.454887218045113e-06, "loss": 37.7854, "step": 7589 }, { "epoch": 180.71641791044777, "grad_norm": 20.127975463867188, "learning_rate": 9.453634085213034e-06, "loss": 38.1377, "step": 7590 }, { "epoch": 180.74029850746268, "grad_norm": 19.657285690307617, "learning_rate": 9.452380952380952e-06, "loss": 39.2664, "step": 7591 }, { "epoch": 180.76417910447762, "grad_norm": 18.618865966796875, "learning_rate": 9.451127819548873e-06, "loss": 38.2197, "step": 7592 }, { "epoch": 180.78805970149253, "grad_norm": 14.82322883605957, "learning_rate": 9.449874686716793e-06, "loss": 38.8792, "step": 7593 }, { "epoch": 180.81194029850747, "grad_norm": 23.297121047973633, "learning_rate": 9.448621553884713e-06, "loss": 37.819, "step": 7594 }, { "epoch": 180.83582089552237, "grad_norm": 18.612077713012695, "learning_rate": 9.447368421052632e-06, "loss": 37.756, "step": 7595 }, { "epoch": 180.8597014925373, "grad_norm": 22.1790771484375, "learning_rate": 9.446115288220552e-06, "loss": 38.2304, "step": 7596 }, { "epoch": 180.88358208955225, "grad_norm": 18.968181610107422, "learning_rate": 9.444862155388473e-06, "loss": 38.8481, "step": 7597 }, { "epoch": 180.90746268656716, "grad_norm": 19.16139793395996, "learning_rate": 9.443609022556391e-06, "loss": 37.887, "step": 7598 }, { "epoch": 180.9313432835821, "grad_norm": 24.370647430419922, "learning_rate": 9.442355889724312e-06, "loss": 38.9882, "step": 7599 }, { "epoch": 180.955223880597, "grad_norm": 19.582992553710938, "learning_rate": 9.44110275689223e-06, "loss": 39.416, "step": 7600 }, { "epoch": 180.97910447761194, "grad_norm": 32.500946044921875, "learning_rate": 9.439849624060151e-06, "loss": 37.0235, "step": 7601 }, { "epoch": 181.0, "grad_norm": 22.320682525634766, "learning_rate": 9.438596491228071e-06, "loss": 34.5362, "step": 7602 }, { "epoch": 181.02388059701494, "grad_norm": 34.26927947998047, "learning_rate": 9.43734335839599e-06, "loss": 38.3729, "step": 7603 }, { "epoch": 181.04776119402985, "grad_norm": NaN, "learning_rate": 9.43609022556391e-06, "loss": 41.5496, "step": 7604 }, { "epoch": 181.07164179104478, "grad_norm": 33.34910202026367, "learning_rate": 9.43609022556391e-06, "loss": 37.429, "step": 7605 }, { "epoch": 181.0955223880597, "grad_norm": 31.405717849731445, "learning_rate": 9.43483709273183e-06, "loss": 37.9895, "step": 7606 }, { "epoch": 181.11940298507463, "grad_norm": 29.484378814697266, "learning_rate": 9.43358395989975e-06, "loss": 38.1531, "step": 7607 }, { "epoch": 181.14328358208957, "grad_norm": 27.070419311523438, "learning_rate": 9.432330827067669e-06, "loss": 37.4599, "step": 7608 }, { "epoch": 181.16716417910447, "grad_norm": 29.64767837524414, "learning_rate": 9.43107769423559e-06, "loss": 39.2385, "step": 7609 }, { "epoch": 181.1910447761194, "grad_norm": 30.908058166503906, "learning_rate": 9.42982456140351e-06, "loss": 38.8913, "step": 7610 }, { "epoch": 181.21492537313432, "grad_norm": 25.773351669311523, "learning_rate": 9.42857142857143e-06, "loss": 38.6503, "step": 7611 }, { "epoch": 181.23880597014926, "grad_norm": 30.587566375732422, "learning_rate": 9.427318295739349e-06, "loss": 38.9265, "step": 7612 }, { "epoch": 181.26268656716417, "grad_norm": 29.84368324279785, "learning_rate": 9.426065162907269e-06, "loss": 38.2391, "step": 7613 }, { "epoch": 181.2865671641791, "grad_norm": 31.73255157470703, "learning_rate": 9.424812030075188e-06, "loss": 37.3281, "step": 7614 }, { "epoch": 181.31044776119404, "grad_norm": 24.846481323242188, "learning_rate": 9.423558897243108e-06, "loss": 38.7828, "step": 7615 }, { "epoch": 181.33432835820895, "grad_norm": 29.050628662109375, "learning_rate": 9.422305764411028e-06, "loss": 38.2192, "step": 7616 }, { "epoch": 181.3582089552239, "grad_norm": 22.826169967651367, "learning_rate": 9.421052631578949e-06, "loss": 37.6121, "step": 7617 }, { "epoch": 181.3820895522388, "grad_norm": 30.73110008239746, "learning_rate": 9.419799498746868e-06, "loss": 39.2107, "step": 7618 }, { "epoch": 181.40597014925373, "grad_norm": 25.17683219909668, "learning_rate": 9.418546365914788e-06, "loss": 38.7411, "step": 7619 }, { "epoch": 181.42985074626867, "grad_norm": 30.565523147583008, "learning_rate": 9.417293233082708e-06, "loss": 37.3072, "step": 7620 }, { "epoch": 181.45373134328358, "grad_norm": 23.134422302246094, "learning_rate": 9.416040100250627e-06, "loss": 38.6556, "step": 7621 }, { "epoch": 181.47761194029852, "grad_norm": 29.73345375061035, "learning_rate": 9.414786967418547e-06, "loss": 37.9902, "step": 7622 }, { "epoch": 181.50149253731342, "grad_norm": 27.748497009277344, "learning_rate": 9.413533834586466e-06, "loss": 38.4888, "step": 7623 }, { "epoch": 181.52537313432836, "grad_norm": 29.086557388305664, "learning_rate": 9.412280701754388e-06, "loss": 38.0586, "step": 7624 }, { "epoch": 181.54925373134327, "grad_norm": 24.033424377441406, "learning_rate": 9.411027568922306e-06, "loss": 39.1418, "step": 7625 }, { "epoch": 181.5731343283582, "grad_norm": 31.593238830566406, "learning_rate": 9.409774436090227e-06, "loss": 39.2994, "step": 7626 }, { "epoch": 181.59701492537314, "grad_norm": 24.30849838256836, "learning_rate": 9.408521303258147e-06, "loss": 37.4472, "step": 7627 }, { "epoch": 181.62089552238805, "grad_norm": 33.19766616821289, "learning_rate": 9.407268170426066e-06, "loss": 38.1845, "step": 7628 }, { "epoch": 181.644776119403, "grad_norm": 24.544702529907227, "learning_rate": 9.406015037593986e-06, "loss": 37.094, "step": 7629 }, { "epoch": 181.6686567164179, "grad_norm": 31.14695167541504, "learning_rate": 9.404761904761905e-06, "loss": 38.9074, "step": 7630 }, { "epoch": 181.69253731343284, "grad_norm": 28.016338348388672, "learning_rate": 9.403508771929825e-06, "loss": 38.6535, "step": 7631 }, { "epoch": 181.71641791044777, "grad_norm": 31.826282501220703, "learning_rate": 9.402255639097745e-06, "loss": 38.6819, "step": 7632 }, { "epoch": 181.74029850746268, "grad_norm": 24.161087036132812, "learning_rate": 9.401002506265666e-06, "loss": 37.7841, "step": 7633 }, { "epoch": 181.76417910447762, "grad_norm": 27.22321128845215, "learning_rate": 9.399749373433584e-06, "loss": 37.2927, "step": 7634 }, { "epoch": 181.78805970149253, "grad_norm": 23.210390090942383, "learning_rate": 9.398496240601505e-06, "loss": 37.9316, "step": 7635 }, { "epoch": 181.81194029850747, "grad_norm": 23.418764114379883, "learning_rate": 9.397243107769425e-06, "loss": 37.1737, "step": 7636 }, { "epoch": 181.83582089552237, "grad_norm": 20.593555450439453, "learning_rate": 9.395989974937344e-06, "loss": 38.3443, "step": 7637 }, { "epoch": 181.8597014925373, "grad_norm": 26.723350524902344, "learning_rate": 9.394736842105264e-06, "loss": 38.5668, "step": 7638 }, { "epoch": 181.88358208955225, "grad_norm": 21.229936599731445, "learning_rate": 9.393483709273183e-06, "loss": 38.2915, "step": 7639 }, { "epoch": 181.90746268656716, "grad_norm": 25.924835205078125, "learning_rate": 9.392230576441103e-06, "loss": 38.9211, "step": 7640 }, { "epoch": 181.9313432835821, "grad_norm": 19.696718215942383, "learning_rate": 9.390977443609023e-06, "loss": 38.486, "step": 7641 }, { "epoch": 181.955223880597, "grad_norm": 22.874286651611328, "learning_rate": 9.389724310776944e-06, "loss": 37.2433, "step": 7642 }, { "epoch": 181.97910447761194, "grad_norm": 18.583221435546875, "learning_rate": 9.388471177944863e-06, "loss": 37.7967, "step": 7643 }, { "epoch": 182.0, "grad_norm": 20.269168853759766, "learning_rate": 9.387218045112783e-06, "loss": 31.9012, "step": 7644 }, { "epoch": 182.02388059701494, "grad_norm": 19.068851470947266, "learning_rate": 9.385964912280703e-06, "loss": 36.8886, "step": 7645 }, { "epoch": 182.04776119402985, "grad_norm": 19.43038558959961, "learning_rate": 9.384711779448622e-06, "loss": 38.2625, "step": 7646 }, { "epoch": 182.07164179104478, "grad_norm": 18.73162269592285, "learning_rate": 9.383458646616542e-06, "loss": 38.9788, "step": 7647 }, { "epoch": 182.0955223880597, "grad_norm": 21.09528923034668, "learning_rate": 9.382205513784461e-06, "loss": 37.1233, "step": 7648 }, { "epoch": 182.11940298507463, "grad_norm": 20.089250564575195, "learning_rate": 9.380952380952381e-06, "loss": 38.2381, "step": 7649 }, { "epoch": 182.14328358208957, "grad_norm": 17.421754837036133, "learning_rate": 9.379699248120302e-06, "loss": 37.9993, "step": 7650 }, { "epoch": 182.16716417910447, "grad_norm": 18.992149353027344, "learning_rate": 9.378446115288222e-06, "loss": 37.3638, "step": 7651 }, { "epoch": 182.1910447761194, "grad_norm": 15.953680038452148, "learning_rate": 9.377192982456142e-06, "loss": 38.0116, "step": 7652 }, { "epoch": 182.21492537313432, "grad_norm": 20.523019790649414, "learning_rate": 9.375939849624061e-06, "loss": 38.9804, "step": 7653 }, { "epoch": 182.23880597014926, "grad_norm": 15.280717849731445, "learning_rate": 9.37468671679198e-06, "loss": 37.5246, "step": 7654 }, { "epoch": 182.26268656716417, "grad_norm": 20.681921005249023, "learning_rate": 9.3734335839599e-06, "loss": 36.9325, "step": 7655 }, { "epoch": 182.2865671641791, "grad_norm": 17.027320861816406, "learning_rate": 9.37218045112782e-06, "loss": 38.0196, "step": 7656 }, { "epoch": 182.31044776119404, "grad_norm": 20.468412399291992, "learning_rate": 9.370927318295741e-06, "loss": 39.457, "step": 7657 }, { "epoch": 182.33432835820895, "grad_norm": 18.735979080200195, "learning_rate": 9.36967418546366e-06, "loss": 38.0952, "step": 7658 }, { "epoch": 182.3582089552239, "grad_norm": NaN, "learning_rate": 9.36842105263158e-06, "loss": 43.9235, "step": 7659 }, { "epoch": 182.3820895522388, "grad_norm": 26.147253036499023, "learning_rate": 9.36842105263158e-06, "loss": 38.8342, "step": 7660 }, { "epoch": 182.40597014925373, "grad_norm": 30.72784423828125, "learning_rate": 9.367167919799498e-06, "loss": 38.1475, "step": 7661 }, { "epoch": 182.42985074626867, "grad_norm": 23.31533432006836, "learning_rate": 9.36591478696742e-06, "loss": 37.7839, "step": 7662 }, { "epoch": 182.45373134328358, "grad_norm": 28.856985092163086, "learning_rate": 9.36466165413534e-06, "loss": 38.5783, "step": 7663 }, { "epoch": 182.47761194029852, "grad_norm": 24.6004581451416, "learning_rate": 9.363408521303259e-06, "loss": 39.4207, "step": 7664 }, { "epoch": 182.50149253731342, "grad_norm": 24.046371459960938, "learning_rate": 9.362155388471178e-06, "loss": 38.2667, "step": 7665 }, { "epoch": 182.52537313432836, "grad_norm": 26.086563110351562, "learning_rate": 9.360902255639098e-06, "loss": 37.57, "step": 7666 }, { "epoch": 182.54925373134327, "grad_norm": 19.401077270507812, "learning_rate": 9.35964912280702e-06, "loss": 37.6523, "step": 7667 }, { "epoch": 182.5731343283582, "grad_norm": 26.627574920654297, "learning_rate": 9.358395989974937e-06, "loss": 37.9341, "step": 7668 }, { "epoch": 182.59701492537314, "grad_norm": 21.351564407348633, "learning_rate": 9.357142857142859e-06, "loss": 37.7288, "step": 7669 }, { "epoch": 182.62089552238805, "grad_norm": 15.088356018066406, "learning_rate": 9.355889724310778e-06, "loss": 37.764, "step": 7670 }, { "epoch": 182.644776119403, "grad_norm": 19.552310943603516, "learning_rate": 9.354636591478698e-06, "loss": 37.6055, "step": 7671 }, { "epoch": 182.6686567164179, "grad_norm": 17.138029098510742, "learning_rate": 9.353383458646617e-06, "loss": 38.8954, "step": 7672 }, { "epoch": 182.69253731343284, "grad_norm": 20.236984252929688, "learning_rate": 9.352130325814537e-06, "loss": 38.7094, "step": 7673 }, { "epoch": 182.71641791044777, "grad_norm": 17.15636444091797, "learning_rate": 9.350877192982457e-06, "loss": 38.7631, "step": 7674 }, { "epoch": 182.74029850746268, "grad_norm": 14.976155281066895, "learning_rate": 9.349624060150376e-06, "loss": 37.9882, "step": 7675 }, { "epoch": 182.76417910447762, "grad_norm": 17.57347297668457, "learning_rate": 9.348370927318296e-06, "loss": 38.4724, "step": 7676 }, { "epoch": 182.78805970149253, "grad_norm": 19.05532455444336, "learning_rate": 9.347117794486217e-06, "loss": 37.4343, "step": 7677 }, { "epoch": 182.81194029850747, "grad_norm": 20.01117515563965, "learning_rate": 9.345864661654137e-06, "loss": 39.2464, "step": 7678 }, { "epoch": 182.83582089552237, "grad_norm": 15.85883903503418, "learning_rate": 9.344611528822056e-06, "loss": 37.8886, "step": 7679 }, { "epoch": 182.8597014925373, "grad_norm": 16.31858253479004, "learning_rate": 9.343358395989976e-06, "loss": 38.2813, "step": 7680 }, { "epoch": 182.88358208955225, "grad_norm": 21.91824722290039, "learning_rate": 9.342105263157895e-06, "loss": 38.7016, "step": 7681 }, { "epoch": 182.90746268656716, "grad_norm": 13.628788948059082, "learning_rate": 9.340852130325815e-06, "loss": 38.281, "step": 7682 }, { "epoch": 182.9313432835821, "grad_norm": 18.080459594726562, "learning_rate": 9.339598997493735e-06, "loss": 39.22, "step": 7683 }, { "epoch": 182.955223880597, "grad_norm": 22.61056137084961, "learning_rate": 9.338345864661656e-06, "loss": 39.4347, "step": 7684 }, { "epoch": 182.97910447761194, "grad_norm": 18.271526336669922, "learning_rate": 9.337092731829574e-06, "loss": 39.2384, "step": 7685 }, { "epoch": 183.0, "grad_norm": 15.15306568145752, "learning_rate": 9.335839598997495e-06, "loss": 34.1513, "step": 7686 }, { "epoch": 183.02388059701494, "grad_norm": 28.156301498413086, "learning_rate": 9.334586466165415e-06, "loss": 37.1129, "step": 7687 }, { "epoch": 183.04776119402985, "grad_norm": 17.79732894897461, "learning_rate": 9.333333333333334e-06, "loss": 38.5227, "step": 7688 }, { "epoch": 183.07164179104478, "grad_norm": 18.45402717590332, "learning_rate": 9.332080200501254e-06, "loss": 38.492, "step": 7689 }, { "epoch": 183.0955223880597, "grad_norm": 25.375320434570312, "learning_rate": 9.330827067669174e-06, "loss": 38.8671, "step": 7690 }, { "epoch": 183.11940298507463, "grad_norm": 16.20077896118164, "learning_rate": 9.329573934837093e-06, "loss": 38.1921, "step": 7691 }, { "epoch": 183.14328358208957, "grad_norm": 23.60683822631836, "learning_rate": 9.328320802005013e-06, "loss": 38.3588, "step": 7692 }, { "epoch": 183.16716417910447, "grad_norm": 28.53430938720703, "learning_rate": 9.327067669172934e-06, "loss": 38.4831, "step": 7693 }, { "epoch": 183.1910447761194, "grad_norm": 19.33002281188965, "learning_rate": 9.325814536340852e-06, "loss": 37.6731, "step": 7694 }, { "epoch": 183.21492537313432, "grad_norm": 35.12484359741211, "learning_rate": 9.324561403508773e-06, "loss": 38.6068, "step": 7695 }, { "epoch": 183.23880597014926, "grad_norm": 25.615224838256836, "learning_rate": 9.323308270676693e-06, "loss": 40.1274, "step": 7696 }, { "epoch": 183.26268656716417, "grad_norm": 38.18474197387695, "learning_rate": 9.322055137844612e-06, "loss": 38.6643, "step": 7697 }, { "epoch": 183.2865671641791, "grad_norm": 25.419836044311523, "learning_rate": 9.320802005012532e-06, "loss": 39.6969, "step": 7698 }, { "epoch": 183.31044776119404, "grad_norm": 40.90986251831055, "learning_rate": 9.319548872180452e-06, "loss": 37.8611, "step": 7699 }, { "epoch": 183.33432835820895, "grad_norm": 40.589378356933594, "learning_rate": 9.318295739348371e-06, "loss": 39.3673, "step": 7700 }, { "epoch": 183.3582089552239, "grad_norm": 34.87507629394531, "learning_rate": 9.31704260651629e-06, "loss": 39.072, "step": 7701 }, { "epoch": 183.3820895522388, "grad_norm": 35.49257278442383, "learning_rate": 9.315789473684212e-06, "loss": 38.231, "step": 7702 }, { "epoch": 183.40597014925373, "grad_norm": 31.80084991455078, "learning_rate": 9.31453634085213e-06, "loss": 39.5215, "step": 7703 }, { "epoch": 183.42985074626867, "grad_norm": 32.01988983154297, "learning_rate": 9.313283208020051e-06, "loss": 38.3811, "step": 7704 }, { "epoch": 183.45373134328358, "grad_norm": 32.97187042236328, "learning_rate": 9.312030075187971e-06, "loss": 37.529, "step": 7705 }, { "epoch": 183.47761194029852, "grad_norm": 32.333255767822266, "learning_rate": 9.31077694235589e-06, "loss": 38.386, "step": 7706 }, { "epoch": 183.50149253731342, "grad_norm": 31.905254364013672, "learning_rate": 9.30952380952381e-06, "loss": 39.7536, "step": 7707 }, { "epoch": 183.52537313432836, "grad_norm": 26.036340713500977, "learning_rate": 9.30827067669173e-06, "loss": 39.0496, "step": 7708 }, { "epoch": 183.54925373134327, "grad_norm": 31.00899887084961, "learning_rate": 9.30701754385965e-06, "loss": 37.6582, "step": 7709 }, { "epoch": 183.5731343283582, "grad_norm": 23.661453247070312, "learning_rate": 9.30576441102757e-06, "loss": 38.5868, "step": 7710 }, { "epoch": 183.59701492537314, "grad_norm": 35.26527404785156, "learning_rate": 9.30451127819549e-06, "loss": 36.9418, "step": 7711 }, { "epoch": 183.62089552238805, "grad_norm": 30.152225494384766, "learning_rate": 9.30325814536341e-06, "loss": 37.9041, "step": 7712 }, { "epoch": 183.644776119403, "grad_norm": 36.04405212402344, "learning_rate": 9.30200501253133e-06, "loss": 37.8787, "step": 7713 }, { "epoch": 183.6686567164179, "grad_norm": 32.55191421508789, "learning_rate": 9.300751879699249e-06, "loss": 38.5805, "step": 7714 }, { "epoch": 183.69253731343284, "grad_norm": 35.73372268676758, "learning_rate": 9.299498746867169e-06, "loss": 39.6069, "step": 7715 }, { "epoch": 183.71641791044777, "grad_norm": 30.653011322021484, "learning_rate": 9.298245614035088e-06, "loss": 38.2053, "step": 7716 }, { "epoch": 183.74029850746268, "grad_norm": 34.98927688598633, "learning_rate": 9.29699248120301e-06, "loss": 38.7874, "step": 7717 }, { "epoch": 183.76417910447762, "grad_norm": 27.018739700317383, "learning_rate": 9.295739348370927e-06, "loss": 38.1713, "step": 7718 }, { "epoch": 183.78805970149253, "grad_norm": 37.969173431396484, "learning_rate": 9.294486215538849e-06, "loss": 39.1804, "step": 7719 }, { "epoch": 183.81194029850747, "grad_norm": 35.38280487060547, "learning_rate": 9.293233082706767e-06, "loss": 38.5821, "step": 7720 }, { "epoch": 183.83582089552237, "grad_norm": 31.927392959594727, "learning_rate": 9.291979949874688e-06, "loss": 38.0057, "step": 7721 }, { "epoch": 183.8597014925373, "grad_norm": 32.185203552246094, "learning_rate": 9.290726817042607e-06, "loss": 38.5497, "step": 7722 }, { "epoch": 183.88358208955225, "grad_norm": 32.241790771484375, "learning_rate": 9.289473684210527e-06, "loss": 37.8311, "step": 7723 }, { "epoch": 183.90746268656716, "grad_norm": 30.077545166015625, "learning_rate": 9.288220551378447e-06, "loss": 38.8007, "step": 7724 }, { "epoch": 183.9313432835821, "grad_norm": 35.88338088989258, "learning_rate": 9.286967418546366e-06, "loss": 38.5947, "step": 7725 }, { "epoch": 183.955223880597, "grad_norm": 31.73858642578125, "learning_rate": 9.285714285714288e-06, "loss": 37.8341, "step": 7726 }, { "epoch": 183.97910447761194, "grad_norm": 29.405078887939453, "learning_rate": 9.284461152882205e-06, "loss": 37.7245, "step": 7727 }, { "epoch": 184.0, "grad_norm": NaN, "learning_rate": 9.283208020050127e-06, "loss": 56.4104, "step": 7728 }, { "epoch": 184.02388059701494, "grad_norm": 19.570993423461914, "learning_rate": 9.283208020050127e-06, "loss": 38.1128, "step": 7729 }, { "epoch": 184.04776119402985, "grad_norm": NaN, "learning_rate": 9.281954887218045e-06, "loss": 63.7892, "step": 7730 }, { "epoch": 184.07164179104478, "grad_norm": 48.01729965209961, "learning_rate": 9.281954887218045e-06, "loss": 39.5211, "step": 7731 }, { "epoch": 184.0955223880597, "grad_norm": 33.520503997802734, "learning_rate": 9.280701754385966e-06, "loss": 39.2581, "step": 7732 }, { "epoch": 184.11940298507463, "grad_norm": 40.728187561035156, "learning_rate": 9.279448621553886e-06, "loss": 40.5069, "step": 7733 }, { "epoch": 184.14328358208957, "grad_norm": 34.6091194152832, "learning_rate": 9.278195488721805e-06, "loss": 38.6713, "step": 7734 }, { "epoch": 184.16716417910447, "grad_norm": 27.991249084472656, "learning_rate": 9.276942355889725e-06, "loss": 38.9768, "step": 7735 }, { "epoch": 184.1910447761194, "grad_norm": 32.23847198486328, "learning_rate": 9.275689223057644e-06, "loss": 39.503, "step": 7736 }, { "epoch": 184.21492537313432, "grad_norm": 23.983753204345703, "learning_rate": 9.274436090225564e-06, "loss": 39.7654, "step": 7737 }, { "epoch": 184.23880597014926, "grad_norm": 33.7354736328125, "learning_rate": 9.273182957393484e-06, "loss": 40.2666, "step": 7738 }, { "epoch": 184.26268656716417, "grad_norm": 22.54912567138672, "learning_rate": 9.271929824561405e-06, "loss": 40.3684, "step": 7739 }, { "epoch": 184.2865671641791, "grad_norm": 31.727224349975586, "learning_rate": 9.270676691729324e-06, "loss": 39.9529, "step": 7740 }, { "epoch": 184.31044776119404, "grad_norm": 32.118106842041016, "learning_rate": 9.269423558897244e-06, "loss": 41.3572, "step": 7741 }, { "epoch": 184.33432835820895, "grad_norm": 19.489656448364258, "learning_rate": 9.268170426065164e-06, "loss": 39.6626, "step": 7742 }, { "epoch": 184.3582089552239, "grad_norm": 29.95058822631836, "learning_rate": 9.266917293233083e-06, "loss": 40.7756, "step": 7743 }, { "epoch": 184.3820895522388, "grad_norm": 22.743227005004883, "learning_rate": 9.265664160401003e-06, "loss": 40.0637, "step": 7744 }, { "epoch": 184.40597014925373, "grad_norm": 24.127614974975586, "learning_rate": 9.264411027568922e-06, "loss": 39.5272, "step": 7745 }, { "epoch": 184.42985074626867, "grad_norm": 32.89726257324219, "learning_rate": 9.263157894736842e-06, "loss": 40.244, "step": 7746 }, { "epoch": 184.45373134328358, "grad_norm": 20.5611629486084, "learning_rate": 9.261904761904763e-06, "loss": 39.9755, "step": 7747 }, { "epoch": 184.47761194029852, "grad_norm": 36.67335510253906, "learning_rate": 9.260651629072683e-06, "loss": 40.6738, "step": 7748 }, { "epoch": 184.50149253731342, "grad_norm": 27.706262588500977, "learning_rate": 9.259398496240603e-06, "loss": 40.3502, "step": 7749 }, { "epoch": 184.52537313432836, "grad_norm": 22.725189208984375, "learning_rate": 9.258145363408522e-06, "loss": 37.5332, "step": 7750 }, { "epoch": 184.54925373134327, "grad_norm": 40.575313568115234, "learning_rate": 9.256892230576442e-06, "loss": 40.0921, "step": 7751 }, { "epoch": 184.5731343283582, "grad_norm": 27.19171142578125, "learning_rate": 9.255639097744363e-06, "loss": 38.1854, "step": 7752 }, { "epoch": 184.59701492537314, "grad_norm": 30.067363739013672, "learning_rate": 9.254385964912281e-06, "loss": 40.1989, "step": 7753 }, { "epoch": 184.62089552238805, "grad_norm": 25.565664291381836, "learning_rate": 9.253132832080202e-06, "loss": 40.3723, "step": 7754 }, { "epoch": 184.644776119403, "grad_norm": 28.75983428955078, "learning_rate": 9.25187969924812e-06, "loss": 40.6146, "step": 7755 }, { "epoch": 184.6686567164179, "grad_norm": 20.13669776916504, "learning_rate": 9.250626566416041e-06, "loss": 41.4179, "step": 7756 }, { "epoch": 184.69253731343284, "grad_norm": 34.65123748779297, "learning_rate": 9.249373433583961e-06, "loss": 41.4522, "step": 7757 }, { "epoch": 184.71641791044777, "grad_norm": 29.887758255004883, "learning_rate": 9.24812030075188e-06, "loss": 41.7257, "step": 7758 }, { "epoch": 184.74029850746268, "grad_norm": 22.708446502685547, "learning_rate": 9.2468671679198e-06, "loss": 39.5245, "step": 7759 }, { "epoch": 184.76417910447762, "grad_norm": 27.760478973388672, "learning_rate": 9.24561403508772e-06, "loss": 40.1309, "step": 7760 }, { "epoch": 184.78805970149253, "grad_norm": 35.92546844482422, "learning_rate": 9.24436090225564e-06, "loss": 39.6421, "step": 7761 }, { "epoch": 184.81194029850747, "grad_norm": 18.31737518310547, "learning_rate": 9.243107769423559e-06, "loss": 39.7503, "step": 7762 }, { "epoch": 184.83582089552237, "grad_norm": 26.146255493164062, "learning_rate": 9.24185463659148e-06, "loss": 39.6416, "step": 7763 }, { "epoch": 184.8597014925373, "grad_norm": 21.460485458374023, "learning_rate": 9.240601503759398e-06, "loss": 40.1663, "step": 7764 }, { "epoch": 184.88358208955225, "grad_norm": 22.847776412963867, "learning_rate": 9.23934837092732e-06, "loss": 41.4605, "step": 7765 }, { "epoch": 184.90746268656716, "grad_norm": 21.51983070373535, "learning_rate": 9.238095238095239e-06, "loss": 38.5327, "step": 7766 }, { "epoch": 184.9313432835821, "grad_norm": 23.702680587768555, "learning_rate": 9.236842105263159e-06, "loss": 40.1406, "step": 7767 }, { "epoch": 184.955223880597, "grad_norm": 25.279239654541016, "learning_rate": 9.235588972431078e-06, "loss": 40.8743, "step": 7768 }, { "epoch": 184.97910447761194, "grad_norm": 33.16035842895508, "learning_rate": 9.234335839598998e-06, "loss": 39.785, "step": 7769 }, { "epoch": 185.0, "grad_norm": 22.09147834777832, "learning_rate": 9.233082706766918e-06, "loss": 34.7394, "step": 7770 }, { "epoch": 185.02388059701494, "grad_norm": 30.796735763549805, "learning_rate": 9.231829573934837e-06, "loss": 40.6943, "step": 7771 }, { "epoch": 185.04776119402985, "grad_norm": 32.15016555786133, "learning_rate": 9.230576441102758e-06, "loss": 40.4672, "step": 7772 }, { "epoch": 185.07164179104478, "grad_norm": 22.353782653808594, "learning_rate": 9.229323308270678e-06, "loss": 41.4305, "step": 7773 }, { "epoch": 185.0955223880597, "grad_norm": 53.881473541259766, "learning_rate": 9.228070175438598e-06, "loss": 38.3879, "step": 7774 }, { "epoch": 185.11940298507463, "grad_norm": 39.897361755371094, "learning_rate": 9.226817042606517e-06, "loss": 41.0108, "step": 7775 }, { "epoch": 185.14328358208957, "grad_norm": NaN, "learning_rate": 9.225563909774437e-06, "loss": 51.0282, "step": 7776 }, { "epoch": 185.16716417910447, "grad_norm": 33.82575988769531, "learning_rate": 9.225563909774437e-06, "loss": 40.394, "step": 7777 }, { "epoch": 185.1910447761194, "grad_norm": 82.26239776611328, "learning_rate": 9.224310776942356e-06, "loss": 43.2198, "step": 7778 }, { "epoch": 185.21492537313432, "grad_norm": 73.5898208618164, "learning_rate": 9.223057644110276e-06, "loss": 41.9718, "step": 7779 }, { "epoch": 185.23880597014926, "grad_norm": 54.194740295410156, "learning_rate": 9.221804511278196e-06, "loss": 42.3892, "step": 7780 }, { "epoch": 185.26268656716417, "grad_norm": 64.35474395751953, "learning_rate": 9.220551378446117e-06, "loss": 41.4895, "step": 7781 }, { "epoch": 185.2865671641791, "grad_norm": 52.19340515136719, "learning_rate": 9.219298245614035e-06, "loss": 41.6825, "step": 7782 }, { "epoch": 185.31044776119404, "grad_norm": 49.811134338378906, "learning_rate": 9.218045112781956e-06, "loss": 42.6462, "step": 7783 }, { "epoch": 185.33432835820895, "grad_norm": 40.66097640991211, "learning_rate": 9.216791979949876e-06, "loss": 41.6908, "step": 7784 }, { "epoch": 185.3582089552239, "grad_norm": 37.429107666015625, "learning_rate": 9.215538847117795e-06, "loss": 42.5027, "step": 7785 }, { "epoch": 185.3820895522388, "grad_norm": 42.99324035644531, "learning_rate": 9.214285714285715e-06, "loss": 41.92, "step": 7786 }, { "epoch": 185.40597014925373, "grad_norm": 33.43999099731445, "learning_rate": 9.213032581453634e-06, "loss": 40.9467, "step": 7787 }, { "epoch": 185.42985074626867, "grad_norm": 49.963966369628906, "learning_rate": 9.211779448621556e-06, "loss": 42.4665, "step": 7788 }, { "epoch": 185.45373134328358, "grad_norm": 34.8093147277832, "learning_rate": 9.210526315789474e-06, "loss": 41.5139, "step": 7789 }, { "epoch": 185.47761194029852, "grad_norm": 34.77069854736328, "learning_rate": 9.209273182957395e-06, "loss": 41.5734, "step": 7790 }, { "epoch": 185.50149253731342, "grad_norm": 47.45319747924805, "learning_rate": 9.208020050125313e-06, "loss": 41.3032, "step": 7791 }, { "epoch": 185.52537313432836, "grad_norm": 24.358047485351562, "learning_rate": 9.206766917293234e-06, "loss": 41.1637, "step": 7792 }, { "epoch": 185.54925373134327, "grad_norm": 46.77471923828125, "learning_rate": 9.205513784461154e-06, "loss": 44.595, "step": 7793 }, { "epoch": 185.5731343283582, "grad_norm": 36.80247116088867, "learning_rate": 9.204260651629073e-06, "loss": 42.6577, "step": 7794 }, { "epoch": 185.59701492537314, "grad_norm": 30.75225830078125, "learning_rate": 9.203007518796993e-06, "loss": 41.0479, "step": 7795 }, { "epoch": 185.62089552238805, "grad_norm": 55.35914611816406, "learning_rate": 9.201754385964913e-06, "loss": 43.4212, "step": 7796 }, { "epoch": 185.644776119403, "grad_norm": 34.891109466552734, "learning_rate": 9.200501253132834e-06, "loss": 42.6526, "step": 7797 }, { "epoch": 185.6686567164179, "grad_norm": 60.54024887084961, "learning_rate": 9.199248120300752e-06, "loss": 43.1204, "step": 7798 }, { "epoch": 185.69253731343284, "grad_norm": 48.87995529174805, "learning_rate": 9.197994987468673e-06, "loss": 43.8232, "step": 7799 }, { "epoch": 185.71641791044777, "grad_norm": 43.41633224487305, "learning_rate": 9.196741854636593e-06, "loss": 40.743, "step": 7800 }, { "epoch": 185.74029850746268, "grad_norm": 43.59025955200195, "learning_rate": 9.195488721804512e-06, "loss": 42.746, "step": 7801 }, { "epoch": 185.76417910447762, "grad_norm": 45.43309020996094, "learning_rate": 9.194235588972432e-06, "loss": 42.921, "step": 7802 }, { "epoch": 185.78805970149253, "grad_norm": 28.127649307250977, "learning_rate": 9.192982456140351e-06, "loss": 42.1366, "step": 7803 }, { "epoch": 185.81194029850747, "grad_norm": 61.12681579589844, "learning_rate": 9.191729323308271e-06, "loss": 42.4191, "step": 7804 }, { "epoch": 185.83582089552237, "grad_norm": 48.988365173339844, "learning_rate": 9.19047619047619e-06, "loss": 43.5104, "step": 7805 }, { "epoch": 185.8597014925373, "grad_norm": 56.43540954589844, "learning_rate": 9.18922305764411e-06, "loss": 43.6525, "step": 7806 }, { "epoch": 185.88358208955225, "grad_norm": 46.069435119628906, "learning_rate": 9.187969924812032e-06, "loss": 43.4069, "step": 7807 }, { "epoch": 185.90746268656716, "grad_norm": 39.751705169677734, "learning_rate": 9.186716791979951e-06, "loss": 43.334, "step": 7808 }, { "epoch": 185.9313432835821, "grad_norm": 46.80768966674805, "learning_rate": 9.18546365914787e-06, "loss": 41.914, "step": 7809 }, { "epoch": 185.955223880597, "grad_norm": 38.70549392700195, "learning_rate": 9.18421052631579e-06, "loss": 43.7965, "step": 7810 }, { "epoch": 185.97910447761194, "grad_norm": 33.33662414550781, "learning_rate": 9.18295739348371e-06, "loss": 42.2128, "step": 7811 }, { "epoch": 186.0, "grad_norm": 42.501155853271484, "learning_rate": 9.18170426065163e-06, "loss": 36.0084, "step": 7812 }, { "epoch": 186.02388059701494, "grad_norm": 40.92805862426758, "learning_rate": 9.180451127819549e-06, "loss": 43.0312, "step": 7813 }, { "epoch": 186.04776119402985, "grad_norm": 34.84563446044922, "learning_rate": 9.17919799498747e-06, "loss": 42.8822, "step": 7814 }, { "epoch": 186.07164179104478, "grad_norm": 27.465234756469727, "learning_rate": 9.177944862155388e-06, "loss": 43.2051, "step": 7815 }, { "epoch": 186.0955223880597, "grad_norm": 36.75338363647461, "learning_rate": 9.17669172932331e-06, "loss": 41.1784, "step": 7816 }, { "epoch": 186.11940298507463, "grad_norm": 47.202701568603516, "learning_rate": 9.17543859649123e-06, "loss": 42.8061, "step": 7817 }, { "epoch": 186.14328358208957, "grad_norm": 35.21329116821289, "learning_rate": 9.174185463659149e-06, "loss": 43.1316, "step": 7818 }, { "epoch": 186.16716417910447, "grad_norm": 27.600418090820312, "learning_rate": 9.172932330827068e-06, "loss": 42.7668, "step": 7819 }, { "epoch": 186.1910447761194, "grad_norm": 46.3722038269043, "learning_rate": 9.171679197994988e-06, "loss": 44.4291, "step": 7820 }, { "epoch": 186.21492537313432, "grad_norm": 37.76526641845703, "learning_rate": 9.170426065162908e-06, "loss": 42.964, "step": 7821 }, { "epoch": 186.23880597014926, "grad_norm": 27.865131378173828, "learning_rate": 9.169172932330827e-06, "loss": 42.4656, "step": 7822 }, { "epoch": 186.26268656716417, "grad_norm": 31.589683532714844, "learning_rate": 9.167919799498749e-06, "loss": 42.4538, "step": 7823 }, { "epoch": 186.2865671641791, "grad_norm": 41.379058837890625, "learning_rate": 9.166666666666666e-06, "loss": 43.2337, "step": 7824 }, { "epoch": 186.31044776119404, "grad_norm": 35.93637466430664, "learning_rate": 9.165413533834588e-06, "loss": 41.7727, "step": 7825 }, { "epoch": 186.33432835820895, "grad_norm": 29.648672103881836, "learning_rate": 9.164160401002507e-06, "loss": 43.7275, "step": 7826 }, { "epoch": 186.3582089552239, "grad_norm": 22.539348602294922, "learning_rate": 9.162907268170427e-06, "loss": 43.0448, "step": 7827 }, { "epoch": 186.3820895522388, "grad_norm": 31.496742248535156, "learning_rate": 9.161654135338347e-06, "loss": 43.3235, "step": 7828 }, { "epoch": 186.40597014925373, "grad_norm": 27.494714736938477, "learning_rate": 9.160401002506266e-06, "loss": 42.2111, "step": 7829 }, { "epoch": 186.42985074626867, "grad_norm": 39.6995735168457, "learning_rate": 9.159147869674186e-06, "loss": 43.8583, "step": 7830 }, { "epoch": 186.45373134328358, "grad_norm": 44.616390228271484, "learning_rate": 9.157894736842105e-06, "loss": 43.1443, "step": 7831 }, { "epoch": 186.47761194029852, "grad_norm": 37.15000534057617, "learning_rate": 9.156641604010027e-06, "loss": 43.7389, "step": 7832 }, { "epoch": 186.50149253731342, "grad_norm": 32.24622344970703, "learning_rate": 9.155388471177946e-06, "loss": 42.566, "step": 7833 }, { "epoch": 186.52537313432836, "grad_norm": 28.09488868713379, "learning_rate": 9.154135338345866e-06, "loss": 43.3224, "step": 7834 }, { "epoch": 186.54925373134327, "grad_norm": 43.34132385253906, "learning_rate": 9.152882205513785e-06, "loss": 42.2653, "step": 7835 }, { "epoch": 186.5731343283582, "grad_norm": 37.8883056640625, "learning_rate": 9.151629072681705e-06, "loss": 42.4098, "step": 7836 }, { "epoch": 186.59701492537314, "grad_norm": 26.457744598388672, "learning_rate": 9.150375939849625e-06, "loss": 44.2239, "step": 7837 }, { "epoch": 186.62089552238805, "grad_norm": 23.77232551574707, "learning_rate": 9.149122807017544e-06, "loss": 43.7133, "step": 7838 }, { "epoch": 186.644776119403, "grad_norm": 32.34585189819336, "learning_rate": 9.147869674185464e-06, "loss": 43.1097, "step": 7839 }, { "epoch": 186.6686567164179, "grad_norm": 40.74631881713867, "learning_rate": 9.146616541353385e-06, "loss": 43.3843, "step": 7840 }, { "epoch": 186.69253731343284, "grad_norm": 31.526451110839844, "learning_rate": 9.145363408521305e-06, "loss": 42.1462, "step": 7841 }, { "epoch": 186.71641791044777, "grad_norm": 29.849029541015625, "learning_rate": 9.144110275689224e-06, "loss": 41.4517, "step": 7842 }, { "epoch": 186.74029850746268, "grad_norm": 46.14763641357422, "learning_rate": 9.142857142857144e-06, "loss": 43.6828, "step": 7843 }, { "epoch": 186.76417910447762, "grad_norm": 29.060964584350586, "learning_rate": 9.141604010025063e-06, "loss": 42.8401, "step": 7844 }, { "epoch": 186.78805970149253, "grad_norm": 29.804529190063477, "learning_rate": 9.140350877192983e-06, "loss": 42.2589, "step": 7845 }, { "epoch": 186.81194029850747, "grad_norm": 24.214675903320312, "learning_rate": 9.139097744360903e-06, "loss": 44.5865, "step": 7846 }, { "epoch": 186.83582089552237, "grad_norm": 40.95576858520508, "learning_rate": 9.137844611528824e-06, "loss": 43.1832, "step": 7847 }, { "epoch": 186.8597014925373, "grad_norm": 45.6334228515625, "learning_rate": 9.136591478696742e-06, "loss": 42.2239, "step": 7848 }, { "epoch": 186.88358208955225, "grad_norm": 22.144073486328125, "learning_rate": 9.135338345864663e-06, "loss": 42.834, "step": 7849 }, { "epoch": 186.90746268656716, "grad_norm": 41.78306579589844, "learning_rate": 9.134085213032581e-06, "loss": 41.8102, "step": 7850 }, { "epoch": 186.9313432835821, "grad_norm": 60.63203811645508, "learning_rate": 9.132832080200502e-06, "loss": 41.8373, "step": 7851 }, { "epoch": 186.955223880597, "grad_norm": 32.649009704589844, "learning_rate": 9.131578947368422e-06, "loss": 42.7243, "step": 7852 }, { "epoch": 186.97910447761194, "grad_norm": 60.176368713378906, "learning_rate": 9.130325814536342e-06, "loss": 43.7076, "step": 7853 }, { "epoch": 187.0, "grad_norm": 34.826778411865234, "learning_rate": 9.129072681704261e-06, "loss": 36.2024, "step": 7854 }, { "epoch": 187.02388059701494, "grad_norm": 77.55919647216797, "learning_rate": 9.12781954887218e-06, "loss": 42.4281, "step": 7855 }, { "epoch": 187.04776119402985, "grad_norm": 38.300228118896484, "learning_rate": 9.126566416040102e-06, "loss": 42.0331, "step": 7856 }, { "epoch": 187.07164179104478, "grad_norm": 94.77706146240234, "learning_rate": 9.12531328320802e-06, "loss": 42.6255, "step": 7857 }, { "epoch": 187.0955223880597, "grad_norm": 75.2891616821289, "learning_rate": 9.124060150375941e-06, "loss": 42.5707, "step": 7858 }, { "epoch": 187.11940298507463, "grad_norm": 69.42463684082031, "learning_rate": 9.12280701754386e-06, "loss": 43.4412, "step": 7859 }, { "epoch": 187.14328358208957, "grad_norm": 71.69522857666016, "learning_rate": 9.12155388471178e-06, "loss": 42.0621, "step": 7860 }, { "epoch": 187.16716417910447, "grad_norm": 60.53205490112305, "learning_rate": 9.1203007518797e-06, "loss": 42.2368, "step": 7861 }, { "epoch": 187.1910447761194, "grad_norm": 62.059078216552734, "learning_rate": 9.11904761904762e-06, "loss": 43.9762, "step": 7862 }, { "epoch": 187.21492537313432, "grad_norm": 60.76016616821289, "learning_rate": 9.11779448621554e-06, "loss": 42.2956, "step": 7863 }, { "epoch": 187.23880597014926, "grad_norm": NaN, "learning_rate": 9.116541353383459e-06, "loss": 59.8308, "step": 7864 }, { "epoch": 187.26268656716417, "grad_norm": 117.1172103881836, "learning_rate": 9.116541353383459e-06, "loss": 42.8994, "step": 7865 }, { "epoch": 187.2865671641791, "grad_norm": 36.338539123535156, "learning_rate": 9.115288220551378e-06, "loss": 44.6887, "step": 7866 }, { "epoch": 187.31044776119404, "grad_norm": 117.24983978271484, "learning_rate": 9.114035087719298e-06, "loss": 45.3314, "step": 7867 }, { "epoch": 187.33432835820895, "grad_norm": 105.75101470947266, "learning_rate": 9.11278195488722e-06, "loss": 47.1684, "step": 7868 }, { "epoch": 187.3582089552239, "grad_norm": 94.0297622680664, "learning_rate": 9.111528822055139e-06, "loss": 46.3184, "step": 7869 }, { "epoch": 187.3820895522388, "grad_norm": 128.11460876464844, "learning_rate": 9.110275689223059e-06, "loss": 45.4642, "step": 7870 }, { "epoch": 187.40597014925373, "grad_norm": 68.23126220703125, "learning_rate": 9.109022556390978e-06, "loss": 46.0183, "step": 7871 }, { "epoch": 187.42985074626867, "grad_norm": 127.36836242675781, "learning_rate": 9.107769423558898e-06, "loss": 48.2838, "step": 7872 }, { "epoch": 187.45373134328358, "grad_norm": 94.4326171875, "learning_rate": 9.106516290726817e-06, "loss": 45.5114, "step": 7873 }, { "epoch": 187.47761194029852, "grad_norm": 80.82902526855469, "learning_rate": 9.105263157894739e-06, "loss": 45.2363, "step": 7874 }, { "epoch": 187.50149253731342, "grad_norm": 88.6158447265625, "learning_rate": 9.104010025062657e-06, "loss": 45.7324, "step": 7875 }, { "epoch": 187.52537313432836, "grad_norm": 71.13653564453125, "learning_rate": 9.102756892230578e-06, "loss": 44.9489, "step": 7876 }, { "epoch": 187.54925373134327, "grad_norm": 86.65029907226562, "learning_rate": 9.101503759398497e-06, "loss": 47.3925, "step": 7877 }, { "epoch": 187.5731343283582, "grad_norm": 74.06425476074219, "learning_rate": 9.100250626566417e-06, "loss": 47.0441, "step": 7878 }, { "epoch": 187.59701492537314, "grad_norm": 73.37405395507812, "learning_rate": 9.098997493734337e-06, "loss": 46.7387, "step": 7879 }, { "epoch": 187.62089552238805, "grad_norm": 65.9063491821289, "learning_rate": 9.097744360902256e-06, "loss": 46.0156, "step": 7880 }, { "epoch": 187.644776119403, "grad_norm": 64.36770629882812, "learning_rate": 9.096491228070178e-06, "loss": 47.2289, "step": 7881 }, { "epoch": 187.6686567164179, "grad_norm": 77.95767974853516, "learning_rate": 9.095238095238095e-06, "loss": 46.719, "step": 7882 }, { "epoch": 187.69253731343284, "grad_norm": 63.57596969604492, "learning_rate": 9.093984962406017e-06, "loss": 47.6364, "step": 7883 }, { "epoch": 187.71641791044777, "grad_norm": 70.62786102294922, "learning_rate": 9.092731829573935e-06, "loss": 45.9877, "step": 7884 }, { "epoch": 187.74029850746268, "grad_norm": 66.49168395996094, "learning_rate": 9.091478696741856e-06, "loss": 46.4989, "step": 7885 }, { "epoch": 187.76417910447762, "grad_norm": 67.90526580810547, "learning_rate": 9.090225563909776e-06, "loss": 46.4051, "step": 7886 }, { "epoch": 187.78805970149253, "grad_norm": 66.38755798339844, "learning_rate": 9.088972431077695e-06, "loss": 48.2885, "step": 7887 }, { "epoch": 187.81194029850747, "grad_norm": 45.47574234008789, "learning_rate": 9.087719298245615e-06, "loss": 48.894, "step": 7888 }, { "epoch": 187.83582089552237, "grad_norm": 99.2913589477539, "learning_rate": 9.086466165413534e-06, "loss": 44.985, "step": 7889 }, { "epoch": 187.8597014925373, "grad_norm": 69.80074310302734, "learning_rate": 9.085213032581454e-06, "loss": 46.3278, "step": 7890 }, { "epoch": 187.88358208955225, "grad_norm": 112.97005462646484, "learning_rate": 9.083959899749374e-06, "loss": 47.703, "step": 7891 }, { "epoch": 187.90746268656716, "grad_norm": 107.71286010742188, "learning_rate": 9.082706766917295e-06, "loss": 48.1937, "step": 7892 }, { "epoch": 187.9313432835821, "grad_norm": 89.39302825927734, "learning_rate": 9.081453634085213e-06, "loss": 45.5158, "step": 7893 }, { "epoch": 187.955223880597, "grad_norm": 92.1109848022461, "learning_rate": 9.080200501253134e-06, "loss": 46.6717, "step": 7894 }, { "epoch": 187.97910447761194, "grad_norm": 102.06716918945312, "learning_rate": 9.078947368421054e-06, "loss": 47.8201, "step": 7895 }, { "epoch": 188.0, "grad_norm": 93.22721862792969, "learning_rate": 9.077694235588973e-06, "loss": 42.6335, "step": 7896 }, { "epoch": 188.02388059701494, "grad_norm": 88.64990234375, "learning_rate": 9.076441102756893e-06, "loss": 46.5677, "step": 7897 }, { "epoch": 188.04776119402985, "grad_norm": 75.4064712524414, "learning_rate": 9.075187969924812e-06, "loss": 45.7132, "step": 7898 }, { "epoch": 188.07164179104478, "grad_norm": 90.9974594116211, "learning_rate": 9.073934837092732e-06, "loss": 48.1919, "step": 7899 }, { "epoch": 188.0955223880597, "grad_norm": 73.4759521484375, "learning_rate": 9.072681704260652e-06, "loss": 46.6734, "step": 7900 }, { "epoch": 188.11940298507463, "grad_norm": 84.57060241699219, "learning_rate": 9.071428571428573e-06, "loss": 44.9733, "step": 7901 }, { "epoch": 188.14328358208957, "grad_norm": 83.07115936279297, "learning_rate": 9.070175438596493e-06, "loss": 46.51, "step": 7902 }, { "epoch": 188.16716417910447, "grad_norm": 102.78768920898438, "learning_rate": 9.068922305764412e-06, "loss": 48.7774, "step": 7903 }, { "epoch": 188.1910447761194, "grad_norm": 85.22908020019531, "learning_rate": 9.067669172932332e-06, "loss": 47.7442, "step": 7904 }, { "epoch": 188.21492537313432, "grad_norm": 94.43974304199219, "learning_rate": 9.066416040100251e-06, "loss": 45.0615, "step": 7905 }, { "epoch": 188.23880597014926, "grad_norm": 76.01085662841797, "learning_rate": 9.065162907268171e-06, "loss": 45.716, "step": 7906 }, { "epoch": 188.26268656716417, "grad_norm": 114.36836242675781, "learning_rate": 9.06390977443609e-06, "loss": 50.8167, "step": 7907 }, { "epoch": 188.2865671641791, "grad_norm": 103.42585754394531, "learning_rate": 9.06265664160401e-06, "loss": 47.1027, "step": 7908 }, { "epoch": 188.31044776119404, "grad_norm": NaN, "learning_rate": 9.061403508771931e-06, "loss": 63.108, "step": 7909 }, { "epoch": 188.33432835820895, "grad_norm": 164.3147735595703, "learning_rate": 9.061403508771931e-06, "loss": 45.6579, "step": 7910 }, { "epoch": 188.3582089552239, "grad_norm": 169.74057006835938, "learning_rate": 9.06015037593985e-06, "loss": 47.956, "step": 7911 }, { "epoch": 188.3820895522388, "grad_norm": 40.49880599975586, "learning_rate": 9.05889724310777e-06, "loss": 47.9248, "step": 7912 }, { "epoch": 188.40597014925373, "grad_norm": 126.68148803710938, "learning_rate": 9.05764411027569e-06, "loss": 51.7, "step": 7913 }, { "epoch": 188.42985074626867, "grad_norm": 78.71920013427734, "learning_rate": 9.05639097744361e-06, "loss": 51.6456, "step": 7914 }, { "epoch": 188.45373134328358, "grad_norm": 148.0843963623047, "learning_rate": 9.05513784461153e-06, "loss": 49.7085, "step": 7915 }, { "epoch": 188.47761194029852, "grad_norm": 133.39694213867188, "learning_rate": 9.053884711779449e-06, "loss": 50.2062, "step": 7916 }, { "epoch": 188.50149253731342, "grad_norm": 101.5084457397461, "learning_rate": 9.05263157894737e-06, "loss": 50.2694, "step": 7917 }, { "epoch": 188.52537313432836, "grad_norm": 114.27274322509766, "learning_rate": 9.051378446115288e-06, "loss": 52.0106, "step": 7918 }, { "epoch": 188.54925373134327, "grad_norm": 76.4062728881836, "learning_rate": 9.05012531328321e-06, "loss": 49.0237, "step": 7919 }, { "epoch": 188.5731343283582, "grad_norm": 122.54005432128906, "learning_rate": 9.048872180451127e-06, "loss": 51.1967, "step": 7920 }, { "epoch": 188.59701492537314, "grad_norm": 78.65968322753906, "learning_rate": 9.047619047619049e-06, "loss": 49.6068, "step": 7921 }, { "epoch": 188.62089552238805, "grad_norm": 107.99015045166016, "learning_rate": 9.046365914786968e-06, "loss": 52.1905, "step": 7922 }, { "epoch": 188.644776119403, "grad_norm": 67.97981262207031, "learning_rate": 9.045112781954888e-06, "loss": 52.1235, "step": 7923 }, { "epoch": 188.6686567164179, "grad_norm": 61.18889617919922, "learning_rate": 9.043859649122807e-06, "loss": 52.9808, "step": 7924 }, { "epoch": 188.69253731343284, "grad_norm": 74.68904876708984, "learning_rate": 9.042606516290727e-06, "loss": 50.4739, "step": 7925 }, { "epoch": 188.71641791044777, "grad_norm": 64.90814971923828, "learning_rate": 9.041353383458648e-06, "loss": 52.8017, "step": 7926 }, { "epoch": 188.74029850746268, "grad_norm": 90.75438690185547, "learning_rate": 9.040100250626566e-06, "loss": 52.6495, "step": 7927 }, { "epoch": 188.76417910447762, "grad_norm": 70.7194595336914, "learning_rate": 9.038847117794488e-06, "loss": 51.2997, "step": 7928 }, { "epoch": 188.78805970149253, "grad_norm": 68.57101440429688, "learning_rate": 9.037593984962407e-06, "loss": 53.6467, "step": 7929 }, { "epoch": 188.81194029850747, "grad_norm": 70.22977447509766, "learning_rate": 9.036340852130327e-06, "loss": 50.1726, "step": 7930 }, { "epoch": 188.83582089552237, "grad_norm": 45.77416229248047, "learning_rate": 9.035087719298246e-06, "loss": 50.1196, "step": 7931 }, { "epoch": 188.8597014925373, "grad_norm": 84.61479187011719, "learning_rate": 9.033834586466166e-06, "loss": 55.4963, "step": 7932 }, { "epoch": 188.88358208955225, "grad_norm": 68.9653091430664, "learning_rate": 9.032581453634086e-06, "loss": 53.8675, "step": 7933 }, { "epoch": 188.90746268656716, "grad_norm": 70.38067626953125, "learning_rate": 9.031328320802005e-06, "loss": 50.8352, "step": 7934 }, { "epoch": 188.9313432835821, "grad_norm": 92.68718719482422, "learning_rate": 9.030075187969925e-06, "loss": 50.6091, "step": 7935 }, { "epoch": 188.955223880597, "grad_norm": 64.3499984741211, "learning_rate": 9.028822055137846e-06, "loss": 54.2168, "step": 7936 }, { "epoch": 188.97910447761194, "grad_norm": 144.99208068847656, "learning_rate": 9.027568922305766e-06, "loss": 52.3189, "step": 7937 }, { "epoch": 189.0, "grad_norm": 94.81573486328125, "learning_rate": 9.026315789473685e-06, "loss": 45.4228, "step": 7938 }, { "epoch": 189.02388059701494, "grad_norm": 82.54339599609375, "learning_rate": 9.025062656641605e-06, "loss": 51.6774, "step": 7939 }, { "epoch": 189.04776119402985, "grad_norm": 89.21769714355469, "learning_rate": 9.023809523809524e-06, "loss": 51.129, "step": 7940 }, { "epoch": 189.07164179104478, "grad_norm": 99.30877685546875, "learning_rate": 9.022556390977444e-06, "loss": 53.2338, "step": 7941 }, { "epoch": 189.0955223880597, "grad_norm": 80.18331909179688, "learning_rate": 9.021303258145364e-06, "loss": 51.5768, "step": 7942 }, { "epoch": 189.11940298507463, "grad_norm": 104.38993835449219, "learning_rate": 9.020050125313285e-06, "loss": 52.6943, "step": 7943 }, { "epoch": 189.14328358208957, "grad_norm": 65.29129791259766, "learning_rate": 9.018796992481203e-06, "loss": 52.3297, "step": 7944 }, { "epoch": 189.16716417910447, "grad_norm": 157.73797607421875, "learning_rate": 9.017543859649124e-06, "loss": 52.3911, "step": 7945 }, { "epoch": 189.1910447761194, "grad_norm": 123.55492401123047, "learning_rate": 9.016290726817044e-06, "loss": 51.8406, "step": 7946 }, { "epoch": 189.21492537313432, "grad_norm": 92.67152404785156, "learning_rate": 9.015037593984963e-06, "loss": 53.1862, "step": 7947 }, { "epoch": 189.23880597014926, "grad_norm": 89.75629425048828, "learning_rate": 9.013784461152883e-06, "loss": 53.6459, "step": 7948 }, { "epoch": 189.26268656716417, "grad_norm": 104.82584381103516, "learning_rate": 9.012531328320803e-06, "loss": 51.5277, "step": 7949 }, { "epoch": 189.2865671641791, "grad_norm": 84.31354522705078, "learning_rate": 9.011278195488722e-06, "loss": 51.0429, "step": 7950 }, { "epoch": 189.31044776119404, "grad_norm": 129.83505249023438, "learning_rate": 9.010025062656642e-06, "loss": 51.312, "step": 7951 }, { "epoch": 189.33432835820895, "grad_norm": 111.79363250732422, "learning_rate": 9.008771929824563e-06, "loss": 52.9559, "step": 7952 }, { "epoch": 189.3582089552239, "grad_norm": 102.4739990234375, "learning_rate": 9.007518796992481e-06, "loss": 53.0077, "step": 7953 }, { "epoch": 189.3820895522388, "grad_norm": 102.056640625, "learning_rate": 9.006265664160402e-06, "loss": 51.0904, "step": 7954 }, { "epoch": 189.40597014925373, "grad_norm": 112.38508605957031, "learning_rate": 9.005012531328322e-06, "loss": 56.6613, "step": 7955 }, { "epoch": 189.42985074626867, "grad_norm": 96.45929718017578, "learning_rate": 9.003759398496241e-06, "loss": 53.1833, "step": 7956 }, { "epoch": 189.45373134328358, "grad_norm": 108.03256225585938, "learning_rate": 9.002506265664161e-06, "loss": 52.3652, "step": 7957 }, { "epoch": 189.47761194029852, "grad_norm": 99.21728515625, "learning_rate": 9.00125313283208e-06, "loss": 52.947, "step": 7958 }, { "epoch": 189.50149253731342, "grad_norm": 97.00907897949219, "learning_rate": 9e-06, "loss": 53.861, "step": 7959 }, { "epoch": 189.52537313432836, "grad_norm": 88.09246063232422, "learning_rate": 8.99874686716792e-06, "loss": 51.9903, "step": 7960 }, { "epoch": 189.54925373134327, "grad_norm": 144.51100158691406, "learning_rate": 8.997493734335841e-06, "loss": 51.6353, "step": 7961 }, { "epoch": 189.5731343283582, "grad_norm": 137.4646453857422, "learning_rate": 8.99624060150376e-06, "loss": 52.1263, "step": 7962 }, { "epoch": 189.59701492537314, "grad_norm": 82.87004852294922, "learning_rate": 8.99498746867168e-06, "loss": 53.4347, "step": 7963 }, { "epoch": 189.62089552238805, "grad_norm": 73.51374053955078, "learning_rate": 8.9937343358396e-06, "loss": 54.3519, "step": 7964 }, { "epoch": 189.644776119403, "grad_norm": 124.1756362915039, "learning_rate": 8.99248120300752e-06, "loss": 51.7761, "step": 7965 }, { "epoch": 189.6686567164179, "grad_norm": 103.279052734375, "learning_rate": 8.991228070175439e-06, "loss": 52.0611, "step": 7966 }, { "epoch": 189.69253731343284, "grad_norm": 114.74515533447266, "learning_rate": 8.989974937343359e-06, "loss": 52.4413, "step": 7967 }, { "epoch": 189.71641791044777, "grad_norm": 117.58612823486328, "learning_rate": 8.988721804511278e-06, "loss": 52.8869, "step": 7968 }, { "epoch": 189.74029850746268, "grad_norm": 97.4476089477539, "learning_rate": 8.9874686716792e-06, "loss": 54.2015, "step": 7969 }, { "epoch": 189.76417910447762, "grad_norm": 84.25241088867188, "learning_rate": 8.98621553884712e-06, "loss": 50.7358, "step": 7970 }, { "epoch": 189.78805970149253, "grad_norm": 120.82125854492188, "learning_rate": 8.984962406015039e-06, "loss": 55.0902, "step": 7971 }, { "epoch": 189.81194029850747, "grad_norm": 109.24608612060547, "learning_rate": 8.983709273182958e-06, "loss": 52.6568, "step": 7972 }, { "epoch": 189.83582089552237, "grad_norm": 94.20904541015625, "learning_rate": 8.982456140350878e-06, "loss": 51.7781, "step": 7973 }, { "epoch": 189.8597014925373, "grad_norm": 90.83358764648438, "learning_rate": 8.981203007518798e-06, "loss": 51.6348, "step": 7974 }, { "epoch": 189.88358208955225, "grad_norm": 103.75609588623047, "learning_rate": 8.979949874686717e-06, "loss": 52.4758, "step": 7975 }, { "epoch": 189.90746268656716, "grad_norm": 81.22749328613281, "learning_rate": 8.978696741854638e-06, "loss": 53.4991, "step": 7976 }, { "epoch": 189.9313432835821, "grad_norm": 122.41837310791016, "learning_rate": 8.977443609022556e-06, "loss": 53.3596, "step": 7977 }, { "epoch": 189.955223880597, "grad_norm": 93.01902770996094, "learning_rate": 8.976190476190478e-06, "loss": 53.743, "step": 7978 }, { "epoch": 189.97910447761194, "grad_norm": 110.40689086914062, "learning_rate": 8.974937343358396e-06, "loss": 54.3228, "step": 7979 }, { "epoch": 190.0, "grad_norm": 109.52339172363281, "learning_rate": 8.973684210526317e-06, "loss": 47.9354, "step": 7980 }, { "epoch": 190.0, "step": 7980, "total_flos": 3.92332713507634e+17, "train_loss": 4.278731836591448, "train_runtime": 25654.6059, "train_samples_per_second": 39.637, "train_steps_per_second": 0.311 }, { "epoch": 190.02388059701494, "grad_norm": Infinity, "learning_rate": 1e-05, "loss": 52.4803, "step": 7981 }, { "epoch": 190.04776119402985, "grad_norm": Infinity, "learning_rate": 1e-05, "loss": 52.896, "step": 7982 }, { "epoch": 190.07164179104478, "grad_norm": 441.7268371582031, "learning_rate": 1e-05, "loss": 51.0126, "step": 7983 }, { "epoch": 190.0955223880597, "grad_norm": 493.6844482421875, "learning_rate": 9.998809523809524e-06, "loss": 55.3637, "step": 7984 }, { "epoch": 190.11940298507463, "grad_norm": 251.88214111328125, "learning_rate": 9.997619047619048e-06, "loss": 48.297, "step": 7985 }, { "epoch": 190.14328358208957, "grad_norm": 171.21153259277344, "learning_rate": 9.996428571428572e-06, "loss": 43.8059, "step": 7986 }, { "epoch": 190.16716417910447, "grad_norm": 102.77701568603516, "learning_rate": 9.995238095238095e-06, "loss": 43.0885, "step": 7987 }, { "epoch": 190.1910447761194, "grad_norm": 99.9967041015625, "learning_rate": 9.99404761904762e-06, "loss": 43.7896, "step": 7988 }, { "epoch": 190.21492537313432, "grad_norm": 66.79180145263672, "learning_rate": 9.992857142857144e-06, "loss": 40.7604, "step": 7989 }, { "epoch": 190.23880597014926, "grad_norm": 49.20097732543945, "learning_rate": 9.991666666666668e-06, "loss": 43.0224, "step": 7990 }, { "epoch": 190.26268656716417, "grad_norm": 66.32890319824219, "learning_rate": 9.990476190476191e-06, "loss": 40.7674, "step": 7991 }, { "epoch": 190.2865671641791, "grad_norm": 44.98344421386719, "learning_rate": 9.989285714285715e-06, "loss": 41.5453, "step": 7992 }, { "epoch": 190.31044776119404, "grad_norm": 32.713680267333984, "learning_rate": 9.988095238095239e-06, "loss": 40.4575, "step": 7993 }, { "epoch": 190.33432835820895, "grad_norm": 28.200599670410156, "learning_rate": 9.986904761904764e-06, "loss": 40.2662, "step": 7994 }, { "epoch": 190.3582089552239, "grad_norm": 26.295700073242188, "learning_rate": 9.985714285714286e-06, "loss": 41.0945, "step": 7995 }, { "epoch": 190.3820895522388, "grad_norm": 23.81634521484375, "learning_rate": 9.984523809523811e-06, "loss": 39.8172, "step": 7996 }, { "epoch": 190.40597014925373, "grad_norm": 23.616661071777344, "learning_rate": 9.983333333333333e-06, "loss": 39.8794, "step": 7997 }, { "epoch": 190.42985074626867, "grad_norm": 25.093244552612305, "learning_rate": 9.982142857142858e-06, "loss": 40.4312, "step": 7998 }, { "epoch": 190.45373134328358, "grad_norm": 20.444486618041992, "learning_rate": 9.980952380952382e-06, "loss": 40.2981, "step": 7999 }, { "epoch": 190.47761194029852, "grad_norm": 32.319217681884766, "learning_rate": 9.979761904761906e-06, "loss": 40.1029, "step": 8000 }, { "epoch": 190.50149253731342, "grad_norm": 27.1708984375, "learning_rate": 9.97857142857143e-06, "loss": 40.2249, "step": 8001 }, { "epoch": 190.52537313432836, "grad_norm": NaN, "learning_rate": 9.977380952380953e-06, "loss": 37.4824, "step": 8002 }, { "epoch": 190.54925373134327, "grad_norm": 17.54867935180664, "learning_rate": 9.977380952380953e-06, "loss": 38.6291, "step": 8003 }, { "epoch": 190.5731343283582, "grad_norm": 16.21549415588379, "learning_rate": 9.976190476190477e-06, "loss": 38.1713, "step": 8004 }, { "epoch": 190.59701492537314, "grad_norm": 18.15376853942871, "learning_rate": 9.975000000000002e-06, "loss": 38.2825, "step": 8005 }, { "epoch": 190.62089552238805, "grad_norm": 23.3570613861084, "learning_rate": 9.973809523809524e-06, "loss": 39.1566, "step": 8006 }, { "epoch": 190.644776119403, "grad_norm": 26.723541259765625, "learning_rate": 9.972619047619049e-06, "loss": 39.1849, "step": 8007 }, { "epoch": 190.6686567164179, "grad_norm": 16.985681533813477, "learning_rate": 9.971428571428571e-06, "loss": 37.7583, "step": 8008 }, { "epoch": 190.69253731343284, "grad_norm": 30.02682876586914, "learning_rate": 9.970238095238096e-06, "loss": 40.4904, "step": 8009 }, { "epoch": 190.71641791044777, "grad_norm": 20.605270385742188, "learning_rate": 9.96904761904762e-06, "loss": 38.376, "step": 8010 }, { "epoch": 190.74029850746268, "grad_norm": 16.961578369140625, "learning_rate": 9.967857142857144e-06, "loss": 39.2046, "step": 8011 }, { "epoch": 190.76417910447762, "grad_norm": 17.805517196655273, "learning_rate": 9.966666666666667e-06, "loss": 40.0389, "step": 8012 }, { "epoch": 190.78805970149253, "grad_norm": 19.87175750732422, "learning_rate": 9.965476190476191e-06, "loss": 40.2082, "step": 8013 }, { "epoch": 190.81194029850747, "grad_norm": 14.827139854431152, "learning_rate": 9.964285714285714e-06, "loss": 38.3013, "step": 8014 }, { "epoch": 190.83582089552237, "grad_norm": 21.773862838745117, "learning_rate": 9.963095238095238e-06, "loss": 38.1243, "step": 8015 }, { "epoch": 190.8597014925373, "grad_norm": 17.08941078186035, "learning_rate": 9.961904761904763e-06, "loss": 38.4414, "step": 8016 }, { "epoch": 190.88358208955225, "grad_norm": 21.989667892456055, "learning_rate": 9.960714285714287e-06, "loss": 37.9022, "step": 8017 }, { "epoch": 190.90746268656716, "grad_norm": 20.80973243713379, "learning_rate": 9.95952380952381e-06, "loss": 39.3492, "step": 8018 }, { "epoch": 190.9313432835821, "grad_norm": 16.177169799804688, "learning_rate": 9.958333333333334e-06, "loss": 37.8216, "step": 8019 }, { "epoch": 190.955223880597, "grad_norm": 15.633136749267578, "learning_rate": 9.957142857142858e-06, "loss": 39.4773, "step": 8020 }, { "epoch": 190.97910447761194, "grad_norm": 16.583450317382812, "learning_rate": 9.955952380952382e-06, "loss": 38.428, "step": 8021 }, { "epoch": 191.0, "grad_norm": 17.35057830810547, "learning_rate": 9.954761904761905e-06, "loss": 32.5048, "step": 8022 }, { "epoch": 191.02388059701494, "grad_norm": 15.571488380432129, "learning_rate": 9.953571428571429e-06, "loss": 38.6245, "step": 8023 }, { "epoch": 191.04776119402985, "grad_norm": 16.987716674804688, "learning_rate": 9.952380952380954e-06, "loss": 38.1887, "step": 8024 }, { "epoch": 191.07164179104478, "grad_norm": 15.907127380371094, "learning_rate": 9.951190476190476e-06, "loss": 38.5188, "step": 8025 }, { "epoch": 191.0955223880597, "grad_norm": 14.632792472839355, "learning_rate": 9.950000000000001e-06, "loss": 39.141, "step": 8026 }, { "epoch": 191.11940298507463, "grad_norm": 17.629247665405273, "learning_rate": 9.948809523809525e-06, "loss": 38.007, "step": 8027 }, { "epoch": 191.14328358208957, "grad_norm": NaN, "learning_rate": 9.947619047619049e-06, "loss": 69.9276, "step": 8028 }, { "epoch": 191.16716417910447, "grad_norm": 18.94240951538086, "learning_rate": 9.947619047619049e-06, "loss": 37.8418, "step": 8029 }, { "epoch": 191.1910447761194, "grad_norm": 18.839948654174805, "learning_rate": 9.946428571428572e-06, "loss": 37.8526, "step": 8030 }, { "epoch": 191.21492537313432, "grad_norm": 14.232295989990234, "learning_rate": 9.945238095238096e-06, "loss": 38.1558, "step": 8031 }, { "epoch": 191.23880597014926, "grad_norm": 15.381818771362305, "learning_rate": 9.94404761904762e-06, "loss": 37.9595, "step": 8032 }, { "epoch": 191.26268656716417, "grad_norm": 17.140670776367188, "learning_rate": 9.942857142857145e-06, "loss": 38.8779, "step": 8033 }, { "epoch": 191.2865671641791, "grad_norm": 18.122289657592773, "learning_rate": 9.941666666666667e-06, "loss": 38.0931, "step": 8034 }, { "epoch": 191.31044776119404, "grad_norm": 14.274928092956543, "learning_rate": 9.940476190476192e-06, "loss": 38.2908, "step": 8035 }, { "epoch": 191.33432835820895, "grad_norm": 18.1863956451416, "learning_rate": 9.939285714285714e-06, "loss": 37.1521, "step": 8036 }, { "epoch": 191.3582089552239, "grad_norm": 24.29243278503418, "learning_rate": 9.93809523809524e-06, "loss": 35.9136, "step": 8037 }, { "epoch": 191.3820895522388, "grad_norm": 18.86484146118164, "learning_rate": 9.936904761904763e-06, "loss": 38.4593, "step": 8038 }, { "epoch": 191.40597014925373, "grad_norm": 13.947208404541016, "learning_rate": 9.935714285714286e-06, "loss": 37.8925, "step": 8039 }, { "epoch": 191.42985074626867, "grad_norm": 23.31012535095215, "learning_rate": 9.93452380952381e-06, "loss": 37.635, "step": 8040 }, { "epoch": 191.45373134328358, "grad_norm": 16.41149139404297, "learning_rate": 9.933333333333334e-06, "loss": 39.8144, "step": 8041 }, { "epoch": 191.47761194029852, "grad_norm": 18.565690994262695, "learning_rate": 9.932142857142857e-06, "loss": 38.132, "step": 8042 }, { "epoch": 191.50149253731342, "grad_norm": 26.473716735839844, "learning_rate": 9.930952380952383e-06, "loss": 38.2176, "step": 8043 }, { "epoch": 191.52537313432836, "grad_norm": 18.78125762939453, "learning_rate": 9.929761904761906e-06, "loss": 38.9379, "step": 8044 }, { "epoch": 191.54925373134327, "grad_norm": 34.09467315673828, "learning_rate": 9.92857142857143e-06, "loss": 38.1285, "step": 8045 }, { "epoch": 191.5731343283582, "grad_norm": 27.37683868408203, "learning_rate": 9.927380952380953e-06, "loss": 38.5859, "step": 8046 }, { "epoch": 191.59701492537314, "grad_norm": 38.51087188720703, "learning_rate": 9.926190476190477e-06, "loss": 38.3501, "step": 8047 }, { "epoch": 191.62089552238805, "grad_norm": NaN, "learning_rate": 9.925e-06, "loss": 44.5791, "step": 8048 }, { "epoch": 191.644776119403, "grad_norm": 29.464855194091797, "learning_rate": 9.925e-06, "loss": 39.7041, "step": 8049 }, { "epoch": 191.6686567164179, "grad_norm": 32.26063919067383, "learning_rate": 9.923809523809524e-06, "loss": 38.6149, "step": 8050 }, { "epoch": 191.69253731343284, "grad_norm": 27.70380401611328, "learning_rate": 9.922619047619048e-06, "loss": 38.1828, "step": 8051 }, { "epoch": 191.71641791044777, "grad_norm": 33.598445892333984, "learning_rate": 9.921428571428572e-06, "loss": 38.5361, "step": 8052 }, { "epoch": 191.74029850746268, "grad_norm": 30.742996215820312, "learning_rate": 9.920238095238097e-06, "loss": 38.2798, "step": 8053 }, { "epoch": 191.76417910447762, "grad_norm": 26.842199325561523, "learning_rate": 9.91904761904762e-06, "loss": 38.3534, "step": 8054 }, { "epoch": 191.78805970149253, "grad_norm": 25.476842880249023, "learning_rate": 9.917857142857144e-06, "loss": 38.7694, "step": 8055 }, { "epoch": 191.81194029850747, "grad_norm": 33.05502700805664, "learning_rate": 9.916666666666668e-06, "loss": 38.1734, "step": 8056 }, { "epoch": 191.83582089552237, "grad_norm": 27.849227905273438, "learning_rate": 9.915476190476191e-06, "loss": 36.4576, "step": 8057 }, { "epoch": 191.8597014925373, "grad_norm": 29.301816940307617, "learning_rate": 9.914285714285715e-06, "loss": 38.1189, "step": 8058 }, { "epoch": 191.88358208955225, "grad_norm": 31.922199249267578, "learning_rate": 9.91309523809524e-06, "loss": 38.8657, "step": 8059 }, { "epoch": 191.90746268656716, "grad_norm": 26.626665115356445, "learning_rate": 9.911904761904762e-06, "loss": 37.9762, "step": 8060 }, { "epoch": 191.9313432835821, "grad_norm": 23.473674774169922, "learning_rate": 9.910714285714288e-06, "loss": 37.5389, "step": 8061 }, { "epoch": 191.955223880597, "grad_norm": 32.28257751464844, "learning_rate": 9.90952380952381e-06, "loss": 38.1126, "step": 8062 }, { "epoch": 191.97910447761194, "grad_norm": 25.231307983398438, "learning_rate": 9.908333333333335e-06, "loss": 38.2216, "step": 8063 }, { "epoch": 192.0, "grad_norm": NaN, "learning_rate": 9.907142857142858e-06, "loss": 54.0147, "step": 8064 }, { "epoch": 192.02388059701494, "grad_norm": 31.353717803955078, "learning_rate": 9.907142857142858e-06, "loss": 38.4074, "step": 8065 }, { "epoch": 192.04776119402985, "grad_norm": 30.8924503326416, "learning_rate": 9.905952380952382e-06, "loss": 37.7876, "step": 8066 }, { "epoch": 192.07164179104478, "grad_norm": 28.787822723388672, "learning_rate": 9.904761904761906e-06, "loss": 38.7704, "step": 8067 }, { "epoch": 192.0955223880597, "grad_norm": 29.935930252075195, "learning_rate": 9.90357142857143e-06, "loss": 37.1704, "step": 8068 }, { "epoch": 192.11940298507463, "grad_norm": 30.520862579345703, "learning_rate": 9.902380952380953e-06, "loss": 38.6613, "step": 8069 }, { "epoch": 192.14328358208957, "grad_norm": 22.295175552368164, "learning_rate": 9.901190476190476e-06, "loss": 38.7869, "step": 8070 }, { "epoch": 192.16716417910447, "grad_norm": NaN, "learning_rate": 9.9e-06, "loss": 41.744, "step": 8071 }, { "epoch": 192.1910447761194, "grad_norm": 32.97742462158203, "learning_rate": 9.9e-06, "loss": 39.0328, "step": 8072 }, { "epoch": 192.21492537313432, "grad_norm": 28.674301147460938, "learning_rate": 9.898809523809525e-06, "loss": 38.5525, "step": 8073 }, { "epoch": 192.23880597014926, "grad_norm": 31.001100540161133, "learning_rate": 9.897619047619047e-06, "loss": 38.7256, "step": 8074 }, { "epoch": 192.26268656716417, "grad_norm": 27.50682258605957, "learning_rate": 9.896428571428573e-06, "loss": 38.6324, "step": 8075 }, { "epoch": 192.2865671641791, "grad_norm": 29.152971267700195, "learning_rate": 9.895238095238096e-06, "loss": 37.3363, "step": 8076 }, { "epoch": 192.31044776119404, "grad_norm": 23.952505111694336, "learning_rate": 9.89404761904762e-06, "loss": 38.3122, "step": 8077 }, { "epoch": 192.33432835820895, "grad_norm": 34.11610412597656, "learning_rate": 9.892857142857143e-06, "loss": 38.5167, "step": 8078 }, { "epoch": 192.3582089552239, "grad_norm": 25.457550048828125, "learning_rate": 9.891666666666667e-06, "loss": 37.1989, "step": 8079 }, { "epoch": 192.3820895522388, "grad_norm": 28.14068603515625, "learning_rate": 9.89047619047619e-06, "loss": 38.4137, "step": 8080 }, { "epoch": 192.40597014925373, "grad_norm": 29.55773162841797, "learning_rate": 9.889285714285714e-06, "loss": 37.9116, "step": 8081 }, { "epoch": 192.42985074626867, "grad_norm": NaN, "learning_rate": 9.88809523809524e-06, "loss": 33.8244, "step": 8082 }, { "epoch": 192.45373134328358, "grad_norm": 24.517562866210938, "learning_rate": 9.88809523809524e-06, "loss": 37.5466, "step": 8083 }, { "epoch": 192.47761194029852, "grad_norm": 24.37813949584961, "learning_rate": 9.886904761904763e-06, "loss": 37.6051, "step": 8084 }, { "epoch": 192.50149253731342, "grad_norm": 28.54468536376953, "learning_rate": 9.885714285714287e-06, "loss": 38.4495, "step": 8085 }, { "epoch": 192.52537313432836, "grad_norm": 22.6004581451416, "learning_rate": 9.88452380952381e-06, "loss": 37.1362, "step": 8086 }, { "epoch": 192.54925373134327, "grad_norm": 30.039899826049805, "learning_rate": 9.883333333333334e-06, "loss": 37.9443, "step": 8087 }, { "epoch": 192.5731343283582, "grad_norm": 28.983667373657227, "learning_rate": 9.882142857142858e-06, "loss": 37.8541, "step": 8088 }, { "epoch": 192.59701492537314, "grad_norm": 26.619503021240234, "learning_rate": 9.880952380952381e-06, "loss": 37.3685, "step": 8089 }, { "epoch": 192.62089552238805, "grad_norm": 25.137807846069336, "learning_rate": 9.879761904761905e-06, "loss": 36.9982, "step": 8090 }, { "epoch": 192.644776119403, "grad_norm": 29.406036376953125, "learning_rate": 9.87857142857143e-06, "loss": 38.4477, "step": 8091 }, { "epoch": 192.6686567164179, "grad_norm": 25.904155731201172, "learning_rate": 9.877380952380952e-06, "loss": 36.9548, "step": 8092 }, { "epoch": 192.69253731343284, "grad_norm": 29.080020904541016, "learning_rate": 9.876190476190478e-06, "loss": 38.5147, "step": 8093 }, { "epoch": 192.71641791044777, "grad_norm": 24.259777069091797, "learning_rate": 9.875000000000001e-06, "loss": 38.4572, "step": 8094 }, { "epoch": 192.74029850746268, "grad_norm": 30.02660369873047, "learning_rate": 9.873809523809525e-06, "loss": 38.2938, "step": 8095 }, { "epoch": 192.76417910447762, "grad_norm": 26.388124465942383, "learning_rate": 9.872619047619048e-06, "loss": 38.2691, "step": 8096 }, { "epoch": 192.78805970149253, "grad_norm": 29.877897262573242, "learning_rate": 9.871428571428572e-06, "loss": 37.2446, "step": 8097 }, { "epoch": 192.81194029850747, "grad_norm": 24.555816650390625, "learning_rate": 9.870238095238096e-06, "loss": 37.7283, "step": 8098 }, { "epoch": 192.83582089552237, "grad_norm": 29.00787925720215, "learning_rate": 9.869047619047621e-06, "loss": 37.5237, "step": 8099 }, { "epoch": 192.8597014925373, "grad_norm": 26.88437271118164, "learning_rate": 9.867857142857143e-06, "loss": 38.1217, "step": 8100 }, { "epoch": 192.88358208955225, "grad_norm": 27.196168899536133, "learning_rate": 9.866666666666668e-06, "loss": 37.9326, "step": 8101 }, { "epoch": 192.90746268656716, "grad_norm": 25.58395004272461, "learning_rate": 9.86547619047619e-06, "loss": 38.3009, "step": 8102 }, { "epoch": 192.9313432835821, "grad_norm": 30.2125186920166, "learning_rate": 9.864285714285715e-06, "loss": 38.0861, "step": 8103 }, { "epoch": 192.955223880597, "grad_norm": 22.175670623779297, "learning_rate": 9.863095238095239e-06, "loss": 38.6503, "step": 8104 }, { "epoch": 192.97910447761194, "grad_norm": 30.515262603759766, "learning_rate": 9.861904761904763e-06, "loss": 38.1392, "step": 8105 }, { "epoch": 193.0, "grad_norm": 22.113122940063477, "learning_rate": 9.860714285714286e-06, "loss": 34.0352, "step": 8106 }, { "epoch": 193.02388059701494, "grad_norm": 32.851016998291016, "learning_rate": 9.85952380952381e-06, "loss": 37.6278, "step": 8107 }, { "epoch": 193.04776119402985, "grad_norm": 29.324411392211914, "learning_rate": 9.858333333333334e-06, "loss": 39.0323, "step": 8108 }, { "epoch": 193.07164179104478, "grad_norm": 26.576251983642578, "learning_rate": 9.857142857142859e-06, "loss": 38.2389, "step": 8109 }, { "epoch": 193.0955223880597, "grad_norm": 25.70130157470703, "learning_rate": 9.85595238095238e-06, "loss": 37.5488, "step": 8110 }, { "epoch": 193.11940298507463, "grad_norm": 28.945816040039062, "learning_rate": 9.854761904761906e-06, "loss": 38.3826, "step": 8111 }, { "epoch": 193.14328358208957, "grad_norm": 26.942113876342773, "learning_rate": 9.85357142857143e-06, "loss": 37.8922, "step": 8112 }, { "epoch": 193.16716417910447, "grad_norm": 28.610998153686523, "learning_rate": 9.852380952380953e-06, "loss": 37.7559, "step": 8113 }, { "epoch": 193.1910447761194, "grad_norm": 27.649139404296875, "learning_rate": 9.851190476190477e-06, "loss": 38.1339, "step": 8114 }, { "epoch": 193.21492537313432, "grad_norm": 24.258901596069336, "learning_rate": 9.85e-06, "loss": 38.9597, "step": 8115 }, { "epoch": 193.23880597014926, "grad_norm": 21.32745361328125, "learning_rate": 9.848809523809524e-06, "loss": 37.4101, "step": 8116 }, { "epoch": 193.26268656716417, "grad_norm": 25.53805923461914, "learning_rate": 9.847619047619048e-06, "loss": 35.1791, "step": 8117 }, { "epoch": 193.2865671641791, "grad_norm": 21.969554901123047, "learning_rate": 9.846428571428573e-06, "loss": 39.1762, "step": 8118 }, { "epoch": 193.31044776119404, "grad_norm": 37.39038848876953, "learning_rate": 9.845238095238097e-06, "loss": 37.2427, "step": 8119 }, { "epoch": 193.33432835820895, "grad_norm": 33.77116394042969, "learning_rate": 9.84404761904762e-06, "loss": 37.6819, "step": 8120 }, { "epoch": 193.3582089552239, "grad_norm": 23.498018264770508, "learning_rate": 9.842857142857144e-06, "loss": 38.6812, "step": 8121 }, { "epoch": 193.3820895522388, "grad_norm": 25.90577507019043, "learning_rate": 9.841666666666668e-06, "loss": 38.1744, "step": 8122 }, { "epoch": 193.40597014925373, "grad_norm": 25.708383560180664, "learning_rate": 9.840476190476191e-06, "loss": 37.9103, "step": 8123 }, { "epoch": 193.42985074626867, "grad_norm": 19.413223266601562, "learning_rate": 9.839285714285715e-06, "loss": 37.3252, "step": 8124 }, { "epoch": 193.45373134328358, "grad_norm": 30.142765045166016, "learning_rate": 9.838095238095238e-06, "loss": 38.1061, "step": 8125 }, { "epoch": 193.47761194029852, "grad_norm": 24.21076774597168, "learning_rate": 9.836904761904764e-06, "loss": 37.9563, "step": 8126 }, { "epoch": 193.50149253731342, "grad_norm": 30.14665985107422, "learning_rate": 9.835714285714286e-06, "loss": 37.8519, "step": 8127 }, { "epoch": 193.52537313432836, "grad_norm": 27.567338943481445, "learning_rate": 9.834523809523811e-06, "loss": 37.6953, "step": 8128 }, { "epoch": 193.54925373134327, "grad_norm": 29.56963348388672, "learning_rate": 9.833333333333333e-06, "loss": 38.6179, "step": 8129 }, { "epoch": 193.5731343283582, "grad_norm": 25.726116180419922, "learning_rate": 9.832142857142858e-06, "loss": 39.0633, "step": 8130 }, { "epoch": 193.59701492537314, "grad_norm": 28.918392181396484, "learning_rate": 9.830952380952382e-06, "loss": 37.2939, "step": 8131 }, { "epoch": 193.62089552238805, "grad_norm": 26.439655303955078, "learning_rate": 9.829761904761905e-06, "loss": 36.7785, "step": 8132 }, { "epoch": 193.644776119403, "grad_norm": 30.66209602355957, "learning_rate": 9.828571428571429e-06, "loss": 38.4436, "step": 8133 }, { "epoch": 193.6686567164179, "grad_norm": 28.186386108398438, "learning_rate": 9.827380952380953e-06, "loss": 37.3892, "step": 8134 }, { "epoch": 193.69253731343284, "grad_norm": 29.687488555908203, "learning_rate": 9.826190476190476e-06, "loss": 36.881, "step": 8135 }, { "epoch": 193.71641791044777, "grad_norm": NaN, "learning_rate": 9.825000000000002e-06, "loss": 61.7147, "step": 8136 }, { "epoch": 193.74029850746268, "grad_norm": 27.555301666259766, "learning_rate": 9.825000000000002e-06, "loss": 38.5232, "step": 8137 }, { "epoch": 193.76417910447762, "grad_norm": 24.061349868774414, "learning_rate": 9.823809523809524e-06, "loss": 38.7169, "step": 8138 }, { "epoch": 193.78805970149253, "grad_norm": 23.157058715820312, "learning_rate": 9.822619047619049e-06, "loss": 37.1344, "step": 8139 }, { "epoch": 193.81194029850747, "grad_norm": 26.823884963989258, "learning_rate": 9.821428571428573e-06, "loss": 37.9303, "step": 8140 }, { "epoch": 193.83582089552237, "grad_norm": 23.348529815673828, "learning_rate": 9.820238095238096e-06, "loss": 36.8772, "step": 8141 }, { "epoch": 193.8597014925373, "grad_norm": 29.376649856567383, "learning_rate": 9.81904761904762e-06, "loss": 38.3394, "step": 8142 }, { "epoch": 193.88358208955225, "grad_norm": 26.600069046020508, "learning_rate": 9.817857142857143e-06, "loss": 38.7027, "step": 8143 }, { "epoch": 193.90746268656716, "grad_norm": 27.550716400146484, "learning_rate": 9.816666666666667e-06, "loss": 38.6352, "step": 8144 }, { "epoch": 193.9313432835821, "grad_norm": 24.643022537231445, "learning_rate": 9.81547619047619e-06, "loss": 38.334, "step": 8145 }, { "epoch": 193.955223880597, "grad_norm": 25.975337982177734, "learning_rate": 9.814285714285716e-06, "loss": 38.192, "step": 8146 }, { "epoch": 193.97910447761194, "grad_norm": 20.999217987060547, "learning_rate": 9.81309523809524e-06, "loss": 37.1881, "step": 8147 }, { "epoch": 194.0, "grad_norm": 23.38616943359375, "learning_rate": 9.811904761904763e-06, "loss": 34.2557, "step": 8148 }, { "epoch": 194.02388059701494, "grad_norm": 19.22756576538086, "learning_rate": 9.810714285714287e-06, "loss": 36.5685, "step": 8149 }, { "epoch": 194.04776119402985, "grad_norm": 29.112741470336914, "learning_rate": 9.80952380952381e-06, "loss": 38.4037, "step": 8150 }, { "epoch": 194.07164179104478, "grad_norm": 23.135292053222656, "learning_rate": 9.808333333333334e-06, "loss": 38.7293, "step": 8151 }, { "epoch": 194.0955223880597, "grad_norm": 29.674514770507812, "learning_rate": 9.807142857142858e-06, "loss": 36.4426, "step": 8152 }, { "epoch": 194.11940298507463, "grad_norm": 26.233339309692383, "learning_rate": 9.805952380952381e-06, "loss": 38.8275, "step": 8153 }, { "epoch": 194.14328358208957, "grad_norm": 29.903133392333984, "learning_rate": 9.804761904761907e-06, "loss": 38.7159, "step": 8154 }, { "epoch": 194.16716417910447, "grad_norm": 23.831342697143555, "learning_rate": 9.803571428571428e-06, "loss": 38.1488, "step": 8155 }, { "epoch": 194.1910447761194, "grad_norm": 27.747787475585938, "learning_rate": 9.802380952380954e-06, "loss": 37.2102, "step": 8156 }, { "epoch": 194.21492537313432, "grad_norm": 22.538318634033203, "learning_rate": 9.801190476190477e-06, "loss": 39.4014, "step": 8157 }, { "epoch": 194.23880597014926, "grad_norm": 31.50472068786621, "learning_rate": 9.800000000000001e-06, "loss": 37.8116, "step": 8158 }, { "epoch": 194.26268656716417, "grad_norm": 26.331830978393555, "learning_rate": 9.798809523809525e-06, "loss": 37.7444, "step": 8159 }, { "epoch": 194.2865671641791, "grad_norm": 29.495019912719727, "learning_rate": 9.797619047619048e-06, "loss": 36.9883, "step": 8160 }, { "epoch": 194.31044776119404, "grad_norm": 28.174238204956055, "learning_rate": 9.796428571428572e-06, "loss": 37.2823, "step": 8161 }, { "epoch": 194.33432835820895, "grad_norm": 28.516429901123047, "learning_rate": 9.795238095238097e-06, "loss": 37.691, "step": 8162 }, { "epoch": 194.3582089552239, "grad_norm": 25.835765838623047, "learning_rate": 9.794047619047619e-06, "loss": 38.1477, "step": 8163 }, { "epoch": 194.3820895522388, "grad_norm": 25.793895721435547, "learning_rate": 9.792857142857144e-06, "loss": 36.5988, "step": 8164 }, { "epoch": 194.40597014925373, "grad_norm": 20.387521743774414, "learning_rate": 9.791666666666666e-06, "loss": 38.1124, "step": 8165 }, { "epoch": 194.42985074626867, "grad_norm": 24.153411865234375, "learning_rate": 9.790476190476192e-06, "loss": 38.2235, "step": 8166 }, { "epoch": 194.45373134328358, "grad_norm": 17.82645034790039, "learning_rate": 9.789285714285715e-06, "loss": 38.8839, "step": 8167 }, { "epoch": 194.47761194029852, "grad_norm": 26.742172241210938, "learning_rate": 9.788095238095239e-06, "loss": 37.5795, "step": 8168 }, { "epoch": 194.50149253731342, "grad_norm": 19.46932601928711, "learning_rate": 9.786904761904763e-06, "loss": 38.3185, "step": 8169 }, { "epoch": 194.52537313432836, "grad_norm": 27.765174865722656, "learning_rate": 9.785714285714286e-06, "loss": 37.7904, "step": 8170 }, { "epoch": 194.54925373134327, "grad_norm": 23.08241081237793, "learning_rate": 9.78452380952381e-06, "loss": 37.9733, "step": 8171 }, { "epoch": 194.5731343283582, "grad_norm": 23.362966537475586, "learning_rate": 9.783333333333335e-06, "loss": 38.658, "step": 8172 }, { "epoch": 194.59701492537314, "grad_norm": 20.630313873291016, "learning_rate": 9.782142857142857e-06, "loss": 38.6314, "step": 8173 }, { "epoch": 194.62089552238805, "grad_norm": 22.159696578979492, "learning_rate": 9.780952380952382e-06, "loss": 37.8746, "step": 8174 }, { "epoch": 194.644776119403, "grad_norm": 17.65380859375, "learning_rate": 9.779761904761906e-06, "loss": 37.3745, "step": 8175 }, { "epoch": 194.6686567164179, "grad_norm": 24.66594886779785, "learning_rate": 9.77857142857143e-06, "loss": 38.3225, "step": 8176 }, { "epoch": 194.69253731343284, "grad_norm": 18.938581466674805, "learning_rate": 9.777380952380953e-06, "loss": 38.4467, "step": 8177 }, { "epoch": 194.71641791044777, "grad_norm": 26.19101905822754, "learning_rate": 9.776190476190477e-06, "loss": 37.6916, "step": 8178 }, { "epoch": 194.74029850746268, "grad_norm": 23.304584503173828, "learning_rate": 9.775e-06, "loss": 37.0147, "step": 8179 }, { "epoch": 194.76417910447762, "grad_norm": 22.964975357055664, "learning_rate": 9.773809523809524e-06, "loss": 37.2688, "step": 8180 }, { "epoch": 194.78805970149253, "grad_norm": 20.74350929260254, "learning_rate": 9.77261904761905e-06, "loss": 38.9556, "step": 8181 }, { "epoch": 194.81194029850747, "grad_norm": 20.30811309814453, "learning_rate": 9.771428571428571e-06, "loss": 36.9859, "step": 8182 }, { "epoch": 194.83582089552237, "grad_norm": 19.99317741394043, "learning_rate": 9.770238095238097e-06, "loss": 38.7915, "step": 8183 }, { "epoch": 194.8597014925373, "grad_norm": 19.990209579467773, "learning_rate": 9.76904761904762e-06, "loss": 36.9583, "step": 8184 }, { "epoch": 194.88358208955225, "grad_norm": 18.58809471130371, "learning_rate": 9.767857142857144e-06, "loss": 37.8493, "step": 8185 }, { "epoch": 194.90746268656716, "grad_norm": 19.796863555908203, "learning_rate": 9.766666666666667e-06, "loss": 37.5244, "step": 8186 }, { "epoch": 194.9313432835821, "grad_norm": 16.461299896240234, "learning_rate": 9.765476190476191e-06, "loss": 38.1746, "step": 8187 }, { "epoch": 194.955223880597, "grad_norm": 22.582000732421875, "learning_rate": 9.764285714285715e-06, "loss": 38.2809, "step": 8188 }, { "epoch": 194.97910447761194, "grad_norm": 18.955257415771484, "learning_rate": 9.76309523809524e-06, "loss": 36.9266, "step": 8189 }, { "epoch": 195.0, "grad_norm": 17.642061233520508, "learning_rate": 9.761904761904762e-06, "loss": 32.7614, "step": 8190 }, { "epoch": 195.02388059701494, "grad_norm": 19.75026512145996, "learning_rate": 9.760714285714287e-06, "loss": 38.3124, "step": 8191 }, { "epoch": 195.04776119402985, "grad_norm": 17.81207275390625, "learning_rate": 9.75952380952381e-06, "loss": 38.404, "step": 8192 }, { "epoch": 195.07164179104478, "grad_norm": 17.180660247802734, "learning_rate": 9.758333333333334e-06, "loss": 37.2074, "step": 8193 }, { "epoch": 195.0955223880597, "grad_norm": 18.23331069946289, "learning_rate": 9.757142857142858e-06, "loss": 37.6715, "step": 8194 }, { "epoch": 195.11940298507463, "grad_norm": 15.751823425292969, "learning_rate": 9.755952380952382e-06, "loss": 37.7539, "step": 8195 }, { "epoch": 195.14328358208957, "grad_norm": 20.383882522583008, "learning_rate": 9.754761904761905e-06, "loss": 37.5887, "step": 8196 }, { "epoch": 195.16716417910447, "grad_norm": 16.265350341796875, "learning_rate": 9.753571428571429e-06, "loss": 38.5134, "step": 8197 }, { "epoch": 195.1910447761194, "grad_norm": 18.961380004882812, "learning_rate": 9.752380952380953e-06, "loss": 37.9139, "step": 8198 }, { "epoch": 195.21492537313432, "grad_norm": 17.0926513671875, "learning_rate": 9.751190476190478e-06, "loss": 37.3768, "step": 8199 }, { "epoch": 195.23880597014926, "grad_norm": 17.138843536376953, "learning_rate": 9.75e-06, "loss": 37.9512, "step": 8200 }, { "epoch": 195.26268656716417, "grad_norm": 17.132709503173828, "learning_rate": 9.748809523809525e-06, "loss": 38.0403, "step": 8201 }, { "epoch": 195.2865671641791, "grad_norm": 16.376869201660156, "learning_rate": 9.747619047619049e-06, "loss": 37.8101, "step": 8202 }, { "epoch": 195.31044776119404, "grad_norm": 14.971624374389648, "learning_rate": 9.746428571428572e-06, "loss": 38.0216, "step": 8203 }, { "epoch": 195.33432835820895, "grad_norm": 16.75035285949707, "learning_rate": 9.745238095238096e-06, "loss": 37.994, "step": 8204 }, { "epoch": 195.3582089552239, "grad_norm": 15.411774635314941, "learning_rate": 9.74404761904762e-06, "loss": 38.9679, "step": 8205 }, { "epoch": 195.3820895522388, "grad_norm": 14.596837997436523, "learning_rate": 9.742857142857143e-06, "loss": 36.7789, "step": 8206 }, { "epoch": 195.40597014925373, "grad_norm": 20.539121627807617, "learning_rate": 9.741666666666667e-06, "loss": 37.5518, "step": 8207 }, { "epoch": 195.42985074626867, "grad_norm": 16.56817626953125, "learning_rate": 9.74047619047619e-06, "loss": 37.3836, "step": 8208 }, { "epoch": 195.45373134328358, "grad_norm": 17.15846061706543, "learning_rate": 9.739285714285716e-06, "loss": 37.1393, "step": 8209 }, { "epoch": 195.47761194029852, "grad_norm": 17.65296745300293, "learning_rate": 9.73809523809524e-06, "loss": 38.287, "step": 8210 }, { "epoch": 195.50149253731342, "grad_norm": 14.050993919372559, "learning_rate": 9.736904761904763e-06, "loss": 37.2698, "step": 8211 }, { "epoch": 195.52537313432836, "grad_norm": 16.587993621826172, "learning_rate": 9.735714285714287e-06, "loss": 38.2786, "step": 8212 }, { "epoch": 195.54925373134327, "grad_norm": 15.469551086425781, "learning_rate": 9.73452380952381e-06, "loss": 37.2241, "step": 8213 }, { "epoch": 195.5731343283582, "grad_norm": 15.310493469238281, "learning_rate": 9.733333333333334e-06, "loss": 37.7574, "step": 8214 }, { "epoch": 195.59701492537314, "grad_norm": 12.98195743560791, "learning_rate": 9.732142857142858e-06, "loss": 37.173, "step": 8215 }, { "epoch": 195.62089552238805, "grad_norm": 18.167980194091797, "learning_rate": 9.730952380952383e-06, "loss": 37.4605, "step": 8216 }, { "epoch": 195.644776119403, "grad_norm": 15.992667198181152, "learning_rate": 9.729761904761905e-06, "loss": 36.284, "step": 8217 }, { "epoch": 195.6686567164179, "grad_norm": 11.325750350952148, "learning_rate": 9.72857142857143e-06, "loss": 37.5665, "step": 8218 }, { "epoch": 195.69253731343284, "grad_norm": 17.86125373840332, "learning_rate": 9.727380952380954e-06, "loss": 38.5734, "step": 8219 }, { "epoch": 195.71641791044777, "grad_norm": 15.634565353393555, "learning_rate": 9.726190476190477e-06, "loss": 37.0224, "step": 8220 }, { "epoch": 195.74029850746268, "grad_norm": 15.09487247467041, "learning_rate": 9.725000000000001e-06, "loss": 37.3904, "step": 8221 }, { "epoch": 195.76417910447762, "grad_norm": 18.640703201293945, "learning_rate": 9.723809523809525e-06, "loss": 38.1708, "step": 8222 }, { "epoch": 195.78805970149253, "grad_norm": 20.45812225341797, "learning_rate": 9.722619047619048e-06, "loss": 38.8948, "step": 8223 }, { "epoch": 195.81194029850747, "grad_norm": 15.778278350830078, "learning_rate": 9.721428571428573e-06, "loss": 38.3331, "step": 8224 }, { "epoch": 195.83582089552237, "grad_norm": 19.648868560791016, "learning_rate": 9.720238095238095e-06, "loss": 36.9344, "step": 8225 }, { "epoch": 195.8597014925373, "grad_norm": 18.820405960083008, "learning_rate": 9.71904761904762e-06, "loss": 37.0866, "step": 8226 }, { "epoch": 195.88358208955225, "grad_norm": 16.977508544921875, "learning_rate": 9.717857142857143e-06, "loss": 39.2188, "step": 8227 }, { "epoch": 195.90746268656716, "grad_norm": 18.5307559967041, "learning_rate": 9.716666666666668e-06, "loss": 38.635, "step": 8228 }, { "epoch": 195.9313432835821, "grad_norm": 16.042844772338867, "learning_rate": 9.715476190476192e-06, "loss": 38.3158, "step": 8229 }, { "epoch": 195.955223880597, "grad_norm": 18.010839462280273, "learning_rate": 9.714285714285715e-06, "loss": 38.3673, "step": 8230 }, { "epoch": 195.97910447761194, "grad_norm": 19.28598976135254, "learning_rate": 9.713095238095239e-06, "loss": 37.6234, "step": 8231 }, { "epoch": 196.0, "grad_norm": 12.07419490814209, "learning_rate": 9.711904761904762e-06, "loss": 33.0793, "step": 8232 }, { "epoch": 196.02388059701494, "grad_norm": 18.44976234436035, "learning_rate": 9.710714285714286e-06, "loss": 37.1636, "step": 8233 }, { "epoch": 196.04776119402985, "grad_norm": 15.406002044677734, "learning_rate": 9.70952380952381e-06, "loss": 36.0832, "step": 8234 }, { "epoch": 196.07164179104478, "grad_norm": 16.38665008544922, "learning_rate": 9.708333333333333e-06, "loss": 37.709, "step": 8235 }, { "epoch": 196.0955223880597, "grad_norm": 16.234838485717773, "learning_rate": 9.707142857142859e-06, "loss": 37.3668, "step": 8236 }, { "epoch": 196.11940298507463, "grad_norm": 15.862046241760254, "learning_rate": 9.705952380952382e-06, "loss": 37.2736, "step": 8237 }, { "epoch": 196.14328358208957, "grad_norm": 14.635769844055176, "learning_rate": 9.704761904761906e-06, "loss": 37.643, "step": 8238 }, { "epoch": 196.16716417910447, "grad_norm": 20.509693145751953, "learning_rate": 9.70357142857143e-06, "loss": 37.8172, "step": 8239 }, { "epoch": 196.1910447761194, "grad_norm": 17.599340438842773, "learning_rate": 9.702380952380953e-06, "loss": 37.2197, "step": 8240 }, { "epoch": 196.21492537313432, "grad_norm": 22.59776496887207, "learning_rate": 9.701190476190477e-06, "loss": 38.7183, "step": 8241 }, { "epoch": 196.23880597014926, "grad_norm": 16.052112579345703, "learning_rate": 9.7e-06, "loss": 39.4656, "step": 8242 }, { "epoch": 196.26268656716417, "grad_norm": 15.938725471496582, "learning_rate": 9.698809523809526e-06, "loss": 36.0209, "step": 8243 }, { "epoch": 196.2865671641791, "grad_norm": 18.32191276550293, "learning_rate": 9.697619047619048e-06, "loss": 37.7406, "step": 8244 }, { "epoch": 196.31044776119404, "grad_norm": 17.4626407623291, "learning_rate": 9.696428571428573e-06, "loss": 37.5677, "step": 8245 }, { "epoch": 196.33432835820895, "grad_norm": 16.99214744567871, "learning_rate": 9.695238095238096e-06, "loss": 38.1292, "step": 8246 }, { "epoch": 196.3582089552239, "grad_norm": 20.447288513183594, "learning_rate": 9.69404761904762e-06, "loss": 36.6892, "step": 8247 }, { "epoch": 196.3820895522388, "grad_norm": 16.08639907836914, "learning_rate": 9.692857142857144e-06, "loss": 38.0742, "step": 8248 }, { "epoch": 196.40597014925373, "grad_norm": 20.349925994873047, "learning_rate": 9.691666666666667e-06, "loss": 36.1244, "step": 8249 }, { "epoch": 196.42985074626867, "grad_norm": 18.42910385131836, "learning_rate": 9.690476190476191e-06, "loss": 38.6168, "step": 8250 }, { "epoch": 196.45373134328358, "grad_norm": 19.19205665588379, "learning_rate": 9.689285714285716e-06, "loss": 38.4484, "step": 8251 }, { "epoch": 196.47761194029852, "grad_norm": 15.112396240234375, "learning_rate": 9.688095238095238e-06, "loss": 38.2654, "step": 8252 }, { "epoch": 196.50149253731342, "grad_norm": 22.324058532714844, "learning_rate": 9.686904761904764e-06, "loss": 39.2476, "step": 8253 }, { "epoch": 196.52537313432836, "grad_norm": 19.882596969604492, "learning_rate": 9.685714285714285e-06, "loss": 37.9678, "step": 8254 }, { "epoch": 196.54925373134327, "grad_norm": 20.174253463745117, "learning_rate": 9.68452380952381e-06, "loss": 38.0835, "step": 8255 }, { "epoch": 196.5731343283582, "grad_norm": 15.697175979614258, "learning_rate": 9.683333333333334e-06, "loss": 37.7158, "step": 8256 }, { "epoch": 196.59701492537314, "grad_norm": 17.792743682861328, "learning_rate": 9.682142857142858e-06, "loss": 38.0134, "step": 8257 }, { "epoch": 196.62089552238805, "grad_norm": 19.55414390563965, "learning_rate": 9.680952380952382e-06, "loss": 38.0596, "step": 8258 }, { "epoch": 196.644776119403, "grad_norm": 16.656089782714844, "learning_rate": 9.679761904761905e-06, "loss": 37.9196, "step": 8259 }, { "epoch": 196.6686567164179, "grad_norm": 15.824597358703613, "learning_rate": 9.678571428571429e-06, "loss": 37.6505, "step": 8260 }, { "epoch": 196.69253731343284, "grad_norm": 16.158740997314453, "learning_rate": 9.677380952380954e-06, "loss": 37.6144, "step": 8261 }, { "epoch": 196.71641791044777, "grad_norm": 15.026473045349121, "learning_rate": 9.676190476190476e-06, "loss": 38.89, "step": 8262 }, { "epoch": 196.74029850746268, "grad_norm": 17.907974243164062, "learning_rate": 9.675000000000001e-06, "loss": 37.778, "step": 8263 }, { "epoch": 196.76417910447762, "grad_norm": 18.452228546142578, "learning_rate": 9.673809523809525e-06, "loss": 36.5277, "step": 8264 }, { "epoch": 196.78805970149253, "grad_norm": 13.613883972167969, "learning_rate": 9.672619047619049e-06, "loss": 37.7714, "step": 8265 }, { "epoch": 196.81194029850747, "grad_norm": 15.803605079650879, "learning_rate": 9.671428571428572e-06, "loss": 37.4731, "step": 8266 }, { "epoch": 196.83582089552237, "grad_norm": 15.423070907592773, "learning_rate": 9.670238095238096e-06, "loss": 38.3607, "step": 8267 }, { "epoch": 196.8597014925373, "grad_norm": 14.29737663269043, "learning_rate": 9.66904761904762e-06, "loss": 37.8142, "step": 8268 }, { "epoch": 196.88358208955225, "grad_norm": 12.915861129760742, "learning_rate": 9.667857142857143e-06, "loss": 38.1578, "step": 8269 }, { "epoch": 196.90746268656716, "grad_norm": 17.85457420349121, "learning_rate": 9.666666666666667e-06, "loss": 37.3879, "step": 8270 }, { "epoch": 196.9313432835821, "grad_norm": 17.17386817932129, "learning_rate": 9.665476190476192e-06, "loss": 38.6943, "step": 8271 }, { "epoch": 196.955223880597, "grad_norm": 13.46798038482666, "learning_rate": 9.664285714285716e-06, "loss": 36.7541, "step": 8272 }, { "epoch": 196.97910447761194, "grad_norm": 18.20702362060547, "learning_rate": 9.66309523809524e-06, "loss": 37.5384, "step": 8273 }, { "epoch": 197.0, "grad_norm": 12.276162147521973, "learning_rate": 9.661904761904763e-06, "loss": 32.1864, "step": 8274 }, { "epoch": 197.02388059701494, "grad_norm": 24.191837310791016, "learning_rate": 9.660714285714287e-06, "loss": 37.9378, "step": 8275 }, { "epoch": 197.04776119402985, "grad_norm": 16.841337203979492, "learning_rate": 9.65952380952381e-06, "loss": 37.5576, "step": 8276 }, { "epoch": 197.07164179104478, "grad_norm": 18.5413761138916, "learning_rate": 9.658333333333334e-06, "loss": 36.504, "step": 8277 }, { "epoch": 197.0955223880597, "grad_norm": 22.87081527709961, "learning_rate": 9.657142857142859e-06, "loss": 38.4459, "step": 8278 }, { "epoch": 197.11940298507463, "grad_norm": 16.935012817382812, "learning_rate": 9.655952380952381e-06, "loss": 36.5404, "step": 8279 }, { "epoch": 197.14328358208957, "grad_norm": 15.1573486328125, "learning_rate": 9.654761904761906e-06, "loss": 38.72, "step": 8280 }, { "epoch": 197.16716417910447, "grad_norm": 14.638708114624023, "learning_rate": 9.653571428571428e-06, "loss": 37.1901, "step": 8281 }, { "epoch": 197.1910447761194, "grad_norm": 16.643617630004883, "learning_rate": 9.652380952380954e-06, "loss": 36.4447, "step": 8282 }, { "epoch": 197.21492537313432, "grad_norm": 17.821044921875, "learning_rate": 9.651190476190477e-06, "loss": 36.5, "step": 8283 }, { "epoch": 197.23880597014926, "grad_norm": 15.265460014343262, "learning_rate": 9.65e-06, "loss": 37.3383, "step": 8284 }, { "epoch": 197.26268656716417, "grad_norm": 17.732948303222656, "learning_rate": 9.648809523809524e-06, "loss": 36.9182, "step": 8285 }, { "epoch": 197.2865671641791, "grad_norm": 13.246759414672852, "learning_rate": 9.647619047619048e-06, "loss": 38.1302, "step": 8286 }, { "epoch": 197.31044776119404, "grad_norm": 16.793123245239258, "learning_rate": 9.646428571428572e-06, "loss": 38.0947, "step": 8287 }, { "epoch": 197.33432835820895, "grad_norm": 15.542506217956543, "learning_rate": 9.645238095238097e-06, "loss": 36.9537, "step": 8288 }, { "epoch": 197.3582089552239, "grad_norm": 20.053983688354492, "learning_rate": 9.644047619047619e-06, "loss": 38.043, "step": 8289 }, { "epoch": 197.3820895522388, "grad_norm": 21.54052734375, "learning_rate": 9.642857142857144e-06, "loss": 37.6954, "step": 8290 }, { "epoch": 197.40597014925373, "grad_norm": 14.850491523742676, "learning_rate": 9.641666666666666e-06, "loss": 37.2277, "step": 8291 }, { "epoch": 197.42985074626867, "grad_norm": 18.214035034179688, "learning_rate": 9.640476190476191e-06, "loss": 37.2554, "step": 8292 }, { "epoch": 197.45373134328358, "grad_norm": 14.643274307250977, "learning_rate": 9.639285714285715e-06, "loss": 38.0327, "step": 8293 }, { "epoch": 197.47761194029852, "grad_norm": 16.960464477539062, "learning_rate": 9.638095238095239e-06, "loss": 38.2719, "step": 8294 }, { "epoch": 197.50149253731342, "grad_norm": 16.77781105041504, "learning_rate": 9.636904761904762e-06, "loss": 37.8132, "step": 8295 }, { "epoch": 197.52537313432836, "grad_norm": 16.169910430908203, "learning_rate": 9.635714285714286e-06, "loss": 36.9185, "step": 8296 }, { "epoch": 197.54925373134327, "grad_norm": 16.040151596069336, "learning_rate": 9.63452380952381e-06, "loss": 37.9674, "step": 8297 }, { "epoch": 197.5731343283582, "grad_norm": 18.088455200195312, "learning_rate": 9.633333333333335e-06, "loss": 38.4402, "step": 8298 }, { "epoch": 197.59701492537314, "grad_norm": 17.952877044677734, "learning_rate": 9.632142857142858e-06, "loss": 38.6546, "step": 8299 }, { "epoch": 197.62089552238805, "grad_norm": NaN, "learning_rate": 9.630952380952382e-06, "loss": 60.4204, "step": 8300 }, { "epoch": 197.644776119403, "grad_norm": 20.842395782470703, "learning_rate": 9.630952380952382e-06, "loss": 38.1086, "step": 8301 }, { "epoch": 197.6686567164179, "grad_norm": 16.893850326538086, "learning_rate": 9.629761904761906e-06, "loss": 37.6307, "step": 8302 }, { "epoch": 197.69253731343284, "grad_norm": 21.6165714263916, "learning_rate": 9.62857142857143e-06, "loss": 38.3061, "step": 8303 }, { "epoch": 197.71641791044777, "grad_norm": 22.543777465820312, "learning_rate": 9.627380952380953e-06, "loss": 37.4894, "step": 8304 }, { "epoch": 197.74029850746268, "grad_norm": 15.899398803710938, "learning_rate": 9.626190476190477e-06, "loss": 39.1086, "step": 8305 }, { "epoch": 197.76417910447762, "grad_norm": 33.675968170166016, "learning_rate": 9.625e-06, "loss": 38.5525, "step": 8306 }, { "epoch": 197.78805970149253, "grad_norm": 23.818912506103516, "learning_rate": 9.623809523809524e-06, "loss": 37.7922, "step": 8307 }, { "epoch": 197.81194029850747, "grad_norm": 31.32359504699707, "learning_rate": 9.622619047619049e-06, "loss": 37.6434, "step": 8308 }, { "epoch": 197.83582089552237, "grad_norm": 21.7730712890625, "learning_rate": 9.621428571428573e-06, "loss": 38.4835, "step": 8309 }, { "epoch": 197.8597014925373, "grad_norm": 32.48667907714844, "learning_rate": 9.620238095238096e-06, "loss": 38.0251, "step": 8310 }, { "epoch": 197.88358208955225, "grad_norm": 16.73311424255371, "learning_rate": 9.61904761904762e-06, "loss": 38.1521, "step": 8311 }, { "epoch": 197.90746268656716, "grad_norm": 36.096553802490234, "learning_rate": 9.617857142857144e-06, "loss": 36.5312, "step": 8312 }, { "epoch": 197.9313432835821, "grad_norm": 24.470571517944336, "learning_rate": 9.616666666666667e-06, "loss": 36.2796, "step": 8313 }, { "epoch": 197.955223880597, "grad_norm": 37.85520553588867, "learning_rate": 9.615476190476193e-06, "loss": 37.2368, "step": 8314 }, { "epoch": 197.97910447761194, "grad_norm": 35.127803802490234, "learning_rate": 9.614285714285714e-06, "loss": 38.4903, "step": 8315 }, { "epoch": 198.0, "grad_norm": 26.169633865356445, "learning_rate": 9.61309523809524e-06, "loss": 32.8102, "step": 8316 }, { "epoch": 198.02388059701494, "grad_norm": 27.632892608642578, "learning_rate": 9.611904761904762e-06, "loss": 37.6685, "step": 8317 }, { "epoch": 198.04776119402985, "grad_norm": 26.571184158325195, "learning_rate": 9.610714285714287e-06, "loss": 37.188, "step": 8318 }, { "epoch": 198.07164179104478, "grad_norm": 21.513154983520508, "learning_rate": 9.60952380952381e-06, "loss": 38.8865, "step": 8319 }, { "epoch": 198.0955223880597, "grad_norm": 32.81885528564453, "learning_rate": 9.608333333333334e-06, "loss": 37.3792, "step": 8320 }, { "epoch": 198.11940298507463, "grad_norm": 27.440208435058594, "learning_rate": 9.607142857142858e-06, "loss": 37.5087, "step": 8321 }, { "epoch": 198.14328358208957, "grad_norm": 33.64108657836914, "learning_rate": 9.605952380952381e-06, "loss": 39.1434, "step": 8322 }, { "epoch": 198.16716417910447, "grad_norm": 32.416358947753906, "learning_rate": 9.604761904761905e-06, "loss": 38.1156, "step": 8323 }, { "epoch": 198.1910447761194, "grad_norm": 29.13149070739746, "learning_rate": 9.60357142857143e-06, "loss": 36.4538, "step": 8324 }, { "epoch": 198.21492537313432, "grad_norm": 29.666540145874023, "learning_rate": 9.602380952380952e-06, "loss": 38.0201, "step": 8325 }, { "epoch": 198.23880597014926, "grad_norm": 29.291536331176758, "learning_rate": 9.601190476190478e-06, "loss": 36.4241, "step": 8326 }, { "epoch": 198.26268656716417, "grad_norm": 22.8216609954834, "learning_rate": 9.600000000000001e-06, "loss": 37.9777, "step": 8327 }, { "epoch": 198.2865671641791, "grad_norm": 34.89872360229492, "learning_rate": 9.598809523809525e-06, "loss": 38.2485, "step": 8328 }, { "epoch": 198.31044776119404, "grad_norm": 30.85089874267578, "learning_rate": 9.597619047619048e-06, "loss": 37.071, "step": 8329 }, { "epoch": 198.33432835820895, "grad_norm": 30.49010467529297, "learning_rate": 9.596428571428572e-06, "loss": 37.4761, "step": 8330 }, { "epoch": 198.3582089552239, "grad_norm": 28.778947830200195, "learning_rate": 9.595238095238096e-06, "loss": 36.5581, "step": 8331 }, { "epoch": 198.3820895522388, "grad_norm": 26.166597366333008, "learning_rate": 9.59404761904762e-06, "loss": 37.0474, "step": 8332 }, { "epoch": 198.40597014925373, "grad_norm": 22.517654418945312, "learning_rate": 9.592857142857143e-06, "loss": 35.7226, "step": 8333 }, { "epoch": 198.42985074626867, "grad_norm": 33.42197036743164, "learning_rate": 9.591666666666667e-06, "loss": 38.6154, "step": 8334 }, { "epoch": 198.45373134328358, "grad_norm": 25.83587074279785, "learning_rate": 9.590476190476192e-06, "loss": 37.2725, "step": 8335 }, { "epoch": 198.47761194029852, "grad_norm": 34.74106979370117, "learning_rate": 9.589285714285716e-06, "loss": 38.0993, "step": 8336 }, { "epoch": 198.50149253731342, "grad_norm": 33.30450439453125, "learning_rate": 9.588095238095239e-06, "loss": 37.8493, "step": 8337 }, { "epoch": 198.52537313432836, "grad_norm": 26.693960189819336, "learning_rate": 9.586904761904763e-06, "loss": 38.3353, "step": 8338 }, { "epoch": 198.54925373134327, "grad_norm": 26.671049118041992, "learning_rate": 9.585714285714286e-06, "loss": 37.1855, "step": 8339 }, { "epoch": 198.5731343283582, "grad_norm": 29.15550994873047, "learning_rate": 9.58452380952381e-06, "loss": 38.1117, "step": 8340 }, { "epoch": 198.59701492537314, "grad_norm": 25.527177810668945, "learning_rate": 9.583333333333335e-06, "loss": 37.4152, "step": 8341 }, { "epoch": 198.62089552238805, "grad_norm": 30.142030715942383, "learning_rate": 9.582142857142857e-06, "loss": 38.7658, "step": 8342 }, { "epoch": 198.644776119403, "grad_norm": 26.978553771972656, "learning_rate": 9.580952380952383e-06, "loss": 36.8429, "step": 8343 }, { "epoch": 198.6686567164179, "grad_norm": 29.713537216186523, "learning_rate": 9.579761904761904e-06, "loss": 36.5465, "step": 8344 }, { "epoch": 198.69253731343284, "grad_norm": 27.22649383544922, "learning_rate": 9.57857142857143e-06, "loss": 38.3485, "step": 8345 }, { "epoch": 198.71641791044777, "grad_norm": 29.604351043701172, "learning_rate": 9.577380952380953e-06, "loss": 37.0354, "step": 8346 }, { "epoch": 198.74029850746268, "grad_norm": 26.05364227294922, "learning_rate": 9.576190476190477e-06, "loss": 38.0035, "step": 8347 }, { "epoch": 198.76417910447762, "grad_norm": 30.7071533203125, "learning_rate": 9.575e-06, "loss": 37.6573, "step": 8348 }, { "epoch": 198.78805970149253, "grad_norm": 27.888521194458008, "learning_rate": 9.573809523809524e-06, "loss": 37.6404, "step": 8349 }, { "epoch": 198.81194029850747, "grad_norm": 28.2436466217041, "learning_rate": 9.572619047619048e-06, "loss": 38.7478, "step": 8350 }, { "epoch": 198.83582089552237, "grad_norm": 27.264436721801758, "learning_rate": 9.571428571428573e-06, "loss": 36.9871, "step": 8351 }, { "epoch": 198.8597014925373, "grad_norm": 27.143701553344727, "learning_rate": 9.570238095238095e-06, "loss": 37.7301, "step": 8352 }, { "epoch": 198.88358208955225, "grad_norm": 25.06464385986328, "learning_rate": 9.56904761904762e-06, "loss": 38.1237, "step": 8353 }, { "epoch": 198.90746268656716, "grad_norm": 30.2600040435791, "learning_rate": 9.567857142857142e-06, "loss": 37.5021, "step": 8354 }, { "epoch": 198.9313432835821, "grad_norm": 28.089567184448242, "learning_rate": 9.566666666666668e-06, "loss": 36.0255, "step": 8355 }, { "epoch": 198.955223880597, "grad_norm": 28.468385696411133, "learning_rate": 9.565476190476191e-06, "loss": 37.7092, "step": 8356 }, { "epoch": 198.97910447761194, "grad_norm": 24.718828201293945, "learning_rate": 9.564285714285715e-06, "loss": 38.1477, "step": 8357 }, { "epoch": 199.0, "grad_norm": 25.489274978637695, "learning_rate": 9.563095238095239e-06, "loss": 33.5207, "step": 8358 }, { "epoch": 199.02388059701494, "grad_norm": 23.228055953979492, "learning_rate": 9.561904761904762e-06, "loss": 36.6224, "step": 8359 }, { "epoch": 199.04776119402985, "grad_norm": 29.2338924407959, "learning_rate": 9.560714285714286e-06, "loss": 37.4582, "step": 8360 }, { "epoch": 199.07164179104478, "grad_norm": 25.881567001342773, "learning_rate": 9.559523809523811e-06, "loss": 37.7522, "step": 8361 }, { "epoch": 199.0955223880597, "grad_norm": 32.1788444519043, "learning_rate": 9.558333333333335e-06, "loss": 38.0786, "step": 8362 }, { "epoch": 199.11940298507463, "grad_norm": 29.03343391418457, "learning_rate": 9.557142857142858e-06, "loss": 37.7836, "step": 8363 }, { "epoch": 199.14328358208957, "grad_norm": 26.068822860717773, "learning_rate": 9.555952380952382e-06, "loss": 37.4076, "step": 8364 }, { "epoch": 199.16716417910447, "grad_norm": 25.541522979736328, "learning_rate": 9.554761904761906e-06, "loss": 38.1175, "step": 8365 }, { "epoch": 199.1910447761194, "grad_norm": 28.156938552856445, "learning_rate": 9.55357142857143e-06, "loss": 38.3294, "step": 8366 }, { "epoch": 199.21492537313432, "grad_norm": 24.11278533935547, "learning_rate": 9.552380952380953e-06, "loss": 37.0253, "step": 8367 }, { "epoch": 199.23880597014926, "grad_norm": 31.672948837280273, "learning_rate": 9.551190476190476e-06, "loss": 37.647, "step": 8368 }, { "epoch": 199.26268656716417, "grad_norm": 31.267433166503906, "learning_rate": 9.55e-06, "loss": 38.0205, "step": 8369 }, { "epoch": 199.2865671641791, "grad_norm": 24.666099548339844, "learning_rate": 9.548809523809525e-06, "loss": 37.0519, "step": 8370 }, { "epoch": 199.31044776119404, "grad_norm": 24.412540435791016, "learning_rate": 9.547619047619049e-06, "loss": 36.7093, "step": 8371 }, { "epoch": 199.33432835820895, "grad_norm": 25.304214477539062, "learning_rate": 9.546428571428573e-06, "loss": 37.3666, "step": 8372 }, { "epoch": 199.3582089552239, "grad_norm": 18.673847198486328, "learning_rate": 9.545238095238096e-06, "loss": 36.7329, "step": 8373 }, { "epoch": 199.3820895522388, "grad_norm": 31.444181442260742, "learning_rate": 9.54404761904762e-06, "loss": 37.595, "step": 8374 }, { "epoch": 199.40597014925373, "grad_norm": 28.37276268005371, "learning_rate": 9.542857142857143e-06, "loss": 36.5752, "step": 8375 }, { "epoch": 199.42985074626867, "grad_norm": 31.71356773376465, "learning_rate": 9.541666666666669e-06, "loss": 37.6783, "step": 8376 }, { "epoch": 199.45373134328358, "grad_norm": 29.976390838623047, "learning_rate": 9.54047619047619e-06, "loss": 36.7449, "step": 8377 }, { "epoch": 199.47761194029852, "grad_norm": 24.911964416503906, "learning_rate": 9.539285714285716e-06, "loss": 37.8768, "step": 8378 }, { "epoch": 199.50149253731342, "grad_norm": 18.54723358154297, "learning_rate": 9.538095238095238e-06, "loss": 36.9114, "step": 8379 }, { "epoch": 199.52537313432836, "grad_norm": 31.114011764526367, "learning_rate": 9.536904761904763e-06, "loss": 38.7622, "step": 8380 }, { "epoch": 199.54925373134327, "grad_norm": 20.96200942993164, "learning_rate": 9.535714285714287e-06, "loss": 37.7349, "step": 8381 }, { "epoch": 199.5731343283582, "grad_norm": 30.523534774780273, "learning_rate": 9.53452380952381e-06, "loss": 37.2072, "step": 8382 }, { "epoch": 199.59701492537314, "grad_norm": 26.291833877563477, "learning_rate": 9.533333333333334e-06, "loss": 37.9218, "step": 8383 }, { "epoch": 199.62089552238805, "grad_norm": 30.261066436767578, "learning_rate": 9.532142857142858e-06, "loss": 36.9925, "step": 8384 }, { "epoch": 199.644776119403, "grad_norm": 30.21729850769043, "learning_rate": 9.530952380952381e-06, "loss": 38.014, "step": 8385 }, { "epoch": 199.6686567164179, "grad_norm": 26.573165893554688, "learning_rate": 9.529761904761905e-06, "loss": 38.287, "step": 8386 }, { "epoch": 199.69253731343284, "grad_norm": 24.21065330505371, "learning_rate": 9.528571428571429e-06, "loss": 38.5124, "step": 8387 }, { "epoch": 199.71641791044777, "grad_norm": 26.77363395690918, "learning_rate": 9.527380952380954e-06, "loss": 38.4566, "step": 8388 }, { "epoch": 199.74029850746268, "grad_norm": 23.666229248046875, "learning_rate": 9.526190476190476e-06, "loss": 38.0738, "step": 8389 }, { "epoch": 199.76417910447762, "grad_norm": 26.105844497680664, "learning_rate": 9.525000000000001e-06, "loss": 35.2158, "step": 8390 }, { "epoch": 199.78805970149253, "grad_norm": 23.236692428588867, "learning_rate": 9.523809523809525e-06, "loss": 37.9765, "step": 8391 }, { "epoch": 199.81194029850747, "grad_norm": 28.990320205688477, "learning_rate": 9.522619047619048e-06, "loss": 39.4678, "step": 8392 }, { "epoch": 199.83582089552237, "grad_norm": 26.152000427246094, "learning_rate": 9.521428571428572e-06, "loss": 38.2819, "step": 8393 }, { "epoch": 199.8597014925373, "grad_norm": 27.403459548950195, "learning_rate": 9.520238095238096e-06, "loss": 37.7756, "step": 8394 }, { "epoch": 199.88358208955225, "grad_norm": 25.223661422729492, "learning_rate": 9.51904761904762e-06, "loss": 38.0363, "step": 8395 }, { "epoch": 199.90746268656716, "grad_norm": 28.14125633239746, "learning_rate": 9.517857142857143e-06, "loss": 37.2465, "step": 8396 }, { "epoch": 199.9313432835821, "grad_norm": NaN, "learning_rate": 9.516666666666668e-06, "loss": 60.1323, "step": 8397 }, { "epoch": 199.955223880597, "grad_norm": 25.910566329956055, "learning_rate": 9.516666666666668e-06, "loss": 37.1015, "step": 8398 }, { "epoch": 199.97910447761194, "grad_norm": 30.221599578857422, "learning_rate": 9.515476190476192e-06, "loss": 37.6695, "step": 8399 }, { "epoch": 200.0, "grad_norm": 22.35158348083496, "learning_rate": 9.514285714285715e-06, "loss": 32.3938, "step": 8400 }, { "epoch": 200.0, "step": 8400, "total_flos": 4.129605818803725e+17, "train_loss": 1.9183280272710892, "train_runtime": 12838.6174, "train_samples_per_second": 83.373, "train_steps_per_second": 0.654 }, { "epoch": 200.02388059701494, "grad_norm": 23.81245994567871, "learning_rate": 1e-05, "loss": 37.5421, "step": 8401 }, { "epoch": 200.04776119402985, "grad_norm": Infinity, "learning_rate": 9.998866213151928e-06, "loss": 45.3604, "step": 8402 }, { "epoch": 200.07164179104478, "grad_norm": Infinity, "learning_rate": 9.998866213151928e-06, "loss": 46.7324, "step": 8403 }, { "epoch": 200.0955223880597, "grad_norm": 504.5543212890625, "learning_rate": 9.998866213151928e-06, "loss": 46.4227, "step": 8404 }, { "epoch": 200.11940298507463, "grad_norm": 304.8294372558594, "learning_rate": 9.997732426303856e-06, "loss": 42.1724, "step": 8405 }, { "epoch": 200.14328358208957, "grad_norm": 102.3563232421875, "learning_rate": 9.996598639455783e-06, "loss": 39.149, "step": 8406 }, { "epoch": 200.16716417910447, "grad_norm": 111.65090942382812, "learning_rate": 9.99546485260771e-06, "loss": 39.3382, "step": 8407 }, { "epoch": 200.1910447761194, "grad_norm": 107.08712005615234, "learning_rate": 9.994331065759638e-06, "loss": 39.5328, "step": 8408 }, { "epoch": 200.21492537313432, "grad_norm": 47.29191589355469, "learning_rate": 9.993197278911566e-06, "loss": 38.9979, "step": 8409 }, { "epoch": 200.23880597014926, "grad_norm": 46.862098693847656, "learning_rate": 9.992063492063493e-06, "loss": 37.9798, "step": 8410 }, { "epoch": 200.26268656716417, "grad_norm": 37.27891159057617, "learning_rate": 9.99092970521542e-06, "loss": 38.5795, "step": 8411 }, { "epoch": 200.2865671641791, "grad_norm": 28.95697021484375, "learning_rate": 9.989795918367348e-06, "loss": 39.2989, "step": 8412 }, { "epoch": 200.31044776119404, "grad_norm": 31.485437393188477, "learning_rate": 9.988662131519276e-06, "loss": 37.808, "step": 8413 }, { "epoch": 200.33432835820895, "grad_norm": 25.093996047973633, "learning_rate": 9.987528344671202e-06, "loss": 37.9118, "step": 8414 }, { "epoch": 200.3582089552239, "grad_norm": 18.148664474487305, "learning_rate": 9.98639455782313e-06, "loss": 37.8239, "step": 8415 }, { "epoch": 200.3820895522388, "grad_norm": 24.032161712646484, "learning_rate": 9.985260770975057e-06, "loss": 37.5528, "step": 8416 }, { "epoch": 200.40597014925373, "grad_norm": 19.333480834960938, "learning_rate": 9.984126984126986e-06, "loss": 37.2521, "step": 8417 }, { "epoch": 200.42985074626867, "grad_norm": 26.17083740234375, "learning_rate": 9.982993197278913e-06, "loss": 37.8435, "step": 8418 }, { "epoch": 200.45373134328358, "grad_norm": 21.792404174804688, "learning_rate": 9.981859410430839e-06, "loss": 38.5489, "step": 8419 }, { "epoch": 200.47761194029852, "grad_norm": 18.229753494262695, "learning_rate": 9.980725623582768e-06, "loss": 37.0981, "step": 8420 }, { "epoch": 200.50149253731342, "grad_norm": 23.669044494628906, "learning_rate": 9.979591836734694e-06, "loss": 37.6026, "step": 8421 }, { "epoch": 200.52537313432836, "grad_norm": 24.45855712890625, "learning_rate": 9.978458049886622e-06, "loss": 37.5576, "step": 8422 }, { "epoch": 200.54925373134327, "grad_norm": 17.91193199157715, "learning_rate": 9.977324263038549e-06, "loss": 38.3311, "step": 8423 }, { "epoch": 200.5731343283582, "grad_norm": 19.74493408203125, "learning_rate": 9.976190476190477e-06, "loss": 37.4511, "step": 8424 }, { "epoch": 200.59701492537314, "grad_norm": 15.995309829711914, "learning_rate": 9.975056689342404e-06, "loss": 38.1505, "step": 8425 }, { "epoch": 200.62089552238805, "grad_norm": 21.274045944213867, "learning_rate": 9.973922902494332e-06, "loss": 36.8368, "step": 8426 }, { "epoch": 200.644776119403, "grad_norm": 16.583993911743164, "learning_rate": 9.972789115646259e-06, "loss": 37.2429, "step": 8427 }, { "epoch": 200.6686567164179, "grad_norm": 24.277111053466797, "learning_rate": 9.971655328798187e-06, "loss": 38.7733, "step": 8428 }, { "epoch": 200.69253731343284, "grad_norm": 17.2474308013916, "learning_rate": 9.970521541950114e-06, "loss": 38.1177, "step": 8429 }, { "epoch": 200.71641791044777, "grad_norm": 19.051393508911133, "learning_rate": 9.969387755102042e-06, "loss": 37.7148, "step": 8430 }, { "epoch": 200.74029850746268, "grad_norm": NaN, "learning_rate": 9.968253968253969e-06, "loss": 65.8134, "step": 8431 }, { "epoch": 200.76417910447762, "grad_norm": 22.762027740478516, "learning_rate": 9.968253968253969e-06, "loss": 38.2388, "step": 8432 }, { "epoch": 200.78805970149253, "grad_norm": 15.53449821472168, "learning_rate": 9.967120181405897e-06, "loss": 37.2729, "step": 8433 }, { "epoch": 200.81194029850747, "grad_norm": 23.350296020507812, "learning_rate": 9.965986394557824e-06, "loss": 36.1391, "step": 8434 }, { "epoch": 200.83582089552237, "grad_norm": 20.47188949584961, "learning_rate": 9.964852607709752e-06, "loss": 36.8894, "step": 8435 }, { "epoch": 200.8597014925373, "grad_norm": 20.669742584228516, "learning_rate": 9.963718820861679e-06, "loss": 36.2852, "step": 8436 }, { "epoch": 200.88358208955225, "grad_norm": 28.16716957092285, "learning_rate": 9.962585034013607e-06, "loss": 38.1568, "step": 8437 }, { "epoch": 200.90746268656716, "grad_norm": 20.141733169555664, "learning_rate": 9.961451247165534e-06, "loss": 38.1183, "step": 8438 }, { "epoch": 200.9313432835821, "grad_norm": 31.327373504638672, "learning_rate": 9.960317460317462e-06, "loss": 38.2826, "step": 8439 }, { "epoch": 200.955223880597, "grad_norm": 23.03270721435547, "learning_rate": 9.959183673469387e-06, "loss": 37.7775, "step": 8440 }, { "epoch": 200.97910447761194, "grad_norm": 30.23394775390625, "learning_rate": 9.958049886621317e-06, "loss": 36.9908, "step": 8441 }, { "epoch": 201.0, "grad_norm": 19.29242515563965, "learning_rate": 9.956916099773244e-06, "loss": 32.7172, "step": 8442 }, { "epoch": 201.02388059701494, "grad_norm": 33.536903381347656, "learning_rate": 9.955782312925172e-06, "loss": 36.1264, "step": 8443 }, { "epoch": 201.04776119402985, "grad_norm": 30.620258331298828, "learning_rate": 9.954648526077099e-06, "loss": 37.4299, "step": 8444 }, { "epoch": 201.07164179104478, "grad_norm": 29.916500091552734, "learning_rate": 9.953514739229025e-06, "loss": 38.3549, "step": 8445 }, { "epoch": 201.0955223880597, "grad_norm": 26.54745864868164, "learning_rate": 9.952380952380954e-06, "loss": 38.2229, "step": 8446 }, { "epoch": 201.11940298507463, "grad_norm": 29.324121475219727, "learning_rate": 9.95124716553288e-06, "loss": 37.9866, "step": 8447 }, { "epoch": 201.14328358208957, "grad_norm": 22.872337341308594, "learning_rate": 9.950113378684807e-06, "loss": 37.0148, "step": 8448 }, { "epoch": 201.16716417910447, "grad_norm": 28.70613670349121, "learning_rate": 9.948979591836737e-06, "loss": 36.8655, "step": 8449 }, { "epoch": 201.1910447761194, "grad_norm": 22.34271240234375, "learning_rate": 9.947845804988662e-06, "loss": 36.3357, "step": 8450 }, { "epoch": 201.21492537313432, "grad_norm": 31.665597915649414, "learning_rate": 9.946712018140592e-06, "loss": 37.386, "step": 8451 }, { "epoch": 201.23880597014926, "grad_norm": 23.60234260559082, "learning_rate": 9.945578231292517e-06, "loss": 37.0227, "step": 8452 }, { "epoch": 201.26268656716417, "grad_norm": 31.781818389892578, "learning_rate": 9.944444444444445e-06, "loss": 38.0446, "step": 8453 }, { "epoch": 201.2865671641791, "grad_norm": 24.861406326293945, "learning_rate": 9.943310657596372e-06, "loss": 38.2091, "step": 8454 }, { "epoch": 201.31044776119404, "grad_norm": 28.904706954956055, "learning_rate": 9.9421768707483e-06, "loss": 36.8503, "step": 8455 }, { "epoch": 201.33432835820895, "grad_norm": 24.053878784179688, "learning_rate": 9.941043083900227e-06, "loss": 38.1396, "step": 8456 }, { "epoch": 201.3582089552239, "grad_norm": 29.818387985229492, "learning_rate": 9.939909297052155e-06, "loss": 36.598, "step": 8457 }, { "epoch": 201.3820895522388, "grad_norm": 22.70626449584961, "learning_rate": 9.938775510204082e-06, "loss": 36.83, "step": 8458 }, { "epoch": 201.40597014925373, "grad_norm": 30.062395095825195, "learning_rate": 9.93764172335601e-06, "loss": 37.1404, "step": 8459 }, { "epoch": 201.42985074626867, "grad_norm": 24.94289207458496, "learning_rate": 9.936507936507937e-06, "loss": 37.4896, "step": 8460 }, { "epoch": 201.45373134328358, "grad_norm": 34.83537673950195, "learning_rate": 9.935374149659865e-06, "loss": 37.4165, "step": 8461 }, { "epoch": 201.47761194029852, "grad_norm": 30.582931518554688, "learning_rate": 9.934240362811792e-06, "loss": 36.9354, "step": 8462 }, { "epoch": 201.50149253731342, "grad_norm": 28.539152145385742, "learning_rate": 9.93310657596372e-06, "loss": 37.6099, "step": 8463 }, { "epoch": 201.52537313432836, "grad_norm": 28.249475479125977, "learning_rate": 9.931972789115647e-06, "loss": 36.0266, "step": 8464 }, { "epoch": 201.54925373134327, "grad_norm": 26.3381290435791, "learning_rate": 9.930839002267575e-06, "loss": 37.0545, "step": 8465 }, { "epoch": 201.5731343283582, "grad_norm": 23.9443359375, "learning_rate": 9.929705215419502e-06, "loss": 37.6213, "step": 8466 }, { "epoch": 201.59701492537314, "grad_norm": 32.69965744018555, "learning_rate": 9.92857142857143e-06, "loss": 39.0624, "step": 8467 }, { "epoch": 201.62089552238805, "grad_norm": 26.842876434326172, "learning_rate": 9.927437641723356e-06, "loss": 37.8574, "step": 8468 }, { "epoch": 201.644776119403, "grad_norm": 28.573163986206055, "learning_rate": 9.926303854875285e-06, "loss": 38.1716, "step": 8469 }, { "epoch": 201.6686567164179, "grad_norm": 28.380313873291016, "learning_rate": 9.92517006802721e-06, "loss": 37.498, "step": 8470 }, { "epoch": 201.69253731343284, "grad_norm": 25.541645050048828, "learning_rate": 9.92403628117914e-06, "loss": 37.255, "step": 8471 }, { "epoch": 201.71641791044777, "grad_norm": 25.202747344970703, "learning_rate": 9.922902494331067e-06, "loss": 38.1171, "step": 8472 }, { "epoch": 201.74029850746268, "grad_norm": 32.81104278564453, "learning_rate": 9.921768707482993e-06, "loss": 37.3737, "step": 8473 }, { "epoch": 201.76417910447762, "grad_norm": 26.47867774963379, "learning_rate": 9.920634920634922e-06, "loss": 38.4246, "step": 8474 }, { "epoch": 201.78805970149253, "grad_norm": 30.32525062561035, "learning_rate": 9.919501133786848e-06, "loss": 37.4488, "step": 8475 }, { "epoch": 201.81194029850747, "grad_norm": 26.98731231689453, "learning_rate": 9.918367346938776e-06, "loss": 37.9646, "step": 8476 }, { "epoch": 201.83582089552237, "grad_norm": 26.333789825439453, "learning_rate": 9.917233560090703e-06, "loss": 37.9806, "step": 8477 }, { "epoch": 201.8597014925373, "grad_norm": 21.36071014404297, "learning_rate": 9.91609977324263e-06, "loss": 35.9579, "step": 8478 }, { "epoch": 201.88358208955225, "grad_norm": 25.346942901611328, "learning_rate": 9.91496598639456e-06, "loss": 37.2274, "step": 8479 }, { "epoch": 201.90746268656716, "grad_norm": 22.294967651367188, "learning_rate": 9.913832199546486e-06, "loss": 37.1912, "step": 8480 }, { "epoch": 201.9313432835821, "grad_norm": 28.8373966217041, "learning_rate": 9.912698412698413e-06, "loss": 38.0725, "step": 8481 }, { "epoch": 201.955223880597, "grad_norm": 25.565916061401367, "learning_rate": 9.91156462585034e-06, "loss": 38.5633, "step": 8482 }, { "epoch": 201.97910447761194, "grad_norm": 29.3934268951416, "learning_rate": 9.910430839002268e-06, "loss": 39.6998, "step": 8483 }, { "epoch": 202.0, "grad_norm": 20.555400848388672, "learning_rate": 9.909297052154196e-06, "loss": 32.8394, "step": 8484 }, { "epoch": 202.02388059701494, "grad_norm": 31.105358123779297, "learning_rate": 9.908163265306123e-06, "loss": 38.0253, "step": 8485 }, { "epoch": 202.04776119402985, "grad_norm": 28.788240432739258, "learning_rate": 9.90702947845805e-06, "loss": 38.106, "step": 8486 }, { "epoch": 202.07164179104478, "grad_norm": 30.641401290893555, "learning_rate": 9.905895691609978e-06, "loss": 37.1781, "step": 8487 }, { "epoch": 202.0955223880597, "grad_norm": 29.78472900390625, "learning_rate": 9.904761904761906e-06, "loss": 37.2946, "step": 8488 }, { "epoch": 202.11940298507463, "grad_norm": 24.245201110839844, "learning_rate": 9.903628117913833e-06, "loss": 37.3295, "step": 8489 }, { "epoch": 202.14328358208957, "grad_norm": 27.062395095825195, "learning_rate": 9.90249433106576e-06, "loss": 37.7033, "step": 8490 }, { "epoch": 202.16716417910447, "grad_norm": 29.303415298461914, "learning_rate": 9.901360544217688e-06, "loss": 36.5479, "step": 8491 }, { "epoch": 202.1910447761194, "grad_norm": 23.434593200683594, "learning_rate": 9.900226757369616e-06, "loss": 36.5344, "step": 8492 }, { "epoch": 202.21492537313432, "grad_norm": 27.116130828857422, "learning_rate": 9.899092970521543e-06, "loss": 36.8862, "step": 8493 }, { "epoch": 202.23880597014926, "grad_norm": 23.95987892150879, "learning_rate": 9.89795918367347e-06, "loss": 37.2771, "step": 8494 }, { "epoch": 202.26268656716417, "grad_norm": NaN, "learning_rate": 9.896825396825398e-06, "loss": 64.9095, "step": 8495 }, { "epoch": 202.2865671641791, "grad_norm": 26.921449661254883, "learning_rate": 9.896825396825398e-06, "loss": 36.0847, "step": 8496 }, { "epoch": 202.31044776119404, "grad_norm": 25.219202041625977, "learning_rate": 9.895691609977326e-06, "loss": 36.8732, "step": 8497 }, { "epoch": 202.33432835820895, "grad_norm": 29.823617935180664, "learning_rate": 9.894557823129253e-06, "loss": 38.1664, "step": 8498 }, { "epoch": 202.3582089552239, "grad_norm": 24.486186981201172, "learning_rate": 9.893424036281179e-06, "loss": 38.4029, "step": 8499 }, { "epoch": 202.3820895522388, "grad_norm": 28.144065856933594, "learning_rate": 9.892290249433108e-06, "loss": 37.321, "step": 8500 }, { "epoch": 202.40597014925373, "grad_norm": 22.483613967895508, "learning_rate": 9.891156462585036e-06, "loss": 37.651, "step": 8501 }, { "epoch": 202.42985074626867, "grad_norm": 25.333362579345703, "learning_rate": 9.890022675736961e-06, "loss": 37.4532, "step": 8502 }, { "epoch": 202.45373134328358, "grad_norm": 19.66573143005371, "learning_rate": 9.88888888888889e-06, "loss": 37.0419, "step": 8503 }, { "epoch": 202.47761194029852, "grad_norm": 23.879840850830078, "learning_rate": 9.887755102040816e-06, "loss": 37.5707, "step": 8504 }, { "epoch": 202.50149253731342, "grad_norm": 17.775848388671875, "learning_rate": 9.886621315192746e-06, "loss": 37.9697, "step": 8505 }, { "epoch": 202.52537313432836, "grad_norm": 24.975574493408203, "learning_rate": 9.885487528344671e-06, "loss": 36.9127, "step": 8506 }, { "epoch": 202.54925373134327, "grad_norm": 18.6658935546875, "learning_rate": 9.884353741496599e-06, "loss": 37.4472, "step": 8507 }, { "epoch": 202.5731343283582, "grad_norm": 23.0604248046875, "learning_rate": 9.883219954648526e-06, "loss": 37.623, "step": 8508 }, { "epoch": 202.59701492537314, "grad_norm": 20.533077239990234, "learning_rate": 9.882086167800454e-06, "loss": 38.4347, "step": 8509 }, { "epoch": 202.62089552238805, "grad_norm": 19.784536361694336, "learning_rate": 9.880952380952381e-06, "loss": 37.1416, "step": 8510 }, { "epoch": 202.644776119403, "grad_norm": 21.132734298706055, "learning_rate": 9.879818594104309e-06, "loss": 38.3573, "step": 8511 }, { "epoch": 202.6686567164179, "grad_norm": 16.398435592651367, "learning_rate": 9.878684807256236e-06, "loss": 36.9268, "step": 8512 }, { "epoch": 202.69253731343284, "grad_norm": 29.87149429321289, "learning_rate": 9.877551020408164e-06, "loss": 38.7687, "step": 8513 }, { "epoch": 202.71641791044777, "grad_norm": 21.406925201416016, "learning_rate": 9.876417233560091e-06, "loss": 37.5118, "step": 8514 }, { "epoch": 202.74029850746268, "grad_norm": 33.05040740966797, "learning_rate": 9.875283446712019e-06, "loss": 37.2421, "step": 8515 }, { "epoch": 202.76417910447762, "grad_norm": 28.560482025146484, "learning_rate": 9.874149659863946e-06, "loss": 37.2285, "step": 8516 }, { "epoch": 202.78805970149253, "grad_norm": 32.275150299072266, "learning_rate": 9.873015873015874e-06, "loss": 37.5002, "step": 8517 }, { "epoch": 202.81194029850747, "grad_norm": 26.057050704956055, "learning_rate": 9.871882086167801e-06, "loss": 38.5121, "step": 8518 }, { "epoch": 202.83582089552237, "grad_norm": 26.400562286376953, "learning_rate": 9.870748299319729e-06, "loss": 37.2058, "step": 8519 }, { "epoch": 202.8597014925373, "grad_norm": 19.530229568481445, "learning_rate": 9.869614512471656e-06, "loss": 37.6345, "step": 8520 }, { "epoch": 202.88358208955225, "grad_norm": 28.110204696655273, "learning_rate": 9.868480725623584e-06, "loss": 37.4057, "step": 8521 }, { "epoch": 202.90746268656716, "grad_norm": 24.72870445251465, "learning_rate": 9.867346938775511e-06, "loss": 37.214, "step": 8522 }, { "epoch": 202.9313432835821, "grad_norm": 27.58170509338379, "learning_rate": 9.866213151927439e-06, "loss": 36.9991, "step": 8523 }, { "epoch": 202.955223880597, "grad_norm": 24.87717056274414, "learning_rate": 9.865079365079366e-06, "loss": 38.3773, "step": 8524 }, { "epoch": 202.97910447761194, "grad_norm": 27.23594856262207, "learning_rate": 9.863945578231294e-06, "loss": 37.8239, "step": 8525 }, { "epoch": 203.0, "grad_norm": 21.036359786987305, "learning_rate": 9.862811791383221e-06, "loss": 32.8495, "step": 8526 }, { "epoch": 203.02388059701494, "grad_norm": 25.65850257873535, "learning_rate": 9.861678004535147e-06, "loss": 37.4606, "step": 8527 }, { "epoch": 203.04776119402985, "grad_norm": 21.02652359008789, "learning_rate": 9.860544217687076e-06, "loss": 37.9006, "step": 8528 }, { "epoch": 203.07164179104478, "grad_norm": 27.834203720092773, "learning_rate": 9.859410430839002e-06, "loss": 37.1006, "step": 8529 }, { "epoch": 203.0955223880597, "grad_norm": 22.7817440032959, "learning_rate": 9.858276643990931e-06, "loss": 36.8413, "step": 8530 }, { "epoch": 203.11940298507463, "grad_norm": 32.14323425292969, "learning_rate": 9.857142857142859e-06, "loss": 37.8483, "step": 8531 }, { "epoch": 203.14328358208957, "grad_norm": 26.14910316467285, "learning_rate": 9.856009070294785e-06, "loss": 38.3764, "step": 8532 }, { "epoch": 203.16716417910447, "grad_norm": 27.673919677734375, "learning_rate": 9.854875283446714e-06, "loss": 36.9376, "step": 8533 }, { "epoch": 203.1910447761194, "grad_norm": 24.009227752685547, "learning_rate": 9.85374149659864e-06, "loss": 36.506, "step": 8534 }, { "epoch": 203.21492537313432, "grad_norm": 29.15199089050293, "learning_rate": 9.852607709750567e-06, "loss": 37.311, "step": 8535 }, { "epoch": 203.23880597014926, "grad_norm": 21.224517822265625, "learning_rate": 9.851473922902495e-06, "loss": 36.0035, "step": 8536 }, { "epoch": 203.26268656716417, "grad_norm": 34.10138702392578, "learning_rate": 9.850340136054422e-06, "loss": 38.4266, "step": 8537 }, { "epoch": 203.2865671641791, "grad_norm": 30.168445587158203, "learning_rate": 9.849206349206351e-06, "loss": 37.3233, "step": 8538 }, { "epoch": 203.31044776119404, "grad_norm": 24.34166145324707, "learning_rate": 9.848072562358277e-06, "loss": 37.2346, "step": 8539 }, { "epoch": 203.33432835820895, "grad_norm": 22.984699249267578, "learning_rate": 9.846938775510205e-06, "loss": 37.3328, "step": 8540 }, { "epoch": 203.3582089552239, "grad_norm": 26.927013397216797, "learning_rate": 9.845804988662132e-06, "loss": 37.3275, "step": 8541 }, { "epoch": 203.3820895522388, "grad_norm": 22.42952537536621, "learning_rate": 9.84467120181406e-06, "loss": 38.1146, "step": 8542 }, { "epoch": 203.40597014925373, "grad_norm": 26.905364990234375, "learning_rate": 9.843537414965987e-06, "loss": 37.6205, "step": 8543 }, { "epoch": 203.42985074626867, "grad_norm": 23.009504318237305, "learning_rate": 9.842403628117915e-06, "loss": 36.9357, "step": 8544 }, { "epoch": 203.45373134328358, "grad_norm": 25.13264274597168, "learning_rate": 9.841269841269842e-06, "loss": 37.3651, "step": 8545 }, { "epoch": 203.47761194029852, "grad_norm": 24.366397857666016, "learning_rate": 9.84013605442177e-06, "loss": 37.44, "step": 8546 }, { "epoch": 203.50149253731342, "grad_norm": 24.072046279907227, "learning_rate": 9.839002267573697e-06, "loss": 36.5196, "step": 8547 }, { "epoch": 203.52537313432836, "grad_norm": 20.05785369873047, "learning_rate": 9.837868480725625e-06, "loss": 37.6953, "step": 8548 }, { "epoch": 203.54925373134327, "grad_norm": NaN, "learning_rate": 9.836734693877552e-06, "loss": 31.6875, "step": 8549 }, { "epoch": 203.5731343283582, "grad_norm": 26.91598892211914, "learning_rate": 9.836734693877552e-06, "loss": 37.5256, "step": 8550 }, { "epoch": 203.59701492537314, "grad_norm": 23.437747955322266, "learning_rate": 9.83560090702948e-06, "loss": 38.1517, "step": 8551 }, { "epoch": 203.62089552238805, "grad_norm": 22.155426025390625, "learning_rate": 9.834467120181407e-06, "loss": 38.0603, "step": 8552 }, { "epoch": 203.644776119403, "grad_norm": 21.953975677490234, "learning_rate": 9.833333333333333e-06, "loss": 37.3232, "step": 8553 }, { "epoch": 203.6686567164179, "grad_norm": 22.587839126586914, "learning_rate": 9.832199546485262e-06, "loss": 36.6528, "step": 8554 }, { "epoch": 203.69253731343284, "grad_norm": 20.159603118896484, "learning_rate": 9.83106575963719e-06, "loss": 38.2609, "step": 8555 }, { "epoch": 203.71641791044777, "grad_norm": 21.83957290649414, "learning_rate": 9.829931972789115e-06, "loss": 38.0038, "step": 8556 }, { "epoch": 203.74029850746268, "grad_norm": 20.712881088256836, "learning_rate": 9.828798185941045e-06, "loss": 36.8639, "step": 8557 }, { "epoch": 203.76417910447762, "grad_norm": 24.437532424926758, "learning_rate": 9.82766439909297e-06, "loss": 37.3083, "step": 8558 }, { "epoch": 203.78805970149253, "grad_norm": 18.273344039916992, "learning_rate": 9.8265306122449e-06, "loss": 37.1752, "step": 8559 }, { "epoch": 203.81194029850747, "grad_norm": 22.60394859313965, "learning_rate": 9.825396825396825e-06, "loss": 37.4568, "step": 8560 }, { "epoch": 203.83582089552237, "grad_norm": 18.706016540527344, "learning_rate": 9.824263038548753e-06, "loss": 37.6933, "step": 8561 }, { "epoch": 203.8597014925373, "grad_norm": 23.628393173217773, "learning_rate": 9.823129251700682e-06, "loss": 38.1902, "step": 8562 }, { "epoch": 203.88358208955225, "grad_norm": 22.622568130493164, "learning_rate": 9.821995464852608e-06, "loss": 37.5829, "step": 8563 }, { "epoch": 203.90746268656716, "grad_norm": 23.485595703125, "learning_rate": 9.820861678004537e-06, "loss": 37.6715, "step": 8564 }, { "epoch": 203.9313432835821, "grad_norm": 24.30974006652832, "learning_rate": 9.819727891156463e-06, "loss": 38.7099, "step": 8565 }, { "epoch": 203.955223880597, "grad_norm": 21.94779396057129, "learning_rate": 9.81859410430839e-06, "loss": 37.1328, "step": 8566 }, { "epoch": 203.97910447761194, "grad_norm": 23.196590423583984, "learning_rate": 9.817460317460318e-06, "loss": 36.9389, "step": 8567 }, { "epoch": 204.0, "grad_norm": 17.97230339050293, "learning_rate": 9.816326530612245e-06, "loss": 33.5231, "step": 8568 }, { "epoch": 204.02388059701494, "grad_norm": 18.16822624206543, "learning_rate": 9.815192743764173e-06, "loss": 37.7035, "step": 8569 }, { "epoch": 204.04776119402985, "grad_norm": 19.428035736083984, "learning_rate": 9.8140589569161e-06, "loss": 37.3438, "step": 8570 }, { "epoch": 204.07164179104478, "grad_norm": 16.83907699584961, "learning_rate": 9.812925170068028e-06, "loss": 37.7712, "step": 8571 }, { "epoch": 204.0955223880597, "grad_norm": 17.97744369506836, "learning_rate": 9.811791383219955e-06, "loss": 37.8334, "step": 8572 }, { "epoch": 204.11940298507463, "grad_norm": 17.03276252746582, "learning_rate": 9.810657596371883e-06, "loss": 37.0864, "step": 8573 }, { "epoch": 204.14328358208957, "grad_norm": 16.34898567199707, "learning_rate": 9.80952380952381e-06, "loss": 37.4734, "step": 8574 }, { "epoch": 204.16716417910447, "grad_norm": 16.26249122619629, "learning_rate": 9.808390022675738e-06, "loss": 36.2818, "step": 8575 }, { "epoch": 204.1910447761194, "grad_norm": 18.367773056030273, "learning_rate": 9.807256235827665e-06, "loss": 37.1955, "step": 8576 }, { "epoch": 204.21492537313432, "grad_norm": 14.366243362426758, "learning_rate": 9.806122448979593e-06, "loss": 36.6985, "step": 8577 }, { "epoch": 204.23880597014926, "grad_norm": 18.252262115478516, "learning_rate": 9.80498866213152e-06, "loss": 38.7752, "step": 8578 }, { "epoch": 204.26268656716417, "grad_norm": 16.947233200073242, "learning_rate": 9.803854875283448e-06, "loss": 37.0114, "step": 8579 }, { "epoch": 204.2865671641791, "grad_norm": 19.83219337463379, "learning_rate": 9.802721088435375e-06, "loss": 37.5581, "step": 8580 }, { "epoch": 204.31044776119404, "grad_norm": 12.658903121948242, "learning_rate": 9.801587301587301e-06, "loss": 35.1772, "step": 8581 }, { "epoch": 204.33432835820895, "grad_norm": 21.653749465942383, "learning_rate": 9.80045351473923e-06, "loss": 36.6293, "step": 8582 }, { "epoch": 204.3582089552239, "grad_norm": 20.153541564941406, "learning_rate": 9.799319727891158e-06, "loss": 37.3803, "step": 8583 }, { "epoch": 204.3820895522388, "grad_norm": 14.480081558227539, "learning_rate": 9.798185941043085e-06, "loss": 37.3014, "step": 8584 }, { "epoch": 204.40597014925373, "grad_norm": 17.002986907958984, "learning_rate": 9.797052154195013e-06, "loss": 36.4257, "step": 8585 }, { "epoch": 204.42985074626867, "grad_norm": 16.259973526000977, "learning_rate": 9.795918367346939e-06, "loss": 37.4563, "step": 8586 }, { "epoch": 204.45373134328358, "grad_norm": 14.481822967529297, "learning_rate": 9.794784580498868e-06, "loss": 36.5634, "step": 8587 }, { "epoch": 204.47761194029852, "grad_norm": 16.941505432128906, "learning_rate": 9.793650793650794e-06, "loss": 36.4001, "step": 8588 }, { "epoch": 204.50149253731342, "grad_norm": 14.08771800994873, "learning_rate": 9.792517006802721e-06, "loss": 36.7239, "step": 8589 }, { "epoch": 204.52537313432836, "grad_norm": 17.970989227294922, "learning_rate": 9.791383219954649e-06, "loss": 38.1865, "step": 8590 }, { "epoch": 204.54925373134327, "grad_norm": 22.19449234008789, "learning_rate": 9.790249433106576e-06, "loss": 37.6346, "step": 8591 }, { "epoch": 204.5731343283582, "grad_norm": 16.227018356323242, "learning_rate": 9.789115646258505e-06, "loss": 38.6723, "step": 8592 }, { "epoch": 204.59701492537314, "grad_norm": 15.995810508728027, "learning_rate": 9.787981859410431e-06, "loss": 37.3718, "step": 8593 }, { "epoch": 204.62089552238805, "grad_norm": 16.17877769470215, "learning_rate": 9.786848072562359e-06, "loss": 38.2895, "step": 8594 }, { "epoch": 204.644776119403, "grad_norm": 16.616500854492188, "learning_rate": 9.785714285714286e-06, "loss": 37.6198, "step": 8595 }, { "epoch": 204.6686567164179, "grad_norm": 15.92837142944336, "learning_rate": 9.784580498866214e-06, "loss": 37.6528, "step": 8596 }, { "epoch": 204.69253731343284, "grad_norm": 14.861062049865723, "learning_rate": 9.783446712018141e-06, "loss": 38.6355, "step": 8597 }, { "epoch": 204.71641791044777, "grad_norm": 14.655280113220215, "learning_rate": 9.782312925170069e-06, "loss": 37.8583, "step": 8598 }, { "epoch": 204.74029850746268, "grad_norm": 15.297805786132812, "learning_rate": 9.781179138321996e-06, "loss": 38.0771, "step": 8599 }, { "epoch": 204.76417910447762, "grad_norm": 14.192242622375488, "learning_rate": 9.780045351473924e-06, "loss": 38.3822, "step": 8600 }, { "epoch": 204.78805970149253, "grad_norm": 16.92637825012207, "learning_rate": 9.778911564625851e-06, "loss": 37.8253, "step": 8601 }, { "epoch": 204.81194029850747, "grad_norm": 13.849637031555176, "learning_rate": 9.777777777777779e-06, "loss": 38.0741, "step": 8602 }, { "epoch": 204.83582089552237, "grad_norm": 20.005584716796875, "learning_rate": 9.776643990929706e-06, "loss": 36.9411, "step": 8603 }, { "epoch": 204.8597014925373, "grad_norm": 16.371997833251953, "learning_rate": 9.775510204081634e-06, "loss": 35.9473, "step": 8604 }, { "epoch": 204.88358208955225, "grad_norm": 18.115108489990234, "learning_rate": 9.774376417233561e-06, "loss": 36.1243, "step": 8605 }, { "epoch": 204.90746268656716, "grad_norm": 16.897581100463867, "learning_rate": 9.773242630385489e-06, "loss": 38.1212, "step": 8606 }, { "epoch": 204.9313432835821, "grad_norm": 22.27849006652832, "learning_rate": 9.772108843537416e-06, "loss": 36.6232, "step": 8607 }, { "epoch": 204.955223880597, "grad_norm": 16.656368255615234, "learning_rate": 9.770975056689344e-06, "loss": 38.9034, "step": 8608 }, { "epoch": 204.97910447761194, "grad_norm": 21.257678985595703, "learning_rate": 9.769841269841271e-06, "loss": 37.4408, "step": 8609 }, { "epoch": 205.0, "grad_norm": 18.820884704589844, "learning_rate": 9.768707482993199e-06, "loss": 32.95, "step": 8610 }, { "epoch": 205.02388059701494, "grad_norm": 17.434200286865234, "learning_rate": 9.767573696145124e-06, "loss": 37.1015, "step": 8611 }, { "epoch": 205.04776119402985, "grad_norm": 24.209625244140625, "learning_rate": 9.766439909297054e-06, "loss": 37.2563, "step": 8612 }, { "epoch": 205.07164179104478, "grad_norm": 18.49870491027832, "learning_rate": 9.765306122448981e-06, "loss": 37.4353, "step": 8613 }, { "epoch": 205.0955223880597, "grad_norm": 19.67914390563965, "learning_rate": 9.764172335600907e-06, "loss": 36.5181, "step": 8614 }, { "epoch": 205.11940298507463, "grad_norm": 21.088788986206055, "learning_rate": 9.763038548752836e-06, "loss": 37.5769, "step": 8615 }, { "epoch": 205.14328358208957, "grad_norm": 13.879831314086914, "learning_rate": 9.761904761904762e-06, "loss": 38.6267, "step": 8616 }, { "epoch": 205.16716417910447, "grad_norm": 25.4019718170166, "learning_rate": 9.760770975056691e-06, "loss": 37.8395, "step": 8617 }, { "epoch": 205.1910447761194, "grad_norm": 17.966567993164062, "learning_rate": 9.759637188208617e-06, "loss": 36.8954, "step": 8618 }, { "epoch": 205.21492537313432, "grad_norm": 21.86101722717285, "learning_rate": 9.758503401360544e-06, "loss": 37.7932, "step": 8619 }, { "epoch": 205.23880597014926, "grad_norm": 19.588476181030273, "learning_rate": 9.757369614512474e-06, "loss": 36.7557, "step": 8620 }, { "epoch": 205.26268656716417, "grad_norm": 21.534090042114258, "learning_rate": 9.7562358276644e-06, "loss": 38.0934, "step": 8621 }, { "epoch": 205.2865671641791, "grad_norm": 21.240619659423828, "learning_rate": 9.755102040816327e-06, "loss": 37.9864, "step": 8622 }, { "epoch": 205.31044776119404, "grad_norm": 19.190460205078125, "learning_rate": 9.753968253968254e-06, "loss": 37.7771, "step": 8623 }, { "epoch": 205.33432835820895, "grad_norm": 18.091548919677734, "learning_rate": 9.752834467120182e-06, "loss": 37.3592, "step": 8624 }, { "epoch": 205.3582089552239, "grad_norm": 23.65395164489746, "learning_rate": 9.75170068027211e-06, "loss": 37.2367, "step": 8625 }, { "epoch": 205.3820895522388, "grad_norm": 15.69301986694336, "learning_rate": 9.750566893424037e-06, "loss": 36.6072, "step": 8626 }, { "epoch": 205.40597014925373, "grad_norm": 29.14418601989746, "learning_rate": 9.749433106575964e-06, "loss": 36.2826, "step": 8627 }, { "epoch": 205.42985074626867, "grad_norm": 19.379981994628906, "learning_rate": 9.748299319727892e-06, "loss": 36.8289, "step": 8628 }, { "epoch": 205.45373134328358, "grad_norm": 24.329856872558594, "learning_rate": 9.74716553287982e-06, "loss": 36.0532, "step": 8629 }, { "epoch": 205.47761194029852, "grad_norm": 21.218088150024414, "learning_rate": 9.746031746031747e-06, "loss": 37.3484, "step": 8630 }, { "epoch": 205.50149253731342, "grad_norm": 17.96894645690918, "learning_rate": 9.744897959183674e-06, "loss": 36.6539, "step": 8631 }, { "epoch": 205.52537313432836, "grad_norm": 29.74249839782715, "learning_rate": 9.743764172335602e-06, "loss": 36.3547, "step": 8632 }, { "epoch": 205.54925373134327, "grad_norm": 19.569395065307617, "learning_rate": 9.74263038548753e-06, "loss": 36.5655, "step": 8633 }, { "epoch": 205.5731343283582, "grad_norm": 37.414424896240234, "learning_rate": 9.741496598639457e-06, "loss": 37.9095, "step": 8634 }, { "epoch": 205.59701492537314, "grad_norm": 32.67489242553711, "learning_rate": 9.740362811791384e-06, "loss": 37.9754, "step": 8635 }, { "epoch": 205.62089552238805, "grad_norm": 26.269237518310547, "learning_rate": 9.739229024943312e-06, "loss": 36.5291, "step": 8636 }, { "epoch": 205.644776119403, "grad_norm": 26.150583267211914, "learning_rate": 9.73809523809524e-06, "loss": 36.9841, "step": 8637 }, { "epoch": 205.6686567164179, "grad_norm": 29.86044692993164, "learning_rate": 9.736961451247167e-06, "loss": 37.4311, "step": 8638 }, { "epoch": 205.69253731343284, "grad_norm": 24.11771011352539, "learning_rate": 9.735827664399093e-06, "loss": 37.3217, "step": 8639 }, { "epoch": 205.71641791044777, "grad_norm": 35.74076461791992, "learning_rate": 9.734693877551022e-06, "loss": 38.697, "step": 8640 }, { "epoch": 205.74029850746268, "grad_norm": 29.668153762817383, "learning_rate": 9.733560090702948e-06, "loss": 38.0177, "step": 8641 }, { "epoch": 205.76417910447762, "grad_norm": 26.31679344177246, "learning_rate": 9.732426303854877e-06, "loss": 36.4184, "step": 8642 }, { "epoch": 205.78805970149253, "grad_norm": 25.133056640625, "learning_rate": 9.731292517006804e-06, "loss": 38.2124, "step": 8643 }, { "epoch": 205.81194029850747, "grad_norm": 29.476600646972656, "learning_rate": 9.73015873015873e-06, "loss": 37.3606, "step": 8644 }, { "epoch": 205.83582089552237, "grad_norm": 28.86757469177246, "learning_rate": 9.72902494331066e-06, "loss": 38.1226, "step": 8645 }, { "epoch": 205.8597014925373, "grad_norm": 34.02037048339844, "learning_rate": 9.727891156462585e-06, "loss": 38.4452, "step": 8646 }, { "epoch": 205.88358208955225, "grad_norm": 32.310176849365234, "learning_rate": 9.726757369614513e-06, "loss": 35.7592, "step": 8647 }, { "epoch": 205.90746268656716, "grad_norm": 27.667375564575195, "learning_rate": 9.72562358276644e-06, "loss": 37.4783, "step": 8648 }, { "epoch": 205.9313432835821, "grad_norm": 25.79292869567871, "learning_rate": 9.724489795918368e-06, "loss": 38.5843, "step": 8649 }, { "epoch": 205.955223880597, "grad_norm": 30.97063446044922, "learning_rate": 9.723356009070297e-06, "loss": 38.1735, "step": 8650 }, { "epoch": 205.97910447761194, "grad_norm": 27.409870147705078, "learning_rate": 9.722222222222223e-06, "loss": 37.2472, "step": 8651 }, { "epoch": 206.0, "grad_norm": 26.654399871826172, "learning_rate": 9.72108843537415e-06, "loss": 32.5645, "step": 8652 }, { "epoch": 206.02388059701494, "grad_norm": 26.989267349243164, "learning_rate": 9.719954648526078e-06, "loss": 37.6551, "step": 8653 }, { "epoch": 206.04776119402985, "grad_norm": 27.682390213012695, "learning_rate": 9.718820861678005e-06, "loss": 37.2163, "step": 8654 }, { "epoch": 206.07164179104478, "grad_norm": 23.36117172241211, "learning_rate": 9.717687074829933e-06, "loss": 37.1891, "step": 8655 }, { "epoch": 206.0955223880597, "grad_norm": 28.605031967163086, "learning_rate": 9.71655328798186e-06, "loss": 37.4626, "step": 8656 }, { "epoch": 206.11940298507463, "grad_norm": 29.006641387939453, "learning_rate": 9.715419501133788e-06, "loss": 37.3505, "step": 8657 }, { "epoch": 206.14328358208957, "grad_norm": 28.94482421875, "learning_rate": 9.714285714285715e-06, "loss": 38.1489, "step": 8658 }, { "epoch": 206.16716417910447, "grad_norm": 28.851049423217773, "learning_rate": 9.713151927437643e-06, "loss": 36.5958, "step": 8659 }, { "epoch": 206.1910447761194, "grad_norm": 25.789871215820312, "learning_rate": 9.71201814058957e-06, "loss": 37.5911, "step": 8660 }, { "epoch": 206.21492537313432, "grad_norm": 20.312673568725586, "learning_rate": 9.710884353741498e-06, "loss": 37.4759, "step": 8661 }, { "epoch": 206.23880597014926, "grad_norm": 29.052183151245117, "learning_rate": 9.709750566893425e-06, "loss": 36.9356, "step": 8662 }, { "epoch": 206.26268656716417, "grad_norm": NaN, "learning_rate": 9.708616780045353e-06, "loss": 59.7149, "step": 8663 }, { "epoch": 206.2865671641791, "grad_norm": 23.565332412719727, "learning_rate": 9.708616780045353e-06, "loss": 36.9372, "step": 8664 }, { "epoch": 206.31044776119404, "grad_norm": 33.91110610961914, "learning_rate": 9.707482993197278e-06, "loss": 37.12, "step": 8665 }, { "epoch": 206.33432835820895, "grad_norm": 30.193803787231445, "learning_rate": 9.706349206349208e-06, "loss": 38.121, "step": 8666 }, { "epoch": 206.3582089552239, "grad_norm": 25.15426254272461, "learning_rate": 9.705215419501135e-06, "loss": 38.1279, "step": 8667 }, { "epoch": 206.3820895522388, "grad_norm": NaN, "learning_rate": 9.704081632653061e-06, "loss": 46.2935, "step": 8668 }, { "epoch": 206.40597014925373, "grad_norm": 25.120586395263672, "learning_rate": 9.704081632653061e-06, "loss": 37.7689, "step": 8669 }, { "epoch": 206.42985074626867, "grad_norm": 25.196691513061523, "learning_rate": 9.70294784580499e-06, "loss": 38.0344, "step": 8670 }, { "epoch": 206.45373134328358, "grad_norm": 21.404460906982422, "learning_rate": 9.701814058956916e-06, "loss": 36.8867, "step": 8671 }, { "epoch": 206.47761194029852, "grad_norm": 29.804731369018555, "learning_rate": 9.700680272108845e-06, "loss": 36.2151, "step": 8672 }, { "epoch": 206.50149253731342, "grad_norm": 27.454139709472656, "learning_rate": 9.699546485260771e-06, "loss": 36.4495, "step": 8673 }, { "epoch": 206.52537313432836, "grad_norm": 29.241275787353516, "learning_rate": 9.698412698412698e-06, "loss": 36.5479, "step": 8674 }, { "epoch": 206.54925373134327, "grad_norm": 29.399368286132812, "learning_rate": 9.697278911564628e-06, "loss": 37.5718, "step": 8675 }, { "epoch": 206.5731343283582, "grad_norm": 25.647436141967773, "learning_rate": 9.696145124716553e-06, "loss": 37.3242, "step": 8676 }, { "epoch": 206.59701492537314, "grad_norm": 23.48780632019043, "learning_rate": 9.695011337868483e-06, "loss": 37.484, "step": 8677 }, { "epoch": 206.62089552238805, "grad_norm": 30.65043067932129, "learning_rate": 9.693877551020408e-06, "loss": 37.318, "step": 8678 }, { "epoch": 206.644776119403, "grad_norm": 27.024131774902344, "learning_rate": 9.692743764172336e-06, "loss": 37.3023, "step": 8679 }, { "epoch": 206.6686567164179, "grad_norm": 28.0163516998291, "learning_rate": 9.691609977324263e-06, "loss": 37.4882, "step": 8680 }, { "epoch": 206.69253731343284, "grad_norm": 27.679622650146484, "learning_rate": 9.690476190476191e-06, "loss": 37.6525, "step": 8681 }, { "epoch": 206.71641791044777, "grad_norm": 25.003210067749023, "learning_rate": 9.689342403628118e-06, "loss": 36.7752, "step": 8682 }, { "epoch": 206.74029850746268, "grad_norm": 24.170604705810547, "learning_rate": 9.688208616780046e-06, "loss": 38.1328, "step": 8683 }, { "epoch": 206.76417910447762, "grad_norm": 26.91878890991211, "learning_rate": 9.687074829931973e-06, "loss": 37.0418, "step": 8684 }, { "epoch": 206.78805970149253, "grad_norm": 21.186622619628906, "learning_rate": 9.685941043083901e-06, "loss": 37.6866, "step": 8685 }, { "epoch": 206.81194029850747, "grad_norm": 32.290283203125, "learning_rate": 9.684807256235828e-06, "loss": 37.996, "step": 8686 }, { "epoch": 206.83582089552237, "grad_norm": 27.53929328918457, "learning_rate": 9.683673469387756e-06, "loss": 37.8191, "step": 8687 }, { "epoch": 206.8597014925373, "grad_norm": 28.326705932617188, "learning_rate": 9.682539682539683e-06, "loss": 37.1428, "step": 8688 }, { "epoch": 206.88358208955225, "grad_norm": 27.45022201538086, "learning_rate": 9.681405895691611e-06, "loss": 38.0987, "step": 8689 }, { "epoch": 206.90746268656716, "grad_norm": 21.459352493286133, "learning_rate": 9.680272108843538e-06, "loss": 36.5714, "step": 8690 }, { "epoch": 206.9313432835821, "grad_norm": 20.296875, "learning_rate": 9.679138321995466e-06, "loss": 34.7537, "step": 8691 }, { "epoch": 206.955223880597, "grad_norm": 32.78303527832031, "learning_rate": 9.678004535147393e-06, "loss": 37.9229, "step": 8692 }, { "epoch": 206.97910447761194, "grad_norm": 25.335390090942383, "learning_rate": 9.676870748299321e-06, "loss": 37.0462, "step": 8693 }, { "epoch": 207.0, "grad_norm": 27.832971572875977, "learning_rate": 9.675736961451247e-06, "loss": 32.6488, "step": 8694 }, { "epoch": 207.02388059701494, "grad_norm": 28.880048751831055, "learning_rate": 9.674603174603176e-06, "loss": 36.2167, "step": 8695 }, { "epoch": 207.04776119402985, "grad_norm": 23.16868782043457, "learning_rate": 9.673469387755103e-06, "loss": 35.8238, "step": 8696 }, { "epoch": 207.07164179104478, "grad_norm": 23.041492462158203, "learning_rate": 9.672335600907031e-06, "loss": 37.2479, "step": 8697 }, { "epoch": 207.0955223880597, "grad_norm": 27.763071060180664, "learning_rate": 9.671201814058958e-06, "loss": 37.8876, "step": 8698 }, { "epoch": 207.11940298507463, "grad_norm": 23.571678161621094, "learning_rate": 9.670068027210884e-06, "loss": 37.9851, "step": 8699 }, { "epoch": 207.14328358208957, "grad_norm": 28.939640045166016, "learning_rate": 9.668934240362813e-06, "loss": 37.6695, "step": 8700 }, { "epoch": 207.16716417910447, "grad_norm": 25.583032608032227, "learning_rate": 9.66780045351474e-06, "loss": 36.5894, "step": 8701 }, { "epoch": 207.1910447761194, "grad_norm": 26.849519729614258, "learning_rate": 9.666666666666667e-06, "loss": 37.2977, "step": 8702 }, { "epoch": 207.21492537313432, "grad_norm": NaN, "learning_rate": 9.665532879818596e-06, "loss": 51.6228, "step": 8703 }, { "epoch": 207.23880597014926, "grad_norm": 28.73918914794922, "learning_rate": 9.665532879818596e-06, "loss": 38.3042, "step": 8704 }, { "epoch": 207.26268656716417, "grad_norm": 25.652467727661133, "learning_rate": 9.664399092970522e-06, "loss": 37.2118, "step": 8705 }, { "epoch": 207.2865671641791, "grad_norm": 23.142736434936523, "learning_rate": 9.663265306122451e-06, "loss": 36.7965, "step": 8706 }, { "epoch": 207.31044776119404, "grad_norm": 26.23145866394043, "learning_rate": 9.662131519274377e-06, "loss": 36.7877, "step": 8707 }, { "epoch": 207.33432835820895, "grad_norm": 20.018190383911133, "learning_rate": 9.660997732426304e-06, "loss": 36.7477, "step": 8708 }, { "epoch": 207.3582089552239, "grad_norm": 28.383352279663086, "learning_rate": 9.659863945578232e-06, "loss": 35.9798, "step": 8709 }, { "epoch": 207.3820895522388, "grad_norm": 24.80544090270996, "learning_rate": 9.65873015873016e-06, "loss": 37.4661, "step": 8710 }, { "epoch": 207.40597014925373, "grad_norm": 27.32225227355957, "learning_rate": 9.657596371882087e-06, "loss": 38.4724, "step": 8711 }, { "epoch": 207.42985074626867, "grad_norm": 24.118083953857422, "learning_rate": 9.656462585034014e-06, "loss": 37.5195, "step": 8712 }, { "epoch": 207.45373134328358, "grad_norm": 31.7176513671875, "learning_rate": 9.655328798185942e-06, "loss": 37.9662, "step": 8713 }, { "epoch": 207.47761194029852, "grad_norm": 28.144126892089844, "learning_rate": 9.65419501133787e-06, "loss": 37.9732, "step": 8714 }, { "epoch": 207.50149253731342, "grad_norm": 23.09284782409668, "learning_rate": 9.653061224489797e-06, "loss": 37.4785, "step": 8715 }, { "epoch": 207.52537313432836, "grad_norm": 22.441999435424805, "learning_rate": 9.651927437641724e-06, "loss": 36.9549, "step": 8716 }, { "epoch": 207.54925373134327, "grad_norm": 26.634323120117188, "learning_rate": 9.650793650793652e-06, "loss": 37.5379, "step": 8717 }, { "epoch": 207.5731343283582, "grad_norm": 19.879837036132812, "learning_rate": 9.64965986394558e-06, "loss": 38.1537, "step": 8718 }, { "epoch": 207.59701492537314, "grad_norm": 30.73711395263672, "learning_rate": 9.648526077097507e-06, "loss": 37.6, "step": 8719 }, { "epoch": 207.62089552238805, "grad_norm": 23.9213924407959, "learning_rate": 9.647392290249434e-06, "loss": 36.2659, "step": 8720 }, { "epoch": 207.644776119403, "grad_norm": 26.76925277709961, "learning_rate": 9.646258503401362e-06, "loss": 37.7797, "step": 8721 }, { "epoch": 207.6686567164179, "grad_norm": 27.074373245239258, "learning_rate": 9.64512471655329e-06, "loss": 37.0032, "step": 8722 }, { "epoch": 207.69253731343284, "grad_norm": 20.3933162689209, "learning_rate": 9.643990929705217e-06, "loss": 36.884, "step": 8723 }, { "epoch": 207.71641791044777, "grad_norm": 22.100154876708984, "learning_rate": 9.642857142857144e-06, "loss": 36.168, "step": 8724 }, { "epoch": 207.74029850746268, "grad_norm": 26.020097732543945, "learning_rate": 9.64172335600907e-06, "loss": 36.8531, "step": 8725 }, { "epoch": 207.76417910447762, "grad_norm": 20.40485191345215, "learning_rate": 9.640589569161e-06, "loss": 37.6331, "step": 8726 }, { "epoch": 207.78805970149253, "grad_norm": 26.535852432250977, "learning_rate": 9.639455782312927e-06, "loss": 36.59, "step": 8727 }, { "epoch": 207.81194029850747, "grad_norm": 24.5301456451416, "learning_rate": 9.638321995464852e-06, "loss": 36.8655, "step": 8728 }, { "epoch": 207.83582089552237, "grad_norm": 24.19950294494629, "learning_rate": 9.637188208616782e-06, "loss": 37.2821, "step": 8729 }, { "epoch": 207.8597014925373, "grad_norm": 22.14886474609375, "learning_rate": 9.636054421768707e-06, "loss": 37.5526, "step": 8730 }, { "epoch": 207.88358208955225, "grad_norm": 27.889387130737305, "learning_rate": 9.634920634920637e-06, "loss": 37.4035, "step": 8731 }, { "epoch": 207.90746268656716, "grad_norm": 23.2374267578125, "learning_rate": 9.633786848072562e-06, "loss": 37.0872, "step": 8732 }, { "epoch": 207.9313432835821, "grad_norm": 28.7000675201416, "learning_rate": 9.63265306122449e-06, "loss": 37.2093, "step": 8733 }, { "epoch": 207.955223880597, "grad_norm": 25.44818878173828, "learning_rate": 9.63151927437642e-06, "loss": 36.7601, "step": 8734 }, { "epoch": 207.97910447761194, "grad_norm": 25.34256362915039, "learning_rate": 9.630385487528345e-06, "loss": 38.1114, "step": 8735 }, { "epoch": 208.0, "grad_norm": 21.16939926147461, "learning_rate": 9.629251700680272e-06, "loss": 33.3985, "step": 8736 }, { "epoch": 208.02388059701494, "grad_norm": 26.90691375732422, "learning_rate": 9.6281179138322e-06, "loss": 37.5929, "step": 8737 }, { "epoch": 208.04776119402985, "grad_norm": 21.72243881225586, "learning_rate": 9.626984126984127e-06, "loss": 37.9544, "step": 8738 }, { "epoch": 208.07164179104478, "grad_norm": 25.133729934692383, "learning_rate": 9.625850340136055e-06, "loss": 36.4881, "step": 8739 }, { "epoch": 208.0955223880597, "grad_norm": 21.65189552307129, "learning_rate": 9.624716553287982e-06, "loss": 35.4018, "step": 8740 }, { "epoch": 208.11940298507463, "grad_norm": 20.662872314453125, "learning_rate": 9.62358276643991e-06, "loss": 38.068, "step": 8741 }, { "epoch": 208.14328358208957, "grad_norm": 18.179677963256836, "learning_rate": 9.622448979591837e-06, "loss": 36.9269, "step": 8742 }, { "epoch": 208.16716417910447, "grad_norm": 20.394655227661133, "learning_rate": 9.621315192743765e-06, "loss": 37.5103, "step": 8743 }, { "epoch": 208.1910447761194, "grad_norm": 14.633431434631348, "learning_rate": 9.620181405895692e-06, "loss": 37.4208, "step": 8744 }, { "epoch": 208.21492537313432, "grad_norm": 26.77837371826172, "learning_rate": 9.61904761904762e-06, "loss": 37.438, "step": 8745 }, { "epoch": 208.23880597014926, "grad_norm": 20.291751861572266, "learning_rate": 9.617913832199547e-06, "loss": 37.2541, "step": 8746 }, { "epoch": 208.26268656716417, "grad_norm": 29.006790161132812, "learning_rate": 9.616780045351475e-06, "loss": 37.77, "step": 8747 }, { "epoch": 208.2865671641791, "grad_norm": 23.452924728393555, "learning_rate": 9.6156462585034e-06, "loss": 36.8207, "step": 8748 }, { "epoch": 208.31044776119404, "grad_norm": 25.323101043701172, "learning_rate": 9.61451247165533e-06, "loss": 38.0678, "step": 8749 }, { "epoch": 208.33432835820895, "grad_norm": 24.719270706176758, "learning_rate": 9.613378684807257e-06, "loss": 38.1048, "step": 8750 }, { "epoch": 208.3582089552239, "grad_norm": 23.332378387451172, "learning_rate": 9.612244897959185e-06, "loss": 36.2702, "step": 8751 }, { "epoch": 208.3820895522388, "grad_norm": 18.7159366607666, "learning_rate": 9.611111111111112e-06, "loss": 35.932, "step": 8752 }, { "epoch": 208.40597014925373, "grad_norm": 22.514942169189453, "learning_rate": 9.609977324263038e-06, "loss": 37.6841, "step": 8753 }, { "epoch": 208.42985074626867, "grad_norm": 18.898149490356445, "learning_rate": 9.608843537414967e-06, "loss": 37.062, "step": 8754 }, { "epoch": 208.45373134328358, "grad_norm": 23.485321044921875, "learning_rate": 9.607709750566893e-06, "loss": 37.5833, "step": 8755 }, { "epoch": 208.47761194029852, "grad_norm": 18.638851165771484, "learning_rate": 9.606575963718822e-06, "loss": 37.5329, "step": 8756 }, { "epoch": 208.50149253731342, "grad_norm": 25.822765350341797, "learning_rate": 9.60544217687075e-06, "loss": 38.0558, "step": 8757 }, { "epoch": 208.52537313432836, "grad_norm": 23.212072372436523, "learning_rate": 9.604308390022676e-06, "loss": 37.2138, "step": 8758 }, { "epoch": 208.54925373134327, "grad_norm": 20.474445343017578, "learning_rate": 9.603174603174605e-06, "loss": 35.3232, "step": 8759 }, { "epoch": 208.5731343283582, "grad_norm": 19.00440788269043, "learning_rate": 9.60204081632653e-06, "loss": 37.4117, "step": 8760 }, { "epoch": 208.59701492537314, "grad_norm": 22.211830139160156, "learning_rate": 9.600907029478458e-06, "loss": 37.0947, "step": 8761 }, { "epoch": 208.62089552238805, "grad_norm": 16.204587936401367, "learning_rate": 9.599773242630386e-06, "loss": 36.1761, "step": 8762 }, { "epoch": 208.644776119403, "grad_norm": 26.495813369750977, "learning_rate": 9.598639455782313e-06, "loss": 36.6699, "step": 8763 }, { "epoch": 208.6686567164179, "grad_norm": 22.76972007751465, "learning_rate": 9.597505668934242e-06, "loss": 36.6711, "step": 8764 }, { "epoch": 208.69253731343284, "grad_norm": 23.398727416992188, "learning_rate": 9.596371882086168e-06, "loss": 36.0655, "step": 8765 }, { "epoch": 208.71641791044777, "grad_norm": 21.727886199951172, "learning_rate": 9.595238095238096e-06, "loss": 38.3942, "step": 8766 }, { "epoch": 208.74029850746268, "grad_norm": 25.35695457458496, "learning_rate": 9.594104308390023e-06, "loss": 36.5805, "step": 8767 }, { "epoch": 208.76417910447762, "grad_norm": 20.90379524230957, "learning_rate": 9.59297052154195e-06, "loss": 37.3334, "step": 8768 }, { "epoch": 208.78805970149253, "grad_norm": 23.767805099487305, "learning_rate": 9.591836734693878e-06, "loss": 37.0844, "step": 8769 }, { "epoch": 208.81194029850747, "grad_norm": 22.0218563079834, "learning_rate": 9.590702947845806e-06, "loss": 37.8925, "step": 8770 }, { "epoch": 208.83582089552237, "grad_norm": 28.21807861328125, "learning_rate": 9.589569160997733e-06, "loss": 37.1123, "step": 8771 }, { "epoch": 208.8597014925373, "grad_norm": 21.722558975219727, "learning_rate": 9.58843537414966e-06, "loss": 37.4826, "step": 8772 }, { "epoch": 208.88358208955225, "grad_norm": NaN, "learning_rate": 9.587301587301588e-06, "loss": 66.8301, "step": 8773 }, { "epoch": 208.90746268656716, "grad_norm": 19.700010299682617, "learning_rate": 9.587301587301588e-06, "loss": 37.7419, "step": 8774 }, { "epoch": 208.9313432835821, "grad_norm": 21.414432525634766, "learning_rate": 9.586167800453516e-06, "loss": 37.6931, "step": 8775 }, { "epoch": 208.955223880597, "grad_norm": 16.847640991210938, "learning_rate": 9.585034013605443e-06, "loss": 37.3081, "step": 8776 }, { "epoch": 208.97910447761194, "grad_norm": 18.310691833496094, "learning_rate": 9.58390022675737e-06, "loss": 37.2922, "step": 8777 }, { "epoch": 209.0, "grad_norm": 14.464380264282227, "learning_rate": 9.582766439909298e-06, "loss": 33.5285, "step": 8778 }, { "epoch": 209.02388059701494, "grad_norm": 18.036569595336914, "learning_rate": 9.581632653061226e-06, "loss": 35.9583, "step": 8779 }, { "epoch": 209.04776119402985, "grad_norm": 14.068912506103516, "learning_rate": 9.580498866213153e-06, "loss": 36.8707, "step": 8780 }, { "epoch": 209.07164179104478, "grad_norm": 21.767086029052734, "learning_rate": 9.57936507936508e-06, "loss": 37.8075, "step": 8781 }, { "epoch": 209.0955223880597, "grad_norm": 19.587646484375, "learning_rate": 9.578231292517007e-06, "loss": 37.7072, "step": 8782 }, { "epoch": 209.11940298507463, "grad_norm": 19.14802360534668, "learning_rate": 9.577097505668936e-06, "loss": 36.6308, "step": 8783 }, { "epoch": 209.14328358208957, "grad_norm": 17.630035400390625, "learning_rate": 9.575963718820862e-06, "loss": 36.497, "step": 8784 }, { "epoch": 209.16716417910447, "grad_norm": 19.561717987060547, "learning_rate": 9.57482993197279e-06, "loss": 36.6801, "step": 8785 }, { "epoch": 209.1910447761194, "grad_norm": 15.293615341186523, "learning_rate": 9.573696145124717e-06, "loss": 35.5048, "step": 8786 }, { "epoch": 209.21492537313432, "grad_norm": 18.61341094970703, "learning_rate": 9.572562358276644e-06, "loss": 37.7874, "step": 8787 }, { "epoch": 209.23880597014926, "grad_norm": 19.37540054321289, "learning_rate": 9.571428571428573e-06, "loss": 37.4133, "step": 8788 }, { "epoch": 209.26268656716417, "grad_norm": 13.318099975585938, "learning_rate": 9.570294784580499e-06, "loss": 37.4057, "step": 8789 }, { "epoch": 209.2865671641791, "grad_norm": 15.878247261047363, "learning_rate": 9.569160997732427e-06, "loss": 36.2705, "step": 8790 }, { "epoch": 209.31044776119404, "grad_norm": 16.74108123779297, "learning_rate": 9.568027210884354e-06, "loss": 35.7654, "step": 8791 }, { "epoch": 209.33432835820895, "grad_norm": 17.369836807250977, "learning_rate": 9.566893424036282e-06, "loss": 36.6832, "step": 8792 }, { "epoch": 209.3582089552239, "grad_norm": 17.037837982177734, "learning_rate": 9.565759637188209e-06, "loss": 37.4281, "step": 8793 }, { "epoch": 209.3820895522388, "grad_norm": 14.933297157287598, "learning_rate": 9.564625850340137e-06, "loss": 36.907, "step": 8794 }, { "epoch": 209.40597014925373, "grad_norm": 17.732763290405273, "learning_rate": 9.563492063492064e-06, "loss": 37.0403, "step": 8795 }, { "epoch": 209.42985074626867, "grad_norm": 16.423961639404297, "learning_rate": 9.562358276643991e-06, "loss": 37.6568, "step": 8796 }, { "epoch": 209.45373134328358, "grad_norm": 13.933506965637207, "learning_rate": 9.561224489795919e-06, "loss": 37.5156, "step": 8797 }, { "epoch": 209.47761194029852, "grad_norm": 17.839454650878906, "learning_rate": 9.560090702947846e-06, "loss": 37.8671, "step": 8798 }, { "epoch": 209.50149253731342, "grad_norm": 18.602453231811523, "learning_rate": 9.558956916099774e-06, "loss": 37.6858, "step": 8799 }, { "epoch": 209.52537313432836, "grad_norm": NaN, "learning_rate": 9.557823129251701e-06, "loss": 36.5838, "step": 8800 }, { "epoch": 209.54925373134327, "grad_norm": 15.753517150878906, "learning_rate": 9.557823129251701e-06, "loss": 37.803, "step": 8801 }, { "epoch": 209.5731343283582, "grad_norm": 15.225348472595215, "learning_rate": 9.556689342403629e-06, "loss": 37.2629, "step": 8802 }, { "epoch": 209.59701492537314, "grad_norm": 14.971363067626953, "learning_rate": 9.555555555555556e-06, "loss": 35.9027, "step": 8803 }, { "epoch": 209.62089552238805, "grad_norm": 14.179505348205566, "learning_rate": 9.554421768707484e-06, "loss": 36.3974, "step": 8804 }, { "epoch": 209.644776119403, "grad_norm": 18.063364028930664, "learning_rate": 9.553287981859411e-06, "loss": 36.8283, "step": 8805 }, { "epoch": 209.6686567164179, "grad_norm": 13.856316566467285, "learning_rate": 9.552154195011339e-06, "loss": 37.0903, "step": 8806 }, { "epoch": 209.69253731343284, "grad_norm": 21.6617488861084, "learning_rate": 9.551020408163266e-06, "loss": 38.114, "step": 8807 }, { "epoch": 209.71641791044777, "grad_norm": 18.399335861206055, "learning_rate": 9.549886621315192e-06, "loss": 37.6584, "step": 8808 }, { "epoch": 209.74029850746268, "grad_norm": 14.723889350891113, "learning_rate": 9.548752834467121e-06, "loss": 37.6456, "step": 8809 }, { "epoch": 209.76417910447762, "grad_norm": 18.87386703491211, "learning_rate": 9.547619047619049e-06, "loss": 37.9242, "step": 8810 }, { "epoch": 209.78805970149253, "grad_norm": 17.717641830444336, "learning_rate": 9.546485260770976e-06, "loss": 36.445, "step": 8811 }, { "epoch": 209.81194029850747, "grad_norm": 15.05385971069336, "learning_rate": 9.545351473922904e-06, "loss": 38.2042, "step": 8812 }, { "epoch": 209.83582089552237, "grad_norm": 22.728147506713867, "learning_rate": 9.54421768707483e-06, "loss": 37.3571, "step": 8813 }, { "epoch": 209.8597014925373, "grad_norm": 18.1093807220459, "learning_rate": 9.543083900226759e-06, "loss": 37.8085, "step": 8814 }, { "epoch": 209.88358208955225, "grad_norm": 15.268453598022461, "learning_rate": 9.541950113378685e-06, "loss": 37.8936, "step": 8815 }, { "epoch": 209.90746268656716, "grad_norm": 22.95656967163086, "learning_rate": 9.540816326530612e-06, "loss": 37.1294, "step": 8816 }, { "epoch": 209.9313432835821, "grad_norm": 15.439409255981445, "learning_rate": 9.539682539682541e-06, "loss": 37.3681, "step": 8817 }, { "epoch": 209.955223880597, "grad_norm": 17.66872215270996, "learning_rate": 9.538548752834467e-06, "loss": 38.0217, "step": 8818 }, { "epoch": 209.97910447761194, "grad_norm": 20.06760597229004, "learning_rate": 9.537414965986396e-06, "loss": 37.7129, "step": 8819 }, { "epoch": 210.0, "grad_norm": 19.19893455505371, "learning_rate": 9.536281179138322e-06, "loss": 33.1443, "step": 8820 }, { "epoch": 210.0, "step": 8820, "total_flos": 4.3361080674915085e+17, "train_loss": 1.7935449273678181, "train_runtime": 12838.2452, "train_samples_per_second": 87.545, "train_steps_per_second": 0.687 }, { "epoch": 210.02388059701494, "grad_norm": 15.075254440307617, "learning_rate": 1e-05, "loss": 37.2785, "step": 8821 }, { "epoch": 210.04776119402985, "grad_norm": 142.95925903320312, "learning_rate": 9.99896480331263e-06, "loss": 40.939, "step": 8822 }, { "epoch": 210.07164179104478, "grad_norm": 73.43154907226562, "learning_rate": 9.99792960662526e-06, "loss": 39.356, "step": 8823 }, { "epoch": 210.0955223880597, "grad_norm": 42.96079635620117, "learning_rate": 9.99689440993789e-06, "loss": 38.2294, "step": 8824 }, { "epoch": 210.11940298507463, "grad_norm": 49.12348937988281, "learning_rate": 9.995859213250519e-06, "loss": 38.5214, "step": 8825 }, { "epoch": 210.14328358208957, "grad_norm": 62.188777923583984, "learning_rate": 9.994824016563148e-06, "loss": 36.7933, "step": 8826 }, { "epoch": 210.16716417910447, "grad_norm": 32.76605224609375, "learning_rate": 9.993788819875776e-06, "loss": 38.1907, "step": 8827 }, { "epoch": 210.1910447761194, "grad_norm": 52.76780700683594, "learning_rate": 9.992753623188408e-06, "loss": 37.3923, "step": 8828 }, { "epoch": 210.21492537313432, "grad_norm": 35.62137985229492, "learning_rate": 9.991718426501035e-06, "loss": 37.1639, "step": 8829 }, { "epoch": 210.23880597014926, "grad_norm": 38.32008361816406, "learning_rate": 9.990683229813667e-06, "loss": 36.3991, "step": 8830 }, { "epoch": 210.26268656716417, "grad_norm": 30.2634334564209, "learning_rate": 9.989648033126294e-06, "loss": 36.7869, "step": 8831 }, { "epoch": 210.2865671641791, "grad_norm": 22.547588348388672, "learning_rate": 9.988612836438924e-06, "loss": 37.0281, "step": 8832 }, { "epoch": 210.31044776119404, "grad_norm": 32.26959991455078, "learning_rate": 9.987577639751553e-06, "loss": 37.768, "step": 8833 }, { "epoch": 210.33432835820895, "grad_norm": 24.533544540405273, "learning_rate": 9.986542443064183e-06, "loss": 36.9135, "step": 8834 }, { "epoch": 210.3582089552239, "grad_norm": 17.129941940307617, "learning_rate": 9.985507246376813e-06, "loss": 38.2224, "step": 8835 }, { "epoch": 210.3820895522388, "grad_norm": 19.48621368408203, "learning_rate": 9.984472049689442e-06, "loss": 37.1503, "step": 8836 }, { "epoch": 210.40597014925373, "grad_norm": 19.355815887451172, "learning_rate": 9.983436853002072e-06, "loss": 37.1349, "step": 8837 }, { "epoch": 210.42985074626867, "grad_norm": 20.324405670166016, "learning_rate": 9.982401656314701e-06, "loss": 35.8842, "step": 8838 }, { "epoch": 210.45373134328358, "grad_norm": 15.293591499328613, "learning_rate": 9.981366459627329e-06, "loss": 36.4122, "step": 8839 }, { "epoch": 210.47761194029852, "grad_norm": 20.000492095947266, "learning_rate": 9.98033126293996e-06, "loss": 38.4082, "step": 8840 }, { "epoch": 210.50149253731342, "grad_norm": 17.6369686126709, "learning_rate": 9.979296066252588e-06, "loss": 38.5251, "step": 8841 }, { "epoch": 210.52537313432836, "grad_norm": 20.191181182861328, "learning_rate": 9.978260869565218e-06, "loss": 36.9463, "step": 8842 }, { "epoch": 210.54925373134327, "grad_norm": 16.443561553955078, "learning_rate": 9.977225672877847e-06, "loss": 37.2469, "step": 8843 }, { "epoch": 210.5731343283582, "grad_norm": 15.099014282226562, "learning_rate": 9.976190476190477e-06, "loss": 38.4285, "step": 8844 }, { "epoch": 210.59701492537314, "grad_norm": 15.890122413635254, "learning_rate": 9.975155279503106e-06, "loss": 37.8013, "step": 8845 }, { "epoch": 210.62089552238805, "grad_norm": 18.88666343688965, "learning_rate": 9.974120082815736e-06, "loss": 37.1099, "step": 8846 }, { "epoch": 210.644776119403, "grad_norm": 15.706725120544434, "learning_rate": 9.973084886128365e-06, "loss": 37.0593, "step": 8847 }, { "epoch": 210.6686567164179, "grad_norm": 24.356304168701172, "learning_rate": 9.972049689440995e-06, "loss": 37.0481, "step": 8848 }, { "epoch": 210.69253731343284, "grad_norm": 18.533910751342773, "learning_rate": 9.971014492753624e-06, "loss": 36.6005, "step": 8849 }, { "epoch": 210.71641791044777, "grad_norm": 16.038110733032227, "learning_rate": 9.969979296066254e-06, "loss": 37.6422, "step": 8850 }, { "epoch": 210.74029850746268, "grad_norm": 15.192877769470215, "learning_rate": 9.968944099378883e-06, "loss": 37.0933, "step": 8851 }, { "epoch": 210.76417910447762, "grad_norm": 16.551071166992188, "learning_rate": 9.967908902691513e-06, "loss": 37.2211, "step": 8852 }, { "epoch": 210.78805970149253, "grad_norm": 14.400941848754883, "learning_rate": 9.966873706004142e-06, "loss": 36.0341, "step": 8853 }, { "epoch": 210.81194029850747, "grad_norm": 15.2099027633667, "learning_rate": 9.96583850931677e-06, "loss": 36.9797, "step": 8854 }, { "epoch": 210.83582089552237, "grad_norm": 15.81094741821289, "learning_rate": 9.964803312629401e-06, "loss": 37.4321, "step": 8855 }, { "epoch": 210.8597014925373, "grad_norm": 17.03694725036621, "learning_rate": 9.96376811594203e-06, "loss": 38.3356, "step": 8856 }, { "epoch": 210.88358208955225, "grad_norm": 15.178110122680664, "learning_rate": 9.962732919254659e-06, "loss": 36.5125, "step": 8857 }, { "epoch": 210.90746268656716, "grad_norm": 16.529111862182617, "learning_rate": 9.961697722567288e-06, "loss": 36.6884, "step": 8858 }, { "epoch": 210.9313432835821, "grad_norm": 17.919584274291992, "learning_rate": 9.960662525879918e-06, "loss": 38.351, "step": 8859 }, { "epoch": 210.955223880597, "grad_norm": 15.563426971435547, "learning_rate": 9.959627329192547e-06, "loss": 38.2391, "step": 8860 }, { "epoch": 210.97910447761194, "grad_norm": 17.85155487060547, "learning_rate": 9.958592132505177e-06, "loss": 37.3985, "step": 8861 }, { "epoch": 211.0, "grad_norm": 16.511281967163086, "learning_rate": 9.957556935817806e-06, "loss": 32.5785, "step": 8862 }, { "epoch": 211.02388059701494, "grad_norm": 14.808560371398926, "learning_rate": 9.956521739130436e-06, "loss": 37.2203, "step": 8863 }, { "epoch": 211.04776119402985, "grad_norm": 15.766176223754883, "learning_rate": 9.955486542443065e-06, "loss": 37.1016, "step": 8864 }, { "epoch": 211.07164179104478, "grad_norm": 13.97423267364502, "learning_rate": 9.954451345755695e-06, "loss": 38.1213, "step": 8865 }, { "epoch": 211.0955223880597, "grad_norm": 17.683542251586914, "learning_rate": 9.953416149068323e-06, "loss": 37.1273, "step": 8866 }, { "epoch": 211.11940298507463, "grad_norm": 18.57022476196289, "learning_rate": 9.952380952380954e-06, "loss": 38.2498, "step": 8867 }, { "epoch": 211.14328358208957, "grad_norm": 18.91432762145996, "learning_rate": 9.951345755693582e-06, "loss": 36.2946, "step": 8868 }, { "epoch": 211.16716417910447, "grad_norm": NaN, "learning_rate": 9.950310559006211e-06, "loss": 35.6508, "step": 8869 }, { "epoch": 211.1910447761194, "grad_norm": 18.006126403808594, "learning_rate": 9.950310559006211e-06, "loss": 37.4559, "step": 8870 }, { "epoch": 211.21492537313432, "grad_norm": 14.555634498596191, "learning_rate": 9.949275362318841e-06, "loss": 37.3692, "step": 8871 }, { "epoch": 211.23880597014926, "grad_norm": 16.3458251953125, "learning_rate": 9.94824016563147e-06, "loss": 38.2418, "step": 8872 }, { "epoch": 211.26268656716417, "grad_norm": 13.63530445098877, "learning_rate": 9.9472049689441e-06, "loss": 35.4199, "step": 8873 }, { "epoch": 211.2865671641791, "grad_norm": 13.486242294311523, "learning_rate": 9.94616977225673e-06, "loss": 37.8011, "step": 8874 }, { "epoch": 211.31044776119404, "grad_norm": 14.73640251159668, "learning_rate": 9.945134575569359e-06, "loss": 36.5183, "step": 8875 }, { "epoch": 211.33432835820895, "grad_norm": 16.692045211791992, "learning_rate": 9.944099378881989e-06, "loss": 37.1129, "step": 8876 }, { "epoch": 211.3582089552239, "grad_norm": NaN, "learning_rate": 9.943064182194618e-06, "loss": 63.5603, "step": 8877 }, { "epoch": 211.3820895522388, "grad_norm": NaN, "learning_rate": 9.943064182194618e-06, "loss": 31.2282, "step": 8878 }, { "epoch": 211.40597014925373, "grad_norm": 17.002836227416992, "learning_rate": 9.943064182194618e-06, "loss": 37.1965, "step": 8879 }, { "epoch": 211.42985074626867, "grad_norm": 18.055377960205078, "learning_rate": 9.942028985507248e-06, "loss": 36.6305, "step": 8880 }, { "epoch": 211.45373134328358, "grad_norm": 15.369948387145996, "learning_rate": 9.940993788819877e-06, "loss": 36.7211, "step": 8881 }, { "epoch": 211.47761194029852, "grad_norm": 14.401118278503418, "learning_rate": 9.939958592132507e-06, "loss": 36.1752, "step": 8882 }, { "epoch": 211.50149253731342, "grad_norm": 17.921104431152344, "learning_rate": 9.938923395445136e-06, "loss": 38.1467, "step": 8883 }, { "epoch": 211.52537313432836, "grad_norm": 11.802447319030762, "learning_rate": 9.937888198757764e-06, "loss": 37.3524, "step": 8884 }, { "epoch": 211.54925373134327, "grad_norm": 19.655445098876953, "learning_rate": 9.936853002070395e-06, "loss": 35.8514, "step": 8885 }, { "epoch": 211.5731343283582, "grad_norm": 15.055081367492676, "learning_rate": 9.935817805383023e-06, "loss": 37.7844, "step": 8886 }, { "epoch": 211.59701492537314, "grad_norm": 19.81008529663086, "learning_rate": 9.934782608695653e-06, "loss": 37.4641, "step": 8887 }, { "epoch": 211.62089552238805, "grad_norm": 15.446284294128418, "learning_rate": 9.933747412008282e-06, "loss": 36.2631, "step": 8888 }, { "epoch": 211.644776119403, "grad_norm": 19.153871536254883, "learning_rate": 9.932712215320912e-06, "loss": 38.593, "step": 8889 }, { "epoch": 211.6686567164179, "grad_norm": 17.141765594482422, "learning_rate": 9.931677018633541e-06, "loss": 37.508, "step": 8890 }, { "epoch": 211.69253731343284, "grad_norm": 20.760438919067383, "learning_rate": 9.93064182194617e-06, "loss": 38.5176, "step": 8891 }, { "epoch": 211.71641791044777, "grad_norm": 16.805908203125, "learning_rate": 9.9296066252588e-06, "loss": 37.825, "step": 8892 }, { "epoch": 211.74029850746268, "grad_norm": 20.541210174560547, "learning_rate": 9.92857142857143e-06, "loss": 37.9388, "step": 8893 }, { "epoch": 211.76417910447762, "grad_norm": 18.6077880859375, "learning_rate": 9.927536231884058e-06, "loss": 37.2533, "step": 8894 }, { "epoch": 211.78805970149253, "grad_norm": NaN, "learning_rate": 9.926501035196689e-06, "loss": 56.6819, "step": 8895 }, { "epoch": 211.81194029850747, "grad_norm": 17.51033592224121, "learning_rate": 9.926501035196689e-06, "loss": 36.7446, "step": 8896 }, { "epoch": 211.83582089552237, "grad_norm": 19.205183029174805, "learning_rate": 9.925465838509317e-06, "loss": 36.8939, "step": 8897 }, { "epoch": 211.8597014925373, "grad_norm": 15.204777717590332, "learning_rate": 9.924430641821948e-06, "loss": 37.2798, "step": 8898 }, { "epoch": 211.88358208955225, "grad_norm": 15.828685760498047, "learning_rate": 9.923395445134576e-06, "loss": 36.6658, "step": 8899 }, { "epoch": 211.90746268656716, "grad_norm": 15.359902381896973, "learning_rate": 9.922360248447205e-06, "loss": 36.0643, "step": 8900 }, { "epoch": 211.9313432835821, "grad_norm": 23.389053344726562, "learning_rate": 9.921325051759835e-06, "loss": 37.1585, "step": 8901 }, { "epoch": 211.955223880597, "grad_norm": 16.01727867126465, "learning_rate": 9.920289855072464e-06, "loss": 36.6398, "step": 8902 }, { "epoch": 211.97910447761194, "grad_norm": 18.08060073852539, "learning_rate": 9.919254658385094e-06, "loss": 37.8393, "step": 8903 }, { "epoch": 212.0, "grad_norm": 14.446660995483398, "learning_rate": 9.918219461697723e-06, "loss": 31.096, "step": 8904 }, { "epoch": 212.02388059701494, "grad_norm": 17.174697875976562, "learning_rate": 9.917184265010353e-06, "loss": 36.3044, "step": 8905 }, { "epoch": 212.04776119402985, "grad_norm": 14.765143394470215, "learning_rate": 9.916149068322982e-06, "loss": 36.1536, "step": 8906 }, { "epoch": 212.07164179104478, "grad_norm": 17.595178604125977, "learning_rate": 9.915113871635612e-06, "loss": 36.9917, "step": 8907 }, { "epoch": 212.0955223880597, "grad_norm": 15.640548706054688, "learning_rate": 9.914078674948242e-06, "loss": 35.9084, "step": 8908 }, { "epoch": 212.11940298507463, "grad_norm": 19.422521591186523, "learning_rate": 9.913043478260871e-06, "loss": 36.4987, "step": 8909 }, { "epoch": 212.14328358208957, "grad_norm": 17.555795669555664, "learning_rate": 9.912008281573499e-06, "loss": 37.8731, "step": 8910 }, { "epoch": 212.16716417910447, "grad_norm": 22.860374450683594, "learning_rate": 9.91097308488613e-06, "loss": 36.6072, "step": 8911 }, { "epoch": 212.1910447761194, "grad_norm": 21.4945125579834, "learning_rate": 9.909937888198758e-06, "loss": 35.3901, "step": 8912 }, { "epoch": 212.21492537313432, "grad_norm": 17.8464298248291, "learning_rate": 9.90890269151139e-06, "loss": 36.3299, "step": 8913 }, { "epoch": 212.23880597014926, "grad_norm": 22.104679107666016, "learning_rate": 9.907867494824017e-06, "loss": 37.7505, "step": 8914 }, { "epoch": 212.26268656716417, "grad_norm": 17.718652725219727, "learning_rate": 9.906832298136647e-06, "loss": 36.346, "step": 8915 }, { "epoch": 212.2865671641791, "grad_norm": 16.828168869018555, "learning_rate": 9.905797101449276e-06, "loss": 36.2079, "step": 8916 }, { "epoch": 212.31044776119404, "grad_norm": 17.614103317260742, "learning_rate": 9.904761904761906e-06, "loss": 37.1404, "step": 8917 }, { "epoch": 212.33432835820895, "grad_norm": 25.996273040771484, "learning_rate": 9.903726708074535e-06, "loss": 37.5513, "step": 8918 }, { "epoch": 212.3582089552239, "grad_norm": 16.21636199951172, "learning_rate": 9.902691511387165e-06, "loss": 37.9346, "step": 8919 }, { "epoch": 212.3820895522388, "grad_norm": 20.41476821899414, "learning_rate": 9.901656314699794e-06, "loss": 38.6836, "step": 8920 }, { "epoch": 212.40597014925373, "grad_norm": 25.203231811523438, "learning_rate": 9.900621118012424e-06, "loss": 36.0799, "step": 8921 }, { "epoch": 212.42985074626867, "grad_norm": 18.574546813964844, "learning_rate": 9.899585921325052e-06, "loss": 36.3672, "step": 8922 }, { "epoch": 212.45373134328358, "grad_norm": 25.822708129882812, "learning_rate": 9.898550724637683e-06, "loss": 37.1143, "step": 8923 }, { "epoch": 212.47761194029852, "grad_norm": 18.57192611694336, "learning_rate": 9.89751552795031e-06, "loss": 37.4702, "step": 8924 }, { "epoch": 212.50149253731342, "grad_norm": NaN, "learning_rate": 9.89648033126294e-06, "loss": 55.2507, "step": 8925 }, { "epoch": 212.52537313432836, "grad_norm": 16.169931411743164, "learning_rate": 9.89648033126294e-06, "loss": 37.0546, "step": 8926 }, { "epoch": 212.54925373134327, "grad_norm": 37.11153030395508, "learning_rate": 9.89544513457557e-06, "loss": 37.5354, "step": 8927 }, { "epoch": 212.5731343283582, "grad_norm": 23.903852462768555, "learning_rate": 9.8944099378882e-06, "loss": 37.2159, "step": 8928 }, { "epoch": 212.59701492537314, "grad_norm": 40.34111785888672, "learning_rate": 9.893374741200829e-06, "loss": 36.4423, "step": 8929 }, { "epoch": 212.62089552238805, "grad_norm": 37.706871032714844, "learning_rate": 9.892339544513458e-06, "loss": 36.2335, "step": 8930 }, { "epoch": 212.644776119403, "grad_norm": 30.773954391479492, "learning_rate": 9.891304347826088e-06, "loss": 38.6459, "step": 8931 }, { "epoch": 212.6686567164179, "grad_norm": 32.333580017089844, "learning_rate": 9.890269151138717e-06, "loss": 37.8582, "step": 8932 }, { "epoch": 212.69253731343284, "grad_norm": 27.975984573364258, "learning_rate": 9.889233954451347e-06, "loss": 37.2514, "step": 8933 }, { "epoch": 212.71641791044777, "grad_norm": 26.902097702026367, "learning_rate": 9.888198757763976e-06, "loss": 37.0266, "step": 8934 }, { "epoch": 212.74029850746268, "grad_norm": 34.41338348388672, "learning_rate": 9.887163561076606e-06, "loss": 38.0053, "step": 8935 }, { "epoch": 212.76417910447762, "grad_norm": 29.491994857788086, "learning_rate": 9.886128364389235e-06, "loss": 37.6406, "step": 8936 }, { "epoch": 212.78805970149253, "grad_norm": 33.101356506347656, "learning_rate": 9.885093167701865e-06, "loss": 36.0706, "step": 8937 }, { "epoch": 212.81194029850747, "grad_norm": 25.407678604125977, "learning_rate": 9.884057971014493e-06, "loss": 37.6107, "step": 8938 }, { "epoch": 212.83582089552237, "grad_norm": 31.88374137878418, "learning_rate": 9.883022774327124e-06, "loss": 37.2537, "step": 8939 }, { "epoch": 212.8597014925373, "grad_norm": 27.657949447631836, "learning_rate": 9.881987577639752e-06, "loss": 37.4872, "step": 8940 }, { "epoch": 212.88358208955225, "grad_norm": 30.719676971435547, "learning_rate": 9.880952380952381e-06, "loss": 37.3958, "step": 8941 }, { "epoch": 212.90746268656716, "grad_norm": 25.53170394897461, "learning_rate": 9.879917184265011e-06, "loss": 37.8997, "step": 8942 }, { "epoch": 212.9313432835821, "grad_norm": 31.681127548217773, "learning_rate": 9.87888198757764e-06, "loss": 36.8812, "step": 8943 }, { "epoch": 212.955223880597, "grad_norm": 28.184024810791016, "learning_rate": 9.87784679089027e-06, "loss": 37.533, "step": 8944 }, { "epoch": 212.97910447761194, "grad_norm": 30.316749572753906, "learning_rate": 9.8768115942029e-06, "loss": 37.9202, "step": 8945 }, { "epoch": 213.0, "grad_norm": 23.389223098754883, "learning_rate": 9.875776397515529e-06, "loss": 32.0388, "step": 8946 }, { "epoch": 213.02388059701494, "grad_norm": 29.8602294921875, "learning_rate": 9.874741200828159e-06, "loss": 37.686, "step": 8947 }, { "epoch": 213.04776119402985, "grad_norm": 26.334007263183594, "learning_rate": 9.873706004140788e-06, "loss": 36.9403, "step": 8948 }, { "epoch": 213.07164179104478, "grad_norm": 30.050151824951172, "learning_rate": 9.872670807453418e-06, "loss": 37.0253, "step": 8949 }, { "epoch": 213.0955223880597, "grad_norm": 26.691158294677734, "learning_rate": 9.871635610766045e-06, "loss": 36.9505, "step": 8950 }, { "epoch": 213.11940298507463, "grad_norm": 31.54173469543457, "learning_rate": 9.870600414078677e-06, "loss": 36.5836, "step": 8951 }, { "epoch": 213.14328358208957, "grad_norm": 28.14643096923828, "learning_rate": 9.869565217391304e-06, "loss": 37.0423, "step": 8952 }, { "epoch": 213.16716417910447, "grad_norm": 29.28129005432129, "learning_rate": 9.868530020703934e-06, "loss": 36.4348, "step": 8953 }, { "epoch": 213.1910447761194, "grad_norm": 23.95172882080078, "learning_rate": 9.867494824016564e-06, "loss": 36.9681, "step": 8954 }, { "epoch": 213.21492537313432, "grad_norm": 30.376632690429688, "learning_rate": 9.866459627329193e-06, "loss": 38.3925, "step": 8955 }, { "epoch": 213.23880597014926, "grad_norm": 25.140405654907227, "learning_rate": 9.865424430641823e-06, "loss": 35.3339, "step": 8956 }, { "epoch": 213.26268656716417, "grad_norm": 29.816177368164062, "learning_rate": 9.864389233954452e-06, "loss": 36.7105, "step": 8957 }, { "epoch": 213.2865671641791, "grad_norm": 28.26422882080078, "learning_rate": 9.863354037267082e-06, "loss": 35.156, "step": 8958 }, { "epoch": 213.31044776119404, "grad_norm": 29.071168899536133, "learning_rate": 9.862318840579711e-06, "loss": 37.4681, "step": 8959 }, { "epoch": 213.33432835820895, "grad_norm": 26.617605209350586, "learning_rate": 9.861283643892339e-06, "loss": 36.2425, "step": 8960 }, { "epoch": 213.3582089552239, "grad_norm": 29.758590698242188, "learning_rate": 9.86024844720497e-06, "loss": 37.2265, "step": 8961 }, { "epoch": 213.3820895522388, "grad_norm": 25.337291717529297, "learning_rate": 9.8592132505176e-06, "loss": 37.793, "step": 8962 }, { "epoch": 213.40597014925373, "grad_norm": 31.47548484802246, "learning_rate": 9.85817805383023e-06, "loss": 37.1752, "step": 8963 }, { "epoch": 213.42985074626867, "grad_norm": 27.181623458862305, "learning_rate": 9.857142857142859e-06, "loss": 37.445, "step": 8964 }, { "epoch": 213.45373134328358, "grad_norm": 29.49827766418457, "learning_rate": 9.856107660455487e-06, "loss": 36.1177, "step": 8965 }, { "epoch": 213.47761194029852, "grad_norm": 28.424724578857422, "learning_rate": 9.855072463768118e-06, "loss": 37.834, "step": 8966 }, { "epoch": 213.50149253731342, "grad_norm": 27.049346923828125, "learning_rate": 9.854037267080746e-06, "loss": 37.0673, "step": 8967 }, { "epoch": 213.52537313432836, "grad_norm": 26.61677360534668, "learning_rate": 9.853002070393375e-06, "loss": 37.2056, "step": 8968 }, { "epoch": 213.54925373134327, "grad_norm": 29.839797973632812, "learning_rate": 9.851966873706005e-06, "loss": 37.1301, "step": 8969 }, { "epoch": 213.5731343283582, "grad_norm": 26.292333602905273, "learning_rate": 9.850931677018634e-06, "loss": 37.6243, "step": 8970 }, { "epoch": 213.59701492537314, "grad_norm": 32.21665573120117, "learning_rate": 9.849896480331264e-06, "loss": 38.6864, "step": 8971 }, { "epoch": 213.62089552238805, "grad_norm": 28.92923927307129, "learning_rate": 9.848861283643893e-06, "loss": 37.4806, "step": 8972 }, { "epoch": 213.644776119403, "grad_norm": 25.853914260864258, "learning_rate": 9.847826086956523e-06, "loss": 35.8329, "step": 8973 }, { "epoch": 213.6686567164179, "grad_norm": 25.25511360168457, "learning_rate": 9.846790890269152e-06, "loss": 37.6845, "step": 8974 }, { "epoch": 213.69253731343284, "grad_norm": 26.28786849975586, "learning_rate": 9.84575569358178e-06, "loss": 36.928, "step": 8975 }, { "epoch": 213.71641791044777, "grad_norm": 22.015167236328125, "learning_rate": 9.844720496894411e-06, "loss": 37.8689, "step": 8976 }, { "epoch": 213.74029850746268, "grad_norm": 32.0677490234375, "learning_rate": 9.84368530020704e-06, "loss": 36.9482, "step": 8977 }, { "epoch": 213.76417910447762, "grad_norm": 26.79532814025879, "learning_rate": 9.84265010351967e-06, "loss": 36.9821, "step": 8978 }, { "epoch": 213.78805970149253, "grad_norm": 30.176692962646484, "learning_rate": 9.841614906832298e-06, "loss": 36.7283, "step": 8979 }, { "epoch": 213.81194029850747, "grad_norm": 26.936988830566406, "learning_rate": 9.840579710144928e-06, "loss": 36.7164, "step": 8980 }, { "epoch": 213.83582089552237, "grad_norm": 30.490339279174805, "learning_rate": 9.839544513457557e-06, "loss": 36.4903, "step": 8981 }, { "epoch": 213.8597014925373, "grad_norm": 26.552900314331055, "learning_rate": 9.838509316770187e-06, "loss": 37.4003, "step": 8982 }, { "epoch": 213.88358208955225, "grad_norm": 25.060441970825195, "learning_rate": 9.837474120082817e-06, "loss": 36.2822, "step": 8983 }, { "epoch": 213.90746268656716, "grad_norm": 25.573841094970703, "learning_rate": 9.836438923395446e-06, "loss": 36.8246, "step": 8984 }, { "epoch": 213.9313432835821, "grad_norm": 28.39388656616211, "learning_rate": 9.835403726708076e-06, "loss": 36.6885, "step": 8985 }, { "epoch": 213.955223880597, "grad_norm": 21.96089744567871, "learning_rate": 9.834368530020705e-06, "loss": 36.3271, "step": 8986 }, { "epoch": 213.97910447761194, "grad_norm": 32.2141227722168, "learning_rate": 9.833333333333333e-06, "loss": 37.9622, "step": 8987 }, { "epoch": 214.0, "grad_norm": 25.528892517089844, "learning_rate": 9.832298136645964e-06, "loss": 32.4558, "step": 8988 }, { "epoch": 214.02388059701494, "grad_norm": 26.472932815551758, "learning_rate": 9.831262939958594e-06, "loss": 36.5696, "step": 8989 }, { "epoch": 214.04776119402985, "grad_norm": 25.03866195678711, "learning_rate": 9.830227743271222e-06, "loss": 35.5905, "step": 8990 }, { "epoch": 214.07164179104478, "grad_norm": 29.836627960205078, "learning_rate": 9.829192546583853e-06, "loss": 36.0014, "step": 8991 }, { "epoch": 214.0955223880597, "grad_norm": 26.660200119018555, "learning_rate": 9.82815734989648e-06, "loss": 36.8765, "step": 8992 }, { "epoch": 214.11940298507463, "grad_norm": 28.668292999267578, "learning_rate": 9.827122153209112e-06, "loss": 37.3687, "step": 8993 }, { "epoch": 214.14328358208957, "grad_norm": 25.813344955444336, "learning_rate": 9.82608695652174e-06, "loss": 37.2473, "step": 8994 }, { "epoch": 214.16716417910447, "grad_norm": 32.36681365966797, "learning_rate": 9.82505175983437e-06, "loss": 37.8641, "step": 8995 }, { "epoch": 214.1910447761194, "grad_norm": 27.731050491333008, "learning_rate": 9.824016563146999e-06, "loss": 36.6503, "step": 8996 }, { "epoch": 214.21492537313432, "grad_norm": 28.96619415283203, "learning_rate": 9.822981366459628e-06, "loss": 38.0194, "step": 8997 }, { "epoch": 214.23880597014926, "grad_norm": 25.87613868713379, "learning_rate": 9.821946169772258e-06, "loss": 36.4218, "step": 8998 }, { "epoch": 214.26268656716417, "grad_norm": 24.4296817779541, "learning_rate": 9.820910973084887e-06, "loss": 35.2054, "step": 8999 }, { "epoch": 214.2865671641791, "grad_norm": 22.787378311157227, "learning_rate": 9.819875776397517e-06, "loss": 37.2037, "step": 9000 }, { "epoch": 214.31044776119404, "grad_norm": 32.02442169189453, "learning_rate": 9.818840579710146e-06, "loss": 35.7057, "step": 9001 }, { "epoch": 214.33432835820895, "grad_norm": 27.07895851135254, "learning_rate": 9.817805383022774e-06, "loss": 36.0511, "step": 9002 }, { "epoch": 214.3582089552239, "grad_norm": 28.697946548461914, "learning_rate": 9.816770186335405e-06, "loss": 37.5292, "step": 9003 }, { "epoch": 214.3820895522388, "grad_norm": 25.989091873168945, "learning_rate": 9.815734989648033e-06, "loss": 37.1703, "step": 9004 }, { "epoch": 214.40597014925373, "grad_norm": 27.755807876586914, "learning_rate": 9.814699792960663e-06, "loss": 36.4164, "step": 9005 }, { "epoch": 214.42985074626867, "grad_norm": 24.19984245300293, "learning_rate": 9.813664596273292e-06, "loss": 37.2214, "step": 9006 }, { "epoch": 214.45373134328358, "grad_norm": 27.303367614746094, "learning_rate": 9.812629399585922e-06, "loss": 35.7464, "step": 9007 }, { "epoch": 214.47761194029852, "grad_norm": 26.95231819152832, "learning_rate": 9.811594202898551e-06, "loss": 38.0957, "step": 9008 }, { "epoch": 214.50149253731342, "grad_norm": 26.177330017089844, "learning_rate": 9.810559006211181e-06, "loss": 37.598, "step": 9009 }, { "epoch": 214.52537313432836, "grad_norm": 22.942121505737305, "learning_rate": 9.80952380952381e-06, "loss": 37.7739, "step": 9010 }, { "epoch": 214.54925373134327, "grad_norm": 29.14752960205078, "learning_rate": 9.80848861283644e-06, "loss": 37.2559, "step": 9011 }, { "epoch": 214.5731343283582, "grad_norm": 24.57861328125, "learning_rate": 9.80745341614907e-06, "loss": 37.3289, "step": 9012 }, { "epoch": 214.59701492537314, "grad_norm": 30.79751205444336, "learning_rate": 9.806418219461699e-06, "loss": 36.7896, "step": 9013 }, { "epoch": 214.62089552238805, "grad_norm": 28.287281036376953, "learning_rate": 9.805383022774327e-06, "loss": 37.0989, "step": 9014 }, { "epoch": 214.644776119403, "grad_norm": 28.00124168395996, "learning_rate": 9.804347826086958e-06, "loss": 37.511, "step": 9015 }, { "epoch": 214.6686567164179, "grad_norm": 24.868619918823242, "learning_rate": 9.803312629399588e-06, "loss": 36.6411, "step": 9016 }, { "epoch": 214.69253731343284, "grad_norm": 27.01886558532715, "learning_rate": 9.802277432712215e-06, "loss": 38.0124, "step": 9017 }, { "epoch": 214.71641791044777, "grad_norm": 23.0460147857666, "learning_rate": 9.801242236024847e-06, "loss": 36.6776, "step": 9018 }, { "epoch": 214.74029850746268, "grad_norm": 28.32352066040039, "learning_rate": 9.800207039337474e-06, "loss": 37.474, "step": 9019 }, { "epoch": 214.76417910447762, "grad_norm": 23.069040298461914, "learning_rate": 9.799171842650104e-06, "loss": 37.2404, "step": 9020 }, { "epoch": 214.78805970149253, "grad_norm": 30.9008731842041, "learning_rate": 9.798136645962734e-06, "loss": 38.2099, "step": 9021 }, { "epoch": 214.81194029850747, "grad_norm": 25.48306655883789, "learning_rate": 9.797101449275363e-06, "loss": 34.9577, "step": 9022 }, { "epoch": 214.83582089552237, "grad_norm": 31.529767990112305, "learning_rate": 9.796066252587993e-06, "loss": 36.741, "step": 9023 }, { "epoch": 214.8597014925373, "grad_norm": 28.165117263793945, "learning_rate": 9.795031055900622e-06, "loss": 37.6705, "step": 9024 }, { "epoch": 214.88358208955225, "grad_norm": 26.423799514770508, "learning_rate": 9.793995859213252e-06, "loss": 36.9861, "step": 9025 }, { "epoch": 214.90746268656716, "grad_norm": 23.710920333862305, "learning_rate": 9.792960662525881e-06, "loss": 37.4554, "step": 9026 }, { "epoch": 214.9313432835821, "grad_norm": 32.062286376953125, "learning_rate": 9.79192546583851e-06, "loss": 37.4132, "step": 9027 }, { "epoch": 214.955223880597, "grad_norm": 25.85164451599121, "learning_rate": 9.79089026915114e-06, "loss": 37.4271, "step": 9028 }, { "epoch": 214.97910447761194, "grad_norm": 28.508134841918945, "learning_rate": 9.789855072463768e-06, "loss": 37.9368, "step": 9029 }, { "epoch": 215.0, "grad_norm": 23.24784278869629, "learning_rate": 9.7888198757764e-06, "loss": 31.3253, "step": 9030 }, { "epoch": 215.02388059701494, "grad_norm": 26.680927276611328, "learning_rate": 9.787784679089027e-06, "loss": 37.5587, "step": 9031 }, { "epoch": 215.04776119402985, "grad_norm": 21.63987922668457, "learning_rate": 9.786749482401657e-06, "loss": 37.2078, "step": 9032 }, { "epoch": 215.07164179104478, "grad_norm": 28.85713768005371, "learning_rate": 9.785714285714286e-06, "loss": 37.9692, "step": 9033 }, { "epoch": 215.0955223880597, "grad_norm": 22.70249366760254, "learning_rate": 9.784679089026916e-06, "loss": 36.2635, "step": 9034 }, { "epoch": 215.11940298507463, "grad_norm": 31.306209564208984, "learning_rate": 9.783643892339545e-06, "loss": 36.2981, "step": 9035 }, { "epoch": 215.14328358208957, "grad_norm": 26.145404815673828, "learning_rate": 9.782608695652175e-06, "loss": 36.4388, "step": 9036 }, { "epoch": 215.16716417910447, "grad_norm": 23.636449813842773, "learning_rate": 9.781573498964804e-06, "loss": 36.396, "step": 9037 }, { "epoch": 215.1910447761194, "grad_norm": 22.753822326660156, "learning_rate": 9.780538302277434e-06, "loss": 36.2617, "step": 9038 }, { "epoch": 215.21492537313432, "grad_norm": 24.617338180541992, "learning_rate": 9.779503105590062e-06, "loss": 36.9093, "step": 9039 }, { "epoch": 215.23880597014926, "grad_norm": 19.743427276611328, "learning_rate": 9.778467908902693e-06, "loss": 36.941, "step": 9040 }, { "epoch": 215.26268656716417, "grad_norm": 27.8900146484375, "learning_rate": 9.77743271221532e-06, "loss": 36.108, "step": 9041 }, { "epoch": 215.2865671641791, "grad_norm": 21.104822158813477, "learning_rate": 9.776397515527952e-06, "loss": 37.6502, "step": 9042 }, { "epoch": 215.31044776119404, "grad_norm": 29.389020919799805, "learning_rate": 9.77536231884058e-06, "loss": 37.9045, "step": 9043 }, { "epoch": 215.33432835820895, "grad_norm": 25.60100555419922, "learning_rate": 9.77432712215321e-06, "loss": 37.3051, "step": 9044 }, { "epoch": 215.3582089552239, "grad_norm": 29.061010360717773, "learning_rate": 9.77329192546584e-06, "loss": 37.6249, "step": 9045 }, { "epoch": 215.3820895522388, "grad_norm": 24.267969131469727, "learning_rate": 9.772256728778468e-06, "loss": 36.8123, "step": 9046 }, { "epoch": 215.40597014925373, "grad_norm": 31.36228370666504, "learning_rate": 9.771221532091098e-06, "loss": 38.5689, "step": 9047 }, { "epoch": 215.42985074626867, "grad_norm": 21.266315460205078, "learning_rate": 9.770186335403727e-06, "loss": 36.8086, "step": 9048 }, { "epoch": 215.45373134328358, "grad_norm": 28.91936492919922, "learning_rate": 9.769151138716357e-06, "loss": 36.9067, "step": 9049 }, { "epoch": 215.47761194029852, "grad_norm": 21.60762596130371, "learning_rate": 9.768115942028986e-06, "loss": 37.4202, "step": 9050 }, { "epoch": 215.50149253731342, "grad_norm": 26.22998046875, "learning_rate": 9.767080745341616e-06, "loss": 35.7377, "step": 9051 }, { "epoch": 215.52537313432836, "grad_norm": 20.090837478637695, "learning_rate": 9.766045548654246e-06, "loss": 37.8684, "step": 9052 }, { "epoch": 215.54925373134327, "grad_norm": 31.179576873779297, "learning_rate": 9.765010351966875e-06, "loss": 36.2355, "step": 9053 }, { "epoch": 215.5731343283582, "grad_norm": 23.71304702758789, "learning_rate": 9.763975155279503e-06, "loss": 36.1069, "step": 9054 }, { "epoch": 215.59701492537314, "grad_norm": 32.945030212402344, "learning_rate": 9.762939958592134e-06, "loss": 37.4249, "step": 9055 }, { "epoch": 215.62089552238805, "grad_norm": 32.486446380615234, "learning_rate": 9.761904761904762e-06, "loss": 36.4778, "step": 9056 }, { "epoch": 215.644776119403, "grad_norm": 29.82095718383789, "learning_rate": 9.760869565217393e-06, "loss": 37.1965, "step": 9057 }, { "epoch": 215.6686567164179, "grad_norm": 26.731918334960938, "learning_rate": 9.759834368530021e-06, "loss": 37.4825, "step": 9058 }, { "epoch": 215.69253731343284, "grad_norm": 24.850849151611328, "learning_rate": 9.75879917184265e-06, "loss": 37.2026, "step": 9059 }, { "epoch": 215.71641791044777, "grad_norm": 19.47140884399414, "learning_rate": 9.75776397515528e-06, "loss": 36.0008, "step": 9060 }, { "epoch": 215.74029850746268, "grad_norm": 24.869354248046875, "learning_rate": 9.75672877846791e-06, "loss": 36.0339, "step": 9061 }, { "epoch": 215.76417910447762, "grad_norm": 19.912994384765625, "learning_rate": 9.755693581780539e-06, "loss": 37.9942, "step": 9062 }, { "epoch": 215.78805970149253, "grad_norm": 26.943859100341797, "learning_rate": 9.754658385093169e-06, "loss": 36.5328, "step": 9063 }, { "epoch": 215.81194029850747, "grad_norm": 21.20590591430664, "learning_rate": 9.753623188405798e-06, "loss": 36.6017, "step": 9064 }, { "epoch": 215.83582089552237, "grad_norm": 24.163738250732422, "learning_rate": 9.752587991718428e-06, "loss": 37.0538, "step": 9065 }, { "epoch": 215.8597014925373, "grad_norm": 20.0353946685791, "learning_rate": 9.751552795031056e-06, "loss": 36.0332, "step": 9066 }, { "epoch": 215.88358208955225, "grad_norm": 26.323434829711914, "learning_rate": 9.750517598343687e-06, "loss": 38.0896, "step": 9067 }, { "epoch": 215.90746268656716, "grad_norm": 18.738004684448242, "learning_rate": 9.749482401656315e-06, "loss": 36.6215, "step": 9068 }, { "epoch": 215.9313432835821, "grad_norm": 31.44061279296875, "learning_rate": 9.748447204968944e-06, "loss": 37.4861, "step": 9069 }, { "epoch": 215.955223880597, "grad_norm": 24.629789352416992, "learning_rate": 9.747412008281574e-06, "loss": 38.1235, "step": 9070 }, { "epoch": 215.97910447761194, "grad_norm": 21.770977020263672, "learning_rate": 9.746376811594203e-06, "loss": 36.6435, "step": 9071 }, { "epoch": 216.0, "grad_norm": 16.773563385009766, "learning_rate": 9.745341614906834e-06, "loss": 31.4797, "step": 9072 }, { "epoch": 216.02388059701494, "grad_norm": 23.379308700561523, "learning_rate": 9.744306418219462e-06, "loss": 36.9589, "step": 9073 }, { "epoch": 216.04776119402985, "grad_norm": 18.13448715209961, "learning_rate": 9.743271221532092e-06, "loss": 37.2716, "step": 9074 }, { "epoch": 216.07164179104478, "grad_norm": 20.16269302368164, "learning_rate": 9.742236024844721e-06, "loss": 37.6363, "step": 9075 }, { "epoch": 216.0955223880597, "grad_norm": 18.20665740966797, "learning_rate": 9.74120082815735e-06, "loss": 36.0095, "step": 9076 }, { "epoch": 216.11940298507463, "grad_norm": 19.75546646118164, "learning_rate": 9.74016563146998e-06, "loss": 36.5171, "step": 9077 }, { "epoch": 216.14328358208957, "grad_norm": 19.876527786254883, "learning_rate": 9.73913043478261e-06, "loss": 36.8232, "step": 9078 }, { "epoch": 216.16716417910447, "grad_norm": 15.993359565734863, "learning_rate": 9.73809523809524e-06, "loss": 36.62, "step": 9079 }, { "epoch": 216.1910447761194, "grad_norm": 21.49464225769043, "learning_rate": 9.737060041407869e-06, "loss": 37.1535, "step": 9080 }, { "epoch": 216.21492537313432, "grad_norm": 17.079620361328125, "learning_rate": 9.736024844720497e-06, "loss": 37.3859, "step": 9081 }, { "epoch": 216.23880597014926, "grad_norm": 16.47243881225586, "learning_rate": 9.734989648033128e-06, "loss": 36.8688, "step": 9082 }, { "epoch": 216.26268656716417, "grad_norm": 19.390705108642578, "learning_rate": 9.733954451345756e-06, "loss": 37.118, "step": 9083 }, { "epoch": 216.2865671641791, "grad_norm": 18.31181526184082, "learning_rate": 9.732919254658385e-06, "loss": 37.6145, "step": 9084 }, { "epoch": 216.31044776119404, "grad_norm": 15.150775909423828, "learning_rate": 9.731884057971015e-06, "loss": 35.758, "step": 9085 }, { "epoch": 216.33432835820895, "grad_norm": 20.944284439086914, "learning_rate": 9.730848861283644e-06, "loss": 36.9532, "step": 9086 }, { "epoch": 216.3582089552239, "grad_norm": 16.172500610351562, "learning_rate": 9.729813664596274e-06, "loss": 36.6033, "step": 9087 }, { "epoch": 216.3820895522388, "grad_norm": 21.084178924560547, "learning_rate": 9.728778467908903e-06, "loss": 36.6835, "step": 9088 }, { "epoch": 216.40597014925373, "grad_norm": 17.895221710205078, "learning_rate": 9.727743271221533e-06, "loss": 36.5752, "step": 9089 }, { "epoch": 216.42985074626867, "grad_norm": 21.309843063354492, "learning_rate": 9.726708074534163e-06, "loss": 37.3744, "step": 9090 }, { "epoch": 216.45373134328358, "grad_norm": 16.370155334472656, "learning_rate": 9.725672877846792e-06, "loss": 37.1859, "step": 9091 }, { "epoch": 216.47761194029852, "grad_norm": 21.996126174926758, "learning_rate": 9.724637681159422e-06, "loss": 35.2165, "step": 9092 }, { "epoch": 216.50149253731342, "grad_norm": 20.077713012695312, "learning_rate": 9.72360248447205e-06, "loss": 36.995, "step": 9093 }, { "epoch": 216.52537313432836, "grad_norm": 19.9365177154541, "learning_rate": 9.72256728778468e-06, "loss": 36.6285, "step": 9094 }, { "epoch": 216.54925373134327, "grad_norm": 23.95235252380371, "learning_rate": 9.721532091097308e-06, "loss": 36.7808, "step": 9095 }, { "epoch": 216.5731343283582, "grad_norm": 17.251880645751953, "learning_rate": 9.720496894409938e-06, "loss": 36.7247, "step": 9096 }, { "epoch": 216.59701492537314, "grad_norm": 26.106557846069336, "learning_rate": 9.719461697722568e-06, "loss": 37.7299, "step": 9097 }, { "epoch": 216.62089552238805, "grad_norm": 20.359811782836914, "learning_rate": 9.718426501035197e-06, "loss": 36.8477, "step": 9098 }, { "epoch": 216.644776119403, "grad_norm": 30.013118743896484, "learning_rate": 9.717391304347827e-06, "loss": 38.8294, "step": 9099 }, { "epoch": 216.6686567164179, "grad_norm": 21.708553314208984, "learning_rate": 9.716356107660456e-06, "loss": 37.0413, "step": 9100 }, { "epoch": 216.69253731343284, "grad_norm": 26.785297393798828, "learning_rate": 9.715320910973086e-06, "loss": 37.5232, "step": 9101 }, { "epoch": 216.71641791044777, "grad_norm": 25.817947387695312, "learning_rate": 9.714285714285715e-06, "loss": 36.6427, "step": 9102 }, { "epoch": 216.74029850746268, "grad_norm": 25.02275276184082, "learning_rate": 9.713250517598345e-06, "loss": 37.5533, "step": 9103 }, { "epoch": 216.76417910447762, "grad_norm": 23.612350463867188, "learning_rate": 9.712215320910974e-06, "loss": 36.6638, "step": 9104 }, { "epoch": 216.78805970149253, "grad_norm": 24.220951080322266, "learning_rate": 9.711180124223604e-06, "loss": 37.8171, "step": 9105 }, { "epoch": 216.81194029850747, "grad_norm": 23.3463191986084, "learning_rate": 9.710144927536233e-06, "loss": 36.1184, "step": 9106 }, { "epoch": 216.83582089552237, "grad_norm": 22.078628540039062, "learning_rate": 9.709109730848863e-06, "loss": 36.764, "step": 9107 }, { "epoch": 216.8597014925373, "grad_norm": 20.95184326171875, "learning_rate": 9.70807453416149e-06, "loss": 35.9814, "step": 9108 }, { "epoch": 216.88358208955225, "grad_norm": 22.184547424316406, "learning_rate": 9.707039337474122e-06, "loss": 37.7234, "step": 9109 }, { "epoch": 216.90746268656716, "grad_norm": 21.662275314331055, "learning_rate": 9.70600414078675e-06, "loss": 37.3595, "step": 9110 }, { "epoch": 216.9313432835821, "grad_norm": 20.41446304321289, "learning_rate": 9.70496894409938e-06, "loss": 37.1269, "step": 9111 }, { "epoch": 216.955223880597, "grad_norm": 19.57245445251465, "learning_rate": 9.703933747412009e-06, "loss": 36.0114, "step": 9112 }, { "epoch": 216.97910447761194, "grad_norm": 18.723487854003906, "learning_rate": 9.702898550724638e-06, "loss": 36.2825, "step": 9113 }, { "epoch": 217.0, "grad_norm": 16.511157989501953, "learning_rate": 9.701863354037268e-06, "loss": 33.4808, "step": 9114 }, { "epoch": 217.02388059701494, "grad_norm": 18.5831298828125, "learning_rate": 9.700828157349897e-06, "loss": 36.5857, "step": 9115 }, { "epoch": 217.04776119402985, "grad_norm": 20.021087646484375, "learning_rate": 9.699792960662527e-06, "loss": 36.9092, "step": 9116 }, { "epoch": 217.07164179104478, "grad_norm": 15.198619842529297, "learning_rate": 9.698757763975156e-06, "loss": 35.8288, "step": 9117 }, { "epoch": 217.0955223880597, "grad_norm": 19.33254623413086, "learning_rate": 9.697722567287784e-06, "loss": 36.4575, "step": 9118 }, { "epoch": 217.11940298507463, "grad_norm": 15.601790428161621, "learning_rate": 9.696687370600415e-06, "loss": 37.7686, "step": 9119 }, { "epoch": 217.14328358208957, "grad_norm": NaN, "learning_rate": 9.695652173913043e-06, "loss": 65.063, "step": 9120 }, { "epoch": 217.16716417910447, "grad_norm": 22.725147247314453, "learning_rate": 9.695652173913043e-06, "loss": 36.1623, "step": 9121 }, { "epoch": 217.1910447761194, "grad_norm": 18.391794204711914, "learning_rate": 9.694616977225675e-06, "loss": 36.8899, "step": 9122 }, { "epoch": 217.21492537313432, "grad_norm": 18.53925895690918, "learning_rate": 9.693581780538302e-06, "loss": 37.4847, "step": 9123 }, { "epoch": 217.23880597014926, "grad_norm": 19.029287338256836, "learning_rate": 9.692546583850932e-06, "loss": 36.9624, "step": 9124 }, { "epoch": 217.26268656716417, "grad_norm": 16.01725959777832, "learning_rate": 9.691511387163561e-06, "loss": 36.1798, "step": 9125 }, { "epoch": 217.2865671641791, "grad_norm": 21.95234489440918, "learning_rate": 9.690476190476191e-06, "loss": 36.9395, "step": 9126 }, { "epoch": 217.31044776119404, "grad_norm": 17.638107299804688, "learning_rate": 9.68944099378882e-06, "loss": 36.2656, "step": 9127 }, { "epoch": 217.33432835820895, "grad_norm": 19.56740379333496, "learning_rate": 9.68840579710145e-06, "loss": 35.8209, "step": 9128 }, { "epoch": 217.3582089552239, "grad_norm": 16.121322631835938, "learning_rate": 9.68737060041408e-06, "loss": 37.7785, "step": 9129 }, { "epoch": 217.3820895522388, "grad_norm": 19.964323043823242, "learning_rate": 9.686335403726709e-06, "loss": 36.8468, "step": 9130 }, { "epoch": 217.40597014925373, "grad_norm": 14.685638427734375, "learning_rate": 9.685300207039339e-06, "loss": 37.3379, "step": 9131 }, { "epoch": 217.42985074626867, "grad_norm": 18.589954376220703, "learning_rate": 9.684265010351968e-06, "loss": 37.4971, "step": 9132 }, { "epoch": 217.45373134328358, "grad_norm": 15.052332878112793, "learning_rate": 9.683229813664598e-06, "loss": 36.1428, "step": 9133 }, { "epoch": 217.47761194029852, "grad_norm": 15.699575424194336, "learning_rate": 9.682194616977225e-06, "loss": 38.2258, "step": 9134 }, { "epoch": 217.50149253731342, "grad_norm": 20.121877670288086, "learning_rate": 9.681159420289857e-06, "loss": 37.3783, "step": 9135 }, { "epoch": 217.52537313432836, "grad_norm": 14.712428092956543, "learning_rate": 9.680124223602485e-06, "loss": 37.149, "step": 9136 }, { "epoch": 217.54925373134327, "grad_norm": 16.807716369628906, "learning_rate": 9.679089026915116e-06, "loss": 37.1475, "step": 9137 }, { "epoch": 217.5731343283582, "grad_norm": 17.029197692871094, "learning_rate": 9.678053830227744e-06, "loss": 37.0724, "step": 9138 }, { "epoch": 217.59701492537314, "grad_norm": 15.771768569946289, "learning_rate": 9.677018633540373e-06, "loss": 37.7616, "step": 9139 }, { "epoch": 217.62089552238805, "grad_norm": 17.461040496826172, "learning_rate": 9.675983436853003e-06, "loss": 36.4125, "step": 9140 }, { "epoch": 217.644776119403, "grad_norm": 15.030492782592773, "learning_rate": 9.674948240165632e-06, "loss": 36.2567, "step": 9141 }, { "epoch": 217.6686567164179, "grad_norm": 17.556913375854492, "learning_rate": 9.673913043478262e-06, "loss": 35.6493, "step": 9142 }, { "epoch": 217.69253731343284, "grad_norm": 15.683633804321289, "learning_rate": 9.672877846790891e-06, "loss": 36.5538, "step": 9143 }, { "epoch": 217.71641791044777, "grad_norm": 19.198259353637695, "learning_rate": 9.67184265010352e-06, "loss": 37.0452, "step": 9144 }, { "epoch": 217.74029850746268, "grad_norm": 19.338232040405273, "learning_rate": 9.67080745341615e-06, "loss": 37.4315, "step": 9145 }, { "epoch": 217.76417910447762, "grad_norm": 21.04673194885254, "learning_rate": 9.669772256728778e-06, "loss": 37.6606, "step": 9146 }, { "epoch": 217.78805970149253, "grad_norm": 15.2819242477417, "learning_rate": 9.66873706004141e-06, "loss": 35.3826, "step": 9147 }, { "epoch": 217.81194029850747, "grad_norm": 18.835935592651367, "learning_rate": 9.667701863354037e-06, "loss": 37.4417, "step": 9148 }, { "epoch": 217.83582089552237, "grad_norm": 19.404373168945312, "learning_rate": 9.666666666666667e-06, "loss": 35.8734, "step": 9149 }, { "epoch": 217.8597014925373, "grad_norm": 15.097556114196777, "learning_rate": 9.665631469979296e-06, "loss": 37.443, "step": 9150 }, { "epoch": 217.88358208955225, "grad_norm": 19.009340286254883, "learning_rate": 9.664596273291926e-06, "loss": 37.6373, "step": 9151 }, { "epoch": 217.90746268656716, "grad_norm": 20.086925506591797, "learning_rate": 9.663561076604555e-06, "loss": 36.842, "step": 9152 }, { "epoch": 217.9313432835821, "grad_norm": 13.27296257019043, "learning_rate": 9.662525879917185e-06, "loss": 37.3396, "step": 9153 }, { "epoch": 217.955223880597, "grad_norm": 17.1934814453125, "learning_rate": 9.661490683229814e-06, "loss": 37.7651, "step": 9154 }, { "epoch": 217.97910447761194, "grad_norm": 20.468364715576172, "learning_rate": 9.660455486542444e-06, "loss": 35.9981, "step": 9155 }, { "epoch": 218.0, "grad_norm": 15.743645668029785, "learning_rate": 9.659420289855073e-06, "loss": 32.9107, "step": 9156 }, { "epoch": 218.02388059701494, "grad_norm": 13.317445755004883, "learning_rate": 9.658385093167703e-06, "loss": 34.8715, "step": 9157 }, { "epoch": 218.04776119402985, "grad_norm": 16.343395233154297, "learning_rate": 9.657349896480332e-06, "loss": 36.6059, "step": 9158 }, { "epoch": 218.07164179104478, "grad_norm": 14.639159202575684, "learning_rate": 9.656314699792962e-06, "loss": 37.2106, "step": 9159 }, { "epoch": 218.0955223880597, "grad_norm": 14.244100570678711, "learning_rate": 9.655279503105592e-06, "loss": 37.2165, "step": 9160 }, { "epoch": 218.11940298507463, "grad_norm": 16.957246780395508, "learning_rate": 9.65424430641822e-06, "loss": 36.6862, "step": 9161 }, { "epoch": 218.14328358208957, "grad_norm": 12.883386611938477, "learning_rate": 9.65320910973085e-06, "loss": 37.553, "step": 9162 }, { "epoch": 218.16716417910447, "grad_norm": 20.242103576660156, "learning_rate": 9.652173913043478e-06, "loss": 36.5785, "step": 9163 }, { "epoch": 218.1910447761194, "grad_norm": 16.585161209106445, "learning_rate": 9.651138716356108e-06, "loss": 38.3141, "step": 9164 }, { "epoch": 218.21492537313432, "grad_norm": 20.26701545715332, "learning_rate": 9.650103519668737e-06, "loss": 35.7752, "step": 9165 }, { "epoch": 218.23880597014926, "grad_norm": 19.144397735595703, "learning_rate": 9.649068322981367e-06, "loss": 36.6697, "step": 9166 }, { "epoch": 218.26268656716417, "grad_norm": 19.420429229736328, "learning_rate": 9.648033126293997e-06, "loss": 36.0501, "step": 9167 }, { "epoch": 218.2865671641791, "grad_norm": 17.626617431640625, "learning_rate": 9.646997929606626e-06, "loss": 36.0203, "step": 9168 }, { "epoch": 218.31044776119404, "grad_norm": 18.067670822143555, "learning_rate": 9.645962732919256e-06, "loss": 37.7678, "step": 9169 }, { "epoch": 218.33432835820895, "grad_norm": 16.033538818359375, "learning_rate": 9.644927536231885e-06, "loss": 36.5795, "step": 9170 }, { "epoch": 218.3582089552239, "grad_norm": 17.337013244628906, "learning_rate": 9.643892339544515e-06, "loss": 37.7038, "step": 9171 }, { "epoch": 218.3820895522388, "grad_norm": 15.980463027954102, "learning_rate": 9.642857142857144e-06, "loss": 37.8133, "step": 9172 }, { "epoch": 218.40597014925373, "grad_norm": 18.340688705444336, "learning_rate": 9.641821946169772e-06, "loss": 36.7869, "step": 9173 }, { "epoch": 218.42985074626867, "grad_norm": 19.210174560546875, "learning_rate": 9.640786749482403e-06, "loss": 37.0788, "step": 9174 }, { "epoch": 218.45373134328358, "grad_norm": 19.932706832885742, "learning_rate": 9.639751552795031e-06, "loss": 36.5435, "step": 9175 }, { "epoch": 218.47761194029852, "grad_norm": 13.324295997619629, "learning_rate": 9.63871635610766e-06, "loss": 35.4878, "step": 9176 }, { "epoch": 218.50149253731342, "grad_norm": 18.737878799438477, "learning_rate": 9.63768115942029e-06, "loss": 36.5564, "step": 9177 }, { "epoch": 218.52537313432836, "grad_norm": 14.989187240600586, "learning_rate": 9.63664596273292e-06, "loss": 36.5208, "step": 9178 }, { "epoch": 218.54925373134327, "grad_norm": 20.792604446411133, "learning_rate": 9.63561076604555e-06, "loss": 35.9787, "step": 9179 }, { "epoch": 218.5731343283582, "grad_norm": 16.712087631225586, "learning_rate": 9.634575569358179e-06, "loss": 35.9913, "step": 9180 }, { "epoch": 218.59701492537314, "grad_norm": 19.96718406677246, "learning_rate": 9.633540372670808e-06, "loss": 38.5806, "step": 9181 }, { "epoch": 218.62089552238805, "grad_norm": 17.18105697631836, "learning_rate": 9.632505175983438e-06, "loss": 36.9286, "step": 9182 }, { "epoch": 218.644776119403, "grad_norm": 17.91598892211914, "learning_rate": 9.631469979296067e-06, "loss": 36.6085, "step": 9183 }, { "epoch": 218.6686567164179, "grad_norm": 17.956539154052734, "learning_rate": 9.630434782608697e-06, "loss": 36.1301, "step": 9184 }, { "epoch": 218.69253731343284, "grad_norm": 19.377994537353516, "learning_rate": 9.629399585921326e-06, "loss": 36.9346, "step": 9185 }, { "epoch": 218.71641791044777, "grad_norm": 18.69805145263672, "learning_rate": 9.628364389233956e-06, "loss": 36.7145, "step": 9186 }, { "epoch": 218.74029850746268, "grad_norm": 17.211429595947266, "learning_rate": 9.627329192546585e-06, "loss": 38.0607, "step": 9187 }, { "epoch": 218.76417910447762, "grad_norm": 18.123088836669922, "learning_rate": 9.626293995859213e-06, "loss": 36.0628, "step": 9188 }, { "epoch": 218.78805970149253, "grad_norm": NaN, "learning_rate": 9.625258799171844e-06, "loss": 52.3213, "step": 9189 }, { "epoch": 218.81194029850747, "grad_norm": 15.918928146362305, "learning_rate": 9.625258799171844e-06, "loss": 37.8708, "step": 9190 }, { "epoch": 218.83582089552237, "grad_norm": 19.9727840423584, "learning_rate": 9.624223602484472e-06, "loss": 36.9569, "step": 9191 }, { "epoch": 218.8597014925373, "grad_norm": 18.443010330200195, "learning_rate": 9.623188405797102e-06, "loss": 37.2799, "step": 9192 }, { "epoch": 218.88358208955225, "grad_norm": 18.08015251159668, "learning_rate": 9.622153209109731e-06, "loss": 36.4251, "step": 9193 }, { "epoch": 218.90746268656716, "grad_norm": 15.19926643371582, "learning_rate": 9.621118012422361e-06, "loss": 38.557, "step": 9194 }, { "epoch": 218.9313432835821, "grad_norm": 14.689451217651367, "learning_rate": 9.62008281573499e-06, "loss": 37.5389, "step": 9195 }, { "epoch": 218.955223880597, "grad_norm": 16.193313598632812, "learning_rate": 9.61904761904762e-06, "loss": 35.9918, "step": 9196 }, { "epoch": 218.97910447761194, "grad_norm": 13.192307472229004, "learning_rate": 9.61801242236025e-06, "loss": 35.8413, "step": 9197 }, { "epoch": 219.0, "grad_norm": 13.88156509399414, "learning_rate": 9.616977225672879e-06, "loss": 32.3184, "step": 9198 }, { "epoch": 219.02388059701494, "grad_norm": 14.823074340820312, "learning_rate": 9.615942028985507e-06, "loss": 35.9319, "step": 9199 }, { "epoch": 219.04776119402985, "grad_norm": 17.479848861694336, "learning_rate": 9.614906832298138e-06, "loss": 35.9434, "step": 9200 }, { "epoch": 219.07164179104478, "grad_norm": 15.585541725158691, "learning_rate": 9.613871635610766e-06, "loss": 36.0488, "step": 9201 }, { "epoch": 219.0955223880597, "grad_norm": 18.069622039794922, "learning_rate": 9.612836438923397e-06, "loss": 36.7673, "step": 9202 }, { "epoch": 219.11940298507463, "grad_norm": 23.847604751586914, "learning_rate": 9.611801242236025e-06, "loss": 37.5001, "step": 9203 }, { "epoch": 219.14328358208957, "grad_norm": 14.445686340332031, "learning_rate": 9.610766045548654e-06, "loss": 37.0008, "step": 9204 }, { "epoch": 219.16716417910447, "grad_norm": 19.418006896972656, "learning_rate": 9.609730848861284e-06, "loss": 37.5214, "step": 9205 }, { "epoch": 219.1910447761194, "grad_norm": 19.122482299804688, "learning_rate": 9.608695652173914e-06, "loss": 36.9843, "step": 9206 }, { "epoch": 219.21492537313432, "grad_norm": 13.655843734741211, "learning_rate": 9.607660455486543e-06, "loss": 35.9457, "step": 9207 }, { "epoch": 219.23880597014926, "grad_norm": 17.065208435058594, "learning_rate": 9.606625258799173e-06, "loss": 37.2418, "step": 9208 }, { "epoch": 219.26268656716417, "grad_norm": 13.323447227478027, "learning_rate": 9.605590062111802e-06, "loss": 37.8345, "step": 9209 }, { "epoch": 219.2865671641791, "grad_norm": 17.5530948638916, "learning_rate": 9.604554865424432e-06, "loss": 36.3977, "step": 9210 }, { "epoch": 219.31044776119404, "grad_norm": 16.106706619262695, "learning_rate": 9.603519668737061e-06, "loss": 36.4268, "step": 9211 }, { "epoch": 219.33432835820895, "grad_norm": 13.573047637939453, "learning_rate": 9.60248447204969e-06, "loss": 35.8055, "step": 9212 }, { "epoch": 219.3582089552239, "grad_norm": 16.16801643371582, "learning_rate": 9.60144927536232e-06, "loss": 36.2473, "step": 9213 }, { "epoch": 219.3820895522388, "grad_norm": 14.158682823181152, "learning_rate": 9.600414078674948e-06, "loss": 36.0948, "step": 9214 }, { "epoch": 219.40597014925373, "grad_norm": 12.716529846191406, "learning_rate": 9.59937888198758e-06, "loss": 36.4154, "step": 9215 }, { "epoch": 219.42985074626867, "grad_norm": 14.64181900024414, "learning_rate": 9.598343685300207e-06, "loss": 37.5303, "step": 9216 }, { "epoch": 219.45373134328358, "grad_norm": NaN, "learning_rate": 9.597308488612838e-06, "loss": 41.3461, "step": 9217 }, { "epoch": 219.47761194029852, "grad_norm": 12.889041900634766, "learning_rate": 9.597308488612838e-06, "loss": 36.4206, "step": 9218 }, { "epoch": 219.50149253731342, "grad_norm": 15.781641006469727, "learning_rate": 9.596273291925466e-06, "loss": 37.3701, "step": 9219 }, { "epoch": 219.52537313432836, "grad_norm": 14.214648246765137, "learning_rate": 9.595238095238096e-06, "loss": 36.8136, "step": 9220 }, { "epoch": 219.54925373134327, "grad_norm": 16.5802001953125, "learning_rate": 9.594202898550725e-06, "loss": 36.9532, "step": 9221 }, { "epoch": 219.5731343283582, "grad_norm": 13.49622917175293, "learning_rate": 9.593167701863355e-06, "loss": 36.7924, "step": 9222 }, { "epoch": 219.59701492537314, "grad_norm": 15.241145133972168, "learning_rate": 9.592132505175984e-06, "loss": 37.2905, "step": 9223 }, { "epoch": 219.62089552238805, "grad_norm": 14.371820449829102, "learning_rate": 9.591097308488614e-06, "loss": 37.6425, "step": 9224 }, { "epoch": 219.644776119403, "grad_norm": 16.55687141418457, "learning_rate": 9.590062111801243e-06, "loss": 37.3197, "step": 9225 }, { "epoch": 219.6686567164179, "grad_norm": 18.263837814331055, "learning_rate": 9.589026915113873e-06, "loss": 36.9441, "step": 9226 }, { "epoch": 219.69253731343284, "grad_norm": 16.113059997558594, "learning_rate": 9.5879917184265e-06, "loss": 36.8065, "step": 9227 }, { "epoch": 219.71641791044777, "grad_norm": 17.373727798461914, "learning_rate": 9.586956521739132e-06, "loss": 36.2574, "step": 9228 }, { "epoch": 219.74029850746268, "grad_norm": 16.516822814941406, "learning_rate": 9.58592132505176e-06, "loss": 35.8442, "step": 9229 }, { "epoch": 219.76417910447762, "grad_norm": 16.880693435668945, "learning_rate": 9.58488612836439e-06, "loss": 37.4098, "step": 9230 }, { "epoch": 219.78805970149253, "grad_norm": 14.671090126037598, "learning_rate": 9.583850931677019e-06, "loss": 38.1314, "step": 9231 }, { "epoch": 219.81194029850747, "grad_norm": 14.153200149536133, "learning_rate": 9.582815734989648e-06, "loss": 36.2577, "step": 9232 }, { "epoch": 219.83582089552237, "grad_norm": 16.481653213500977, "learning_rate": 9.581780538302278e-06, "loss": 37.1363, "step": 9233 }, { "epoch": 219.8597014925373, "grad_norm": 12.075387954711914, "learning_rate": 9.580745341614907e-06, "loss": 37.0842, "step": 9234 }, { "epoch": 219.88358208955225, "grad_norm": 17.5874080657959, "learning_rate": 9.579710144927537e-06, "loss": 36.651, "step": 9235 }, { "epoch": 219.90746268656716, "grad_norm": 13.357857704162598, "learning_rate": 9.578674948240167e-06, "loss": 36.7274, "step": 9236 }, { "epoch": 219.9313432835821, "grad_norm": 17.825353622436523, "learning_rate": 9.577639751552796e-06, "loss": 36.7724, "step": 9237 }, { "epoch": 219.955223880597, "grad_norm": 14.304804801940918, "learning_rate": 9.576604554865426e-06, "loss": 36.8395, "step": 9238 }, { "epoch": 219.97910447761194, "grad_norm": 15.323370933532715, "learning_rate": 9.575569358178055e-06, "loss": 36.6138, "step": 9239 }, { "epoch": 220.0, "grad_norm": 18.964876174926758, "learning_rate": 9.574534161490685e-06, "loss": 32.3095, "step": 9240 }, { "epoch": 220.02388059701494, "grad_norm": 17.353851318359375, "learning_rate": 9.573498964803314e-06, "loss": 36.4195, "step": 9241 }, { "epoch": 220.04776119402985, "grad_norm": 15.174515724182129, "learning_rate": 9.572463768115942e-06, "loss": 36.7379, "step": 9242 }, { "epoch": 220.07164179104478, "grad_norm": 17.7712459564209, "learning_rate": 9.571428571428573e-06, "loss": 36.1797, "step": 9243 }, { "epoch": 220.0955223880597, "grad_norm": 15.105401039123535, "learning_rate": 9.570393374741201e-06, "loss": 36.7952, "step": 9244 }, { "epoch": 220.11940298507463, "grad_norm": 19.85262680053711, "learning_rate": 9.56935817805383e-06, "loss": 37.2235, "step": 9245 }, { "epoch": 220.14328358208957, "grad_norm": 21.23208236694336, "learning_rate": 9.56832298136646e-06, "loss": 36.532, "step": 9246 }, { "epoch": 220.16716417910447, "grad_norm": 16.253814697265625, "learning_rate": 9.56728778467909e-06, "loss": 36.9258, "step": 9247 }, { "epoch": 220.1910447761194, "grad_norm": 20.50847625732422, "learning_rate": 9.56625258799172e-06, "loss": 36.7899, "step": 9248 }, { "epoch": 220.21492537313432, "grad_norm": 15.544790267944336, "learning_rate": 9.565217391304349e-06, "loss": 37.627, "step": 9249 }, { "epoch": 220.23880597014926, "grad_norm": 15.568458557128906, "learning_rate": 9.564182194616978e-06, "loss": 37.1097, "step": 9250 }, { "epoch": 220.26268656716417, "grad_norm": 13.180426597595215, "learning_rate": 9.563146997929608e-06, "loss": 37.0846, "step": 9251 }, { "epoch": 220.2865671641791, "grad_norm": 14.081692695617676, "learning_rate": 9.562111801242237e-06, "loss": 36.5627, "step": 9252 }, { "epoch": 220.31044776119404, "grad_norm": 16.696626663208008, "learning_rate": 9.561076604554867e-06, "loss": 35.5503, "step": 9253 }, { "epoch": 220.33432835820895, "grad_norm": 22.202970504760742, "learning_rate": 9.560041407867495e-06, "loss": 35.8487, "step": 9254 }, { "epoch": 220.3582089552239, "grad_norm": 13.861478805541992, "learning_rate": 9.559006211180126e-06, "loss": 37.0773, "step": 9255 }, { "epoch": 220.3820895522388, "grad_norm": 23.343826293945312, "learning_rate": 9.557971014492754e-06, "loss": 37.297, "step": 9256 }, { "epoch": 220.40597014925373, "grad_norm": 19.788761138916016, "learning_rate": 9.556935817805383e-06, "loss": 36.5872, "step": 9257 }, { "epoch": 220.42985074626867, "grad_norm": 14.769845008850098, "learning_rate": 9.555900621118013e-06, "loss": 37.4659, "step": 9258 }, { "epoch": 220.45373134328358, "grad_norm": 25.088499069213867, "learning_rate": 9.554865424430642e-06, "loss": 36.4183, "step": 9259 }, { "epoch": 220.47761194029852, "grad_norm": 19.144893646240234, "learning_rate": 9.553830227743272e-06, "loss": 36.895, "step": 9260 }, { "epoch": 220.50149253731342, "grad_norm": 16.255157470703125, "learning_rate": 9.552795031055901e-06, "loss": 36.1986, "step": 9261 }, { "epoch": 220.52537313432836, "grad_norm": 30.13691520690918, "learning_rate": 9.551759834368531e-06, "loss": 36.4725, "step": 9262 }, { "epoch": 220.54925373134327, "grad_norm": 16.886524200439453, "learning_rate": 9.55072463768116e-06, "loss": 35.1262, "step": 9263 }, { "epoch": 220.5731343283582, "grad_norm": 40.92776107788086, "learning_rate": 9.54968944099379e-06, "loss": 36.4841, "step": 9264 }, { "epoch": 220.59701492537314, "grad_norm": 29.202791213989258, "learning_rate": 9.54865424430642e-06, "loss": 37.9052, "step": 9265 }, { "epoch": 220.62089552238805, "grad_norm": 39.013118743896484, "learning_rate": 9.547619047619049e-06, "loss": 36.8797, "step": 9266 }, { "epoch": 220.644776119403, "grad_norm": 37.325225830078125, "learning_rate": 9.546583850931679e-06, "loss": 36.3243, "step": 9267 }, { "epoch": 220.6686567164179, "grad_norm": 24.062450408935547, "learning_rate": 9.545548654244308e-06, "loss": 37.3711, "step": 9268 }, { "epoch": 220.69253731343284, "grad_norm": 25.628732681274414, "learning_rate": 9.544513457556936e-06, "loss": 36.9699, "step": 9269 }, { "epoch": 220.71641791044777, "grad_norm": 31.478517532348633, "learning_rate": 9.543478260869567e-06, "loss": 36.9297, "step": 9270 }, { "epoch": 220.74029850746268, "grad_norm": 22.91040802001953, "learning_rate": 9.542443064182195e-06, "loss": 36.687, "step": 9271 }, { "epoch": 220.76417910447762, "grad_norm": 36.85102462768555, "learning_rate": 9.541407867494824e-06, "loss": 36.3917, "step": 9272 }, { "epoch": 220.78805970149253, "grad_norm": 33.397098541259766, "learning_rate": 9.540372670807454e-06, "loss": 36.1614, "step": 9273 }, { "epoch": 220.81194029850747, "grad_norm": 28.137874603271484, "learning_rate": 9.539337474120084e-06, "loss": 37.2919, "step": 9274 }, { "epoch": 220.83582089552237, "grad_norm": 26.61577796936035, "learning_rate": 9.538302277432713e-06, "loss": 36.4024, "step": 9275 }, { "epoch": 220.8597014925373, "grad_norm": 27.809036254882812, "learning_rate": 9.537267080745343e-06, "loss": 36.6629, "step": 9276 }, { "epoch": 220.88358208955225, "grad_norm": 25.116004943847656, "learning_rate": 9.536231884057972e-06, "loss": 36.7077, "step": 9277 }, { "epoch": 220.90746268656716, "grad_norm": 32.13947677612305, "learning_rate": 9.535196687370602e-06, "loss": 37.3892, "step": 9278 }, { "epoch": 220.9313432835821, "grad_norm": 28.722021102905273, "learning_rate": 9.53416149068323e-06, "loss": 38.0002, "step": 9279 }, { "epoch": 220.955223880597, "grad_norm": 29.87248992919922, "learning_rate": 9.53312629399586e-06, "loss": 37.0684, "step": 9280 }, { "epoch": 220.97910447761194, "grad_norm": 25.427030563354492, "learning_rate": 9.532091097308489e-06, "loss": 36.559, "step": 9281 }, { "epoch": 221.0, "grad_norm": 28.555543899536133, "learning_rate": 9.53105590062112e-06, "loss": 31.9309, "step": 9282 }, { "epoch": 221.02388059701494, "grad_norm": 28.924266815185547, "learning_rate": 9.530020703933748e-06, "loss": 36.5134, "step": 9283 }, { "epoch": 221.04776119402985, "grad_norm": 27.023439407348633, "learning_rate": 9.528985507246377e-06, "loss": 36.8401, "step": 9284 }, { "epoch": 221.07164179104478, "grad_norm": 25.894798278808594, "learning_rate": 9.527950310559007e-06, "loss": 36.8202, "step": 9285 }, { "epoch": 221.0955223880597, "grad_norm": 31.44702911376953, "learning_rate": 9.526915113871636e-06, "loss": 36.7885, "step": 9286 }, { "epoch": 221.11940298507463, "grad_norm": 24.80626678466797, "learning_rate": 9.525879917184266e-06, "loss": 38.347, "step": 9287 }, { "epoch": 221.14328358208957, "grad_norm": 31.647432327270508, "learning_rate": 9.524844720496895e-06, "loss": 36.9638, "step": 9288 }, { "epoch": 221.16716417910447, "grad_norm": 27.200225830078125, "learning_rate": 9.523809523809525e-06, "loss": 37.1524, "step": 9289 }, { "epoch": 221.1910447761194, "grad_norm": 28.28724479675293, "learning_rate": 9.522774327122154e-06, "loss": 36.547, "step": 9290 }, { "epoch": 221.21492537313432, "grad_norm": 28.738943099975586, "learning_rate": 9.521739130434784e-06, "loss": 36.8349, "step": 9291 }, { "epoch": 221.23880597014926, "grad_norm": 29.01047134399414, "learning_rate": 9.520703933747413e-06, "loss": 35.6371, "step": 9292 }, { "epoch": 221.26268656716417, "grad_norm": 25.44609260559082, "learning_rate": 9.519668737060043e-06, "loss": 37.8431, "step": 9293 }, { "epoch": 221.2865671641791, "grad_norm": 30.137645721435547, "learning_rate": 9.51863354037267e-06, "loss": 37.2189, "step": 9294 }, { "epoch": 221.31044776119404, "grad_norm": 26.297382354736328, "learning_rate": 9.517598343685302e-06, "loss": 35.8506, "step": 9295 }, { "epoch": 221.33432835820895, "grad_norm": 30.275667190551758, "learning_rate": 9.51656314699793e-06, "loss": 36.866, "step": 9296 }, { "epoch": 221.3582089552239, "grad_norm": 27.415193557739258, "learning_rate": 9.515527950310561e-06, "loss": 35.4461, "step": 9297 }, { "epoch": 221.3820895522388, "grad_norm": 30.012296676635742, "learning_rate": 9.514492753623189e-06, "loss": 36.361, "step": 9298 }, { "epoch": 221.40597014925373, "grad_norm": 25.366943359375, "learning_rate": 9.513457556935818e-06, "loss": 35.9706, "step": 9299 }, { "epoch": 221.42985074626867, "grad_norm": 28.098432540893555, "learning_rate": 9.512422360248448e-06, "loss": 36.8511, "step": 9300 }, { "epoch": 221.45373134328358, "grad_norm": 27.954959869384766, "learning_rate": 9.511387163561077e-06, "loss": 37.4078, "step": 9301 }, { "epoch": 221.47761194029852, "grad_norm": 30.06001091003418, "learning_rate": 9.510351966873707e-06, "loss": 37.7406, "step": 9302 }, { "epoch": 221.50149253731342, "grad_norm": 25.211225509643555, "learning_rate": 9.509316770186336e-06, "loss": 37.0598, "step": 9303 }, { "epoch": 221.52537313432836, "grad_norm": 28.45794105529785, "learning_rate": 9.508281573498966e-06, "loss": 36.447, "step": 9304 }, { "epoch": 221.54925373134327, "grad_norm": 26.013484954833984, "learning_rate": 9.507246376811596e-06, "loss": 36.8887, "step": 9305 }, { "epoch": 221.5731343283582, "grad_norm": 28.581687927246094, "learning_rate": 9.506211180124223e-06, "loss": 35.7594, "step": 9306 }, { "epoch": 221.59701492537314, "grad_norm": 25.98162078857422, "learning_rate": 9.505175983436855e-06, "loss": 36.6488, "step": 9307 }, { "epoch": 221.62089552238805, "grad_norm": 29.857873916625977, "learning_rate": 9.504140786749482e-06, "loss": 36.5665, "step": 9308 }, { "epoch": 221.644776119403, "grad_norm": 25.796464920043945, "learning_rate": 9.503105590062112e-06, "loss": 37.4074, "step": 9309 }, { "epoch": 221.6686567164179, "grad_norm": 29.17536735534668, "learning_rate": 9.502070393374741e-06, "loss": 36.9913, "step": 9310 }, { "epoch": 221.69253731343284, "grad_norm": 28.136606216430664, "learning_rate": 9.501035196687371e-06, "loss": 35.9862, "step": 9311 }, { "epoch": 221.71641791044777, "grad_norm": 30.56552505493164, "learning_rate": 9.5e-06, "loss": 35.5431, "step": 9312 }, { "epoch": 221.74029850746268, "grad_norm": 24.548416137695312, "learning_rate": 9.49896480331263e-06, "loss": 36.7893, "step": 9313 }, { "epoch": 221.76417910447762, "grad_norm": 30.45704460144043, "learning_rate": 9.49792960662526e-06, "loss": 37.2092, "step": 9314 }, { "epoch": 221.78805970149253, "grad_norm": NaN, "learning_rate": 9.496894409937889e-06, "loss": 59.5099, "step": 9315 }, { "epoch": 221.81194029850747, "grad_norm": 25.80453109741211, "learning_rate": 9.496894409937889e-06, "loss": 36.6871, "step": 9316 }, { "epoch": 221.83582089552237, "grad_norm": 28.29366683959961, "learning_rate": 9.495859213250519e-06, "loss": 37.6914, "step": 9317 }, { "epoch": 221.8597014925373, "grad_norm": 27.410236358642578, "learning_rate": 9.494824016563148e-06, "loss": 36.4577, "step": 9318 }, { "epoch": 221.88358208955225, "grad_norm": 29.258834838867188, "learning_rate": 9.493788819875778e-06, "loss": 36.2599, "step": 9319 }, { "epoch": 221.90746268656716, "grad_norm": 27.47816276550293, "learning_rate": 9.492753623188407e-06, "loss": 35.9362, "step": 9320 }, { "epoch": 221.9313432835821, "grad_norm": 32.89609146118164, "learning_rate": 9.491718426501037e-06, "loss": 37.7435, "step": 9321 }, { "epoch": 221.955223880597, "grad_norm": 28.55097198486328, "learning_rate": 9.490683229813665e-06, "loss": 36.0103, "step": 9322 }, { "epoch": 221.97910447761194, "grad_norm": 23.855192184448242, "learning_rate": 9.489648033126296e-06, "loss": 36.576, "step": 9323 }, { "epoch": 222.0, "grad_norm": 21.09449577331543, "learning_rate": 9.488612836438924e-06, "loss": 31.4365, "step": 9324 }, { "epoch": 222.02388059701494, "grad_norm": 32.309261322021484, "learning_rate": 9.487577639751553e-06, "loss": 37.0288, "step": 9325 }, { "epoch": 222.04776119402985, "grad_norm": 26.1121768951416, "learning_rate": 9.486542443064183e-06, "loss": 36.8737, "step": 9326 }, { "epoch": 222.07164179104478, "grad_norm": 34.31647491455078, "learning_rate": 9.485507246376812e-06, "loss": 35.9067, "step": 9327 }, { "epoch": 222.0955223880597, "grad_norm": 32.59259033203125, "learning_rate": 9.484472049689442e-06, "loss": 36.4487, "step": 9328 }, { "epoch": 222.11940298507463, "grad_norm": 22.975914001464844, "learning_rate": 9.483436853002071e-06, "loss": 36.6547, "step": 9329 }, { "epoch": 222.14328358208957, "grad_norm": 21.199647903442383, "learning_rate": 9.4824016563147e-06, "loss": 37.0391, "step": 9330 }, { "epoch": 222.16716417910447, "grad_norm": 29.940921783447266, "learning_rate": 9.48136645962733e-06, "loss": 36.1474, "step": 9331 }, { "epoch": 222.1910447761194, "grad_norm": 24.6529483795166, "learning_rate": 9.48033126293996e-06, "loss": 36.8716, "step": 9332 }, { "epoch": 222.21492537313432, "grad_norm": 32.249847412109375, "learning_rate": 9.47929606625259e-06, "loss": 37.5204, "step": 9333 }, { "epoch": 222.23880597014926, "grad_norm": 29.692399978637695, "learning_rate": 9.478260869565217e-06, "loss": 35.7068, "step": 9334 }, { "epoch": 222.26268656716417, "grad_norm": 23.01384162902832, "learning_rate": 9.477225672877848e-06, "loss": 37.0796, "step": 9335 }, { "epoch": 222.2865671641791, "grad_norm": 22.15422821044922, "learning_rate": 9.476190476190476e-06, "loss": 38.0209, "step": 9336 }, { "epoch": 222.31044776119404, "grad_norm": 28.740741729736328, "learning_rate": 9.475155279503106e-06, "loss": 36.7489, "step": 9337 }, { "epoch": 222.33432835820895, "grad_norm": 21.39152717590332, "learning_rate": 9.474120082815735e-06, "loss": 37.9498, "step": 9338 }, { "epoch": 222.3582089552239, "grad_norm": 31.728282928466797, "learning_rate": 9.473084886128365e-06, "loss": 37.0965, "step": 9339 }, { "epoch": 222.3820895522388, "grad_norm": 26.833126068115234, "learning_rate": 9.472049689440994e-06, "loss": 37.4387, "step": 9340 }, { "epoch": 222.40597014925373, "grad_norm": 27.517566680908203, "learning_rate": 9.471014492753624e-06, "loss": 36.3681, "step": 9341 }, { "epoch": 222.42985074626867, "grad_norm": 25.988306045532227, "learning_rate": 9.469979296066253e-06, "loss": 36.96, "step": 9342 }, { "epoch": 222.45373134328358, "grad_norm": 27.119274139404297, "learning_rate": 9.468944099378883e-06, "loss": 36.1979, "step": 9343 }, { "epoch": 222.47761194029852, "grad_norm": 21.548036575317383, "learning_rate": 9.46790890269151e-06, "loss": 37.0174, "step": 9344 }, { "epoch": 222.50149253731342, "grad_norm": 32.375579833984375, "learning_rate": 9.466873706004142e-06, "loss": 36.4319, "step": 9345 }, { "epoch": 222.52537313432836, "grad_norm": 25.519662857055664, "learning_rate": 9.465838509316772e-06, "loss": 36.6505, "step": 9346 }, { "epoch": 222.54925373134327, "grad_norm": 30.985382080078125, "learning_rate": 9.464803312629401e-06, "loss": 36.7131, "step": 9347 }, { "epoch": 222.5731343283582, "grad_norm": 26.79625701904297, "learning_rate": 9.46376811594203e-06, "loss": 37.1961, "step": 9348 }, { "epoch": 222.59701492537314, "grad_norm": 25.46737289428711, "learning_rate": 9.462732919254658e-06, "loss": 36.6815, "step": 9349 }, { "epoch": 222.62089552238805, "grad_norm": 22.190353393554688, "learning_rate": 9.46169772256729e-06, "loss": 35.9443, "step": 9350 }, { "epoch": 222.644776119403, "grad_norm": 27.605209350585938, "learning_rate": 9.460662525879918e-06, "loss": 36.8895, "step": 9351 }, { "epoch": 222.6686567164179, "grad_norm": 22.50598907470703, "learning_rate": 9.459627329192547e-06, "loss": 37.0562, "step": 9352 }, { "epoch": 222.69253731343284, "grad_norm": 30.742273330688477, "learning_rate": 9.458592132505177e-06, "loss": 34.8006, "step": 9353 }, { "epoch": 222.71641791044777, "grad_norm": 27.609203338623047, "learning_rate": 9.457556935817806e-06, "loss": 36.096, "step": 9354 }, { "epoch": 222.74029850746268, "grad_norm": 25.80821990966797, "learning_rate": 9.456521739130436e-06, "loss": 35.8656, "step": 9355 }, { "epoch": 222.76417910447762, "grad_norm": 24.429798126220703, "learning_rate": 9.455486542443065e-06, "loss": 35.7282, "step": 9356 }, { "epoch": 222.78805970149253, "grad_norm": 26.56473159790039, "learning_rate": 9.454451345755695e-06, "loss": 35.7349, "step": 9357 }, { "epoch": 222.81194029850747, "grad_norm": 23.048847198486328, "learning_rate": 9.453416149068324e-06, "loss": 37.8195, "step": 9358 }, { "epoch": 222.83582089552237, "grad_norm": 25.443201065063477, "learning_rate": 9.452380952380952e-06, "loss": 36.0767, "step": 9359 }, { "epoch": 222.8597014925373, "grad_norm": 21.29392433166504, "learning_rate": 9.451345755693583e-06, "loss": 36.912, "step": 9360 }, { "epoch": 222.88358208955225, "grad_norm": 27.57860565185547, "learning_rate": 9.450310559006211e-06, "loss": 36.4075, "step": 9361 }, { "epoch": 222.90746268656716, "grad_norm": 24.445480346679688, "learning_rate": 9.449275362318842e-06, "loss": 36.4575, "step": 9362 }, { "epoch": 222.9313432835821, "grad_norm": 29.19906997680664, "learning_rate": 9.44824016563147e-06, "loss": 37.9941, "step": 9363 }, { "epoch": 222.955223880597, "grad_norm": 26.642549514770508, "learning_rate": 9.4472049689441e-06, "loss": 36.3679, "step": 9364 }, { "epoch": 222.97910447761194, "grad_norm": 21.054933547973633, "learning_rate": 9.44616977225673e-06, "loss": 35.4231, "step": 9365 }, { "epoch": 223.0, "grad_norm": 19.74691390991211, "learning_rate": 9.445134575569359e-06, "loss": 33.1747, "step": 9366 }, { "epoch": 223.02388059701494, "grad_norm": 23.323965072631836, "learning_rate": 9.444099378881988e-06, "loss": 36.345, "step": 9367 }, { "epoch": 223.04776119402985, "grad_norm": 17.254398345947266, "learning_rate": 9.443064182194618e-06, "loss": 35.4009, "step": 9368 }, { "epoch": 223.07164179104478, "grad_norm": 24.655067443847656, "learning_rate": 9.442028985507247e-06, "loss": 35.2186, "step": 9369 }, { "epoch": 223.0955223880597, "grad_norm": 18.97420883178711, "learning_rate": 9.440993788819877e-06, "loss": 37.9157, "step": 9370 }, { "epoch": 223.11940298507463, "grad_norm": 24.617870330810547, "learning_rate": 9.439958592132505e-06, "loss": 37.5063, "step": 9371 }, { "epoch": 223.14328358208957, "grad_norm": 23.071441650390625, "learning_rate": 9.438923395445136e-06, "loss": 37.1021, "step": 9372 }, { "epoch": 223.16716417910447, "grad_norm": 23.001476287841797, "learning_rate": 9.437888198757765e-06, "loss": 36.4769, "step": 9373 }, { "epoch": 223.1910447761194, "grad_norm": 21.526748657226562, "learning_rate": 9.436853002070393e-06, "loss": 36.0368, "step": 9374 }, { "epoch": 223.21492537313432, "grad_norm": 22.32477378845215, "learning_rate": 9.435817805383025e-06, "loss": 37.367, "step": 9375 }, { "epoch": 223.23880597014926, "grad_norm": 19.45122718811035, "learning_rate": 9.434782608695652e-06, "loss": 37.2365, "step": 9376 }, { "epoch": 223.26268656716417, "grad_norm": 23.752643585205078, "learning_rate": 9.433747412008284e-06, "loss": 37.151, "step": 9377 }, { "epoch": 223.2865671641791, "grad_norm": 18.8602237701416, "learning_rate": 9.432712215320911e-06, "loss": 36.614, "step": 9378 }, { "epoch": 223.31044776119404, "grad_norm": 22.16071319580078, "learning_rate": 9.431677018633541e-06, "loss": 36.9328, "step": 9379 }, { "epoch": 223.33432835820895, "grad_norm": 21.07900619506836, "learning_rate": 9.43064182194617e-06, "loss": 35.7809, "step": 9380 }, { "epoch": 223.3582089552239, "grad_norm": 24.552772521972656, "learning_rate": 9.4296066252588e-06, "loss": 35.3386, "step": 9381 }, { "epoch": 223.3820895522388, "grad_norm": 22.024473190307617, "learning_rate": 9.42857142857143e-06, "loss": 37.6294, "step": 9382 }, { "epoch": 223.40597014925373, "grad_norm": 27.61777114868164, "learning_rate": 9.427536231884059e-06, "loss": 36.9125, "step": 9383 }, { "epoch": 223.42985074626867, "grad_norm": 22.7742919921875, "learning_rate": 9.426501035196689e-06, "loss": 38.2719, "step": 9384 }, { "epoch": 223.45373134328358, "grad_norm": NaN, "learning_rate": 9.425465838509318e-06, "loss": 32.1527, "step": 9385 }, { "epoch": 223.47761194029852, "grad_norm": 20.231372833251953, "learning_rate": 9.425465838509318e-06, "loss": 36.6002, "step": 9386 }, { "epoch": 223.50149253731342, "grad_norm": 20.850204467773438, "learning_rate": 9.424430641821946e-06, "loss": 37.3469, "step": 9387 }, { "epoch": 223.52537313432836, "grad_norm": 17.099939346313477, "learning_rate": 9.423395445134577e-06, "loss": 35.4745, "step": 9388 }, { "epoch": 223.54925373134327, "grad_norm": 20.98678970336914, "learning_rate": 9.422360248447205e-06, "loss": 36.0333, "step": 9389 }, { "epoch": 223.5731343283582, "grad_norm": 17.527341842651367, "learning_rate": 9.421325051759835e-06, "loss": 37.9387, "step": 9390 }, { "epoch": 223.59701492537314, "grad_norm": 19.38433265686035, "learning_rate": 9.420289855072464e-06, "loss": 37.2279, "step": 9391 }, { "epoch": 223.62089552238805, "grad_norm": 15.133399963378906, "learning_rate": 9.419254658385094e-06, "loss": 36.4897, "step": 9392 }, { "epoch": 223.644776119403, "grad_norm": 16.894901275634766, "learning_rate": 9.418219461697723e-06, "loss": 36.3029, "step": 9393 }, { "epoch": 223.6686567164179, "grad_norm": 16.413881301879883, "learning_rate": 9.417184265010353e-06, "loss": 36.3818, "step": 9394 }, { "epoch": 223.69253731343284, "grad_norm": 15.037392616271973, "learning_rate": 9.416149068322982e-06, "loss": 36.4857, "step": 9395 }, { "epoch": 223.71641791044777, "grad_norm": 15.291950225830078, "learning_rate": 9.415113871635612e-06, "loss": 37.1058, "step": 9396 }, { "epoch": 223.74029850746268, "grad_norm": 15.040635108947754, "learning_rate": 9.414078674948241e-06, "loss": 35.7792, "step": 9397 }, { "epoch": 223.76417910447762, "grad_norm": 15.97356128692627, "learning_rate": 9.41304347826087e-06, "loss": 36.0541, "step": 9398 }, { "epoch": 223.78805970149253, "grad_norm": 15.278190612792969, "learning_rate": 9.412008281573499e-06, "loss": 37.2379, "step": 9399 }, { "epoch": 223.81194029850747, "grad_norm": 21.481409072875977, "learning_rate": 9.41097308488613e-06, "loss": 37.0654, "step": 9400 }, { "epoch": 223.83582089552237, "grad_norm": 15.87094783782959, "learning_rate": 9.40993788819876e-06, "loss": 36.9308, "step": 9401 }, { "epoch": 223.8597014925373, "grad_norm": 17.718656539916992, "learning_rate": 9.408902691511387e-06, "loss": 37.1585, "step": 9402 }, { "epoch": 223.88358208955225, "grad_norm": 16.742095947265625, "learning_rate": 9.407867494824018e-06, "loss": 37.4013, "step": 9403 }, { "epoch": 223.90746268656716, "grad_norm": 16.419233322143555, "learning_rate": 9.406832298136646e-06, "loss": 34.8722, "step": 9404 }, { "epoch": 223.9313432835821, "grad_norm": 19.157041549682617, "learning_rate": 9.405797101449276e-06, "loss": 36.4444, "step": 9405 }, { "epoch": 223.955223880597, "grad_norm": 16.145048141479492, "learning_rate": 9.404761904761905e-06, "loss": 36.0074, "step": 9406 }, { "epoch": 223.97910447761194, "grad_norm": 22.667598724365234, "learning_rate": 9.403726708074535e-06, "loss": 36.9183, "step": 9407 }, { "epoch": 224.0, "grad_norm": 16.207134246826172, "learning_rate": 9.402691511387164e-06, "loss": 31.4937, "step": 9408 }, { "epoch": 224.02388059701494, "grad_norm": 18.85007095336914, "learning_rate": 9.401656314699794e-06, "loss": 37.0026, "step": 9409 }, { "epoch": 224.04776119402985, "grad_norm": 19.203710556030273, "learning_rate": 9.400621118012423e-06, "loss": 35.727, "step": 9410 }, { "epoch": 224.07164179104478, "grad_norm": 17.155635833740234, "learning_rate": 9.399585921325053e-06, "loss": 37.0265, "step": 9411 }, { "epoch": 224.0955223880597, "grad_norm": 19.872440338134766, "learning_rate": 9.398550724637682e-06, "loss": 36.5634, "step": 9412 }, { "epoch": 224.11940298507463, "grad_norm": 18.595470428466797, "learning_rate": 9.397515527950312e-06, "loss": 38.2642, "step": 9413 }, { "epoch": 224.14328358208957, "grad_norm": 16.608049392700195, "learning_rate": 9.39648033126294e-06, "loss": 35.9693, "step": 9414 }, { "epoch": 224.16716417910447, "grad_norm": 17.762571334838867, "learning_rate": 9.395445134575571e-06, "loss": 37.0019, "step": 9415 }, { "epoch": 224.1910447761194, "grad_norm": 17.047176361083984, "learning_rate": 9.394409937888199e-06, "loss": 36.5651, "step": 9416 }, { "epoch": 224.21492537313432, "grad_norm": 18.56181526184082, "learning_rate": 9.393374741200828e-06, "loss": 36.5981, "step": 9417 }, { "epoch": 224.23880597014926, "grad_norm": 14.048553466796875, "learning_rate": 9.392339544513458e-06, "loss": 36.8729, "step": 9418 }, { "epoch": 224.26268656716417, "grad_norm": 18.872583389282227, "learning_rate": 9.391304347826087e-06, "loss": 37.0848, "step": 9419 }, { "epoch": 224.2865671641791, "grad_norm": 16.786888122558594, "learning_rate": 9.390269151138717e-06, "loss": 36.495, "step": 9420 }, { "epoch": 224.31044776119404, "grad_norm": 17.79970359802246, "learning_rate": 9.389233954451347e-06, "loss": 37.1684, "step": 9421 }, { "epoch": 224.33432835820895, "grad_norm": 13.249444007873535, "learning_rate": 9.388198757763976e-06, "loss": 35.6529, "step": 9422 }, { "epoch": 224.3582089552239, "grad_norm": 21.127038955688477, "learning_rate": 9.387163561076606e-06, "loss": 36.3109, "step": 9423 }, { "epoch": 224.3820895522388, "grad_norm": 17.2930908203125, "learning_rate": 9.386128364389233e-06, "loss": 36.643, "step": 9424 }, { "epoch": 224.40597014925373, "grad_norm": 19.54315185546875, "learning_rate": 9.385093167701865e-06, "loss": 36.7007, "step": 9425 }, { "epoch": 224.42985074626867, "grad_norm": 17.682388305664062, "learning_rate": 9.384057971014492e-06, "loss": 35.7651, "step": 9426 }, { "epoch": 224.45373134328358, "grad_norm": 18.735719680786133, "learning_rate": 9.383022774327124e-06, "loss": 35.9602, "step": 9427 }, { "epoch": 224.47761194029852, "grad_norm": 16.567495346069336, "learning_rate": 9.381987577639753e-06, "loss": 35.8212, "step": 9428 }, { "epoch": 224.50149253731342, "grad_norm": 18.95294189453125, "learning_rate": 9.380952380952381e-06, "loss": 36.5769, "step": 9429 }, { "epoch": 224.52537313432836, "grad_norm": 21.27597427368164, "learning_rate": 9.379917184265012e-06, "loss": 36.8463, "step": 9430 }, { "epoch": 224.54925373134327, "grad_norm": 15.662125587463379, "learning_rate": 9.37888198757764e-06, "loss": 36.874, "step": 9431 }, { "epoch": 224.5731343283582, "grad_norm": 18.717649459838867, "learning_rate": 9.37784679089027e-06, "loss": 36.1701, "step": 9432 }, { "epoch": 224.59701492537314, "grad_norm": 15.020418167114258, "learning_rate": 9.3768115942029e-06, "loss": 35.3557, "step": 9433 }, { "epoch": 224.62089552238805, "grad_norm": 21.474580764770508, "learning_rate": 9.375776397515529e-06, "loss": 36.4692, "step": 9434 }, { "epoch": 224.644776119403, "grad_norm": 15.604917526245117, "learning_rate": 9.374741200828158e-06, "loss": 35.8736, "step": 9435 }, { "epoch": 224.6686567164179, "grad_norm": 20.360532760620117, "learning_rate": 9.373706004140788e-06, "loss": 36.359, "step": 9436 }, { "epoch": 224.69253731343284, "grad_norm": 17.218374252319336, "learning_rate": 9.372670807453417e-06, "loss": 37.9752, "step": 9437 }, { "epoch": 224.71641791044777, "grad_norm": 20.179065704345703, "learning_rate": 9.371635610766047e-06, "loss": 36.7497, "step": 9438 }, { "epoch": 224.74029850746268, "grad_norm": 16.89761734008789, "learning_rate": 9.370600414078675e-06, "loss": 36.1195, "step": 9439 }, { "epoch": 224.76417910447762, "grad_norm": 20.876089096069336, "learning_rate": 9.369565217391306e-06, "loss": 37.1461, "step": 9440 }, { "epoch": 224.78805970149253, "grad_norm": 20.17327880859375, "learning_rate": 9.368530020703934e-06, "loss": 35.7237, "step": 9441 }, { "epoch": 224.81194029850747, "grad_norm": 17.4796085357666, "learning_rate": 9.367494824016565e-06, "loss": 38.5719, "step": 9442 }, { "epoch": 224.83582089552237, "grad_norm": 18.749671936035156, "learning_rate": 9.366459627329193e-06, "loss": 37.7713, "step": 9443 }, { "epoch": 224.8597014925373, "grad_norm": 18.992996215820312, "learning_rate": 9.365424430641822e-06, "loss": 37.2909, "step": 9444 }, { "epoch": 224.88358208955225, "grad_norm": 14.612916946411133, "learning_rate": 9.364389233954452e-06, "loss": 37.2268, "step": 9445 }, { "epoch": 224.90746268656716, "grad_norm": 14.41982650756836, "learning_rate": 9.363354037267081e-06, "loss": 36.3542, "step": 9446 }, { "epoch": 224.9313432835821, "grad_norm": 14.91037654876709, "learning_rate": 9.362318840579711e-06, "loss": 36.4081, "step": 9447 }, { "epoch": 224.955223880597, "grad_norm": 15.922717094421387, "learning_rate": 9.36128364389234e-06, "loss": 36.5212, "step": 9448 }, { "epoch": 224.97910447761194, "grad_norm": 18.122549057006836, "learning_rate": 9.36024844720497e-06, "loss": 36.5235, "step": 9449 }, { "epoch": 225.0, "grad_norm": 15.519862174987793, "learning_rate": 9.3592132505176e-06, "loss": 31.8674, "step": 9450 }, { "epoch": 225.02388059701494, "grad_norm": NaN, "learning_rate": 9.358178053830227e-06, "loss": 35.3011, "step": 9451 }, { "epoch": 225.04776119402985, "grad_norm": 17.2370662689209, "learning_rate": 9.358178053830227e-06, "loss": 36.4703, "step": 9452 }, { "epoch": 225.07164179104478, "grad_norm": 14.530112266540527, "learning_rate": 9.357142857142859e-06, "loss": 37.5439, "step": 9453 }, { "epoch": 225.0955223880597, "grad_norm": 17.49200439453125, "learning_rate": 9.356107660455486e-06, "loss": 35.407, "step": 9454 }, { "epoch": 225.11940298507463, "grad_norm": 19.838918685913086, "learning_rate": 9.355072463768116e-06, "loss": 36.3665, "step": 9455 }, { "epoch": 225.14328358208957, "grad_norm": 15.36133098602295, "learning_rate": 9.354037267080745e-06, "loss": 37.52, "step": 9456 }, { "epoch": 225.16716417910447, "grad_norm": 25.876646041870117, "learning_rate": 9.353002070393375e-06, "loss": 35.3112, "step": 9457 }, { "epoch": 225.1910447761194, "grad_norm": 18.423568725585938, "learning_rate": 9.351966873706006e-06, "loss": 37.3666, "step": 9458 }, { "epoch": 225.21492537313432, "grad_norm": 22.096370697021484, "learning_rate": 9.350931677018634e-06, "loss": 36.9002, "step": 9459 }, { "epoch": 225.23880597014926, "grad_norm": 20.92823028564453, "learning_rate": 9.349896480331264e-06, "loss": 36.2065, "step": 9460 }, { "epoch": 225.26268656716417, "grad_norm": 17.686012268066406, "learning_rate": 9.348861283643893e-06, "loss": 36.1261, "step": 9461 }, { "epoch": 225.2865671641791, "grad_norm": 13.867901802062988, "learning_rate": 9.347826086956523e-06, "loss": 36.382, "step": 9462 }, { "epoch": 225.31044776119404, "grad_norm": 17.950428009033203, "learning_rate": 9.346790890269152e-06, "loss": 35.615, "step": 9463 }, { "epoch": 225.33432835820895, "grad_norm": 20.58045768737793, "learning_rate": 9.345755693581782e-06, "loss": 36.307, "step": 9464 }, { "epoch": 225.3582089552239, "grad_norm": 16.847736358642578, "learning_rate": 9.344720496894411e-06, "loss": 36.6786, "step": 9465 }, { "epoch": 225.3820895522388, "grad_norm": 21.384674072265625, "learning_rate": 9.34368530020704e-06, "loss": 36.6836, "step": 9466 }, { "epoch": 225.40597014925373, "grad_norm": 20.075319290161133, "learning_rate": 9.342650103519669e-06, "loss": 36.1683, "step": 9467 }, { "epoch": 225.42985074626867, "grad_norm": 17.858158111572266, "learning_rate": 9.3416149068323e-06, "loss": 37.2771, "step": 9468 }, { "epoch": 225.45373134328358, "grad_norm": 13.715435981750488, "learning_rate": 9.340579710144928e-06, "loss": 35.7138, "step": 9469 }, { "epoch": 225.47761194029852, "grad_norm": 24.905656814575195, "learning_rate": 9.339544513457557e-06, "loss": 36.5885, "step": 9470 }, { "epoch": 225.50149253731342, "grad_norm": 16.43111801147461, "learning_rate": 9.338509316770187e-06, "loss": 36.8778, "step": 9471 }, { "epoch": 225.52537313432836, "grad_norm": 15.977991104125977, "learning_rate": 9.337474120082816e-06, "loss": 36.921, "step": 9472 }, { "epoch": 225.54925373134327, "grad_norm": 18.274497985839844, "learning_rate": 9.336438923395446e-06, "loss": 37.1489, "step": 9473 }, { "epoch": 225.5731343283582, "grad_norm": 18.14158821105957, "learning_rate": 9.335403726708075e-06, "loss": 35.983, "step": 9474 }, { "epoch": 225.59701492537314, "grad_norm": 18.77350425720215, "learning_rate": 9.334368530020705e-06, "loss": 36.4306, "step": 9475 }, { "epoch": 225.62089552238805, "grad_norm": 17.489553451538086, "learning_rate": 9.333333333333334e-06, "loss": 36.7718, "step": 9476 }, { "epoch": 225.644776119403, "grad_norm": 22.633970260620117, "learning_rate": 9.332298136645964e-06, "loss": 37.5032, "step": 9477 }, { "epoch": 225.6686567164179, "grad_norm": 13.850768089294434, "learning_rate": 9.331262939958593e-06, "loss": 37.5411, "step": 9478 }, { "epoch": 225.69253731343284, "grad_norm": 27.146482467651367, "learning_rate": 9.330227743271221e-06, "loss": 36.5295, "step": 9479 }, { "epoch": 225.71641791044777, "grad_norm": 21.86114501953125, "learning_rate": 9.329192546583852e-06, "loss": 36.4041, "step": 9480 }, { "epoch": 225.74029850746268, "grad_norm": 21.19093894958496, "learning_rate": 9.32815734989648e-06, "loss": 35.0877, "step": 9481 }, { "epoch": 225.76417910447762, "grad_norm": 22.90280532836914, "learning_rate": 9.32712215320911e-06, "loss": 36.268, "step": 9482 }, { "epoch": 225.78805970149253, "grad_norm": 18.020240783691406, "learning_rate": 9.32608695652174e-06, "loss": 34.9878, "step": 9483 }, { "epoch": 225.81194029850747, "grad_norm": 16.98219108581543, "learning_rate": 9.325051759834369e-06, "loss": 37.7387, "step": 9484 }, { "epoch": 225.83582089552237, "grad_norm": 19.392545700073242, "learning_rate": 9.324016563146998e-06, "loss": 37.0511, "step": 9485 }, { "epoch": 225.8597014925373, "grad_norm": 15.673796653747559, "learning_rate": 9.322981366459628e-06, "loss": 36.8313, "step": 9486 }, { "epoch": 225.88358208955225, "grad_norm": 18.076011657714844, "learning_rate": 9.321946169772257e-06, "loss": 37.5841, "step": 9487 }, { "epoch": 225.90746268656716, "grad_norm": 16.198062896728516, "learning_rate": 9.320910973084887e-06, "loss": 37.4417, "step": 9488 }, { "epoch": 225.9313432835821, "grad_norm": 26.08418846130371, "learning_rate": 9.319875776397517e-06, "loss": 36.6044, "step": 9489 }, { "epoch": 225.955223880597, "grad_norm": 18.208662033081055, "learning_rate": 9.318840579710146e-06, "loss": 37.0748, "step": 9490 }, { "epoch": 225.97910447761194, "grad_norm": 22.65439796447754, "learning_rate": 9.317805383022776e-06, "loss": 36.1861, "step": 9491 }, { "epoch": 226.0, "grad_norm": 22.45075225830078, "learning_rate": 9.316770186335405e-06, "loss": 32.8049, "step": 9492 }, { "epoch": 226.02388059701494, "grad_norm": 16.150846481323242, "learning_rate": 9.315734989648035e-06, "loss": 36.8926, "step": 9493 }, { "epoch": 226.04776119402985, "grad_norm": 25.77251434326172, "learning_rate": 9.314699792960662e-06, "loss": 37.1812, "step": 9494 }, { "epoch": 226.07164179104478, "grad_norm": 18.994468688964844, "learning_rate": 9.313664596273294e-06, "loss": 36.6646, "step": 9495 }, { "epoch": 226.0955223880597, "grad_norm": 21.6476993560791, "learning_rate": 9.312629399585922e-06, "loss": 37.6085, "step": 9496 }, { "epoch": 226.11940298507463, "grad_norm": 24.551372528076172, "learning_rate": 9.311594202898551e-06, "loss": 36.5801, "step": 9497 }, { "epoch": 226.14328358208957, "grad_norm": 17.199466705322266, "learning_rate": 9.31055900621118e-06, "loss": 35.884, "step": 9498 }, { "epoch": 226.16716417910447, "grad_norm": 29.144901275634766, "learning_rate": 9.30952380952381e-06, "loss": 35.4717, "step": 9499 }, { "epoch": 226.1910447761194, "grad_norm": 20.848114013671875, "learning_rate": 9.30848861283644e-06, "loss": 36.9175, "step": 9500 }, { "epoch": 226.21492537313432, "grad_norm": 32.51123809814453, "learning_rate": 9.30745341614907e-06, "loss": 37.4948, "step": 9501 }, { "epoch": 226.23880597014926, "grad_norm": 18.725080490112305, "learning_rate": 9.306418219461699e-06, "loss": 36.4376, "step": 9502 }, { "epoch": 226.26268656716417, "grad_norm": 34.88555145263672, "learning_rate": 9.305383022774328e-06, "loss": 37.9428, "step": 9503 }, { "epoch": 226.2865671641791, "grad_norm": 25.855121612548828, "learning_rate": 9.304347826086956e-06, "loss": 37.1728, "step": 9504 }, { "epoch": 226.31044776119404, "grad_norm": 37.347198486328125, "learning_rate": 9.303312629399587e-06, "loss": 35.5822, "step": 9505 }, { "epoch": 226.33432835820895, "grad_norm": 37.224613189697266, "learning_rate": 9.302277432712215e-06, "loss": 36.2776, "step": 9506 }, { "epoch": 226.3582089552239, "grad_norm": 23.242774963378906, "learning_rate": 9.301242236024846e-06, "loss": 36.7405, "step": 9507 }, { "epoch": 226.3820895522388, "grad_norm": 26.102792739868164, "learning_rate": 9.300207039337474e-06, "loss": 35.7835, "step": 9508 }, { "epoch": 226.40597014925373, "grad_norm": 28.836580276489258, "learning_rate": 9.299171842650104e-06, "loss": 37.6715, "step": 9509 }, { "epoch": 226.42985074626867, "grad_norm": 23.27029800415039, "learning_rate": 9.298136645962733e-06, "loss": 35.8731, "step": 9510 }, { "epoch": 226.45373134328358, "grad_norm": 35.15096664428711, "learning_rate": 9.297101449275363e-06, "loss": 37.1661, "step": 9511 }, { "epoch": 226.47761194029852, "grad_norm": 28.0610408782959, "learning_rate": 9.296066252587992e-06, "loss": 36.5984, "step": 9512 }, { "epoch": 226.50149253731342, "grad_norm": 33.2618293762207, "learning_rate": 9.295031055900622e-06, "loss": 36.2855, "step": 9513 }, { "epoch": 226.52537313432836, "grad_norm": 31.961000442504883, "learning_rate": 9.293995859213251e-06, "loss": 37.3197, "step": 9514 }, { "epoch": 226.54925373134327, "grad_norm": 26.64537239074707, "learning_rate": 9.292960662525881e-06, "loss": 36.5365, "step": 9515 }, { "epoch": 226.5731343283582, "grad_norm": 26.88564682006836, "learning_rate": 9.29192546583851e-06, "loss": 37.6781, "step": 9516 }, { "epoch": 226.59701492537314, "grad_norm": 26.57740592956543, "learning_rate": 9.29089026915114e-06, "loss": 34.8302, "step": 9517 }, { "epoch": 226.62089552238805, "grad_norm": 22.637767791748047, "learning_rate": 9.28985507246377e-06, "loss": 35.4446, "step": 9518 }, { "epoch": 226.644776119403, "grad_norm": 32.20105743408203, "learning_rate": 9.288819875776397e-06, "loss": 37.527, "step": 9519 }, { "epoch": 226.6686567164179, "grad_norm": 24.580772399902344, "learning_rate": 9.287784679089029e-06, "loss": 37.1435, "step": 9520 }, { "epoch": 226.69253731343284, "grad_norm": 30.045690536499023, "learning_rate": 9.286749482401656e-06, "loss": 35.4939, "step": 9521 }, { "epoch": 226.71641791044777, "grad_norm": 27.443836212158203, "learning_rate": 9.285714285714288e-06, "loss": 37.1542, "step": 9522 }, { "epoch": 226.74029850746268, "grad_norm": 30.533077239990234, "learning_rate": 9.284679089026915e-06, "loss": 36.9308, "step": 9523 }, { "epoch": 226.76417910447762, "grad_norm": 27.056224822998047, "learning_rate": 9.283643892339545e-06, "loss": 37.3001, "step": 9524 }, { "epoch": 226.78805970149253, "grad_norm": 27.161880493164062, "learning_rate": 9.282608695652174e-06, "loss": 35.7908, "step": 9525 }, { "epoch": 226.81194029850747, "grad_norm": 24.670005798339844, "learning_rate": 9.281573498964804e-06, "loss": 37.226, "step": 9526 }, { "epoch": 226.83582089552237, "grad_norm": 28.789091110229492, "learning_rate": 9.280538302277434e-06, "loss": 35.142, "step": 9527 }, { "epoch": 226.8597014925373, "grad_norm": 28.024110794067383, "learning_rate": 9.279503105590063e-06, "loss": 36.7626, "step": 9528 }, { "epoch": 226.88358208955225, "grad_norm": 29.142467498779297, "learning_rate": 9.278467908902693e-06, "loss": 36.4019, "step": 9529 }, { "epoch": 226.90746268656716, "grad_norm": 26.798542022705078, "learning_rate": 9.277432712215322e-06, "loss": 35.8312, "step": 9530 }, { "epoch": 226.9313432835821, "grad_norm": 27.18988800048828, "learning_rate": 9.27639751552795e-06, "loss": 35.5619, "step": 9531 }, { "epoch": 226.955223880597, "grad_norm": 24.10135841369629, "learning_rate": 9.275362318840581e-06, "loss": 35.2492, "step": 9532 }, { "epoch": 226.97910447761194, "grad_norm": 30.03662109375, "learning_rate": 9.274327122153209e-06, "loss": 36.867, "step": 9533 }, { "epoch": 227.0, "grad_norm": 20.40000343322754, "learning_rate": 9.273291925465839e-06, "loss": 32.0142, "step": 9534 }, { "epoch": 227.02388059701494, "grad_norm": 30.297977447509766, "learning_rate": 9.272256728778468e-06, "loss": 36.9057, "step": 9535 }, { "epoch": 227.04776119402985, "grad_norm": 27.713289260864258, "learning_rate": 9.271221532091098e-06, "loss": 36.9784, "step": 9536 }, { "epoch": 227.07164179104478, "grad_norm": 30.362102508544922, "learning_rate": 9.270186335403727e-06, "loss": 37.718, "step": 9537 }, { "epoch": 227.0955223880597, "grad_norm": 29.50467300415039, "learning_rate": 9.269151138716357e-06, "loss": 36.3835, "step": 9538 }, { "epoch": 227.11940298507463, "grad_norm": 27.414894104003906, "learning_rate": 9.268115942028986e-06, "loss": 36.8967, "step": 9539 }, { "epoch": 227.14328358208957, "grad_norm": 25.29361915588379, "learning_rate": 9.267080745341616e-06, "loss": 37.5438, "step": 9540 }, { "epoch": 227.16716417910447, "grad_norm": 28.762990951538086, "learning_rate": 9.266045548654245e-06, "loss": 36.4315, "step": 9541 }, { "epoch": 227.1910447761194, "grad_norm": 24.950563430786133, "learning_rate": 9.265010351966875e-06, "loss": 36.9243, "step": 9542 }, { "epoch": 227.21492537313432, "grad_norm": 30.792552947998047, "learning_rate": 9.263975155279504e-06, "loss": 36.2705, "step": 9543 }, { "epoch": 227.23880597014926, "grad_norm": 29.26622772216797, "learning_rate": 9.262939958592134e-06, "loss": 35.4549, "step": 9544 }, { "epoch": 227.26268656716417, "grad_norm": 27.079885482788086, "learning_rate": 9.261904761904763e-06, "loss": 36.2627, "step": 9545 }, { "epoch": 227.2865671641791, "grad_norm": 26.671934127807617, "learning_rate": 9.260869565217391e-06, "loss": 37.5339, "step": 9546 }, { "epoch": 227.31044776119404, "grad_norm": 29.74240493774414, "learning_rate": 9.259834368530022e-06, "loss": 36.8341, "step": 9547 }, { "epoch": 227.33432835820895, "grad_norm": 23.974681854248047, "learning_rate": 9.25879917184265e-06, "loss": 37.6008, "step": 9548 }, { "epoch": 227.3582089552239, "grad_norm": 30.059099197387695, "learning_rate": 9.25776397515528e-06, "loss": 36.8922, "step": 9549 }, { "epoch": 227.3820895522388, "grad_norm": 25.293973922729492, "learning_rate": 9.25672877846791e-06, "loss": 36.2701, "step": 9550 }, { "epoch": 227.40597014925373, "grad_norm": 29.775676727294922, "learning_rate": 9.255693581780539e-06, "loss": 36.2126, "step": 9551 }, { "epoch": 227.42985074626867, "grad_norm": 24.457752227783203, "learning_rate": 9.254658385093168e-06, "loss": 36.4361, "step": 9552 }, { "epoch": 227.45373134328358, "grad_norm": 31.548799514770508, "learning_rate": 9.253623188405798e-06, "loss": 36.2246, "step": 9553 }, { "epoch": 227.47761194029852, "grad_norm": 27.01188087463379, "learning_rate": 9.252587991718427e-06, "loss": 36.3035, "step": 9554 }, { "epoch": 227.50149253731342, "grad_norm": 29.452869415283203, "learning_rate": 9.251552795031057e-06, "loss": 36.749, "step": 9555 }, { "epoch": 227.52537313432836, "grad_norm": 27.627206802368164, "learning_rate": 9.250517598343686e-06, "loss": 35.4703, "step": 9556 }, { "epoch": 227.54925373134327, "grad_norm": 30.036054611206055, "learning_rate": 9.249482401656316e-06, "loss": 37.4655, "step": 9557 }, { "epoch": 227.5731343283582, "grad_norm": 26.98161506652832, "learning_rate": 9.248447204968944e-06, "loss": 36.8224, "step": 9558 }, { "epoch": 227.59701492537314, "grad_norm": 26.470125198364258, "learning_rate": 9.247412008281575e-06, "loss": 37.1462, "step": 9559 }, { "epoch": 227.62089552238805, "grad_norm": 24.754650115966797, "learning_rate": 9.246376811594203e-06, "loss": 35.6753, "step": 9560 }, { "epoch": 227.644776119403, "grad_norm": 28.549808502197266, "learning_rate": 9.245341614906832e-06, "loss": 35.236, "step": 9561 }, { "epoch": 227.6686567164179, "grad_norm": 22.26018714904785, "learning_rate": 9.244306418219462e-06, "loss": 36.3384, "step": 9562 }, { "epoch": 227.69253731343284, "grad_norm": NaN, "learning_rate": 9.243271221532091e-06, "loss": 54.0053, "step": 9563 }, { "epoch": 227.71641791044777, "grad_norm": 29.885662078857422, "learning_rate": 9.243271221532091e-06, "loss": 35.7748, "step": 9564 }, { "epoch": 227.74029850746268, "grad_norm": 26.544864654541016, "learning_rate": 9.242236024844721e-06, "loss": 36.4898, "step": 9565 }, { "epoch": 227.76417910447762, "grad_norm": 29.086551666259766, "learning_rate": 9.24120082815735e-06, "loss": 36.8967, "step": 9566 }, { "epoch": 227.78805970149253, "grad_norm": 25.901094436645508, "learning_rate": 9.24016563146998e-06, "loss": 36.4017, "step": 9567 }, { "epoch": 227.81194029850747, "grad_norm": 26.74492645263672, "learning_rate": 9.23913043478261e-06, "loss": 36.814, "step": 9568 }, { "epoch": 227.83582089552237, "grad_norm": 25.30301856994629, "learning_rate": 9.238095238095239e-06, "loss": 36.5462, "step": 9569 }, { "epoch": 227.8597014925373, "grad_norm": 27.54271125793457, "learning_rate": 9.237060041407869e-06, "loss": 36.1326, "step": 9570 }, { "epoch": 227.88358208955225, "grad_norm": 28.441038131713867, "learning_rate": 9.236024844720498e-06, "loss": 36.421, "step": 9571 }, { "epoch": 227.90746268656716, "grad_norm": 28.303749084472656, "learning_rate": 9.234989648033128e-06, "loss": 36.3227, "step": 9572 }, { "epoch": 227.9313432835821, "grad_norm": 24.03018569946289, "learning_rate": 9.233954451345757e-06, "loss": 35.5522, "step": 9573 }, { "epoch": 227.955223880597, "grad_norm": 28.449106216430664, "learning_rate": 9.232919254658385e-06, "loss": 36.2296, "step": 9574 }, { "epoch": 227.97910447761194, "grad_norm": 26.051109313964844, "learning_rate": 9.231884057971016e-06, "loss": 36.0445, "step": 9575 }, { "epoch": 228.0, "grad_norm": 24.871156692504883, "learning_rate": 9.230848861283644e-06, "loss": 31.9742, "step": 9576 }, { "epoch": 228.02388059701494, "grad_norm": 30.20591926574707, "learning_rate": 9.229813664596274e-06, "loss": 36.5131, "step": 9577 }, { "epoch": 228.04776119402985, "grad_norm": 24.737354278564453, "learning_rate": 9.228778467908903e-06, "loss": 36.2709, "step": 9578 }, { "epoch": 228.07164179104478, "grad_norm": 24.169931411743164, "learning_rate": 9.227743271221533e-06, "loss": 36.7686, "step": 9579 }, { "epoch": 228.0955223880597, "grad_norm": 27.94664764404297, "learning_rate": 9.226708074534162e-06, "loss": 35.84, "step": 9580 }, { "epoch": 228.11940298507463, "grad_norm": 24.36469841003418, "learning_rate": 9.225672877846792e-06, "loss": 35.7983, "step": 9581 }, { "epoch": 228.14328358208957, "grad_norm": 30.60504150390625, "learning_rate": 9.224637681159421e-06, "loss": 36.2161, "step": 9582 }, { "epoch": 228.16716417910447, "grad_norm": 24.57705307006836, "learning_rate": 9.22360248447205e-06, "loss": 36.4545, "step": 9583 }, { "epoch": 228.1910447761194, "grad_norm": 28.83746337890625, "learning_rate": 9.222567287784679e-06, "loss": 36.1562, "step": 9584 }, { "epoch": 228.21492537313432, "grad_norm": 23.025724411010742, "learning_rate": 9.22153209109731e-06, "loss": 35.4331, "step": 9585 }, { "epoch": 228.23880597014926, "grad_norm": 26.98846435546875, "learning_rate": 9.220496894409938e-06, "loss": 37.1912, "step": 9586 }, { "epoch": 228.26268656716417, "grad_norm": 22.991891860961914, "learning_rate": 9.219461697722569e-06, "loss": 37.0172, "step": 9587 }, { "epoch": 228.2865671641791, "grad_norm": 27.927915573120117, "learning_rate": 9.218426501035197e-06, "loss": 37.2899, "step": 9588 }, { "epoch": 228.31044776119404, "grad_norm": 25.7060546875, "learning_rate": 9.217391304347826e-06, "loss": 36.2329, "step": 9589 }, { "epoch": 228.33432835820895, "grad_norm": 31.828887939453125, "learning_rate": 9.216356107660456e-06, "loss": 37.1658, "step": 9590 }, { "epoch": 228.3582089552239, "grad_norm": 25.7882022857666, "learning_rate": 9.215320910973085e-06, "loss": 35.7852, "step": 9591 }, { "epoch": 228.3820895522388, "grad_norm": 24.80341339111328, "learning_rate": 9.214285714285715e-06, "loss": 37.3967, "step": 9592 }, { "epoch": 228.40597014925373, "grad_norm": 26.441181182861328, "learning_rate": 9.213250517598344e-06, "loss": 36.7858, "step": 9593 }, { "epoch": 228.42985074626867, "grad_norm": 27.830324172973633, "learning_rate": 9.212215320910974e-06, "loss": 35.8861, "step": 9594 }, { "epoch": 228.45373134328358, "grad_norm": 28.198192596435547, "learning_rate": 9.211180124223603e-06, "loss": 36.8701, "step": 9595 }, { "epoch": 228.47761194029852, "grad_norm": 27.90357208251953, "learning_rate": 9.210144927536233e-06, "loss": 36.896, "step": 9596 }, { "epoch": 228.50149253731342, "grad_norm": 27.799747467041016, "learning_rate": 9.209109730848863e-06, "loss": 36.4181, "step": 9597 }, { "epoch": 228.52537313432836, "grad_norm": 27.674747467041016, "learning_rate": 9.208074534161492e-06, "loss": 36.6183, "step": 9598 }, { "epoch": 228.54925373134327, "grad_norm": 22.056236267089844, "learning_rate": 9.20703933747412e-06, "loss": 35.8563, "step": 9599 }, { "epoch": 228.5731343283582, "grad_norm": 28.755813598632812, "learning_rate": 9.206004140786751e-06, "loss": 36.1678, "step": 9600 }, { "epoch": 228.59701492537314, "grad_norm": 24.192420959472656, "learning_rate": 9.204968944099379e-06, "loss": 36.8989, "step": 9601 }, { "epoch": 228.62089552238805, "grad_norm": 28.1363468170166, "learning_rate": 9.20393374741201e-06, "loss": 36.3853, "step": 9602 }, { "epoch": 228.644776119403, "grad_norm": 27.22496795654297, "learning_rate": 9.202898550724638e-06, "loss": 36.8836, "step": 9603 }, { "epoch": 228.6686567164179, "grad_norm": 27.100008010864258, "learning_rate": 9.201863354037268e-06, "loss": 36.5644, "step": 9604 }, { "epoch": 228.69253731343284, "grad_norm": 23.599897384643555, "learning_rate": 9.200828157349897e-06, "loss": 34.7451, "step": 9605 }, { "epoch": 228.71641791044777, "grad_norm": 27.625505447387695, "learning_rate": 9.199792960662527e-06, "loss": 36.1246, "step": 9606 }, { "epoch": 228.74029850746268, "grad_norm": 23.810094833374023, "learning_rate": 9.198757763975156e-06, "loss": 37.7482, "step": 9607 }, { "epoch": 228.76417910447762, "grad_norm": 30.633962631225586, "learning_rate": 9.197722567287786e-06, "loss": 37.4939, "step": 9608 }, { "epoch": 228.78805970149253, "grad_norm": 26.48391342163086, "learning_rate": 9.196687370600415e-06, "loss": 37.7659, "step": 9609 }, { "epoch": 228.81194029850747, "grad_norm": 27.97773551940918, "learning_rate": 9.195652173913045e-06, "loss": 36.5648, "step": 9610 }, { "epoch": 228.83582089552237, "grad_norm": 25.009822845458984, "learning_rate": 9.194616977225673e-06, "loss": 35.4764, "step": 9611 }, { "epoch": 228.8597014925373, "grad_norm": 25.571136474609375, "learning_rate": 9.193581780538304e-06, "loss": 35.1957, "step": 9612 }, { "epoch": 228.88358208955225, "grad_norm": 23.255353927612305, "learning_rate": 9.192546583850932e-06, "loss": 36.2558, "step": 9613 }, { "epoch": 228.90746268656716, "grad_norm": 29.044546127319336, "learning_rate": 9.191511387163561e-06, "loss": 37.4831, "step": 9614 }, { "epoch": 228.9313432835821, "grad_norm": 23.691383361816406, "learning_rate": 9.19047619047619e-06, "loss": 36.819, "step": 9615 }, { "epoch": 228.955223880597, "grad_norm": 32.514060974121094, "learning_rate": 9.18944099378882e-06, "loss": 36.1476, "step": 9616 }, { "epoch": 228.97910447761194, "grad_norm": 28.069005966186523, "learning_rate": 9.18840579710145e-06, "loss": 36.6969, "step": 9617 }, { "epoch": 229.0, "grad_norm": 26.643814086914062, "learning_rate": 9.18737060041408e-06, "loss": 31.8657, "step": 9618 }, { "epoch": 229.02388059701494, "grad_norm": 24.87017059326172, "learning_rate": 9.186335403726709e-06, "loss": 35.0354, "step": 9619 }, { "epoch": 229.04776119402985, "grad_norm": 29.640108108520508, "learning_rate": 9.185300207039338e-06, "loss": 36.069, "step": 9620 }, { "epoch": 229.07164179104478, "grad_norm": 24.502927780151367, "learning_rate": 9.184265010351968e-06, "loss": 36.3469, "step": 9621 }, { "epoch": 229.0955223880597, "grad_norm": 30.278432846069336, "learning_rate": 9.183229813664597e-06, "loss": 36.4906, "step": 9622 }, { "epoch": 229.11940298507463, "grad_norm": 28.056241989135742, "learning_rate": 9.182194616977227e-06, "loss": 36.9138, "step": 9623 }, { "epoch": 229.14328358208957, "grad_norm": 28.766387939453125, "learning_rate": 9.181159420289856e-06, "loss": 36.6277, "step": 9624 }, { "epoch": 229.16716417910447, "grad_norm": 25.354930877685547, "learning_rate": 9.180124223602486e-06, "loss": 35.8951, "step": 9625 }, { "epoch": 229.1910447761194, "grad_norm": 26.487565994262695, "learning_rate": 9.179089026915114e-06, "loss": 37.2082, "step": 9626 }, { "epoch": 229.21492537313432, "grad_norm": 21.196584701538086, "learning_rate": 9.178053830227745e-06, "loss": 36.8846, "step": 9627 }, { "epoch": 229.23880597014926, "grad_norm": 27.11143684387207, "learning_rate": 9.177018633540373e-06, "loss": 37.4834, "step": 9628 }, { "epoch": 229.26268656716417, "grad_norm": 23.48799705505371, "learning_rate": 9.175983436853002e-06, "loss": 37.0113, "step": 9629 }, { "epoch": 229.2865671641791, "grad_norm": 29.37694549560547, "learning_rate": 9.174948240165632e-06, "loss": 35.9114, "step": 9630 }, { "epoch": 229.31044776119404, "grad_norm": 23.161012649536133, "learning_rate": 9.173913043478261e-06, "loss": 36.5496, "step": 9631 }, { "epoch": 229.33432835820895, "grad_norm": 28.731334686279297, "learning_rate": 9.172877846790891e-06, "loss": 35.6109, "step": 9632 }, { "epoch": 229.3582089552239, "grad_norm": 25.209381103515625, "learning_rate": 9.17184265010352e-06, "loss": 36.8632, "step": 9633 }, { "epoch": 229.3820895522388, "grad_norm": 30.024168014526367, "learning_rate": 9.17080745341615e-06, "loss": 36.5665, "step": 9634 }, { "epoch": 229.40597014925373, "grad_norm": 25.45462989807129, "learning_rate": 9.16977225672878e-06, "loss": 35.5529, "step": 9635 }, { "epoch": 229.42985074626867, "grad_norm": 26.418886184692383, "learning_rate": 9.168737060041409e-06, "loss": 36.4459, "step": 9636 }, { "epoch": 229.45373134328358, "grad_norm": 24.408700942993164, "learning_rate": 9.167701863354039e-06, "loss": 36.6138, "step": 9637 }, { "epoch": 229.47761194029852, "grad_norm": 28.08294105529785, "learning_rate": 9.166666666666666e-06, "loss": 36.4635, "step": 9638 }, { "epoch": 229.50149253731342, "grad_norm": 22.752918243408203, "learning_rate": 9.165631469979298e-06, "loss": 36.985, "step": 9639 }, { "epoch": 229.52537313432836, "grad_norm": 28.713455200195312, "learning_rate": 9.164596273291925e-06, "loss": 36.6903, "step": 9640 }, { "epoch": 229.54925373134327, "grad_norm": 23.558683395385742, "learning_rate": 9.163561076604555e-06, "loss": 36.7125, "step": 9641 }, { "epoch": 229.5731343283582, "grad_norm": 27.721750259399414, "learning_rate": 9.162525879917185e-06, "loss": 36.0821, "step": 9642 }, { "epoch": 229.59701492537314, "grad_norm": 26.57756233215332, "learning_rate": 9.161490683229814e-06, "loss": 36.8507, "step": 9643 }, { "epoch": 229.62089552238805, "grad_norm": 29.08770751953125, "learning_rate": 9.160455486542444e-06, "loss": 35.9886, "step": 9644 }, { "epoch": 229.644776119403, "grad_norm": 26.117769241333008, "learning_rate": 9.159420289855073e-06, "loss": 36.9678, "step": 9645 }, { "epoch": 229.6686567164179, "grad_norm": 23.67546272277832, "learning_rate": 9.158385093167703e-06, "loss": 35.8266, "step": 9646 }, { "epoch": 229.69253731343284, "grad_norm": 23.151939392089844, "learning_rate": 9.157349896480332e-06, "loss": 36.994, "step": 9647 }, { "epoch": 229.71641791044777, "grad_norm": 28.121440887451172, "learning_rate": 9.156314699792962e-06, "loss": 36.6383, "step": 9648 }, { "epoch": 229.74029850746268, "grad_norm": 22.268630981445312, "learning_rate": 9.155279503105591e-06, "loss": 37.0435, "step": 9649 }, { "epoch": 229.76417910447762, "grad_norm": 30.70549964904785, "learning_rate": 9.15424430641822e-06, "loss": 35.9803, "step": 9650 }, { "epoch": 229.78805970149253, "grad_norm": 22.488264083862305, "learning_rate": 9.15320910973085e-06, "loss": 35.8651, "step": 9651 }, { "epoch": 229.81194029850747, "grad_norm": 29.461984634399414, "learning_rate": 9.15217391304348e-06, "loss": 36.7376, "step": 9652 }, { "epoch": 229.83582089552237, "grad_norm": 25.53286361694336, "learning_rate": 9.151138716356108e-06, "loss": 36.3119, "step": 9653 }, { "epoch": 229.8597014925373, "grad_norm": 26.687524795532227, "learning_rate": 9.150103519668739e-06, "loss": 36.9127, "step": 9654 }, { "epoch": 229.88358208955225, "grad_norm": 23.459135055541992, "learning_rate": 9.149068322981367e-06, "loss": 36.0665, "step": 9655 }, { "epoch": 229.90746268656716, "grad_norm": 26.695363998413086, "learning_rate": 9.148033126293996e-06, "loss": 36.1897, "step": 9656 }, { "epoch": 229.9313432835821, "grad_norm": 21.218103408813477, "learning_rate": 9.146997929606626e-06, "loss": 35.0119, "step": 9657 }, { "epoch": 229.955223880597, "grad_norm": 30.54604721069336, "learning_rate": 9.145962732919255e-06, "loss": 36.636, "step": 9658 }, { "epoch": 229.97910447761194, "grad_norm": 26.00942611694336, "learning_rate": 9.144927536231885e-06, "loss": 37.2953, "step": 9659 }, { "epoch": 230.0, "grad_norm": 22.77959632873535, "learning_rate": 9.143892339544514e-06, "loss": 33.3568, "step": 9660 }, { "epoch": 230.0, "step": 9660, "total_flos": 4.7489759270672614e+17, "train_loss": 3.205690276499367, "train_runtime": 25607.5331, "train_samples_per_second": 48.07, "train_steps_per_second": 0.377 }, { "epoch": 230.02388059701494, "grad_norm": 24.71234893798828, "learning_rate": 1e-05, "loss": 36.4127, "step": 9661 }, { "epoch": 230.04776119402985, "grad_norm": Infinity, "learning_rate": 9.99904761904762e-06, "loss": 43.3981, "step": 9662 }, { "epoch": 230.07164179104478, "grad_norm": Infinity, "learning_rate": 9.99904761904762e-06, "loss": 43.4953, "step": 9663 }, { "epoch": 230.0955223880597, "grad_norm": 375.4003601074219, "learning_rate": 9.99904761904762e-06, "loss": 42.9443, "step": 9664 }, { "epoch": 230.11940298507463, "grad_norm": 212.64337158203125, "learning_rate": 9.998095238095239e-06, "loss": 41.8767, "step": 9665 }, { "epoch": 230.14328358208957, "grad_norm": 81.78107452392578, "learning_rate": 9.997142857142858e-06, "loss": 39.5315, "step": 9666 }, { "epoch": 230.16716417910447, "grad_norm": 69.39502716064453, "learning_rate": 9.996190476190476e-06, "loss": 38.3034, "step": 9667 }, { "epoch": 230.1910447761194, "grad_norm": 58.64521026611328, "learning_rate": 9.995238095238095e-06, "loss": 36.6875, "step": 9668 }, { "epoch": 230.21492537313432, "grad_norm": 62.30952835083008, "learning_rate": 9.994285714285716e-06, "loss": 36.8947, "step": 9669 }, { "epoch": 230.23880597014926, "grad_norm": 46.44341278076172, "learning_rate": 9.993333333333333e-06, "loss": 36.5471, "step": 9670 }, { "epoch": 230.26268656716417, "grad_norm": 52.63033676147461, "learning_rate": 9.992380952380954e-06, "loss": 37.0309, "step": 9671 }, { "epoch": 230.2865671641791, "grad_norm": 33.28845977783203, "learning_rate": 9.991428571428573e-06, "loss": 36.6858, "step": 9672 }, { "epoch": 230.31044776119404, "grad_norm": 43.48398208618164, "learning_rate": 9.990476190476191e-06, "loss": 36.7173, "step": 9673 }, { "epoch": 230.33432835820895, "grad_norm": 30.336566925048828, "learning_rate": 9.98952380952381e-06, "loss": 36.4503, "step": 9674 }, { "epoch": 230.3582089552239, "grad_norm": 32.99346923828125, "learning_rate": 9.98857142857143e-06, "loss": 36.694, "step": 9675 }, { "epoch": 230.3820895522388, "grad_norm": 26.682479858398438, "learning_rate": 9.987619047619048e-06, "loss": 36.7674, "step": 9676 }, { "epoch": 230.40597014925373, "grad_norm": 23.91337776184082, "learning_rate": 9.986666666666667e-06, "loss": 36.2288, "step": 9677 }, { "epoch": 230.42985074626867, "grad_norm": 21.41556739807129, "learning_rate": 9.985714285714286e-06, "loss": 37.7821, "step": 9678 }, { "epoch": 230.45373134328358, "grad_norm": 21.785661697387695, "learning_rate": 9.984761904761907e-06, "loss": 36.516, "step": 9679 }, { "epoch": 230.47761194029852, "grad_norm": 21.943296432495117, "learning_rate": 9.983809523809524e-06, "loss": 36.4244, "step": 9680 }, { "epoch": 230.50149253731342, "grad_norm": 16.069475173950195, "learning_rate": 9.982857142857144e-06, "loss": 37.5188, "step": 9681 }, { "epoch": 230.52537313432836, "grad_norm": 22.269662857055664, "learning_rate": 9.981904761904763e-06, "loss": 35.9702, "step": 9682 }, { "epoch": 230.54925373134327, "grad_norm": 16.287111282348633, "learning_rate": 9.980952380952382e-06, "loss": 36.6675, "step": 9683 }, { "epoch": 230.5731343283582, "grad_norm": 25.83818817138672, "learning_rate": 9.980000000000001e-06, "loss": 36.7307, "step": 9684 }, { "epoch": 230.59701492537314, "grad_norm": 19.08870506286621, "learning_rate": 9.97904761904762e-06, "loss": 35.9015, "step": 9685 }, { "epoch": 230.62089552238805, "grad_norm": 19.956871032714844, "learning_rate": 9.978095238095239e-06, "loss": 36.4668, "step": 9686 }, { "epoch": 230.644776119403, "grad_norm": 23.43463897705078, "learning_rate": 9.977142857142858e-06, "loss": 37.2856, "step": 9687 }, { "epoch": 230.6686567164179, "grad_norm": 22.461307525634766, "learning_rate": 9.976190476190477e-06, "loss": 36.2338, "step": 9688 }, { "epoch": 230.69253731343284, "grad_norm": NaN, "learning_rate": 9.975238095238095e-06, "loss": 35.0579, "step": 9689 }, { "epoch": 230.71641791044777, "grad_norm": 15.775413513183594, "learning_rate": 9.975238095238095e-06, "loss": 37.179, "step": 9690 }, { "epoch": 230.74029850746268, "grad_norm": 16.799991607666016, "learning_rate": 9.974285714285716e-06, "loss": 36.5493, "step": 9691 }, { "epoch": 230.76417910447762, "grad_norm": 19.954538345336914, "learning_rate": 9.973333333333333e-06, "loss": 36.9768, "step": 9692 }, { "epoch": 230.78805970149253, "grad_norm": 18.49732208251953, "learning_rate": 9.972380952380954e-06, "loss": 37.5158, "step": 9693 }, { "epoch": 230.81194029850747, "grad_norm": 12.87289047241211, "learning_rate": 9.971428571428571e-06, "loss": 35.1848, "step": 9694 }, { "epoch": 230.83582089552237, "grad_norm": 15.607162475585938, "learning_rate": 9.970476190476192e-06, "loss": 37.1183, "step": 9695 }, { "epoch": 230.8597014925373, "grad_norm": 14.40938949584961, "learning_rate": 9.96952380952381e-06, "loss": 35.9702, "step": 9696 }, { "epoch": 230.88358208955225, "grad_norm": 14.662040710449219, "learning_rate": 9.96857142857143e-06, "loss": 35.782, "step": 9697 }, { "epoch": 230.90746268656716, "grad_norm": 13.855502128601074, "learning_rate": 9.967619047619048e-06, "loss": 36.739, "step": 9698 }, { "epoch": 230.9313432835821, "grad_norm": 16.36407470703125, "learning_rate": 9.966666666666667e-06, "loss": 37.031, "step": 9699 }, { "epoch": 230.955223880597, "grad_norm": 15.637924194335938, "learning_rate": 9.965714285714286e-06, "loss": 37.6073, "step": 9700 }, { "epoch": 230.97910447761194, "grad_norm": 15.853774070739746, "learning_rate": 9.964761904761907e-06, "loss": 36.0398, "step": 9701 }, { "epoch": 231.0, "grad_norm": 15.01644229888916, "learning_rate": 9.963809523809524e-06, "loss": 31.3042, "step": 9702 }, { "epoch": 231.02388059701494, "grad_norm": 13.843478202819824, "learning_rate": 9.962857142857145e-06, "loss": 36.955, "step": 9703 }, { "epoch": 231.04776119402985, "grad_norm": 19.220901489257812, "learning_rate": 9.961904761904763e-06, "loss": 37.508, "step": 9704 }, { "epoch": 231.07164179104478, "grad_norm": 15.885614395141602, "learning_rate": 9.960952380952382e-06, "loss": 37.5109, "step": 9705 }, { "epoch": 231.0955223880597, "grad_norm": 20.546024322509766, "learning_rate": 9.960000000000001e-06, "loss": 35.8497, "step": 9706 }, { "epoch": 231.11940298507463, "grad_norm": 15.986220359802246, "learning_rate": 9.95904761904762e-06, "loss": 36.7785, "step": 9707 }, { "epoch": 231.14328358208957, "grad_norm": 17.797225952148438, "learning_rate": 9.958095238095239e-06, "loss": 36.2075, "step": 9708 }, { "epoch": 231.16716417910447, "grad_norm": 17.886463165283203, "learning_rate": 9.957142857142858e-06, "loss": 35.7369, "step": 9709 }, { "epoch": 231.1910447761194, "grad_norm": 20.580068588256836, "learning_rate": 9.956190476190477e-06, "loss": 36.6398, "step": 9710 }, { "epoch": 231.21492537313432, "grad_norm": 17.89637565612793, "learning_rate": 9.955238095238096e-06, "loss": 36.6353, "step": 9711 }, { "epoch": 231.23880597014926, "grad_norm": 16.90445899963379, "learning_rate": 9.954285714285715e-06, "loss": 36.2476, "step": 9712 }, { "epoch": 231.26268656716417, "grad_norm": 14.622599601745605, "learning_rate": 9.953333333333333e-06, "loss": 35.352, "step": 9713 }, { "epoch": 231.2865671641791, "grad_norm": 17.468870162963867, "learning_rate": 9.952380952380954e-06, "loss": 35.306, "step": 9714 }, { "epoch": 231.31044776119404, "grad_norm": 16.23251724243164, "learning_rate": 9.951428571428571e-06, "loss": 36.6746, "step": 9715 }, { "epoch": 231.33432835820895, "grad_norm": 16.193260192871094, "learning_rate": 9.950476190476192e-06, "loss": 37.1332, "step": 9716 }, { "epoch": 231.3582089552239, "grad_norm": 13.897563934326172, "learning_rate": 9.94952380952381e-06, "loss": 35.1147, "step": 9717 }, { "epoch": 231.3820895522388, "grad_norm": 17.120468139648438, "learning_rate": 9.94857142857143e-06, "loss": 37.01, "step": 9718 }, { "epoch": 231.40597014925373, "grad_norm": 14.881387710571289, "learning_rate": 9.947619047619049e-06, "loss": 35.7699, "step": 9719 }, { "epoch": 231.42985074626867, "grad_norm": 14.89230728149414, "learning_rate": 9.946666666666667e-06, "loss": 35.4544, "step": 9720 }, { "epoch": 231.45373134328358, "grad_norm": 12.17119026184082, "learning_rate": 9.945714285714286e-06, "loss": 35.3585, "step": 9721 }, { "epoch": 231.47761194029852, "grad_norm": 19.992794036865234, "learning_rate": 9.944761904761905e-06, "loss": 35.4771, "step": 9722 }, { "epoch": 231.50149253731342, "grad_norm": 20.673452377319336, "learning_rate": 9.943809523809524e-06, "loss": 36.3602, "step": 9723 }, { "epoch": 231.52537313432836, "grad_norm": 18.165555953979492, "learning_rate": 9.942857142857145e-06, "loss": 37.6295, "step": 9724 }, { "epoch": 231.54925373134327, "grad_norm": 13.455978393554688, "learning_rate": 9.941904761904762e-06, "loss": 36.6793, "step": 9725 }, { "epoch": 231.5731343283582, "grad_norm": 16.61351776123047, "learning_rate": 9.940952380952382e-06, "loss": 36.8213, "step": 9726 }, { "epoch": 231.59701492537314, "grad_norm": 17.952800750732422, "learning_rate": 9.940000000000001e-06, "loss": 36.6038, "step": 9727 }, { "epoch": 231.62089552238805, "grad_norm": 20.534774780273438, "learning_rate": 9.93904761904762e-06, "loss": 36.165, "step": 9728 }, { "epoch": 231.644776119403, "grad_norm": 14.053009033203125, "learning_rate": 9.93809523809524e-06, "loss": 35.3623, "step": 9729 }, { "epoch": 231.6686567164179, "grad_norm": 22.43195152282715, "learning_rate": 9.937142857142858e-06, "loss": 36.9735, "step": 9730 }, { "epoch": 231.69253731343284, "grad_norm": 21.216463088989258, "learning_rate": 9.936190476190477e-06, "loss": 36.5385, "step": 9731 }, { "epoch": 231.71641791044777, "grad_norm": 16.259366989135742, "learning_rate": 9.935238095238096e-06, "loss": 36.7006, "step": 9732 }, { "epoch": 231.74029850746268, "grad_norm": 29.90939712524414, "learning_rate": 9.934285714285715e-06, "loss": 36.2747, "step": 9733 }, { "epoch": 231.76417910447762, "grad_norm": NaN, "learning_rate": 9.933333333333334e-06, "loss": 50.0571, "step": 9734 }, { "epoch": 231.78805970149253, "grad_norm": 18.765172958374023, "learning_rate": 9.933333333333334e-06, "loss": 36.055, "step": 9735 }, { "epoch": 231.81194029850747, "grad_norm": 33.186214447021484, "learning_rate": 9.932380952380953e-06, "loss": 37.1991, "step": 9736 }, { "epoch": 231.83582089552237, "grad_norm": 22.32003402709961, "learning_rate": 9.931428571428571e-06, "loss": 36.8529, "step": 9737 }, { "epoch": 231.8597014925373, "grad_norm": 34.36970520019531, "learning_rate": 9.930476190476192e-06, "loss": 37.8959, "step": 9738 }, { "epoch": 231.88358208955225, "grad_norm": 22.484411239624023, "learning_rate": 9.92952380952381e-06, "loss": 35.1009, "step": 9739 }, { "epoch": 231.90746268656716, "grad_norm": 35.59208679199219, "learning_rate": 9.92857142857143e-06, "loss": 38.0479, "step": 9740 }, { "epoch": 231.9313432835821, "grad_norm": 24.744535446166992, "learning_rate": 9.927619047619049e-06, "loss": 36.7969, "step": 9741 }, { "epoch": 231.955223880597, "grad_norm": 35.06355285644531, "learning_rate": 9.926666666666668e-06, "loss": 36.65, "step": 9742 }, { "epoch": 231.97910447761194, "grad_norm": 29.507776260375977, "learning_rate": 9.925714285714287e-06, "loss": 36.2103, "step": 9743 }, { "epoch": 232.0, "grad_norm": 31.731843948364258, "learning_rate": 9.924761904761905e-06, "loss": 30.5489, "step": 9744 }, { "epoch": 232.02388059701494, "grad_norm": 31.205385208129883, "learning_rate": 9.923809523809524e-06, "loss": 36.8333, "step": 9745 }, { "epoch": 232.04776119402985, "grad_norm": 28.050081253051758, "learning_rate": 9.922857142857145e-06, "loss": 36.6318, "step": 9746 }, { "epoch": 232.07164179104478, "grad_norm": 26.250442504882812, "learning_rate": 9.921904761904762e-06, "loss": 36.0135, "step": 9747 }, { "epoch": 232.0955223880597, "grad_norm": 27.33561897277832, "learning_rate": 9.920952380952383e-06, "loss": 36.6887, "step": 9748 }, { "epoch": 232.11940298507463, "grad_norm": 23.297338485717773, "learning_rate": 9.920000000000002e-06, "loss": 36.5706, "step": 9749 }, { "epoch": 232.14328358208957, "grad_norm": 31.878978729248047, "learning_rate": 9.91904761904762e-06, "loss": 34.7229, "step": 9750 }, { "epoch": 232.16716417910447, "grad_norm": NaN, "learning_rate": 9.91809523809524e-06, "loss": 30.7021, "step": 9751 }, { "epoch": 232.1910447761194, "grad_norm": 27.822471618652344, "learning_rate": 9.91809523809524e-06, "loss": 35.5529, "step": 9752 }, { "epoch": 232.21492537313432, "grad_norm": 28.86005973815918, "learning_rate": 9.917142857142857e-06, "loss": 36.7525, "step": 9753 }, { "epoch": 232.23880597014926, "grad_norm": 28.202646255493164, "learning_rate": 9.916190476190477e-06, "loss": 37.5697, "step": 9754 }, { "epoch": 232.26268656716417, "grad_norm": 31.08536720275879, "learning_rate": 9.915238095238096e-06, "loss": 36.452, "step": 9755 }, { "epoch": 232.2865671641791, "grad_norm": 27.056488037109375, "learning_rate": 9.914285714285715e-06, "loss": 36.5263, "step": 9756 }, { "epoch": 232.31044776119404, "grad_norm": 32.32611083984375, "learning_rate": 9.913333333333334e-06, "loss": 35.9751, "step": 9757 }, { "epoch": 232.33432835820895, "grad_norm": 29.796775817871094, "learning_rate": 9.912380952380953e-06, "loss": 36.5457, "step": 9758 }, { "epoch": 232.3582089552239, "grad_norm": 27.9810733795166, "learning_rate": 9.911428571428572e-06, "loss": 36.3721, "step": 9759 }, { "epoch": 232.3820895522388, "grad_norm": 25.292329788208008, "learning_rate": 9.910476190476192e-06, "loss": 35.6735, "step": 9760 }, { "epoch": 232.40597014925373, "grad_norm": 31.544578552246094, "learning_rate": 9.90952380952381e-06, "loss": 35.9049, "step": 9761 }, { "epoch": 232.42985074626867, "grad_norm": 26.03896713256836, "learning_rate": 9.90857142857143e-06, "loss": 35.0874, "step": 9762 }, { "epoch": 232.45373134328358, "grad_norm": 31.23866081237793, "learning_rate": 9.907619047619049e-06, "loss": 35.927, "step": 9763 }, { "epoch": 232.47761194029852, "grad_norm": 25.877737045288086, "learning_rate": 9.906666666666668e-06, "loss": 36.2758, "step": 9764 }, { "epoch": 232.50149253731342, "grad_norm": 28.32729721069336, "learning_rate": 9.905714285714287e-06, "loss": 35.9696, "step": 9765 }, { "epoch": 232.52537313432836, "grad_norm": 26.140939712524414, "learning_rate": 9.904761904761906e-06, "loss": 37.0429, "step": 9766 }, { "epoch": 232.54925373134327, "grad_norm": 29.77901268005371, "learning_rate": 9.903809523809524e-06, "loss": 36.6883, "step": 9767 }, { "epoch": 232.5731343283582, "grad_norm": 28.25278091430664, "learning_rate": 9.902857142857143e-06, "loss": 36.8882, "step": 9768 }, { "epoch": 232.59701492537314, "grad_norm": 30.905784606933594, "learning_rate": 9.901904761904762e-06, "loss": 37.1189, "step": 9769 }, { "epoch": 232.62089552238805, "grad_norm": 24.243087768554688, "learning_rate": 9.900952380952383e-06, "loss": 36.439, "step": 9770 }, { "epoch": 232.644776119403, "grad_norm": 28.366220474243164, "learning_rate": 9.9e-06, "loss": 35.9505, "step": 9771 }, { "epoch": 232.6686567164179, "grad_norm": 24.840042114257812, "learning_rate": 9.89904761904762e-06, "loss": 35.9767, "step": 9772 }, { "epoch": 232.69253731343284, "grad_norm": 27.562599182128906, "learning_rate": 9.89809523809524e-06, "loss": 37.1143, "step": 9773 }, { "epoch": 232.71641791044777, "grad_norm": 28.17928695678711, "learning_rate": 9.897142857142858e-06, "loss": 36.3648, "step": 9774 }, { "epoch": 232.74029850746268, "grad_norm": 26.700790405273438, "learning_rate": 9.896190476190477e-06, "loss": 36.998, "step": 9775 }, { "epoch": 232.76417910447762, "grad_norm": 24.507808685302734, "learning_rate": 9.895238095238096e-06, "loss": 36.2914, "step": 9776 }, { "epoch": 232.78805970149253, "grad_norm": 28.5314998626709, "learning_rate": 9.894285714285715e-06, "loss": 36.9364, "step": 9777 }, { "epoch": 232.81194029850747, "grad_norm": 23.566978454589844, "learning_rate": 9.893333333333334e-06, "loss": 37.4334, "step": 9778 }, { "epoch": 232.83582089552237, "grad_norm": 32.451316833496094, "learning_rate": 9.892380952380953e-06, "loss": 36.7729, "step": 9779 }, { "epoch": 232.8597014925373, "grad_norm": 28.89414405822754, "learning_rate": 9.891428571428572e-06, "loss": 36.2706, "step": 9780 }, { "epoch": 232.88358208955225, "grad_norm": 26.81434440612793, "learning_rate": 9.89047619047619e-06, "loss": 35.4961, "step": 9781 }, { "epoch": 232.90746268656716, "grad_norm": 24.25072479248047, "learning_rate": 9.88952380952381e-06, "loss": 36.3199, "step": 9782 }, { "epoch": 232.9313432835821, "grad_norm": 28.760164260864258, "learning_rate": 9.88857142857143e-06, "loss": 36.1606, "step": 9783 }, { "epoch": 232.955223880597, "grad_norm": 26.750282287597656, "learning_rate": 9.887619047619047e-06, "loss": 36.8096, "step": 9784 }, { "epoch": 232.97910447761194, "grad_norm": 29.684532165527344, "learning_rate": 9.886666666666668e-06, "loss": 35.3498, "step": 9785 }, { "epoch": 233.0, "grad_norm": 21.966087341308594, "learning_rate": 9.885714285714287e-06, "loss": 31.7937, "step": 9786 }, { "epoch": 233.02388059701494, "grad_norm": 26.098487854003906, "learning_rate": 9.884761904761906e-06, "loss": 35.8188, "step": 9787 }, { "epoch": 233.04776119402985, "grad_norm": 21.66274642944336, "learning_rate": 9.883809523809525e-06, "loss": 37.2063, "step": 9788 }, { "epoch": 233.07164179104478, "grad_norm": 26.546297073364258, "learning_rate": 9.882857142857144e-06, "loss": 36.5433, "step": 9789 }, { "epoch": 233.0955223880597, "grad_norm": 22.168907165527344, "learning_rate": 9.881904761904762e-06, "loss": 34.894, "step": 9790 }, { "epoch": 233.11940298507463, "grad_norm": 37.45732116699219, "learning_rate": 9.880952380952381e-06, "loss": 36.3476, "step": 9791 }, { "epoch": 233.14328358208957, "grad_norm": 26.97699546813965, "learning_rate": 9.88e-06, "loss": 35.4302, "step": 9792 }, { "epoch": 233.16716417910447, "grad_norm": 27.618061065673828, "learning_rate": 9.879047619047621e-06, "loss": 36.9248, "step": 9793 }, { "epoch": 233.1910447761194, "grad_norm": 27.036169052124023, "learning_rate": 9.878095238095238e-06, "loss": 35.6113, "step": 9794 }, { "epoch": 233.21492537313432, "grad_norm": 27.536331176757812, "learning_rate": 9.877142857142859e-06, "loss": 36.8181, "step": 9795 }, { "epoch": 233.23880597014926, "grad_norm": 24.680740356445312, "learning_rate": 9.876190476190478e-06, "loss": 37.4333, "step": 9796 }, { "epoch": 233.26268656716417, "grad_norm": 28.902076721191406, "learning_rate": 9.875238095238095e-06, "loss": 36.5323, "step": 9797 }, { "epoch": 233.2865671641791, "grad_norm": 27.464153289794922, "learning_rate": 9.874285714285715e-06, "loss": 35.7346, "step": 9798 }, { "epoch": 233.31044776119404, "grad_norm": 29.325603485107422, "learning_rate": 9.873333333333334e-06, "loss": 36.2152, "step": 9799 }, { "epoch": 233.33432835820895, "grad_norm": 27.650089263916016, "learning_rate": 9.872380952380953e-06, "loss": 36.6219, "step": 9800 }, { "epoch": 233.3582089552239, "grad_norm": 26.96459197998047, "learning_rate": 9.871428571428572e-06, "loss": 36.8084, "step": 9801 }, { "epoch": 233.3820895522388, "grad_norm": 23.522382736206055, "learning_rate": 9.870476190476191e-06, "loss": 36.5991, "step": 9802 }, { "epoch": 233.40597014925373, "grad_norm": 29.002304077148438, "learning_rate": 9.86952380952381e-06, "loss": 37.535, "step": 9803 }, { "epoch": 233.42985074626867, "grad_norm": 23.243408203125, "learning_rate": 9.86857142857143e-06, "loss": 36.7426, "step": 9804 }, { "epoch": 233.45373134328358, "grad_norm": 28.153141021728516, "learning_rate": 9.867619047619048e-06, "loss": 37.1516, "step": 9805 }, { "epoch": 233.47761194029852, "grad_norm": 25.485090255737305, "learning_rate": 9.866666666666668e-06, "loss": 36.4648, "step": 9806 }, { "epoch": 233.50149253731342, "grad_norm": 27.776960372924805, "learning_rate": 9.865714285714285e-06, "loss": 34.8475, "step": 9807 }, { "epoch": 233.52537313432836, "grad_norm": 26.18765640258789, "learning_rate": 9.864761904761906e-06, "loss": 36.1718, "step": 9808 }, { "epoch": 233.54925373134327, "grad_norm": 26.28923225402832, "learning_rate": 9.863809523809525e-06, "loss": 37.6577, "step": 9809 }, { "epoch": 233.5731343283582, "grad_norm": 24.869470596313477, "learning_rate": 9.862857142857144e-06, "loss": 35.8387, "step": 9810 }, { "epoch": 233.59701492537314, "grad_norm": 25.25719451904297, "learning_rate": 9.861904761904763e-06, "loss": 35.1382, "step": 9811 }, { "epoch": 233.62089552238805, "grad_norm": 23.99881935119629, "learning_rate": 9.860952380952382e-06, "loss": 36.8773, "step": 9812 }, { "epoch": 233.644776119403, "grad_norm": 26.899507522583008, "learning_rate": 9.86e-06, "loss": 36.1116, "step": 9813 }, { "epoch": 233.6686567164179, "grad_norm": 23.04312515258789, "learning_rate": 9.859047619047621e-06, "loss": 36.3473, "step": 9814 }, { "epoch": 233.69253731343284, "grad_norm": 27.696046829223633, "learning_rate": 9.858095238095238e-06, "loss": 36.6766, "step": 9815 }, { "epoch": 233.71641791044777, "grad_norm": 22.96893310546875, "learning_rate": 9.857142857142859e-06, "loss": 36.3639, "step": 9816 }, { "epoch": 233.74029850746268, "grad_norm": 27.865476608276367, "learning_rate": 9.856190476190478e-06, "loss": 35.4878, "step": 9817 }, { "epoch": 233.76417910447762, "grad_norm": 24.344972610473633, "learning_rate": 9.855238095238095e-06, "loss": 35.4155, "step": 9818 }, { "epoch": 233.78805970149253, "grad_norm": 26.939016342163086, "learning_rate": 9.854285714285716e-06, "loss": 37.4377, "step": 9819 }, { "epoch": 233.81194029850747, "grad_norm": 22.826608657836914, "learning_rate": 9.853333333333334e-06, "loss": 36.5141, "step": 9820 }, { "epoch": 233.83582089552237, "grad_norm": 25.923465728759766, "learning_rate": 9.852380952380953e-06, "loss": 35.8261, "step": 9821 }, { "epoch": 233.8597014925373, "grad_norm": 24.230087280273438, "learning_rate": 9.851428571428572e-06, "loss": 36.7688, "step": 9822 }, { "epoch": 233.88358208955225, "grad_norm": 27.065654754638672, "learning_rate": 9.850476190476191e-06, "loss": 35.6492, "step": 9823 }, { "epoch": 233.90746268656716, "grad_norm": 21.02733612060547, "learning_rate": 9.84952380952381e-06, "loss": 35.9388, "step": 9824 }, { "epoch": 233.9313432835821, "grad_norm": 26.16006088256836, "learning_rate": 9.848571428571429e-06, "loss": 35.9996, "step": 9825 }, { "epoch": 233.955223880597, "grad_norm": 20.3577938079834, "learning_rate": 9.847619047619048e-06, "loss": 37.1911, "step": 9826 }, { "epoch": 233.97910447761194, "grad_norm": 20.499956130981445, "learning_rate": 9.846666666666668e-06, "loss": 35.8032, "step": 9827 }, { "epoch": 234.0, "grad_norm": 19.88180160522461, "learning_rate": 9.845714285714286e-06, "loss": 31.2912, "step": 9828 }, { "epoch": 234.02388059701494, "grad_norm": 13.999608993530273, "learning_rate": 9.844761904761906e-06, "loss": 35.5007, "step": 9829 }, { "epoch": 234.04776119402985, "grad_norm": 25.21092987060547, "learning_rate": 9.843809523809525e-06, "loss": 35.4806, "step": 9830 }, { "epoch": 234.07164179104478, "grad_norm": 16.646089553833008, "learning_rate": 9.842857142857144e-06, "loss": 36.6435, "step": 9831 }, { "epoch": 234.0955223880597, "grad_norm": 25.66943359375, "learning_rate": 9.841904761904763e-06, "loss": 36.7818, "step": 9832 }, { "epoch": 234.11940298507463, "grad_norm": 20.8841495513916, "learning_rate": 9.840952380952382e-06, "loss": 36.266, "step": 9833 }, { "epoch": 234.14328358208957, "grad_norm": 21.753887176513672, "learning_rate": 9.84e-06, "loss": 35.8203, "step": 9834 }, { "epoch": 234.16716417910447, "grad_norm": 18.985937118530273, "learning_rate": 9.83904761904762e-06, "loss": 34.8694, "step": 9835 }, { "epoch": 234.1910447761194, "grad_norm": 17.199949264526367, "learning_rate": 9.838095238095238e-06, "loss": 37.2323, "step": 9836 }, { "epoch": 234.21492537313432, "grad_norm": 19.559226989746094, "learning_rate": 9.837142857142859e-06, "loss": 37.2003, "step": 9837 }, { "epoch": 234.23880597014926, "grad_norm": 16.026540756225586, "learning_rate": 9.836190476190476e-06, "loss": 35.7033, "step": 9838 }, { "epoch": 234.26268656716417, "grad_norm": 17.020076751708984, "learning_rate": 9.835238095238097e-06, "loss": 36.3054, "step": 9839 }, { "epoch": 234.2865671641791, "grad_norm": 18.18143081665039, "learning_rate": 9.834285714285716e-06, "loss": 36.8279, "step": 9840 }, { "epoch": 234.31044776119404, "grad_norm": 17.629560470581055, "learning_rate": 9.833333333333333e-06, "loss": 35.4263, "step": 9841 }, { "epoch": 234.33432835820895, "grad_norm": 14.195773124694824, "learning_rate": 9.832380952380954e-06, "loss": 36.3135, "step": 9842 }, { "epoch": 234.3582089552239, "grad_norm": 18.136837005615234, "learning_rate": 9.831428571428572e-06, "loss": 36.7588, "step": 9843 }, { "epoch": 234.3820895522388, "grad_norm": 17.47150993347168, "learning_rate": 9.830476190476191e-06, "loss": 36.3127, "step": 9844 }, { "epoch": 234.40597014925373, "grad_norm": 16.70725440979004, "learning_rate": 9.82952380952381e-06, "loss": 37.7066, "step": 9845 }, { "epoch": 234.42985074626867, "grad_norm": 19.580862045288086, "learning_rate": 9.828571428571429e-06, "loss": 36.304, "step": 9846 }, { "epoch": 234.45373134328358, "grad_norm": 20.10016441345215, "learning_rate": 9.827619047619048e-06, "loss": 35.801, "step": 9847 }, { "epoch": 234.47761194029852, "grad_norm": 18.814186096191406, "learning_rate": 9.826666666666667e-06, "loss": 36.9099, "step": 9848 }, { "epoch": 234.50149253731342, "grad_norm": 19.953445434570312, "learning_rate": 9.825714285714286e-06, "loss": 35.3052, "step": 9849 }, { "epoch": 234.52537313432836, "grad_norm": 18.7332763671875, "learning_rate": 9.824761904761906e-06, "loss": 36.9053, "step": 9850 }, { "epoch": 234.54925373134327, "grad_norm": 13.999094009399414, "learning_rate": 9.823809523809524e-06, "loss": 36.9181, "step": 9851 }, { "epoch": 234.5731343283582, "grad_norm": 18.283750534057617, "learning_rate": 9.822857142857144e-06, "loss": 36.444, "step": 9852 }, { "epoch": 234.59701492537314, "grad_norm": 22.197336196899414, "learning_rate": 9.821904761904763e-06, "loss": 35.4112, "step": 9853 }, { "epoch": 234.62089552238805, "grad_norm": 15.777986526489258, "learning_rate": 9.820952380952382e-06, "loss": 35.8148, "step": 9854 }, { "epoch": 234.644776119403, "grad_norm": 17.59519386291504, "learning_rate": 9.820000000000001e-06, "loss": 36.5398, "step": 9855 }, { "epoch": 234.6686567164179, "grad_norm": 19.24283790588379, "learning_rate": 9.81904761904762e-06, "loss": 35.8213, "step": 9856 }, { "epoch": 234.69253731343284, "grad_norm": 15.129947662353516, "learning_rate": 9.818095238095239e-06, "loss": 37.5821, "step": 9857 }, { "epoch": 234.71641791044777, "grad_norm": 19.374385833740234, "learning_rate": 9.81714285714286e-06, "loss": 36.8088, "step": 9858 }, { "epoch": 234.74029850746268, "grad_norm": 18.22612190246582, "learning_rate": 9.816190476190476e-06, "loss": 36.6973, "step": 9859 }, { "epoch": 234.76417910447762, "grad_norm": 16.317365646362305, "learning_rate": 9.815238095238097e-06, "loss": 37.3607, "step": 9860 }, { "epoch": 234.78805970149253, "grad_norm": 16.01597785949707, "learning_rate": 9.814285714285716e-06, "loss": 35.5199, "step": 9861 }, { "epoch": 234.81194029850747, "grad_norm": 17.750478744506836, "learning_rate": 9.813333333333333e-06, "loss": 35.7935, "step": 9862 }, { "epoch": 234.83582089552237, "grad_norm": 13.788301467895508, "learning_rate": 9.812380952380954e-06, "loss": 36.5824, "step": 9863 }, { "epoch": 234.8597014925373, "grad_norm": 17.28419303894043, "learning_rate": 9.811428571428571e-06, "loss": 36.2316, "step": 9864 }, { "epoch": 234.88358208955225, "grad_norm": 15.71476936340332, "learning_rate": 9.810476190476191e-06, "loss": 36.9027, "step": 9865 }, { "epoch": 234.90746268656716, "grad_norm": 20.27968406677246, "learning_rate": 9.80952380952381e-06, "loss": 35.868, "step": 9866 }, { "epoch": 234.9313432835821, "grad_norm": 16.62568473815918, "learning_rate": 9.80857142857143e-06, "loss": 36.1727, "step": 9867 }, { "epoch": 234.955223880597, "grad_norm": 21.846633911132812, "learning_rate": 9.807619047619048e-06, "loss": 35.1444, "step": 9868 }, { "epoch": 234.97910447761194, "grad_norm": 16.76075553894043, "learning_rate": 9.806666666666667e-06, "loss": 35.1602, "step": 9869 }, { "epoch": 235.0, "grad_norm": 16.464860916137695, "learning_rate": 9.805714285714286e-06, "loss": 32.9988, "step": 9870 }, { "epoch": 235.02388059701494, "grad_norm": 22.344356536865234, "learning_rate": 9.804761904761907e-06, "loss": 36.0721, "step": 9871 }, { "epoch": 235.04776119402985, "grad_norm": 16.429410934448242, "learning_rate": 9.803809523809524e-06, "loss": 35.5533, "step": 9872 }, { "epoch": 235.07164179104478, "grad_norm": 26.959598541259766, "learning_rate": 9.802857142857144e-06, "loss": 36.5131, "step": 9873 }, { "epoch": 235.0955223880597, "grad_norm": 19.566802978515625, "learning_rate": 9.801904761904763e-06, "loss": 35.2014, "step": 9874 }, { "epoch": 235.11940298507463, "grad_norm": 22.33953094482422, "learning_rate": 9.800952380952382e-06, "loss": 36.3257, "step": 9875 }, { "epoch": 235.14328358208957, "grad_norm": 19.54528045654297, "learning_rate": 9.800000000000001e-06, "loss": 36.3505, "step": 9876 }, { "epoch": 235.16716417910447, "grad_norm": 19.99598503112793, "learning_rate": 9.79904761904762e-06, "loss": 36.5828, "step": 9877 }, { "epoch": 235.1910447761194, "grad_norm": 22.05472755432129, "learning_rate": 9.798095238095239e-06, "loss": 36.444, "step": 9878 }, { "epoch": 235.21492537313432, "grad_norm": 17.299413681030273, "learning_rate": 9.797142857142858e-06, "loss": 36.8062, "step": 9879 }, { "epoch": 235.23880597014926, "grad_norm": 17.942642211914062, "learning_rate": 9.796190476190477e-06, "loss": 36.4663, "step": 9880 }, { "epoch": 235.26268656716417, "grad_norm": 18.936473846435547, "learning_rate": 9.795238095238097e-06, "loss": 36.2818, "step": 9881 }, { "epoch": 235.2865671641791, "grad_norm": 15.805120468139648, "learning_rate": 9.794285714285714e-06, "loss": 36.8603, "step": 9882 }, { "epoch": 235.31044776119404, "grad_norm": 18.38108253479004, "learning_rate": 9.793333333333333e-06, "loss": 36.0332, "step": 9883 }, { "epoch": 235.33432835820895, "grad_norm": 17.744592666625977, "learning_rate": 9.792380952380954e-06, "loss": 35.8128, "step": 9884 }, { "epoch": 235.3582089552239, "grad_norm": 15.176288604736328, "learning_rate": 9.791428571428571e-06, "loss": 36.1821, "step": 9885 }, { "epoch": 235.3820895522388, "grad_norm": 16.138187408447266, "learning_rate": 9.790476190476192e-06, "loss": 36.0667, "step": 9886 }, { "epoch": 235.40597014925373, "grad_norm": 15.533767700195312, "learning_rate": 9.78952380952381e-06, "loss": 36.667, "step": 9887 }, { "epoch": 235.42985074626867, "grad_norm": 14.583806991577148, "learning_rate": 9.78857142857143e-06, "loss": 35.383, "step": 9888 }, { "epoch": 235.45373134328358, "grad_norm": 19.801340103149414, "learning_rate": 9.787619047619048e-06, "loss": 35.8748, "step": 9889 }, { "epoch": 235.47761194029852, "grad_norm": 16.901988983154297, "learning_rate": 9.786666666666667e-06, "loss": 36.6985, "step": 9890 }, { "epoch": 235.50149253731342, "grad_norm": 17.895591735839844, "learning_rate": 9.785714285714286e-06, "loss": 36.9642, "step": 9891 }, { "epoch": 235.52537313432836, "grad_norm": 21.717445373535156, "learning_rate": 9.784761904761905e-06, "loss": 35.3154, "step": 9892 }, { "epoch": 235.54925373134327, "grad_norm": 15.22280216217041, "learning_rate": 9.783809523809524e-06, "loss": 35.9869, "step": 9893 }, { "epoch": 235.5731343283582, "grad_norm": 16.396968841552734, "learning_rate": 9.782857142857145e-06, "loss": 36.8867, "step": 9894 }, { "epoch": 235.59701492537314, "grad_norm": 19.450927734375, "learning_rate": 9.781904761904762e-06, "loss": 35.575, "step": 9895 }, { "epoch": 235.62089552238805, "grad_norm": 17.347875595092773, "learning_rate": 9.780952380952382e-06, "loss": 35.4163, "step": 9896 }, { "epoch": 235.644776119403, "grad_norm": 16.20336151123047, "learning_rate": 9.780000000000001e-06, "loss": 36.0011, "step": 9897 }, { "epoch": 235.6686567164179, "grad_norm": 17.76625633239746, "learning_rate": 9.77904761904762e-06, "loss": 37.8143, "step": 9898 }, { "epoch": 235.69253731343284, "grad_norm": 16.26557159423828, "learning_rate": 9.778095238095239e-06, "loss": 37.094, "step": 9899 }, { "epoch": 235.71641791044777, "grad_norm": 16.70842170715332, "learning_rate": 9.777142857142858e-06, "loss": 35.9713, "step": 9900 }, { "epoch": 235.74029850746268, "grad_norm": 15.71903133392334, "learning_rate": 9.776190476190477e-06, "loss": 36.8651, "step": 9901 }, { "epoch": 235.76417910447762, "grad_norm": 15.551657676696777, "learning_rate": 9.775238095238096e-06, "loss": 36.7437, "step": 9902 }, { "epoch": 235.78805970149253, "grad_norm": 15.175107955932617, "learning_rate": 9.774285714285715e-06, "loss": 36.5867, "step": 9903 }, { "epoch": 235.81194029850747, "grad_norm": 15.5099458694458, "learning_rate": 9.773333333333335e-06, "loss": 36.4173, "step": 9904 }, { "epoch": 235.83582089552237, "grad_norm": 16.476665496826172, "learning_rate": 9.772380952380952e-06, "loss": 36.1536, "step": 9905 }, { "epoch": 235.8597014925373, "grad_norm": 15.383299827575684, "learning_rate": 9.771428571428571e-06, "loss": 36.9653, "step": 9906 }, { "epoch": 235.88358208955225, "grad_norm": 15.790655136108398, "learning_rate": 9.770476190476192e-06, "loss": 36.1127, "step": 9907 }, { "epoch": 235.90746268656716, "grad_norm": 19.14923858642578, "learning_rate": 9.769523809523809e-06, "loss": 35.8418, "step": 9908 }, { "epoch": 235.9313432835821, "grad_norm": 17.147768020629883, "learning_rate": 9.76857142857143e-06, "loss": 36.0894, "step": 9909 }, { "epoch": 235.955223880597, "grad_norm": 16.384170532226562, "learning_rate": 9.767619047619049e-06, "loss": 35.4527, "step": 9910 }, { "epoch": 235.97910447761194, "grad_norm": 21.327133178710938, "learning_rate": 9.766666666666667e-06, "loss": 36.346, "step": 9911 }, { "epoch": 236.0, "grad_norm": 16.97562599182129, "learning_rate": 9.765714285714286e-06, "loss": 31.1273, "step": 9912 }, { "epoch": 236.02388059701494, "grad_norm": 16.29657554626465, "learning_rate": 9.764761904761905e-06, "loss": 37.1635, "step": 9913 }, { "epoch": 236.04776119402985, "grad_norm": 22.797019958496094, "learning_rate": 9.763809523809524e-06, "loss": 36.2798, "step": 9914 }, { "epoch": 236.07164179104478, "grad_norm": 15.591317176818848, "learning_rate": 9.762857142857145e-06, "loss": 35.9309, "step": 9915 }, { "epoch": 236.0955223880597, "grad_norm": 20.136259078979492, "learning_rate": 9.761904761904762e-06, "loss": 36.8937, "step": 9916 }, { "epoch": 236.11940298507463, "grad_norm": 16.15099334716797, "learning_rate": 9.760952380952383e-06, "loss": 34.3163, "step": 9917 }, { "epoch": 236.14328358208957, "grad_norm": 18.136035919189453, "learning_rate": 9.760000000000001e-06, "loss": 34.7456, "step": 9918 }, { "epoch": 236.16716417910447, "grad_norm": 12.46219253540039, "learning_rate": 9.75904761904762e-06, "loss": 36.1773, "step": 9919 }, { "epoch": 236.1910447761194, "grad_norm": 16.850460052490234, "learning_rate": 9.75809523809524e-06, "loss": 36.8295, "step": 9920 }, { "epoch": 236.21492537313432, "grad_norm": 18.266088485717773, "learning_rate": 9.757142857142858e-06, "loss": 35.9171, "step": 9921 }, { "epoch": 236.23880597014926, "grad_norm": 14.196195602416992, "learning_rate": 9.756190476190477e-06, "loss": 36.9765, "step": 9922 }, { "epoch": 236.26268656716417, "grad_norm": 19.22831153869629, "learning_rate": 9.755238095238096e-06, "loss": 37.2524, "step": 9923 }, { "epoch": 236.2865671641791, "grad_norm": 17.952713012695312, "learning_rate": 9.754285714285715e-06, "loss": 36.3808, "step": 9924 }, { "epoch": 236.31044776119404, "grad_norm": 12.137635231018066, "learning_rate": 9.753333333333335e-06, "loss": 36.627, "step": 9925 }, { "epoch": 236.33432835820895, "grad_norm": 18.9188289642334, "learning_rate": 9.752380952380953e-06, "loss": 36.0876, "step": 9926 }, { "epoch": 236.3582089552239, "grad_norm": 21.2408390045166, "learning_rate": 9.751428571428571e-06, "loss": 34.7637, "step": 9927 }, { "epoch": 236.3820895522388, "grad_norm": 13.691661834716797, "learning_rate": 9.750476190476192e-06, "loss": 36.667, "step": 9928 }, { "epoch": 236.40597014925373, "grad_norm": 17.704147338867188, "learning_rate": 9.74952380952381e-06, "loss": 35.2197, "step": 9929 }, { "epoch": 236.42985074626867, "grad_norm": 18.488739013671875, "learning_rate": 9.74857142857143e-06, "loss": 36.3589, "step": 9930 }, { "epoch": 236.45373134328358, "grad_norm": 16.424970626831055, "learning_rate": 9.747619047619049e-06, "loss": 37.2576, "step": 9931 }, { "epoch": 236.47761194029852, "grad_norm": 13.38017463684082, "learning_rate": 9.746666666666668e-06, "loss": 34.6685, "step": 9932 }, { "epoch": 236.50149253731342, "grad_norm": 19.310014724731445, "learning_rate": 9.745714285714287e-06, "loss": 36.7496, "step": 9933 }, { "epoch": 236.52537313432836, "grad_norm": 17.776432037353516, "learning_rate": 9.744761904761905e-06, "loss": 36.1067, "step": 9934 }, { "epoch": 236.54925373134327, "grad_norm": 17.462303161621094, "learning_rate": 9.743809523809524e-06, "loss": 35.9356, "step": 9935 }, { "epoch": 236.5731343283582, "grad_norm": 12.972521781921387, "learning_rate": 9.742857142857143e-06, "loss": 36.8305, "step": 9936 }, { "epoch": 236.59701492537314, "grad_norm": 19.28449249267578, "learning_rate": 9.741904761904762e-06, "loss": 37.2609, "step": 9937 }, { "epoch": 236.62089552238805, "grad_norm": 15.885763168334961, "learning_rate": 9.740952380952383e-06, "loss": 37.182, "step": 9938 }, { "epoch": 236.644776119403, "grad_norm": 19.846704483032227, "learning_rate": 9.74e-06, "loss": 35.3155, "step": 9939 }, { "epoch": 236.6686567164179, "grad_norm": 15.600286483764648, "learning_rate": 9.73904761904762e-06, "loss": 36.9865, "step": 9940 }, { "epoch": 236.69253731343284, "grad_norm": 15.757110595703125, "learning_rate": 9.73809523809524e-06, "loss": 35.2183, "step": 9941 }, { "epoch": 236.71641791044777, "grad_norm": 16.8973388671875, "learning_rate": 9.737142857142858e-06, "loss": 37.0222, "step": 9942 }, { "epoch": 236.74029850746268, "grad_norm": 16.777618408203125, "learning_rate": 9.736190476190477e-06, "loss": 36.3856, "step": 9943 }, { "epoch": 236.76417910447762, "grad_norm": 18.161388397216797, "learning_rate": 9.735238095238096e-06, "loss": 36.7569, "step": 9944 }, { "epoch": 236.78805970149253, "grad_norm": 16.15582847595215, "learning_rate": 9.734285714285715e-06, "loss": 36.9244, "step": 9945 }, { "epoch": 236.81194029850747, "grad_norm": 18.0284423828125, "learning_rate": 9.733333333333334e-06, "loss": 35.4237, "step": 9946 }, { "epoch": 236.83582089552237, "grad_norm": 16.975326538085938, "learning_rate": 9.732380952380953e-06, "loss": 35.6616, "step": 9947 }, { "epoch": 236.8597014925373, "grad_norm": 14.613351821899414, "learning_rate": 9.731428571428573e-06, "loss": 35.9004, "step": 9948 }, { "epoch": 236.88358208955225, "grad_norm": 18.295223236083984, "learning_rate": 9.73047619047619e-06, "loss": 35.7258, "step": 9949 }, { "epoch": 236.90746268656716, "grad_norm": 15.034687042236328, "learning_rate": 9.72952380952381e-06, "loss": 35.9824, "step": 9950 }, { "epoch": 236.9313432835821, "grad_norm": 17.6257381439209, "learning_rate": 9.72857142857143e-06, "loss": 35.0237, "step": 9951 }, { "epoch": 236.955223880597, "grad_norm": 16.51543426513672, "learning_rate": 9.727619047619047e-06, "loss": 35.6532, "step": 9952 }, { "epoch": 236.97910447761194, "grad_norm": 16.309326171875, "learning_rate": 9.726666666666668e-06, "loss": 36.6841, "step": 9953 }, { "epoch": 237.0, "grad_norm": 13.546600341796875, "learning_rate": 9.725714285714287e-06, "loss": 33.0049, "step": 9954 }, { "epoch": 237.02388059701494, "grad_norm": 15.856393814086914, "learning_rate": 9.724761904761906e-06, "loss": 35.8076, "step": 9955 }, { "epoch": 237.04776119402985, "grad_norm": 17.967824935913086, "learning_rate": 9.723809523809525e-06, "loss": 36.4542, "step": 9956 }, { "epoch": 237.07164179104478, "grad_norm": 18.078475952148438, "learning_rate": 9.722857142857143e-06, "loss": 36.9038, "step": 9957 }, { "epoch": 237.0955223880597, "grad_norm": 14.310022354125977, "learning_rate": 9.721904761904762e-06, "loss": 35.1829, "step": 9958 }, { "epoch": 237.11940298507463, "grad_norm": 27.364797592163086, "learning_rate": 9.720952380952381e-06, "loss": 35.9449, "step": 9959 }, { "epoch": 237.14328358208957, "grad_norm": 17.1212158203125, "learning_rate": 9.72e-06, "loss": 37.6461, "step": 9960 }, { "epoch": 237.16716417910447, "grad_norm": 26.285417556762695, "learning_rate": 9.71904761904762e-06, "loss": 34.547, "step": 9961 }, { "epoch": 237.1910447761194, "grad_norm": 19.969297409057617, "learning_rate": 9.718095238095238e-06, "loss": 35.9812, "step": 9962 }, { "epoch": 237.21492537313432, "grad_norm": 21.263275146484375, "learning_rate": 9.717142857142858e-06, "loss": 36.5746, "step": 9963 }, { "epoch": 237.23880597014926, "grad_norm": 26.24778938293457, "learning_rate": 9.716190476190477e-06, "loss": 35.2941, "step": 9964 }, { "epoch": 237.26268656716417, "grad_norm": 17.686864852905273, "learning_rate": 9.715238095238096e-06, "loss": 35.4527, "step": 9965 }, { "epoch": 237.2865671641791, "grad_norm": 27.805757522583008, "learning_rate": 9.714285714285715e-06, "loss": 36.5016, "step": 9966 }, { "epoch": 237.31044776119404, "grad_norm": 19.451583862304688, "learning_rate": 9.713333333333334e-06, "loss": 35.4458, "step": 9967 }, { "epoch": 237.33432835820895, "grad_norm": 25.279926300048828, "learning_rate": 9.712380952380953e-06, "loss": 35.3307, "step": 9968 }, { "epoch": 237.3582089552239, "grad_norm": 19.426441192626953, "learning_rate": 9.711428571428574e-06, "loss": 35.482, "step": 9969 }, { "epoch": 237.3820895522388, "grad_norm": 22.436826705932617, "learning_rate": 9.71047619047619e-06, "loss": 36.923, "step": 9970 }, { "epoch": 237.40597014925373, "grad_norm": 22.07547950744629, "learning_rate": 9.70952380952381e-06, "loss": 36.1381, "step": 9971 }, { "epoch": 237.42985074626867, "grad_norm": 17.01164436340332, "learning_rate": 9.70857142857143e-06, "loss": 35.5353, "step": 9972 }, { "epoch": 237.45373134328358, "grad_norm": 21.813730239868164, "learning_rate": 9.707619047619047e-06, "loss": 36.1357, "step": 9973 }, { "epoch": 237.47761194029852, "grad_norm": 22.933889389038086, "learning_rate": 9.706666666666668e-06, "loss": 35.8501, "step": 9974 }, { "epoch": 237.50149253731342, "grad_norm": 15.256769180297852, "learning_rate": 9.705714285714287e-06, "loss": 37.1791, "step": 9975 }, { "epoch": 237.52537313432836, "grad_norm": 19.60181427001953, "learning_rate": 9.704761904761906e-06, "loss": 36.2048, "step": 9976 }, { "epoch": 237.54925373134327, "grad_norm": 18.724287033081055, "learning_rate": 9.703809523809525e-06, "loss": 35.2301, "step": 9977 }, { "epoch": 237.5731343283582, "grad_norm": 15.75432014465332, "learning_rate": 9.702857142857144e-06, "loss": 36.4579, "step": 9978 }, { "epoch": 237.59701492537314, "grad_norm": 22.569398880004883, "learning_rate": 9.701904761904763e-06, "loss": 37.6286, "step": 9979 }, { "epoch": 237.62089552238805, "grad_norm": 18.756433486938477, "learning_rate": 9.700952380952381e-06, "loss": 37.0072, "step": 9980 }, { "epoch": 237.644776119403, "grad_norm": 15.693806648254395, "learning_rate": 9.7e-06, "loss": 36.7072, "step": 9981 }, { "epoch": 237.6686567164179, "grad_norm": 21.104143142700195, "learning_rate": 9.699047619047621e-06, "loss": 35.6147, "step": 9982 }, { "epoch": 237.69253731343284, "grad_norm": 18.04296875, "learning_rate": 9.698095238095238e-06, "loss": 36.7233, "step": 9983 }, { "epoch": 237.71641791044777, "grad_norm": 16.26370620727539, "learning_rate": 9.697142857142859e-06, "loss": 35.6375, "step": 9984 }, { "epoch": 237.74029850746268, "grad_norm": 14.69483470916748, "learning_rate": 9.696190476190478e-06, "loss": 35.7502, "step": 9985 }, { "epoch": 237.76417910447762, "grad_norm": 19.199661254882812, "learning_rate": 9.695238095238096e-06, "loss": 36.5614, "step": 9986 }, { "epoch": 237.78805970149253, "grad_norm": 15.763886451721191, "learning_rate": 9.694285714285715e-06, "loss": 36.1778, "step": 9987 }, { "epoch": 237.81194029850747, "grad_norm": 17.95053482055664, "learning_rate": 9.693333333333334e-06, "loss": 36.6272, "step": 9988 }, { "epoch": 237.83582089552237, "grad_norm": 15.827825546264648, "learning_rate": 9.692380952380953e-06, "loss": 36.4755, "step": 9989 }, { "epoch": 237.8597014925373, "grad_norm": 18.18069076538086, "learning_rate": 9.691428571428572e-06, "loss": 36.8826, "step": 9990 }, { "epoch": 237.88358208955225, "grad_norm": 17.14325523376465, "learning_rate": 9.690476190476191e-06, "loss": 37.1303, "step": 9991 }, { "epoch": 237.90746268656716, "grad_norm": 18.050926208496094, "learning_rate": 9.68952380952381e-06, "loss": 36.1544, "step": 9992 }, { "epoch": 237.9313432835821, "grad_norm": 17.636999130249023, "learning_rate": 9.688571428571429e-06, "loss": 36.0903, "step": 9993 }, { "epoch": 237.955223880597, "grad_norm": 15.995038986206055, "learning_rate": 9.687619047619048e-06, "loss": 35.2946, "step": 9994 }, { "epoch": 237.97910447761194, "grad_norm": 16.34441566467285, "learning_rate": 9.686666666666668e-06, "loss": 35.9815, "step": 9995 }, { "epoch": 238.0, "grad_norm": 14.362862586975098, "learning_rate": 9.685714285714285e-06, "loss": 31.6897, "step": 9996 }, { "epoch": 238.02388059701494, "grad_norm": 18.762998580932617, "learning_rate": 9.684761904761906e-06, "loss": 36.6022, "step": 9997 }, { "epoch": 238.04776119402985, "grad_norm": 19.1278133392334, "learning_rate": 9.683809523809525e-06, "loss": 35.4117, "step": 9998 }, { "epoch": 238.07164179104478, "grad_norm": 14.328730583190918, "learning_rate": 9.682857142857144e-06, "loss": 35.1485, "step": 9999 }, { "epoch": 238.0955223880597, "grad_norm": 19.069643020629883, "learning_rate": 9.681904761904763e-06, "loss": 36.3664, "step": 10000 }, { "epoch": 238.11940298507463, "grad_norm": 18.38530158996582, "learning_rate": 9.680952380952382e-06, "loss": 37.0273, "step": 10001 }, { "epoch": 238.14328358208957, "grad_norm": 18.504533767700195, "learning_rate": 9.68e-06, "loss": 35.2123, "step": 10002 }, { "epoch": 238.16716417910447, "grad_norm": 16.1649112701416, "learning_rate": 9.67904761904762e-06, "loss": 37.3847, "step": 10003 }, { "epoch": 238.1910447761194, "grad_norm": 18.605621337890625, "learning_rate": 9.678095238095238e-06, "loss": 36.542, "step": 10004 }, { "epoch": 238.21492537313432, "grad_norm": 18.89537239074707, "learning_rate": 9.677142857142859e-06, "loss": 35.8042, "step": 10005 }, { "epoch": 238.23880597014926, "grad_norm": 16.746604919433594, "learning_rate": 9.676190476190476e-06, "loss": 36.2674, "step": 10006 }, { "epoch": 238.26268656716417, "grad_norm": 20.042264938354492, "learning_rate": 9.675238095238097e-06, "loss": 37.1588, "step": 10007 }, { "epoch": 238.2865671641791, "grad_norm": 17.67850685119629, "learning_rate": 9.674285714285716e-06, "loss": 36.817, "step": 10008 }, { "epoch": 238.31044776119404, "grad_norm": 16.53818130493164, "learning_rate": 9.673333333333334e-06, "loss": 36.0362, "step": 10009 }, { "epoch": 238.33432835820895, "grad_norm": 16.945322036743164, "learning_rate": 9.672380952380953e-06, "loss": 36.4225, "step": 10010 }, { "epoch": 238.3582089552239, "grad_norm": 16.241308212280273, "learning_rate": 9.671428571428572e-06, "loss": 36.6693, "step": 10011 }, { "epoch": 238.3820895522388, "grad_norm": 21.750402450561523, "learning_rate": 9.670476190476191e-06, "loss": 37.7203, "step": 10012 }, { "epoch": 238.40597014925373, "grad_norm": 13.78532600402832, "learning_rate": 9.66952380952381e-06, "loss": 36.0114, "step": 10013 }, { "epoch": 238.42985074626867, "grad_norm": 14.291070938110352, "learning_rate": 9.668571428571429e-06, "loss": 35.7391, "step": 10014 }, { "epoch": 238.45373134328358, "grad_norm": 17.091083526611328, "learning_rate": 9.667619047619048e-06, "loss": 37.235, "step": 10015 }, { "epoch": 238.47761194029852, "grad_norm": 18.903963088989258, "learning_rate": 9.666666666666667e-06, "loss": 35.3213, "step": 10016 }, { "epoch": 238.50149253731342, "grad_norm": 17.843887329101562, "learning_rate": 9.665714285714286e-06, "loss": 35.7046, "step": 10017 }, { "epoch": 238.52537313432836, "grad_norm": 16.645095825195312, "learning_rate": 9.664761904761906e-06, "loss": 36.8381, "step": 10018 }, { "epoch": 238.54925373134327, "grad_norm": 17.44959831237793, "learning_rate": 9.663809523809523e-06, "loss": 35.6872, "step": 10019 }, { "epoch": 238.5731343283582, "grad_norm": NaN, "learning_rate": 9.662857142857144e-06, "loss": 35.9578, "step": 10020 }, { "epoch": 238.59701492537314, "grad_norm": 17.336702346801758, "learning_rate": 9.662857142857144e-06, "loss": 37.0041, "step": 10021 }, { "epoch": 238.62089552238805, "grad_norm": 14.818805694580078, "learning_rate": 9.661904761904763e-06, "loss": 36.8163, "step": 10022 }, { "epoch": 238.644776119403, "grad_norm": 14.351116180419922, "learning_rate": 9.660952380952382e-06, "loss": 34.632, "step": 10023 }, { "epoch": 238.6686567164179, "grad_norm": 16.829465866088867, "learning_rate": 9.66e-06, "loss": 35.9618, "step": 10024 }, { "epoch": 238.69253731343284, "grad_norm": 18.949562072753906, "learning_rate": 9.65904761904762e-06, "loss": 37.1074, "step": 10025 }, { "epoch": 238.71641791044777, "grad_norm": 18.230022430419922, "learning_rate": 9.658095238095238e-06, "loss": 36.8739, "step": 10026 }, { "epoch": 238.74029850746268, "grad_norm": 14.652442932128906, "learning_rate": 9.657142857142859e-06, "loss": 35.2269, "step": 10027 }, { "epoch": 238.76417910447762, "grad_norm": 18.973373413085938, "learning_rate": 9.656190476190476e-06, "loss": 35.2157, "step": 10028 }, { "epoch": 238.78805970149253, "grad_norm": 20.56492042541504, "learning_rate": 9.655238095238097e-06, "loss": 35.1376, "step": 10029 }, { "epoch": 238.81194029850747, "grad_norm": 16.110862731933594, "learning_rate": 9.654285714285716e-06, "loss": 34.2289, "step": 10030 }, { "epoch": 238.83582089552237, "grad_norm": 13.740087509155273, "learning_rate": 9.653333333333335e-06, "loss": 34.809, "step": 10031 }, { "epoch": 238.8597014925373, "grad_norm": 23.522594451904297, "learning_rate": 9.652380952380954e-06, "loss": 36.9026, "step": 10032 }, { "epoch": 238.88358208955225, "grad_norm": 18.54004669189453, "learning_rate": 9.651428571428572e-06, "loss": 35.559, "step": 10033 }, { "epoch": 238.90746268656716, "grad_norm": 13.475213050842285, "learning_rate": 9.650476190476191e-06, "loss": 35.8268, "step": 10034 }, { "epoch": 238.9313432835821, "grad_norm": 16.256486892700195, "learning_rate": 9.64952380952381e-06, "loss": 36.4832, "step": 10035 }, { "epoch": 238.955223880597, "grad_norm": 17.91010284423828, "learning_rate": 9.648571428571429e-06, "loss": 36.0897, "step": 10036 }, { "epoch": 238.97910447761194, "grad_norm": 15.188232421875, "learning_rate": 9.647619047619048e-06, "loss": 35.4762, "step": 10037 }, { "epoch": 239.0, "grad_norm": 16.662385940551758, "learning_rate": 9.646666666666667e-06, "loss": 31.8471, "step": 10038 }, { "epoch": 239.02388059701494, "grad_norm": 17.247777938842773, "learning_rate": 9.645714285714286e-06, "loss": 35.1285, "step": 10039 }, { "epoch": 239.04776119402985, "grad_norm": 18.966320037841797, "learning_rate": 9.644761904761906e-06, "loss": 36.6666, "step": 10040 }, { "epoch": 239.07164179104478, "grad_norm": 23.5268611907959, "learning_rate": 9.643809523809524e-06, "loss": 37.0704, "step": 10041 }, { "epoch": 239.0955223880597, "grad_norm": 12.94433307647705, "learning_rate": 9.642857142857144e-06, "loss": 37.5156, "step": 10042 }, { "epoch": 239.11940298507463, "grad_norm": 24.97050666809082, "learning_rate": 9.641904761904763e-06, "loss": 35.2346, "step": 10043 }, { "epoch": 239.14328358208957, "grad_norm": 19.386783599853516, "learning_rate": 9.640952380952382e-06, "loss": 35.1441, "step": 10044 }, { "epoch": 239.16716417910447, "grad_norm": 17.285106658935547, "learning_rate": 9.640000000000001e-06, "loss": 36.4102, "step": 10045 }, { "epoch": 239.1910447761194, "grad_norm": 21.81467056274414, "learning_rate": 9.63904761904762e-06, "loss": 36.1376, "step": 10046 }, { "epoch": 239.21492537313432, "grad_norm": 18.64617919921875, "learning_rate": 9.638095238095239e-06, "loss": 35.7971, "step": 10047 }, { "epoch": 239.23880597014926, "grad_norm": 16.2796688079834, "learning_rate": 9.637142857142858e-06, "loss": 35.9561, "step": 10048 }, { "epoch": 239.26268656716417, "grad_norm": 20.983346939086914, "learning_rate": 9.636190476190476e-06, "loss": 37.1588, "step": 10049 }, { "epoch": 239.2865671641791, "grad_norm": 18.213701248168945, "learning_rate": 9.635238095238097e-06, "loss": 37.4883, "step": 10050 }, { "epoch": 239.31044776119404, "grad_norm": 19.881954193115234, "learning_rate": 9.634285714285714e-06, "loss": 36.383, "step": 10051 }, { "epoch": 239.33432835820895, "grad_norm": 14.353389739990234, "learning_rate": 9.633333333333335e-06, "loss": 35.5836, "step": 10052 }, { "epoch": 239.3582089552239, "grad_norm": 23.765892028808594, "learning_rate": 9.632380952380954e-06, "loss": 36.0893, "step": 10053 }, { "epoch": 239.3820895522388, "grad_norm": 16.295591354370117, "learning_rate": 9.631428571428573e-06, "loss": 35.1261, "step": 10054 }, { "epoch": 239.40597014925373, "grad_norm": 27.36651039123535, "learning_rate": 9.630476190476192e-06, "loss": 37.2512, "step": 10055 }, { "epoch": 239.42985074626867, "grad_norm": 20.327367782592773, "learning_rate": 9.62952380952381e-06, "loss": 36.1105, "step": 10056 }, { "epoch": 239.45373134328358, "grad_norm": 18.55379295349121, "learning_rate": 9.62857142857143e-06, "loss": 35.7757, "step": 10057 }, { "epoch": 239.47761194029852, "grad_norm": 20.76352310180664, "learning_rate": 9.627619047619048e-06, "loss": 36.8951, "step": 10058 }, { "epoch": 239.50149253731342, "grad_norm": 18.17314910888672, "learning_rate": 9.626666666666667e-06, "loss": 36.5615, "step": 10059 }, { "epoch": 239.52537313432836, "grad_norm": 23.08365249633789, "learning_rate": 9.625714285714286e-06, "loss": 35.4985, "step": 10060 }, { "epoch": 239.54925373134327, "grad_norm": 20.333942413330078, "learning_rate": 9.624761904761905e-06, "loss": 34.9326, "step": 10061 }, { "epoch": 239.5731343283582, "grad_norm": 19.267133712768555, "learning_rate": 9.623809523809524e-06, "loss": 36.3737, "step": 10062 }, { "epoch": 239.59701492537314, "grad_norm": 19.04371452331543, "learning_rate": 9.622857142857144e-06, "loss": 37.6879, "step": 10063 }, { "epoch": 239.62089552238805, "grad_norm": 22.713504791259766, "learning_rate": 9.621904761904762e-06, "loss": 34.4376, "step": 10064 }, { "epoch": 239.644776119403, "grad_norm": 17.133039474487305, "learning_rate": 9.620952380952382e-06, "loss": 37.9597, "step": 10065 }, { "epoch": 239.6686567164179, "grad_norm": 25.80938720703125, "learning_rate": 9.620000000000001e-06, "loss": 34.1879, "step": 10066 }, { "epoch": 239.69253731343284, "grad_norm": 20.448535919189453, "learning_rate": 9.61904761904762e-06, "loss": 36.6585, "step": 10067 }, { "epoch": 239.71641791044777, "grad_norm": 20.890085220336914, "learning_rate": 9.618095238095239e-06, "loss": 35.2319, "step": 10068 }, { "epoch": 239.74029850746268, "grad_norm": 29.210161209106445, "learning_rate": 9.617142857142858e-06, "loss": 35.5395, "step": 10069 }, { "epoch": 239.76417910447762, "grad_norm": 22.1795597076416, "learning_rate": 9.616190476190477e-06, "loss": 35.7438, "step": 10070 }, { "epoch": 239.78805970149253, "grad_norm": 27.816953659057617, "learning_rate": 9.615238095238096e-06, "loss": 36.204, "step": 10071 }, { "epoch": 239.81194029850747, "grad_norm": 26.449193954467773, "learning_rate": 9.614285714285714e-06, "loss": 36.5446, "step": 10072 }, { "epoch": 239.83582089552237, "grad_norm": 27.123445510864258, "learning_rate": 9.613333333333335e-06, "loss": 36.8264, "step": 10073 }, { "epoch": 239.8597014925373, "grad_norm": 17.471189498901367, "learning_rate": 9.612380952380952e-06, "loss": 35.4465, "step": 10074 }, { "epoch": 239.88358208955225, "grad_norm": 22.181690216064453, "learning_rate": 9.611428571428573e-06, "loss": 36.093, "step": 10075 }, { "epoch": 239.90746268656716, "grad_norm": 23.73118019104004, "learning_rate": 9.610476190476192e-06, "loss": 35.5252, "step": 10076 }, { "epoch": 239.9313432835821, "grad_norm": 17.87926483154297, "learning_rate": 9.60952380952381e-06, "loss": 35.8809, "step": 10077 }, { "epoch": 239.955223880597, "grad_norm": 25.186649322509766, "learning_rate": 9.60857142857143e-06, "loss": 35.1261, "step": 10078 }, { "epoch": 239.97910447761194, "grad_norm": 18.682348251342773, "learning_rate": 9.607619047619048e-06, "loss": 36.4747, "step": 10079 }, { "epoch": 240.0, "grad_norm": 14.701883316040039, "learning_rate": 9.606666666666667e-06, "loss": 30.8581, "step": 10080 }, { "epoch": 240.02388059701494, "grad_norm": 24.948392868041992, "learning_rate": 9.605714285714286e-06, "loss": 35.7126, "step": 10081 }, { "epoch": 240.04776119402985, "grad_norm": 16.947065353393555, "learning_rate": 9.604761904761905e-06, "loss": 36.3141, "step": 10082 }, { "epoch": 240.07164179104478, "grad_norm": 21.494142532348633, "learning_rate": 9.603809523809524e-06, "loss": 35.4828, "step": 10083 }, { "epoch": 240.0955223880597, "grad_norm": 26.018396377563477, "learning_rate": 9.602857142857145e-06, "loss": 36.1045, "step": 10084 }, { "epoch": 240.11940298507463, "grad_norm": 16.760507583618164, "learning_rate": 9.601904761904762e-06, "loss": 36.3321, "step": 10085 }, { "epoch": 240.14328358208957, "grad_norm": 28.821687698364258, "learning_rate": 9.600952380952382e-06, "loss": 35.164, "step": 10086 }, { "epoch": 240.16716417910447, "grad_norm": 23.643774032592773, "learning_rate": 9.600000000000001e-06, "loss": 36.3261, "step": 10087 }, { "epoch": 240.1910447761194, "grad_norm": 32.337833404541016, "learning_rate": 9.59904761904762e-06, "loss": 36.7649, "step": 10088 }, { "epoch": 240.21492537313432, "grad_norm": 19.09001350402832, "learning_rate": 9.598095238095239e-06, "loss": 36.0283, "step": 10089 }, { "epoch": 240.23880597014926, "grad_norm": 37.15082931518555, "learning_rate": 9.597142857142858e-06, "loss": 34.712, "step": 10090 }, { "epoch": 240.26268656716417, "grad_norm": 27.156841278076172, "learning_rate": 9.596190476190477e-06, "loss": 36.0739, "step": 10091 }, { "epoch": 240.2865671641791, "grad_norm": 40.636470794677734, "learning_rate": 9.595238095238096e-06, "loss": 36.5028, "step": 10092 }, { "epoch": 240.31044776119404, "grad_norm": 30.511669158935547, "learning_rate": 9.594285714285715e-06, "loss": 36.1824, "step": 10093 }, { "epoch": 240.33432835820895, "grad_norm": 35.35037612915039, "learning_rate": 9.593333333333335e-06, "loss": 36.3874, "step": 10094 }, { "epoch": 240.3582089552239, "grad_norm": 31.171030044555664, "learning_rate": 9.592380952380952e-06, "loss": 36.1055, "step": 10095 }, { "epoch": 240.3820895522388, "grad_norm": 28.244115829467773, "learning_rate": 9.591428571428573e-06, "loss": 36.846, "step": 10096 }, { "epoch": 240.40597014925373, "grad_norm": 25.969768524169922, "learning_rate": 9.590476190476192e-06, "loss": 34.7639, "step": 10097 }, { "epoch": 240.42985074626867, "grad_norm": 32.089637756347656, "learning_rate": 9.58952380952381e-06, "loss": 35.7644, "step": 10098 }, { "epoch": 240.45373134328358, "grad_norm": 26.51710319519043, "learning_rate": 9.58857142857143e-06, "loss": 35.1346, "step": 10099 }, { "epoch": 240.47761194029852, "grad_norm": 32.1282958984375, "learning_rate": 9.587619047619049e-06, "loss": 35.4712, "step": 10100 }, { "epoch": 240.50149253731342, "grad_norm": 25.199325561523438, "learning_rate": 9.586666666666667e-06, "loss": 36.1035, "step": 10101 }, { "epoch": 240.52537313432836, "grad_norm": 35.87451171875, "learning_rate": 9.585714285714286e-06, "loss": 37.1188, "step": 10102 }, { "epoch": 240.54925373134327, "grad_norm": 30.406360626220703, "learning_rate": 9.584761904761905e-06, "loss": 36.0955, "step": 10103 }, { "epoch": 240.5731343283582, "grad_norm": 32.87398147583008, "learning_rate": 9.583809523809524e-06, "loss": 36.6677, "step": 10104 }, { "epoch": 240.59701492537314, "grad_norm": 27.0983829498291, "learning_rate": 9.582857142857143e-06, "loss": 36.1804, "step": 10105 }, { "epoch": 240.62089552238805, "grad_norm": 29.340635299682617, "learning_rate": 9.581904761904762e-06, "loss": 37.3455, "step": 10106 }, { "epoch": 240.644776119403, "grad_norm": 26.483362197875977, "learning_rate": 9.580952380952383e-06, "loss": 36.3505, "step": 10107 }, { "epoch": 240.6686567164179, "grad_norm": 29.381149291992188, "learning_rate": 9.58e-06, "loss": 36.7096, "step": 10108 }, { "epoch": 240.69253731343284, "grad_norm": 25.296951293945312, "learning_rate": 9.57904761904762e-06, "loss": 35.6373, "step": 10109 }, { "epoch": 240.71641791044777, "grad_norm": 33.18749237060547, "learning_rate": 9.57809523809524e-06, "loss": 37.1643, "step": 10110 }, { "epoch": 240.74029850746268, "grad_norm": 27.313159942626953, "learning_rate": 9.577142857142858e-06, "loss": 35.2829, "step": 10111 }, { "epoch": 240.76417910447762, "grad_norm": 29.12482452392578, "learning_rate": 9.576190476190477e-06, "loss": 36.1714, "step": 10112 }, { "epoch": 240.78805970149253, "grad_norm": 26.63406753540039, "learning_rate": 9.575238095238096e-06, "loss": 35.2008, "step": 10113 }, { "epoch": 240.81194029850747, "grad_norm": 26.729719161987305, "learning_rate": 9.574285714285715e-06, "loss": 35.7582, "step": 10114 }, { "epoch": 240.83582089552237, "grad_norm": 23.687339782714844, "learning_rate": 9.573333333333334e-06, "loss": 34.6782, "step": 10115 }, { "epoch": 240.8597014925373, "grad_norm": 31.28934669494629, "learning_rate": 9.572380952380953e-06, "loss": 35.9475, "step": 10116 }, { "epoch": 240.88358208955225, "grad_norm": 27.232389450073242, "learning_rate": 9.571428571428573e-06, "loss": 36.1738, "step": 10117 }, { "epoch": 240.90746268656716, "grad_norm": 33.5478630065918, "learning_rate": 9.57047619047619e-06, "loss": 35.5291, "step": 10118 }, { "epoch": 240.9313432835821, "grad_norm": 31.18778419494629, "learning_rate": 9.569523809523811e-06, "loss": 35.8111, "step": 10119 }, { "epoch": 240.955223880597, "grad_norm": 28.460308074951172, "learning_rate": 9.56857142857143e-06, "loss": 36.3354, "step": 10120 }, { "epoch": 240.97910447761194, "grad_norm": 29.261287689208984, "learning_rate": 9.567619047619049e-06, "loss": 37.0578, "step": 10121 }, { "epoch": 241.0, "grad_norm": 22.982912063598633, "learning_rate": 9.566666666666668e-06, "loss": 31.6339, "step": 10122 }, { "epoch": 241.02388059701494, "grad_norm": 24.988357543945312, "learning_rate": 9.565714285714287e-06, "loss": 36.1846, "step": 10123 }, { "epoch": 241.04776119402985, "grad_norm": 30.265308380126953, "learning_rate": 9.564761904761905e-06, "loss": 35.8522, "step": 10124 }, { "epoch": 241.07164179104478, "grad_norm": 24.690147399902344, "learning_rate": 9.563809523809524e-06, "loss": 35.5594, "step": 10125 }, { "epoch": 241.0955223880597, "grad_norm": 32.83644485473633, "learning_rate": 9.562857142857143e-06, "loss": 35.4214, "step": 10126 }, { "epoch": 241.11940298507463, "grad_norm": 27.488298416137695, "learning_rate": 9.561904761904762e-06, "loss": 37.4046, "step": 10127 }, { "epoch": 241.14328358208957, "grad_norm": 26.66729164123535, "learning_rate": 9.560952380952381e-06, "loss": 36.687, "step": 10128 }, { "epoch": 241.16716417910447, "grad_norm": 27.678674697875977, "learning_rate": 9.56e-06, "loss": 35.4188, "step": 10129 }, { "epoch": 241.1910447761194, "grad_norm": 28.47911262512207, "learning_rate": 9.55904761904762e-06, "loss": 36.3526, "step": 10130 }, { "epoch": 241.21492537313432, "grad_norm": 25.199949264526367, "learning_rate": 9.558095238095238e-06, "loss": 34.3721, "step": 10131 }, { "epoch": 241.23880597014926, "grad_norm": 28.23126983642578, "learning_rate": 9.557142857142858e-06, "loss": 36.4467, "step": 10132 }, { "epoch": 241.26268656716417, "grad_norm": 28.039873123168945, "learning_rate": 9.556190476190477e-06, "loss": 36.2541, "step": 10133 }, { "epoch": 241.2865671641791, "grad_norm": 30.603967666625977, "learning_rate": 9.555238095238096e-06, "loss": 35.7894, "step": 10134 }, { "epoch": 241.31044776119404, "grad_norm": 24.964921951293945, "learning_rate": 9.554285714285715e-06, "loss": 34.9858, "step": 10135 }, { "epoch": 241.33432835820895, "grad_norm": 29.842731475830078, "learning_rate": 9.553333333333334e-06, "loss": 36.6908, "step": 10136 }, { "epoch": 241.3582089552239, "grad_norm": 28.306358337402344, "learning_rate": 9.552380952380953e-06, "loss": 36.9513, "step": 10137 }, { "epoch": 241.3820895522388, "grad_norm": 30.514263153076172, "learning_rate": 9.551428571428573e-06, "loss": 36.1615, "step": 10138 }, { "epoch": 241.40597014925373, "grad_norm": 27.851089477539062, "learning_rate": 9.55047619047619e-06, "loss": 36.9024, "step": 10139 }, { "epoch": 241.42985074626867, "grad_norm": 29.087785720825195, "learning_rate": 9.549523809523811e-06, "loss": 36.2207, "step": 10140 }, { "epoch": 241.45373134328358, "grad_norm": 23.997663497924805, "learning_rate": 9.54857142857143e-06, "loss": 34.5732, "step": 10141 }, { "epoch": 241.47761194029852, "grad_norm": 28.922880172729492, "learning_rate": 9.547619047619049e-06, "loss": 34.9478, "step": 10142 }, { "epoch": 241.50149253731342, "grad_norm": 23.746774673461914, "learning_rate": 9.546666666666668e-06, "loss": 35.9709, "step": 10143 }, { "epoch": 241.52537313432836, "grad_norm": 29.665267944335938, "learning_rate": 9.545714285714287e-06, "loss": 35.6752, "step": 10144 }, { "epoch": 241.54925373134327, "grad_norm": 25.615671157836914, "learning_rate": 9.544761904761906e-06, "loss": 35.5674, "step": 10145 }, { "epoch": 241.5731343283582, "grad_norm": 28.494916915893555, "learning_rate": 9.543809523809525e-06, "loss": 35.2555, "step": 10146 }, { "epoch": 241.59701492537314, "grad_norm": 25.82793426513672, "learning_rate": 9.542857142857143e-06, "loss": 35.0491, "step": 10147 }, { "epoch": 241.62089552238805, "grad_norm": 28.713882446289062, "learning_rate": 9.541904761904762e-06, "loss": 36.6035, "step": 10148 }, { "epoch": 241.644776119403, "grad_norm": 26.348346710205078, "learning_rate": 9.540952380952381e-06, "loss": 34.6533, "step": 10149 }, { "epoch": 241.6686567164179, "grad_norm": 29.28573989868164, "learning_rate": 9.54e-06, "loss": 36.938, "step": 10150 }, { "epoch": 241.69253731343284, "grad_norm": 26.416595458984375, "learning_rate": 9.53904761904762e-06, "loss": 37.1741, "step": 10151 }, { "epoch": 241.71641791044777, "grad_norm": 28.165164947509766, "learning_rate": 9.538095238095238e-06, "loss": 35.7796, "step": 10152 }, { "epoch": 241.74029850746268, "grad_norm": 28.025209426879883, "learning_rate": 9.537142857142859e-06, "loss": 35.9985, "step": 10153 }, { "epoch": 241.76417910447762, "grad_norm": 27.24212074279785, "learning_rate": 9.536190476190477e-06, "loss": 36.0308, "step": 10154 }, { "epoch": 241.78805970149253, "grad_norm": 23.920875549316406, "learning_rate": 9.535238095238096e-06, "loss": 36.5636, "step": 10155 }, { "epoch": 241.81194029850747, "grad_norm": 29.511032104492188, "learning_rate": 9.534285714285715e-06, "loss": 36.862, "step": 10156 }, { "epoch": 241.83582089552237, "grad_norm": 25.657102584838867, "learning_rate": 9.533333333333334e-06, "loss": 35.3501, "step": 10157 }, { "epoch": 241.8597014925373, "grad_norm": 28.469913482666016, "learning_rate": 9.532380952380953e-06, "loss": 35.3241, "step": 10158 }, { "epoch": 241.88358208955225, "grad_norm": 27.132144927978516, "learning_rate": 9.531428571428572e-06, "loss": 35.7612, "step": 10159 }, { "epoch": 241.90746268656716, "grad_norm": 26.227014541625977, "learning_rate": 9.53047619047619e-06, "loss": 36.5643, "step": 10160 }, { "epoch": 241.9313432835821, "grad_norm": 23.205352783203125, "learning_rate": 9.529523809523811e-06, "loss": 37.4714, "step": 10161 }, { "epoch": 241.955223880597, "grad_norm": 32.46830368041992, "learning_rate": 9.528571428571429e-06, "loss": 37.3917, "step": 10162 }, { "epoch": 241.97910447761194, "grad_norm": 26.595823287963867, "learning_rate": 9.52761904761905e-06, "loss": 34.6719, "step": 10163 }, { "epoch": 242.0, "grad_norm": 26.23459815979004, "learning_rate": 9.526666666666668e-06, "loss": 31.4156, "step": 10164 }, { "epoch": 242.02388059701494, "grad_norm": 27.86235237121582, "learning_rate": 9.525714285714287e-06, "loss": 37.0225, "step": 10165 }, { "epoch": 242.04776119402985, "grad_norm": 28.9195613861084, "learning_rate": 9.524761904761906e-06, "loss": 36.7197, "step": 10166 }, { "epoch": 242.07164179104478, "grad_norm": 25.50555419921875, "learning_rate": 9.523809523809525e-06, "loss": 34.8611, "step": 10167 }, { "epoch": 242.0955223880597, "grad_norm": 27.159404754638672, "learning_rate": 9.522857142857144e-06, "loss": 35.2628, "step": 10168 }, { "epoch": 242.11940298507463, "grad_norm": 26.802696228027344, "learning_rate": 9.521904761904763e-06, "loss": 36.4047, "step": 10169 }, { "epoch": 242.14328358208957, "grad_norm": 27.503740310668945, "learning_rate": 9.520952380952381e-06, "loss": 35.6765, "step": 10170 }, { "epoch": 242.16716417910447, "grad_norm": 26.80796241760254, "learning_rate": 9.52e-06, "loss": 35.8367, "step": 10171 }, { "epoch": 242.1910447761194, "grad_norm": 26.51300621032715, "learning_rate": 9.51904761904762e-06, "loss": 35.946, "step": 10172 }, { "epoch": 242.21492537313432, "grad_norm": 23.54167938232422, "learning_rate": 9.518095238095238e-06, "loss": 36.5757, "step": 10173 }, { "epoch": 242.23880597014926, "grad_norm": 28.93527603149414, "learning_rate": 9.517142857142859e-06, "loss": 35.1881, "step": 10174 }, { "epoch": 242.26268656716417, "grad_norm": 23.34199333190918, "learning_rate": 9.516190476190476e-06, "loss": 35.704, "step": 10175 }, { "epoch": 242.2865671641791, "grad_norm": 28.4584903717041, "learning_rate": 9.515238095238097e-06, "loss": 34.5494, "step": 10176 }, { "epoch": 242.31044776119404, "grad_norm": 24.635061264038086, "learning_rate": 9.514285714285715e-06, "loss": 35.07, "step": 10177 }, { "epoch": 242.33432835820895, "grad_norm": 27.54088020324707, "learning_rate": 9.513333333333334e-06, "loss": 36.0655, "step": 10178 }, { "epoch": 242.3582089552239, "grad_norm": 27.176959991455078, "learning_rate": 9.512380952380953e-06, "loss": 37.2354, "step": 10179 }, { "epoch": 242.3820895522388, "grad_norm": 28.35399627685547, "learning_rate": 9.511428571428572e-06, "loss": 36.5617, "step": 10180 }, { "epoch": 242.40597014925373, "grad_norm": 25.509428024291992, "learning_rate": 9.510476190476191e-06, "loss": 36.1441, "step": 10181 }, { "epoch": 242.42985074626867, "grad_norm": 26.95767593383789, "learning_rate": 9.50952380952381e-06, "loss": 35.866, "step": 10182 }, { "epoch": 242.45373134328358, "grad_norm": 27.304424285888672, "learning_rate": 9.508571428571429e-06, "loss": 35.9361, "step": 10183 }, { "epoch": 242.47761194029852, "grad_norm": 29.883323669433594, "learning_rate": 9.50761904761905e-06, "loss": 36.4334, "step": 10184 }, { "epoch": 242.50149253731342, "grad_norm": 25.83658218383789, "learning_rate": 9.506666666666667e-06, "loss": 34.6341, "step": 10185 }, { "epoch": 242.52537313432836, "grad_norm": 26.64058494567871, "learning_rate": 9.505714285714287e-06, "loss": 36.2131, "step": 10186 }, { "epoch": 242.54925373134327, "grad_norm": 24.996538162231445, "learning_rate": 9.504761904761906e-06, "loss": 36.6959, "step": 10187 }, { "epoch": 242.5731343283582, "grad_norm": 30.702699661254883, "learning_rate": 9.503809523809523e-06, "loss": 36.7857, "step": 10188 }, { "epoch": 242.59701492537314, "grad_norm": 25.07971954345703, "learning_rate": 9.502857142857144e-06, "loss": 34.2304, "step": 10189 }, { "epoch": 242.62089552238805, "grad_norm": 33.22471237182617, "learning_rate": 9.501904761904763e-06, "loss": 36.0073, "step": 10190 }, { "epoch": 242.644776119403, "grad_norm": 29.005470275878906, "learning_rate": 9.500952380952382e-06, "loss": 36.4248, "step": 10191 }, { "epoch": 242.6686567164179, "grad_norm": 25.369693756103516, "learning_rate": 9.5e-06, "loss": 36.308, "step": 10192 }, { "epoch": 242.69253731343284, "grad_norm": 23.734210968017578, "learning_rate": 9.49904761904762e-06, "loss": 35.9578, "step": 10193 }, { "epoch": 242.71641791044777, "grad_norm": 26.945390701293945, "learning_rate": 9.498095238095238e-06, "loss": 35.776, "step": 10194 }, { "epoch": 242.74029850746268, "grad_norm": 24.644451141357422, "learning_rate": 9.497142857142859e-06, "loss": 35.684, "step": 10195 }, { "epoch": 242.76417910447762, "grad_norm": 29.59189796447754, "learning_rate": 9.496190476190476e-06, "loss": 35.4032, "step": 10196 }, { "epoch": 242.78805970149253, "grad_norm": 25.79729461669922, "learning_rate": 9.495238095238097e-06, "loss": 37.3598, "step": 10197 }, { "epoch": 242.81194029850747, "grad_norm": 26.13395881652832, "learning_rate": 9.494285714285716e-06, "loss": 35.872, "step": 10198 }, { "epoch": 242.83582089552237, "grad_norm": 22.409400939941406, "learning_rate": 9.493333333333334e-06, "loss": 35.7373, "step": 10199 }, { "epoch": 242.8597014925373, "grad_norm": 29.817716598510742, "learning_rate": 9.492380952380953e-06, "loss": 37.0355, "step": 10200 }, { "epoch": 242.88358208955225, "grad_norm": 25.475627899169922, "learning_rate": 9.491428571428572e-06, "loss": 35.3912, "step": 10201 }, { "epoch": 242.90746268656716, "grad_norm": 29.206342697143555, "learning_rate": 9.490476190476191e-06, "loss": 35.9925, "step": 10202 }, { "epoch": 242.9313432835821, "grad_norm": 26.866357803344727, "learning_rate": 9.48952380952381e-06, "loss": 36.1207, "step": 10203 }, { "epoch": 242.955223880597, "grad_norm": 25.74506378173828, "learning_rate": 9.488571428571429e-06, "loss": 35.8876, "step": 10204 }, { "epoch": 242.97910447761194, "grad_norm": 23.352527618408203, "learning_rate": 9.48761904761905e-06, "loss": 35.6897, "step": 10205 }, { "epoch": 243.0, "grad_norm": 24.392724990844727, "learning_rate": 9.486666666666667e-06, "loss": 32.0426, "step": 10206 }, { "epoch": 243.02388059701494, "grad_norm": 24.063154220581055, "learning_rate": 9.485714285714287e-06, "loss": 35.2103, "step": 10207 }, { "epoch": 243.04776119402985, "grad_norm": 27.244365692138672, "learning_rate": 9.484761904761906e-06, "loss": 35.7363, "step": 10208 }, { "epoch": 243.07164179104478, "grad_norm": 24.47391128540039, "learning_rate": 9.483809523809525e-06, "loss": 35.8727, "step": 10209 }, { "epoch": 243.0955223880597, "grad_norm": 26.44282341003418, "learning_rate": 9.482857142857144e-06, "loss": 35.1092, "step": 10210 }, { "epoch": 243.11940298507463, "grad_norm": 21.45967674255371, "learning_rate": 9.481904761904763e-06, "loss": 36.1031, "step": 10211 }, { "epoch": 243.14328358208957, "grad_norm": 25.74978256225586, "learning_rate": 9.480952380952382e-06, "loss": 36.1324, "step": 10212 }, { "epoch": 243.16716417910447, "grad_norm": 22.984970092773438, "learning_rate": 9.48e-06, "loss": 36.8233, "step": 10213 }, { "epoch": 243.1910447761194, "grad_norm": 26.591062545776367, "learning_rate": 9.47904761904762e-06, "loss": 36.2472, "step": 10214 }, { "epoch": 243.21492537313432, "grad_norm": NaN, "learning_rate": 9.478095238095239e-06, "loss": 40.3855, "step": 10215 }, { "epoch": 243.23880597014926, "grad_norm": 24.373151779174805, "learning_rate": 9.478095238095239e-06, "loss": 36.6809, "step": 10216 }, { "epoch": 243.26268656716417, "grad_norm": 27.12587547302246, "learning_rate": 9.477142857142857e-06, "loss": 35.7168, "step": 10217 }, { "epoch": 243.2865671641791, "grad_norm": 22.10392951965332, "learning_rate": 9.476190476190476e-06, "loss": 37.373, "step": 10218 }, { "epoch": 243.31044776119404, "grad_norm": 25.559600830078125, "learning_rate": 9.475238095238097e-06, "loss": 34.4548, "step": 10219 }, { "epoch": 243.33432835820895, "grad_norm": 22.069833755493164, "learning_rate": 9.474285714285714e-06, "loss": 36.0303, "step": 10220 }, { "epoch": 243.3582089552239, "grad_norm": 23.599863052368164, "learning_rate": 9.473333333333335e-06, "loss": 36.3221, "step": 10221 }, { "epoch": 243.3820895522388, "grad_norm": 22.96292495727539, "learning_rate": 9.472380952380954e-06, "loss": 35.2564, "step": 10222 }, { "epoch": 243.40597014925373, "grad_norm": 23.840822219848633, "learning_rate": 9.471428571428572e-06, "loss": 35.5719, "step": 10223 }, { "epoch": 243.42985074626867, "grad_norm": 20.89339256286621, "learning_rate": 9.470476190476191e-06, "loss": 35.5866, "step": 10224 }, { "epoch": 243.45373134328358, "grad_norm": 23.84319496154785, "learning_rate": 9.46952380952381e-06, "loss": 34.2371, "step": 10225 }, { "epoch": 243.47761194029852, "grad_norm": 20.901281356811523, "learning_rate": 9.46857142857143e-06, "loss": 36.5407, "step": 10226 }, { "epoch": 243.50149253731342, "grad_norm": 25.31196403503418, "learning_rate": 9.467619047619048e-06, "loss": 36.6019, "step": 10227 }, { "epoch": 243.52537313432836, "grad_norm": 20.52994728088379, "learning_rate": 9.466666666666667e-06, "loss": 36.518, "step": 10228 }, { "epoch": 243.54925373134327, "grad_norm": 19.71125602722168, "learning_rate": 9.465714285714288e-06, "loss": 35.2616, "step": 10229 }, { "epoch": 243.5731343283582, "grad_norm": 18.73887825012207, "learning_rate": 9.464761904761905e-06, "loss": 36.5793, "step": 10230 }, { "epoch": 243.59701492537314, "grad_norm": 21.38459587097168, "learning_rate": 9.463809523809525e-06, "loss": 35.9661, "step": 10231 }, { "epoch": 243.62089552238805, "grad_norm": 18.351490020751953, "learning_rate": 9.462857142857144e-06, "loss": 36.1207, "step": 10232 }, { "epoch": 243.644776119403, "grad_norm": 23.784090042114258, "learning_rate": 9.461904761904761e-06, "loss": 35.976, "step": 10233 }, { "epoch": 243.6686567164179, "grad_norm": 20.061128616333008, "learning_rate": 9.460952380952382e-06, "loss": 35.8996, "step": 10234 }, { "epoch": 243.69253731343284, "grad_norm": 18.1326847076416, "learning_rate": 9.460000000000001e-06, "loss": 35.8992, "step": 10235 }, { "epoch": 243.71641791044777, "grad_norm": 20.553115844726562, "learning_rate": 9.45904761904762e-06, "loss": 36.8324, "step": 10236 }, { "epoch": 243.74029850746268, "grad_norm": 17.57961654663086, "learning_rate": 9.458095238095239e-06, "loss": 35.5464, "step": 10237 }, { "epoch": 243.76417910447762, "grad_norm": 20.132938385009766, "learning_rate": 9.457142857142858e-06, "loss": 35.262, "step": 10238 }, { "epoch": 243.78805970149253, "grad_norm": 17.194721221923828, "learning_rate": 9.456190476190476e-06, "loss": 35.5574, "step": 10239 }, { "epoch": 243.81194029850747, "grad_norm": 14.782204627990723, "learning_rate": 9.455238095238095e-06, "loss": 36.5212, "step": 10240 }, { "epoch": 243.83582089552237, "grad_norm": 19.932167053222656, "learning_rate": 9.454285714285714e-06, "loss": 35.4265, "step": 10241 }, { "epoch": 243.8597014925373, "grad_norm": 15.766868591308594, "learning_rate": 9.453333333333335e-06, "loss": 36.4644, "step": 10242 }, { "epoch": 243.88358208955225, "grad_norm": 14.553804397583008, "learning_rate": 9.452380952380952e-06, "loss": 36.093, "step": 10243 }, { "epoch": 243.90746268656716, "grad_norm": 14.554533958435059, "learning_rate": 9.451428571428573e-06, "loss": 35.6284, "step": 10244 }, { "epoch": 243.9313432835821, "grad_norm": 18.88169288635254, "learning_rate": 9.450476190476192e-06, "loss": 37.5024, "step": 10245 }, { "epoch": 243.955223880597, "grad_norm": 17.068729400634766, "learning_rate": 9.44952380952381e-06, "loss": 35.2893, "step": 10246 }, { "epoch": 243.97910447761194, "grad_norm": 19.206130981445312, "learning_rate": 9.44857142857143e-06, "loss": 35.7482, "step": 10247 }, { "epoch": 244.0, "grad_norm": 15.875373840332031, "learning_rate": 9.447619047619048e-06, "loss": 31.5993, "step": 10248 }, { "epoch": 244.02388059701494, "grad_norm": NaN, "learning_rate": 9.446666666666667e-06, "loss": 58.6215, "step": 10249 }, { "epoch": 244.04776119402985, "grad_norm": 18.51722526550293, "learning_rate": 9.446666666666667e-06, "loss": 35.8964, "step": 10250 }, { "epoch": 244.07164179104478, "grad_norm": 19.248676300048828, "learning_rate": 9.445714285714288e-06, "loss": 36.2713, "step": 10251 }, { "epoch": 244.0955223880597, "grad_norm": 17.334125518798828, "learning_rate": 9.444761904761905e-06, "loss": 35.1127, "step": 10252 }, { "epoch": 244.11940298507463, "grad_norm": 21.649171829223633, "learning_rate": 9.443809523809526e-06, "loss": 35.9074, "step": 10253 }, { "epoch": 244.14328358208957, "grad_norm": 16.352853775024414, "learning_rate": 9.442857142857144e-06, "loss": 36.4875, "step": 10254 }, { "epoch": 244.16716417910447, "grad_norm": 21.442800521850586, "learning_rate": 9.441904761904762e-06, "loss": 35.6998, "step": 10255 }, { "epoch": 244.1910447761194, "grad_norm": 19.510009765625, "learning_rate": 9.440952380952382e-06, "loss": 36.013, "step": 10256 }, { "epoch": 244.21492537313432, "grad_norm": 21.225709915161133, "learning_rate": 9.440000000000001e-06, "loss": 34.7438, "step": 10257 }, { "epoch": 244.23880597014926, "grad_norm": 19.475543975830078, "learning_rate": 9.43904761904762e-06, "loss": 35.4845, "step": 10258 }, { "epoch": 244.26268656716417, "grad_norm": 17.805879592895508, "learning_rate": 9.438095238095239e-06, "loss": 35.3803, "step": 10259 }, { "epoch": 244.2865671641791, "grad_norm": 18.243566513061523, "learning_rate": 9.437142857142858e-06, "loss": 35.433, "step": 10260 }, { "epoch": 244.31044776119404, "grad_norm": 19.33938217163086, "learning_rate": 9.436190476190477e-06, "loss": 36.3215, "step": 10261 }, { "epoch": 244.33432835820895, "grad_norm": 18.11855125427246, "learning_rate": 9.435238095238096e-06, "loss": 35.5869, "step": 10262 }, { "epoch": 244.3582089552239, "grad_norm": 20.2562255859375, "learning_rate": 9.434285714285714e-06, "loss": 36.9068, "step": 10263 }, { "epoch": 244.3820895522388, "grad_norm": 16.003639221191406, "learning_rate": 9.433333333333335e-06, "loss": 36.0697, "step": 10264 }, { "epoch": 244.40597014925373, "grad_norm": 23.99402618408203, "learning_rate": 9.432380952380952e-06, "loss": 35.1908, "step": 10265 }, { "epoch": 244.42985074626867, "grad_norm": 17.916046142578125, "learning_rate": 9.431428571428573e-06, "loss": 36.2081, "step": 10266 }, { "epoch": 244.45373134328358, "grad_norm": 18.749805450439453, "learning_rate": 9.430476190476192e-06, "loss": 36.253, "step": 10267 }, { "epoch": 244.47761194029852, "grad_norm": 20.116228103637695, "learning_rate": 9.42952380952381e-06, "loss": 36.5355, "step": 10268 }, { "epoch": 244.50149253731342, "grad_norm": 14.6526517868042, "learning_rate": 9.42857142857143e-06, "loss": 35.943, "step": 10269 }, { "epoch": 244.52537313432836, "grad_norm": 18.03446388244629, "learning_rate": 9.427619047619048e-06, "loss": 36.748, "step": 10270 }, { "epoch": 244.54925373134327, "grad_norm": 18.715557098388672, "learning_rate": 9.426666666666667e-06, "loss": 34.953, "step": 10271 }, { "epoch": 244.5731343283582, "grad_norm": 13.508959770202637, "learning_rate": 9.425714285714286e-06, "loss": 35.8598, "step": 10272 }, { "epoch": 244.59701492537314, "grad_norm": 15.250314712524414, "learning_rate": 9.424761904761905e-06, "loss": 36.003, "step": 10273 }, { "epoch": 244.62089552238805, "grad_norm": 18.034427642822266, "learning_rate": 9.423809523809526e-06, "loss": 36.7388, "step": 10274 }, { "epoch": 244.644776119403, "grad_norm": 16.868148803710938, "learning_rate": 9.422857142857143e-06, "loss": 36.4878, "step": 10275 }, { "epoch": 244.6686567164179, "grad_norm": 16.670799255371094, "learning_rate": 9.421904761904763e-06, "loss": 35.4029, "step": 10276 }, { "epoch": 244.69253731343284, "grad_norm": 18.17951202392578, "learning_rate": 9.420952380952382e-06, "loss": 35.147, "step": 10277 }, { "epoch": 244.71641791044777, "grad_norm": 18.339149475097656, "learning_rate": 9.42e-06, "loss": 36.0372, "step": 10278 }, { "epoch": 244.74029850746268, "grad_norm": 18.232181549072266, "learning_rate": 9.41904761904762e-06, "loss": 36.8727, "step": 10279 }, { "epoch": 244.76417910447762, "grad_norm": 13.441219329833984, "learning_rate": 9.418095238095239e-06, "loss": 35.9545, "step": 10280 }, { "epoch": 244.78805970149253, "grad_norm": 19.54502296447754, "learning_rate": 9.417142857142858e-06, "loss": 36.2901, "step": 10281 }, { "epoch": 244.81194029850747, "grad_norm": 15.229294776916504, "learning_rate": 9.416190476190477e-06, "loss": 36.0402, "step": 10282 }, { "epoch": 244.83582089552237, "grad_norm": 20.756439208984375, "learning_rate": 9.415238095238096e-06, "loss": 37.4198, "step": 10283 }, { "epoch": 244.8597014925373, "grad_norm": 17.38568115234375, "learning_rate": 9.414285714285715e-06, "loss": 35.5644, "step": 10284 }, { "epoch": 244.88358208955225, "grad_norm": 16.86087417602539, "learning_rate": 9.413333333333334e-06, "loss": 35.8171, "step": 10285 }, { "epoch": 244.90746268656716, "grad_norm": 18.812097549438477, "learning_rate": 9.412380952380952e-06, "loss": 35.5254, "step": 10286 }, { "epoch": 244.9313432835821, "grad_norm": 15.785964965820312, "learning_rate": 9.411428571428573e-06, "loss": 35.1337, "step": 10287 }, { "epoch": 244.955223880597, "grad_norm": 16.645219802856445, "learning_rate": 9.41047619047619e-06, "loss": 36.283, "step": 10288 }, { "epoch": 244.97910447761194, "grad_norm": 19.539493560791016, "learning_rate": 9.40952380952381e-06, "loss": 35.3269, "step": 10289 }, { "epoch": 245.0, "grad_norm": 14.891328811645508, "learning_rate": 9.40857142857143e-06, "loss": 30.4666, "step": 10290 }, { "epoch": 245.02388059701494, "grad_norm": 18.36530876159668, "learning_rate": 9.407619047619049e-06, "loss": 36.0385, "step": 10291 }, { "epoch": 245.04776119402985, "grad_norm": 18.795873641967773, "learning_rate": 9.406666666666668e-06, "loss": 34.3661, "step": 10292 }, { "epoch": 245.07164179104478, "grad_norm": 16.63825798034668, "learning_rate": 9.405714285714286e-06, "loss": 35.9693, "step": 10293 }, { "epoch": 245.0955223880597, "grad_norm": 19.72463035583496, "learning_rate": 9.404761904761905e-06, "loss": 37.4437, "step": 10294 }, { "epoch": 245.11940298507463, "grad_norm": 15.989143371582031, "learning_rate": 9.403809523809526e-06, "loss": 35.7794, "step": 10295 }, { "epoch": 245.14328358208957, "grad_norm": 16.804611206054688, "learning_rate": 9.402857142857143e-06, "loss": 35.8952, "step": 10296 }, { "epoch": 245.16716417910447, "grad_norm": 20.517375946044922, "learning_rate": 9.401904761904764e-06, "loss": 35.2546, "step": 10297 }, { "epoch": 245.1910447761194, "grad_norm": 15.119479179382324, "learning_rate": 9.400952380952381e-06, "loss": 36.0732, "step": 10298 }, { "epoch": 245.21492537313432, "grad_norm": 18.243501663208008, "learning_rate": 9.4e-06, "loss": 36.1616, "step": 10299 }, { "epoch": 245.23880597014926, "grad_norm": 19.40731430053711, "learning_rate": 9.39904761904762e-06, "loss": 37.3905, "step": 10300 }, { "epoch": 245.26268656716417, "grad_norm": 15.139904975891113, "learning_rate": 9.398095238095238e-06, "loss": 37.0587, "step": 10301 }, { "epoch": 245.2865671641791, "grad_norm": 17.99896812438965, "learning_rate": 9.397142857142858e-06, "loss": 35.2688, "step": 10302 }, { "epoch": 245.31044776119404, "grad_norm": 22.78380584716797, "learning_rate": 9.396190476190477e-06, "loss": 36.1511, "step": 10303 }, { "epoch": 245.33432835820895, "grad_norm": 15.877861976623535, "learning_rate": 9.395238095238096e-06, "loss": 36.2515, "step": 10304 }, { "epoch": 245.3582089552239, "grad_norm": 22.85430145263672, "learning_rate": 9.394285714285715e-06, "loss": 36.4375, "step": 10305 }, { "epoch": 245.3820895522388, "grad_norm": 19.368839263916016, "learning_rate": 9.393333333333334e-06, "loss": 35.9143, "step": 10306 }, { "epoch": 245.40597014925373, "grad_norm": 14.689555168151855, "learning_rate": 9.392380952380953e-06, "loss": 35.2567, "step": 10307 }, { "epoch": 245.42985074626867, "grad_norm": 29.445722579956055, "learning_rate": 9.391428571428573e-06, "loss": 34.6743, "step": 10308 }, { "epoch": 245.45373134328358, "grad_norm": NaN, "learning_rate": 9.39047619047619e-06, "loss": 64.4562, "step": 10309 }, { "epoch": 245.47761194029852, "grad_norm": 18.444366455078125, "learning_rate": 9.39047619047619e-06, "loss": 35.5516, "step": 10310 }, { "epoch": 245.50149253731342, "grad_norm": 31.422306060791016, "learning_rate": 9.389523809523811e-06, "loss": 36.2244, "step": 10311 }, { "epoch": 245.52537313432836, "grad_norm": 23.68085289001465, "learning_rate": 9.38857142857143e-06, "loss": 34.9389, "step": 10312 }, { "epoch": 245.54925373134327, "grad_norm": 32.45897674560547, "learning_rate": 9.387619047619049e-06, "loss": 36.2821, "step": 10313 }, { "epoch": 245.5731343283582, "grad_norm": 24.41595458984375, "learning_rate": 9.386666666666668e-06, "loss": 35.7378, "step": 10314 }, { "epoch": 245.59701492537314, "grad_norm": 30.536373138427734, "learning_rate": 9.385714285714287e-06, "loss": 36.2082, "step": 10315 }, { "epoch": 245.62089552238805, "grad_norm": 24.87826156616211, "learning_rate": 9.384761904761906e-06, "loss": 35.542, "step": 10316 }, { "epoch": 245.644776119403, "grad_norm": 33.00590896606445, "learning_rate": 9.383809523809524e-06, "loss": 36.2549, "step": 10317 }, { "epoch": 245.6686567164179, "grad_norm": 28.54401397705078, "learning_rate": 9.382857142857143e-06, "loss": 36.9198, "step": 10318 }, { "epoch": 245.69253731343284, "grad_norm": 26.909244537353516, "learning_rate": 9.381904761904764e-06, "loss": 35.9212, "step": 10319 }, { "epoch": 245.71641791044777, "grad_norm": 25.473735809326172, "learning_rate": 9.380952380952381e-06, "loss": 36.0517, "step": 10320 }, { "epoch": 245.74029850746268, "grad_norm": 29.483325958251953, "learning_rate": 9.38e-06, "loss": 35.915, "step": 10321 }, { "epoch": 245.76417910447762, "grad_norm": 25.776201248168945, "learning_rate": 9.37904761904762e-06, "loss": 35.5704, "step": 10322 }, { "epoch": 245.78805970149253, "grad_norm": 31.685558319091797, "learning_rate": 9.378095238095238e-06, "loss": 35.5814, "step": 10323 }, { "epoch": 245.81194029850747, "grad_norm": 28.644330978393555, "learning_rate": 9.377142857142858e-06, "loss": 36.7554, "step": 10324 }, { "epoch": 245.83582089552237, "grad_norm": 31.40442657470703, "learning_rate": 9.376190476190477e-06, "loss": 35.4414, "step": 10325 }, { "epoch": 245.8597014925373, "grad_norm": 27.47274398803711, "learning_rate": 9.375238095238096e-06, "loss": 36.1345, "step": 10326 }, { "epoch": 245.88358208955225, "grad_norm": 31.04277992248535, "learning_rate": 9.374285714285715e-06, "loss": 36.1506, "step": 10327 }, { "epoch": 245.90746268656716, "grad_norm": 27.77751922607422, "learning_rate": 9.373333333333334e-06, "loss": 36.4583, "step": 10328 }, { "epoch": 245.9313432835821, "grad_norm": 30.050209045410156, "learning_rate": 9.372380952380953e-06, "loss": 35.4964, "step": 10329 }, { "epoch": 245.955223880597, "grad_norm": 28.161046981811523, "learning_rate": 9.371428571428572e-06, "loss": 34.7306, "step": 10330 }, { "epoch": 245.97910447761194, "grad_norm": 26.274951934814453, "learning_rate": 9.37047619047619e-06, "loss": 34.5474, "step": 10331 }, { "epoch": 246.0, "grad_norm": 21.592979431152344, "learning_rate": 9.369523809523811e-06, "loss": 30.3718, "step": 10332 }, { "epoch": 246.02388059701494, "grad_norm": 27.67121696472168, "learning_rate": 9.368571428571428e-06, "loss": 35.0143, "step": 10333 }, { "epoch": 246.04776119402985, "grad_norm": 23.754695892333984, "learning_rate": 9.367619047619049e-06, "loss": 35.4082, "step": 10334 }, { "epoch": 246.07164179104478, "grad_norm": 31.148208618164062, "learning_rate": 9.366666666666668e-06, "loss": 35.0931, "step": 10335 }, { "epoch": 246.0955223880597, "grad_norm": 27.08707618713379, "learning_rate": 9.365714285714287e-06, "loss": 34.3186, "step": 10336 }, { "epoch": 246.11940298507463, "grad_norm": 30.51011085510254, "learning_rate": 9.364761904761906e-06, "loss": 34.9747, "step": 10337 }, { "epoch": 246.14328358208957, "grad_norm": 28.628009796142578, "learning_rate": 9.363809523809525e-06, "loss": 36.8224, "step": 10338 }, { "epoch": 246.16716417910447, "grad_norm": 27.875980377197266, "learning_rate": 9.362857142857143e-06, "loss": 35.1735, "step": 10339 }, { "epoch": 246.1910447761194, "grad_norm": 22.788734436035156, "learning_rate": 9.361904761904762e-06, "loss": 35.0024, "step": 10340 }, { "epoch": 246.21492537313432, "grad_norm": 29.667194366455078, "learning_rate": 9.360952380952381e-06, "loss": 36.5179, "step": 10341 }, { "epoch": 246.23880597014926, "grad_norm": 26.609294891357422, "learning_rate": 9.360000000000002e-06, "loss": 36.5552, "step": 10342 }, { "epoch": 246.26268656716417, "grad_norm": 28.798431396484375, "learning_rate": 9.359047619047619e-06, "loss": 35.0698, "step": 10343 }, { "epoch": 246.2865671641791, "grad_norm": 25.941505432128906, "learning_rate": 9.358095238095238e-06, "loss": 36.0461, "step": 10344 }, { "epoch": 246.31044776119404, "grad_norm": 26.68019676208496, "learning_rate": 9.357142857142859e-06, "loss": 35.2952, "step": 10345 }, { "epoch": 246.33432835820895, "grad_norm": 23.037126541137695, "learning_rate": 9.356190476190476e-06, "loss": 35.8529, "step": 10346 }, { "epoch": 246.3582089552239, "grad_norm": 30.258909225463867, "learning_rate": 9.355238095238096e-06, "loss": 35.8508, "step": 10347 }, { "epoch": 246.3820895522388, "grad_norm": 27.253101348876953, "learning_rate": 9.354285714285715e-06, "loss": 35.9056, "step": 10348 }, { "epoch": 246.40597014925373, "grad_norm": 29.530893325805664, "learning_rate": 9.353333333333334e-06, "loss": 35.328, "step": 10349 }, { "epoch": 246.42985074626867, "grad_norm": 28.249046325683594, "learning_rate": 9.352380952380953e-06, "loss": 35.0326, "step": 10350 }, { "epoch": 246.45373134328358, "grad_norm": 29.183799743652344, "learning_rate": 9.351428571428572e-06, "loss": 35.6106, "step": 10351 }, { "epoch": 246.47761194029852, "grad_norm": 26.009546279907227, "learning_rate": 9.35047619047619e-06, "loss": 36.1377, "step": 10352 }, { "epoch": 246.50149253731342, "grad_norm": 29.63404083251953, "learning_rate": 9.34952380952381e-06, "loss": 37.2717, "step": 10353 }, { "epoch": 246.52537313432836, "grad_norm": 26.10556411743164, "learning_rate": 9.348571428571429e-06, "loss": 37.3727, "step": 10354 }, { "epoch": 246.54925373134327, "grad_norm": 28.26673126220703, "learning_rate": 9.34761904761905e-06, "loss": 36.8424, "step": 10355 }, { "epoch": 246.5731343283582, "grad_norm": 25.901973724365234, "learning_rate": 9.346666666666666e-06, "loss": 36.1249, "step": 10356 }, { "epoch": 246.59701492537314, "grad_norm": 28.327436447143555, "learning_rate": 9.345714285714287e-06, "loss": 35.6474, "step": 10357 }, { "epoch": 246.62089552238805, "grad_norm": 26.3010311126709, "learning_rate": 9.344761904761906e-06, "loss": 35.104, "step": 10358 }, { "epoch": 246.644776119403, "grad_norm": 30.107545852661133, "learning_rate": 9.343809523809525e-06, "loss": 36.2501, "step": 10359 }, { "epoch": 246.6686567164179, "grad_norm": 28.717906951904297, "learning_rate": 9.342857142857144e-06, "loss": 35.7715, "step": 10360 }, { "epoch": 246.69253731343284, "grad_norm": 27.056331634521484, "learning_rate": 9.341904761904763e-06, "loss": 35.9851, "step": 10361 }, { "epoch": 246.71641791044777, "grad_norm": 23.271900177001953, "learning_rate": 9.340952380952381e-06, "loss": 36.2637, "step": 10362 }, { "epoch": 246.74029850746268, "grad_norm": 26.731822967529297, "learning_rate": 9.340000000000002e-06, "loss": 34.6707, "step": 10363 }, { "epoch": 246.76417910447762, "grad_norm": 22.142702102661133, "learning_rate": 9.33904761904762e-06, "loss": 36.4722, "step": 10364 }, { "epoch": 246.78805970149253, "grad_norm": 32.12749481201172, "learning_rate": 9.338095238095238e-06, "loss": 35.502, "step": 10365 }, { "epoch": 246.81194029850747, "grad_norm": 25.281442642211914, "learning_rate": 9.337142857142859e-06, "loss": 37.0552, "step": 10366 }, { "epoch": 246.83582089552237, "grad_norm": 26.85059928894043, "learning_rate": 9.336190476190476e-06, "loss": 35.6356, "step": 10367 }, { "epoch": 246.8597014925373, "grad_norm": 24.89397430419922, "learning_rate": 9.335238095238097e-06, "loss": 35.9212, "step": 10368 }, { "epoch": 246.88358208955225, "grad_norm": 29.490575790405273, "learning_rate": 9.334285714285715e-06, "loss": 37.0296, "step": 10369 }, { "epoch": 246.90746268656716, "grad_norm": 27.18348503112793, "learning_rate": 9.333333333333334e-06, "loss": 35.7107, "step": 10370 }, { "epoch": 246.9313432835821, "grad_norm": 25.741382598876953, "learning_rate": 9.332380952380953e-06, "loss": 35.7305, "step": 10371 }, { "epoch": 246.955223880597, "grad_norm": 24.733936309814453, "learning_rate": 9.331428571428572e-06, "loss": 35.7664, "step": 10372 }, { "epoch": 246.97910447761194, "grad_norm": 26.003232955932617, "learning_rate": 9.330476190476191e-06, "loss": 36.4173, "step": 10373 }, { "epoch": 247.0, "grad_norm": 18.96998405456543, "learning_rate": 9.32952380952381e-06, "loss": 30.9039, "step": 10374 }, { "epoch": 247.02388059701494, "grad_norm": 25.966978073120117, "learning_rate": 9.328571428571429e-06, "loss": 35.8479, "step": 10375 }, { "epoch": 247.04776119402985, "grad_norm": 26.212934494018555, "learning_rate": 9.32761904761905e-06, "loss": 36.9688, "step": 10376 }, { "epoch": 247.07164179104478, "grad_norm": 27.39366340637207, "learning_rate": 9.326666666666667e-06, "loss": 36.1078, "step": 10377 }, { "epoch": 247.0955223880597, "grad_norm": 24.970836639404297, "learning_rate": 9.325714285714287e-06, "loss": 36.078, "step": 10378 }, { "epoch": 247.11940298507463, "grad_norm": 25.114280700683594, "learning_rate": 9.324761904761906e-06, "loss": 35.7343, "step": 10379 }, { "epoch": 247.14328358208957, "grad_norm": 22.088726043701172, "learning_rate": 9.323809523809525e-06, "loss": 35.0877, "step": 10380 }, { "epoch": 247.16716417910447, "grad_norm": 30.845657348632812, "learning_rate": 9.322857142857144e-06, "loss": 36.3526, "step": 10381 }, { "epoch": 247.1910447761194, "grad_norm": 22.784645080566406, "learning_rate": 9.321904761904763e-06, "loss": 36.6681, "step": 10382 }, { "epoch": 247.21492537313432, "grad_norm": 29.72661018371582, "learning_rate": 9.320952380952382e-06, "loss": 35.3475, "step": 10383 }, { "epoch": 247.23880597014926, "grad_norm": 26.268980026245117, "learning_rate": 9.32e-06, "loss": 34.6149, "step": 10384 }, { "epoch": 247.26268656716417, "grad_norm": 26.10773468017578, "learning_rate": 9.31904761904762e-06, "loss": 35.4511, "step": 10385 }, { "epoch": 247.2865671641791, "grad_norm": 23.967748641967773, "learning_rate": 9.318095238095238e-06, "loss": 35.0177, "step": 10386 }, { "epoch": 247.31044776119404, "grad_norm": 29.59598159790039, "learning_rate": 9.317142857142857e-06, "loss": 36.5897, "step": 10387 }, { "epoch": 247.33432835820895, "grad_norm": 27.748294830322266, "learning_rate": 9.316190476190476e-06, "loss": 35.9019, "step": 10388 }, { "epoch": 247.3582089552239, "grad_norm": 21.567779541015625, "learning_rate": 9.315238095238097e-06, "loss": 36.0501, "step": 10389 }, { "epoch": 247.3820895522388, "grad_norm": 20.922203063964844, "learning_rate": 9.314285714285714e-06, "loss": 36.001, "step": 10390 }, { "epoch": 247.40597014925373, "grad_norm": 23.449371337890625, "learning_rate": 9.313333333333335e-06, "loss": 36.0758, "step": 10391 }, { "epoch": 247.42985074626867, "grad_norm": 20.626108169555664, "learning_rate": 9.312380952380953e-06, "loss": 35.4947, "step": 10392 }, { "epoch": 247.45373134328358, "grad_norm": 22.764175415039062, "learning_rate": 9.311428571428572e-06, "loss": 36.4139, "step": 10393 }, { "epoch": 247.47761194029852, "grad_norm": NaN, "learning_rate": 9.310476190476191e-06, "loss": 48.6844, "step": 10394 }, { "epoch": 247.50149253731342, "grad_norm": 18.010173797607422, "learning_rate": 9.310476190476191e-06, "loss": 35.2145, "step": 10395 }, { "epoch": 247.52537313432836, "grad_norm": 23.32284927368164, "learning_rate": 9.30952380952381e-06, "loss": 35.7392, "step": 10396 }, { "epoch": 247.54925373134327, "grad_norm": 16.008895874023438, "learning_rate": 9.308571428571429e-06, "loss": 35.3958, "step": 10397 }, { "epoch": 247.5731343283582, "grad_norm": 25.21518325805664, "learning_rate": 9.307619047619048e-06, "loss": 35.2711, "step": 10398 }, { "epoch": 247.59701492537314, "grad_norm": 20.520076751708984, "learning_rate": 9.306666666666667e-06, "loss": 37.6941, "step": 10399 }, { "epoch": 247.62089552238805, "grad_norm": 21.775188446044922, "learning_rate": 9.305714285714287e-06, "loss": 35.9768, "step": 10400 }, { "epoch": 247.644776119403, "grad_norm": 19.160938262939453, "learning_rate": 9.304761904761905e-06, "loss": 35.7068, "step": 10401 }, { "epoch": 247.6686567164179, "grad_norm": 23.554811477661133, "learning_rate": 9.303809523809525e-06, "loss": 35.8011, "step": 10402 }, { "epoch": 247.69253731343284, "grad_norm": 19.529022216796875, "learning_rate": 9.302857142857144e-06, "loss": 35.9624, "step": 10403 }, { "epoch": 247.71641791044777, "grad_norm": 22.15897560119629, "learning_rate": 9.301904761904763e-06, "loss": 34.4005, "step": 10404 }, { "epoch": 247.74029850746268, "grad_norm": 19.75330352783203, "learning_rate": 9.300952380952382e-06, "loss": 36.5212, "step": 10405 }, { "epoch": 247.76417910447762, "grad_norm": 20.824283599853516, "learning_rate": 9.3e-06, "loss": 35.4547, "step": 10406 }, { "epoch": 247.78805970149253, "grad_norm": 17.906557083129883, "learning_rate": 9.29904761904762e-06, "loss": 36.7554, "step": 10407 }, { "epoch": 247.81194029850747, "grad_norm": 25.11722183227539, "learning_rate": 9.29809523809524e-06, "loss": 36.1553, "step": 10408 }, { "epoch": 247.83582089552237, "grad_norm": 18.463071823120117, "learning_rate": 9.297142857142857e-06, "loss": 35.5073, "step": 10409 }, { "epoch": 247.8597014925373, "grad_norm": 23.776803970336914, "learning_rate": 9.296190476190476e-06, "loss": 36.1157, "step": 10410 }, { "epoch": 247.88358208955225, "grad_norm": 17.98375701904297, "learning_rate": 9.295238095238095e-06, "loss": 35.5557, "step": 10411 }, { "epoch": 247.90746268656716, "grad_norm": 21.57294273376465, "learning_rate": 9.294285714285714e-06, "loss": 35.1734, "step": 10412 }, { "epoch": 247.9313432835821, "grad_norm": 18.360673904418945, "learning_rate": 9.293333333333335e-06, "loss": 34.3661, "step": 10413 }, { "epoch": 247.955223880597, "grad_norm": 18.266700744628906, "learning_rate": 9.292380952380952e-06, "loss": 35.3242, "step": 10414 }, { "epoch": 247.97910447761194, "grad_norm": 16.929805755615234, "learning_rate": 9.291428571428572e-06, "loss": 35.7476, "step": 10415 }, { "epoch": 248.0, "grad_norm": 18.62027359008789, "learning_rate": 9.290476190476191e-06, "loss": 32.6399, "step": 10416 }, { "epoch": 248.02388059701494, "grad_norm": 17.48893165588379, "learning_rate": 9.28952380952381e-06, "loss": 34.2671, "step": 10417 }, { "epoch": 248.04776119402985, "grad_norm": 15.929790496826172, "learning_rate": 9.28857142857143e-06, "loss": 36.183, "step": 10418 }, { "epoch": 248.07164179104478, "grad_norm": 20.762374877929688, "learning_rate": 9.287619047619048e-06, "loss": 34.9601, "step": 10419 }, { "epoch": 248.0955223880597, "grad_norm": 17.352806091308594, "learning_rate": 9.286666666666667e-06, "loss": 34.1468, "step": 10420 }, { "epoch": 248.11940298507463, "grad_norm": 17.688629150390625, "learning_rate": 9.285714285714288e-06, "loss": 36.46, "step": 10421 }, { "epoch": 248.14328358208957, "grad_norm": 21.053203582763672, "learning_rate": 9.284761904761905e-06, "loss": 36.585, "step": 10422 }, { "epoch": 248.16716417910447, "grad_norm": 16.51744842529297, "learning_rate": 9.283809523809525e-06, "loss": 34.8284, "step": 10423 }, { "epoch": 248.1910447761194, "grad_norm": 15.983092308044434, "learning_rate": 9.282857142857144e-06, "loss": 35.8418, "step": 10424 }, { "epoch": 248.21492537313432, "grad_norm": 21.639421463012695, "learning_rate": 9.281904761904763e-06, "loss": 34.4315, "step": 10425 }, { "epoch": 248.23880597014926, "grad_norm": 15.690654754638672, "learning_rate": 9.280952380952382e-06, "loss": 35.8647, "step": 10426 }, { "epoch": 248.26268656716417, "grad_norm": 22.966873168945312, "learning_rate": 9.280000000000001e-06, "loss": 35.9926, "step": 10427 }, { "epoch": 248.2865671641791, "grad_norm": 17.881546020507812, "learning_rate": 9.27904761904762e-06, "loss": 35.0961, "step": 10428 }, { "epoch": 248.31044776119404, "grad_norm": 16.167945861816406, "learning_rate": 9.278095238095239e-06, "loss": 35.6078, "step": 10429 }, { "epoch": 248.33432835820895, "grad_norm": 23.028915405273438, "learning_rate": 9.277142857142858e-06, "loss": 36.3881, "step": 10430 }, { "epoch": 248.3582089552239, "grad_norm": 17.872678756713867, "learning_rate": 9.276190476190477e-06, "loss": 36.1854, "step": 10431 }, { "epoch": 248.3820895522388, "grad_norm": 16.718168258666992, "learning_rate": 9.275238095238095e-06, "loss": 36.5898, "step": 10432 }, { "epoch": 248.40597014925373, "grad_norm": 14.91796875, "learning_rate": 9.274285714285714e-06, "loss": 35.5027, "step": 10433 }, { "epoch": 248.42985074626867, "grad_norm": 17.224084854125977, "learning_rate": 9.273333333333335e-06, "loss": 36.627, "step": 10434 }, { "epoch": 248.45373134328358, "grad_norm": 14.895155906677246, "learning_rate": 9.272380952380952e-06, "loss": 36.2135, "step": 10435 }, { "epoch": 248.47761194029852, "grad_norm": 17.61219596862793, "learning_rate": 9.271428571428573e-06, "loss": 36.2376, "step": 10436 }, { "epoch": 248.50149253731342, "grad_norm": 17.631935119628906, "learning_rate": 9.270476190476192e-06, "loss": 36.7248, "step": 10437 }, { "epoch": 248.52537313432836, "grad_norm": 19.364099502563477, "learning_rate": 9.26952380952381e-06, "loss": 36.2454, "step": 10438 }, { "epoch": 248.54925373134327, "grad_norm": 14.401960372924805, "learning_rate": 9.26857142857143e-06, "loss": 36.7114, "step": 10439 }, { "epoch": 248.5731343283582, "grad_norm": 24.623205184936523, "learning_rate": 9.267619047619048e-06, "loss": 37.094, "step": 10440 }, { "epoch": 248.59701492537314, "grad_norm": 19.38271141052246, "learning_rate": 9.266666666666667e-06, "loss": 36.164, "step": 10441 }, { "epoch": 248.62089552238805, "grad_norm": 18.6229305267334, "learning_rate": 9.265714285714286e-06, "loss": 35.5167, "step": 10442 }, { "epoch": 248.644776119403, "grad_norm": 23.795459747314453, "learning_rate": 9.264761904761905e-06, "loss": 36.2195, "step": 10443 }, { "epoch": 248.6686567164179, "grad_norm": 14.197778701782227, "learning_rate": 9.263809523809526e-06, "loss": 34.8214, "step": 10444 }, { "epoch": 248.69253731343284, "grad_norm": 26.66496467590332, "learning_rate": 9.262857142857143e-06, "loss": 34.9951, "step": 10445 }, { "epoch": 248.71641791044777, "grad_norm": 21.354198455810547, "learning_rate": 9.261904761904763e-06, "loss": 36.153, "step": 10446 }, { "epoch": 248.74029850746268, "grad_norm": 24.006046295166016, "learning_rate": 9.260952380952382e-06, "loss": 36.0312, "step": 10447 }, { "epoch": 248.76417910447762, "grad_norm": 19.989986419677734, "learning_rate": 9.260000000000001e-06, "loss": 36.9065, "step": 10448 }, { "epoch": 248.78805970149253, "grad_norm": 21.58749771118164, "learning_rate": 9.25904761904762e-06, "loss": 35.464, "step": 10449 }, { "epoch": 248.81194029850747, "grad_norm": 19.249740600585938, "learning_rate": 9.258095238095239e-06, "loss": 34.9151, "step": 10450 }, { "epoch": 248.83582089552237, "grad_norm": 16.742633819580078, "learning_rate": 9.257142857142858e-06, "loss": 36.4266, "step": 10451 }, { "epoch": 248.8597014925373, "grad_norm": 20.563678741455078, "learning_rate": 9.256190476190477e-06, "loss": 35.2583, "step": 10452 }, { "epoch": 248.88358208955225, "grad_norm": 17.337919235229492, "learning_rate": 9.255238095238096e-06, "loss": 34.6552, "step": 10453 }, { "epoch": 248.90746268656716, "grad_norm": 18.023143768310547, "learning_rate": 9.254285714285714e-06, "loss": 35.7979, "step": 10454 }, { "epoch": 248.9313432835821, "grad_norm": 20.726966857910156, "learning_rate": 9.253333333333333e-06, "loss": 34.7366, "step": 10455 }, { "epoch": 248.955223880597, "grad_norm": 13.486516952514648, "learning_rate": 9.252380952380952e-06, "loss": 35.0271, "step": 10456 }, { "epoch": 248.97910447761194, "grad_norm": 19.241594314575195, "learning_rate": 9.251428571428573e-06, "loss": 36.4078, "step": 10457 }, { "epoch": 249.0, "grad_norm": 18.16057586669922, "learning_rate": 9.25047619047619e-06, "loss": 32.761, "step": 10458 }, { "epoch": 249.02388059701494, "grad_norm": 15.924449920654297, "learning_rate": 9.24952380952381e-06, "loss": 35.9239, "step": 10459 }, { "epoch": 249.04776119402985, "grad_norm": 17.457326889038086, "learning_rate": 9.24857142857143e-06, "loss": 36.6521, "step": 10460 }, { "epoch": 249.07164179104478, "grad_norm": 17.974369049072266, "learning_rate": 9.247619047619048e-06, "loss": 35.8246, "step": 10461 }, { "epoch": 249.0955223880597, "grad_norm": 18.67871856689453, "learning_rate": 9.246666666666667e-06, "loss": 35.4491, "step": 10462 }, { "epoch": 249.11940298507463, "grad_norm": 15.618583679199219, "learning_rate": 9.245714285714286e-06, "loss": 35.8043, "step": 10463 }, { "epoch": 249.14328358208957, "grad_norm": 16.679800033569336, "learning_rate": 9.244761904761905e-06, "loss": 36.6722, "step": 10464 }, { "epoch": 249.16716417910447, "grad_norm": 15.760109901428223, "learning_rate": 9.243809523809526e-06, "loss": 36.4263, "step": 10465 }, { "epoch": 249.1910447761194, "grad_norm": 14.788159370422363, "learning_rate": 9.242857142857143e-06, "loss": 36.8779, "step": 10466 }, { "epoch": 249.21492537313432, "grad_norm": 19.794042587280273, "learning_rate": 9.241904761904764e-06, "loss": 36.17, "step": 10467 }, { "epoch": 249.23880597014926, "grad_norm": 14.545011520385742, "learning_rate": 9.24095238095238e-06, "loss": 34.7776, "step": 10468 }, { "epoch": 249.26268656716417, "grad_norm": 17.406023025512695, "learning_rate": 9.240000000000001e-06, "loss": 35.3433, "step": 10469 }, { "epoch": 249.2865671641791, "grad_norm": 18.526445388793945, "learning_rate": 9.23904761904762e-06, "loss": 36.0128, "step": 10470 }, { "epoch": 249.31044776119404, "grad_norm": 20.413652420043945, "learning_rate": 9.238095238095239e-06, "loss": 36.7404, "step": 10471 }, { "epoch": 249.33432835820895, "grad_norm": 14.878579139709473, "learning_rate": 9.237142857142858e-06, "loss": 34.8491, "step": 10472 }, { "epoch": 249.3582089552239, "grad_norm": 25.159494400024414, "learning_rate": 9.236190476190477e-06, "loss": 35.5789, "step": 10473 }, { "epoch": 249.3820895522388, "grad_norm": 18.520427703857422, "learning_rate": 9.235238095238096e-06, "loss": 35.8261, "step": 10474 }, { "epoch": 249.40597014925373, "grad_norm": 18.790002822875977, "learning_rate": 9.234285714285715e-06, "loss": 34.3875, "step": 10475 }, { "epoch": 249.42985074626867, "grad_norm": 18.690185546875, "learning_rate": 9.233333333333334e-06, "loss": 36.4462, "step": 10476 }, { "epoch": 249.45373134328358, "grad_norm": 18.580333709716797, "learning_rate": 9.232380952380952e-06, "loss": 35.0818, "step": 10477 }, { "epoch": 249.47761194029852, "grad_norm": 16.21982765197754, "learning_rate": 9.231428571428573e-06, "loss": 35.0682, "step": 10478 }, { "epoch": 249.50149253731342, "grad_norm": 20.19671058654785, "learning_rate": 9.23047619047619e-06, "loss": 36.4464, "step": 10479 }, { "epoch": 249.52537313432836, "grad_norm": 15.847772598266602, "learning_rate": 9.229523809523811e-06, "loss": 35.3635, "step": 10480 }, { "epoch": 249.54925373134327, "grad_norm": 22.037715911865234, "learning_rate": 9.22857142857143e-06, "loss": 35.3782, "step": 10481 }, { "epoch": 249.5731343283582, "grad_norm": 16.708955764770508, "learning_rate": 9.227619047619049e-06, "loss": 35.9639, "step": 10482 }, { "epoch": 249.59701492537314, "grad_norm": 20.463565826416016, "learning_rate": 9.226666666666668e-06, "loss": 35.7338, "step": 10483 }, { "epoch": 249.62089552238805, "grad_norm": 20.576095581054688, "learning_rate": 9.225714285714286e-06, "loss": 34.4538, "step": 10484 }, { "epoch": 249.644776119403, "grad_norm": 13.691299438476562, "learning_rate": 9.224761904761905e-06, "loss": 35.6541, "step": 10485 }, { "epoch": 249.6686567164179, "grad_norm": 25.414104461669922, "learning_rate": 9.223809523809524e-06, "loss": 36.3277, "step": 10486 }, { "epoch": 249.69253731343284, "grad_norm": 18.99018096923828, "learning_rate": 9.222857142857143e-06, "loss": 34.9627, "step": 10487 }, { "epoch": 249.71641791044777, "grad_norm": 16.70145606994629, "learning_rate": 9.221904761904764e-06, "loss": 36.5231, "step": 10488 }, { "epoch": 249.74029850746268, "grad_norm": 27.659765243530273, "learning_rate": 9.220952380952381e-06, "loss": 35.7842, "step": 10489 }, { "epoch": 249.76417910447762, "grad_norm": 18.988645553588867, "learning_rate": 9.220000000000002e-06, "loss": 34.4095, "step": 10490 }, { "epoch": 249.78805970149253, "grad_norm": 30.256803512573242, "learning_rate": 9.21904761904762e-06, "loss": 34.4638, "step": 10491 }, { "epoch": 249.81194029850747, "grad_norm": 23.25464630126953, "learning_rate": 9.21809523809524e-06, "loss": 37.1966, "step": 10492 }, { "epoch": 249.83582089552237, "grad_norm": 32.919071197509766, "learning_rate": 9.217142857142858e-06, "loss": 36.1309, "step": 10493 }, { "epoch": 249.8597014925373, "grad_norm": 25.972665786743164, "learning_rate": 9.216190476190477e-06, "loss": 36.472, "step": 10494 }, { "epoch": 249.88358208955225, "grad_norm": 35.995391845703125, "learning_rate": 9.215238095238096e-06, "loss": 35.8965, "step": 10495 }, { "epoch": 249.90746268656716, "grad_norm": 34.837398529052734, "learning_rate": 9.214285714285715e-06, "loss": 36.5224, "step": 10496 }, { "epoch": 249.9313432835821, "grad_norm": 24.088912963867188, "learning_rate": 9.213333333333334e-06, "loss": 35.1959, "step": 10497 }, { "epoch": 249.955223880597, "grad_norm": 23.177581787109375, "learning_rate": 9.212380952380953e-06, "loss": 35.1953, "step": 10498 }, { "epoch": 249.97910447761194, "grad_norm": 28.00490951538086, "learning_rate": 9.211428571428572e-06, "loss": 36.7443, "step": 10499 }, { "epoch": 250.0, "grad_norm": 18.642913818359375, "learning_rate": 9.21047619047619e-06, "loss": 30.3419, "step": 10500 }, { "epoch": 250.0, "step": 10500, "total_flos": 5.161723630445509e+17, "train_loss": 2.888430085136777, "train_runtime": 25668.5876, "train_samples_per_second": 52.126, "train_steps_per_second": 0.409 }, { "epoch": 250.02388059701494, "grad_norm": 28.627452850341797, "learning_rate": 1e-05, "loss": 36.1543, "step": 10501 }, { "epoch": 250.04776119402985, "grad_norm": Infinity, "learning_rate": 9.99908424908425e-06, "loss": 44.2863, "step": 10502 }, { "epoch": 250.07164179104478, "grad_norm": Infinity, "learning_rate": 9.99908424908425e-06, "loss": 42.9037, "step": 10503 }, { "epoch": 250.0955223880597, "grad_norm": 468.29779052734375, "learning_rate": 9.99908424908425e-06, "loss": 43.7593, "step": 10504 }, { "epoch": 250.11940298507463, "grad_norm": 299.20428466796875, "learning_rate": 9.998168498168499e-06, "loss": 40.5097, "step": 10505 }, { "epoch": 250.14328358208957, "grad_norm": 90.93639373779297, "learning_rate": 9.997252747252748e-06, "loss": 37.8938, "step": 10506 }, { "epoch": 250.16716417910447, "grad_norm": 102.78959655761719, "learning_rate": 9.996336996336997e-06, "loss": 37.3469, "step": 10507 }, { "epoch": 250.1910447761194, "grad_norm": 79.3243408203125, "learning_rate": 9.995421245421246e-06, "loss": 35.8218, "step": 10508 }, { "epoch": 250.21492537313432, "grad_norm": 63.47758483886719, "learning_rate": 9.994505494505496e-06, "loss": 35.8017, "step": 10509 }, { "epoch": 250.23880597014926, "grad_norm": 50.43954086303711, "learning_rate": 9.993589743589745e-06, "loss": 37.1732, "step": 10510 }, { "epoch": 250.26268656716417, "grad_norm": 41.410343170166016, "learning_rate": 9.992673992673994e-06, "loss": 37.0164, "step": 10511 }, { "epoch": 250.2865671641791, "grad_norm": 39.2127685546875, "learning_rate": 9.991758241758243e-06, "loss": 36.3307, "step": 10512 }, { "epoch": 250.31044776119404, "grad_norm": 28.172439575195312, "learning_rate": 9.990842490842492e-06, "loss": 36.6386, "step": 10513 }, { "epoch": 250.33432835820895, "grad_norm": 29.20684242248535, "learning_rate": 9.98992673992674e-06, "loss": 36.7574, "step": 10514 }, { "epoch": 250.3582089552239, "grad_norm": 19.41738510131836, "learning_rate": 9.98901098901099e-06, "loss": 35.7028, "step": 10515 }, { "epoch": 250.3820895522388, "grad_norm": 23.410886764526367, "learning_rate": 9.988095238095239e-06, "loss": 36.2899, "step": 10516 }, { "epoch": 250.40597014925373, "grad_norm": 20.149150848388672, "learning_rate": 9.987179487179488e-06, "loss": 36.4283, "step": 10517 }, { "epoch": 250.42985074626867, "grad_norm": 19.36992073059082, "learning_rate": 9.986263736263737e-06, "loss": 34.6911, "step": 10518 }, { "epoch": 250.45373134328358, "grad_norm": 17.414880752563477, "learning_rate": 9.985347985347986e-06, "loss": 36.1586, "step": 10519 }, { "epoch": 250.47761194029852, "grad_norm": 23.79262924194336, "learning_rate": 9.984432234432236e-06, "loss": 36.3096, "step": 10520 }, { "epoch": 250.50149253731342, "grad_norm": 19.86983871459961, "learning_rate": 9.983516483516485e-06, "loss": 35.8956, "step": 10521 }, { "epoch": 250.52537313432836, "grad_norm": 17.115524291992188, "learning_rate": 9.982600732600734e-06, "loss": 37.0792, "step": 10522 }, { "epoch": 250.54925373134327, "grad_norm": 18.628732681274414, "learning_rate": 9.981684981684983e-06, "loss": 35.3367, "step": 10523 }, { "epoch": 250.5731343283582, "grad_norm": 18.738739013671875, "learning_rate": 9.980769230769232e-06, "loss": 36.0787, "step": 10524 }, { "epoch": 250.59701492537314, "grad_norm": 18.18105125427246, "learning_rate": 9.97985347985348e-06, "loss": 35.7178, "step": 10525 }, { "epoch": 250.62089552238805, "grad_norm": 16.89411735534668, "learning_rate": 9.97893772893773e-06, "loss": 35.0889, "step": 10526 }, { "epoch": 250.644776119403, "grad_norm": 16.15926170349121, "learning_rate": 9.978021978021979e-06, "loss": 35.7411, "step": 10527 }, { "epoch": 250.6686567164179, "grad_norm": 21.53275489807129, "learning_rate": 9.977106227106228e-06, "loss": 35.1226, "step": 10528 }, { "epoch": 250.69253731343284, "grad_norm": 18.122953414916992, "learning_rate": 9.976190476190477e-06, "loss": 36.944, "step": 10529 }, { "epoch": 250.71641791044777, "grad_norm": 17.504945755004883, "learning_rate": 9.975274725274726e-06, "loss": 35.4007, "step": 10530 }, { "epoch": 250.74029850746268, "grad_norm": 14.981329917907715, "learning_rate": 9.974358974358974e-06, "loss": 36.2675, "step": 10531 }, { "epoch": 250.76417910447762, "grad_norm": 15.348061561584473, "learning_rate": 9.973443223443225e-06, "loss": 35.8405, "step": 10532 }, { "epoch": 250.78805970149253, "grad_norm": 21.222579956054688, "learning_rate": 9.972527472527474e-06, "loss": 35.9964, "step": 10533 }, { "epoch": 250.81194029850747, "grad_norm": 14.186641693115234, "learning_rate": 9.971611721611723e-06, "loss": 35.1728, "step": 10534 }, { "epoch": 250.83582089552237, "grad_norm": 20.096670150756836, "learning_rate": 9.970695970695972e-06, "loss": 35.2764, "step": 10535 }, { "epoch": 250.8597014925373, "grad_norm": 16.014314651489258, "learning_rate": 9.969780219780221e-06, "loss": 35.7504, "step": 10536 }, { "epoch": 250.88358208955225, "grad_norm": 22.304344177246094, "learning_rate": 9.96886446886447e-06, "loss": 35.7299, "step": 10537 }, { "epoch": 250.90746268656716, "grad_norm": 17.067577362060547, "learning_rate": 9.967948717948719e-06, "loss": 36.3097, "step": 10538 }, { "epoch": 250.9313432835821, "grad_norm": 20.34279441833496, "learning_rate": 9.967032967032968e-06, "loss": 36.7156, "step": 10539 }, { "epoch": 250.955223880597, "grad_norm": 18.29696273803711, "learning_rate": 9.966117216117217e-06, "loss": 34.6629, "step": 10540 }, { "epoch": 250.97910447761194, "grad_norm": 17.84090805053711, "learning_rate": 9.965201465201466e-06, "loss": 35.3662, "step": 10541 }, { "epoch": 251.0, "grad_norm": 14.382659912109375, "learning_rate": 9.964285714285714e-06, "loss": 31.1388, "step": 10542 }, { "epoch": 251.02388059701494, "grad_norm": 16.641250610351562, "learning_rate": 9.963369963369965e-06, "loss": 35.9895, "step": 10543 }, { "epoch": 251.04776119402985, "grad_norm": 24.440488815307617, "learning_rate": 9.962454212454214e-06, "loss": 36.1701, "step": 10544 }, { "epoch": 251.07164179104478, "grad_norm": 15.083442687988281, "learning_rate": 9.961538461538463e-06, "loss": 34.2133, "step": 10545 }, { "epoch": 251.0955223880597, "grad_norm": 18.536592483520508, "learning_rate": 9.960622710622712e-06, "loss": 36.3549, "step": 10546 }, { "epoch": 251.11940298507463, "grad_norm": 21.398738861083984, "learning_rate": 9.959706959706961e-06, "loss": 36.8876, "step": 10547 }, { "epoch": 251.14328358208957, "grad_norm": 19.11338996887207, "learning_rate": 9.95879120879121e-06, "loss": 35.3419, "step": 10548 }, { "epoch": 251.16716417910447, "grad_norm": 14.9404296875, "learning_rate": 9.957875457875459e-06, "loss": 35.8585, "step": 10549 }, { "epoch": 251.1910447761194, "grad_norm": 17.329944610595703, "learning_rate": 9.956959706959708e-06, "loss": 34.8394, "step": 10550 }, { "epoch": 251.21492537313432, "grad_norm": 18.323760986328125, "learning_rate": 9.956043956043957e-06, "loss": 35.0695, "step": 10551 }, { "epoch": 251.23880597014926, "grad_norm": 14.89617919921875, "learning_rate": 9.955128205128206e-06, "loss": 36.0575, "step": 10552 }, { "epoch": 251.26268656716417, "grad_norm": 14.273953437805176, "learning_rate": 9.954212454212454e-06, "loss": 34.5598, "step": 10553 }, { "epoch": 251.2865671641791, "grad_norm": 20.694751739501953, "learning_rate": 9.953296703296705e-06, "loss": 35.7871, "step": 10554 }, { "epoch": 251.31044776119404, "grad_norm": 18.564138412475586, "learning_rate": 9.952380952380954e-06, "loss": 34.7276, "step": 10555 }, { "epoch": 251.33432835820895, "grad_norm": 16.587295532226562, "learning_rate": 9.951465201465203e-06, "loss": 33.4794, "step": 10556 }, { "epoch": 251.3582089552239, "grad_norm": 13.673843383789062, "learning_rate": 9.950549450549452e-06, "loss": 35.7263, "step": 10557 }, { "epoch": 251.3820895522388, "grad_norm": 17.324235916137695, "learning_rate": 9.949633699633701e-06, "loss": 37.2802, "step": 10558 }, { "epoch": 251.40597014925373, "grad_norm": 15.747190475463867, "learning_rate": 9.94871794871795e-06, "loss": 35.5896, "step": 10559 }, { "epoch": 251.42985074626867, "grad_norm": 13.725537300109863, "learning_rate": 9.947802197802199e-06, "loss": 34.8298, "step": 10560 }, { "epoch": 251.45373134328358, "grad_norm": 15.41905689239502, "learning_rate": 9.946886446886448e-06, "loss": 35.1748, "step": 10561 }, { "epoch": 251.47761194029852, "grad_norm": 14.468822479248047, "learning_rate": 9.945970695970697e-06, "loss": 36.4486, "step": 10562 }, { "epoch": 251.50149253731342, "grad_norm": 24.43714141845703, "learning_rate": 9.945054945054946e-06, "loss": 35.5891, "step": 10563 }, { "epoch": 251.52537313432836, "grad_norm": 15.711543083190918, "learning_rate": 9.944139194139194e-06, "loss": 34.2064, "step": 10564 }, { "epoch": 251.54925373134327, "grad_norm": 18.085830688476562, "learning_rate": 9.943223443223443e-06, "loss": 35.3084, "step": 10565 }, { "epoch": 251.5731343283582, "grad_norm": 26.0734920501709, "learning_rate": 9.942307692307694e-06, "loss": 36.4838, "step": 10566 }, { "epoch": 251.59701492537314, "grad_norm": 16.947580337524414, "learning_rate": 9.941391941391943e-06, "loss": 35.6721, "step": 10567 }, { "epoch": 251.62089552238805, "grad_norm": 14.086678504943848, "learning_rate": 9.940476190476192e-06, "loss": 35.9191, "step": 10568 }, { "epoch": 251.644776119403, "grad_norm": 21.768564224243164, "learning_rate": 9.939560439560441e-06, "loss": 37.1093, "step": 10569 }, { "epoch": 251.6686567164179, "grad_norm": 19.308162689208984, "learning_rate": 9.93864468864469e-06, "loss": 35.1927, "step": 10570 }, { "epoch": 251.69253731343284, "grad_norm": 13.51604175567627, "learning_rate": 9.937728937728939e-06, "loss": 36.3524, "step": 10571 }, { "epoch": 251.71641791044777, "grad_norm": 23.234386444091797, "learning_rate": 9.936813186813188e-06, "loss": 36.2885, "step": 10572 }, { "epoch": 251.74029850746268, "grad_norm": 20.840883255004883, "learning_rate": 9.935897435897437e-06, "loss": 35.4507, "step": 10573 }, { "epoch": 251.76417910447762, "grad_norm": 17.30851173400879, "learning_rate": 9.934981684981686e-06, "loss": 36.5154, "step": 10574 }, { "epoch": 251.78805970149253, "grad_norm": 20.508922576904297, "learning_rate": 9.934065934065935e-06, "loss": 37.1845, "step": 10575 }, { "epoch": 251.81194029850747, "grad_norm": 16.55733871459961, "learning_rate": 9.933150183150183e-06, "loss": 35.43, "step": 10576 }, { "epoch": 251.83582089552237, "grad_norm": 19.661312103271484, "learning_rate": 9.932234432234434e-06, "loss": 37.1997, "step": 10577 }, { "epoch": 251.8597014925373, "grad_norm": 19.369338989257812, "learning_rate": 9.931318681318683e-06, "loss": 35.6278, "step": 10578 }, { "epoch": 251.88358208955225, "grad_norm": 19.126426696777344, "learning_rate": 9.930402930402932e-06, "loss": 36.139, "step": 10579 }, { "epoch": 251.90746268656716, "grad_norm": 15.560017585754395, "learning_rate": 9.929487179487181e-06, "loss": 35.9964, "step": 10580 }, { "epoch": 251.9313432835821, "grad_norm": 19.47745704650879, "learning_rate": 9.92857142857143e-06, "loss": 34.6067, "step": 10581 }, { "epoch": 251.955223880597, "grad_norm": 17.10700225830078, "learning_rate": 9.927655677655679e-06, "loss": 35.3225, "step": 10582 }, { "epoch": 251.97910447761194, "grad_norm": 17.476547241210938, "learning_rate": 9.926739926739928e-06, "loss": 36.3405, "step": 10583 }, { "epoch": 252.0, "grad_norm": 13.041790962219238, "learning_rate": 9.925824175824177e-06, "loss": 30.8011, "step": 10584 }, { "epoch": 252.02388059701494, "grad_norm": 17.96108627319336, "learning_rate": 9.924908424908426e-06, "loss": 35.7446, "step": 10585 }, { "epoch": 252.04776119402985, "grad_norm": 16.00111198425293, "learning_rate": 9.923992673992675e-06, "loss": 36.71, "step": 10586 }, { "epoch": 252.07164179104478, "grad_norm": 17.1088809967041, "learning_rate": 9.923076923076923e-06, "loss": 34.772, "step": 10587 }, { "epoch": 252.0955223880597, "grad_norm": 14.14958667755127, "learning_rate": 9.922161172161174e-06, "loss": 36.1205, "step": 10588 }, { "epoch": 252.11940298507463, "grad_norm": 15.669458389282227, "learning_rate": 9.921245421245423e-06, "loss": 35.1513, "step": 10589 }, { "epoch": 252.14328358208957, "grad_norm": 13.559708595275879, "learning_rate": 9.920329670329672e-06, "loss": 36.3405, "step": 10590 }, { "epoch": 252.16716417910447, "grad_norm": 14.526890754699707, "learning_rate": 9.919413919413921e-06, "loss": 35.3845, "step": 10591 }, { "epoch": 252.1910447761194, "grad_norm": 15.401045799255371, "learning_rate": 9.91849816849817e-06, "loss": 34.7524, "step": 10592 }, { "epoch": 252.21492537313432, "grad_norm": 15.31092357635498, "learning_rate": 9.917582417582419e-06, "loss": 36.2405, "step": 10593 }, { "epoch": 252.23880597014926, "grad_norm": 17.768455505371094, "learning_rate": 9.916666666666668e-06, "loss": 36.9562, "step": 10594 }, { "epoch": 252.26268656716417, "grad_norm": 16.205974578857422, "learning_rate": 9.915750915750917e-06, "loss": 34.8221, "step": 10595 }, { "epoch": 252.2865671641791, "grad_norm": 17.758411407470703, "learning_rate": 9.914835164835166e-06, "loss": 35.4166, "step": 10596 }, { "epoch": 252.31044776119404, "grad_norm": 18.2944393157959, "learning_rate": 9.913919413919415e-06, "loss": 36.4242, "step": 10597 }, { "epoch": 252.33432835820895, "grad_norm": 15.548382759094238, "learning_rate": 9.913003663003663e-06, "loss": 36.1634, "step": 10598 }, { "epoch": 252.3582089552239, "grad_norm": NaN, "learning_rate": 9.912087912087912e-06, "loss": 39.2695, "step": 10599 }, { "epoch": 252.3820895522388, "grad_norm": 22.2453670501709, "learning_rate": 9.912087912087912e-06, "loss": 35.3513, "step": 10600 }, { "epoch": 252.40597014925373, "grad_norm": 14.400615692138672, "learning_rate": 9.911172161172163e-06, "loss": 35.7981, "step": 10601 }, { "epoch": 252.42985074626867, "grad_norm": 22.20725440979004, "learning_rate": 9.910256410256412e-06, "loss": 35.2674, "step": 10602 }, { "epoch": 252.45373134328358, "grad_norm": 19.966716766357422, "learning_rate": 9.909340659340661e-06, "loss": 35.7884, "step": 10603 }, { "epoch": 252.47761194029852, "grad_norm": 18.020273208618164, "learning_rate": 9.90842490842491e-06, "loss": 35.4788, "step": 10604 }, { "epoch": 252.50149253731342, "grad_norm": 14.49180793762207, "learning_rate": 9.907509157509159e-06, "loss": 35.6622, "step": 10605 }, { "epoch": 252.52537313432836, "grad_norm": 20.081396102905273, "learning_rate": 9.906593406593408e-06, "loss": 36.1306, "step": 10606 }, { "epoch": 252.54925373134327, "grad_norm": 16.541526794433594, "learning_rate": 9.905677655677657e-06, "loss": 35.7649, "step": 10607 }, { "epoch": 252.5731343283582, "grad_norm": 18.83778190612793, "learning_rate": 9.904761904761906e-06, "loss": 35.8396, "step": 10608 }, { "epoch": 252.59701492537314, "grad_norm": 16.112417221069336, "learning_rate": 9.903846153846155e-06, "loss": 36.2038, "step": 10609 }, { "epoch": 252.62089552238805, "grad_norm": 17.835330963134766, "learning_rate": 9.902930402930403e-06, "loss": 36.0932, "step": 10610 }, { "epoch": 252.644776119403, "grad_norm": 18.30726432800293, "learning_rate": 9.902014652014652e-06, "loss": 36.0771, "step": 10611 }, { "epoch": 252.6686567164179, "grad_norm": 15.045116424560547, "learning_rate": 9.901098901098903e-06, "loss": 36.3511, "step": 10612 }, { "epoch": 252.69253731343284, "grad_norm": 14.947028160095215, "learning_rate": 9.900183150183152e-06, "loss": 35.7482, "step": 10613 }, { "epoch": 252.71641791044777, "grad_norm": 14.970281600952148, "learning_rate": 9.899267399267401e-06, "loss": 35.0967, "step": 10614 }, { "epoch": 252.74029850746268, "grad_norm": 14.05617618560791, "learning_rate": 9.89835164835165e-06, "loss": 34.6361, "step": 10615 }, { "epoch": 252.76417910447762, "grad_norm": 18.122276306152344, "learning_rate": 9.897435897435899e-06, "loss": 35.2779, "step": 10616 }, { "epoch": 252.78805970149253, "grad_norm": 16.886306762695312, "learning_rate": 9.896520146520148e-06, "loss": 35.1227, "step": 10617 }, { "epoch": 252.81194029850747, "grad_norm": 21.922508239746094, "learning_rate": 9.895604395604397e-06, "loss": 35.4356, "step": 10618 }, { "epoch": 252.83582089552237, "grad_norm": 14.450441360473633, "learning_rate": 9.894688644688646e-06, "loss": 35.8054, "step": 10619 }, { "epoch": 252.8597014925373, "grad_norm": 18.629562377929688, "learning_rate": 9.893772893772895e-06, "loss": 34.0044, "step": 10620 }, { "epoch": 252.88358208955225, "grad_norm": 19.719114303588867, "learning_rate": 9.892857142857143e-06, "loss": 35.2392, "step": 10621 }, { "epoch": 252.90746268656716, "grad_norm": 16.302021026611328, "learning_rate": 9.891941391941392e-06, "loss": 36.1957, "step": 10622 }, { "epoch": 252.9313432835821, "grad_norm": 17.12664031982422, "learning_rate": 9.891025641025643e-06, "loss": 35.7172, "step": 10623 }, { "epoch": 252.955223880597, "grad_norm": 14.867701530456543, "learning_rate": 9.890109890109892e-06, "loss": 35.5574, "step": 10624 }, { "epoch": 252.97910447761194, "grad_norm": NaN, "learning_rate": 9.889194139194141e-06, "loss": 57.8029, "step": 10625 }, { "epoch": 253.0, "grad_norm": 16.28997230529785, "learning_rate": 9.889194139194141e-06, "loss": 31.8982, "step": 10626 }, { "epoch": 253.02388059701494, "grad_norm": 20.980283737182617, "learning_rate": 9.88827838827839e-06, "loss": 36.4026, "step": 10627 }, { "epoch": 253.04776119402985, "grad_norm": 14.806510925292969, "learning_rate": 9.887362637362639e-06, "loss": 35.5898, "step": 10628 }, { "epoch": 253.07164179104478, "grad_norm": 14.85124397277832, "learning_rate": 9.886446886446888e-06, "loss": 34.7019, "step": 10629 }, { "epoch": 253.0955223880597, "grad_norm": 16.852712631225586, "learning_rate": 9.885531135531137e-06, "loss": 35.713, "step": 10630 }, { "epoch": 253.11940298507463, "grad_norm": 18.0716552734375, "learning_rate": 9.884615384615386e-06, "loss": 36.6052, "step": 10631 }, { "epoch": 253.14328358208957, "grad_norm": 16.551170349121094, "learning_rate": 9.883699633699635e-06, "loss": 35.315, "step": 10632 }, { "epoch": 253.16716417910447, "grad_norm": 17.68378448486328, "learning_rate": 9.882783882783884e-06, "loss": 35.2735, "step": 10633 }, { "epoch": 253.1910447761194, "grad_norm": 13.689247131347656, "learning_rate": 9.881868131868132e-06, "loss": 35.1588, "step": 10634 }, { "epoch": 253.21492537313432, "grad_norm": 17.445785522460938, "learning_rate": 9.880952380952381e-06, "loss": 36.4603, "step": 10635 }, { "epoch": 253.23880597014926, "grad_norm": 14.128791809082031, "learning_rate": 9.880036630036632e-06, "loss": 35.8233, "step": 10636 }, { "epoch": 253.26268656716417, "grad_norm": 20.628108978271484, "learning_rate": 9.879120879120881e-06, "loss": 36.1753, "step": 10637 }, { "epoch": 253.2865671641791, "grad_norm": 17.83094596862793, "learning_rate": 9.87820512820513e-06, "loss": 34.08, "step": 10638 }, { "epoch": 253.31044776119404, "grad_norm": 16.69542694091797, "learning_rate": 9.877289377289379e-06, "loss": 35.9817, "step": 10639 }, { "epoch": 253.33432835820895, "grad_norm": 22.935531616210938, "learning_rate": 9.876373626373628e-06, "loss": 35.6918, "step": 10640 }, { "epoch": 253.3582089552239, "grad_norm": 17.162960052490234, "learning_rate": 9.875457875457877e-06, "loss": 35.6679, "step": 10641 }, { "epoch": 253.3820895522388, "grad_norm": 19.26456642150879, "learning_rate": 9.874542124542126e-06, "loss": 35.9056, "step": 10642 }, { "epoch": 253.40597014925373, "grad_norm": 16.83243751525879, "learning_rate": 9.873626373626375e-06, "loss": 35.8743, "step": 10643 }, { "epoch": 253.42985074626867, "grad_norm": 16.80619239807129, "learning_rate": 9.872710622710624e-06, "loss": 35.5985, "step": 10644 }, { "epoch": 253.45373134328358, "grad_norm": 17.081470489501953, "learning_rate": 9.871794871794872e-06, "loss": 36.2124, "step": 10645 }, { "epoch": 253.47761194029852, "grad_norm": 16.921592712402344, "learning_rate": 9.870879120879121e-06, "loss": 37.2168, "step": 10646 }, { "epoch": 253.50149253731342, "grad_norm": 19.568693161010742, "learning_rate": 9.869963369963372e-06, "loss": 35.4296, "step": 10647 }, { "epoch": 253.52537313432836, "grad_norm": 15.584951400756836, "learning_rate": 9.869047619047621e-06, "loss": 34.499, "step": 10648 }, { "epoch": 253.54925373134327, "grad_norm": NaN, "learning_rate": 9.86813186813187e-06, "loss": 51.2525, "step": 10649 }, { "epoch": 253.5731343283582, "grad_norm": 20.872446060180664, "learning_rate": 9.86813186813187e-06, "loss": 36.0167, "step": 10650 }, { "epoch": 253.59701492537314, "grad_norm": 15.182371139526367, "learning_rate": 9.867216117216119e-06, "loss": 35.4729, "step": 10651 }, { "epoch": 253.62089552238805, "grad_norm": 16.24883270263672, "learning_rate": 9.866300366300368e-06, "loss": 35.0087, "step": 10652 }, { "epoch": 253.644776119403, "grad_norm": 17.47576904296875, "learning_rate": 9.865384615384617e-06, "loss": 36.4064, "step": 10653 }, { "epoch": 253.6686567164179, "grad_norm": 14.746021270751953, "learning_rate": 9.864468864468866e-06, "loss": 34.953, "step": 10654 }, { "epoch": 253.69253731343284, "grad_norm": 17.191213607788086, "learning_rate": 9.863553113553115e-06, "loss": 35.8077, "step": 10655 }, { "epoch": 253.71641791044777, "grad_norm": 18.858600616455078, "learning_rate": 9.862637362637364e-06, "loss": 35.475, "step": 10656 }, { "epoch": 253.74029850746268, "grad_norm": 15.895038604736328, "learning_rate": 9.861721611721612e-06, "loss": 35.7695, "step": 10657 }, { "epoch": 253.76417910447762, "grad_norm": 16.461393356323242, "learning_rate": 9.860805860805861e-06, "loss": 34.7558, "step": 10658 }, { "epoch": 253.78805970149253, "grad_norm": 13.94180679321289, "learning_rate": 9.859890109890112e-06, "loss": 35.5977, "step": 10659 }, { "epoch": 253.81194029850747, "grad_norm": 19.791231155395508, "learning_rate": 9.858974358974361e-06, "loss": 34.6555, "step": 10660 }, { "epoch": 253.83582089552237, "grad_norm": 23.466222763061523, "learning_rate": 9.85805860805861e-06, "loss": 36.2696, "step": 10661 }, { "epoch": 253.8597014925373, "grad_norm": 13.160076141357422, "learning_rate": 9.857142857142859e-06, "loss": 34.7341, "step": 10662 }, { "epoch": 253.88358208955225, "grad_norm": 22.34493637084961, "learning_rate": 9.856227106227108e-06, "loss": 36.6567, "step": 10663 }, { "epoch": 253.90746268656716, "grad_norm": 19.96533203125, "learning_rate": 9.855311355311357e-06, "loss": 35.8252, "step": 10664 }, { "epoch": 253.9313432835821, "grad_norm": 14.463510513305664, "learning_rate": 9.854395604395606e-06, "loss": 35.7169, "step": 10665 }, { "epoch": 253.955223880597, "grad_norm": 16.703460693359375, "learning_rate": 9.853479853479855e-06, "loss": 36.2886, "step": 10666 }, { "epoch": 253.97910447761194, "grad_norm": 18.7047119140625, "learning_rate": 9.852564102564104e-06, "loss": 34.8688, "step": 10667 }, { "epoch": 254.0, "grad_norm": 18.250211715698242, "learning_rate": 9.851648351648352e-06, "loss": 30.4847, "step": 10668 }, { "epoch": 254.02388059701494, "grad_norm": 15.180882453918457, "learning_rate": 9.850732600732601e-06, "loss": 35.606, "step": 10669 }, { "epoch": 254.04776119402985, "grad_norm": 21.814985275268555, "learning_rate": 9.84981684981685e-06, "loss": 34.8113, "step": 10670 }, { "epoch": 254.07164179104478, "grad_norm": 19.73859405517578, "learning_rate": 9.848901098901101e-06, "loss": 35.4863, "step": 10671 }, { "epoch": 254.0955223880597, "grad_norm": 16.870332717895508, "learning_rate": 9.84798534798535e-06, "loss": 33.621, "step": 10672 }, { "epoch": 254.11940298507463, "grad_norm": 13.403437614440918, "learning_rate": 9.847069597069599e-06, "loss": 35.928, "step": 10673 }, { "epoch": 254.14328358208957, "grad_norm": 22.546737670898438, "learning_rate": 9.846153846153848e-06, "loss": 34.8652, "step": 10674 }, { "epoch": 254.16716417910447, "grad_norm": 21.375059127807617, "learning_rate": 9.845238095238097e-06, "loss": 35.008, "step": 10675 }, { "epoch": 254.1910447761194, "grad_norm": 13.61618423461914, "learning_rate": 9.844322344322346e-06, "loss": 35.4534, "step": 10676 }, { "epoch": 254.21492537313432, "grad_norm": 30.130348205566406, "learning_rate": 9.843406593406593e-06, "loss": 35.8521, "step": 10677 }, { "epoch": 254.23880597014926, "grad_norm": 21.24388313293457, "learning_rate": 9.842490842490844e-06, "loss": 36.396, "step": 10678 }, { "epoch": 254.26268656716417, "grad_norm": 24.324691772460938, "learning_rate": 9.841575091575092e-06, "loss": 35.4676, "step": 10679 }, { "epoch": 254.2865671641791, "grad_norm": 20.920507431030273, "learning_rate": 9.840659340659341e-06, "loss": 34.6309, "step": 10680 }, { "epoch": 254.31044776119404, "grad_norm": 14.969414710998535, "learning_rate": 9.83974358974359e-06, "loss": 35.4426, "step": 10681 }, { "epoch": 254.33432835820895, "grad_norm": 29.528079986572266, "learning_rate": 9.83882783882784e-06, "loss": 34.5512, "step": 10682 }, { "epoch": 254.3582089552239, "grad_norm": 18.79596519470215, "learning_rate": 9.837912087912088e-06, "loss": 34.5028, "step": 10683 }, { "epoch": 254.3820895522388, "grad_norm": 30.32623291015625, "learning_rate": 9.836996336996337e-06, "loss": 36.0936, "step": 10684 }, { "epoch": 254.40597014925373, "grad_norm": 19.43465232849121, "learning_rate": 9.836080586080586e-06, "loss": 35.9299, "step": 10685 }, { "epoch": 254.42985074626867, "grad_norm": 29.898040771484375, "learning_rate": 9.835164835164835e-06, "loss": 36.6122, "step": 10686 }, { "epoch": 254.45373134328358, "grad_norm": 19.775339126586914, "learning_rate": 9.834249084249084e-06, "loss": 35.9243, "step": 10687 }, { "epoch": 254.47761194029852, "grad_norm": 21.716243743896484, "learning_rate": 9.833333333333333e-06, "loss": 36.7411, "step": 10688 }, { "epoch": 254.50149253731342, "grad_norm": 26.77560806274414, "learning_rate": 9.832417582417582e-06, "loss": 35.0157, "step": 10689 }, { "epoch": 254.52537313432836, "grad_norm": 17.745500564575195, "learning_rate": 9.831501831501832e-06, "loss": 36.6571, "step": 10690 }, { "epoch": 254.54925373134327, "grad_norm": 35.65312194824219, "learning_rate": 9.830586080586081e-06, "loss": 35.8319, "step": 10691 }, { "epoch": 254.5731343283582, "grad_norm": 24.51959991455078, "learning_rate": 9.82967032967033e-06, "loss": 36.0061, "step": 10692 }, { "epoch": 254.59701492537314, "grad_norm": 32.02620315551758, "learning_rate": 9.82875457875458e-06, "loss": 36.1562, "step": 10693 }, { "epoch": 254.62089552238805, "grad_norm": 22.318777084350586, "learning_rate": 9.827838827838828e-06, "loss": 35.8546, "step": 10694 }, { "epoch": 254.644776119403, "grad_norm": 35.524166107177734, "learning_rate": 9.826923076923077e-06, "loss": 36.0162, "step": 10695 }, { "epoch": 254.6686567164179, "grad_norm": 23.13291358947754, "learning_rate": 9.826007326007326e-06, "loss": 34.7835, "step": 10696 }, { "epoch": 254.69253731343284, "grad_norm": 44.42383575439453, "learning_rate": 9.825091575091575e-06, "loss": 36.2586, "step": 10697 }, { "epoch": 254.71641791044777, "grad_norm": 36.46686935424805, "learning_rate": 9.824175824175824e-06, "loss": 35.597, "step": 10698 }, { "epoch": 254.74029850746268, "grad_norm": 28.54291534423828, "learning_rate": 9.823260073260073e-06, "loss": 35.1425, "step": 10699 }, { "epoch": 254.76417910447762, "grad_norm": 30.905284881591797, "learning_rate": 9.822344322344322e-06, "loss": 35.0142, "step": 10700 }, { "epoch": 254.78805970149253, "grad_norm": 25.988637924194336, "learning_rate": 9.821428571428573e-06, "loss": 35.7779, "step": 10701 }, { "epoch": 254.81194029850747, "grad_norm": 23.147994995117188, "learning_rate": 9.820512820512821e-06, "loss": 35.113, "step": 10702 }, { "epoch": 254.83582089552237, "grad_norm": 33.8663215637207, "learning_rate": 9.81959706959707e-06, "loss": 36.0861, "step": 10703 }, { "epoch": 254.8597014925373, "grad_norm": 27.69142723083496, "learning_rate": 9.81868131868132e-06, "loss": 34.5732, "step": 10704 }, { "epoch": 254.88358208955225, "grad_norm": 31.73871612548828, "learning_rate": 9.817765567765568e-06, "loss": 35.9929, "step": 10705 }, { "epoch": 254.90746268656716, "grad_norm": 32.973655700683594, "learning_rate": 9.816849816849817e-06, "loss": 36.3537, "step": 10706 }, { "epoch": 254.9313432835821, "grad_norm": 26.514671325683594, "learning_rate": 9.815934065934066e-06, "loss": 35.5262, "step": 10707 }, { "epoch": 254.955223880597, "grad_norm": 26.40163803100586, "learning_rate": 9.815018315018315e-06, "loss": 35.4639, "step": 10708 }, { "epoch": 254.97910447761194, "grad_norm": 31.693086624145508, "learning_rate": 9.814102564102564e-06, "loss": 36.5741, "step": 10709 }, { "epoch": 255.0, "grad_norm": 25.91537857055664, "learning_rate": 9.813186813186813e-06, "loss": 31.1786, "step": 10710 }, { "epoch": 255.02388059701494, "grad_norm": 28.340864181518555, "learning_rate": 9.812271062271062e-06, "loss": 35.6379, "step": 10711 }, { "epoch": 255.04776119402985, "grad_norm": 24.879518508911133, "learning_rate": 9.811355311355313e-06, "loss": 35.3954, "step": 10712 }, { "epoch": 255.07164179104478, "grad_norm": 35.86328887939453, "learning_rate": 9.810439560439561e-06, "loss": 35.1415, "step": 10713 }, { "epoch": 255.0955223880597, "grad_norm": 26.792383193969727, "learning_rate": 9.80952380952381e-06, "loss": 35.8137, "step": 10714 }, { "epoch": 255.11940298507463, "grad_norm": 34.05632400512695, "learning_rate": 9.80860805860806e-06, "loss": 36.3998, "step": 10715 }, { "epoch": 255.14328358208957, "grad_norm": 31.903276443481445, "learning_rate": 9.807692307692308e-06, "loss": 35.251, "step": 10716 }, { "epoch": 255.16716417910447, "grad_norm": 26.982019424438477, "learning_rate": 9.806776556776557e-06, "loss": 35.2743, "step": 10717 }, { "epoch": 255.1910447761194, "grad_norm": 25.058006286621094, "learning_rate": 9.805860805860806e-06, "loss": 35.5574, "step": 10718 }, { "epoch": 255.21492537313432, "grad_norm": 29.534523010253906, "learning_rate": 9.804945054945055e-06, "loss": 36.2936, "step": 10719 }, { "epoch": 255.23880597014926, "grad_norm": 23.080780029296875, "learning_rate": 9.804029304029304e-06, "loss": 36.2085, "step": 10720 }, { "epoch": 255.26268656716417, "grad_norm": 35.26456069946289, "learning_rate": 9.803113553113553e-06, "loss": 36.2345, "step": 10721 }, { "epoch": 255.2865671641791, "grad_norm": 31.185991287231445, "learning_rate": 9.802197802197802e-06, "loss": 36.1327, "step": 10722 }, { "epoch": 255.31044776119404, "grad_norm": 28.98785400390625, "learning_rate": 9.801282051282053e-06, "loss": 34.582, "step": 10723 }, { "epoch": 255.33432835820895, "grad_norm": 28.52055549621582, "learning_rate": 9.800366300366301e-06, "loss": 35.0338, "step": 10724 }, { "epoch": 255.3582089552239, "grad_norm": 28.13873291015625, "learning_rate": 9.79945054945055e-06, "loss": 35.6373, "step": 10725 }, { "epoch": 255.3820895522388, "grad_norm": 22.614482879638672, "learning_rate": 9.7985347985348e-06, "loss": 34.2453, "step": 10726 }, { "epoch": 255.40597014925373, "grad_norm": 31.850677490234375, "learning_rate": 9.797619047619048e-06, "loss": 35.9406, "step": 10727 }, { "epoch": 255.42985074626867, "grad_norm": 28.57715606689453, "learning_rate": 9.796703296703297e-06, "loss": 36.612, "step": 10728 }, { "epoch": 255.45373134328358, "grad_norm": 30.37266731262207, "learning_rate": 9.795787545787546e-06, "loss": 35.3086, "step": 10729 }, { "epoch": 255.47761194029852, "grad_norm": 28.511348724365234, "learning_rate": 9.794871794871795e-06, "loss": 35.2757, "step": 10730 }, { "epoch": 255.50149253731342, "grad_norm": 27.241514205932617, "learning_rate": 9.793956043956044e-06, "loss": 34.9542, "step": 10731 }, { "epoch": 255.52537313432836, "grad_norm": 25.99595069885254, "learning_rate": 9.793040293040293e-06, "loss": 35.4933, "step": 10732 }, { "epoch": 255.54925373134327, "grad_norm": 31.940399169921875, "learning_rate": 9.792124542124542e-06, "loss": 36.7967, "step": 10733 }, { "epoch": 255.5731343283582, "grad_norm": 26.10555076599121, "learning_rate": 9.79120879120879e-06, "loss": 35.4873, "step": 10734 }, { "epoch": 255.59701492537314, "grad_norm": 30.710800170898438, "learning_rate": 9.790293040293041e-06, "loss": 35.6772, "step": 10735 }, { "epoch": 255.62089552238805, "grad_norm": 26.41657829284668, "learning_rate": 9.78937728937729e-06, "loss": 35.5839, "step": 10736 }, { "epoch": 255.644776119403, "grad_norm": 27.801651000976562, "learning_rate": 9.78846153846154e-06, "loss": 36.1724, "step": 10737 }, { "epoch": 255.6686567164179, "grad_norm": 25.25094223022461, "learning_rate": 9.787545787545788e-06, "loss": 34.6832, "step": 10738 }, { "epoch": 255.69253731343284, "grad_norm": 32.438541412353516, "learning_rate": 9.786630036630037e-06, "loss": 34.7333, "step": 10739 }, { "epoch": 255.71641791044777, "grad_norm": 24.55843734741211, "learning_rate": 9.785714285714286e-06, "loss": 34.6326, "step": 10740 }, { "epoch": 255.74029850746268, "grad_norm": 33.32674789428711, "learning_rate": 9.784798534798535e-06, "loss": 35.8815, "step": 10741 }, { "epoch": 255.76417910447762, "grad_norm": 30.69706916809082, "learning_rate": 9.783882783882784e-06, "loss": 36.6207, "step": 10742 }, { "epoch": 255.78805970149253, "grad_norm": 26.328582763671875, "learning_rate": 9.782967032967033e-06, "loss": 36.0933, "step": 10743 }, { "epoch": 255.81194029850747, "grad_norm": 24.8414306640625, "learning_rate": 9.782051282051282e-06, "loss": 35.9427, "step": 10744 }, { "epoch": 255.83582089552237, "grad_norm": 27.940202713012695, "learning_rate": 9.781135531135531e-06, "loss": 34.602, "step": 10745 }, { "epoch": 255.8597014925373, "grad_norm": 25.440942764282227, "learning_rate": 9.780219780219781e-06, "loss": 35.254, "step": 10746 }, { "epoch": 255.88358208955225, "grad_norm": 32.52458572387695, "learning_rate": 9.77930402930403e-06, "loss": 35.3009, "step": 10747 }, { "epoch": 255.90746268656716, "grad_norm": 27.224624633789062, "learning_rate": 9.77838827838828e-06, "loss": 34.3565, "step": 10748 }, { "epoch": 255.9313432835821, "grad_norm": 26.033937454223633, "learning_rate": 9.777472527472528e-06, "loss": 36.2242, "step": 10749 }, { "epoch": 255.955223880597, "grad_norm": 24.01490020751953, "learning_rate": 9.776556776556777e-06, "loss": 35.5857, "step": 10750 }, { "epoch": 255.97910447761194, "grad_norm": 25.998130798339844, "learning_rate": 9.775641025641026e-06, "loss": 34.538, "step": 10751 }, { "epoch": 256.0, "grad_norm": 19.31838607788086, "learning_rate": 9.774725274725275e-06, "loss": 30.682, "step": 10752 }, { "epoch": 256.0238805970149, "grad_norm": 31.648189544677734, "learning_rate": 9.773809523809524e-06, "loss": 36.0167, "step": 10753 }, { "epoch": 256.0477611940299, "grad_norm": 27.066003799438477, "learning_rate": 9.772893772893773e-06, "loss": 36.0605, "step": 10754 }, { "epoch": 256.0716417910448, "grad_norm": 29.421175003051758, "learning_rate": 9.771978021978022e-06, "loss": 36.7364, "step": 10755 }, { "epoch": 256.0955223880597, "grad_norm": 25.722335815429688, "learning_rate": 9.771062271062271e-06, "loss": 35.1385, "step": 10756 }, { "epoch": 256.1194029850746, "grad_norm": 28.203012466430664, "learning_rate": 9.770146520146521e-06, "loss": 34.9666, "step": 10757 }, { "epoch": 256.14328358208957, "grad_norm": 26.54973793029785, "learning_rate": 9.76923076923077e-06, "loss": 35.6997, "step": 10758 }, { "epoch": 256.1671641791045, "grad_norm": 28.533681869506836, "learning_rate": 9.76831501831502e-06, "loss": 35.9888, "step": 10759 }, { "epoch": 256.1910447761194, "grad_norm": 26.133970260620117, "learning_rate": 9.767399267399268e-06, "loss": 35.8334, "step": 10760 }, { "epoch": 256.21492537313435, "grad_norm": 26.8007755279541, "learning_rate": 9.766483516483517e-06, "loss": 35.0449, "step": 10761 }, { "epoch": 256.23880597014926, "grad_norm": 25.353805541992188, "learning_rate": 9.765567765567766e-06, "loss": 35.4466, "step": 10762 }, { "epoch": 256.26268656716417, "grad_norm": 31.411041259765625, "learning_rate": 9.764652014652015e-06, "loss": 34.9599, "step": 10763 }, { "epoch": 256.28656716417913, "grad_norm": 26.66668701171875, "learning_rate": 9.763736263736264e-06, "loss": 34.7922, "step": 10764 }, { "epoch": 256.31044776119404, "grad_norm": 28.99530029296875, "learning_rate": 9.762820512820513e-06, "loss": 35.2831, "step": 10765 }, { "epoch": 256.33432835820895, "grad_norm": 24.208465576171875, "learning_rate": 9.761904761904762e-06, "loss": 36.117, "step": 10766 }, { "epoch": 256.35820895522386, "grad_norm": 30.489770889282227, "learning_rate": 9.760989010989011e-06, "loss": 35.5068, "step": 10767 }, { "epoch": 256.3820895522388, "grad_norm": 25.3145751953125, "learning_rate": 9.76007326007326e-06, "loss": 35.2146, "step": 10768 }, { "epoch": 256.40597014925373, "grad_norm": 29.982032775878906, "learning_rate": 9.75915750915751e-06, "loss": 34.6297, "step": 10769 }, { "epoch": 256.42985074626864, "grad_norm": 26.358388900756836, "learning_rate": 9.75824175824176e-06, "loss": 34.6239, "step": 10770 }, { "epoch": 256.4537313432836, "grad_norm": 28.4139404296875, "learning_rate": 9.757326007326008e-06, "loss": 34.3071, "step": 10771 }, { "epoch": 256.4776119402985, "grad_norm": 26.507587432861328, "learning_rate": 9.756410256410257e-06, "loss": 34.3401, "step": 10772 }, { "epoch": 256.5014925373134, "grad_norm": 25.461633682250977, "learning_rate": 9.755494505494506e-06, "loss": 34.5249, "step": 10773 }, { "epoch": 256.52537313432833, "grad_norm": 24.861305236816406, "learning_rate": 9.754578754578755e-06, "loss": 36.0731, "step": 10774 }, { "epoch": 256.5492537313433, "grad_norm": 25.40403938293457, "learning_rate": 9.753663003663004e-06, "loss": 35.6725, "step": 10775 }, { "epoch": 256.5731343283582, "grad_norm": 21.132911682128906, "learning_rate": 9.752747252747253e-06, "loss": 35.4077, "step": 10776 }, { "epoch": 256.5970149253731, "grad_norm": 30.389541625976562, "learning_rate": 9.751831501831502e-06, "loss": 34.7391, "step": 10777 }, { "epoch": 256.6208955223881, "grad_norm": 26.128162384033203, "learning_rate": 9.750915750915751e-06, "loss": 35.3478, "step": 10778 }, { "epoch": 256.644776119403, "grad_norm": 23.610139846801758, "learning_rate": 9.75e-06, "loss": 36.2763, "step": 10779 }, { "epoch": 256.6686567164179, "grad_norm": 24.85352897644043, "learning_rate": 9.74908424908425e-06, "loss": 35.2415, "step": 10780 }, { "epoch": 256.6925373134328, "grad_norm": 21.928003311157227, "learning_rate": 9.7481684981685e-06, "loss": 34.296, "step": 10781 }, { "epoch": 256.7164179104478, "grad_norm": 19.506946563720703, "learning_rate": 9.747252747252748e-06, "loss": 36.9694, "step": 10782 }, { "epoch": 256.7402985074627, "grad_norm": 25.680326461791992, "learning_rate": 9.746336996336997e-06, "loss": 37.0831, "step": 10783 }, { "epoch": 256.7641791044776, "grad_norm": 20.675579071044922, "learning_rate": 9.745421245421246e-06, "loss": 34.5955, "step": 10784 }, { "epoch": 256.78805970149256, "grad_norm": 23.569568634033203, "learning_rate": 9.744505494505495e-06, "loss": 35.6532, "step": 10785 }, { "epoch": 256.81194029850747, "grad_norm": 21.853796005249023, "learning_rate": 9.743589743589744e-06, "loss": 34.6575, "step": 10786 }, { "epoch": 256.8358208955224, "grad_norm": 24.558605194091797, "learning_rate": 9.742673992673993e-06, "loss": 36.6856, "step": 10787 }, { "epoch": 256.85970149253734, "grad_norm": 20.775253295898438, "learning_rate": 9.741758241758242e-06, "loss": 35.0564, "step": 10788 }, { "epoch": 256.88358208955225, "grad_norm": 23.337413787841797, "learning_rate": 9.740842490842491e-06, "loss": 36.3046, "step": 10789 }, { "epoch": 256.90746268656716, "grad_norm": 21.221128463745117, "learning_rate": 9.73992673992674e-06, "loss": 35.5655, "step": 10790 }, { "epoch": 256.93134328358207, "grad_norm": 22.46784019470215, "learning_rate": 9.73901098901099e-06, "loss": 35.6197, "step": 10791 }, { "epoch": 256.95522388059703, "grad_norm": 21.595149993896484, "learning_rate": 9.73809523809524e-06, "loss": 36.4712, "step": 10792 }, { "epoch": 256.97910447761194, "grad_norm": 26.207984924316406, "learning_rate": 9.737179487179488e-06, "loss": 36.3833, "step": 10793 }, { "epoch": 257.0, "grad_norm": 19.580183029174805, "learning_rate": 9.736263736263737e-06, "loss": 31.4192, "step": 10794 }, { "epoch": 257.0238805970149, "grad_norm": 25.709213256835938, "learning_rate": 9.735347985347986e-06, "loss": 36.315, "step": 10795 }, { "epoch": 257.0477611940299, "grad_norm": 21.611906051635742, "learning_rate": 9.734432234432235e-06, "loss": 36.14, "step": 10796 }, { "epoch": 257.0716417910448, "grad_norm": 25.615612030029297, "learning_rate": 9.733516483516484e-06, "loss": 33.9203, "step": 10797 }, { "epoch": 257.0955223880597, "grad_norm": 23.026264190673828, "learning_rate": 9.732600732600733e-06, "loss": 35.9109, "step": 10798 }, { "epoch": 257.1194029850746, "grad_norm": 24.4227352142334, "learning_rate": 9.731684981684982e-06, "loss": 35.716, "step": 10799 }, { "epoch": 257.14328358208957, "grad_norm": 22.758514404296875, "learning_rate": 9.730769230769231e-06, "loss": 36.1396, "step": 10800 }, { "epoch": 257.1671641791045, "grad_norm": 23.67724609375, "learning_rate": 9.72985347985348e-06, "loss": 34.4643, "step": 10801 }, { "epoch": 257.1910447761194, "grad_norm": 24.518661499023438, "learning_rate": 9.728937728937729e-06, "loss": 35.7916, "step": 10802 }, { "epoch": 257.21492537313435, "grad_norm": 17.434823989868164, "learning_rate": 9.72802197802198e-06, "loss": 36.2359, "step": 10803 }, { "epoch": 257.23880597014926, "grad_norm": 18.463014602661133, "learning_rate": 9.727106227106228e-06, "loss": 36.2663, "step": 10804 }, { "epoch": 257.26268656716417, "grad_norm": 17.706605911254883, "learning_rate": 9.726190476190477e-06, "loss": 36.4413, "step": 10805 }, { "epoch": 257.28656716417913, "grad_norm": 17.663585662841797, "learning_rate": 9.725274725274726e-06, "loss": 34.5734, "step": 10806 }, { "epoch": 257.31044776119404, "grad_norm": 18.207059860229492, "learning_rate": 9.724358974358975e-06, "loss": 35.1563, "step": 10807 }, { "epoch": 257.33432835820895, "grad_norm": 16.37620735168457, "learning_rate": 9.723443223443224e-06, "loss": 35.1207, "step": 10808 }, { "epoch": 257.35820895522386, "grad_norm": 22.57314682006836, "learning_rate": 9.722527472527473e-06, "loss": 35.2246, "step": 10809 }, { "epoch": 257.3820895522388, "grad_norm": 14.920421600341797, "learning_rate": 9.721611721611722e-06, "loss": 34.9203, "step": 10810 }, { "epoch": 257.40597014925373, "grad_norm": 27.248626708984375, "learning_rate": 9.720695970695971e-06, "loss": 36.1843, "step": 10811 }, { "epoch": 257.42985074626864, "grad_norm": 19.605594635009766, "learning_rate": 9.71978021978022e-06, "loss": 35.979, "step": 10812 }, { "epoch": 257.4537313432836, "grad_norm": 24.189802169799805, "learning_rate": 9.718864468864469e-06, "loss": 36.6211, "step": 10813 }, { "epoch": 257.4776119402985, "grad_norm": 21.813461303710938, "learning_rate": 9.71794871794872e-06, "loss": 35.1964, "step": 10814 }, { "epoch": 257.5014925373134, "grad_norm": 22.19347381591797, "learning_rate": 9.717032967032968e-06, "loss": 36.1637, "step": 10815 }, { "epoch": 257.52537313432833, "grad_norm": 18.67477035522461, "learning_rate": 9.716117216117217e-06, "loss": 35.0804, "step": 10816 }, { "epoch": 257.5492537313433, "grad_norm": 20.382328033447266, "learning_rate": 9.715201465201466e-06, "loss": 35.3635, "step": 10817 }, { "epoch": 257.5731343283582, "grad_norm": 17.92677116394043, "learning_rate": 9.714285714285715e-06, "loss": 36.0351, "step": 10818 }, { "epoch": 257.5970149253731, "grad_norm": 19.9560604095459, "learning_rate": 9.713369963369964e-06, "loss": 35.8682, "step": 10819 }, { "epoch": 257.6208955223881, "grad_norm": 16.868568420410156, "learning_rate": 9.712454212454213e-06, "loss": 36.0979, "step": 10820 }, { "epoch": 257.644776119403, "grad_norm": 18.30596923828125, "learning_rate": 9.711538461538462e-06, "loss": 34.9613, "step": 10821 }, { "epoch": 257.6686567164179, "grad_norm": 15.170578002929688, "learning_rate": 9.710622710622711e-06, "loss": 35.3382, "step": 10822 }, { "epoch": 257.6925373134328, "grad_norm": 15.260246276855469, "learning_rate": 9.70970695970696e-06, "loss": 34.6027, "step": 10823 }, { "epoch": 257.7164179104478, "grad_norm": 16.107736587524414, "learning_rate": 9.708791208791209e-06, "loss": 35.1339, "step": 10824 }, { "epoch": 257.7402985074627, "grad_norm": 17.758358001708984, "learning_rate": 9.70787545787546e-06, "loss": 35.9333, "step": 10825 }, { "epoch": 257.7641791044776, "grad_norm": 19.96453094482422, "learning_rate": 9.706959706959708e-06, "loss": 35.0148, "step": 10826 }, { "epoch": 257.78805970149256, "grad_norm": 16.72601318359375, "learning_rate": 9.706043956043957e-06, "loss": 34.5997, "step": 10827 }, { "epoch": 257.81194029850747, "grad_norm": 14.448206901550293, "learning_rate": 9.705128205128206e-06, "loss": 35.8951, "step": 10828 }, { "epoch": 257.8358208955224, "grad_norm": 16.692916870117188, "learning_rate": 9.704212454212455e-06, "loss": 34.6624, "step": 10829 }, { "epoch": 257.85970149253734, "grad_norm": 16.81743812561035, "learning_rate": 9.703296703296704e-06, "loss": 35.085, "step": 10830 }, { "epoch": 257.88358208955225, "grad_norm": 21.635976791381836, "learning_rate": 9.702380952380953e-06, "loss": 34.591, "step": 10831 }, { "epoch": 257.90746268656716, "grad_norm": 14.481380462646484, "learning_rate": 9.701465201465202e-06, "loss": 35.0912, "step": 10832 }, { "epoch": 257.93134328358207, "grad_norm": 20.64571189880371, "learning_rate": 9.700549450549451e-06, "loss": 34.6963, "step": 10833 }, { "epoch": 257.95522388059703, "grad_norm": 17.591278076171875, "learning_rate": 9.6996336996337e-06, "loss": 35.2977, "step": 10834 }, { "epoch": 257.97910447761194, "grad_norm": 16.043319702148438, "learning_rate": 9.698717948717949e-06, "loss": 36.273, "step": 10835 }, { "epoch": 258.0, "grad_norm": 18.798046112060547, "learning_rate": 9.697802197802198e-06, "loss": 31.4153, "step": 10836 }, { "epoch": 258.0238805970149, "grad_norm": 13.94228458404541, "learning_rate": 9.696886446886448e-06, "loss": 34.8859, "step": 10837 }, { "epoch": 258.0477611940299, "grad_norm": 18.440574645996094, "learning_rate": 9.695970695970697e-06, "loss": 35.0648, "step": 10838 }, { "epoch": 258.0716417910448, "grad_norm": 19.227081298828125, "learning_rate": 9.695054945054946e-06, "loss": 35.5322, "step": 10839 }, { "epoch": 258.0955223880597, "grad_norm": 17.29041290283203, "learning_rate": 9.694139194139195e-06, "loss": 36.2281, "step": 10840 }, { "epoch": 258.1194029850746, "grad_norm": 18.552640914916992, "learning_rate": 9.693223443223444e-06, "loss": 34.8531, "step": 10841 }, { "epoch": 258.14328358208957, "grad_norm": 17.46238136291504, "learning_rate": 9.692307692307693e-06, "loss": 36.5649, "step": 10842 }, { "epoch": 258.1671641791045, "grad_norm": 19.12788200378418, "learning_rate": 9.691391941391942e-06, "loss": 36.102, "step": 10843 }, { "epoch": 258.1910447761194, "grad_norm": 15.855060577392578, "learning_rate": 9.690476190476191e-06, "loss": 34.5336, "step": 10844 }, { "epoch": 258.21492537313435, "grad_norm": 16.86677360534668, "learning_rate": 9.68956043956044e-06, "loss": 36.6046, "step": 10845 }, { "epoch": 258.23880597014926, "grad_norm": 13.7073392868042, "learning_rate": 9.688644688644689e-06, "loss": 34.4662, "step": 10846 }, { "epoch": 258.26268656716417, "grad_norm": 18.736238479614258, "learning_rate": 9.687728937728938e-06, "loss": 34.877, "step": 10847 }, { "epoch": 258.28656716417913, "grad_norm": 15.149215698242188, "learning_rate": 9.686813186813188e-06, "loss": 35.4903, "step": 10848 }, { "epoch": 258.31044776119404, "grad_norm": 18.4781436920166, "learning_rate": 9.685897435897437e-06, "loss": 36.1894, "step": 10849 }, { "epoch": 258.33432835820895, "grad_norm": 21.694055557250977, "learning_rate": 9.684981684981686e-06, "loss": 35.5048, "step": 10850 }, { "epoch": 258.35820895522386, "grad_norm": 16.5482120513916, "learning_rate": 9.684065934065935e-06, "loss": 34.999, "step": 10851 }, { "epoch": 258.3820895522388, "grad_norm": 15.136181831359863, "learning_rate": 9.683150183150184e-06, "loss": 35.1913, "step": 10852 }, { "epoch": 258.40597014925373, "grad_norm": 17.934253692626953, "learning_rate": 9.682234432234433e-06, "loss": 35.9427, "step": 10853 }, { "epoch": 258.42985074626864, "grad_norm": 17.596426010131836, "learning_rate": 9.681318681318682e-06, "loss": 35.4773, "step": 10854 }, { "epoch": 258.4537313432836, "grad_norm": 18.374597549438477, "learning_rate": 9.680402930402931e-06, "loss": 35.7086, "step": 10855 }, { "epoch": 258.4776119402985, "grad_norm": 19.101961135864258, "learning_rate": 9.67948717948718e-06, "loss": 35.4414, "step": 10856 }, { "epoch": 258.5014925373134, "grad_norm": 15.79054069519043, "learning_rate": 9.678571428571429e-06, "loss": 34.6681, "step": 10857 }, { "epoch": 258.52537313432833, "grad_norm": 16.025163650512695, "learning_rate": 9.677655677655678e-06, "loss": 36.0638, "step": 10858 }, { "epoch": 258.5492537313433, "grad_norm": 14.512070655822754, "learning_rate": 9.676739926739928e-06, "loss": 35.4172, "step": 10859 }, { "epoch": 258.5731343283582, "grad_norm": 16.589923858642578, "learning_rate": 9.675824175824177e-06, "loss": 36.0823, "step": 10860 }, { "epoch": 258.5970149253731, "grad_norm": 18.830434799194336, "learning_rate": 9.674908424908426e-06, "loss": 36.0289, "step": 10861 }, { "epoch": 258.6208955223881, "grad_norm": 14.532703399658203, "learning_rate": 9.673992673992675e-06, "loss": 34.5007, "step": 10862 }, { "epoch": 258.644776119403, "grad_norm": 16.893735885620117, "learning_rate": 9.673076923076924e-06, "loss": 36.3404, "step": 10863 }, { "epoch": 258.6686567164179, "grad_norm": 19.440937042236328, "learning_rate": 9.672161172161173e-06, "loss": 35.9148, "step": 10864 }, { "epoch": 258.6925373134328, "grad_norm": 16.368810653686523, "learning_rate": 9.671245421245422e-06, "loss": 35.4302, "step": 10865 }, { "epoch": 258.7164179104478, "grad_norm": 15.688422203063965, "learning_rate": 9.670329670329671e-06, "loss": 35.8679, "step": 10866 }, { "epoch": 258.7402985074627, "grad_norm": 22.185697555541992, "learning_rate": 9.66941391941392e-06, "loss": 34.4393, "step": 10867 }, { "epoch": 258.7641791044776, "grad_norm": 16.52676773071289, "learning_rate": 9.668498168498169e-06, "loss": 35.5621, "step": 10868 }, { "epoch": 258.78805970149256, "grad_norm": 19.543874740600586, "learning_rate": 9.667582417582418e-06, "loss": 35.0672, "step": 10869 }, { "epoch": 258.81194029850747, "grad_norm": 21.124961853027344, "learning_rate": 9.666666666666667e-06, "loss": 35.4156, "step": 10870 }, { "epoch": 258.8358208955224, "grad_norm": 16.53673553466797, "learning_rate": 9.665750915750917e-06, "loss": 35.6703, "step": 10871 }, { "epoch": 258.85970149253734, "grad_norm": 24.19443702697754, "learning_rate": 9.664835164835166e-06, "loss": 36.3566, "step": 10872 }, { "epoch": 258.88358208955225, "grad_norm": 18.795320510864258, "learning_rate": 9.663919413919415e-06, "loss": 35.2381, "step": 10873 }, { "epoch": 258.90746268656716, "grad_norm": 16.57818031311035, "learning_rate": 9.663003663003664e-06, "loss": 35.2277, "step": 10874 }, { "epoch": 258.93134328358207, "grad_norm": 19.244136810302734, "learning_rate": 9.662087912087913e-06, "loss": 34.7622, "step": 10875 }, { "epoch": 258.95522388059703, "grad_norm": 16.864898681640625, "learning_rate": 9.661172161172162e-06, "loss": 35.8717, "step": 10876 }, { "epoch": 258.97910447761194, "grad_norm": 16.22411346435547, "learning_rate": 9.660256410256411e-06, "loss": 34.1409, "step": 10877 }, { "epoch": 259.0, "grad_norm": NaN, "learning_rate": 9.65934065934066e-06, "loss": 26.7277, "step": 10878 }, { "epoch": 259.0238805970149, "grad_norm": 17.742145538330078, "learning_rate": 9.65934065934066e-06, "loss": 34.9852, "step": 10879 }, { "epoch": 259.0477611940299, "grad_norm": 20.953575134277344, "learning_rate": 9.658424908424909e-06, "loss": 34.5062, "step": 10880 }, { "epoch": 259.0716417910448, "grad_norm": 14.631403923034668, "learning_rate": 9.657509157509158e-06, "loss": 35.8458, "step": 10881 }, { "epoch": 259.0955223880597, "grad_norm": 22.979354858398438, "learning_rate": 9.656593406593407e-06, "loss": 35.386, "step": 10882 }, { "epoch": 259.1194029850746, "grad_norm": 20.7153377532959, "learning_rate": 9.655677655677657e-06, "loss": 35.166, "step": 10883 }, { "epoch": 259.14328358208957, "grad_norm": 19.420682907104492, "learning_rate": 9.654761904761906e-06, "loss": 36.1378, "step": 10884 }, { "epoch": 259.1671641791045, "grad_norm": 23.77364730834961, "learning_rate": 9.653846153846155e-06, "loss": 35.5886, "step": 10885 }, { "epoch": 259.1910447761194, "grad_norm": 21.49879264831543, "learning_rate": 9.652930402930404e-06, "loss": 35.2042, "step": 10886 }, { "epoch": 259.21492537313435, "grad_norm": 17.184059143066406, "learning_rate": 9.652014652014653e-06, "loss": 34.5951, "step": 10887 }, { "epoch": 259.23880597014926, "grad_norm": 22.123655319213867, "learning_rate": 9.651098901098902e-06, "loss": 36.4969, "step": 10888 }, { "epoch": 259.26268656716417, "grad_norm": 20.89151954650879, "learning_rate": 9.650183150183151e-06, "loss": 35.1259, "step": 10889 }, { "epoch": 259.28656716417913, "grad_norm": 16.542654037475586, "learning_rate": 9.6492673992674e-06, "loss": 34.7581, "step": 10890 }, { "epoch": 259.31044776119404, "grad_norm": 18.11886978149414, "learning_rate": 9.648351648351649e-06, "loss": 35.1464, "step": 10891 }, { "epoch": 259.33432835820895, "grad_norm": 18.424983978271484, "learning_rate": 9.647435897435898e-06, "loss": 35.9606, "step": 10892 }, { "epoch": 259.35820895522386, "grad_norm": 17.356115341186523, "learning_rate": 9.646520146520147e-06, "loss": 34.916, "step": 10893 }, { "epoch": 259.3820895522388, "grad_norm": 19.047100067138672, "learning_rate": 9.645604395604397e-06, "loss": 35.1984, "step": 10894 }, { "epoch": 259.40597014925373, "grad_norm": 23.9052791595459, "learning_rate": 9.644688644688646e-06, "loss": 35.7599, "step": 10895 }, { "epoch": 259.42985074626864, "grad_norm": 15.961407661437988, "learning_rate": 9.643772893772895e-06, "loss": 35.9203, "step": 10896 }, { "epoch": 259.4537313432836, "grad_norm": 19.9751033782959, "learning_rate": 9.642857142857144e-06, "loss": 35.1501, "step": 10897 }, { "epoch": 259.4776119402985, "grad_norm": 19.248432159423828, "learning_rate": 9.641941391941393e-06, "loss": 35.7066, "step": 10898 }, { "epoch": 259.5014925373134, "grad_norm": 16.800792694091797, "learning_rate": 9.641025641025642e-06, "loss": 35.9651, "step": 10899 }, { "epoch": 259.52537313432833, "grad_norm": 14.706574440002441, "learning_rate": 9.640109890109891e-06, "loss": 35.5949, "step": 10900 }, { "epoch": 259.5492537313433, "grad_norm": 19.912813186645508, "learning_rate": 9.63919413919414e-06, "loss": 36.1239, "step": 10901 }, { "epoch": 259.5731343283582, "grad_norm": 16.731714248657227, "learning_rate": 9.638278388278389e-06, "loss": 35.5973, "step": 10902 }, { "epoch": 259.5970149253731, "grad_norm": 15.84050178527832, "learning_rate": 9.637362637362638e-06, "loss": 35.244, "step": 10903 }, { "epoch": 259.6208955223881, "grad_norm": 15.029394149780273, "learning_rate": 9.636446886446887e-06, "loss": 35.456, "step": 10904 }, { "epoch": 259.644776119403, "grad_norm": 17.010866165161133, "learning_rate": 9.635531135531136e-06, "loss": 34.9923, "step": 10905 }, { "epoch": 259.6686567164179, "grad_norm": 14.604995727539062, "learning_rate": 9.634615384615386e-06, "loss": 35.0997, "step": 10906 }, { "epoch": 259.6925373134328, "grad_norm": 15.063443183898926, "learning_rate": 9.633699633699635e-06, "loss": 37.2093, "step": 10907 }, { "epoch": 259.7164179104478, "grad_norm": 17.577592849731445, "learning_rate": 9.632783882783884e-06, "loss": 35.4733, "step": 10908 }, { "epoch": 259.7402985074627, "grad_norm": 14.357279777526855, "learning_rate": 9.631868131868133e-06, "loss": 35.1247, "step": 10909 }, { "epoch": 259.7641791044776, "grad_norm": 18.57563591003418, "learning_rate": 9.630952380952382e-06, "loss": 36.0164, "step": 10910 }, { "epoch": 259.78805970149256, "grad_norm": 15.390028953552246, "learning_rate": 9.630036630036631e-06, "loss": 35.0736, "step": 10911 }, { "epoch": 259.81194029850747, "grad_norm": 20.42272186279297, "learning_rate": 9.62912087912088e-06, "loss": 36.8684, "step": 10912 }, { "epoch": 259.8358208955224, "grad_norm": 15.395928382873535, "learning_rate": 9.628205128205129e-06, "loss": 35.2272, "step": 10913 }, { "epoch": 259.85970149253734, "grad_norm": NaN, "learning_rate": 9.627289377289378e-06, "loss": 30.2606, "step": 10914 }, { "epoch": 259.88358208955225, "grad_norm": 25.252601623535156, "learning_rate": 9.627289377289378e-06, "loss": 35.1984, "step": 10915 }, { "epoch": 259.90746268656716, "grad_norm": 20.136913299560547, "learning_rate": 9.626373626373627e-06, "loss": 35.1247, "step": 10916 }, { "epoch": 259.93134328358207, "grad_norm": 17.569561004638672, "learning_rate": 9.625457875457876e-06, "loss": 34.2426, "step": 10917 }, { "epoch": 259.95522388059703, "grad_norm": 25.293659210205078, "learning_rate": 9.624542124542126e-06, "loss": 36.038, "step": 10918 }, { "epoch": 259.97910447761194, "grad_norm": 16.823022842407227, "learning_rate": 9.623626373626375e-06, "loss": 35.0342, "step": 10919 }, { "epoch": 260.0, "grad_norm": 19.249588012695312, "learning_rate": 9.622710622710624e-06, "loss": 29.7084, "step": 10920 }, { "epoch": 260.0, "step": 10920, "total_flos": 5.3683595927778586e+17, "train_loss": 1.370590279303191, "train_runtime": 12821.5252, "train_samples_per_second": 108.53, "train_steps_per_second": 0.852 }, { "epoch": 260.0238805970149, "grad_norm": 21.460094451904297, "learning_rate": 1e-05, "loss": 36.1349, "step": 10921 }, { "epoch": 260.0477611940299, "grad_norm": Infinity, "learning_rate": 9.999149659863946e-06, "loss": 40.2284, "step": 10922 }, { "epoch": 260.0716417910448, "grad_norm": Infinity, "learning_rate": 9.999149659863946e-06, "loss": 41.4672, "step": 10923 }, { "epoch": 260.0955223880597, "grad_norm": 292.49517822265625, "learning_rate": 9.999149659863946e-06, "loss": 40.1311, "step": 10924 }, { "epoch": 260.1194029850746, "grad_norm": 132.06431579589844, "learning_rate": 9.998299319727893e-06, "loss": 39.675, "step": 10925 }, { "epoch": 260.14328358208957, "grad_norm": 93.1929702758789, "learning_rate": 9.997448979591836e-06, "loss": 38.0186, "step": 10926 }, { "epoch": 260.1671641791045, "grad_norm": 48.46129608154297, "learning_rate": 9.996598639455783e-06, "loss": 37.4163, "step": 10927 }, { "epoch": 260.1910447761194, "grad_norm": 57.307220458984375, "learning_rate": 9.995748299319729e-06, "loss": 35.9804, "step": 10928 }, { "epoch": 260.21492537313435, "grad_norm": 42.48515319824219, "learning_rate": 9.994897959183675e-06, "loss": 37.5594, "step": 10929 }, { "epoch": 260.23880597014926, "grad_norm": 46.436134338378906, "learning_rate": 9.99404761904762e-06, "loss": 36.9654, "step": 10930 }, { "epoch": 260.26268656716417, "grad_norm": 42.507808685302734, "learning_rate": 9.993197278911566e-06, "loss": 36.8308, "step": 10931 }, { "epoch": 260.28656716417913, "grad_norm": 28.198928833007812, "learning_rate": 9.992346938775511e-06, "loss": 36.0199, "step": 10932 }, { "epoch": 260.31044776119404, "grad_norm": 36.01531982421875, "learning_rate": 9.991496598639456e-06, "loss": 35.7276, "step": 10933 }, { "epoch": 260.33432835820895, "grad_norm": 27.35661506652832, "learning_rate": 9.990646258503403e-06, "loss": 36.5345, "step": 10934 }, { "epoch": 260.35820895522386, "grad_norm": 23.625049591064453, "learning_rate": 9.989795918367348e-06, "loss": 36.4087, "step": 10935 }, { "epoch": 260.3820895522388, "grad_norm": 21.105745315551758, "learning_rate": 9.988945578231294e-06, "loss": 35.4079, "step": 10936 }, { "epoch": 260.40597014925373, "grad_norm": 22.002107620239258, "learning_rate": 9.988095238095239e-06, "loss": 35.2193, "step": 10937 }, { "epoch": 260.42985074626864, "grad_norm": 18.042898178100586, "learning_rate": 9.987244897959184e-06, "loss": 35.8775, "step": 10938 }, { "epoch": 260.4537313432836, "grad_norm": 22.857986450195312, "learning_rate": 9.98639455782313e-06, "loss": 35.2346, "step": 10939 }, { "epoch": 260.4776119402985, "grad_norm": 20.077600479125977, "learning_rate": 9.985544217687076e-06, "loss": 34.0219, "step": 10940 }, { "epoch": 260.5014925373134, "grad_norm": 18.646535873413086, "learning_rate": 9.984693877551021e-06, "loss": 34.3711, "step": 10941 }, { "epoch": 260.52537313432833, "grad_norm": 14.77027416229248, "learning_rate": 9.983843537414966e-06, "loss": 31.8077, "step": 10942 }, { "epoch": 260.5492537313433, "grad_norm": 14.842784881591797, "learning_rate": 9.982993197278913e-06, "loss": 34.9431, "step": 10943 }, { "epoch": 260.5731343283582, "grad_norm": 17.824871063232422, "learning_rate": 9.982142857142858e-06, "loss": 35.4375, "step": 10944 }, { "epoch": 260.5970149253731, "grad_norm": 15.629002571105957, "learning_rate": 9.981292517006804e-06, "loss": 35.4708, "step": 10945 }, { "epoch": 260.6208955223881, "grad_norm": 14.504796981811523, "learning_rate": 9.980442176870749e-06, "loss": 35.8836, "step": 10946 }, { "epoch": 260.644776119403, "grad_norm": 14.599526405334473, "learning_rate": 9.979591836734694e-06, "loss": 34.7322, "step": 10947 }, { "epoch": 260.6686567164179, "grad_norm": 14.47644329071045, "learning_rate": 9.978741496598641e-06, "loss": 34.6886, "step": 10948 }, { "epoch": 260.6925373134328, "grad_norm": 16.613548278808594, "learning_rate": 9.977891156462586e-06, "loss": 36.2553, "step": 10949 }, { "epoch": 260.7164179104478, "grad_norm": 14.148774147033691, "learning_rate": 9.977040816326531e-06, "loss": 34.3585, "step": 10950 }, { "epoch": 260.7402985074627, "grad_norm": 19.07725715637207, "learning_rate": 9.976190476190477e-06, "loss": 34.6957, "step": 10951 }, { "epoch": 260.7641791044776, "grad_norm": 25.885623931884766, "learning_rate": 9.975340136054422e-06, "loss": 37.466, "step": 10952 }, { "epoch": 260.78805970149256, "grad_norm": 17.284061431884766, "learning_rate": 9.974489795918369e-06, "loss": 36.3699, "step": 10953 }, { "epoch": 260.81194029850747, "grad_norm": 16.200647354125977, "learning_rate": 9.973639455782314e-06, "loss": 35.8758, "step": 10954 }, { "epoch": 260.8358208955224, "grad_norm": 15.058037757873535, "learning_rate": 9.972789115646259e-06, "loss": 35.9636, "step": 10955 }, { "epoch": 260.85970149253734, "grad_norm": 14.795926094055176, "learning_rate": 9.971938775510204e-06, "loss": 36.0804, "step": 10956 }, { "epoch": 260.88358208955225, "grad_norm": 18.57579803466797, "learning_rate": 9.97108843537415e-06, "loss": 35.1295, "step": 10957 }, { "epoch": 260.90746268656716, "grad_norm": 15.663594245910645, "learning_rate": 9.970238095238096e-06, "loss": 34.9655, "step": 10958 }, { "epoch": 260.93134328358207, "grad_norm": 13.628890991210938, "learning_rate": 9.969387755102042e-06, "loss": 34.7612, "step": 10959 }, { "epoch": 260.95522388059703, "grad_norm": 14.406618118286133, "learning_rate": 9.968537414965987e-06, "loss": 33.267, "step": 10960 }, { "epoch": 260.97910447761194, "grad_norm": 21.73625373840332, "learning_rate": 9.967687074829932e-06, "loss": 36.1063, "step": 10961 }, { "epoch": 261.0, "grad_norm": 15.970307350158691, "learning_rate": 9.966836734693879e-06, "loss": 32.4093, "step": 10962 }, { "epoch": 261.0238805970149, "grad_norm": 15.53166675567627, "learning_rate": 9.965986394557824e-06, "loss": 35.7108, "step": 10963 }, { "epoch": 261.0477611940299, "grad_norm": 18.41387939453125, "learning_rate": 9.96513605442177e-06, "loss": 34.8578, "step": 10964 }, { "epoch": 261.0716417910448, "grad_norm": 17.519445419311523, "learning_rate": 9.964285714285714e-06, "loss": 34.715, "step": 10965 }, { "epoch": 261.0955223880597, "grad_norm": 14.596212387084961, "learning_rate": 9.96343537414966e-06, "loss": 36.0262, "step": 10966 }, { "epoch": 261.1194029850746, "grad_norm": 16.763545989990234, "learning_rate": 9.962585034013607e-06, "loss": 35.9315, "step": 10967 }, { "epoch": 261.14328358208957, "grad_norm": 18.176021575927734, "learning_rate": 9.961734693877552e-06, "loss": 36.1361, "step": 10968 }, { "epoch": 261.1671641791045, "grad_norm": 23.32509422302246, "learning_rate": 9.960884353741499e-06, "loss": 35.1856, "step": 10969 }, { "epoch": 261.1910447761194, "grad_norm": 14.871478080749512, "learning_rate": 9.960034013605442e-06, "loss": 36.0057, "step": 10970 }, { "epoch": 261.21492537313435, "grad_norm": 12.681305885314941, "learning_rate": 9.959183673469387e-06, "loss": 34.8678, "step": 10971 }, { "epoch": 261.23880597014926, "grad_norm": 16.676115036010742, "learning_rate": 9.958333333333334e-06, "loss": 35.8609, "step": 10972 }, { "epoch": 261.26268656716417, "grad_norm": 20.324012756347656, "learning_rate": 9.95748299319728e-06, "loss": 34.4661, "step": 10973 }, { "epoch": 261.28656716417913, "grad_norm": 18.328187942504883, "learning_rate": 9.956632653061226e-06, "loss": 34.5163, "step": 10974 }, { "epoch": 261.31044776119404, "grad_norm": 13.199163436889648, "learning_rate": 9.955782312925172e-06, "loss": 34.7459, "step": 10975 }, { "epoch": 261.33432835820895, "grad_norm": 23.660144805908203, "learning_rate": 9.954931972789117e-06, "loss": 34.9794, "step": 10976 }, { "epoch": 261.35820895522386, "grad_norm": 22.1052188873291, "learning_rate": 9.954081632653062e-06, "loss": 35.8654, "step": 10977 }, { "epoch": 261.3820895522388, "grad_norm": 14.417381286621094, "learning_rate": 9.953231292517007e-06, "loss": 35.0759, "step": 10978 }, { "epoch": 261.40597014925373, "grad_norm": 18.374778747558594, "learning_rate": 9.952380952380954e-06, "loss": 35.8716, "step": 10979 }, { "epoch": 261.42985074626864, "grad_norm": 17.47286605834961, "learning_rate": 9.9515306122449e-06, "loss": 35.3625, "step": 10980 }, { "epoch": 261.4537313432836, "grad_norm": 16.161006927490234, "learning_rate": 9.950680272108844e-06, "loss": 34.0341, "step": 10981 }, { "epoch": 261.4776119402985, "grad_norm": 15.4244966506958, "learning_rate": 9.94982993197279e-06, "loss": 35.6073, "step": 10982 }, { "epoch": 261.5014925373134, "grad_norm": 18.360742568969727, "learning_rate": 9.948979591836737e-06, "loss": 35.9493, "step": 10983 }, { "epoch": 261.52537313432833, "grad_norm": 24.266454696655273, "learning_rate": 9.948129251700682e-06, "loss": 35.0714, "step": 10984 }, { "epoch": 261.5492537313433, "grad_norm": 15.144146919250488, "learning_rate": 9.947278911564627e-06, "loss": 36.047, "step": 10985 }, { "epoch": 261.5731343283582, "grad_norm": 15.100632667541504, "learning_rate": 9.946428571428572e-06, "loss": 33.6753, "step": 10986 }, { "epoch": 261.5970149253731, "grad_norm": 18.780982971191406, "learning_rate": 9.945578231292517e-06, "loss": 36.2126, "step": 10987 }, { "epoch": 261.6208955223881, "grad_norm": 15.762274742126465, "learning_rate": 9.944727891156464e-06, "loss": 33.9035, "step": 10988 }, { "epoch": 261.644776119403, "grad_norm": 16.120458602905273, "learning_rate": 9.94387755102041e-06, "loss": 34.6882, "step": 10989 }, { "epoch": 261.6686567164179, "grad_norm": 15.893648147583008, "learning_rate": 9.943027210884355e-06, "loss": 36.0207, "step": 10990 }, { "epoch": 261.6925373134328, "grad_norm": 19.185096740722656, "learning_rate": 9.9421768707483e-06, "loss": 36.1851, "step": 10991 }, { "epoch": 261.7164179104478, "grad_norm": 16.304075241088867, "learning_rate": 9.941326530612245e-06, "loss": 35.5564, "step": 10992 }, { "epoch": 261.7402985074627, "grad_norm": 17.55813980102539, "learning_rate": 9.940476190476192e-06, "loss": 35.186, "step": 10993 }, { "epoch": 261.7641791044776, "grad_norm": 13.970108032226562, "learning_rate": 9.939625850340137e-06, "loss": 35.7754, "step": 10994 }, { "epoch": 261.78805970149256, "grad_norm": 17.354642868041992, "learning_rate": 9.938775510204082e-06, "loss": 35.0764, "step": 10995 }, { "epoch": 261.81194029850747, "grad_norm": 17.21601676940918, "learning_rate": 9.937925170068028e-06, "loss": 35.5803, "step": 10996 }, { "epoch": 261.8358208955224, "grad_norm": 16.45388412475586, "learning_rate": 9.937074829931974e-06, "loss": 35.6118, "step": 10997 }, { "epoch": 261.85970149253734, "grad_norm": 16.805095672607422, "learning_rate": 9.93622448979592e-06, "loss": 35.6055, "step": 10998 }, { "epoch": 261.88358208955225, "grad_norm": 16.22078514099121, "learning_rate": 9.935374149659865e-06, "loss": 35.8316, "step": 10999 }, { "epoch": 261.90746268656716, "grad_norm": 13.935564994812012, "learning_rate": 9.93452380952381e-06, "loss": 35.6217, "step": 11000 }, { "epoch": 261.93134328358207, "grad_norm": 17.076021194458008, "learning_rate": 9.933673469387755e-06, "loss": 34.7995, "step": 11001 }, { "epoch": 261.95522388059703, "grad_norm": 20.567550659179688, "learning_rate": 9.932823129251702e-06, "loss": 35.4546, "step": 11002 }, { "epoch": 261.97910447761194, "grad_norm": 14.342740058898926, "learning_rate": 9.931972789115647e-06, "loss": 35.7293, "step": 11003 }, { "epoch": 262.0, "grad_norm": 14.136404991149902, "learning_rate": 9.931122448979593e-06, "loss": 31.2726, "step": 11004 }, { "epoch": 262.0238805970149, "grad_norm": 14.922279357910156, "learning_rate": 9.930272108843538e-06, "loss": 34.9358, "step": 11005 }, { "epoch": 262.0477611940299, "grad_norm": 16.061193466186523, "learning_rate": 9.929421768707483e-06, "loss": 35.2566, "step": 11006 }, { "epoch": 262.0716417910448, "grad_norm": 15.24997615814209, "learning_rate": 9.92857142857143e-06, "loss": 35.074, "step": 11007 }, { "epoch": 262.0955223880597, "grad_norm": 15.471017837524414, "learning_rate": 9.927721088435375e-06, "loss": 35.4505, "step": 11008 }, { "epoch": 262.1194029850746, "grad_norm": 16.509521484375, "learning_rate": 9.92687074829932e-06, "loss": 34.2916, "step": 11009 }, { "epoch": 262.14328358208957, "grad_norm": 24.300935745239258, "learning_rate": 9.926020408163265e-06, "loss": 35.0017, "step": 11010 }, { "epoch": 262.1671641791045, "grad_norm": 15.514004707336426, "learning_rate": 9.92517006802721e-06, "loss": 34.9278, "step": 11011 }, { "epoch": 262.1910447761194, "grad_norm": 14.313405990600586, "learning_rate": 9.924319727891158e-06, "loss": 35.0097, "step": 11012 }, { "epoch": 262.21492537313435, "grad_norm": 19.78252410888672, "learning_rate": 9.923469387755103e-06, "loss": 34.6696, "step": 11013 }, { "epoch": 262.23880597014926, "grad_norm": 20.762428283691406, "learning_rate": 9.922619047619048e-06, "loss": 34.8702, "step": 11014 }, { "epoch": 262.26268656716417, "grad_norm": 15.121173858642578, "learning_rate": 9.921768707482993e-06, "loss": 35.0737, "step": 11015 }, { "epoch": 262.28656716417913, "grad_norm": 14.178095817565918, "learning_rate": 9.92091836734694e-06, "loss": 35.1259, "step": 11016 }, { "epoch": 262.31044776119404, "grad_norm": 17.63243865966797, "learning_rate": 9.920068027210885e-06, "loss": 34.6101, "step": 11017 }, { "epoch": 262.33432835820895, "grad_norm": 18.282089233398438, "learning_rate": 9.91921768707483e-06, "loss": 35.0133, "step": 11018 }, { "epoch": 262.35820895522386, "grad_norm": 15.828527450561523, "learning_rate": 9.918367346938776e-06, "loss": 35.33, "step": 11019 }, { "epoch": 262.3820895522388, "grad_norm": 17.393463134765625, "learning_rate": 9.91751700680272e-06, "loss": 36.4822, "step": 11020 }, { "epoch": 262.40597014925373, "grad_norm": 18.632503509521484, "learning_rate": 9.916666666666668e-06, "loss": 35.8496, "step": 11021 }, { "epoch": 262.42985074626864, "grad_norm": 14.081930160522461, "learning_rate": 9.915816326530613e-06, "loss": 34.9148, "step": 11022 }, { "epoch": 262.4537313432836, "grad_norm": 18.444381713867188, "learning_rate": 9.91496598639456e-06, "loss": 35.4705, "step": 11023 }, { "epoch": 262.4776119402985, "grad_norm": 14.789766311645508, "learning_rate": 9.914115646258505e-06, "loss": 35.4245, "step": 11024 }, { "epoch": 262.5014925373134, "grad_norm": 16.575105667114258, "learning_rate": 9.913265306122449e-06, "loss": 34.9822, "step": 11025 }, { "epoch": 262.52537313432833, "grad_norm": 19.523876190185547, "learning_rate": 9.912414965986395e-06, "loss": 36.3229, "step": 11026 }, { "epoch": 262.5492537313433, "grad_norm": 16.93731117248535, "learning_rate": 9.91156462585034e-06, "loss": 35.6036, "step": 11027 }, { "epoch": 262.5731343283582, "grad_norm": 15.929445266723633, "learning_rate": 9.910714285714288e-06, "loss": 35.3657, "step": 11028 }, { "epoch": 262.5970149253731, "grad_norm": 15.706140518188477, "learning_rate": 9.909863945578233e-06, "loss": 34.9803, "step": 11029 }, { "epoch": 262.6208955223881, "grad_norm": 14.913352012634277, "learning_rate": 9.909013605442178e-06, "loss": 35.1816, "step": 11030 }, { "epoch": 262.644776119403, "grad_norm": 14.16916275024414, "learning_rate": 9.908163265306123e-06, "loss": 35.0046, "step": 11031 }, { "epoch": 262.6686567164179, "grad_norm": 16.5507755279541, "learning_rate": 9.907312925170068e-06, "loss": 35.28, "step": 11032 }, { "epoch": 262.6925373134328, "grad_norm": 18.04994773864746, "learning_rate": 9.906462585034015e-06, "loss": 35.4641, "step": 11033 }, { "epoch": 262.7164179104478, "grad_norm": 13.117817878723145, "learning_rate": 9.90561224489796e-06, "loss": 36.5712, "step": 11034 }, { "epoch": 262.7402985074627, "grad_norm": 18.238929748535156, "learning_rate": 9.904761904761906e-06, "loss": 34.8052, "step": 11035 }, { "epoch": 262.7641791044776, "grad_norm": 15.109965324401855, "learning_rate": 9.90391156462585e-06, "loss": 35.5693, "step": 11036 }, { "epoch": 262.78805970149256, "grad_norm": 16.457304000854492, "learning_rate": 9.903061224489798e-06, "loss": 34.7483, "step": 11037 }, { "epoch": 262.81194029850747, "grad_norm": 17.961917877197266, "learning_rate": 9.902210884353743e-06, "loss": 35.2234, "step": 11038 }, { "epoch": 262.8358208955224, "grad_norm": 16.238235473632812, "learning_rate": 9.901360544217688e-06, "loss": 35.4148, "step": 11039 }, { "epoch": 262.85970149253734, "grad_norm": 17.8552188873291, "learning_rate": 9.900510204081633e-06, "loss": 34.473, "step": 11040 }, { "epoch": 262.88358208955225, "grad_norm": 20.533002853393555, "learning_rate": 9.899659863945579e-06, "loss": 35.4556, "step": 11041 }, { "epoch": 262.90746268656716, "grad_norm": 18.42131805419922, "learning_rate": 9.898809523809525e-06, "loss": 35.7505, "step": 11042 }, { "epoch": 262.93134328358207, "grad_norm": 18.14691925048828, "learning_rate": 9.89795918367347e-06, "loss": 35.2995, "step": 11043 }, { "epoch": 262.95522388059703, "grad_norm": 15.01701831817627, "learning_rate": 9.897108843537416e-06, "loss": 36.1057, "step": 11044 }, { "epoch": 262.97910447761194, "grad_norm": 16.602943420410156, "learning_rate": 9.896258503401361e-06, "loss": 35.7632, "step": 11045 }, { "epoch": 263.0, "grad_norm": 12.878350257873535, "learning_rate": 9.895408163265306e-06, "loss": 32.1618, "step": 11046 }, { "epoch": 263.0238805970149, "grad_norm": 16.082096099853516, "learning_rate": 9.894557823129253e-06, "loss": 35.2716, "step": 11047 }, { "epoch": 263.0477611940299, "grad_norm": 14.401817321777344, "learning_rate": 9.893707482993198e-06, "loss": 35.612, "step": 11048 }, { "epoch": 263.0716417910448, "grad_norm": 15.765049934387207, "learning_rate": 9.892857142857143e-06, "loss": 36.6397, "step": 11049 }, { "epoch": 263.0955223880597, "grad_norm": 17.880720138549805, "learning_rate": 9.892006802721089e-06, "loss": 36.4781, "step": 11050 }, { "epoch": 263.1194029850746, "grad_norm": 13.4547119140625, "learning_rate": 9.891156462585036e-06, "loss": 35.9454, "step": 11051 }, { "epoch": 263.14328358208957, "grad_norm": 18.206336975097656, "learning_rate": 9.89030612244898e-06, "loss": 34.6142, "step": 11052 }, { "epoch": 263.1671641791045, "grad_norm": 15.568166732788086, "learning_rate": 9.889455782312926e-06, "loss": 35.4857, "step": 11053 }, { "epoch": 263.1910447761194, "grad_norm": 18.235918045043945, "learning_rate": 9.888605442176871e-06, "loss": 34.8914, "step": 11054 }, { "epoch": 263.21492537313435, "grad_norm": 17.768962860107422, "learning_rate": 9.887755102040816e-06, "loss": 34.9301, "step": 11055 }, { "epoch": 263.23880597014926, "grad_norm": 17.910951614379883, "learning_rate": 9.886904761904763e-06, "loss": 36.5989, "step": 11056 }, { "epoch": 263.26268656716417, "grad_norm": 16.65070915222168, "learning_rate": 9.886054421768708e-06, "loss": 34.838, "step": 11057 }, { "epoch": 263.28656716417913, "grad_norm": 16.371315002441406, "learning_rate": 9.885204081632654e-06, "loss": 34.5002, "step": 11058 }, { "epoch": 263.31044776119404, "grad_norm": 16.746990203857422, "learning_rate": 9.884353741496599e-06, "loss": 34.7129, "step": 11059 }, { "epoch": 263.33432835820895, "grad_norm": 16.800657272338867, "learning_rate": 9.883503401360544e-06, "loss": 35.568, "step": 11060 }, { "epoch": 263.35820895522386, "grad_norm": 16.81734848022461, "learning_rate": 9.882653061224491e-06, "loss": 35.0892, "step": 11061 }, { "epoch": 263.3820895522388, "grad_norm": 14.651569366455078, "learning_rate": 9.881802721088436e-06, "loss": 35.1448, "step": 11062 }, { "epoch": 263.40597014925373, "grad_norm": 20.921037673950195, "learning_rate": 9.880952380952381e-06, "loss": 36.1377, "step": 11063 }, { "epoch": 263.42985074626864, "grad_norm": 14.959305763244629, "learning_rate": 9.880102040816327e-06, "loss": 33.6204, "step": 11064 }, { "epoch": 263.4537313432836, "grad_norm": 16.67865562438965, "learning_rate": 9.879251700680272e-06, "loss": 35.7841, "step": 11065 }, { "epoch": 263.4776119402985, "grad_norm": 18.072956085205078, "learning_rate": 9.878401360544219e-06, "loss": 36.6932, "step": 11066 }, { "epoch": 263.5014925373134, "grad_norm": 13.375307083129883, "learning_rate": 9.877551020408164e-06, "loss": 34.0739, "step": 11067 }, { "epoch": 263.52537313432833, "grad_norm": 18.288284301757812, "learning_rate": 9.87670068027211e-06, "loss": 36.3153, "step": 11068 }, { "epoch": 263.5492537313433, "grad_norm": 14.23848819732666, "learning_rate": 9.875850340136054e-06, "loss": 34.0307, "step": 11069 }, { "epoch": 263.5731343283582, "grad_norm": 16.712451934814453, "learning_rate": 9.875000000000001e-06, "loss": 35.2859, "step": 11070 }, { "epoch": 263.5970149253731, "grad_norm": 17.808401107788086, "learning_rate": 9.874149659863946e-06, "loss": 35.2393, "step": 11071 }, { "epoch": 263.6208955223881, "grad_norm": 20.396392822265625, "learning_rate": 9.873299319727892e-06, "loss": 36.3273, "step": 11072 }, { "epoch": 263.644776119403, "grad_norm": 15.763962745666504, "learning_rate": 9.872448979591838e-06, "loss": 34.1094, "step": 11073 }, { "epoch": 263.6686567164179, "grad_norm": 19.09672737121582, "learning_rate": 9.871598639455782e-06, "loss": 35.4983, "step": 11074 }, { "epoch": 263.6925373134328, "grad_norm": 22.557619094848633, "learning_rate": 9.870748299319729e-06, "loss": 34.8201, "step": 11075 }, { "epoch": 263.7164179104478, "grad_norm": 15.600252151489258, "learning_rate": 9.869897959183674e-06, "loss": 35.1453, "step": 11076 }, { "epoch": 263.7402985074627, "grad_norm": 14.198424339294434, "learning_rate": 9.869047619047621e-06, "loss": 34.9486, "step": 11077 }, { "epoch": 263.7641791044776, "grad_norm": 17.77661895751953, "learning_rate": 9.868197278911566e-06, "loss": 35.6436, "step": 11078 }, { "epoch": 263.78805970149256, "grad_norm": 15.402034759521484, "learning_rate": 9.867346938775511e-06, "loss": 35.532, "step": 11079 }, { "epoch": 263.81194029850747, "grad_norm": 16.1749267578125, "learning_rate": 9.866496598639457e-06, "loss": 35.2346, "step": 11080 }, { "epoch": 263.8358208955224, "grad_norm": 17.312959671020508, "learning_rate": 9.865646258503402e-06, "loss": 35.1054, "step": 11081 }, { "epoch": 263.85970149253734, "grad_norm": 20.187667846679688, "learning_rate": 9.864795918367349e-06, "loss": 36.0356, "step": 11082 }, { "epoch": 263.88358208955225, "grad_norm": 19.60118865966797, "learning_rate": 9.863945578231294e-06, "loss": 35.1906, "step": 11083 }, { "epoch": 263.90746268656716, "grad_norm": 15.168771743774414, "learning_rate": 9.863095238095239e-06, "loss": 35.4974, "step": 11084 }, { "epoch": 263.93134328358207, "grad_norm": 18.66791343688965, "learning_rate": 9.862244897959184e-06, "loss": 34.9326, "step": 11085 }, { "epoch": 263.95522388059703, "grad_norm": 20.811931610107422, "learning_rate": 9.86139455782313e-06, "loss": 35.0817, "step": 11086 }, { "epoch": 263.97910447761194, "grad_norm": 17.739748001098633, "learning_rate": 9.860544217687076e-06, "loss": 34.4644, "step": 11087 }, { "epoch": 264.0, "grad_norm": 13.603070259094238, "learning_rate": 9.859693877551022e-06, "loss": 29.6223, "step": 11088 }, { "epoch": 264.0238805970149, "grad_norm": 19.97246551513672, "learning_rate": 9.858843537414967e-06, "loss": 35.4455, "step": 11089 }, { "epoch": 264.0477611940299, "grad_norm": 18.857791900634766, "learning_rate": 9.857993197278912e-06, "loss": 35.2367, "step": 11090 }, { "epoch": 264.0716417910448, "grad_norm": 22.370830535888672, "learning_rate": 9.857142857142859e-06, "loss": 34.594, "step": 11091 }, { "epoch": 264.0955223880597, "grad_norm": 16.556453704833984, "learning_rate": 9.856292517006804e-06, "loss": 34.8722, "step": 11092 }, { "epoch": 264.1194029850746, "grad_norm": 16.963144302368164, "learning_rate": 9.85544217687075e-06, "loss": 35.4546, "step": 11093 }, { "epoch": 264.14328358208957, "grad_norm": 19.128211975097656, "learning_rate": 9.854591836734694e-06, "loss": 35.3999, "step": 11094 }, { "epoch": 264.1671641791045, "grad_norm": NaN, "learning_rate": 9.85374149659864e-06, "loss": 31.2429, "step": 11095 }, { "epoch": 264.1910447761194, "grad_norm": 16.205318450927734, "learning_rate": 9.85374149659864e-06, "loss": 33.6277, "step": 11096 }, { "epoch": 264.21492537313435, "grad_norm": 21.333288192749023, "learning_rate": 9.852891156462587e-06, "loss": 33.896, "step": 11097 }, { "epoch": 264.23880597014926, "grad_norm": 17.041810989379883, "learning_rate": 9.852040816326532e-06, "loss": 35.4154, "step": 11098 }, { "epoch": 264.26268656716417, "grad_norm": 15.609241485595703, "learning_rate": 9.851190476190477e-06, "loss": 34.4953, "step": 11099 }, { "epoch": 264.28656716417913, "grad_norm": 25.065488815307617, "learning_rate": 9.850340136054422e-06, "loss": 35.8314, "step": 11100 }, { "epoch": 264.31044776119404, "grad_norm": 17.720497131347656, "learning_rate": 9.849489795918367e-06, "loss": 36.2945, "step": 11101 }, { "epoch": 264.33432835820895, "grad_norm": 22.21006202697754, "learning_rate": 9.848639455782314e-06, "loss": 35.1814, "step": 11102 }, { "epoch": 264.35820895522386, "grad_norm": 19.061445236206055, "learning_rate": 9.84778911564626e-06, "loss": 34.9758, "step": 11103 }, { "epoch": 264.3820895522388, "grad_norm": 26.431421279907227, "learning_rate": 9.846938775510205e-06, "loss": 34.7944, "step": 11104 }, { "epoch": 264.40597014925373, "grad_norm": 17.299617767333984, "learning_rate": 9.84608843537415e-06, "loss": 35.0264, "step": 11105 }, { "epoch": 264.42985074626864, "grad_norm": 26.452072143554688, "learning_rate": 9.845238095238097e-06, "loss": 35.9682, "step": 11106 }, { "epoch": 264.4537313432836, "grad_norm": 19.976699829101562, "learning_rate": 9.844387755102042e-06, "loss": 36.4401, "step": 11107 }, { "epoch": 264.4776119402985, "grad_norm": 24.28655242919922, "learning_rate": 9.843537414965987e-06, "loss": 35.165, "step": 11108 }, { "epoch": 264.5014925373134, "grad_norm": 23.443822860717773, "learning_rate": 9.842687074829932e-06, "loss": 34.8307, "step": 11109 }, { "epoch": 264.52537313432833, "grad_norm": 18.813709259033203, "learning_rate": 9.841836734693878e-06, "loss": 36.4762, "step": 11110 }, { "epoch": 264.5492537313433, "grad_norm": 27.69648551940918, "learning_rate": 9.840986394557824e-06, "loss": 34.9634, "step": 11111 }, { "epoch": 264.5731343283582, "grad_norm": 19.616514205932617, "learning_rate": 9.84013605442177e-06, "loss": 35.0395, "step": 11112 }, { "epoch": 264.5970149253731, "grad_norm": 22.776201248168945, "learning_rate": 9.839285714285715e-06, "loss": 35.9003, "step": 11113 }, { "epoch": 264.6208955223881, "grad_norm": 25.816699981689453, "learning_rate": 9.83843537414966e-06, "loss": 34.4998, "step": 11114 }, { "epoch": 264.644776119403, "grad_norm": 15.887907981872559, "learning_rate": 9.837585034013605e-06, "loss": 35.5271, "step": 11115 }, { "epoch": 264.6686567164179, "grad_norm": 31.09943199157715, "learning_rate": 9.836734693877552e-06, "loss": 35.6997, "step": 11116 }, { "epoch": 264.6925373134328, "grad_norm": 19.370866775512695, "learning_rate": 9.835884353741497e-06, "loss": 35.66, "step": 11117 }, { "epoch": 264.7164179104478, "grad_norm": 36.458927154541016, "learning_rate": 9.835034013605444e-06, "loss": 36.1021, "step": 11118 }, { "epoch": 264.7402985074627, "grad_norm": 17.150787353515625, "learning_rate": 9.834183673469388e-06, "loss": 33.9725, "step": 11119 }, { "epoch": 264.7641791044776, "grad_norm": 31.396713256835938, "learning_rate": 9.833333333333333e-06, "loss": 34.9248, "step": 11120 }, { "epoch": 264.78805970149256, "grad_norm": 17.050716400146484, "learning_rate": 9.83248299319728e-06, "loss": 34.7095, "step": 11121 }, { "epoch": 264.81194029850747, "grad_norm": 33.8112678527832, "learning_rate": 9.831632653061225e-06, "loss": 34.9646, "step": 11122 }, { "epoch": 264.8358208955224, "grad_norm": 24.080162048339844, "learning_rate": 9.830782312925172e-06, "loss": 36.9544, "step": 11123 }, { "epoch": 264.85970149253734, "grad_norm": 28.9388370513916, "learning_rate": 9.829931972789115e-06, "loss": 35.5073, "step": 11124 }, { "epoch": 264.88358208955225, "grad_norm": 20.167919158935547, "learning_rate": 9.829081632653062e-06, "loss": 34.4186, "step": 11125 }, { "epoch": 264.90746268656716, "grad_norm": 35.84519958496094, "learning_rate": 9.828231292517008e-06, "loss": 35.6437, "step": 11126 }, { "epoch": 264.93134328358207, "grad_norm": 23.3156795501709, "learning_rate": 9.827380952380953e-06, "loss": 36.2353, "step": 11127 }, { "epoch": 264.95522388059703, "grad_norm": 39.56398391723633, "learning_rate": 9.8265306122449e-06, "loss": 34.7052, "step": 11128 }, { "epoch": 264.97910447761194, "grad_norm": 34.080101013183594, "learning_rate": 9.825680272108845e-06, "loss": 34.9117, "step": 11129 }, { "epoch": 265.0, "grad_norm": 30.904067993164062, "learning_rate": 9.82482993197279e-06, "loss": 29.7723, "step": 11130 }, { "epoch": 265.0238805970149, "grad_norm": 33.645423889160156, "learning_rate": 9.823979591836735e-06, "loss": 34.4562, "step": 11131 }, { "epoch": 265.0477611940299, "grad_norm": 27.822898864746094, "learning_rate": 9.823129251700682e-06, "loss": 35.9515, "step": 11132 }, { "epoch": 265.0716417910448, "grad_norm": 28.682424545288086, "learning_rate": 9.822278911564627e-06, "loss": 34.6865, "step": 11133 }, { "epoch": 265.0955223880597, "grad_norm": 31.294862747192383, "learning_rate": 9.821428571428573e-06, "loss": 35.8039, "step": 11134 }, { "epoch": 265.1194029850746, "grad_norm": 25.919233322143555, "learning_rate": 9.820578231292518e-06, "loss": 36.8074, "step": 11135 }, { "epoch": 265.14328358208957, "grad_norm": 33.744144439697266, "learning_rate": 9.819727891156463e-06, "loss": 35.3448, "step": 11136 }, { "epoch": 265.1671641791045, "grad_norm": NaN, "learning_rate": 9.81887755102041e-06, "loss": 47.9396, "step": 11137 }, { "epoch": 265.1910447761194, "grad_norm": 26.96946144104004, "learning_rate": 9.81887755102041e-06, "loss": 34.2474, "step": 11138 }, { "epoch": 265.21492537313435, "grad_norm": 33.9120979309082, "learning_rate": 9.818027210884355e-06, "loss": 35.1802, "step": 11139 }, { "epoch": 265.23880597014926, "grad_norm": 31.594274520874023, "learning_rate": 9.8171768707483e-06, "loss": 34.7878, "step": 11140 }, { "epoch": 265.26268656716417, "grad_norm": 29.770864486694336, "learning_rate": 9.816326530612245e-06, "loss": 35.0956, "step": 11141 }, { "epoch": 265.28656716417913, "grad_norm": 27.344839096069336, "learning_rate": 9.81547619047619e-06, "loss": 35.9117, "step": 11142 }, { "epoch": 265.31044776119404, "grad_norm": 34.17720413208008, "learning_rate": 9.814625850340137e-06, "loss": 35.0078, "step": 11143 }, { "epoch": 265.33432835820895, "grad_norm": 26.976036071777344, "learning_rate": 9.813775510204083e-06, "loss": 35.4266, "step": 11144 }, { "epoch": 265.35820895522386, "grad_norm": 32.93250274658203, "learning_rate": 9.812925170068028e-06, "loss": 34.8151, "step": 11145 }, { "epoch": 265.3820895522388, "grad_norm": 29.532920837402344, "learning_rate": 9.812074829931973e-06, "loss": 34.8695, "step": 11146 }, { "epoch": 265.40597014925373, "grad_norm": 31.906003952026367, "learning_rate": 9.81122448979592e-06, "loss": 35.2919, "step": 11147 }, { "epoch": 265.42985074626864, "grad_norm": 29.873435974121094, "learning_rate": 9.810374149659865e-06, "loss": 35.4161, "step": 11148 }, { "epoch": 265.4537313432836, "grad_norm": 32.02743911743164, "learning_rate": 9.80952380952381e-06, "loss": 33.8069, "step": 11149 }, { "epoch": 265.4776119402985, "grad_norm": 30.20789909362793, "learning_rate": 9.808673469387756e-06, "loss": 34.9376, "step": 11150 }, { "epoch": 265.5014925373134, "grad_norm": 30.224260330200195, "learning_rate": 9.8078231292517e-06, "loss": 35.2841, "step": 11151 }, { "epoch": 265.52537313432833, "grad_norm": 27.322105407714844, "learning_rate": 9.806972789115648e-06, "loss": 34.8103, "step": 11152 }, { "epoch": 265.5492537313433, "grad_norm": 32.584136962890625, "learning_rate": 9.806122448979593e-06, "loss": 35.3109, "step": 11153 }, { "epoch": 265.5731343283582, "grad_norm": 28.13442611694336, "learning_rate": 9.805272108843538e-06, "loss": 35.7694, "step": 11154 }, { "epoch": 265.5970149253731, "grad_norm": 32.61115264892578, "learning_rate": 9.804421768707483e-06, "loss": 34.7084, "step": 11155 }, { "epoch": 265.6208955223881, "grad_norm": 28.902996063232422, "learning_rate": 9.803571428571428e-06, "loss": 35.6423, "step": 11156 }, { "epoch": 265.644776119403, "grad_norm": 29.57887840270996, "learning_rate": 9.802721088435375e-06, "loss": 35.4606, "step": 11157 }, { "epoch": 265.6686567164179, "grad_norm": 27.093605041503906, "learning_rate": 9.80187074829932e-06, "loss": 35.4729, "step": 11158 }, { "epoch": 265.6925373134328, "grad_norm": 29.557764053344727, "learning_rate": 9.801020408163266e-06, "loss": 34.46, "step": 11159 }, { "epoch": 265.7164179104478, "grad_norm": 26.04657745361328, "learning_rate": 9.800170068027211e-06, "loss": 36.1655, "step": 11160 }, { "epoch": 265.7402985074627, "grad_norm": 34.16873550415039, "learning_rate": 9.799319727891158e-06, "loss": 34.7134, "step": 11161 }, { "epoch": 265.7641791044776, "grad_norm": 31.1507568359375, "learning_rate": 9.798469387755103e-06, "loss": 36.5413, "step": 11162 }, { "epoch": 265.78805970149256, "grad_norm": 28.49561882019043, "learning_rate": 9.797619047619048e-06, "loss": 34.321, "step": 11163 }, { "epoch": 265.81194029850747, "grad_norm": 25.886430740356445, "learning_rate": 9.796768707482993e-06, "loss": 35.7351, "step": 11164 }, { "epoch": 265.8358208955224, "grad_norm": 31.567081451416016, "learning_rate": 9.795918367346939e-06, "loss": 35.9788, "step": 11165 }, { "epoch": 265.85970149253734, "grad_norm": 27.389211654663086, "learning_rate": 9.795068027210886e-06, "loss": 35.588, "step": 11166 }, { "epoch": 265.88358208955225, "grad_norm": 31.836196899414062, "learning_rate": 9.79421768707483e-06, "loss": 33.7717, "step": 11167 }, { "epoch": 265.90746268656716, "grad_norm": 29.477245330810547, "learning_rate": 9.793367346938776e-06, "loss": 36.452, "step": 11168 }, { "epoch": 265.93134328358207, "grad_norm": 27.781171798706055, "learning_rate": 9.792517006802721e-06, "loss": 34.3081, "step": 11169 }, { "epoch": 265.95522388059703, "grad_norm": 27.0958194732666, "learning_rate": 9.791666666666666e-06, "loss": 35.0101, "step": 11170 }, { "epoch": 265.97910447761194, "grad_norm": 29.674766540527344, "learning_rate": 9.790816326530613e-06, "loss": 34.9771, "step": 11171 }, { "epoch": 266.0, "grad_norm": 23.849777221679688, "learning_rate": 9.789965986394558e-06, "loss": 29.2235, "step": 11172 }, { "epoch": 266.0238805970149, "grad_norm": 27.19853401184082, "learning_rate": 9.789115646258505e-06, "loss": 34.1162, "step": 11173 }, { "epoch": 266.0477611940299, "grad_norm": 24.806442260742188, "learning_rate": 9.78826530612245e-06, "loss": 34.4189, "step": 11174 }, { "epoch": 266.0716417910448, "grad_norm": 32.09627914428711, "learning_rate": 9.787414965986394e-06, "loss": 36.0046, "step": 11175 }, { "epoch": 266.0955223880597, "grad_norm": 26.7542667388916, "learning_rate": 9.786564625850341e-06, "loss": 34.3425, "step": 11176 }, { "epoch": 266.1194029850746, "grad_norm": 29.392253875732422, "learning_rate": 9.785714285714286e-06, "loss": 35.3791, "step": 11177 }, { "epoch": 266.14328358208957, "grad_norm": 26.908035278320312, "learning_rate": 9.784863945578233e-06, "loss": 34.5689, "step": 11178 }, { "epoch": 266.1671641791045, "grad_norm": 30.832035064697266, "learning_rate": 9.784013605442178e-06, "loss": 35.0826, "step": 11179 }, { "epoch": 266.1910447761194, "grad_norm": 25.285953521728516, "learning_rate": 9.783163265306123e-06, "loss": 36.279, "step": 11180 }, { "epoch": 266.21492537313435, "grad_norm": 29.02312660217285, "learning_rate": 9.782312925170069e-06, "loss": 34.9367, "step": 11181 }, { "epoch": 266.23880597014926, "grad_norm": 25.359874725341797, "learning_rate": 9.781462585034014e-06, "loss": 35.1541, "step": 11182 }, { "epoch": 266.26268656716417, "grad_norm": 31.250812530517578, "learning_rate": 9.78061224489796e-06, "loss": 34.5048, "step": 11183 }, { "epoch": 266.28656716417913, "grad_norm": 25.30484962463379, "learning_rate": 9.779761904761906e-06, "loss": 34.3538, "step": 11184 }, { "epoch": 266.31044776119404, "grad_norm": 29.979816436767578, "learning_rate": 9.778911564625851e-06, "loss": 36.5315, "step": 11185 }, { "epoch": 266.33432835820895, "grad_norm": 26.533626556396484, "learning_rate": 9.778061224489796e-06, "loss": 36.219, "step": 11186 }, { "epoch": 266.35820895522386, "grad_norm": 28.2723388671875, "learning_rate": 9.777210884353743e-06, "loss": 36.0145, "step": 11187 }, { "epoch": 266.3820895522388, "grad_norm": 25.69159698486328, "learning_rate": 9.776360544217688e-06, "loss": 34.5542, "step": 11188 }, { "epoch": 266.40597014925373, "grad_norm": 30.758480072021484, "learning_rate": 9.775510204081634e-06, "loss": 35.2351, "step": 11189 }, { "epoch": 266.42985074626864, "grad_norm": 28.381084442138672, "learning_rate": 9.774659863945579e-06, "loss": 34.902, "step": 11190 }, { "epoch": 266.4537313432836, "grad_norm": 31.15910530090332, "learning_rate": 9.773809523809524e-06, "loss": 35.0499, "step": 11191 }, { "epoch": 266.4776119402985, "grad_norm": 27.630773544311523, "learning_rate": 9.772959183673471e-06, "loss": 34.4043, "step": 11192 }, { "epoch": 266.5014925373134, "grad_norm": 30.58504867553711, "learning_rate": 9.772108843537416e-06, "loss": 36.0267, "step": 11193 }, { "epoch": 266.52537313432833, "grad_norm": 28.479476928710938, "learning_rate": 9.771258503401361e-06, "loss": 34.5769, "step": 11194 }, { "epoch": 266.5492537313433, "grad_norm": 27.64145851135254, "learning_rate": 9.770408163265307e-06, "loss": 34.2969, "step": 11195 }, { "epoch": 266.5731343283582, "grad_norm": 26.412824630737305, "learning_rate": 9.769557823129252e-06, "loss": 35.1859, "step": 11196 }, { "epoch": 266.5970149253731, "grad_norm": 31.01604461669922, "learning_rate": 9.768707482993199e-06, "loss": 34.4392, "step": 11197 }, { "epoch": 266.6208955223881, "grad_norm": 27.325464248657227, "learning_rate": 9.767857142857144e-06, "loss": 36.2125, "step": 11198 }, { "epoch": 266.644776119403, "grad_norm": 29.46001625061035, "learning_rate": 9.767006802721089e-06, "loss": 34.6586, "step": 11199 }, { "epoch": 266.6686567164179, "grad_norm": 26.013404846191406, "learning_rate": 9.766156462585034e-06, "loss": 36.9322, "step": 11200 }, { "epoch": 266.6925373134328, "grad_norm": 31.534645080566406, "learning_rate": 9.765306122448981e-06, "loss": 35.9709, "step": 11201 }, { "epoch": 266.7164179104478, "grad_norm": 27.22188377380371, "learning_rate": 9.764455782312926e-06, "loss": 34.5973, "step": 11202 }, { "epoch": 266.7402985074627, "grad_norm": 29.313804626464844, "learning_rate": 9.763605442176872e-06, "loss": 36.2248, "step": 11203 }, { "epoch": 266.7641791044776, "grad_norm": 24.632644653320312, "learning_rate": 9.762755102040817e-06, "loss": 35.0668, "step": 11204 }, { "epoch": 266.78805970149256, "grad_norm": 28.769880294799805, "learning_rate": 9.761904761904762e-06, "loss": 35.0193, "step": 11205 }, { "epoch": 266.81194029850747, "grad_norm": 25.255563735961914, "learning_rate": 9.761054421768709e-06, "loss": 35.7949, "step": 11206 }, { "epoch": 266.8358208955224, "grad_norm": 28.24742317199707, "learning_rate": 9.760204081632654e-06, "loss": 34.5309, "step": 11207 }, { "epoch": 266.85970149253734, "grad_norm": 24.634931564331055, "learning_rate": 9.7593537414966e-06, "loss": 34.8172, "step": 11208 }, { "epoch": 266.88358208955225, "grad_norm": 31.981687545776367, "learning_rate": 9.758503401360544e-06, "loss": 33.5913, "step": 11209 }, { "epoch": 266.90746268656716, "grad_norm": 27.23674964904785, "learning_rate": 9.75765306122449e-06, "loss": 34.5798, "step": 11210 }, { "epoch": 266.93134328358207, "grad_norm": 27.958499908447266, "learning_rate": 9.756802721088437e-06, "loss": 36.5225, "step": 11211 }, { "epoch": 266.95522388059703, "grad_norm": 28.824146270751953, "learning_rate": 9.755952380952382e-06, "loss": 34.5472, "step": 11212 }, { "epoch": 266.97910447761194, "grad_norm": NaN, "learning_rate": 9.755102040816327e-06, "loss": 35.4718, "step": 11213 }, { "epoch": 267.0, "grad_norm": 24.083078384399414, "learning_rate": 9.755102040816327e-06, "loss": 30.3082, "step": 11214 }, { "epoch": 267.0238805970149, "grad_norm": 24.65122413635254, "learning_rate": 9.754251700680272e-06, "loss": 35.0222, "step": 11215 }, { "epoch": 267.0477611940299, "grad_norm": 29.230562210083008, "learning_rate": 9.753401360544217e-06, "loss": 36.0623, "step": 11216 }, { "epoch": 267.0716417910448, "grad_norm": NaN, "learning_rate": 9.752551020408164e-06, "loss": 38.9229, "step": 11217 }, { "epoch": 267.0955223880597, "grad_norm": 22.877544403076172, "learning_rate": 9.752551020408164e-06, "loss": 35.772, "step": 11218 }, { "epoch": 267.1194029850746, "grad_norm": 33.116233825683594, "learning_rate": 9.75170068027211e-06, "loss": 35.0121, "step": 11219 }, { "epoch": 267.14328358208957, "grad_norm": 29.101282119750977, "learning_rate": 9.750850340136055e-06, "loss": 34.8035, "step": 11220 }, { "epoch": 267.1671641791045, "grad_norm": 28.55680274963379, "learning_rate": 9.75e-06, "loss": 34.4214, "step": 11221 }, { "epoch": 267.1910447761194, "grad_norm": 28.01104164123535, "learning_rate": 9.749149659863947e-06, "loss": 34.9526, "step": 11222 }, { "epoch": 267.21492537313435, "grad_norm": 26.790014266967773, "learning_rate": 9.748299319727892e-06, "loss": 34.9913, "step": 11223 }, { "epoch": 267.23880597014926, "grad_norm": 21.205453872680664, "learning_rate": 9.747448979591837e-06, "loss": 34.7966, "step": 11224 }, { "epoch": 267.26268656716417, "grad_norm": 30.262779235839844, "learning_rate": 9.746598639455784e-06, "loss": 34.7111, "step": 11225 }, { "epoch": 267.28656716417913, "grad_norm": 23.356658935546875, "learning_rate": 9.745748299319728e-06, "loss": 36.2362, "step": 11226 }, { "epoch": 267.31044776119404, "grad_norm": 32.315345764160156, "learning_rate": 9.744897959183674e-06, "loss": 34.9263, "step": 11227 }, { "epoch": 267.33432835820895, "grad_norm": 30.408292770385742, "learning_rate": 9.74404761904762e-06, "loss": 35.8779, "step": 11228 }, { "epoch": 267.35820895522386, "grad_norm": 22.527212142944336, "learning_rate": 9.743197278911567e-06, "loss": 34.943, "step": 11229 }, { "epoch": 267.3820895522388, "grad_norm": 24.883535385131836, "learning_rate": 9.742346938775512e-06, "loss": 34.7995, "step": 11230 }, { "epoch": 267.40597014925373, "grad_norm": 26.914213180541992, "learning_rate": 9.741496598639457e-06, "loss": 36.486, "step": 11231 }, { "epoch": 267.42985074626864, "grad_norm": 20.010807037353516, "learning_rate": 9.740646258503402e-06, "loss": 34.7843, "step": 11232 }, { "epoch": 267.4537313432836, "grad_norm": 33.46001434326172, "learning_rate": 9.739795918367347e-06, "loss": 35.9539, "step": 11233 }, { "epoch": 267.4776119402985, "grad_norm": 26.157451629638672, "learning_rate": 9.738945578231294e-06, "loss": 35.0678, "step": 11234 }, { "epoch": 267.5014925373134, "grad_norm": 35.644073486328125, "learning_rate": 9.73809523809524e-06, "loss": 35.87, "step": 11235 }, { "epoch": 267.52537313432833, "grad_norm": 31.433055877685547, "learning_rate": 9.737244897959185e-06, "loss": 34.5191, "step": 11236 }, { "epoch": 267.5492537313433, "grad_norm": 24.154205322265625, "learning_rate": 9.73639455782313e-06, "loss": 35.0339, "step": 11237 }, { "epoch": 267.5731343283582, "grad_norm": 23.054718017578125, "learning_rate": 9.735544217687075e-06, "loss": 35.1328, "step": 11238 }, { "epoch": 267.5970149253731, "grad_norm": 29.09781265258789, "learning_rate": 9.734693877551022e-06, "loss": 35.2278, "step": 11239 }, { "epoch": 267.6208955223881, "grad_norm": 19.367177963256836, "learning_rate": 9.733843537414967e-06, "loss": 34.4716, "step": 11240 }, { "epoch": 267.644776119403, "grad_norm": 32.98915481567383, "learning_rate": 9.732993197278912e-06, "loss": 34.7553, "step": 11241 }, { "epoch": 267.6686567164179, "grad_norm": 29.198795318603516, "learning_rate": 9.732142857142858e-06, "loss": 35.211, "step": 11242 }, { "epoch": 267.6925373134328, "grad_norm": 29.72975730895996, "learning_rate": 9.731292517006804e-06, "loss": 34.3863, "step": 11243 }, { "epoch": 267.7164179104478, "grad_norm": 24.961580276489258, "learning_rate": 9.73044217687075e-06, "loss": 34.9152, "step": 11244 }, { "epoch": 267.7402985074627, "grad_norm": 28.849666595458984, "learning_rate": 9.729591836734695e-06, "loss": 35.3199, "step": 11245 }, { "epoch": 267.7641791044776, "grad_norm": 24.429973602294922, "learning_rate": 9.72874149659864e-06, "loss": 35.2488, "step": 11246 }, { "epoch": 267.78805970149256, "grad_norm": 32.53376007080078, "learning_rate": 9.727891156462585e-06, "loss": 34.7199, "step": 11247 }, { "epoch": 267.81194029850747, "grad_norm": 25.328937530517578, "learning_rate": 9.727040816326532e-06, "loss": 34.8706, "step": 11248 }, { "epoch": 267.8358208955224, "grad_norm": 25.075822830200195, "learning_rate": 9.726190476190477e-06, "loss": 34.7175, "step": 11249 }, { "epoch": 267.85970149253734, "grad_norm": 23.769784927368164, "learning_rate": 9.725340136054422e-06, "loss": 35.775, "step": 11250 }, { "epoch": 267.88358208955225, "grad_norm": 25.97585105895996, "learning_rate": 9.724489795918368e-06, "loss": 34.1371, "step": 11251 }, { "epoch": 267.90746268656716, "grad_norm": 22.495651245117188, "learning_rate": 9.723639455782313e-06, "loss": 35.7473, "step": 11252 }, { "epoch": 267.93134328358207, "grad_norm": 25.758033752441406, "learning_rate": 9.72278911564626e-06, "loss": 35.7567, "step": 11253 }, { "epoch": 267.95522388059703, "grad_norm": NaN, "learning_rate": 9.721938775510205e-06, "loss": 48.6902, "step": 11254 }, { "epoch": 267.97910447761194, "grad_norm": 21.118587493896484, "learning_rate": 9.721938775510205e-06, "loss": 34.6988, "step": 11255 }, { "epoch": 268.0, "grad_norm": 23.438156127929688, "learning_rate": 9.72108843537415e-06, "loss": 31.1245, "step": 11256 }, { "epoch": 268.0238805970149, "grad_norm": 22.095630645751953, "learning_rate": 9.720238095238095e-06, "loss": 35.0346, "step": 11257 }, { "epoch": 268.0477611940299, "grad_norm": 19.394258499145508, "learning_rate": 9.719387755102042e-06, "loss": 35.3207, "step": 11258 }, { "epoch": 268.0716417910448, "grad_norm": 20.894695281982422, "learning_rate": 9.718537414965987e-06, "loss": 34.0132, "step": 11259 }, { "epoch": 268.0955223880597, "grad_norm": 19.614351272583008, "learning_rate": 9.717687074829933e-06, "loss": 34.6297, "step": 11260 }, { "epoch": 268.1194029850746, "grad_norm": 24.058088302612305, "learning_rate": 9.716836734693878e-06, "loss": 35.1796, "step": 11261 }, { "epoch": 268.14328358208957, "grad_norm": 17.5927734375, "learning_rate": 9.715986394557823e-06, "loss": 35.7046, "step": 11262 }, { "epoch": 268.1671641791045, "grad_norm": 21.853776931762695, "learning_rate": 9.71513605442177e-06, "loss": 35.5309, "step": 11263 }, { "epoch": 268.1910447761194, "grad_norm": 17.455530166625977, "learning_rate": 9.714285714285715e-06, "loss": 34.8216, "step": 11264 }, { "epoch": 268.21492537313435, "grad_norm": 18.30791664123535, "learning_rate": 9.71343537414966e-06, "loss": 35.6202, "step": 11265 }, { "epoch": 268.23880597014926, "grad_norm": 20.19818115234375, "learning_rate": 9.712585034013606e-06, "loss": 34.8912, "step": 11266 }, { "epoch": 268.26268656716417, "grad_norm": 19.09391975402832, "learning_rate": 9.71173469387755e-06, "loss": 35.8681, "step": 11267 }, { "epoch": 268.28656716417913, "grad_norm": 15.085587501525879, "learning_rate": 9.710884353741498e-06, "loss": 35.7501, "step": 11268 }, { "epoch": 268.31044776119404, "grad_norm": 24.19599151611328, "learning_rate": 9.710034013605443e-06, "loss": 33.5056, "step": 11269 }, { "epoch": 268.33432835820895, "grad_norm": 17.074016571044922, "learning_rate": 9.70918367346939e-06, "loss": 35.6705, "step": 11270 }, { "epoch": 268.35820895522386, "grad_norm": 20.87940216064453, "learning_rate": 9.708333333333333e-06, "loss": 36.1954, "step": 11271 }, { "epoch": 268.3820895522388, "grad_norm": 22.110916137695312, "learning_rate": 9.707482993197278e-06, "loss": 33.2204, "step": 11272 }, { "epoch": 268.40597014925373, "grad_norm": 18.115238189697266, "learning_rate": 9.706632653061225e-06, "loss": 37.2117, "step": 11273 }, { "epoch": 268.42985074626864, "grad_norm": 31.70871353149414, "learning_rate": 9.70578231292517e-06, "loss": 35.9672, "step": 11274 }, { "epoch": 268.4537313432836, "grad_norm": 24.8507022857666, "learning_rate": 9.704931972789117e-06, "loss": 35.1141, "step": 11275 }, { "epoch": 268.4776119402985, "grad_norm": 27.8154354095459, "learning_rate": 9.704081632653061e-06, "loss": 34.846, "step": 11276 }, { "epoch": 268.5014925373134, "grad_norm": 24.144790649414062, "learning_rate": 9.703231292517008e-06, "loss": 36.6943, "step": 11277 }, { "epoch": 268.52537313432833, "grad_norm": 25.23328399658203, "learning_rate": 9.702380952380953e-06, "loss": 34.1247, "step": 11278 }, { "epoch": 268.5492537313433, "grad_norm": 19.879201889038086, "learning_rate": 9.701530612244898e-06, "loss": 35.1627, "step": 11279 }, { "epoch": 268.5731343283582, "grad_norm": 22.306997299194336, "learning_rate": 9.700680272108845e-06, "loss": 33.6694, "step": 11280 }, { "epoch": 268.5970149253731, "grad_norm": 18.249324798583984, "learning_rate": 9.69982993197279e-06, "loss": 34.9906, "step": 11281 }, { "epoch": 268.6208955223881, "grad_norm": 21.096303939819336, "learning_rate": 9.698979591836736e-06, "loss": 35.7008, "step": 11282 }, { "epoch": 268.644776119403, "grad_norm": 17.10426902770996, "learning_rate": 9.69812925170068e-06, "loss": 35.0684, "step": 11283 }, { "epoch": 268.6686567164179, "grad_norm": 21.25813102722168, "learning_rate": 9.697278911564628e-06, "loss": 34.8057, "step": 11284 }, { "epoch": 268.6925373134328, "grad_norm": 16.934974670410156, "learning_rate": 9.696428571428573e-06, "loss": 34.9907, "step": 11285 }, { "epoch": 268.7164179104478, "grad_norm": 18.4528751373291, "learning_rate": 9.695578231292518e-06, "loss": 35.7507, "step": 11286 }, { "epoch": 268.7402985074627, "grad_norm": 17.586544036865234, "learning_rate": 9.694727891156463e-06, "loss": 34.9906, "step": 11287 }, { "epoch": 268.7641791044776, "grad_norm": 17.558521270751953, "learning_rate": 9.693877551020408e-06, "loss": 36.1545, "step": 11288 }, { "epoch": 268.78805970149256, "grad_norm": 14.969913482666016, "learning_rate": 9.693027210884355e-06, "loss": 35.1573, "step": 11289 }, { "epoch": 268.81194029850747, "grad_norm": 16.84869384765625, "learning_rate": 9.6921768707483e-06, "loss": 32.984, "step": 11290 }, { "epoch": 268.8358208955224, "grad_norm": 18.702037811279297, "learning_rate": 9.691326530612246e-06, "loss": 35.1227, "step": 11291 }, { "epoch": 268.85970149253734, "grad_norm": 16.950538635253906, "learning_rate": 9.690476190476191e-06, "loss": 34.2167, "step": 11292 }, { "epoch": 268.88358208955225, "grad_norm": 16.66674041748047, "learning_rate": 9.689625850340136e-06, "loss": 35.0838, "step": 11293 }, { "epoch": 268.90746268656716, "grad_norm": 22.63540267944336, "learning_rate": 9.688775510204083e-06, "loss": 35.0731, "step": 11294 }, { "epoch": 268.93134328358207, "grad_norm": 18.76118278503418, "learning_rate": 9.687925170068028e-06, "loss": 36.0074, "step": 11295 }, { "epoch": 268.95522388059703, "grad_norm": 19.704389572143555, "learning_rate": 9.687074829931973e-06, "loss": 35.7865, "step": 11296 }, { "epoch": 268.97910447761194, "grad_norm": 23.83836555480957, "learning_rate": 9.686224489795919e-06, "loss": 34.5027, "step": 11297 }, { "epoch": 269.0, "grad_norm": 15.99374771118164, "learning_rate": 9.685374149659866e-06, "loss": 30.7481, "step": 11298 }, { "epoch": 269.0238805970149, "grad_norm": 17.470727920532227, "learning_rate": 9.68452380952381e-06, "loss": 34.7472, "step": 11299 }, { "epoch": 269.0477611940299, "grad_norm": 24.246135711669922, "learning_rate": 9.683673469387756e-06, "loss": 35.3944, "step": 11300 }, { "epoch": 269.0716417910448, "grad_norm": 14.26496696472168, "learning_rate": 9.682823129251701e-06, "loss": 34.7151, "step": 11301 }, { "epoch": 269.0955223880597, "grad_norm": 25.587913513183594, "learning_rate": 9.681972789115646e-06, "loss": 34.9974, "step": 11302 }, { "epoch": 269.1194029850746, "grad_norm": 19.023223876953125, "learning_rate": 9.681122448979593e-06, "loss": 35.0883, "step": 11303 }, { "epoch": 269.14328358208957, "grad_norm": 20.049644470214844, "learning_rate": 9.680272108843538e-06, "loss": 34.819, "step": 11304 }, { "epoch": 269.1671641791045, "grad_norm": 19.549583435058594, "learning_rate": 9.679421768707484e-06, "loss": 34.5219, "step": 11305 }, { "epoch": 269.1910447761194, "grad_norm": 18.334081649780273, "learning_rate": 9.678571428571429e-06, "loss": 34.4315, "step": 11306 }, { "epoch": 269.21492537313435, "grad_norm": 22.47246742248535, "learning_rate": 9.677721088435374e-06, "loss": 34.5326, "step": 11307 }, { "epoch": 269.23880597014926, "grad_norm": 19.489429473876953, "learning_rate": 9.676870748299321e-06, "loss": 33.7929, "step": 11308 }, { "epoch": 269.26268656716417, "grad_norm": 18.83839988708496, "learning_rate": 9.676020408163266e-06, "loss": 34.0231, "step": 11309 }, { "epoch": 269.28656716417913, "grad_norm": 23.910572052001953, "learning_rate": 9.675170068027211e-06, "loss": 34.8476, "step": 11310 }, { "epoch": 269.31044776119404, "grad_norm": 17.442350387573242, "learning_rate": 9.674319727891157e-06, "loss": 35.3672, "step": 11311 }, { "epoch": 269.33432835820895, "grad_norm": 20.737442016601562, "learning_rate": 9.673469387755103e-06, "loss": 35.1205, "step": 11312 }, { "epoch": 269.35820895522386, "grad_norm": 21.674774169921875, "learning_rate": 9.672619047619049e-06, "loss": 34.7912, "step": 11313 }, { "epoch": 269.3820895522388, "grad_norm": 16.29989242553711, "learning_rate": 9.671768707482994e-06, "loss": 34.8348, "step": 11314 }, { "epoch": 269.40597014925373, "grad_norm": 20.943859100341797, "learning_rate": 9.670918367346939e-06, "loss": 34.3113, "step": 11315 }, { "epoch": 269.42985074626864, "grad_norm": 19.311450958251953, "learning_rate": 9.670068027210884e-06, "loss": 34.4545, "step": 11316 }, { "epoch": 269.4537313432836, "grad_norm": 18.16386604309082, "learning_rate": 9.669217687074831e-06, "loss": 35.0409, "step": 11317 }, { "epoch": 269.4776119402985, "grad_norm": 22.301490783691406, "learning_rate": 9.668367346938776e-06, "loss": 36.0439, "step": 11318 }, { "epoch": 269.5014925373134, "grad_norm": 18.263423919677734, "learning_rate": 9.667517006802723e-06, "loss": 34.1385, "step": 11319 }, { "epoch": 269.52537313432833, "grad_norm": 16.526214599609375, "learning_rate": 9.666666666666667e-06, "loss": 35.1601, "step": 11320 }, { "epoch": 269.5492537313433, "grad_norm": 26.40668296813965, "learning_rate": 9.665816326530612e-06, "loss": 34.8927, "step": 11321 }, { "epoch": 269.5731343283582, "grad_norm": 16.773963928222656, "learning_rate": 9.664965986394559e-06, "loss": 35.1632, "step": 11322 }, { "epoch": 269.5970149253731, "grad_norm": 23.497394561767578, "learning_rate": 9.664115646258504e-06, "loss": 35.0348, "step": 11323 }, { "epoch": 269.6208955223881, "grad_norm": 18.24677085876465, "learning_rate": 9.663265306122451e-06, "loss": 34.9783, "step": 11324 }, { "epoch": 269.644776119403, "grad_norm": 18.07054901123047, "learning_rate": 9.662414965986396e-06, "loss": 35.1278, "step": 11325 }, { "epoch": 269.6686567164179, "grad_norm": 22.57805061340332, "learning_rate": 9.66156462585034e-06, "loss": 35.4269, "step": 11326 }, { "epoch": 269.6925373134328, "grad_norm": 16.485912322998047, "learning_rate": 9.660714285714287e-06, "loss": 36.1043, "step": 11327 }, { "epoch": 269.7164179104478, "grad_norm": 19.971275329589844, "learning_rate": 9.659863945578232e-06, "loss": 35.3867, "step": 11328 }, { "epoch": 269.7402985074627, "grad_norm": 22.916345596313477, "learning_rate": 9.659013605442179e-06, "loss": 36.8476, "step": 11329 }, { "epoch": 269.7641791044776, "grad_norm": 15.270123481750488, "learning_rate": 9.658163265306124e-06, "loss": 35.4615, "step": 11330 }, { "epoch": 269.78805970149256, "grad_norm": 30.578062057495117, "learning_rate": 9.657312925170069e-06, "loss": 36.31, "step": 11331 }, { "epoch": 269.81194029850747, "grad_norm": 20.64407730102539, "learning_rate": 9.656462585034014e-06, "loss": 35.6959, "step": 11332 }, { "epoch": 269.8358208955224, "grad_norm": 28.821992874145508, "learning_rate": 9.65561224489796e-06, "loss": 36.4402, "step": 11333 }, { "epoch": 269.85970149253734, "grad_norm": 22.246870040893555, "learning_rate": 9.654761904761906e-06, "loss": 35.2498, "step": 11334 }, { "epoch": 269.88358208955225, "grad_norm": 24.748897552490234, "learning_rate": 9.653911564625852e-06, "loss": 35.5736, "step": 11335 }, { "epoch": 269.90746268656716, "grad_norm": 24.36867332458496, "learning_rate": 9.653061224489797e-06, "loss": 35.4285, "step": 11336 }, { "epoch": 269.93134328358207, "grad_norm": 18.55751609802246, "learning_rate": 9.652210884353742e-06, "loss": 35.6872, "step": 11337 }, { "epoch": 269.95522388059703, "grad_norm": 29.225358963012695, "learning_rate": 9.651360544217689e-06, "loss": 34.2868, "step": 11338 }, { "epoch": 269.97910447761194, "grad_norm": 21.866971969604492, "learning_rate": 9.650510204081634e-06, "loss": 34.3606, "step": 11339 }, { "epoch": 270.0, "grad_norm": 24.2423038482666, "learning_rate": 9.64965986394558e-06, "loss": 30.7976, "step": 11340 }, { "epoch": 270.0238805970149, "grad_norm": 23.909801483154297, "learning_rate": 9.648809523809524e-06, "loss": 34.9145, "step": 11341 }, { "epoch": 270.0477611940299, "grad_norm": 31.938671112060547, "learning_rate": 9.64795918367347e-06, "loss": 36.1431, "step": 11342 }, { "epoch": 270.0716417910448, "grad_norm": 20.3516845703125, "learning_rate": 9.647108843537416e-06, "loss": 35.3669, "step": 11343 }, { "epoch": 270.0955223880597, "grad_norm": 35.39886474609375, "learning_rate": 9.646258503401362e-06, "loss": 35.0848, "step": 11344 }, { "epoch": 270.1194029850746, "grad_norm": 28.242713928222656, "learning_rate": 9.645408163265307e-06, "loss": 34.0073, "step": 11345 }, { "epoch": 270.14328358208957, "grad_norm": 35.218013763427734, "learning_rate": 9.644557823129252e-06, "loss": 35.9957, "step": 11346 }, { "epoch": 270.1671641791045, "grad_norm": 30.707536697387695, "learning_rate": 9.643707482993197e-06, "loss": 34.4482, "step": 11347 }, { "epoch": 270.1910447761194, "grad_norm": 24.374370574951172, "learning_rate": 9.642857142857144e-06, "loss": 33.9393, "step": 11348 }, { "epoch": 270.21492537313435, "grad_norm": 25.397491455078125, "learning_rate": 9.64200680272109e-06, "loss": 35.2775, "step": 11349 }, { "epoch": 270.23880597014926, "grad_norm": 31.241239547729492, "learning_rate": 9.641156462585035e-06, "loss": 34.6514, "step": 11350 }, { "epoch": 270.26268656716417, "grad_norm": 23.18448257446289, "learning_rate": 9.64030612244898e-06, "loss": 35.7867, "step": 11351 }, { "epoch": 270.28656716417913, "grad_norm": 33.51908493041992, "learning_rate": 9.639455782312927e-06, "loss": 35.6989, "step": 11352 }, { "epoch": 270.31044776119404, "grad_norm": 25.233726501464844, "learning_rate": 9.638605442176872e-06, "loss": 34.5514, "step": 11353 }, { "epoch": 270.33432835820895, "grad_norm": 32.36314392089844, "learning_rate": 9.637755102040817e-06, "loss": 33.9941, "step": 11354 }, { "epoch": 270.35820895522386, "grad_norm": 27.87277603149414, "learning_rate": 9.636904761904762e-06, "loss": 34.882, "step": 11355 }, { "epoch": 270.3820895522388, "grad_norm": 29.331741333007812, "learning_rate": 9.636054421768707e-06, "loss": 34.7327, "step": 11356 }, { "epoch": 270.40597014925373, "grad_norm": 25.99250602722168, "learning_rate": 9.635204081632654e-06, "loss": 35.8672, "step": 11357 }, { "epoch": 270.42985074626864, "grad_norm": 27.950302124023438, "learning_rate": 9.6343537414966e-06, "loss": 34.026, "step": 11358 }, { "epoch": 270.4537313432836, "grad_norm": 25.09626579284668, "learning_rate": 9.633503401360545e-06, "loss": 35.441, "step": 11359 }, { "epoch": 270.4776119402985, "grad_norm": 33.31979751586914, "learning_rate": 9.63265306122449e-06, "loss": 35.6768, "step": 11360 }, { "epoch": 270.5014925373134, "grad_norm": 28.50912094116211, "learning_rate": 9.631802721088435e-06, "loss": 34.9504, "step": 11361 }, { "epoch": 270.52537313432833, "grad_norm": 30.334747314453125, "learning_rate": 9.630952380952382e-06, "loss": 34.6947, "step": 11362 }, { "epoch": 270.5492537313433, "grad_norm": 26.71424674987793, "learning_rate": 9.630102040816327e-06, "loss": 34.0974, "step": 11363 }, { "epoch": 270.5731343283582, "grad_norm": 28.093780517578125, "learning_rate": 9.629251700680272e-06, "loss": 34.1479, "step": 11364 }, { "epoch": 270.5970149253731, "grad_norm": 26.249269485473633, "learning_rate": 9.628401360544218e-06, "loss": 35.649, "step": 11365 }, { "epoch": 270.6208955223881, "grad_norm": 31.712121963500977, "learning_rate": 9.627551020408165e-06, "loss": 35.2305, "step": 11366 }, { "epoch": 270.644776119403, "grad_norm": 28.87372589111328, "learning_rate": 9.62670068027211e-06, "loss": 34.4322, "step": 11367 }, { "epoch": 270.6686567164179, "grad_norm": 31.236000061035156, "learning_rate": 9.625850340136055e-06, "loss": 35.8472, "step": 11368 }, { "epoch": 270.6925373134328, "grad_norm": 27.504440307617188, "learning_rate": 9.625e-06, "loss": 34.3415, "step": 11369 }, { "epoch": 270.7164179104478, "grad_norm": 28.205368041992188, "learning_rate": 9.624149659863945e-06, "loss": 35.1829, "step": 11370 }, { "epoch": 270.7402985074627, "grad_norm": 25.047800064086914, "learning_rate": 9.623299319727892e-06, "loss": 35.0551, "step": 11371 }, { "epoch": 270.7641791044776, "grad_norm": 31.453554153442383, "learning_rate": 9.622448979591837e-06, "loss": 35.1136, "step": 11372 }, { "epoch": 270.78805970149256, "grad_norm": 29.390029907226562, "learning_rate": 9.621598639455784e-06, "loss": 34.5881, "step": 11373 }, { "epoch": 270.81194029850747, "grad_norm": 30.30431365966797, "learning_rate": 9.62074829931973e-06, "loss": 35.6625, "step": 11374 }, { "epoch": 270.8358208955224, "grad_norm": 27.459156036376953, "learning_rate": 9.619897959183673e-06, "loss": 35.4493, "step": 11375 }, { "epoch": 270.85970149253734, "grad_norm": 29.01818084716797, "learning_rate": 9.61904761904762e-06, "loss": 35.478, "step": 11376 }, { "epoch": 270.88358208955225, "grad_norm": 25.12757682800293, "learning_rate": 9.618197278911565e-06, "loss": 34.8158, "step": 11377 }, { "epoch": 270.90746268656716, "grad_norm": 27.613876342773438, "learning_rate": 9.617346938775512e-06, "loss": 35.2491, "step": 11378 }, { "epoch": 270.93134328358207, "grad_norm": 26.43328857421875, "learning_rate": 9.616496598639457e-06, "loss": 34.1624, "step": 11379 }, { "epoch": 270.95522388059703, "grad_norm": 33.36186981201172, "learning_rate": 9.6156462585034e-06, "loss": 35.9641, "step": 11380 }, { "epoch": 270.97910447761194, "grad_norm": 26.321887969970703, "learning_rate": 9.614795918367348e-06, "loss": 35.3548, "step": 11381 }, { "epoch": 271.0, "grad_norm": 26.828222274780273, "learning_rate": 9.613945578231293e-06, "loss": 30.8285, "step": 11382 }, { "epoch": 271.0238805970149, "grad_norm": 30.941560745239258, "learning_rate": 9.61309523809524e-06, "loss": 35.4498, "step": 11383 }, { "epoch": 271.0477611940299, "grad_norm": 28.217666625976562, "learning_rate": 9.612244897959185e-06, "loss": 34.1935, "step": 11384 }, { "epoch": 271.0716417910448, "grad_norm": 28.074115753173828, "learning_rate": 9.61139455782313e-06, "loss": 33.9021, "step": 11385 }, { "epoch": 271.0955223880597, "grad_norm": 27.322423934936523, "learning_rate": 9.610544217687075e-06, "loss": 33.9694, "step": 11386 }, { "epoch": 271.1194029850746, "grad_norm": 21.89845085144043, "learning_rate": 9.60969387755102e-06, "loss": 35.0677, "step": 11387 }, { "epoch": 271.14328358208957, "grad_norm": 31.446561813354492, "learning_rate": 9.608843537414967e-06, "loss": 34.8357, "step": 11388 }, { "epoch": 271.1671641791045, "grad_norm": 26.480789184570312, "learning_rate": 9.607993197278913e-06, "loss": 34.9295, "step": 11389 }, { "epoch": 271.1910447761194, "grad_norm": 28.349945068359375, "learning_rate": 9.607142857142858e-06, "loss": 35.8994, "step": 11390 }, { "epoch": 271.21492537313435, "grad_norm": 28.527359008789062, "learning_rate": 9.606292517006803e-06, "loss": 35.036, "step": 11391 }, { "epoch": 271.23880597014926, "grad_norm": 26.5610408782959, "learning_rate": 9.60544217687075e-06, "loss": 35.1133, "step": 11392 }, { "epoch": 271.26268656716417, "grad_norm": 22.178897857666016, "learning_rate": 9.604591836734695e-06, "loss": 34.6387, "step": 11393 }, { "epoch": 271.28656716417913, "grad_norm": 29.09298324584961, "learning_rate": 9.60374149659864e-06, "loss": 35.7886, "step": 11394 }, { "epoch": 271.31044776119404, "grad_norm": 21.043861389160156, "learning_rate": 9.602891156462586e-06, "loss": 34.2892, "step": 11395 }, { "epoch": 271.33432835820895, "grad_norm": 37.17613220214844, "learning_rate": 9.60204081632653e-06, "loss": 36.2052, "step": 11396 }, { "epoch": 271.35820895522386, "grad_norm": 33.103919982910156, "learning_rate": 9.601190476190478e-06, "loss": 34.1138, "step": 11397 }, { "epoch": 271.3820895522388, "grad_norm": 28.71168327331543, "learning_rate": 9.600340136054423e-06, "loss": 33.737, "step": 11398 }, { "epoch": 271.40597014925373, "grad_norm": 27.4705810546875, "learning_rate": 9.599489795918368e-06, "loss": 35.4473, "step": 11399 }, { "epoch": 271.42985074626864, "grad_norm": 25.91693878173828, "learning_rate": 9.598639455782313e-06, "loss": 34.5243, "step": 11400 }, { "epoch": 271.4537313432836, "grad_norm": 20.86406707763672, "learning_rate": 9.597789115646258e-06, "loss": 35.2066, "step": 11401 }, { "epoch": 271.4776119402985, "grad_norm": 32.478431701660156, "learning_rate": 9.596938775510205e-06, "loss": 33.305, "step": 11402 }, { "epoch": 271.5014925373134, "grad_norm": 26.85079002380371, "learning_rate": 9.59608843537415e-06, "loss": 35.2552, "step": 11403 }, { "epoch": 271.52537313432833, "grad_norm": 29.79864501953125, "learning_rate": 9.595238095238096e-06, "loss": 35.222, "step": 11404 }, { "epoch": 271.5492537313433, "grad_norm": 26.909713745117188, "learning_rate": 9.594387755102041e-06, "loss": 34.6982, "step": 11405 }, { "epoch": 271.5731343283582, "grad_norm": 26.60768699645996, "learning_rate": 9.593537414965988e-06, "loss": 34.5211, "step": 11406 }, { "epoch": 271.5970149253731, "grad_norm": 23.74742889404297, "learning_rate": 9.592687074829933e-06, "loss": 36.3786, "step": 11407 }, { "epoch": 271.6208955223881, "grad_norm": 30.495195388793945, "learning_rate": 9.591836734693878e-06, "loss": 35.8866, "step": 11408 }, { "epoch": 271.644776119403, "grad_norm": 24.703819274902344, "learning_rate": 9.590986394557823e-06, "loss": 35.5444, "step": 11409 }, { "epoch": 271.6686567164179, "grad_norm": 32.43016052246094, "learning_rate": 9.590136054421769e-06, "loss": 34.7922, "step": 11410 }, { "epoch": 271.6925373134328, "grad_norm": 28.583765029907227, "learning_rate": 9.589285714285716e-06, "loss": 35.8122, "step": 11411 }, { "epoch": 271.7164179104478, "grad_norm": 23.935251235961914, "learning_rate": 9.58843537414966e-06, "loss": 35.6513, "step": 11412 }, { "epoch": 271.7402985074627, "grad_norm": 20.8134822845459, "learning_rate": 9.587585034013606e-06, "loss": 35.5131, "step": 11413 }, { "epoch": 271.7641791044776, "grad_norm": 29.37078857421875, "learning_rate": 9.586734693877551e-06, "loss": 34.3373, "step": 11414 }, { "epoch": 271.78805970149256, "grad_norm": 21.80526351928711, "learning_rate": 9.585884353741496e-06, "loss": 36.0305, "step": 11415 }, { "epoch": 271.81194029850747, "grad_norm": 33.87484359741211, "learning_rate": 9.585034013605443e-06, "loss": 34.1205, "step": 11416 }, { "epoch": 271.8358208955224, "grad_norm": 28.685937881469727, "learning_rate": 9.584183673469388e-06, "loss": 35.9275, "step": 11417 }, { "epoch": 271.85970149253734, "grad_norm": 23.839086532592773, "learning_rate": 9.583333333333335e-06, "loss": 33.1174, "step": 11418 }, { "epoch": 271.88358208955225, "grad_norm": 24.347623825073242, "learning_rate": 9.582482993197279e-06, "loss": 35.3959, "step": 11419 }, { "epoch": 271.90746268656716, "grad_norm": 24.888187408447266, "learning_rate": 9.581632653061226e-06, "loss": 34.5073, "step": 11420 }, { "epoch": 271.93134328358207, "grad_norm": 19.496639251708984, "learning_rate": 9.580782312925171e-06, "loss": 35.7947, "step": 11421 }, { "epoch": 271.95522388059703, "grad_norm": 27.817222595214844, "learning_rate": 9.579931972789116e-06, "loss": 35.4779, "step": 11422 }, { "epoch": 271.97910447761194, "grad_norm": 20.708219528198242, "learning_rate": 9.579081632653063e-06, "loss": 35.4572, "step": 11423 }, { "epoch": 272.0, "grad_norm": 29.297521591186523, "learning_rate": 9.578231292517007e-06, "loss": 30.6278, "step": 11424 }, { "epoch": 272.0238805970149, "grad_norm": 29.625965118408203, "learning_rate": 9.577380952380953e-06, "loss": 35.0764, "step": 11425 }, { "epoch": 272.0477611940299, "grad_norm": 25.00616455078125, "learning_rate": 9.576530612244899e-06, "loss": 34.7385, "step": 11426 }, { "epoch": 272.0716417910448, "grad_norm": 24.672815322875977, "learning_rate": 9.575680272108844e-06, "loss": 34.5977, "step": 11427 }, { "epoch": 272.0955223880597, "grad_norm": 26.59151268005371, "learning_rate": 9.57482993197279e-06, "loss": 34.7266, "step": 11428 }, { "epoch": 272.1194029850746, "grad_norm": 20.41710662841797, "learning_rate": 9.573979591836736e-06, "loss": 34.9463, "step": 11429 }, { "epoch": 272.14328358208957, "grad_norm": 29.853113174438477, "learning_rate": 9.573129251700681e-06, "loss": 35.3496, "step": 11430 }, { "epoch": 272.1671641791045, "grad_norm": 24.686464309692383, "learning_rate": 9.572278911564626e-06, "loss": 35.0309, "step": 11431 }, { "epoch": 272.1910447761194, "grad_norm": 27.58829689025879, "learning_rate": 9.571428571428573e-06, "loss": 34.8087, "step": 11432 }, { "epoch": 272.21492537313435, "grad_norm": 26.95330810546875, "learning_rate": 9.570578231292518e-06, "loss": 35.4672, "step": 11433 }, { "epoch": 272.23880597014926, "grad_norm": 20.95945167541504, "learning_rate": 9.569727891156464e-06, "loss": 34.7029, "step": 11434 }, { "epoch": 272.26268656716417, "grad_norm": 24.166494369506836, "learning_rate": 9.568877551020409e-06, "loss": 35.6845, "step": 11435 }, { "epoch": 272.28656716417913, "grad_norm": 23.78201675415039, "learning_rate": 9.568027210884354e-06, "loss": 35.3705, "step": 11436 }, { "epoch": 272.31044776119404, "grad_norm": 20.939838409423828, "learning_rate": 9.567176870748301e-06, "loss": 34.8826, "step": 11437 }, { "epoch": 272.33432835820895, "grad_norm": 24.9542236328125, "learning_rate": 9.566326530612246e-06, "loss": 34.1938, "step": 11438 }, { "epoch": 272.35820895522386, "grad_norm": 21.42974090576172, "learning_rate": 9.565476190476191e-06, "loss": 36.1493, "step": 11439 }, { "epoch": 272.3820895522388, "grad_norm": 27.042057037353516, "learning_rate": 9.564625850340137e-06, "loss": 34.8768, "step": 11440 }, { "epoch": 272.40597014925373, "grad_norm": 20.202251434326172, "learning_rate": 9.563775510204082e-06, "loss": 34.2451, "step": 11441 }, { "epoch": 272.42985074626864, "grad_norm": 23.43889045715332, "learning_rate": 9.562925170068029e-06, "loss": 35.0835, "step": 11442 }, { "epoch": 272.4537313432836, "grad_norm": 22.16297721862793, "learning_rate": 9.562074829931974e-06, "loss": 34.7922, "step": 11443 }, { "epoch": 272.4776119402985, "grad_norm": 25.513879776000977, "learning_rate": 9.561224489795919e-06, "loss": 35.4338, "step": 11444 }, { "epoch": 272.5014925373134, "grad_norm": 21.959760665893555, "learning_rate": 9.560374149659864e-06, "loss": 35.7945, "step": 11445 }, { "epoch": 272.52537313432833, "grad_norm": 25.527957916259766, "learning_rate": 9.559523809523811e-06, "loss": 34.7416, "step": 11446 }, { "epoch": 272.5492537313433, "grad_norm": 21.50974464416504, "learning_rate": 9.558673469387756e-06, "loss": 34.3238, "step": 11447 }, { "epoch": 272.5731343283582, "grad_norm": 22.119808197021484, "learning_rate": 9.557823129251701e-06, "loss": 35.0855, "step": 11448 }, { "epoch": 272.5970149253731, "grad_norm": 19.815471649169922, "learning_rate": 9.556972789115647e-06, "loss": 34.5338, "step": 11449 }, { "epoch": 272.6208955223881, "grad_norm": 23.734107971191406, "learning_rate": 9.556122448979592e-06, "loss": 35.5892, "step": 11450 }, { "epoch": 272.644776119403, "grad_norm": 18.64801788330078, "learning_rate": 9.555272108843539e-06, "loss": 35.9366, "step": 11451 }, { "epoch": 272.6686567164179, "grad_norm": 26.981422424316406, "learning_rate": 9.554421768707484e-06, "loss": 35.514, "step": 11452 }, { "epoch": 272.6925373134328, "grad_norm": 21.583669662475586, "learning_rate": 9.55357142857143e-06, "loss": 35.1847, "step": 11453 }, { "epoch": 272.7164179104478, "grad_norm": 20.58284568786621, "learning_rate": 9.552721088435374e-06, "loss": 35.5298, "step": 11454 }, { "epoch": 272.7402985074627, "grad_norm": 18.506633758544922, "learning_rate": 9.55187074829932e-06, "loss": 35.2539, "step": 11455 }, { "epoch": 272.7641791044776, "grad_norm": 20.344755172729492, "learning_rate": 9.551020408163266e-06, "loss": 34.4507, "step": 11456 }, { "epoch": 272.78805970149256, "grad_norm": 21.053865432739258, "learning_rate": 9.550170068027212e-06, "loss": 35.0848, "step": 11457 }, { "epoch": 272.81194029850747, "grad_norm": 19.17555809020996, "learning_rate": 9.549319727891157e-06, "loss": 34.5821, "step": 11458 }, { "epoch": 272.8358208955224, "grad_norm": 19.503799438476562, "learning_rate": 9.548469387755102e-06, "loss": 35.4881, "step": 11459 }, { "epoch": 272.85970149253734, "grad_norm": 22.57611846923828, "learning_rate": 9.547619047619049e-06, "loss": 34.5947, "step": 11460 }, { "epoch": 272.88358208955225, "grad_norm": 16.201906204223633, "learning_rate": 9.546768707482994e-06, "loss": 33.9073, "step": 11461 }, { "epoch": 272.90746268656716, "grad_norm": 29.876388549804688, "learning_rate": 9.54591836734694e-06, "loss": 34.9881, "step": 11462 }, { "epoch": 272.93134328358207, "grad_norm": 21.188796997070312, "learning_rate": 9.545068027210885e-06, "loss": 33.9589, "step": 11463 }, { "epoch": 272.95522388059703, "grad_norm": 24.822237014770508, "learning_rate": 9.54421768707483e-06, "loss": 34.5605, "step": 11464 }, { "epoch": 272.97910447761194, "grad_norm": 21.180675506591797, "learning_rate": 9.543367346938777e-06, "loss": 34.6804, "step": 11465 }, { "epoch": 273.0, "grad_norm": 22.525537490844727, "learning_rate": 9.542517006802722e-06, "loss": 31.2545, "step": 11466 }, { "epoch": 273.0238805970149, "grad_norm": 23.992300033569336, "learning_rate": 9.541666666666669e-06, "loss": 35.4219, "step": 11467 }, { "epoch": 273.0477611940299, "grad_norm": 16.92515754699707, "learning_rate": 9.540816326530612e-06, "loss": 33.5769, "step": 11468 }, { "epoch": 273.0716417910448, "grad_norm": 23.432153701782227, "learning_rate": 9.539965986394557e-06, "loss": 34.318, "step": 11469 }, { "epoch": 273.0955223880597, "grad_norm": 19.189620971679688, "learning_rate": 9.539115646258504e-06, "loss": 34.2028, "step": 11470 }, { "epoch": 273.1194029850746, "grad_norm": 19.115657806396484, "learning_rate": 9.53826530612245e-06, "loss": 34.2054, "step": 11471 }, { "epoch": 273.14328358208957, "grad_norm": 21.3024845123291, "learning_rate": 9.537414965986396e-06, "loss": 35.7482, "step": 11472 }, { "epoch": 273.1671641791045, "grad_norm": 16.934879302978516, "learning_rate": 9.536564625850342e-06, "loss": 35.7134, "step": 11473 }, { "epoch": 273.1910447761194, "grad_norm": 22.143213272094727, "learning_rate": 9.535714285714287e-06, "loss": 35.191, "step": 11474 }, { "epoch": 273.21492537313435, "grad_norm": 18.863143920898438, "learning_rate": 9.534863945578232e-06, "loss": 34.7234, "step": 11475 }, { "epoch": 273.23880597014926, "grad_norm": 16.55891990661621, "learning_rate": 9.534013605442177e-06, "loss": 36.2902, "step": 11476 }, { "epoch": 273.26268656716417, "grad_norm": 19.711896896362305, "learning_rate": 9.533163265306124e-06, "loss": 34.8988, "step": 11477 }, { "epoch": 273.28656716417913, "grad_norm": 13.028790473937988, "learning_rate": 9.53231292517007e-06, "loss": 35.5146, "step": 11478 }, { "epoch": 273.31044776119404, "grad_norm": 24.55320930480957, "learning_rate": 9.531462585034015e-06, "loss": 35.4407, "step": 11479 }, { "epoch": 273.33432835820895, "grad_norm": 18.43265151977539, "learning_rate": 9.53061224489796e-06, "loss": 33.798, "step": 11480 }, { "epoch": 273.35820895522386, "grad_norm": 22.450809478759766, "learning_rate": 9.529761904761905e-06, "loss": 35.1897, "step": 11481 }, { "epoch": 273.3820895522388, "grad_norm": 24.19524574279785, "learning_rate": 9.528911564625852e-06, "loss": 34.1183, "step": 11482 }, { "epoch": 273.40597014925373, "grad_norm": 17.7977352142334, "learning_rate": 9.528061224489797e-06, "loss": 35.96, "step": 11483 }, { "epoch": 273.42985074626864, "grad_norm": 28.38298225402832, "learning_rate": 9.527210884353742e-06, "loss": 34.8416, "step": 11484 }, { "epoch": 273.4537313432836, "grad_norm": 21.169815063476562, "learning_rate": 9.526360544217687e-06, "loss": 34.7033, "step": 11485 }, { "epoch": 273.4776119402985, "grad_norm": 29.524351119995117, "learning_rate": 9.525510204081634e-06, "loss": 35.1072, "step": 11486 }, { "epoch": 273.5014925373134, "grad_norm": 22.241783142089844, "learning_rate": 9.52465986394558e-06, "loss": 35.8894, "step": 11487 }, { "epoch": 273.52537313432833, "grad_norm": 29.821815490722656, "learning_rate": 9.523809523809525e-06, "loss": 35.7035, "step": 11488 }, { "epoch": 273.5492537313433, "grad_norm": 25.787349700927734, "learning_rate": 9.52295918367347e-06, "loss": 34.6796, "step": 11489 }, { "epoch": 273.5731343283582, "grad_norm": 32.30319595336914, "learning_rate": 9.522108843537415e-06, "loss": 34.7891, "step": 11490 }, { "epoch": 273.5970149253731, "grad_norm": 27.29175567626953, "learning_rate": 9.521258503401362e-06, "loss": 35.5402, "step": 11491 }, { "epoch": 273.6208955223881, "grad_norm": 30.5361328125, "learning_rate": 9.520408163265307e-06, "loss": 35.7587, "step": 11492 }, { "epoch": 273.644776119403, "grad_norm": 24.02892303466797, "learning_rate": 9.519557823129252e-06, "loss": 34.6691, "step": 11493 }, { "epoch": 273.6686567164179, "grad_norm": 29.685543060302734, "learning_rate": 9.518707482993198e-06, "loss": 34.6198, "step": 11494 }, { "epoch": 273.6925373134328, "grad_norm": 23.516645431518555, "learning_rate": 9.517857142857143e-06, "loss": 35.3945, "step": 11495 }, { "epoch": 273.7164179104478, "grad_norm": 30.824016571044922, "learning_rate": 9.51700680272109e-06, "loss": 34.7043, "step": 11496 }, { "epoch": 273.7402985074627, "grad_norm": 20.917694091796875, "learning_rate": 9.516156462585035e-06, "loss": 34.2073, "step": 11497 }, { "epoch": 273.7641791044776, "grad_norm": 41.69342041015625, "learning_rate": 9.51530612244898e-06, "loss": 35.1896, "step": 11498 }, { "epoch": 273.78805970149256, "grad_norm": 31.923019409179688, "learning_rate": 9.514455782312925e-06, "loss": 35.6918, "step": 11499 }, { "epoch": 273.81194029850747, "grad_norm": 31.552574157714844, "learning_rate": 9.513605442176872e-06, "loss": 35.462, "step": 11500 }, { "epoch": 273.8358208955224, "grad_norm": 31.041627883911133, "learning_rate": 9.512755102040817e-06, "loss": 35.9564, "step": 11501 }, { "epoch": 273.85970149253734, "grad_norm": 23.8511962890625, "learning_rate": 9.511904761904763e-06, "loss": 33.7351, "step": 11502 }, { "epoch": 273.88358208955225, "grad_norm": 22.056659698486328, "learning_rate": 9.511054421768708e-06, "loss": 34.0475, "step": 11503 }, { "epoch": 273.90746268656716, "grad_norm": 31.377748489379883, "learning_rate": 9.510204081632653e-06, "loss": 34.8224, "step": 11504 }, { "epoch": 273.93134328358207, "grad_norm": 25.686439514160156, "learning_rate": 9.5093537414966e-06, "loss": 34.9713, "step": 11505 }, { "epoch": 273.95522388059703, "grad_norm": 31.768295288085938, "learning_rate": 9.508503401360545e-06, "loss": 34.3472, "step": 11506 }, { "epoch": 273.97910447761194, "grad_norm": 30.48211669921875, "learning_rate": 9.50765306122449e-06, "loss": 33.5255, "step": 11507 }, { "epoch": 274.0, "grad_norm": 24.9478759765625, "learning_rate": 9.506802721088436e-06, "loss": 31.1891, "step": 11508 }, { "epoch": 274.0238805970149, "grad_norm": 26.062694549560547, "learning_rate": 9.50595238095238e-06, "loss": 35.456, "step": 11509 }, { "epoch": 274.0477611940299, "grad_norm": 28.693044662475586, "learning_rate": 9.505102040816328e-06, "loss": 35.1406, "step": 11510 }, { "epoch": 274.0716417910448, "grad_norm": 23.191530227661133, "learning_rate": 9.504251700680273e-06, "loss": 34.7138, "step": 11511 }, { "epoch": 274.0955223880597, "grad_norm": 32.414337158203125, "learning_rate": 9.503401360544218e-06, "loss": 35.28, "step": 11512 }, { "epoch": 274.1194029850746, "grad_norm": 29.824779510498047, "learning_rate": 9.502551020408163e-06, "loss": 35.4824, "step": 11513 }, { "epoch": 274.14328358208957, "grad_norm": 27.0230712890625, "learning_rate": 9.50170068027211e-06, "loss": 35.4533, "step": 11514 }, { "epoch": 274.1671641791045, "grad_norm": 23.042133331298828, "learning_rate": 9.500850340136055e-06, "loss": 34.8574, "step": 11515 }, { "epoch": 274.1910447761194, "grad_norm": 27.555049896240234, "learning_rate": 9.5e-06, "loss": 35.2714, "step": 11516 }, { "epoch": 274.21492537313435, "grad_norm": 24.51102066040039, "learning_rate": 9.499149659863946e-06, "loss": 34.5256, "step": 11517 }, { "epoch": 274.23880597014926, "grad_norm": 35.179222106933594, "learning_rate": 9.498299319727891e-06, "loss": 35.1316, "step": 11518 }, { "epoch": 274.26268656716417, "grad_norm": 30.471803665161133, "learning_rate": 9.497448979591838e-06, "loss": 35.4382, "step": 11519 }, { "epoch": 274.28656716417913, "grad_norm": 24.4095401763916, "learning_rate": 9.496598639455783e-06, "loss": 34.6631, "step": 11520 }, { "epoch": 274.31044776119404, "grad_norm": 22.290611267089844, "learning_rate": 9.49574829931973e-06, "loss": 34.9445, "step": 11521 }, { "epoch": 274.33432835820895, "grad_norm": 28.604211807250977, "learning_rate": 9.494897959183675e-06, "loss": 33.9613, "step": 11522 }, { "epoch": 274.35820895522386, "grad_norm": 23.471830368041992, "learning_rate": 9.494047619047619e-06, "loss": 34.6696, "step": 11523 }, { "epoch": 274.3820895522388, "grad_norm": 30.45530128479004, "learning_rate": 9.493197278911566e-06, "loss": 35.0699, "step": 11524 }, { "epoch": 274.40597014925373, "grad_norm": 27.771160125732422, "learning_rate": 9.49234693877551e-06, "loss": 34.8244, "step": 11525 }, { "epoch": 274.42985074626864, "grad_norm": 29.891550064086914, "learning_rate": 9.491496598639458e-06, "loss": 35.4989, "step": 11526 }, { "epoch": 274.4537313432836, "grad_norm": 28.894603729248047, "learning_rate": 9.490646258503403e-06, "loss": 35.0306, "step": 11527 }, { "epoch": 274.4776119402985, "grad_norm": 26.922245025634766, "learning_rate": 9.489795918367348e-06, "loss": 34.8424, "step": 11528 }, { "epoch": 274.5014925373134, "grad_norm": 25.6401424407959, "learning_rate": 9.488945578231293e-06, "loss": 35.5089, "step": 11529 }, { "epoch": 274.52537313432833, "grad_norm": 28.86932945251465, "learning_rate": 9.488095238095238e-06, "loss": 33.8797, "step": 11530 }, { "epoch": 274.5492537313433, "grad_norm": 25.750967025756836, "learning_rate": 9.487244897959185e-06, "loss": 34.5739, "step": 11531 }, { "epoch": 274.5731343283582, "grad_norm": 31.21046257019043, "learning_rate": 9.48639455782313e-06, "loss": 35.3033, "step": 11532 }, { "epoch": 274.5970149253731, "grad_norm": NaN, "learning_rate": 9.485544217687076e-06, "loss": 55.0899, "step": 11533 }, { "epoch": 274.6208955223881, "grad_norm": 26.225570678710938, "learning_rate": 9.485544217687076e-06, "loss": 35.0351, "step": 11534 }, { "epoch": 274.644776119403, "grad_norm": 29.53644371032715, "learning_rate": 9.484693877551021e-06, "loss": 34.2676, "step": 11535 }, { "epoch": 274.6686567164179, "grad_norm": 31.77597999572754, "learning_rate": 9.483843537414966e-06, "loss": 35.2219, "step": 11536 }, { "epoch": 274.6925373134328, "grad_norm": 27.853591918945312, "learning_rate": 9.482993197278913e-06, "loss": 35.0949, "step": 11537 }, { "epoch": 274.7164179104478, "grad_norm": 24.07490348815918, "learning_rate": 9.482142857142858e-06, "loss": 33.7657, "step": 11538 }, { "epoch": 274.7402985074627, "grad_norm": 32.067989349365234, "learning_rate": 9.481292517006803e-06, "loss": 35.1784, "step": 11539 }, { "epoch": 274.7641791044776, "grad_norm": 24.953723907470703, "learning_rate": 9.480442176870749e-06, "loss": 34.16, "step": 11540 }, { "epoch": 274.78805970149256, "grad_norm": 30.517349243164062, "learning_rate": 9.479591836734695e-06, "loss": 35.2679, "step": 11541 }, { "epoch": 274.81194029850747, "grad_norm": 30.275558471679688, "learning_rate": 9.47874149659864e-06, "loss": 35.5045, "step": 11542 }, { "epoch": 274.8358208955224, "grad_norm": 27.648090362548828, "learning_rate": 9.477891156462586e-06, "loss": 33.7273, "step": 11543 }, { "epoch": 274.85970149253734, "grad_norm": 22.869770050048828, "learning_rate": 9.477040816326531e-06, "loss": 34.6546, "step": 11544 }, { "epoch": 274.88358208955225, "grad_norm": 28.749204635620117, "learning_rate": 9.476190476190476e-06, "loss": 33.961, "step": 11545 }, { "epoch": 274.90746268656716, "grad_norm": 23.774635314941406, "learning_rate": 9.475340136054423e-06, "loss": 35.5513, "step": 11546 }, { "epoch": 274.93134328358207, "grad_norm": 29.827241897583008, "learning_rate": 9.474489795918368e-06, "loss": 34.9178, "step": 11547 }, { "epoch": 274.95522388059703, "grad_norm": 27.671878814697266, "learning_rate": 9.473639455782314e-06, "loss": 35.3901, "step": 11548 }, { "epoch": 274.97910447761194, "grad_norm": NaN, "learning_rate": 9.472789115646259e-06, "loss": 52.21, "step": 11549 }, { "epoch": 275.0, "grad_norm": 24.934322357177734, "learning_rate": 9.472789115646259e-06, "loss": 32.1842, "step": 11550 }, { "epoch": 275.0238805970149, "grad_norm": 26.665868759155273, "learning_rate": 9.471938775510204e-06, "loss": 34.1989, "step": 11551 }, { "epoch": 275.0477611940299, "grad_norm": 25.216344833374023, "learning_rate": 9.471088435374151e-06, "loss": 33.7022, "step": 11552 }, { "epoch": 275.0716417910448, "grad_norm": 23.939922332763672, "learning_rate": 9.470238095238096e-06, "loss": 34.7447, "step": 11553 }, { "epoch": 275.0955223880597, "grad_norm": 30.293737411499023, "learning_rate": 9.469387755102041e-06, "loss": 33.8805, "step": 11554 }, { "epoch": 275.1194029850746, "grad_norm": 26.180225372314453, "learning_rate": 9.468537414965986e-06, "loss": 34.3898, "step": 11555 }, { "epoch": 275.14328358208957, "grad_norm": 29.93408203125, "learning_rate": 9.467687074829933e-06, "loss": 35.4292, "step": 11556 }, { "epoch": 275.1671641791045, "grad_norm": 26.23850440979004, "learning_rate": 9.466836734693879e-06, "loss": 35.715, "step": 11557 }, { "epoch": 275.1910447761194, "grad_norm": 27.489974975585938, "learning_rate": 9.465986394557824e-06, "loss": 35.464, "step": 11558 }, { "epoch": 275.21492537313435, "grad_norm": 22.571842193603516, "learning_rate": 9.465136054421769e-06, "loss": 34.3222, "step": 11559 }, { "epoch": 275.23880597014926, "grad_norm": 29.974586486816406, "learning_rate": 9.464285714285714e-06, "loss": 34.1954, "step": 11560 }, { "epoch": 275.26268656716417, "grad_norm": 25.143552780151367, "learning_rate": 9.463435374149661e-06, "loss": 35.0162, "step": 11561 }, { "epoch": 275.28656716417913, "grad_norm": 31.299367904663086, "learning_rate": 9.462585034013606e-06, "loss": 34.9588, "step": 11562 }, { "epoch": 275.31044776119404, "grad_norm": 28.38913345336914, "learning_rate": 9.461734693877551e-06, "loss": 33.5666, "step": 11563 }, { "epoch": 275.33432835820895, "grad_norm": 26.747886657714844, "learning_rate": 9.460884353741497e-06, "loss": 35.7584, "step": 11564 }, { "epoch": 275.35820895522386, "grad_norm": 24.418148040771484, "learning_rate": 9.460034013605442e-06, "loss": 35.9323, "step": 11565 }, { "epoch": 275.3820895522388, "grad_norm": 27.281307220458984, "learning_rate": 9.459183673469389e-06, "loss": 33.2848, "step": 11566 }, { "epoch": 275.40597014925373, "grad_norm": 21.456995010375977, "learning_rate": 9.458333333333334e-06, "loss": 34.8987, "step": 11567 }, { "epoch": 275.42985074626864, "grad_norm": 27.553138732910156, "learning_rate": 9.457482993197281e-06, "loss": 33.2433, "step": 11568 }, { "epoch": 275.4537313432836, "grad_norm": 23.411741256713867, "learning_rate": 9.456632653061224e-06, "loss": 34.738, "step": 11569 }, { "epoch": 275.4776119402985, "grad_norm": 27.484281539916992, "learning_rate": 9.455782312925171e-06, "loss": 33.6812, "step": 11570 }, { "epoch": 275.5014925373134, "grad_norm": 27.311189651489258, "learning_rate": 9.454931972789116e-06, "loss": 34.7603, "step": 11571 }, { "epoch": 275.52537313432833, "grad_norm": 25.987213134765625, "learning_rate": 9.454081632653062e-06, "loss": 35.9721, "step": 11572 }, { "epoch": 275.5492537313433, "grad_norm": 24.543262481689453, "learning_rate": 9.453231292517009e-06, "loss": 35.1705, "step": 11573 }, { "epoch": 275.5731343283582, "grad_norm": NaN, "learning_rate": 9.452380952380952e-06, "loss": 42.9772, "step": 11574 }, { "epoch": 275.5970149253731, "grad_norm": 30.56894874572754, "learning_rate": 9.452380952380952e-06, "loss": 34.7541, "step": 11575 }, { "epoch": 275.6208955223881, "grad_norm": 24.31935691833496, "learning_rate": 9.451530612244899e-06, "loss": 35.306, "step": 11576 }, { "epoch": 275.644776119403, "grad_norm": 26.2707462310791, "learning_rate": 9.450680272108844e-06, "loss": 35.2843, "step": 11577 }, { "epoch": 275.6686567164179, "grad_norm": 26.78156089782715, "learning_rate": 9.449829931972791e-06, "loss": 33.7731, "step": 11578 }, { "epoch": 275.6925373134328, "grad_norm": 25.12327003479004, "learning_rate": 9.448979591836736e-06, "loss": 35.9485, "step": 11579 }, { "epoch": 275.7164179104478, "grad_norm": 22.33073616027832, "learning_rate": 9.448129251700681e-06, "loss": 34.9512, "step": 11580 }, { "epoch": 275.7402985074627, "grad_norm": 28.61358642578125, "learning_rate": 9.447278911564627e-06, "loss": 35.087, "step": 11581 }, { "epoch": 275.7641791044776, "grad_norm": 24.46397590637207, "learning_rate": 9.446428571428572e-06, "loss": 35.7263, "step": 11582 }, { "epoch": 275.78805970149256, "grad_norm": 30.075510025024414, "learning_rate": 9.445578231292519e-06, "loss": 35.1137, "step": 11583 }, { "epoch": 275.81194029850747, "grad_norm": 24.947879791259766, "learning_rate": 9.444727891156464e-06, "loss": 34.9995, "step": 11584 }, { "epoch": 275.8358208955224, "grad_norm": 24.197057723999023, "learning_rate": 9.44387755102041e-06, "loss": 35.6346, "step": 11585 }, { "epoch": 275.85970149253734, "grad_norm": 22.781902313232422, "learning_rate": 9.443027210884354e-06, "loss": 36.1519, "step": 11586 }, { "epoch": 275.88358208955225, "grad_norm": 25.115249633789062, "learning_rate": 9.4421768707483e-06, "loss": 36.1326, "step": 11587 }, { "epoch": 275.90746268656716, "grad_norm": 21.32830238342285, "learning_rate": 9.441326530612246e-06, "loss": 34.9455, "step": 11588 }, { "epoch": 275.93134328358207, "grad_norm": 23.063034057617188, "learning_rate": 9.440476190476192e-06, "loss": 35.7374, "step": 11589 }, { "epoch": 275.95522388059703, "grad_norm": 16.17242431640625, "learning_rate": 9.439625850340137e-06, "loss": 34.7554, "step": 11590 }, { "epoch": 275.97910447761194, "grad_norm": 28.204742431640625, "learning_rate": 9.438775510204082e-06, "loss": 33.9266, "step": 11591 }, { "epoch": 276.0, "grad_norm": 21.808975219726562, "learning_rate": 9.437925170068027e-06, "loss": 31.0028, "step": 11592 }, { "epoch": 276.0238805970149, "grad_norm": 25.250585556030273, "learning_rate": 9.437074829931974e-06, "loss": 34.3719, "step": 11593 }, { "epoch": 276.0477611940299, "grad_norm": 22.951576232910156, "learning_rate": 9.43622448979592e-06, "loss": 33.6795, "step": 11594 }, { "epoch": 276.0716417910448, "grad_norm": 19.92416000366211, "learning_rate": 9.435374149659865e-06, "loss": 35.4251, "step": 11595 }, { "epoch": 276.0955223880597, "grad_norm": 22.649166107177734, "learning_rate": 9.43452380952381e-06, "loss": 33.3365, "step": 11596 }, { "epoch": 276.1194029850746, "grad_norm": 20.024242401123047, "learning_rate": 9.433673469387757e-06, "loss": 35.0393, "step": 11597 }, { "epoch": 276.14328358208957, "grad_norm": 16.586639404296875, "learning_rate": 9.432823129251702e-06, "loss": 34.7588, "step": 11598 }, { "epoch": 276.1671641791045, "grad_norm": 17.571060180664062, "learning_rate": 9.431972789115647e-06, "loss": 34.5063, "step": 11599 }, { "epoch": 276.1910447761194, "grad_norm": 15.963066101074219, "learning_rate": 9.431122448979592e-06, "loss": 35.7218, "step": 11600 }, { "epoch": 276.21492537313435, "grad_norm": 17.378015518188477, "learning_rate": 9.430272108843537e-06, "loss": 35.3853, "step": 11601 }, { "epoch": 276.23880597014926, "grad_norm": 14.895105361938477, "learning_rate": 9.429421768707484e-06, "loss": 35.9053, "step": 11602 }, { "epoch": 276.26268656716417, "grad_norm": 17.460681915283203, "learning_rate": 9.42857142857143e-06, "loss": 34.0455, "step": 11603 }, { "epoch": 276.28656716417913, "grad_norm": 15.1104154586792, "learning_rate": 9.427721088435375e-06, "loss": 34.0612, "step": 11604 }, { "epoch": 276.31044776119404, "grad_norm": 15.858449935913086, "learning_rate": 9.42687074829932e-06, "loss": 34.4224, "step": 11605 }, { "epoch": 276.33432835820895, "grad_norm": 14.354479789733887, "learning_rate": 9.426020408163265e-06, "loss": 35.9808, "step": 11606 }, { "epoch": 276.35820895522386, "grad_norm": 15.939604759216309, "learning_rate": 9.425170068027212e-06, "loss": 34.1325, "step": 11607 }, { "epoch": 276.3820895522388, "grad_norm": 17.995344161987305, "learning_rate": 9.424319727891157e-06, "loss": 36.2056, "step": 11608 }, { "epoch": 276.40597014925373, "grad_norm": 14.723787307739258, "learning_rate": 9.423469387755102e-06, "loss": 34.8778, "step": 11609 }, { "epoch": 276.42985074626864, "grad_norm": 17.433948516845703, "learning_rate": 9.422619047619048e-06, "loss": 34.4876, "step": 11610 }, { "epoch": 276.4537313432836, "grad_norm": 15.472698211669922, "learning_rate": 9.421768707482995e-06, "loss": 35.319, "step": 11611 }, { "epoch": 276.4776119402985, "grad_norm": 15.433340072631836, "learning_rate": 9.42091836734694e-06, "loss": 33.6886, "step": 11612 }, { "epoch": 276.5014925373134, "grad_norm": 17.547300338745117, "learning_rate": 9.420068027210885e-06, "loss": 35.1445, "step": 11613 }, { "epoch": 276.52537313432833, "grad_norm": 16.011526107788086, "learning_rate": 9.41921768707483e-06, "loss": 34.8026, "step": 11614 }, { "epoch": 276.5492537313433, "grad_norm": 15.494392395019531, "learning_rate": 9.418367346938775e-06, "loss": 35.8802, "step": 11615 }, { "epoch": 276.5731343283582, "grad_norm": 14.229571342468262, "learning_rate": 9.417517006802722e-06, "loss": 35.7169, "step": 11616 }, { "epoch": 276.5970149253731, "grad_norm": 16.28061294555664, "learning_rate": 9.416666666666667e-06, "loss": 35.9106, "step": 11617 }, { "epoch": 276.6208955223881, "grad_norm": 13.186849594116211, "learning_rate": 9.415816326530614e-06, "loss": 33.2318, "step": 11618 }, { "epoch": 276.644776119403, "grad_norm": 17.973485946655273, "learning_rate": 9.414965986394558e-06, "loss": 35.7487, "step": 11619 }, { "epoch": 276.6686567164179, "grad_norm": 16.334392547607422, "learning_rate": 9.414115646258503e-06, "loss": 34.507, "step": 11620 }, { "epoch": 276.6925373134328, "grad_norm": 15.271941184997559, "learning_rate": 9.41326530612245e-06, "loss": 33.8744, "step": 11621 }, { "epoch": 276.7164179104478, "grad_norm": 19.46748161315918, "learning_rate": 9.412414965986395e-06, "loss": 36.0643, "step": 11622 }, { "epoch": 276.7402985074627, "grad_norm": 15.582892417907715, "learning_rate": 9.411564625850342e-06, "loss": 35.6241, "step": 11623 }, { "epoch": 276.7641791044776, "grad_norm": 23.94053077697754, "learning_rate": 9.410714285714286e-06, "loss": 33.2915, "step": 11624 }, { "epoch": 276.78805970149256, "grad_norm": 20.39919090270996, "learning_rate": 9.409863945578232e-06, "loss": 34.942, "step": 11625 }, { "epoch": 276.81194029850747, "grad_norm": 15.005172729492188, "learning_rate": 9.409013605442178e-06, "loss": 34.5648, "step": 11626 }, { "epoch": 276.8358208955224, "grad_norm": 20.01251983642578, "learning_rate": 9.408163265306123e-06, "loss": 35.9609, "step": 11627 }, { "epoch": 276.85970149253734, "grad_norm": 19.874052047729492, "learning_rate": 9.40731292517007e-06, "loss": 34.1502, "step": 11628 }, { "epoch": 276.88358208955225, "grad_norm": 13.759737968444824, "learning_rate": 9.406462585034015e-06, "loss": 35.1191, "step": 11629 }, { "epoch": 276.90746268656716, "grad_norm": 16.14326286315918, "learning_rate": 9.40561224489796e-06, "loss": 35.1842, "step": 11630 }, { "epoch": 276.93134328358207, "grad_norm": 13.732664108276367, "learning_rate": 9.404761904761905e-06, "loss": 35.4432, "step": 11631 }, { "epoch": 276.95522388059703, "grad_norm": 15.850354194641113, "learning_rate": 9.403911564625852e-06, "loss": 33.7583, "step": 11632 }, { "epoch": 276.97910447761194, "grad_norm": 18.14269256591797, "learning_rate": 9.403061224489797e-06, "loss": 35.4636, "step": 11633 }, { "epoch": 277.0, "grad_norm": 14.818315505981445, "learning_rate": 9.402210884353743e-06, "loss": 30.0158, "step": 11634 }, { "epoch": 277.0238805970149, "grad_norm": 14.796257019042969, "learning_rate": 9.401360544217688e-06, "loss": 35.0704, "step": 11635 }, { "epoch": 277.0477611940299, "grad_norm": 12.80022144317627, "learning_rate": 9.400510204081633e-06, "loss": 35.1998, "step": 11636 }, { "epoch": 277.0716417910448, "grad_norm": 19.435375213623047, "learning_rate": 9.39965986394558e-06, "loss": 34.3463, "step": 11637 }, { "epoch": 277.0955223880597, "grad_norm": 13.989315032958984, "learning_rate": 9.398809523809525e-06, "loss": 33.7221, "step": 11638 }, { "epoch": 277.1194029850746, "grad_norm": 17.131755828857422, "learning_rate": 9.39795918367347e-06, "loss": 34.3613, "step": 11639 }, { "epoch": 277.14328358208957, "grad_norm": 16.77277946472168, "learning_rate": 9.397108843537416e-06, "loss": 34.9621, "step": 11640 }, { "epoch": 277.1671641791045, "grad_norm": 16.701374053955078, "learning_rate": 9.39625850340136e-06, "loss": 35.6076, "step": 11641 }, { "epoch": 277.1910447761194, "grad_norm": 16.80859375, "learning_rate": 9.395408163265308e-06, "loss": 33.6321, "step": 11642 }, { "epoch": 277.21492537313435, "grad_norm": 16.263702392578125, "learning_rate": 9.394557823129253e-06, "loss": 34.1421, "step": 11643 }, { "epoch": 277.23880597014926, "grad_norm": 19.730012893676758, "learning_rate": 9.393707482993198e-06, "loss": 34.7567, "step": 11644 }, { "epoch": 277.26268656716417, "grad_norm": 16.331052780151367, "learning_rate": 9.392857142857143e-06, "loss": 35.5914, "step": 11645 }, { "epoch": 277.28656716417913, "grad_norm": NaN, "learning_rate": 9.392006802721088e-06, "loss": 38.8077, "step": 11646 }, { "epoch": 277.31044776119404, "grad_norm": 16.834171295166016, "learning_rate": 9.392006802721088e-06, "loss": 34.0246, "step": 11647 }, { "epoch": 277.33432835820895, "grad_norm": 15.806410789489746, "learning_rate": 9.391156462585035e-06, "loss": 33.9759, "step": 11648 }, { "epoch": 277.35820895522386, "grad_norm": 16.6479549407959, "learning_rate": 9.39030612244898e-06, "loss": 35.1363, "step": 11649 }, { "epoch": 277.3820895522388, "grad_norm": 20.180774688720703, "learning_rate": 9.389455782312926e-06, "loss": 35.4411, "step": 11650 }, { "epoch": 277.40597014925373, "grad_norm": 18.839466094970703, "learning_rate": 9.388605442176871e-06, "loss": 34.8252, "step": 11651 }, { "epoch": 277.42985074626864, "grad_norm": 15.500970840454102, "learning_rate": 9.387755102040818e-06, "loss": 35.407, "step": 11652 }, { "epoch": 277.4537313432836, "grad_norm": 20.913833618164062, "learning_rate": 9.386904761904763e-06, "loss": 34.4692, "step": 11653 }, { "epoch": 277.4776119402985, "grad_norm": 14.663201332092285, "learning_rate": 9.386054421768708e-06, "loss": 34.8329, "step": 11654 }, { "epoch": 277.5014925373134, "grad_norm": 19.904327392578125, "learning_rate": 9.385204081632653e-06, "loss": 34.5005, "step": 11655 }, { "epoch": 277.52537313432833, "grad_norm": 16.9540958404541, "learning_rate": 9.384353741496599e-06, "loss": 34.4333, "step": 11656 }, { "epoch": 277.5492537313433, "grad_norm": 17.932445526123047, "learning_rate": 9.383503401360545e-06, "loss": 35.8671, "step": 11657 }, { "epoch": 277.5731343283582, "grad_norm": 17.281888961791992, "learning_rate": 9.38265306122449e-06, "loss": 34.37, "step": 11658 }, { "epoch": 277.5970149253731, "grad_norm": 16.86711883544922, "learning_rate": 9.381802721088436e-06, "loss": 33.9623, "step": 11659 }, { "epoch": 277.6208955223881, "grad_norm": 19.224029541015625, "learning_rate": 9.380952380952381e-06, "loss": 35.3118, "step": 11660 }, { "epoch": 277.644776119403, "grad_norm": 14.79770278930664, "learning_rate": 9.380102040816326e-06, "loss": 34.5133, "step": 11661 }, { "epoch": 277.6686567164179, "grad_norm": 19.909706115722656, "learning_rate": 9.379251700680273e-06, "loss": 34.6881, "step": 11662 }, { "epoch": 277.6925373134328, "grad_norm": 16.60921287536621, "learning_rate": 9.378401360544218e-06, "loss": 33.7788, "step": 11663 }, { "epoch": 277.7164179104478, "grad_norm": 15.984688758850098, "learning_rate": 9.377551020408164e-06, "loss": 35.8545, "step": 11664 }, { "epoch": 277.7402985074627, "grad_norm": 15.258697509765625, "learning_rate": 9.376700680272109e-06, "loss": 34.3763, "step": 11665 }, { "epoch": 277.7641791044776, "grad_norm": 18.777162551879883, "learning_rate": 9.375850340136056e-06, "loss": 34.5487, "step": 11666 }, { "epoch": 277.78805970149256, "grad_norm": 17.217514038085938, "learning_rate": 9.375000000000001e-06, "loss": 34.7693, "step": 11667 }, { "epoch": 277.81194029850747, "grad_norm": 16.63068389892578, "learning_rate": 9.374149659863946e-06, "loss": 35.4646, "step": 11668 }, { "epoch": 277.8358208955224, "grad_norm": 16.24852752685547, "learning_rate": 9.373299319727891e-06, "loss": 36.0635, "step": 11669 }, { "epoch": 277.85970149253734, "grad_norm": 22.07871437072754, "learning_rate": 9.372448979591836e-06, "loss": 35.4148, "step": 11670 }, { "epoch": 277.88358208955225, "grad_norm": 16.83747673034668, "learning_rate": 9.371598639455783e-06, "loss": 34.9176, "step": 11671 }, { "epoch": 277.90746268656716, "grad_norm": 19.011245727539062, "learning_rate": 9.370748299319729e-06, "loss": 35.0717, "step": 11672 }, { "epoch": 277.93134328358207, "grad_norm": 18.960601806640625, "learning_rate": 9.369897959183675e-06, "loss": 35.5138, "step": 11673 }, { "epoch": 277.95522388059703, "grad_norm": 16.586849212646484, "learning_rate": 9.36904761904762e-06, "loss": 35.1342, "step": 11674 }, { "epoch": 277.97910447761194, "grad_norm": 26.47035789489746, "learning_rate": 9.368197278911564e-06, "loss": 35.3914, "step": 11675 }, { "epoch": 278.0, "grad_norm": 15.833699226379395, "learning_rate": 9.367346938775511e-06, "loss": 29.716, "step": 11676 }, { "epoch": 278.0238805970149, "grad_norm": 21.678312301635742, "learning_rate": 9.366496598639456e-06, "loss": 35.6104, "step": 11677 }, { "epoch": 278.0477611940299, "grad_norm": 18.99468421936035, "learning_rate": 9.365646258503403e-06, "loss": 35.0982, "step": 11678 }, { "epoch": 278.0716417910448, "grad_norm": 17.7320556640625, "learning_rate": 9.364795918367348e-06, "loss": 35.9922, "step": 11679 }, { "epoch": 278.0955223880597, "grad_norm": 21.59501838684082, "learning_rate": 9.363945578231294e-06, "loss": 35.4113, "step": 11680 }, { "epoch": 278.1194029850746, "grad_norm": 17.88007164001465, "learning_rate": 9.363095238095239e-06, "loss": 36.0974, "step": 11681 }, { "epoch": 278.14328358208957, "grad_norm": 17.10157585144043, "learning_rate": 9.362244897959184e-06, "loss": 32.7194, "step": 11682 }, { "epoch": 278.1671641791045, "grad_norm": 27.03644561767578, "learning_rate": 9.361394557823131e-06, "loss": 34.95, "step": 11683 }, { "epoch": 278.1910447761194, "grad_norm": 15.274543762207031, "learning_rate": 9.360544217687076e-06, "loss": 34.1545, "step": 11684 }, { "epoch": 278.21492537313435, "grad_norm": 24.165719985961914, "learning_rate": 9.359693877551021e-06, "loss": 35.3858, "step": 11685 }, { "epoch": 278.23880597014926, "grad_norm": 17.6593017578125, "learning_rate": 9.358843537414966e-06, "loss": 34.9821, "step": 11686 }, { "epoch": 278.26268656716417, "grad_norm": 19.354694366455078, "learning_rate": 9.357993197278913e-06, "loss": 34.3421, "step": 11687 }, { "epoch": 278.28656716417913, "grad_norm": 16.19529151916504, "learning_rate": 9.357142857142859e-06, "loss": 34.8048, "step": 11688 }, { "epoch": 278.31044776119404, "grad_norm": 19.56124496459961, "learning_rate": 9.356292517006804e-06, "loss": 34.841, "step": 11689 }, { "epoch": 278.33432835820895, "grad_norm": 16.454235076904297, "learning_rate": 9.355442176870749e-06, "loss": 35.5673, "step": 11690 }, { "epoch": 278.35820895522386, "grad_norm": 18.578588485717773, "learning_rate": 9.354591836734694e-06, "loss": 35.3196, "step": 11691 }, { "epoch": 278.3820895522388, "grad_norm": 20.52625274658203, "learning_rate": 9.353741496598641e-06, "loss": 35.4304, "step": 11692 }, { "epoch": 278.40597014925373, "grad_norm": 15.972278594970703, "learning_rate": 9.352891156462586e-06, "loss": 35.0974, "step": 11693 }, { "epoch": 278.42985074626864, "grad_norm": 15.163324356079102, "learning_rate": 9.352040816326531e-06, "loss": 33.9505, "step": 11694 }, { "epoch": 278.4537313432836, "grad_norm": 16.178781509399414, "learning_rate": 9.351190476190477e-06, "loss": 33.7124, "step": 11695 }, { "epoch": 278.4776119402985, "grad_norm": 22.499116897583008, "learning_rate": 9.350340136054422e-06, "loss": 35.1534, "step": 11696 }, { "epoch": 278.5014925373134, "grad_norm": 16.468137741088867, "learning_rate": 9.349489795918369e-06, "loss": 35.167, "step": 11697 }, { "epoch": 278.52537313432833, "grad_norm": 17.90869140625, "learning_rate": 9.348639455782314e-06, "loss": 34.582, "step": 11698 }, { "epoch": 278.5492537313433, "grad_norm": 17.6704044342041, "learning_rate": 9.347789115646259e-06, "loss": 33.1312, "step": 11699 }, { "epoch": 278.5731343283582, "grad_norm": 22.600032806396484, "learning_rate": 9.346938775510204e-06, "loss": 35.5801, "step": 11700 }, { "epoch": 278.5970149253731, "grad_norm": 14.013952255249023, "learning_rate": 9.34608843537415e-06, "loss": 34.7605, "step": 11701 }, { "epoch": 278.6208955223881, "grad_norm": 31.57529067993164, "learning_rate": 9.345238095238096e-06, "loss": 34.4726, "step": 11702 }, { "epoch": 278.644776119403, "grad_norm": 22.25636100769043, "learning_rate": 9.344387755102042e-06, "loss": 35.6146, "step": 11703 }, { "epoch": 278.6686567164179, "grad_norm": 24.68794059753418, "learning_rate": 9.343537414965987e-06, "loss": 35.6026, "step": 11704 }, { "epoch": 278.6925373134328, "grad_norm": 21.93093490600586, "learning_rate": 9.342687074829932e-06, "loss": 34.4607, "step": 11705 }, { "epoch": 278.7164179104478, "grad_norm": 22.90749740600586, "learning_rate": 9.341836734693879e-06, "loss": 35.0604, "step": 11706 }, { "epoch": 278.7402985074627, "grad_norm": 23.237947463989258, "learning_rate": 9.340986394557824e-06, "loss": 32.8543, "step": 11707 }, { "epoch": 278.7641791044776, "grad_norm": 17.744321823120117, "learning_rate": 9.34013605442177e-06, "loss": 34.201, "step": 11708 }, { "epoch": 278.78805970149256, "grad_norm": 34.52104568481445, "learning_rate": 9.339285714285715e-06, "loss": 35.2452, "step": 11709 }, { "epoch": 278.81194029850747, "grad_norm": 25.336421966552734, "learning_rate": 9.33843537414966e-06, "loss": 34.8533, "step": 11710 }, { "epoch": 278.8358208955224, "grad_norm": 26.732851028442383, "learning_rate": 9.337585034013607e-06, "loss": 34.9734, "step": 11711 }, { "epoch": 278.85970149253734, "grad_norm": 22.55652618408203, "learning_rate": 9.336734693877552e-06, "loss": 34.6144, "step": 11712 }, { "epoch": 278.88358208955225, "grad_norm": 27.771093368530273, "learning_rate": 9.335884353741497e-06, "loss": 34.7722, "step": 11713 }, { "epoch": 278.90746268656716, "grad_norm": 17.773391723632812, "learning_rate": 9.335034013605442e-06, "loss": 33.6954, "step": 11714 }, { "epoch": 278.93134328358207, "grad_norm": 21.976579666137695, "learning_rate": 9.334183673469387e-06, "loss": 34.4679, "step": 11715 }, { "epoch": 278.95522388059703, "grad_norm": 20.813447952270508, "learning_rate": 9.333333333333334e-06, "loss": 34.7549, "step": 11716 }, { "epoch": 278.97910447761194, "grad_norm": 16.850330352783203, "learning_rate": 9.33248299319728e-06, "loss": 34.9686, "step": 11717 }, { "epoch": 279.0, "grad_norm": 19.154523849487305, "learning_rate": 9.331632653061225e-06, "loss": 30.0758, "step": 11718 }, { "epoch": 279.0238805970149, "grad_norm": 20.489709854125977, "learning_rate": 9.33078231292517e-06, "loss": 34.4739, "step": 11719 }, { "epoch": 279.0477611940299, "grad_norm": 16.187599182128906, "learning_rate": 9.329931972789117e-06, "loss": 34.3763, "step": 11720 }, { "epoch": 279.0716417910448, "grad_norm": 29.30412483215332, "learning_rate": 9.329081632653062e-06, "loss": 35.046, "step": 11721 }, { "epoch": 279.0955223880597, "grad_norm": 17.451623916625977, "learning_rate": 9.328231292517007e-06, "loss": 34.2565, "step": 11722 }, { "epoch": 279.1194029850746, "grad_norm": 25.524045944213867, "learning_rate": 9.327380952380954e-06, "loss": 34.9649, "step": 11723 }, { "epoch": 279.14328358208957, "grad_norm": 20.078102111816406, "learning_rate": 9.326530612244898e-06, "loss": 33.2472, "step": 11724 }, { "epoch": 279.1671641791045, "grad_norm": 24.161420822143555, "learning_rate": 9.325680272108845e-06, "loss": 33.3308, "step": 11725 }, { "epoch": 279.1910447761194, "grad_norm": 22.15292739868164, "learning_rate": 9.32482993197279e-06, "loss": 35.3597, "step": 11726 }, { "epoch": 279.21492537313435, "grad_norm": 20.223554611206055, "learning_rate": 9.323979591836737e-06, "loss": 34.2928, "step": 11727 }, { "epoch": 279.23880597014926, "grad_norm": 23.808414459228516, "learning_rate": 9.323129251700682e-06, "loss": 34.2218, "step": 11728 }, { "epoch": 279.26268656716417, "grad_norm": 15.967622756958008, "learning_rate": 9.322278911564627e-06, "loss": 36.2048, "step": 11729 }, { "epoch": 279.28656716417913, "grad_norm": 25.359920501708984, "learning_rate": 9.321428571428572e-06, "loss": 34.3644, "step": 11730 }, { "epoch": 279.31044776119404, "grad_norm": 19.8580265045166, "learning_rate": 9.320578231292517e-06, "loss": 35.0638, "step": 11731 }, { "epoch": 279.33432835820895, "grad_norm": 17.405927658081055, "learning_rate": 9.319727891156464e-06, "loss": 33.4174, "step": 11732 }, { "epoch": 279.35820895522386, "grad_norm": 26.111482620239258, "learning_rate": 9.31887755102041e-06, "loss": 32.7717, "step": 11733 }, { "epoch": 279.3820895522388, "grad_norm": 17.716827392578125, "learning_rate": 9.318027210884355e-06, "loss": 34.6962, "step": 11734 }, { "epoch": 279.40597014925373, "grad_norm": 26.403427124023438, "learning_rate": 9.3171768707483e-06, "loss": 34.4502, "step": 11735 }, { "epoch": 279.42985074626864, "grad_norm": 20.0697021484375, "learning_rate": 9.316326530612245e-06, "loss": 35.182, "step": 11736 }, { "epoch": 279.4537313432836, "grad_norm": 25.31626319885254, "learning_rate": 9.315476190476192e-06, "loss": 33.6727, "step": 11737 }, { "epoch": 279.4776119402985, "grad_norm": 17.396921157836914, "learning_rate": 9.314625850340137e-06, "loss": 34.9511, "step": 11738 }, { "epoch": 279.5014925373134, "grad_norm": 18.441740036010742, "learning_rate": 9.313775510204082e-06, "loss": 34.894, "step": 11739 }, { "epoch": 279.52537313432833, "grad_norm": 26.152395248413086, "learning_rate": 9.312925170068028e-06, "loss": 34.4218, "step": 11740 }, { "epoch": 279.5492537313433, "grad_norm": 17.21263313293457, "learning_rate": 9.312074829931974e-06, "loss": 34.8912, "step": 11741 }, { "epoch": 279.5731343283582, "grad_norm": 29.952415466308594, "learning_rate": 9.31122448979592e-06, "loss": 35.3389, "step": 11742 }, { "epoch": 279.5970149253731, "grad_norm": 19.48375129699707, "learning_rate": 9.310374149659865e-06, "loss": 34.5897, "step": 11743 }, { "epoch": 279.6208955223881, "grad_norm": 30.186059951782227, "learning_rate": 9.30952380952381e-06, "loss": 34.9371, "step": 11744 }, { "epoch": 279.644776119403, "grad_norm": 21.69789695739746, "learning_rate": 9.308673469387755e-06, "loss": 35.4772, "step": 11745 }, { "epoch": 279.6686567164179, "grad_norm": 26.690025329589844, "learning_rate": 9.307823129251702e-06, "loss": 35.0228, "step": 11746 }, { "epoch": 279.6925373134328, "grad_norm": 20.931751251220703, "learning_rate": 9.306972789115647e-06, "loss": 35.1266, "step": 11747 }, { "epoch": 279.7164179104478, "grad_norm": 20.397497177124023, "learning_rate": 9.306122448979593e-06, "loss": 34.6833, "step": 11748 }, { "epoch": 279.7402985074627, "grad_norm": 19.892305374145508, "learning_rate": 9.305272108843538e-06, "loss": 35.8163, "step": 11749 }, { "epoch": 279.7641791044776, "grad_norm": 17.510250091552734, "learning_rate": 9.304421768707483e-06, "loss": 35.9888, "step": 11750 }, { "epoch": 279.78805970149256, "grad_norm": 14.105299949645996, "learning_rate": 9.30357142857143e-06, "loss": 35.3041, "step": 11751 }, { "epoch": 279.81194029850747, "grad_norm": 19.23472785949707, "learning_rate": 9.302721088435375e-06, "loss": 34.6608, "step": 11752 }, { "epoch": 279.8358208955224, "grad_norm": 15.51583194732666, "learning_rate": 9.30187074829932e-06, "loss": 35.311, "step": 11753 }, { "epoch": 279.85970149253734, "grad_norm": 19.868854522705078, "learning_rate": 9.301020408163265e-06, "loss": 34.7399, "step": 11754 }, { "epoch": 279.88358208955225, "grad_norm": 16.499805450439453, "learning_rate": 9.30017006802721e-06, "loss": 34.7259, "step": 11755 }, { "epoch": 279.90746268656716, "grad_norm": 20.722089767456055, "learning_rate": 9.299319727891158e-06, "loss": 34.8558, "step": 11756 }, { "epoch": 279.93134328358207, "grad_norm": 15.864660263061523, "learning_rate": 9.298469387755103e-06, "loss": 34.9152, "step": 11757 }, { "epoch": 279.95522388059703, "grad_norm": 19.87424087524414, "learning_rate": 9.297619047619048e-06, "loss": 36.4404, "step": 11758 }, { "epoch": 279.97910447761194, "grad_norm": 16.940832138061523, "learning_rate": 9.296768707482993e-06, "loss": 34.991, "step": 11759 }, { "epoch": 280.0, "grad_norm": 18.82170295715332, "learning_rate": 9.29591836734694e-06, "loss": 30.1988, "step": 11760 }, { "epoch": 280.0, "step": 11760, "total_flos": 5.781277428825138e+17, "train_loss": 2.5059400242202137, "train_runtime": 25611.5463, "train_samples_per_second": 58.511, "train_steps_per_second": 0.459 }, { "epoch": 280.0238805970149, "grad_norm": 17.001306533813477, "learning_rate": 1e-05, "loss": 35.8188, "step": 11761 }, { "epoch": 280.0477611940299, "grad_norm": 219.44827270507812, "learning_rate": 9.99920634920635e-06, "loss": 39.925, "step": 11762 }, { "epoch": 280.0716417910448, "grad_norm": 118.753662109375, "learning_rate": 9.998412698412699e-06, "loss": 38.0303, "step": 11763 }, { "epoch": 280.0955223880597, "grad_norm": 56.105350494384766, "learning_rate": 9.997619047619048e-06, "loss": 37.3487, "step": 11764 }, { "epoch": 280.1194029850746, "grad_norm": 42.488067626953125, "learning_rate": 9.996825396825399e-06, "loss": 35.26, "step": 11765 }, { "epoch": 280.14328358208957, "grad_norm": 57.162506103515625, "learning_rate": 9.996031746031746e-06, "loss": 35.7255, "step": 11766 }, { "epoch": 280.1671641791045, "grad_norm": 52.685462951660156, "learning_rate": 9.995238095238095e-06, "loss": 35.8206, "step": 11767 }, { "epoch": 280.1910447761194, "grad_norm": 37.78727340698242, "learning_rate": 9.994444444444446e-06, "loss": 35.517, "step": 11768 }, { "epoch": 280.21492537313435, "grad_norm": 39.62852478027344, "learning_rate": 9.993650793650793e-06, "loss": 34.4672, "step": 11769 }, { "epoch": 280.23880597014926, "grad_norm": 32.611328125, "learning_rate": 9.992857142857144e-06, "loss": 35.009, "step": 11770 }, { "epoch": 280.26268656716417, "grad_norm": 25.22555923461914, "learning_rate": 9.992063492063493e-06, "loss": 34.7649, "step": 11771 }, { "epoch": 280.28656716417913, "grad_norm": 25.276588439941406, "learning_rate": 9.991269841269842e-06, "loss": 35.933, "step": 11772 }, { "epoch": 280.31044776119404, "grad_norm": 27.605308532714844, "learning_rate": 9.990476190476191e-06, "loss": 35.0031, "step": 11773 }, { "epoch": 280.33432835820895, "grad_norm": 24.355487823486328, "learning_rate": 9.98968253968254e-06, "loss": 34.7315, "step": 11774 }, { "epoch": 280.35820895522386, "grad_norm": 20.254823684692383, "learning_rate": 9.98888888888889e-06, "loss": 34.4345, "step": 11775 }, { "epoch": 280.3820895522388, "grad_norm": 17.66265869140625, "learning_rate": 9.988095238095239e-06, "loss": 34.0289, "step": 11776 }, { "epoch": 280.40597014925373, "grad_norm": 17.804201126098633, "learning_rate": 9.987301587301588e-06, "loss": 34.3589, "step": 11777 }, { "epoch": 280.42985074626864, "grad_norm": 16.00823974609375, "learning_rate": 9.986507936507937e-06, "loss": 35.3401, "step": 11778 }, { "epoch": 280.4537313432836, "grad_norm": 19.54131507873535, "learning_rate": 9.985714285714286e-06, "loss": 35.1887, "step": 11779 }, { "epoch": 280.4776119402985, "grad_norm": 14.041351318359375, "learning_rate": 9.984920634920637e-06, "loss": 35.2667, "step": 11780 }, { "epoch": 280.5014925373134, "grad_norm": 20.99547004699707, "learning_rate": 9.984126984126986e-06, "loss": 33.7162, "step": 11781 }, { "epoch": 280.52537313432833, "grad_norm": 18.124479293823242, "learning_rate": 9.983333333333333e-06, "loss": 34.3665, "step": 11782 }, { "epoch": 280.5492537313433, "grad_norm": 19.564178466796875, "learning_rate": 9.982539682539684e-06, "loss": 34.558, "step": 11783 }, { "epoch": 280.5731343283582, "grad_norm": 24.882999420166016, "learning_rate": 9.981746031746033e-06, "loss": 35.1123, "step": 11784 }, { "epoch": 280.5970149253731, "grad_norm": 15.504097938537598, "learning_rate": 9.980952380952382e-06, "loss": 34.8684, "step": 11785 }, { "epoch": 280.6208955223881, "grad_norm": 22.50943374633789, "learning_rate": 9.980158730158731e-06, "loss": 33.4909, "step": 11786 }, { "epoch": 280.644776119403, "grad_norm": 21.798898696899414, "learning_rate": 9.97936507936508e-06, "loss": 35.8089, "step": 11787 }, { "epoch": 280.6686567164179, "grad_norm": 19.085386276245117, "learning_rate": 9.97857142857143e-06, "loss": 35.2489, "step": 11788 }, { "epoch": 280.6925373134328, "grad_norm": 17.447267532348633, "learning_rate": 9.977777777777778e-06, "loss": 36.1361, "step": 11789 }, { "epoch": 280.7164179104478, "grad_norm": 19.983989715576172, "learning_rate": 9.976984126984128e-06, "loss": 34.7194, "step": 11790 }, { "epoch": 280.7402985074627, "grad_norm": 20.92411994934082, "learning_rate": 9.976190476190477e-06, "loss": 33.9958, "step": 11791 }, { "epoch": 280.7641791044776, "grad_norm": 14.108833312988281, "learning_rate": 9.975396825396826e-06, "loss": 34.6663, "step": 11792 }, { "epoch": 280.78805970149256, "grad_norm": 16.30893325805664, "learning_rate": 9.974603174603176e-06, "loss": 35.4368, "step": 11793 }, { "epoch": 280.81194029850747, "grad_norm": 16.229223251342773, "learning_rate": 9.973809523809524e-06, "loss": 34.9174, "step": 11794 }, { "epoch": 280.8358208955224, "grad_norm": 15.053704261779785, "learning_rate": 9.973015873015875e-06, "loss": 34.6009, "step": 11795 }, { "epoch": 280.85970149253734, "grad_norm": 19.644737243652344, "learning_rate": 9.972222222222224e-06, "loss": 34.9756, "step": 11796 }, { "epoch": 280.88358208955225, "grad_norm": NaN, "learning_rate": 9.971428571428571e-06, "loss": 59.2668, "step": 11797 }, { "epoch": 280.90746268656716, "grad_norm": 15.495433807373047, "learning_rate": 9.971428571428571e-06, "loss": 34.7684, "step": 11798 }, { "epoch": 280.93134328358207, "grad_norm": 14.9066162109375, "learning_rate": 9.970634920634922e-06, "loss": 35.7027, "step": 11799 }, { "epoch": 280.95522388059703, "grad_norm": 15.656798362731934, "learning_rate": 9.969841269841271e-06, "loss": 34.5438, "step": 11800 }, { "epoch": 280.97910447761194, "grad_norm": 26.039445877075195, "learning_rate": 9.96904761904762e-06, "loss": 34.1163, "step": 11801 }, { "epoch": 281.0, "grad_norm": 13.834368705749512, "learning_rate": 9.968253968253969e-06, "loss": 29.3649, "step": 11802 }, { "epoch": 281.0238805970149, "grad_norm": 24.26058578491211, "learning_rate": 9.967460317460318e-06, "loss": 35.4255, "step": 11803 }, { "epoch": 281.0477611940299, "grad_norm": 21.886337280273438, "learning_rate": 9.966666666666667e-06, "loss": 35.5066, "step": 11804 }, { "epoch": 281.0716417910448, "grad_norm": 17.188631057739258, "learning_rate": 9.965873015873016e-06, "loss": 35.172, "step": 11805 }, { "epoch": 281.0955223880597, "grad_norm": 26.414350509643555, "learning_rate": 9.965079365079365e-06, "loss": 33.3201, "step": 11806 }, { "epoch": 281.1194029850746, "grad_norm": 18.171688079833984, "learning_rate": 9.964285714285714e-06, "loss": 34.0477, "step": 11807 }, { "epoch": 281.14328358208957, "grad_norm": 31.080293655395508, "learning_rate": 9.963492063492064e-06, "loss": 34.5627, "step": 11808 }, { "epoch": 281.1671641791045, "grad_norm": 21.397998809814453, "learning_rate": 9.962698412698414e-06, "loss": 35.6122, "step": 11809 }, { "epoch": 281.1910447761194, "grad_norm": 23.92205047607422, "learning_rate": 9.961904761904763e-06, "loss": 34.1777, "step": 11810 }, { "epoch": 281.21492537313435, "grad_norm": 22.434926986694336, "learning_rate": 9.96111111111111e-06, "loss": 36.6884, "step": 11811 }, { "epoch": 281.23880597014926, "grad_norm": 18.58843231201172, "learning_rate": 9.960317460317462e-06, "loss": 34.3689, "step": 11812 }, { "epoch": 281.26268656716417, "grad_norm": 22.569429397583008, "learning_rate": 9.95952380952381e-06, "loss": 34.1493, "step": 11813 }, { "epoch": 281.28656716417913, "grad_norm": 18.652563095092773, "learning_rate": 9.95873015873016e-06, "loss": 35.4785, "step": 11814 }, { "epoch": 281.31044776119404, "grad_norm": 18.981735229492188, "learning_rate": 9.957936507936509e-06, "loss": 35.4806, "step": 11815 }, { "epoch": 281.33432835820895, "grad_norm": 22.05530548095703, "learning_rate": 9.957142857142858e-06, "loss": 33.4935, "step": 11816 }, { "epoch": 281.35820895522386, "grad_norm": 15.490934371948242, "learning_rate": 9.956349206349207e-06, "loss": 36.5091, "step": 11817 }, { "epoch": 281.3820895522388, "grad_norm": 29.51089096069336, "learning_rate": 9.955555555555556e-06, "loss": 35.3114, "step": 11818 }, { "epoch": 281.40597014925373, "grad_norm": 21.18665885925293, "learning_rate": 9.954761904761905e-06, "loss": 34.1899, "step": 11819 }, { "epoch": 281.42985074626864, "grad_norm": 26.58310317993164, "learning_rate": 9.953968253968254e-06, "loss": 34.4679, "step": 11820 }, { "epoch": 281.4537313432836, "grad_norm": 18.942975997924805, "learning_rate": 9.953174603174603e-06, "loss": 33.8813, "step": 11821 }, { "epoch": 281.4776119402985, "grad_norm": 20.89089012145996, "learning_rate": 9.952380952380954e-06, "loss": 35.0164, "step": 11822 }, { "epoch": 281.5014925373134, "grad_norm": 27.045583724975586, "learning_rate": 9.951587301587301e-06, "loss": 34.6906, "step": 11823 }, { "epoch": 281.52537313432833, "grad_norm": 17.110809326171875, "learning_rate": 9.950793650793652e-06, "loss": 35.6123, "step": 11824 }, { "epoch": 281.5492537313433, "grad_norm": 22.350217819213867, "learning_rate": 9.950000000000001e-06, "loss": 34.3026, "step": 11825 }, { "epoch": 281.5731343283582, "grad_norm": 19.359451293945312, "learning_rate": 9.94920634920635e-06, "loss": 34.8374, "step": 11826 }, { "epoch": 281.5970149253731, "grad_norm": 17.76999855041504, "learning_rate": 9.9484126984127e-06, "loss": 35.6175, "step": 11827 }, { "epoch": 281.6208955223881, "grad_norm": 29.642032623291016, "learning_rate": 9.947619047619049e-06, "loss": 33.6645, "step": 11828 }, { "epoch": 281.644776119403, "grad_norm": 19.500289916992188, "learning_rate": 9.946825396825398e-06, "loss": 34.8721, "step": 11829 }, { "epoch": 281.6686567164179, "grad_norm": 25.8712215423584, "learning_rate": 9.946031746031747e-06, "loss": 33.729, "step": 11830 }, { "epoch": 281.6925373134328, "grad_norm": 19.624027252197266, "learning_rate": 9.945238095238096e-06, "loss": 33.6072, "step": 11831 }, { "epoch": 281.7164179104478, "grad_norm": 30.332162857055664, "learning_rate": 9.944444444444445e-06, "loss": 35.7628, "step": 11832 }, { "epoch": 281.7402985074627, "grad_norm": 19.511499404907227, "learning_rate": 9.943650793650794e-06, "loss": 35.0269, "step": 11833 }, { "epoch": 281.7641791044776, "grad_norm": 27.628700256347656, "learning_rate": 9.942857142857145e-06, "loss": 34.9568, "step": 11834 }, { "epoch": 281.78805970149256, "grad_norm": 24.200483322143555, "learning_rate": 9.942063492063492e-06, "loss": 34.3099, "step": 11835 }, { "epoch": 281.81194029850747, "grad_norm": 19.15821647644043, "learning_rate": 9.941269841269841e-06, "loss": 35.4079, "step": 11836 }, { "epoch": 281.8358208955224, "grad_norm": 27.890596389770508, "learning_rate": 9.940476190476192e-06, "loss": 34.0082, "step": 11837 }, { "epoch": 281.85970149253734, "grad_norm": 20.02274513244629, "learning_rate": 9.939682539682541e-06, "loss": 35.1607, "step": 11838 }, { "epoch": 281.88358208955225, "grad_norm": 30.761608123779297, "learning_rate": 9.93888888888889e-06, "loss": 34.1874, "step": 11839 }, { "epoch": 281.90746268656716, "grad_norm": 20.60077667236328, "learning_rate": 9.93809523809524e-06, "loss": 34.3755, "step": 11840 }, { "epoch": 281.93134328358207, "grad_norm": 27.628490447998047, "learning_rate": 9.937301587301588e-06, "loss": 33.6654, "step": 11841 }, { "epoch": 281.95522388059703, "grad_norm": 18.82782745361328, "learning_rate": 9.936507936507937e-06, "loss": 33.9478, "step": 11842 }, { "epoch": 281.97910447761194, "grad_norm": 30.738174438476562, "learning_rate": 9.935714285714286e-06, "loss": 34.7518, "step": 11843 }, { "epoch": 282.0, "grad_norm": 17.398067474365234, "learning_rate": 9.934920634920636e-06, "loss": 29.9627, "step": 11844 }, { "epoch": 282.0238805970149, "grad_norm": 31.6385498046875, "learning_rate": 9.934126984126985e-06, "loss": 34.7136, "step": 11845 }, { "epoch": 282.0477611940299, "grad_norm": 18.935577392578125, "learning_rate": 9.933333333333334e-06, "loss": 34.1334, "step": 11846 }, { "epoch": 282.0716417910448, "grad_norm": 26.17972183227539, "learning_rate": 9.932539682539684e-06, "loss": 34.052, "step": 11847 }, { "epoch": 282.0955223880597, "grad_norm": 20.300334930419922, "learning_rate": 9.931746031746032e-06, "loss": 33.9338, "step": 11848 }, { "epoch": 282.1194029850746, "grad_norm": 22.307647705078125, "learning_rate": 9.930952380952383e-06, "loss": 34.0204, "step": 11849 }, { "epoch": 282.14328358208957, "grad_norm": 23.589879989624023, "learning_rate": 9.930158730158732e-06, "loss": 34.595, "step": 11850 }, { "epoch": 282.1671641791045, "grad_norm": 19.313642501831055, "learning_rate": 9.929365079365079e-06, "loss": 34.9566, "step": 11851 }, { "epoch": 282.1910447761194, "grad_norm": 33.995906829833984, "learning_rate": 9.92857142857143e-06, "loss": 35.4147, "step": 11852 }, { "epoch": 282.21492537313435, "grad_norm": 21.858556747436523, "learning_rate": 9.927777777777779e-06, "loss": 35.2846, "step": 11853 }, { "epoch": 282.23880597014926, "grad_norm": 32.75117874145508, "learning_rate": 9.926984126984128e-06, "loss": 34.3541, "step": 11854 }, { "epoch": 282.26268656716417, "grad_norm": 22.43109893798828, "learning_rate": 9.926190476190477e-06, "loss": 34.3842, "step": 11855 }, { "epoch": 282.28656716417913, "grad_norm": 40.62053298950195, "learning_rate": 9.925396825396826e-06, "loss": 34.3303, "step": 11856 }, { "epoch": 282.31044776119404, "grad_norm": 33.663394927978516, "learning_rate": 9.924603174603175e-06, "loss": 35.9298, "step": 11857 }, { "epoch": 282.33432835820895, "grad_norm": 30.64558982849121, "learning_rate": 9.923809523809524e-06, "loss": 34.546, "step": 11858 }, { "epoch": 282.35820895522386, "grad_norm": 29.543611526489258, "learning_rate": 9.923015873015875e-06, "loss": 34.5038, "step": 11859 }, { "epoch": 282.3820895522388, "grad_norm": 29.16376495361328, "learning_rate": 9.922222222222222e-06, "loss": 35.8454, "step": 11860 }, { "epoch": 282.40597014925373, "grad_norm": 22.400897979736328, "learning_rate": 9.921428571428572e-06, "loss": 33.9821, "step": 11861 }, { "epoch": 282.42985074626864, "grad_norm": 33.786170959472656, "learning_rate": 9.920634920634922e-06, "loss": 35.0368, "step": 11862 }, { "epoch": 282.4537313432836, "grad_norm": 29.242387771606445, "learning_rate": 9.91984126984127e-06, "loss": 34.7123, "step": 11863 }, { "epoch": 282.4776119402985, "grad_norm": 33.90926742553711, "learning_rate": 9.91904761904762e-06, "loss": 35.1735, "step": 11864 }, { "epoch": 282.5014925373134, "grad_norm": 29.547443389892578, "learning_rate": 9.91825396825397e-06, "loss": 33.1211, "step": 11865 }, { "epoch": 282.52537313432833, "grad_norm": 28.972431182861328, "learning_rate": 9.917460317460319e-06, "loss": 34.5466, "step": 11866 }, { "epoch": 282.5492537313433, "grad_norm": 24.426488876342773, "learning_rate": 9.916666666666668e-06, "loss": 34.1576, "step": 11867 }, { "epoch": 282.5731343283582, "grad_norm": 30.063594818115234, "learning_rate": 9.915873015873017e-06, "loss": 33.7799, "step": 11868 }, { "epoch": 282.5970149253731, "grad_norm": 27.062620162963867, "learning_rate": 9.915079365079366e-06, "loss": 34.6464, "step": 11869 }, { "epoch": 282.6208955223881, "grad_norm": 35.88241195678711, "learning_rate": 9.914285714285715e-06, "loss": 34.5536, "step": 11870 }, { "epoch": 282.644776119403, "grad_norm": 30.721355438232422, "learning_rate": 9.913492063492064e-06, "loss": 35.2244, "step": 11871 }, { "epoch": 282.6686567164179, "grad_norm": 26.038280487060547, "learning_rate": 9.912698412698413e-06, "loss": 34.6394, "step": 11872 }, { "epoch": 282.6925373134328, "grad_norm": 23.411800384521484, "learning_rate": 9.911904761904762e-06, "loss": 34.1463, "step": 11873 }, { "epoch": 282.7164179104478, "grad_norm": 34.48252868652344, "learning_rate": 9.911111111111113e-06, "loss": 35.2108, "step": 11874 }, { "epoch": 282.7402985074627, "grad_norm": 26.923267364501953, "learning_rate": 9.910317460317462e-06, "loss": 35.5689, "step": 11875 }, { "epoch": 282.7641791044776, "grad_norm": 32.60765075683594, "learning_rate": 9.90952380952381e-06, "loss": 35.3503, "step": 11876 }, { "epoch": 282.78805970149256, "grad_norm": 29.128307342529297, "learning_rate": 9.90873015873016e-06, "loss": 35.5577, "step": 11877 }, { "epoch": 282.81194029850747, "grad_norm": 26.722400665283203, "learning_rate": 9.90793650793651e-06, "loss": 33.9117, "step": 11878 }, { "epoch": 282.8358208955224, "grad_norm": 24.377363204956055, "learning_rate": 9.907142857142858e-06, "loss": 34.1131, "step": 11879 }, { "epoch": 282.85970149253734, "grad_norm": 31.27817153930664, "learning_rate": 9.906349206349207e-06, "loss": 34.8682, "step": 11880 }, { "epoch": 282.88358208955225, "grad_norm": 25.59556770324707, "learning_rate": 9.905555555555557e-06, "loss": 34.4288, "step": 11881 }, { "epoch": 282.90746268656716, "grad_norm": 31.731334686279297, "learning_rate": 9.904761904761906e-06, "loss": 36.0353, "step": 11882 }, { "epoch": 282.93134328358207, "grad_norm": 27.592634201049805, "learning_rate": 9.903968253968255e-06, "loss": 33.5116, "step": 11883 }, { "epoch": 282.95522388059703, "grad_norm": 30.299158096313477, "learning_rate": 9.903174603174604e-06, "loss": 34.5389, "step": 11884 }, { "epoch": 282.97910447761194, "grad_norm": 25.82707405090332, "learning_rate": 9.902380952380953e-06, "loss": 35.3144, "step": 11885 }, { "epoch": 283.0, "grad_norm": 26.001951217651367, "learning_rate": 9.901587301587302e-06, "loss": 29.8663, "step": 11886 }, { "epoch": 283.0238805970149, "grad_norm": 26.985275268554688, "learning_rate": 9.900793650793653e-06, "loss": 34.5218, "step": 11887 }, { "epoch": 283.0477611940299, "grad_norm": 27.25253677368164, "learning_rate": 9.9e-06, "loss": 34.0899, "step": 11888 }, { "epoch": 283.0716417910448, "grad_norm": 27.99782943725586, "learning_rate": 9.89920634920635e-06, "loss": 34.6255, "step": 11889 }, { "epoch": 283.0955223880597, "grad_norm": 31.501266479492188, "learning_rate": 9.8984126984127e-06, "loss": 34.5021, "step": 11890 }, { "epoch": 283.1194029850746, "grad_norm": 25.399730682373047, "learning_rate": 9.897619047619047e-06, "loss": 35.1958, "step": 11891 }, { "epoch": 283.14328358208957, "grad_norm": 33.786006927490234, "learning_rate": 9.896825396825398e-06, "loss": 35.4105, "step": 11892 }, { "epoch": 283.1671641791045, "grad_norm": 27.10110092163086, "learning_rate": 9.896031746031747e-06, "loss": 34.4679, "step": 11893 }, { "epoch": 283.1910447761194, "grad_norm": 30.048660278320312, "learning_rate": 9.895238095238096e-06, "loss": 34.3342, "step": 11894 }, { "epoch": 283.21492537313435, "grad_norm": 28.051061630249023, "learning_rate": 9.894444444444445e-06, "loss": 33.9177, "step": 11895 }, { "epoch": 283.23880597014926, "grad_norm": 24.222515106201172, "learning_rate": 9.893650793650794e-06, "loss": 34.614, "step": 11896 }, { "epoch": 283.26268656716417, "grad_norm": 22.9919490814209, "learning_rate": 9.892857142857143e-06, "loss": 35.0559, "step": 11897 }, { "epoch": 283.28656716417913, "grad_norm": 24.69420051574707, "learning_rate": 9.892063492063493e-06, "loss": 34.9936, "step": 11898 }, { "epoch": 283.31044776119404, "grad_norm": 21.76444435119629, "learning_rate": 9.891269841269842e-06, "loss": 34.3498, "step": 11899 }, { "epoch": 283.33432835820895, "grad_norm": 29.178211212158203, "learning_rate": 9.89047619047619e-06, "loss": 35.2396, "step": 11900 }, { "epoch": 283.35820895522386, "grad_norm": 24.143978118896484, "learning_rate": 9.88968253968254e-06, "loss": 34.9166, "step": 11901 }, { "epoch": 283.3820895522388, "grad_norm": 30.864849090576172, "learning_rate": 9.88888888888889e-06, "loss": 33.5803, "step": 11902 }, { "epoch": 283.40597014925373, "grad_norm": 23.996366500854492, "learning_rate": 9.88809523809524e-06, "loss": 35.4132, "step": 11903 }, { "epoch": 283.42985074626864, "grad_norm": 28.13095474243164, "learning_rate": 9.887301587301587e-06, "loss": 33.4004, "step": 11904 }, { "epoch": 283.4537313432836, "grad_norm": 24.495094299316406, "learning_rate": 9.886507936507938e-06, "loss": 35.3223, "step": 11905 }, { "epoch": 283.4776119402985, "grad_norm": 25.5897216796875, "learning_rate": 9.885714285714287e-06, "loss": 34.8353, "step": 11906 }, { "epoch": 283.5014925373134, "grad_norm": 19.90247344970703, "learning_rate": 9.884920634920636e-06, "loss": 35.1199, "step": 11907 }, { "epoch": 283.52537313432833, "grad_norm": 27.842309951782227, "learning_rate": 9.884126984126985e-06, "loss": 35.7175, "step": 11908 }, { "epoch": 283.5492537313433, "grad_norm": 22.010128021240234, "learning_rate": 9.883333333333334e-06, "loss": 34.0129, "step": 11909 }, { "epoch": 283.5731343283582, "grad_norm": 29.78243637084961, "learning_rate": 9.882539682539683e-06, "loss": 35.3333, "step": 11910 }, { "epoch": 283.5970149253731, "grad_norm": 25.89198112487793, "learning_rate": 9.881746031746032e-06, "loss": 33.2787, "step": 11911 }, { "epoch": 283.6208955223881, "grad_norm": 25.66374969482422, "learning_rate": 9.880952380952381e-06, "loss": 35.565, "step": 11912 }, { "epoch": 283.644776119403, "grad_norm": 23.362863540649414, "learning_rate": 9.88015873015873e-06, "loss": 32.2217, "step": 11913 }, { "epoch": 283.6686567164179, "grad_norm": 23.376296997070312, "learning_rate": 9.87936507936508e-06, "loss": 35.0079, "step": 11914 }, { "epoch": 283.6925373134328, "grad_norm": 21.015687942504883, "learning_rate": 9.87857142857143e-06, "loss": 34.5278, "step": 11915 }, { "epoch": 283.7164179104478, "grad_norm": 26.780033111572266, "learning_rate": 9.877777777777778e-06, "loss": 35.5065, "step": 11916 }, { "epoch": 283.7402985074627, "grad_norm": 21.850831985473633, "learning_rate": 9.876984126984128e-06, "loss": 34.1275, "step": 11917 }, { "epoch": 283.7641791044776, "grad_norm": 24.24028778076172, "learning_rate": 9.876190476190478e-06, "loss": 33.9131, "step": 11918 }, { "epoch": 283.78805970149256, "grad_norm": 20.287803649902344, "learning_rate": 9.875396825396825e-06, "loss": 34.0144, "step": 11919 }, { "epoch": 283.81194029850747, "grad_norm": 25.329547882080078, "learning_rate": 9.874603174603176e-06, "loss": 34.1357, "step": 11920 }, { "epoch": 283.8358208955224, "grad_norm": 23.08452033996582, "learning_rate": 9.873809523809525e-06, "loss": 35.355, "step": 11921 }, { "epoch": 283.85970149253734, "grad_norm": 23.316389083862305, "learning_rate": 9.873015873015874e-06, "loss": 35.2747, "step": 11922 }, { "epoch": 283.88358208955225, "grad_norm": 20.081113815307617, "learning_rate": 9.872222222222223e-06, "loss": 35.2304, "step": 11923 }, { "epoch": 283.90746268656716, "grad_norm": 23.772554397583008, "learning_rate": 9.871428571428572e-06, "loss": 34.5178, "step": 11924 }, { "epoch": 283.93134328358207, "grad_norm": 21.754993438720703, "learning_rate": 9.870634920634921e-06, "loss": 35.8339, "step": 11925 }, { "epoch": 283.95522388059703, "grad_norm": 21.76508331298828, "learning_rate": 9.86984126984127e-06, "loss": 33.1081, "step": 11926 }, { "epoch": 283.97910447761194, "grad_norm": 19.88758659362793, "learning_rate": 9.869047619047621e-06, "loss": 34.8188, "step": 11927 }, { "epoch": 284.0, "grad_norm": 21.30535125732422, "learning_rate": 9.868253968253968e-06, "loss": 30.312, "step": 11928 }, { "epoch": 284.0238805970149, "grad_norm": 22.3480224609375, "learning_rate": 9.867460317460317e-06, "loss": 34.8848, "step": 11929 }, { "epoch": 284.0477611940299, "grad_norm": 20.421735763549805, "learning_rate": 9.866666666666668e-06, "loss": 34.2704, "step": 11930 }, { "epoch": 284.0716417910448, "grad_norm": 20.93566131591797, "learning_rate": 9.865873015873017e-06, "loss": 35.1359, "step": 11931 }, { "epoch": 284.0955223880597, "grad_norm": 22.134910583496094, "learning_rate": 9.865079365079366e-06, "loss": 34.983, "step": 11932 }, { "epoch": 284.1194029850746, "grad_norm": 19.56775665283203, "learning_rate": 9.864285714285715e-06, "loss": 35.5462, "step": 11933 }, { "epoch": 284.14328358208957, "grad_norm": 20.169052124023438, "learning_rate": 9.863492063492065e-06, "loss": 33.3168, "step": 11934 }, { "epoch": 284.1671641791045, "grad_norm": 17.79511070251465, "learning_rate": 9.862698412698414e-06, "loss": 34.0852, "step": 11935 }, { "epoch": 284.1910447761194, "grad_norm": 22.726470947265625, "learning_rate": 9.861904761904763e-06, "loss": 34.2328, "step": 11936 }, { "epoch": 284.21492537313435, "grad_norm": 20.804378509521484, "learning_rate": 9.861111111111112e-06, "loss": 34.3468, "step": 11937 }, { "epoch": 284.23880597014926, "grad_norm": 19.055212020874023, "learning_rate": 9.86031746031746e-06, "loss": 34.7014, "step": 11938 }, { "epoch": 284.26268656716417, "grad_norm": 20.315317153930664, "learning_rate": 9.85952380952381e-06, "loss": 34.5101, "step": 11939 }, { "epoch": 284.28656716417913, "grad_norm": 16.841644287109375, "learning_rate": 9.858730158730159e-06, "loss": 33.8631, "step": 11940 }, { "epoch": 284.31044776119404, "grad_norm": 20.81892967224121, "learning_rate": 9.857936507936508e-06, "loss": 33.777, "step": 11941 }, { "epoch": 284.33432835820895, "grad_norm": 20.917980194091797, "learning_rate": 9.857142857142859e-06, "loss": 35.4712, "step": 11942 }, { "epoch": 284.35820895522386, "grad_norm": 18.239110946655273, "learning_rate": 9.856349206349208e-06, "loss": 34.7417, "step": 11943 }, { "epoch": 284.3820895522388, "grad_norm": 18.71514320373535, "learning_rate": 9.855555555555555e-06, "loss": 35.0344, "step": 11944 }, { "epoch": 284.40597014925373, "grad_norm": 16.92817497253418, "learning_rate": 9.854761904761906e-06, "loss": 35.4628, "step": 11945 }, { "epoch": 284.42985074626864, "grad_norm": 17.445419311523438, "learning_rate": 9.853968253968255e-06, "loss": 34.7541, "step": 11946 }, { "epoch": 284.4537313432836, "grad_norm": 12.863101959228516, "learning_rate": 9.853174603174604e-06, "loss": 34.9531, "step": 11947 }, { "epoch": 284.4776119402985, "grad_norm": 18.064464569091797, "learning_rate": 9.852380952380953e-06, "loss": 34.7739, "step": 11948 }, { "epoch": 284.5014925373134, "grad_norm": 18.38424301147461, "learning_rate": 9.851587301587302e-06, "loss": 34.4741, "step": 11949 }, { "epoch": 284.52537313432833, "grad_norm": 18.44971466064453, "learning_rate": 9.850793650793651e-06, "loss": 34.9774, "step": 11950 }, { "epoch": 284.5492537313433, "grad_norm": 15.091837882995605, "learning_rate": 9.85e-06, "loss": 34.9954, "step": 11951 }, { "epoch": 284.5731343283582, "grad_norm": 19.42574119567871, "learning_rate": 9.849206349206351e-06, "loss": 34.1234, "step": 11952 }, { "epoch": 284.5970149253731, "grad_norm": 19.454084396362305, "learning_rate": 9.848412698412699e-06, "loss": 34.8341, "step": 11953 }, { "epoch": 284.6208955223881, "grad_norm": 17.355350494384766, "learning_rate": 9.847619047619048e-06, "loss": 34.5434, "step": 11954 }, { "epoch": 284.644776119403, "grad_norm": 16.38776969909668, "learning_rate": 9.846825396825399e-06, "loss": 33.9971, "step": 11955 }, { "epoch": 284.6686567164179, "grad_norm": 17.23564338684082, "learning_rate": 9.846031746031746e-06, "loss": 35.3131, "step": 11956 }, { "epoch": 284.6925373134328, "grad_norm": 15.807302474975586, "learning_rate": 9.845238095238097e-06, "loss": 33.4787, "step": 11957 }, { "epoch": 284.7164179104478, "grad_norm": 19.17428207397461, "learning_rate": 9.844444444444446e-06, "loss": 34.3309, "step": 11958 }, { "epoch": 284.7402985074627, "grad_norm": 18.40250015258789, "learning_rate": 9.843650793650795e-06, "loss": 34.6634, "step": 11959 }, { "epoch": 284.7641791044776, "grad_norm": 18.37040138244629, "learning_rate": 9.842857142857144e-06, "loss": 34.5341, "step": 11960 }, { "epoch": 284.78805970149256, "grad_norm": 15.528295516967773, "learning_rate": 9.842063492063493e-06, "loss": 35.147, "step": 11961 }, { "epoch": 284.81194029850747, "grad_norm": 19.036741256713867, "learning_rate": 9.841269841269842e-06, "loss": 33.4691, "step": 11962 }, { "epoch": 284.8358208955224, "grad_norm": 17.90913200378418, "learning_rate": 9.840476190476191e-06, "loss": 34.1951, "step": 11963 }, { "epoch": 284.85970149253734, "grad_norm": 21.4915828704834, "learning_rate": 9.83968253968254e-06, "loss": 35.1062, "step": 11964 }, { "epoch": 284.88358208955225, "grad_norm": 16.04237174987793, "learning_rate": 9.83888888888889e-06, "loss": 33.4108, "step": 11965 }, { "epoch": 284.90746268656716, "grad_norm": 19.474821090698242, "learning_rate": 9.838095238095238e-06, "loss": 34.4099, "step": 11966 }, { "epoch": 284.93134328358207, "grad_norm": 20.65281105041504, "learning_rate": 9.837301587301588e-06, "loss": 34.8545, "step": 11967 }, { "epoch": 284.95522388059703, "grad_norm": 16.95366096496582, "learning_rate": 9.836507936507937e-06, "loss": 34.9288, "step": 11968 }, { "epoch": 284.97910447761194, "grad_norm": 17.942453384399414, "learning_rate": 9.835714285714286e-06, "loss": 36.3266, "step": 11969 }, { "epoch": 285.0, "grad_norm": 18.859390258789062, "learning_rate": 9.834920634920636e-06, "loss": 30.4459, "step": 11970 }, { "epoch": 285.0238805970149, "grad_norm": 16.060823440551758, "learning_rate": 9.834126984126986e-06, "loss": 34.7412, "step": 11971 }, { "epoch": 285.0477611940299, "grad_norm": 22.028331756591797, "learning_rate": 9.833333333333333e-06, "loss": 33.4896, "step": 11972 }, { "epoch": 285.0716417910448, "grad_norm": 24.3582820892334, "learning_rate": 9.832539682539684e-06, "loss": 34.7248, "step": 11973 }, { "epoch": 285.0955223880597, "grad_norm": 17.361093521118164, "learning_rate": 9.831746031746033e-06, "loss": 35.0884, "step": 11974 }, { "epoch": 285.1194029850746, "grad_norm": 21.988372802734375, "learning_rate": 9.830952380952382e-06, "loss": 35.6549, "step": 11975 }, { "epoch": 285.14328358208957, "grad_norm": 19.694623947143555, "learning_rate": 9.830158730158731e-06, "loss": 34.1964, "step": 11976 }, { "epoch": 285.1671641791045, "grad_norm": 16.763029098510742, "learning_rate": 9.82936507936508e-06, "loss": 33.226, "step": 11977 }, { "epoch": 285.1910447761194, "grad_norm": 18.90792465209961, "learning_rate": 9.828571428571429e-06, "loss": 35.8647, "step": 11978 }, { "epoch": 285.21492537313435, "grad_norm": 21.621692657470703, "learning_rate": 9.827777777777778e-06, "loss": 35.1498, "step": 11979 }, { "epoch": 285.23880597014926, "grad_norm": 15.991765975952148, "learning_rate": 9.826984126984129e-06, "loss": 34.9035, "step": 11980 }, { "epoch": 285.26268656716417, "grad_norm": 18.409725189208984, "learning_rate": 9.826190476190476e-06, "loss": 34.2189, "step": 11981 }, { "epoch": 285.28656716417913, "grad_norm": 23.432588577270508, "learning_rate": 9.825396825396825e-06, "loss": 35.0388, "step": 11982 }, { "epoch": 285.31044776119404, "grad_norm": 16.48472785949707, "learning_rate": 9.824603174603176e-06, "loss": 34.3214, "step": 11983 }, { "epoch": 285.33432835820895, "grad_norm": 25.051328659057617, "learning_rate": 9.823809523809524e-06, "loss": 34.3483, "step": 11984 }, { "epoch": 285.35820895522386, "grad_norm": 18.279516220092773, "learning_rate": 9.823015873015874e-06, "loss": 34.3283, "step": 11985 }, { "epoch": 285.3820895522388, "grad_norm": 18.019378662109375, "learning_rate": 9.822222222222223e-06, "loss": 34.4974, "step": 11986 }, { "epoch": 285.40597014925373, "grad_norm": NaN, "learning_rate": 9.821428571428573e-06, "loss": 48.2549, "step": 11987 }, { "epoch": 285.42985074626864, "grad_norm": 22.311250686645508, "learning_rate": 9.821428571428573e-06, "loss": 34.9845, "step": 11988 }, { "epoch": 285.4537313432836, "grad_norm": 19.11435890197754, "learning_rate": 9.820634920634922e-06, "loss": 34.9478, "step": 11989 }, { "epoch": 285.4776119402985, "grad_norm": 14.488481521606445, "learning_rate": 9.81984126984127e-06, "loss": 34.9031, "step": 11990 }, { "epoch": 285.5014925373134, "grad_norm": 21.968503952026367, "learning_rate": 9.81904761904762e-06, "loss": 34.9046, "step": 11991 }, { "epoch": 285.52537313432833, "grad_norm": 20.652868270874023, "learning_rate": 9.818253968253969e-06, "loss": 34.0136, "step": 11992 }, { "epoch": 285.5492537313433, "grad_norm": 15.696794509887695, "learning_rate": 9.817460317460318e-06, "loss": 33.358, "step": 11993 }, { "epoch": 285.5731343283582, "grad_norm": 25.74132537841797, "learning_rate": 9.816666666666667e-06, "loss": 36.0652, "step": 11994 }, { "epoch": 285.5970149253731, "grad_norm": 15.65168285369873, "learning_rate": 9.815873015873016e-06, "loss": 33.8315, "step": 11995 }, { "epoch": 285.6208955223881, "grad_norm": 20.603830337524414, "learning_rate": 9.815079365079367e-06, "loss": 35.1069, "step": 11996 }, { "epoch": 285.644776119403, "grad_norm": 21.205408096313477, "learning_rate": 9.814285714285716e-06, "loss": 34.5993, "step": 11997 }, { "epoch": 285.6686567164179, "grad_norm": 14.209935188293457, "learning_rate": 9.813492063492063e-06, "loss": 34.3532, "step": 11998 }, { "epoch": 285.6925373134328, "grad_norm": 19.43562126159668, "learning_rate": 9.812698412698414e-06, "loss": 34.0042, "step": 11999 }, { "epoch": 285.7164179104478, "grad_norm": 24.769792556762695, "learning_rate": 9.811904761904763e-06, "loss": 35.1111, "step": 12000 }, { "epoch": 285.7402985074627, "grad_norm": 14.365962028503418, "learning_rate": 9.811111111111112e-06, "loss": 34.8156, "step": 12001 }, { "epoch": 285.7641791044776, "grad_norm": 21.0401554107666, "learning_rate": 9.810317460317461e-06, "loss": 34.4963, "step": 12002 }, { "epoch": 285.78805970149256, "grad_norm": 18.862346649169922, "learning_rate": 9.80952380952381e-06, "loss": 34.5222, "step": 12003 }, { "epoch": 285.81194029850747, "grad_norm": 15.13908576965332, "learning_rate": 9.80873015873016e-06, "loss": 33.9945, "step": 12004 }, { "epoch": 285.8358208955224, "grad_norm": 18.187814712524414, "learning_rate": 9.807936507936509e-06, "loss": 33.7722, "step": 12005 }, { "epoch": 285.85970149253734, "grad_norm": 21.272560119628906, "learning_rate": 9.807142857142858e-06, "loss": 35.3827, "step": 12006 }, { "epoch": 285.88358208955225, "grad_norm": 15.041167259216309, "learning_rate": 9.806349206349207e-06, "loss": 34.2868, "step": 12007 }, { "epoch": 285.90746268656716, "grad_norm": 16.316268920898438, "learning_rate": 9.805555555555556e-06, "loss": 34.7403, "step": 12008 }, { "epoch": 285.93134328358207, "grad_norm": 17.840290069580078, "learning_rate": 9.804761904761907e-06, "loss": 34.6549, "step": 12009 }, { "epoch": 285.95522388059703, "grad_norm": 17.38083839416504, "learning_rate": 9.803968253968254e-06, "loss": 34.7757, "step": 12010 }, { "epoch": 285.97910447761194, "grad_norm": 16.983545303344727, "learning_rate": 9.803174603174605e-06, "loss": 33.8752, "step": 12011 }, { "epoch": 286.0, "grad_norm": 13.664655685424805, "learning_rate": 9.802380952380954e-06, "loss": 29.3307, "step": 12012 }, { "epoch": 286.0238805970149, "grad_norm": 21.884868621826172, "learning_rate": 9.801587301587301e-06, "loss": 35.0974, "step": 12013 }, { "epoch": 286.0477611940299, "grad_norm": 15.397043228149414, "learning_rate": 9.800793650793652e-06, "loss": 35.2422, "step": 12014 }, { "epoch": 286.0716417910448, "grad_norm": 20.124658584594727, "learning_rate": 9.800000000000001e-06, "loss": 33.7555, "step": 12015 }, { "epoch": 286.0955223880597, "grad_norm": NaN, "learning_rate": 9.79920634920635e-06, "loss": 32.9235, "step": 12016 }, { "epoch": 286.1194029850746, "grad_norm": 19.885358810424805, "learning_rate": 9.79920634920635e-06, "loss": 34.9683, "step": 12017 }, { "epoch": 286.14328358208957, "grad_norm": 15.506340980529785, "learning_rate": 9.7984126984127e-06, "loss": 34.3529, "step": 12018 }, { "epoch": 286.1671641791045, "grad_norm": 19.64795684814453, "learning_rate": 9.797619047619048e-06, "loss": 34.3434, "step": 12019 }, { "epoch": 286.1910447761194, "grad_norm": 14.998760223388672, "learning_rate": 9.796825396825397e-06, "loss": 35.0393, "step": 12020 }, { "epoch": 286.21492537313435, "grad_norm": 19.91847801208496, "learning_rate": 9.796031746031746e-06, "loss": 35.0371, "step": 12021 }, { "epoch": 286.23880597014926, "grad_norm": 16.710060119628906, "learning_rate": 9.795238095238097e-06, "loss": 33.9395, "step": 12022 }, { "epoch": 286.26268656716417, "grad_norm": 17.559694290161133, "learning_rate": 9.794444444444445e-06, "loss": 32.8833, "step": 12023 }, { "epoch": 286.28656716417913, "grad_norm": 23.84259796142578, "learning_rate": 9.793650793650794e-06, "loss": 36.1526, "step": 12024 }, { "epoch": 286.31044776119404, "grad_norm": 17.985380172729492, "learning_rate": 9.792857142857144e-06, "loss": 34.1914, "step": 12025 }, { "epoch": 286.33432835820895, "grad_norm": 16.1311092376709, "learning_rate": 9.792063492063494e-06, "loss": 34.7507, "step": 12026 }, { "epoch": 286.35820895522386, "grad_norm": 14.830122947692871, "learning_rate": 9.791269841269843e-06, "loss": 35.132, "step": 12027 }, { "epoch": 286.3820895522388, "grad_norm": 19.325284957885742, "learning_rate": 9.790476190476192e-06, "loss": 35.1478, "step": 12028 }, { "epoch": 286.40597014925373, "grad_norm": 19.017011642456055, "learning_rate": 9.78968253968254e-06, "loss": 34.5581, "step": 12029 }, { "epoch": 286.42985074626864, "grad_norm": 16.702796936035156, "learning_rate": 9.78888888888889e-06, "loss": 34.5856, "step": 12030 }, { "epoch": 286.4537313432836, "grad_norm": 16.359254837036133, "learning_rate": 9.788095238095239e-06, "loss": 35.202, "step": 12031 }, { "epoch": 286.4776119402985, "grad_norm": 17.346094131469727, "learning_rate": 9.787301587301588e-06, "loss": 34.7018, "step": 12032 }, { "epoch": 286.5014925373134, "grad_norm": 15.318071365356445, "learning_rate": 9.786507936507937e-06, "loss": 33.0346, "step": 12033 }, { "epoch": 286.52537313432833, "grad_norm": 14.456928253173828, "learning_rate": 9.785714285714286e-06, "loss": 35.2802, "step": 12034 }, { "epoch": 286.5492537313433, "grad_norm": 14.843634605407715, "learning_rate": 9.784920634920635e-06, "loss": 35.4394, "step": 12035 }, { "epoch": 286.5731343283582, "grad_norm": 15.709205627441406, "learning_rate": 9.784126984126984e-06, "loss": 33.557, "step": 12036 }, { "epoch": 286.5970149253731, "grad_norm": 17.61166000366211, "learning_rate": 9.783333333333335e-06, "loss": 35.2218, "step": 12037 }, { "epoch": 286.6208955223881, "grad_norm": 16.26697540283203, "learning_rate": 9.782539682539684e-06, "loss": 34.1617, "step": 12038 }, { "epoch": 286.644776119403, "grad_norm": 19.526288986206055, "learning_rate": 9.781746031746032e-06, "loss": 34.5198, "step": 12039 }, { "epoch": 286.6686567164179, "grad_norm": 18.525033950805664, "learning_rate": 9.780952380952382e-06, "loss": 34.8916, "step": 12040 }, { "epoch": 286.6925373134328, "grad_norm": 14.802464485168457, "learning_rate": 9.780158730158731e-06, "loss": 34.2814, "step": 12041 }, { "epoch": 286.7164179104478, "grad_norm": 13.708857536315918, "learning_rate": 9.779365079365079e-06, "loss": 34.0722, "step": 12042 }, { "epoch": 286.7402985074627, "grad_norm": 19.151531219482422, "learning_rate": 9.77857142857143e-06, "loss": 34.6693, "step": 12043 }, { "epoch": 286.7641791044776, "grad_norm": 19.884967803955078, "learning_rate": 9.777777777777779e-06, "loss": 36.2617, "step": 12044 }, { "epoch": 286.78805970149256, "grad_norm": 17.904062271118164, "learning_rate": 9.776984126984128e-06, "loss": 34.9496, "step": 12045 }, { "epoch": 286.81194029850747, "grad_norm": 15.217409133911133, "learning_rate": 9.776190476190477e-06, "loss": 32.5331, "step": 12046 }, { "epoch": 286.8358208955224, "grad_norm": 17.902698516845703, "learning_rate": 9.775396825396826e-06, "loss": 34.9822, "step": 12047 }, { "epoch": 286.85970149253734, "grad_norm": 16.96517562866211, "learning_rate": 9.774603174603175e-06, "loss": 34.4241, "step": 12048 }, { "epoch": 286.88358208955225, "grad_norm": 19.24843406677246, "learning_rate": 9.773809523809524e-06, "loss": 34.2901, "step": 12049 }, { "epoch": 286.90746268656716, "grad_norm": 17.964000701904297, "learning_rate": 9.773015873015875e-06, "loss": 34.1566, "step": 12050 }, { "epoch": 286.93134328358207, "grad_norm": 13.353113174438477, "learning_rate": 9.772222222222222e-06, "loss": 33.1719, "step": 12051 }, { "epoch": 286.95522388059703, "grad_norm": 14.79580307006836, "learning_rate": 9.771428571428571e-06, "loss": 34.417, "step": 12052 }, { "epoch": 286.97910447761194, "grad_norm": 19.75585174560547, "learning_rate": 9.770634920634922e-06, "loss": 34.1401, "step": 12053 }, { "epoch": 287.0, "grad_norm": 19.62725257873535, "learning_rate": 9.769841269841271e-06, "loss": 30.4209, "step": 12054 }, { "epoch": 287.0238805970149, "grad_norm": 16.045976638793945, "learning_rate": 9.76904761904762e-06, "loss": 34.3797, "step": 12055 }, { "epoch": 287.0477611940299, "grad_norm": 16.418935775756836, "learning_rate": 9.76825396825397e-06, "loss": 34.0024, "step": 12056 }, { "epoch": 287.0716417910448, "grad_norm": 23.848142623901367, "learning_rate": 9.767460317460318e-06, "loss": 34.7997, "step": 12057 }, { "epoch": 287.0955223880597, "grad_norm": 20.17426300048828, "learning_rate": 9.766666666666667e-06, "loss": 33.0639, "step": 12058 }, { "epoch": 287.1194029850746, "grad_norm": 14.178504943847656, "learning_rate": 9.765873015873017e-06, "loss": 34.2935, "step": 12059 }, { "epoch": 287.14328358208957, "grad_norm": 18.32088851928711, "learning_rate": 9.765079365079366e-06, "loss": 34.6321, "step": 12060 }, { "epoch": 287.1671641791045, "grad_norm": 17.045101165771484, "learning_rate": 9.764285714285715e-06, "loss": 35.5111, "step": 12061 }, { "epoch": 287.1910447761194, "grad_norm": 19.639179229736328, "learning_rate": 9.763492063492064e-06, "loss": 34.5451, "step": 12062 }, { "epoch": 287.21492537313435, "grad_norm": 18.215702056884766, "learning_rate": 9.762698412698413e-06, "loss": 34.8495, "step": 12063 }, { "epoch": 287.23880597014926, "grad_norm": 16.4918270111084, "learning_rate": 9.761904761904762e-06, "loss": 33.7022, "step": 12064 }, { "epoch": 287.26268656716417, "grad_norm": 18.707651138305664, "learning_rate": 9.761111111111113e-06, "loss": 34.4524, "step": 12065 }, { "epoch": 287.28656716417913, "grad_norm": 14.767991065979004, "learning_rate": 9.760317460317462e-06, "loss": 34.8725, "step": 12066 }, { "epoch": 287.31044776119404, "grad_norm": 23.754541397094727, "learning_rate": 9.75952380952381e-06, "loss": 34.5065, "step": 12067 }, { "epoch": 287.33432835820895, "grad_norm": 16.943313598632812, "learning_rate": 9.75873015873016e-06, "loss": 33.6943, "step": 12068 }, { "epoch": 287.35820895522386, "grad_norm": 20.705615997314453, "learning_rate": 9.757936507936509e-06, "loss": 34.9901, "step": 12069 }, { "epoch": 287.3820895522388, "grad_norm": 16.885560989379883, "learning_rate": 9.757142857142858e-06, "loss": 33.9538, "step": 12070 }, { "epoch": 287.40597014925373, "grad_norm": 16.602771759033203, "learning_rate": 9.756349206349207e-06, "loss": 34.9266, "step": 12071 }, { "epoch": 287.42985074626864, "grad_norm": 19.53326416015625, "learning_rate": 9.755555555555556e-06, "loss": 34.7959, "step": 12072 }, { "epoch": 287.4537313432836, "grad_norm": 15.950719833374023, "learning_rate": 9.754761904761905e-06, "loss": 33.6526, "step": 12073 }, { "epoch": 287.4776119402985, "grad_norm": 16.19793701171875, "learning_rate": 9.753968253968254e-06, "loss": 34.6108, "step": 12074 }, { "epoch": 287.5014925373134, "grad_norm": 16.855018615722656, "learning_rate": 9.753174603174605e-06, "loss": 33.6259, "step": 12075 }, { "epoch": 287.52537313432833, "grad_norm": 14.502487182617188, "learning_rate": 9.752380952380953e-06, "loss": 35.1415, "step": 12076 }, { "epoch": 287.5492537313433, "grad_norm": 20.946552276611328, "learning_rate": 9.751587301587302e-06, "loss": 35.2675, "step": 12077 }, { "epoch": 287.5731343283582, "grad_norm": 19.062923431396484, "learning_rate": 9.750793650793652e-06, "loss": 34.7373, "step": 12078 }, { "epoch": 287.5970149253731, "grad_norm": 15.838395118713379, "learning_rate": 9.75e-06, "loss": 34.807, "step": 12079 }, { "epoch": 287.6208955223881, "grad_norm": 16.52606773376465, "learning_rate": 9.74920634920635e-06, "loss": 35.0307, "step": 12080 }, { "epoch": 287.644776119403, "grad_norm": 17.749820709228516, "learning_rate": 9.7484126984127e-06, "loss": 35.5646, "step": 12081 }, { "epoch": 287.6686567164179, "grad_norm": 16.2675838470459, "learning_rate": 9.747619047619049e-06, "loss": 34.7169, "step": 12082 }, { "epoch": 287.6925373134328, "grad_norm": 19.575477600097656, "learning_rate": 9.746825396825398e-06, "loss": 34.7319, "step": 12083 }, { "epoch": 287.7164179104478, "grad_norm": 15.083663940429688, "learning_rate": 9.746031746031747e-06, "loss": 33.7711, "step": 12084 }, { "epoch": 287.7402985074627, "grad_norm": 14.370621681213379, "learning_rate": 9.745238095238096e-06, "loss": 33.7369, "step": 12085 }, { "epoch": 287.7641791044776, "grad_norm": 17.50619888305664, "learning_rate": 9.744444444444445e-06, "loss": 35.0661, "step": 12086 }, { "epoch": 287.78805970149256, "grad_norm": 15.515654563903809, "learning_rate": 9.743650793650794e-06, "loss": 34.3127, "step": 12087 }, { "epoch": 287.81194029850747, "grad_norm": 17.39630699157715, "learning_rate": 9.742857142857143e-06, "loss": 33.9359, "step": 12088 }, { "epoch": 287.8358208955224, "grad_norm": 16.543195724487305, "learning_rate": 9.742063492063492e-06, "loss": 34.1895, "step": 12089 }, { "epoch": 287.85970149253734, "grad_norm": 21.681650161743164, "learning_rate": 9.741269841269843e-06, "loss": 34.9595, "step": 12090 }, { "epoch": 287.88358208955225, "grad_norm": 14.407553672790527, "learning_rate": 9.74047619047619e-06, "loss": 33.3288, "step": 12091 }, { "epoch": 287.90746268656716, "grad_norm": 17.220455169677734, "learning_rate": 9.73968253968254e-06, "loss": 33.2606, "step": 12092 }, { "epoch": 287.93134328358207, "grad_norm": 15.273118019104004, "learning_rate": 9.73888888888889e-06, "loss": 35.1208, "step": 12093 }, { "epoch": 287.95522388059703, "grad_norm": 23.344297409057617, "learning_rate": 9.73809523809524e-06, "loss": 35.4974, "step": 12094 }, { "epoch": 287.97910447761194, "grad_norm": 15.585911750793457, "learning_rate": 9.737301587301588e-06, "loss": 34.1066, "step": 12095 }, { "epoch": 288.0, "grad_norm": 15.023148536682129, "learning_rate": 9.736507936507938e-06, "loss": 31.1307, "step": 12096 }, { "epoch": 288.0238805970149, "grad_norm": 23.582096099853516, "learning_rate": 9.735714285714287e-06, "loss": 34.7779, "step": 12097 }, { "epoch": 288.0477611940299, "grad_norm": 15.913206100463867, "learning_rate": 9.734920634920636e-06, "loss": 34.7806, "step": 12098 }, { "epoch": 288.0716417910448, "grad_norm": 21.027273178100586, "learning_rate": 9.734126984126985e-06, "loss": 35.0032, "step": 12099 }, { "epoch": 288.0955223880597, "grad_norm": 17.777542114257812, "learning_rate": 9.733333333333334e-06, "loss": 33.5533, "step": 12100 }, { "epoch": 288.1194029850746, "grad_norm": 26.905447006225586, "learning_rate": 9.732539682539683e-06, "loss": 34.4528, "step": 12101 }, { "epoch": 288.14328358208957, "grad_norm": 20.0372257232666, "learning_rate": 9.731746031746032e-06, "loss": 34.6477, "step": 12102 }, { "epoch": 288.1671641791045, "grad_norm": 26.21122932434082, "learning_rate": 9.730952380952383e-06, "loss": 35.3729, "step": 12103 }, { "epoch": 288.1910447761194, "grad_norm": 24.10542869567871, "learning_rate": 9.73015873015873e-06, "loss": 33.4395, "step": 12104 }, { "epoch": 288.21492537313435, "grad_norm": 19.59185218811035, "learning_rate": 9.729365079365081e-06, "loss": 34.752, "step": 12105 }, { "epoch": 288.23880597014926, "grad_norm": 21.703527450561523, "learning_rate": 9.72857142857143e-06, "loss": 35.4408, "step": 12106 }, { "epoch": 288.26268656716417, "grad_norm": 19.279748916625977, "learning_rate": 9.727777777777777e-06, "loss": 33.5879, "step": 12107 }, { "epoch": 288.28656716417913, "grad_norm": 18.189329147338867, "learning_rate": 9.726984126984128e-06, "loss": 34.9721, "step": 12108 }, { "epoch": 288.31044776119404, "grad_norm": 20.553638458251953, "learning_rate": 9.726190476190477e-06, "loss": 34.6883, "step": 12109 }, { "epoch": 288.33432835820895, "grad_norm": 20.090965270996094, "learning_rate": 9.725396825396826e-06, "loss": 34.5064, "step": 12110 }, { "epoch": 288.35820895522386, "grad_norm": 14.965476989746094, "learning_rate": 9.724603174603175e-06, "loss": 34.7103, "step": 12111 }, { "epoch": 288.3820895522388, "grad_norm": 19.639541625976562, "learning_rate": 9.723809523809525e-06, "loss": 34.633, "step": 12112 }, { "epoch": 288.40597014925373, "grad_norm": 18.138107299804688, "learning_rate": 9.723015873015874e-06, "loss": 35.3275, "step": 12113 }, { "epoch": 288.42985074626864, "grad_norm": 15.13943862915039, "learning_rate": 9.722222222222223e-06, "loss": 33.9663, "step": 12114 }, { "epoch": 288.4537313432836, "grad_norm": 15.557840347290039, "learning_rate": 9.721428571428573e-06, "loss": 34.8689, "step": 12115 }, { "epoch": 288.4776119402985, "grad_norm": 13.73161792755127, "learning_rate": 9.720634920634921e-06, "loss": 34.6142, "step": 12116 }, { "epoch": 288.5014925373134, "grad_norm": 15.27243423461914, "learning_rate": 9.71984126984127e-06, "loss": 35.3487, "step": 12117 }, { "epoch": 288.52537313432833, "grad_norm": 15.516766548156738, "learning_rate": 9.71904761904762e-06, "loss": 33.854, "step": 12118 }, { "epoch": 288.5492537313433, "grad_norm": 16.92219352722168, "learning_rate": 9.71825396825397e-06, "loss": 33.0819, "step": 12119 }, { "epoch": 288.5731343283582, "grad_norm": 16.66267204284668, "learning_rate": 9.717460317460317e-06, "loss": 34.5783, "step": 12120 }, { "epoch": 288.5970149253731, "grad_norm": 21.243785858154297, "learning_rate": 9.716666666666668e-06, "loss": 33.4727, "step": 12121 }, { "epoch": 288.6208955223881, "grad_norm": 14.848652839660645, "learning_rate": 9.715873015873017e-06, "loss": 34.1186, "step": 12122 }, { "epoch": 288.644776119403, "grad_norm": 17.60338592529297, "learning_rate": 9.715079365079366e-06, "loss": 34.6413, "step": 12123 }, { "epoch": 288.6686567164179, "grad_norm": 16.577882766723633, "learning_rate": 9.714285714285715e-06, "loss": 34.5929, "step": 12124 }, { "epoch": 288.6925373134328, "grad_norm": 19.29567527770996, "learning_rate": 9.713492063492064e-06, "loss": 35.2559, "step": 12125 }, { "epoch": 288.7164179104478, "grad_norm": 18.365034103393555, "learning_rate": 9.712698412698413e-06, "loss": 34.1905, "step": 12126 }, { "epoch": 288.7402985074627, "grad_norm": 19.5806941986084, "learning_rate": 9.711904761904762e-06, "loss": 34.1149, "step": 12127 }, { "epoch": 288.7641791044776, "grad_norm": 12.890687942504883, "learning_rate": 9.711111111111111e-06, "loss": 33.3241, "step": 12128 }, { "epoch": 288.78805970149256, "grad_norm": 18.987075805664062, "learning_rate": 9.71031746031746e-06, "loss": 34.7101, "step": 12129 }, { "epoch": 288.81194029850747, "grad_norm": 15.151649475097656, "learning_rate": 9.70952380952381e-06, "loss": 34.9443, "step": 12130 }, { "epoch": 288.8358208955224, "grad_norm": 18.86102294921875, "learning_rate": 9.70873015873016e-06, "loss": 33.5959, "step": 12131 }, { "epoch": 288.85970149253734, "grad_norm": 16.394893646240234, "learning_rate": 9.707936507936508e-06, "loss": 35.2346, "step": 12132 }, { "epoch": 288.88358208955225, "grad_norm": 20.39829444885254, "learning_rate": 9.707142857142859e-06, "loss": 34.7344, "step": 12133 }, { "epoch": 288.90746268656716, "grad_norm": 18.291614532470703, "learning_rate": 9.706349206349208e-06, "loss": 34.863, "step": 12134 }, { "epoch": 288.93134328358207, "grad_norm": 16.3395938873291, "learning_rate": 9.705555555555555e-06, "loss": 33.9108, "step": 12135 }, { "epoch": 288.95522388059703, "grad_norm": 18.286191940307617, "learning_rate": 9.704761904761906e-06, "loss": 34.3912, "step": 12136 }, { "epoch": 288.97910447761194, "grad_norm": 19.71375274658203, "learning_rate": 9.703968253968255e-06, "loss": 33.7324, "step": 12137 }, { "epoch": 289.0, "grad_norm": 14.758651733398438, "learning_rate": 9.703174603174604e-06, "loss": 29.6443, "step": 12138 }, { "epoch": 289.0238805970149, "grad_norm": 16.483644485473633, "learning_rate": 9.702380952380953e-06, "loss": 34.7059, "step": 12139 }, { "epoch": 289.0477611940299, "grad_norm": 16.072952270507812, "learning_rate": 9.701587301587302e-06, "loss": 33.5644, "step": 12140 }, { "epoch": 289.0716417910448, "grad_norm": 18.685707092285156, "learning_rate": 9.700793650793651e-06, "loss": 33.909, "step": 12141 }, { "epoch": 289.0955223880597, "grad_norm": 15.370194435119629, "learning_rate": 9.7e-06, "loss": 34.8823, "step": 12142 }, { "epoch": 289.1194029850746, "grad_norm": 17.053407669067383, "learning_rate": 9.699206349206351e-06, "loss": 34.1308, "step": 12143 }, { "epoch": 289.14328358208957, "grad_norm": 13.865734100341797, "learning_rate": 9.698412698412698e-06, "loss": 34.4891, "step": 12144 }, { "epoch": 289.1671641791045, "grad_norm": 18.639347076416016, "learning_rate": 9.697619047619048e-06, "loss": 33.9614, "step": 12145 }, { "epoch": 289.1910447761194, "grad_norm": 13.701078414916992, "learning_rate": 9.696825396825398e-06, "loss": 33.8884, "step": 12146 }, { "epoch": 289.21492537313435, "grad_norm": 20.172367095947266, "learning_rate": 9.696031746031747e-06, "loss": 35.0894, "step": 12147 }, { "epoch": 289.23880597014926, "grad_norm": 22.14353370666504, "learning_rate": 9.695238095238096e-06, "loss": 33.8598, "step": 12148 }, { "epoch": 289.26268656716417, "grad_norm": 17.53331184387207, "learning_rate": 9.694444444444446e-06, "loss": 35.4552, "step": 12149 }, { "epoch": 289.28656716417913, "grad_norm": 17.245830535888672, "learning_rate": 9.693650793650795e-06, "loss": 34.8001, "step": 12150 }, { "epoch": 289.31044776119404, "grad_norm": 15.168274879455566, "learning_rate": 9.692857142857144e-06, "loss": 34.1529, "step": 12151 }, { "epoch": 289.33432835820895, "grad_norm": 18.141550064086914, "learning_rate": 9.692063492063493e-06, "loss": 33.5119, "step": 12152 }, { "epoch": 289.35820895522386, "grad_norm": 14.61592960357666, "learning_rate": 9.691269841269842e-06, "loss": 34.165, "step": 12153 }, { "epoch": 289.3820895522388, "grad_norm": 15.107131958007812, "learning_rate": 9.690476190476191e-06, "loss": 33.3661, "step": 12154 }, { "epoch": 289.40597014925373, "grad_norm": 13.791497230529785, "learning_rate": 9.68968253968254e-06, "loss": 33.5373, "step": 12155 }, { "epoch": 289.42985074626864, "grad_norm": 15.632122993469238, "learning_rate": 9.688888888888889e-06, "loss": 34.685, "step": 12156 }, { "epoch": 289.4537313432836, "grad_norm": 14.425263404846191, "learning_rate": 9.688095238095238e-06, "loss": 33.7206, "step": 12157 }, { "epoch": 289.4776119402985, "grad_norm": 14.650760650634766, "learning_rate": 9.687301587301589e-06, "loss": 33.1826, "step": 12158 }, { "epoch": 289.5014925373134, "grad_norm": NaN, "learning_rate": 9.686507936507938e-06, "loss": 51.007, "step": 12159 }, { "epoch": 289.52537313432833, "grad_norm": 15.101150512695312, "learning_rate": 9.686507936507938e-06, "loss": 35.4393, "step": 12160 }, { "epoch": 289.5492537313433, "grad_norm": 15.44886302947998, "learning_rate": 9.685714285714285e-06, "loss": 35.3721, "step": 12161 }, { "epoch": 289.5731343283582, "grad_norm": 15.483396530151367, "learning_rate": 9.684920634920636e-06, "loss": 33.7206, "step": 12162 }, { "epoch": 289.5970149253731, "grad_norm": 18.036157608032227, "learning_rate": 9.684126984126985e-06, "loss": 34.7579, "step": 12163 }, { "epoch": 289.6208955223881, "grad_norm": 19.82551383972168, "learning_rate": 9.683333333333334e-06, "loss": 34.0443, "step": 12164 }, { "epoch": 289.644776119403, "grad_norm": 14.942346572875977, "learning_rate": 9.682539682539683e-06, "loss": 35.119, "step": 12165 }, { "epoch": 289.6686567164179, "grad_norm": 14.71619701385498, "learning_rate": 9.681746031746033e-06, "loss": 33.8474, "step": 12166 }, { "epoch": 289.6925373134328, "grad_norm": 18.883920669555664, "learning_rate": 9.680952380952382e-06, "loss": 34.5756, "step": 12167 }, { "epoch": 289.7164179104478, "grad_norm": 22.619094848632812, "learning_rate": 9.68015873015873e-06, "loss": 35.173, "step": 12168 }, { "epoch": 289.7402985074627, "grad_norm": 14.180581092834473, "learning_rate": 9.679365079365081e-06, "loss": 34.4955, "step": 12169 }, { "epoch": 289.7641791044776, "grad_norm": 22.267663955688477, "learning_rate": 9.678571428571429e-06, "loss": 35.0336, "step": 12170 }, { "epoch": 289.78805970149256, "grad_norm": 25.306835174560547, "learning_rate": 9.677777777777778e-06, "loss": 34.7632, "step": 12171 }, { "epoch": 289.81194029850747, "grad_norm": 14.044944763183594, "learning_rate": 9.676984126984129e-06, "loss": 35.3385, "step": 12172 }, { "epoch": 289.8358208955224, "grad_norm": 20.621187210083008, "learning_rate": 9.676190476190476e-06, "loss": 33.8966, "step": 12173 }, { "epoch": 289.85970149253734, "grad_norm": NaN, "learning_rate": 9.675396825396827e-06, "loss": 54.0567, "step": 12174 }, { "epoch": 289.88358208955225, "grad_norm": 22.201995849609375, "learning_rate": 9.675396825396827e-06, "loss": 35.6075, "step": 12175 }, { "epoch": 289.90746268656716, "grad_norm": 13.381609916687012, "learning_rate": 9.674603174603176e-06, "loss": 34.063, "step": 12176 }, { "epoch": 289.93134328358207, "grad_norm": 30.19305992126465, "learning_rate": 9.673809523809525e-06, "loss": 34.3243, "step": 12177 }, { "epoch": 289.95522388059703, "grad_norm": 18.34126853942871, "learning_rate": 9.673015873015874e-06, "loss": 34.54, "step": 12178 }, { "epoch": 289.97910447761194, "grad_norm": 23.226390838623047, "learning_rate": 9.672222222222223e-06, "loss": 34.5679, "step": 12179 }, { "epoch": 290.0, "grad_norm": 19.478759765625, "learning_rate": 9.671428571428572e-06, "loss": 31.2824, "step": 12180 }, { "epoch": 290.0238805970149, "grad_norm": 14.978033065795898, "learning_rate": 9.670634920634921e-06, "loss": 34.1951, "step": 12181 }, { "epoch": 290.0477611940299, "grad_norm": 31.53683853149414, "learning_rate": 9.66984126984127e-06, "loss": 34.7907, "step": 12182 }, { "epoch": 290.0716417910448, "grad_norm": 19.020130157470703, "learning_rate": 9.66904761904762e-06, "loss": 33.9402, "step": 12183 }, { "epoch": 290.0955223880597, "grad_norm": 31.166902542114258, "learning_rate": 9.668253968253969e-06, "loss": 35.2873, "step": 12184 }, { "epoch": 290.1194029850746, "grad_norm": 21.55522346496582, "learning_rate": 9.66746031746032e-06, "loss": 34.9449, "step": 12185 }, { "epoch": 290.14328358208957, "grad_norm": 34.57597351074219, "learning_rate": 9.666666666666667e-06, "loss": 33.3533, "step": 12186 }, { "epoch": 290.1671641791045, "grad_norm": 21.43866729736328, "learning_rate": 9.665873015873016e-06, "loss": 34.6336, "step": 12187 }, { "epoch": 290.1910447761194, "grad_norm": 33.21577453613281, "learning_rate": 9.665079365079367e-06, "loss": 33.9339, "step": 12188 }, { "epoch": 290.21492537313435, "grad_norm": 22.744247436523438, "learning_rate": 9.664285714285716e-06, "loss": 34.7873, "step": 12189 }, { "epoch": 290.23880597014926, "grad_norm": 33.429779052734375, "learning_rate": 9.663492063492065e-06, "loss": 33.0795, "step": 12190 }, { "epoch": 290.26268656716417, "grad_norm": 27.033706665039062, "learning_rate": 9.662698412698414e-06, "loss": 32.9232, "step": 12191 }, { "epoch": 290.28656716417913, "grad_norm": 31.327665328979492, "learning_rate": 9.661904761904763e-06, "loss": 34.7549, "step": 12192 }, { "epoch": 290.31044776119404, "grad_norm": 23.397293090820312, "learning_rate": 9.661111111111112e-06, "loss": 35.1283, "step": 12193 }, { "epoch": 290.33432835820895, "grad_norm": 27.564517974853516, "learning_rate": 9.660317460317461e-06, "loss": 34.3094, "step": 12194 }, { "epoch": 290.35820895522386, "grad_norm": NaN, "learning_rate": 9.65952380952381e-06, "loss": 58.411, "step": 12195 }, { "epoch": 290.3820895522388, "grad_norm": 22.267906188964844, "learning_rate": 9.65952380952381e-06, "loss": 34.6854, "step": 12196 }, { "epoch": 290.40597014925373, "grad_norm": 26.782926559448242, "learning_rate": 9.65873015873016e-06, "loss": 34.7152, "step": 12197 }, { "epoch": 290.42985074626864, "grad_norm": 26.812774658203125, "learning_rate": 9.657936507936508e-06, "loss": 34.3506, "step": 12198 }, { "epoch": 290.4537313432836, "grad_norm": 21.261577606201172, "learning_rate": 9.657142857142859e-06, "loss": 34.4862, "step": 12199 }, { "epoch": 290.4776119402985, "grad_norm": 33.28779602050781, "learning_rate": 9.656349206349206e-06, "loss": 33.4131, "step": 12200 }, { "epoch": 290.5014925373134, "grad_norm": 23.193044662475586, "learning_rate": 9.655555555555556e-06, "loss": 33.48, "step": 12201 }, { "epoch": 290.52537313432833, "grad_norm": 39.30086135864258, "learning_rate": 9.654761904761906e-06, "loss": 33.9847, "step": 12202 }, { "epoch": 290.5492537313433, "grad_norm": 33.107276916503906, "learning_rate": 9.653968253968254e-06, "loss": 33.361, "step": 12203 }, { "epoch": 290.5731343283582, "grad_norm": 27.033092498779297, "learning_rate": 9.653174603174604e-06, "loss": 34.1614, "step": 12204 }, { "epoch": 290.5970149253731, "grad_norm": 27.202957153320312, "learning_rate": 9.652380952380954e-06, "loss": 35.5516, "step": 12205 }, { "epoch": 290.6208955223881, "grad_norm": 28.261327743530273, "learning_rate": 9.651587301587303e-06, "loss": 34.913, "step": 12206 }, { "epoch": 290.644776119403, "grad_norm": 22.540699005126953, "learning_rate": 9.650793650793652e-06, "loss": 33.3709, "step": 12207 }, { "epoch": 290.6686567164179, "grad_norm": 28.34935760498047, "learning_rate": 9.65e-06, "loss": 34.8894, "step": 12208 }, { "epoch": 290.6925373134328, "grad_norm": 19.734691619873047, "learning_rate": 9.64920634920635e-06, "loss": 35.1564, "step": 12209 }, { "epoch": 290.7164179104478, "grad_norm": 32.46640396118164, "learning_rate": 9.648412698412699e-06, "loss": 34.5664, "step": 12210 }, { "epoch": 290.7402985074627, "grad_norm": 22.91280174255371, "learning_rate": 9.647619047619048e-06, "loss": 32.5768, "step": 12211 }, { "epoch": 290.7641791044776, "grad_norm": 28.09925079345703, "learning_rate": 9.646825396825397e-06, "loss": 35.2974, "step": 12212 }, { "epoch": 290.78805970149256, "grad_norm": 24.54458999633789, "learning_rate": 9.646031746031746e-06, "loss": 35.0112, "step": 12213 }, { "epoch": 290.81194029850747, "grad_norm": 23.932899475097656, "learning_rate": 9.645238095238097e-06, "loss": 34.8741, "step": 12214 }, { "epoch": 290.8358208955224, "grad_norm": 27.692768096923828, "learning_rate": 9.644444444444444e-06, "loss": 34.8985, "step": 12215 }, { "epoch": 290.85970149253734, "grad_norm": 18.806684494018555, "learning_rate": 9.643650793650793e-06, "loss": 34.6133, "step": 12216 }, { "epoch": 290.88358208955225, "grad_norm": 34.68870162963867, "learning_rate": 9.642857142857144e-06, "loss": 34.9589, "step": 12217 }, { "epoch": 290.90746268656716, "grad_norm": 27.245176315307617, "learning_rate": 9.642063492063493e-06, "loss": 35.1987, "step": 12218 }, { "epoch": 290.93134328358207, "grad_norm": 29.974613189697266, "learning_rate": 9.641269841269842e-06, "loss": 33.3384, "step": 12219 }, { "epoch": 290.95522388059703, "grad_norm": 25.222753524780273, "learning_rate": 9.640476190476191e-06, "loss": 34.8828, "step": 12220 }, { "epoch": 290.97910447761194, "grad_norm": 28.725669860839844, "learning_rate": 9.63968253968254e-06, "loss": 34.5293, "step": 12221 }, { "epoch": 291.0, "grad_norm": 17.669416427612305, "learning_rate": 9.63888888888889e-06, "loss": 30.053, "step": 12222 }, { "epoch": 291.0238805970149, "grad_norm": 25.961732864379883, "learning_rate": 9.638095238095239e-06, "loss": 34.0411, "step": 12223 }, { "epoch": 291.0477611940299, "grad_norm": 23.097198486328125, "learning_rate": 9.637301587301588e-06, "loss": 34.749, "step": 12224 }, { "epoch": 291.0716417910448, "grad_norm": 27.990400314331055, "learning_rate": 9.636507936507937e-06, "loss": 34.6194, "step": 12225 }, { "epoch": 291.0955223880597, "grad_norm": 23.676801681518555, "learning_rate": 9.635714285714286e-06, "loss": 33.9769, "step": 12226 }, { "epoch": 291.1194029850746, "grad_norm": 17.73317527770996, "learning_rate": 9.634920634920637e-06, "loss": 34.8169, "step": 12227 }, { "epoch": 291.14328358208957, "grad_norm": 32.58198928833008, "learning_rate": 9.634126984126984e-06, "loss": 33.9618, "step": 12228 }, { "epoch": 291.1671641791045, "grad_norm": 22.8065242767334, "learning_rate": 9.633333333333335e-06, "loss": 34.3213, "step": 12229 }, { "epoch": 291.1910447761194, "grad_norm": 38.063899993896484, "learning_rate": 9.632539682539684e-06, "loss": 33.8887, "step": 12230 }, { "epoch": 291.21492537313435, "grad_norm": 31.94734001159668, "learning_rate": 9.631746031746031e-06, "loss": 33.8354, "step": 12231 }, { "epoch": 291.23880597014926, "grad_norm": 29.75364875793457, "learning_rate": 9.630952380952382e-06, "loss": 34.4246, "step": 12232 }, { "epoch": 291.26268656716417, "grad_norm": 29.69106674194336, "learning_rate": 9.630158730158731e-06, "loss": 34.5028, "step": 12233 }, { "epoch": 291.28656716417913, "grad_norm": 27.216402053833008, "learning_rate": 9.62936507936508e-06, "loss": 33.2136, "step": 12234 }, { "epoch": 291.31044776119404, "grad_norm": NaN, "learning_rate": 9.62857142857143e-06, "loss": 62.1075, "step": 12235 }, { "epoch": 291.33432835820895, "grad_norm": 24.074199676513672, "learning_rate": 9.62857142857143e-06, "loss": 34.4417, "step": 12236 }, { "epoch": 291.35820895522386, "grad_norm": 34.20615768432617, "learning_rate": 9.627777777777778e-06, "loss": 34.924, "step": 12237 }, { "epoch": 291.3820895522388, "grad_norm": 27.6361026763916, "learning_rate": 9.626984126984127e-06, "loss": 34.4716, "step": 12238 }, { "epoch": 291.40597014925373, "grad_norm": 33.79500961303711, "learning_rate": 9.626190476190477e-06, "loss": 33.6966, "step": 12239 }, { "epoch": 291.42985074626864, "grad_norm": 31.77932357788086, "learning_rate": 9.625396825396827e-06, "loss": 34.575, "step": 12240 }, { "epoch": 291.4537313432836, "grad_norm": 30.573434829711914, "learning_rate": 9.624603174603175e-06, "loss": 32.7903, "step": 12241 }, { "epoch": 291.4776119402985, "grad_norm": 28.312847137451172, "learning_rate": 9.623809523809524e-06, "loss": 35.0707, "step": 12242 }, { "epoch": 291.5014925373134, "grad_norm": 32.0899543762207, "learning_rate": 9.623015873015875e-06, "loss": 34.2454, "step": 12243 }, { "epoch": 291.52537313432833, "grad_norm": 26.4555606842041, "learning_rate": 9.622222222222222e-06, "loss": 34.3891, "step": 12244 }, { "epoch": 291.5492537313433, "grad_norm": 33.5330924987793, "learning_rate": 9.621428571428573e-06, "loss": 33.8859, "step": 12245 }, { "epoch": 291.5731343283582, "grad_norm": 31.670372009277344, "learning_rate": 9.620634920634922e-06, "loss": 33.8622, "step": 12246 }, { "epoch": 291.5970149253731, "grad_norm": 31.936368942260742, "learning_rate": 9.619841269841271e-06, "loss": 33.6762, "step": 12247 }, { "epoch": 291.6208955223881, "grad_norm": 30.13316535949707, "learning_rate": 9.61904761904762e-06, "loss": 34.2466, "step": 12248 }, { "epoch": 291.644776119403, "grad_norm": 32.32080841064453, "learning_rate": 9.618253968253969e-06, "loss": 34.4787, "step": 12249 }, { "epoch": 291.6686567164179, "grad_norm": 26.717697143554688, "learning_rate": 9.617460317460318e-06, "loss": 34.4717, "step": 12250 }, { "epoch": 291.6925373134328, "grad_norm": 33.60049819946289, "learning_rate": 9.616666666666667e-06, "loss": 34.4618, "step": 12251 }, { "epoch": 291.7164179104478, "grad_norm": 26.2215518951416, "learning_rate": 9.615873015873016e-06, "loss": 34.5382, "step": 12252 }, { "epoch": 291.7402985074627, "grad_norm": 29.8955135345459, "learning_rate": 9.615079365079365e-06, "loss": 35.053, "step": 12253 }, { "epoch": 291.7641791044776, "grad_norm": 26.757553100585938, "learning_rate": 9.614285714285714e-06, "loss": 34.5648, "step": 12254 }, { "epoch": 291.78805970149256, "grad_norm": 32.92394256591797, "learning_rate": 9.613492063492065e-06, "loss": 34.0267, "step": 12255 }, { "epoch": 291.81194029850747, "grad_norm": 27.11213493347168, "learning_rate": 9.612698412698414e-06, "loss": 33.973, "step": 12256 }, { "epoch": 291.8358208955224, "grad_norm": 30.325374603271484, "learning_rate": 9.611904761904762e-06, "loss": 34.2087, "step": 12257 }, { "epoch": 291.85970149253734, "grad_norm": 27.406278610229492, "learning_rate": 9.611111111111112e-06, "loss": 35.4236, "step": 12258 }, { "epoch": 291.88358208955225, "grad_norm": 31.16053009033203, "learning_rate": 9.610317460317462e-06, "loss": 35.4351, "step": 12259 }, { "epoch": 291.90746268656716, "grad_norm": 26.751670837402344, "learning_rate": 9.60952380952381e-06, "loss": 34.6347, "step": 12260 }, { "epoch": 291.93134328358207, "grad_norm": 33.27516174316406, "learning_rate": 9.60873015873016e-06, "loss": 34.5843, "step": 12261 }, { "epoch": 291.95522388059703, "grad_norm": 27.722028732299805, "learning_rate": 9.607936507936509e-06, "loss": 33.5751, "step": 12262 }, { "epoch": 291.97910447761194, "grad_norm": 28.656055450439453, "learning_rate": 9.607142857142858e-06, "loss": 33.5382, "step": 12263 }, { "epoch": 292.0, "grad_norm": 24.935575485229492, "learning_rate": 9.606349206349207e-06, "loss": 30.3113, "step": 12264 }, { "epoch": 292.0238805970149, "grad_norm": 31.052854537963867, "learning_rate": 9.605555555555556e-06, "loss": 35.0746, "step": 12265 }, { "epoch": 292.0477611940299, "grad_norm": 28.53564453125, "learning_rate": 9.604761904761905e-06, "loss": 34.9898, "step": 12266 }, { "epoch": 292.0716417910448, "grad_norm": 30.77297019958496, "learning_rate": 9.603968253968254e-06, "loss": 33.4999, "step": 12267 }, { "epoch": 292.0955223880597, "grad_norm": 26.038986206054688, "learning_rate": 9.603174603174605e-06, "loss": 34.0279, "step": 12268 }, { "epoch": 292.1194029850746, "grad_norm": 33.35354232788086, "learning_rate": 9.602380952380952e-06, "loss": 34.7686, "step": 12269 }, { "epoch": 292.14328358208957, "grad_norm": 29.855701446533203, "learning_rate": 9.601587301587303e-06, "loss": 35.0543, "step": 12270 }, { "epoch": 292.1671641791045, "grad_norm": 28.970848083496094, "learning_rate": 9.600793650793652e-06, "loss": 34.4599, "step": 12271 }, { "epoch": 292.1910447761194, "grad_norm": 28.292043685913086, "learning_rate": 9.600000000000001e-06, "loss": 34.6065, "step": 12272 }, { "epoch": 292.21492537313435, "grad_norm": 28.919475555419922, "learning_rate": 9.59920634920635e-06, "loss": 34.005, "step": 12273 }, { "epoch": 292.23880597014926, "grad_norm": 24.51378631591797, "learning_rate": 9.5984126984127e-06, "loss": 34.3403, "step": 12274 }, { "epoch": 292.26268656716417, "grad_norm": 34.44512939453125, "learning_rate": 9.597619047619048e-06, "loss": 34.8771, "step": 12275 }, { "epoch": 292.28656716417913, "grad_norm": 25.570201873779297, "learning_rate": 9.596825396825398e-06, "loss": 33.9209, "step": 12276 }, { "epoch": 292.31044776119404, "grad_norm": 32.16792678833008, "learning_rate": 9.596031746031747e-06, "loss": 35.0343, "step": 12277 }, { "epoch": 292.33432835820895, "grad_norm": 27.99781036376953, "learning_rate": 9.595238095238096e-06, "loss": 33.8863, "step": 12278 }, { "epoch": 292.35820895522386, "grad_norm": 31.632658004760742, "learning_rate": 9.594444444444445e-06, "loss": 34.2391, "step": 12279 }, { "epoch": 292.3820895522388, "grad_norm": 27.672597885131836, "learning_rate": 9.593650793650794e-06, "loss": 33.8981, "step": 12280 }, { "epoch": 292.40597014925373, "grad_norm": 27.102998733520508, "learning_rate": 9.592857142857143e-06, "loss": 33.9211, "step": 12281 }, { "epoch": 292.42985074626864, "grad_norm": 28.44908905029297, "learning_rate": 9.592063492063492e-06, "loss": 34.7022, "step": 12282 }, { "epoch": 292.4537313432836, "grad_norm": 28.945695877075195, "learning_rate": 9.591269841269843e-06, "loss": 34.3976, "step": 12283 }, { "epoch": 292.4776119402985, "grad_norm": 24.871110916137695, "learning_rate": 9.590476190476192e-06, "loss": 34.3921, "step": 12284 }, { "epoch": 292.5014925373134, "grad_norm": 29.340715408325195, "learning_rate": 9.58968253968254e-06, "loss": 34.5926, "step": 12285 }, { "epoch": 292.52537313432833, "grad_norm": 26.397930145263672, "learning_rate": 9.58888888888889e-06, "loss": 34.4733, "step": 12286 }, { "epoch": 292.5492537313433, "grad_norm": 33.7936897277832, "learning_rate": 9.588095238095239e-06, "loss": 33.8797, "step": 12287 }, { "epoch": 292.5731343283582, "grad_norm": 30.71653175354004, "learning_rate": 9.587301587301588e-06, "loss": 34.3851, "step": 12288 }, { "epoch": 292.5970149253731, "grad_norm": 27.971527099609375, "learning_rate": 9.586507936507937e-06, "loss": 33.9437, "step": 12289 }, { "epoch": 292.6208955223881, "grad_norm": 23.75400161743164, "learning_rate": 9.585714285714286e-06, "loss": 34.3425, "step": 12290 }, { "epoch": 292.644776119403, "grad_norm": 28.923099517822266, "learning_rate": 9.584920634920635e-06, "loss": 34.8029, "step": 12291 }, { "epoch": 292.6686567164179, "grad_norm": 24.210174560546875, "learning_rate": 9.584126984126985e-06, "loss": 32.6122, "step": 12292 }, { "epoch": 292.6925373134328, "grad_norm": 28.171342849731445, "learning_rate": 9.583333333333335e-06, "loss": 33.9238, "step": 12293 }, { "epoch": 292.7164179104478, "grad_norm": 24.929515838623047, "learning_rate": 9.582539682539683e-06, "loss": 34.3477, "step": 12294 }, { "epoch": 292.7402985074627, "grad_norm": 31.719621658325195, "learning_rate": 9.581746031746032e-06, "loss": 34.5204, "step": 12295 }, { "epoch": 292.7641791044776, "grad_norm": 30.568191528320312, "learning_rate": 9.580952380952383e-06, "loss": 34.7862, "step": 12296 }, { "epoch": 292.78805970149256, "grad_norm": 28.750465393066406, "learning_rate": 9.58015873015873e-06, "loss": 35.5672, "step": 12297 }, { "epoch": 292.81194029850747, "grad_norm": 26.463260650634766, "learning_rate": 9.57936507936508e-06, "loss": 33.3431, "step": 12298 }, { "epoch": 292.8358208955224, "grad_norm": 27.773862838745117, "learning_rate": 9.57857142857143e-06, "loss": 36.3078, "step": 12299 }, { "epoch": 292.85970149253734, "grad_norm": 22.0357608795166, "learning_rate": 9.577777777777779e-06, "loss": 34.3722, "step": 12300 }, { "epoch": 292.88358208955225, "grad_norm": 30.44091796875, "learning_rate": 9.576984126984128e-06, "loss": 34.0244, "step": 12301 }, { "epoch": 292.90746268656716, "grad_norm": 24.222484588623047, "learning_rate": 9.576190476190477e-06, "loss": 33.6204, "step": 12302 }, { "epoch": 292.93134328358207, "grad_norm": 31.65188980102539, "learning_rate": 9.575396825396826e-06, "loss": 33.8672, "step": 12303 }, { "epoch": 292.95522388059703, "grad_norm": 27.379779815673828, "learning_rate": 9.574603174603175e-06, "loss": 33.9861, "step": 12304 }, { "epoch": 292.97910447761194, "grad_norm": 31.15701675415039, "learning_rate": 9.573809523809524e-06, "loss": 33.571, "step": 12305 }, { "epoch": 293.0, "grad_norm": 21.773134231567383, "learning_rate": 9.573015873015873e-06, "loss": 28.9808, "step": 12306 }, { "epoch": 293.0238805970149, "grad_norm": 30.449079513549805, "learning_rate": 9.572222222222222e-06, "loss": 33.7464, "step": 12307 }, { "epoch": 293.0477611940299, "grad_norm": 28.632801055908203, "learning_rate": 9.571428571428573e-06, "loss": 34.9108, "step": 12308 }, { "epoch": 293.0716417910448, "grad_norm": 30.977502822875977, "learning_rate": 9.57063492063492e-06, "loss": 35.5955, "step": 12309 }, { "epoch": 293.0955223880597, "grad_norm": 26.199066162109375, "learning_rate": 9.56984126984127e-06, "loss": 34.2995, "step": 12310 }, { "epoch": 293.1194029850746, "grad_norm": 29.398496627807617, "learning_rate": 9.56904761904762e-06, "loss": 34.0355, "step": 12311 }, { "epoch": 293.14328358208957, "grad_norm": 26.90253257751465, "learning_rate": 9.56825396825397e-06, "loss": 35.3956, "step": 12312 }, { "epoch": 293.1671641791045, "grad_norm": 28.612627029418945, "learning_rate": 9.567460317460319e-06, "loss": 34.6483, "step": 12313 }, { "epoch": 293.1910447761194, "grad_norm": 23.256914138793945, "learning_rate": 9.566666666666668e-06, "loss": 34.2701, "step": 12314 }, { "epoch": 293.21492537313435, "grad_norm": 29.50980567932129, "learning_rate": 9.565873015873017e-06, "loss": 33.7863, "step": 12315 }, { "epoch": 293.23880597014926, "grad_norm": 24.027650833129883, "learning_rate": 9.565079365079366e-06, "loss": 34.143, "step": 12316 }, { "epoch": 293.26268656716417, "grad_norm": 31.43763542175293, "learning_rate": 9.564285714285715e-06, "loss": 33.8023, "step": 12317 }, { "epoch": 293.28656716417913, "grad_norm": 26.828107833862305, "learning_rate": 9.563492063492064e-06, "loss": 34.472, "step": 12318 }, { "epoch": 293.31044776119404, "grad_norm": NaN, "learning_rate": 9.562698412698413e-06, "loss": 52.0314, "step": 12319 }, { "epoch": 293.33432835820895, "grad_norm": 29.993623733520508, "learning_rate": 9.562698412698413e-06, "loss": 33.4094, "step": 12320 }, { "epoch": 293.35820895522386, "grad_norm": 27.77976417541504, "learning_rate": 9.561904761904762e-06, "loss": 33.8334, "step": 12321 }, { "epoch": 293.3820895522388, "grad_norm": 28.90367317199707, "learning_rate": 9.561111111111113e-06, "loss": 35.6582, "step": 12322 }, { "epoch": 293.40597014925373, "grad_norm": 25.03451919555664, "learning_rate": 9.56031746031746e-06, "loss": 34.8488, "step": 12323 }, { "epoch": 293.42985074626864, "grad_norm": 34.04804992675781, "learning_rate": 9.559523809523811e-06, "loss": 34.9502, "step": 12324 }, { "epoch": 293.4537313432836, "grad_norm": 29.848726272583008, "learning_rate": 9.55873015873016e-06, "loss": 33.6212, "step": 12325 }, { "epoch": 293.4776119402985, "grad_norm": 25.464885711669922, "learning_rate": 9.557936507936508e-06, "loss": 33.1467, "step": 12326 }, { "epoch": 293.5014925373134, "grad_norm": 25.84783935546875, "learning_rate": 9.557142857142858e-06, "loss": 34.1532, "step": 12327 }, { "epoch": 293.52537313432833, "grad_norm": 28.271520614624023, "learning_rate": 9.556349206349207e-06, "loss": 34.3759, "step": 12328 }, { "epoch": 293.5492537313433, "grad_norm": 23.839200973510742, "learning_rate": 9.555555555555556e-06, "loss": 34.8524, "step": 12329 }, { "epoch": 293.5731343283582, "grad_norm": 32.99650573730469, "learning_rate": 9.554761904761906e-06, "loss": 34.4787, "step": 12330 }, { "epoch": 293.5970149253731, "grad_norm": 29.437646865844727, "learning_rate": 9.553968253968255e-06, "loss": 34.0187, "step": 12331 }, { "epoch": 293.6208955223881, "grad_norm": 29.929519653320312, "learning_rate": 9.553174603174604e-06, "loss": 33.5362, "step": 12332 }, { "epoch": 293.644776119403, "grad_norm": 28.433238983154297, "learning_rate": 9.552380952380953e-06, "loss": 35.5923, "step": 12333 }, { "epoch": 293.6686567164179, "grad_norm": 29.59156036376953, "learning_rate": 9.551587301587304e-06, "loss": 34.5687, "step": 12334 }, { "epoch": 293.6925373134328, "grad_norm": 25.87936782836914, "learning_rate": 9.550793650793651e-06, "loss": 35.4809, "step": 12335 }, { "epoch": 293.7164179104478, "grad_norm": 26.75420570373535, "learning_rate": 9.55e-06, "loss": 34.4088, "step": 12336 }, { "epoch": 293.7402985074627, "grad_norm": 24.215700149536133, "learning_rate": 9.54920634920635e-06, "loss": 34.198, "step": 12337 }, { "epoch": 293.7641791044776, "grad_norm": 32.12968826293945, "learning_rate": 9.548412698412698e-06, "loss": 33.7925, "step": 12338 }, { "epoch": 293.78805970149256, "grad_norm": 30.090240478515625, "learning_rate": 9.547619047619049e-06, "loss": 33.5707, "step": 12339 }, { "epoch": 293.81194029850747, "grad_norm": 27.812780380249023, "learning_rate": 9.546825396825398e-06, "loss": 34.3747, "step": 12340 }, { "epoch": 293.8358208955224, "grad_norm": 30.12023162841797, "learning_rate": 9.546031746031747e-06, "loss": 34.0938, "step": 12341 }, { "epoch": 293.85970149253734, "grad_norm": 26.99824333190918, "learning_rate": 9.545238095238096e-06, "loss": 34.3025, "step": 12342 }, { "epoch": 293.88358208955225, "grad_norm": 25.77198028564453, "learning_rate": 9.544444444444445e-06, "loss": 34.9137, "step": 12343 }, { "epoch": 293.90746268656716, "grad_norm": 27.67975616455078, "learning_rate": 9.543650793650794e-06, "loss": 34.4613, "step": 12344 }, { "epoch": 293.93134328358207, "grad_norm": 25.866796493530273, "learning_rate": 9.542857142857143e-06, "loss": 33.6191, "step": 12345 }, { "epoch": 293.95522388059703, "grad_norm": 28.284847259521484, "learning_rate": 9.542063492063493e-06, "loss": 34.1001, "step": 12346 }, { "epoch": 293.97910447761194, "grad_norm": 25.0577392578125, "learning_rate": 9.541269841269842e-06, "loss": 34.3683, "step": 12347 }, { "epoch": 294.0, "grad_norm": 25.284692764282227, "learning_rate": 9.54047619047619e-06, "loss": 29.5702, "step": 12348 }, { "epoch": 294.0238805970149, "grad_norm": 25.21659278869629, "learning_rate": 9.539682539682541e-06, "loss": 33.7699, "step": 12349 }, { "epoch": 294.0477611940299, "grad_norm": 26.633054733276367, "learning_rate": 9.53888888888889e-06, "loss": 34.5498, "step": 12350 }, { "epoch": 294.0716417910448, "grad_norm": 21.414623260498047, "learning_rate": 9.538095238095238e-06, "loss": 34.3403, "step": 12351 }, { "epoch": 294.0955223880597, "grad_norm": 30.119386672973633, "learning_rate": 9.537301587301589e-06, "loss": 34.4893, "step": 12352 }, { "epoch": 294.1194029850746, "grad_norm": 22.80232048034668, "learning_rate": 9.536507936507938e-06, "loss": 34.6263, "step": 12353 }, { "epoch": 294.14328358208957, "grad_norm": 25.87506675720215, "learning_rate": 9.535714285714287e-06, "loss": 33.5805, "step": 12354 }, { "epoch": 294.1671641791045, "grad_norm": NaN, "learning_rate": 9.534920634920636e-06, "loss": 33.8619, "step": 12355 }, { "epoch": 294.1910447761194, "grad_norm": 23.632522583007812, "learning_rate": 9.534920634920636e-06, "loss": 35.0454, "step": 12356 }, { "epoch": 294.21492537313435, "grad_norm": 24.637853622436523, "learning_rate": 9.534126984126985e-06, "loss": 36.1425, "step": 12357 }, { "epoch": 294.23880597014926, "grad_norm": 21.857200622558594, "learning_rate": 9.533333333333334e-06, "loss": 33.8715, "step": 12358 }, { "epoch": 294.26268656716417, "grad_norm": 26.445865631103516, "learning_rate": 9.532539682539683e-06, "loss": 33.0418, "step": 12359 }, { "epoch": 294.28656716417913, "grad_norm": 21.537635803222656, "learning_rate": 9.531746031746032e-06, "loss": 33.6292, "step": 12360 }, { "epoch": 294.31044776119404, "grad_norm": 28.713010787963867, "learning_rate": 9.530952380952381e-06, "loss": 34.4618, "step": 12361 }, { "epoch": 294.33432835820895, "grad_norm": 22.629819869995117, "learning_rate": 9.53015873015873e-06, "loss": 34.7087, "step": 12362 }, { "epoch": 294.35820895522386, "grad_norm": 25.359880447387695, "learning_rate": 9.529365079365081e-06, "loss": 33.8194, "step": 12363 }, { "epoch": 294.3820895522388, "grad_norm": 21.736087799072266, "learning_rate": 9.528571428571429e-06, "loss": 33.602, "step": 12364 }, { "epoch": 294.40597014925373, "grad_norm": 24.244258880615234, "learning_rate": 9.527777777777778e-06, "loss": 34.2988, "step": 12365 }, { "epoch": 294.42985074626864, "grad_norm": 22.63555335998535, "learning_rate": 9.526984126984128e-06, "loss": 34.4116, "step": 12366 }, { "epoch": 294.4537313432836, "grad_norm": 19.558313369750977, "learning_rate": 9.526190476190476e-06, "loss": 33.8121, "step": 12367 }, { "epoch": 294.4776119402985, "grad_norm": 25.274812698364258, "learning_rate": 9.525396825396827e-06, "loss": 35.9436, "step": 12368 }, { "epoch": 294.5014925373134, "grad_norm": 18.29305648803711, "learning_rate": 9.524603174603176e-06, "loss": 34.9848, "step": 12369 }, { "epoch": 294.52537313432833, "grad_norm": 22.77621841430664, "learning_rate": 9.523809523809525e-06, "loss": 33.5784, "step": 12370 }, { "epoch": 294.5492537313433, "grad_norm": 21.122507095336914, "learning_rate": 9.523015873015874e-06, "loss": 34.8318, "step": 12371 }, { "epoch": 294.5731343283582, "grad_norm": 22.661550521850586, "learning_rate": 9.522222222222223e-06, "loss": 35.0228, "step": 12372 }, { "epoch": 294.5970149253731, "grad_norm": 19.22883415222168, "learning_rate": 9.521428571428572e-06, "loss": 34.9678, "step": 12373 }, { "epoch": 294.6208955223881, "grad_norm": 17.760299682617188, "learning_rate": 9.520634920634921e-06, "loss": 32.7356, "step": 12374 }, { "epoch": 294.644776119403, "grad_norm": 17.98216438293457, "learning_rate": 9.51984126984127e-06, "loss": 34.2342, "step": 12375 }, { "epoch": 294.6686567164179, "grad_norm": 16.56303596496582, "learning_rate": 9.51904761904762e-06, "loss": 34.3649, "step": 12376 }, { "epoch": 294.6925373134328, "grad_norm": 20.535871505737305, "learning_rate": 9.518253968253968e-06, "loss": 33.4957, "step": 12377 }, { "epoch": 294.7164179104478, "grad_norm": 17.814687728881836, "learning_rate": 9.517460317460319e-06, "loss": 34.6065, "step": 12378 }, { "epoch": 294.7402985074627, "grad_norm": 18.159772872924805, "learning_rate": 9.516666666666668e-06, "loss": 32.49, "step": 12379 }, { "epoch": 294.7641791044776, "grad_norm": 16.40694808959961, "learning_rate": 9.515873015873016e-06, "loss": 34.4936, "step": 12380 }, { "epoch": 294.78805970149256, "grad_norm": 18.144777297973633, "learning_rate": 9.515079365079366e-06, "loss": 34.502, "step": 12381 }, { "epoch": 294.81194029850747, "grad_norm": 18.975643157958984, "learning_rate": 9.514285714285715e-06, "loss": 34.3534, "step": 12382 }, { "epoch": 294.8358208955224, "grad_norm": 17.128089904785156, "learning_rate": 9.513492063492064e-06, "loss": 33.2332, "step": 12383 }, { "epoch": 294.85970149253734, "grad_norm": 20.600330352783203, "learning_rate": 9.512698412698414e-06, "loss": 34.8824, "step": 12384 }, { "epoch": 294.88358208955225, "grad_norm": 15.444587707519531, "learning_rate": 9.511904761904763e-06, "loss": 35.1217, "step": 12385 }, { "epoch": 294.90746268656716, "grad_norm": 17.063888549804688, "learning_rate": 9.511111111111112e-06, "loss": 34.3084, "step": 12386 }, { "epoch": 294.93134328358207, "grad_norm": 18.895130157470703, "learning_rate": 9.51031746031746e-06, "loss": 34.6135, "step": 12387 }, { "epoch": 294.95522388059703, "grad_norm": 18.868885040283203, "learning_rate": 9.50952380952381e-06, "loss": 34.1046, "step": 12388 }, { "epoch": 294.97910447761194, "grad_norm": 18.9312686920166, "learning_rate": 9.508730158730159e-06, "loss": 35.0173, "step": 12389 }, { "epoch": 295.0, "grad_norm": 13.075404167175293, "learning_rate": 9.507936507936508e-06, "loss": 29.0332, "step": 12390 }, { "epoch": 295.0238805970149, "grad_norm": 17.01565170288086, "learning_rate": 9.507142857142859e-06, "loss": 33.8036, "step": 12391 }, { "epoch": 295.0477611940299, "grad_norm": 18.59445571899414, "learning_rate": 9.506349206349206e-06, "loss": 34.6043, "step": 12392 }, { "epoch": 295.0716417910448, "grad_norm": 15.26381778717041, "learning_rate": 9.505555555555557e-06, "loss": 35.2198, "step": 12393 }, { "epoch": 295.0955223880597, "grad_norm": 21.32588768005371, "learning_rate": 9.504761904761906e-06, "loss": 34.1363, "step": 12394 }, { "epoch": 295.1194029850746, "grad_norm": 17.290464401245117, "learning_rate": 9.503968253968255e-06, "loss": 33.2469, "step": 12395 }, { "epoch": 295.14328358208957, "grad_norm": 19.420989990234375, "learning_rate": 9.503174603174604e-06, "loss": 33.864, "step": 12396 }, { "epoch": 295.1671641791045, "grad_norm": 17.46050262451172, "learning_rate": 9.502380952380953e-06, "loss": 33.4547, "step": 12397 }, { "epoch": 295.1910447761194, "grad_norm": 19.51968765258789, "learning_rate": 9.501587301587302e-06, "loss": 33.0625, "step": 12398 }, { "epoch": 295.21492537313435, "grad_norm": 20.821683883666992, "learning_rate": 9.500793650793651e-06, "loss": 34.7235, "step": 12399 }, { "epoch": 295.23880597014926, "grad_norm": 18.05327033996582, "learning_rate": 9.5e-06, "loss": 34.1115, "step": 12400 }, { "epoch": 295.26268656716417, "grad_norm": 18.052072525024414, "learning_rate": 9.49920634920635e-06, "loss": 33.28, "step": 12401 }, { "epoch": 295.28656716417913, "grad_norm": 21.33596420288086, "learning_rate": 9.498412698412699e-06, "loss": 33.7036, "step": 12402 }, { "epoch": 295.31044776119404, "grad_norm": 18.234445571899414, "learning_rate": 9.49761904761905e-06, "loss": 35.0936, "step": 12403 }, { "epoch": 295.33432835820895, "grad_norm": 18.282062530517578, "learning_rate": 9.496825396825397e-06, "loss": 34.604, "step": 12404 }, { "epoch": 295.35820895522386, "grad_norm": 19.064664840698242, "learning_rate": 9.496031746031746e-06, "loss": 34.7586, "step": 12405 }, { "epoch": 295.3820895522388, "grad_norm": 17.39444923400879, "learning_rate": 9.495238095238097e-06, "loss": 34.7037, "step": 12406 }, { "epoch": 295.40597014925373, "grad_norm": 15.011940002441406, "learning_rate": 9.494444444444446e-06, "loss": 33.2601, "step": 12407 }, { "epoch": 295.42985074626864, "grad_norm": 18.480915069580078, "learning_rate": 9.493650793650795e-06, "loss": 32.9466, "step": 12408 }, { "epoch": 295.4537313432836, "grad_norm": 17.534500122070312, "learning_rate": 9.492857142857144e-06, "loss": 34.6965, "step": 12409 }, { "epoch": 295.4776119402985, "grad_norm": 14.153911590576172, "learning_rate": 9.492063492063493e-06, "loss": 33.6866, "step": 12410 }, { "epoch": 295.5014925373134, "grad_norm": 14.531407356262207, "learning_rate": 9.491269841269842e-06, "loss": 34.527, "step": 12411 }, { "epoch": 295.52537313432833, "grad_norm": 20.134963989257812, "learning_rate": 9.490476190476191e-06, "loss": 34.4852, "step": 12412 }, { "epoch": 295.5492537313433, "grad_norm": 19.988662719726562, "learning_rate": 9.48968253968254e-06, "loss": 36.11, "step": 12413 }, { "epoch": 295.5731343283582, "grad_norm": 13.376730918884277, "learning_rate": 9.48888888888889e-06, "loss": 34.295, "step": 12414 }, { "epoch": 295.5970149253731, "grad_norm": 25.447059631347656, "learning_rate": 9.488095238095238e-06, "loss": 34.694, "step": 12415 }, { "epoch": 295.6208955223881, "grad_norm": 17.149171829223633, "learning_rate": 9.48730158730159e-06, "loss": 33.6649, "step": 12416 }, { "epoch": 295.644776119403, "grad_norm": 22.13737678527832, "learning_rate": 9.486507936507937e-06, "loss": 34.735, "step": 12417 }, { "epoch": 295.6686567164179, "grad_norm": 20.037952423095703, "learning_rate": 9.485714285714287e-06, "loss": 33.5167, "step": 12418 }, { "epoch": 295.6925373134328, "grad_norm": 16.075672149658203, "learning_rate": 9.484920634920636e-06, "loss": 34.4586, "step": 12419 }, { "epoch": 295.7164179104478, "grad_norm": 25.83331298828125, "learning_rate": 9.484126984126984e-06, "loss": 33.9106, "step": 12420 }, { "epoch": 295.7402985074627, "grad_norm": 16.158767700195312, "learning_rate": 9.483333333333335e-06, "loss": 34.7901, "step": 12421 }, { "epoch": 295.7641791044776, "grad_norm": 23.243450164794922, "learning_rate": 9.482539682539684e-06, "loss": 34.4313, "step": 12422 }, { "epoch": 295.78805970149256, "grad_norm": 23.996456146240234, "learning_rate": 9.481746031746033e-06, "loss": 34.7577, "step": 12423 }, { "epoch": 295.81194029850747, "grad_norm": 14.75000286102295, "learning_rate": 9.480952380952382e-06, "loss": 34.4676, "step": 12424 }, { "epoch": 295.8358208955224, "grad_norm": 28.206483840942383, "learning_rate": 9.480158730158731e-06, "loss": 34.217, "step": 12425 }, { "epoch": 295.85970149253734, "grad_norm": 20.71973419189453, "learning_rate": 9.47936507936508e-06, "loss": 34.9382, "step": 12426 }, { "epoch": 295.88358208955225, "grad_norm": 22.474464416503906, "learning_rate": 9.478571428571429e-06, "loss": 34.1382, "step": 12427 }, { "epoch": 295.90746268656716, "grad_norm": 22.20485496520996, "learning_rate": 9.47777777777778e-06, "loss": 34.9758, "step": 12428 }, { "epoch": 295.93134328358207, "grad_norm": 19.13903045654297, "learning_rate": 9.476984126984127e-06, "loss": 33.5434, "step": 12429 }, { "epoch": 295.95522388059703, "grad_norm": 22.864238739013672, "learning_rate": 9.476190476190476e-06, "loss": 34.2639, "step": 12430 }, { "epoch": 295.97910447761194, "grad_norm": 17.49142837524414, "learning_rate": 9.475396825396827e-06, "loss": 34.6608, "step": 12431 }, { "epoch": 296.0, "grad_norm": 23.418718338012695, "learning_rate": 9.474603174603174e-06, "loss": 29.8988, "step": 12432 }, { "epoch": 296.0238805970149, "grad_norm": 17.08441925048828, "learning_rate": 9.473809523809525e-06, "loss": 33.9752, "step": 12433 }, { "epoch": 296.0477611940299, "grad_norm": 23.049734115600586, "learning_rate": 9.473015873015874e-06, "loss": 34.2149, "step": 12434 }, { "epoch": 296.0716417910448, "grad_norm": 20.94700813293457, "learning_rate": 9.472222222222223e-06, "loss": 33.7319, "step": 12435 }, { "epoch": 296.0955223880597, "grad_norm": 16.3214054107666, "learning_rate": 9.471428571428572e-06, "loss": 35.0182, "step": 12436 }, { "epoch": 296.1194029850746, "grad_norm": 26.8597469329834, "learning_rate": 9.470634920634922e-06, "loss": 33.7682, "step": 12437 }, { "epoch": 296.14328358208957, "grad_norm": 17.673561096191406, "learning_rate": 9.46984126984127e-06, "loss": 34.1338, "step": 12438 }, { "epoch": 296.1671641791045, "grad_norm": 27.371057510375977, "learning_rate": 9.46904761904762e-06, "loss": 33.285, "step": 12439 }, { "epoch": 296.1910447761194, "grad_norm": 20.176958084106445, "learning_rate": 9.468253968253969e-06, "loss": 35.1346, "step": 12440 }, { "epoch": 296.21492537313435, "grad_norm": 22.260957717895508, "learning_rate": 9.467460317460318e-06, "loss": 34.0694, "step": 12441 }, { "epoch": 296.23880597014926, "grad_norm": 21.492176055908203, "learning_rate": 9.466666666666667e-06, "loss": 35.1014, "step": 12442 }, { "epoch": 296.26268656716417, "grad_norm": 15.671597480773926, "learning_rate": 9.465873015873016e-06, "loss": 34.8614, "step": 12443 }, { "epoch": 296.28656716417913, "grad_norm": 23.840309143066406, "learning_rate": 9.465079365079367e-06, "loss": 34.1475, "step": 12444 }, { "epoch": 296.31044776119404, "grad_norm": 18.10795021057129, "learning_rate": 9.464285714285714e-06, "loss": 34.7024, "step": 12445 }, { "epoch": 296.33432835820895, "grad_norm": 17.174304962158203, "learning_rate": 9.463492063492065e-06, "loss": 33.6018, "step": 12446 }, { "epoch": 296.35820895522386, "grad_norm": 21.586544036865234, "learning_rate": 9.462698412698414e-06, "loss": 33.4138, "step": 12447 }, { "epoch": 296.3820895522388, "grad_norm": 15.472028732299805, "learning_rate": 9.461904761904761e-06, "loss": 34.4597, "step": 12448 }, { "epoch": 296.40597014925373, "grad_norm": 18.487974166870117, "learning_rate": 9.461111111111112e-06, "loss": 33.473, "step": 12449 }, { "epoch": 296.42985074626864, "grad_norm": 16.693988800048828, "learning_rate": 9.460317460317461e-06, "loss": 35.647, "step": 12450 }, { "epoch": 296.4537313432836, "grad_norm": 16.25507926940918, "learning_rate": 9.45952380952381e-06, "loss": 35.8971, "step": 12451 }, { "epoch": 296.4776119402985, "grad_norm": 15.41960334777832, "learning_rate": 9.45873015873016e-06, "loss": 34.6937, "step": 12452 }, { "epoch": 296.5014925373134, "grad_norm": 16.79047393798828, "learning_rate": 9.457936507936509e-06, "loss": 34.3645, "step": 12453 }, { "epoch": 296.52537313432833, "grad_norm": 17.38849639892578, "learning_rate": 9.457142857142858e-06, "loss": 34.0681, "step": 12454 }, { "epoch": 296.5492537313433, "grad_norm": 17.246997833251953, "learning_rate": 9.456349206349207e-06, "loss": 34.1353, "step": 12455 }, { "epoch": 296.5731343283582, "grad_norm": 22.749364852905273, "learning_rate": 9.455555555555557e-06, "loss": 34.0916, "step": 12456 }, { "epoch": 296.5970149253731, "grad_norm": 17.064678192138672, "learning_rate": 9.454761904761905e-06, "loss": 33.6057, "step": 12457 }, { "epoch": 296.6208955223881, "grad_norm": 17.87356185913086, "learning_rate": 9.453968253968254e-06, "loss": 33.3623, "step": 12458 }, { "epoch": 296.644776119403, "grad_norm": 18.962120056152344, "learning_rate": 9.453174603174605e-06, "loss": 33.5192, "step": 12459 }, { "epoch": 296.6686567164179, "grad_norm": 18.000173568725586, "learning_rate": 9.452380952380952e-06, "loss": 35.1714, "step": 12460 }, { "epoch": 296.6925373134328, "grad_norm": 13.997823715209961, "learning_rate": 9.451587301587303e-06, "loss": 33.7567, "step": 12461 }, { "epoch": 296.7164179104478, "grad_norm": 14.310981750488281, "learning_rate": 9.450793650793652e-06, "loss": 34.7802, "step": 12462 }, { "epoch": 296.7402985074627, "grad_norm": 15.097182273864746, "learning_rate": 9.450000000000001e-06, "loss": 33.7409, "step": 12463 }, { "epoch": 296.7641791044776, "grad_norm": 21.005247116088867, "learning_rate": 9.44920634920635e-06, "loss": 34.2042, "step": 12464 }, { "epoch": 296.78805970149256, "grad_norm": 16.77436637878418, "learning_rate": 9.4484126984127e-06, "loss": 34.5884, "step": 12465 }, { "epoch": 296.81194029850747, "grad_norm": 16.423629760742188, "learning_rate": 9.447619047619048e-06, "loss": 35.0852, "step": 12466 }, { "epoch": 296.8358208955224, "grad_norm": 13.942682266235352, "learning_rate": 9.446825396825397e-06, "loss": 34.443, "step": 12467 }, { "epoch": 296.85970149253734, "grad_norm": 16.88393783569336, "learning_rate": 9.446031746031746e-06, "loss": 34.1228, "step": 12468 }, { "epoch": 296.88358208955225, "grad_norm": 13.906193733215332, "learning_rate": 9.445238095238095e-06, "loss": 34.0372, "step": 12469 }, { "epoch": 296.90746268656716, "grad_norm": 19.061094284057617, "learning_rate": 9.444444444444445e-06, "loss": 33.0478, "step": 12470 }, { "epoch": 296.93134328358207, "grad_norm": 15.234111785888672, "learning_rate": 9.443650793650795e-06, "loss": 33.4145, "step": 12471 }, { "epoch": 296.95522388059703, "grad_norm": 17.861047744750977, "learning_rate": 9.442857142857144e-06, "loss": 33.4059, "step": 12472 }, { "epoch": 296.97910447761194, "grad_norm": 16.07079315185547, "learning_rate": 9.442063492063492e-06, "loss": 34.8257, "step": 12473 }, { "epoch": 297.0, "grad_norm": NaN, "learning_rate": 9.441269841269843e-06, "loss": 29.9155, "step": 12474 }, { "epoch": 297.0238805970149, "grad_norm": 20.782690048217773, "learning_rate": 9.441269841269843e-06, "loss": 33.9278, "step": 12475 }, { "epoch": 297.0477611940299, "grad_norm": 15.933671951293945, "learning_rate": 9.440476190476192e-06, "loss": 33.9953, "step": 12476 }, { "epoch": 297.0716417910448, "grad_norm": 18.559167861938477, "learning_rate": 9.43968253968254e-06, "loss": 34.4357, "step": 12477 }, { "epoch": 297.0955223880597, "grad_norm": 23.417346954345703, "learning_rate": 9.43888888888889e-06, "loss": 35.0378, "step": 12478 }, { "epoch": 297.1194029850746, "grad_norm": 14.970905303955078, "learning_rate": 9.438095238095239e-06, "loss": 33.9518, "step": 12479 }, { "epoch": 297.14328358208957, "grad_norm": 18.455663681030273, "learning_rate": 9.437301587301588e-06, "loss": 35.023, "step": 12480 }, { "epoch": 297.1671641791045, "grad_norm": 21.78778839111328, "learning_rate": 9.436507936507937e-06, "loss": 34.0907, "step": 12481 }, { "epoch": 297.1910447761194, "grad_norm": 14.536425590515137, "learning_rate": 9.435714285714286e-06, "loss": 32.558, "step": 12482 }, { "epoch": 297.21492537313435, "grad_norm": 23.07076072692871, "learning_rate": 9.434920634920635e-06, "loss": 34.0558, "step": 12483 }, { "epoch": 297.23880597014926, "grad_norm": 22.406545639038086, "learning_rate": 9.434126984126984e-06, "loss": 34.8833, "step": 12484 }, { "epoch": 297.26268656716417, "grad_norm": 15.541619300842285, "learning_rate": 9.433333333333335e-06, "loss": 34.3949, "step": 12485 }, { "epoch": 297.28656716417913, "grad_norm": 32.484676361083984, "learning_rate": 9.432539682539682e-06, "loss": 34.3982, "step": 12486 }, { "epoch": 297.31044776119404, "grad_norm": 18.681869506835938, "learning_rate": 9.431746031746033e-06, "loss": 33.5947, "step": 12487 }, { "epoch": 297.33432835820895, "grad_norm": 27.216064453125, "learning_rate": 9.430952380952382e-06, "loss": 34.0555, "step": 12488 }, { "epoch": 297.35820895522386, "grad_norm": 20.63875961303711, "learning_rate": 9.43015873015873e-06, "loss": 35.0615, "step": 12489 }, { "epoch": 297.3820895522388, "grad_norm": 30.936704635620117, "learning_rate": 9.42936507936508e-06, "loss": 33.7986, "step": 12490 }, { "epoch": 297.40597014925373, "grad_norm": 19.177112579345703, "learning_rate": 9.42857142857143e-06, "loss": 33.8598, "step": 12491 }, { "epoch": 297.42985074626864, "grad_norm": 29.62114143371582, "learning_rate": 9.427777777777779e-06, "loss": 34.225, "step": 12492 }, { "epoch": 297.4537313432836, "grad_norm": 18.298412322998047, "learning_rate": 9.426984126984128e-06, "loss": 34.0888, "step": 12493 }, { "epoch": 297.4776119402985, "grad_norm": 30.48539161682129, "learning_rate": 9.426190476190477e-06, "loss": 33.1199, "step": 12494 }, { "epoch": 297.5014925373134, "grad_norm": 19.66048812866211, "learning_rate": 9.425396825396826e-06, "loss": 34.8943, "step": 12495 }, { "epoch": 297.52537313432833, "grad_norm": 24.55680274963379, "learning_rate": 9.424603174603175e-06, "loss": 33.9494, "step": 12496 }, { "epoch": 297.5492537313433, "grad_norm": 21.85373878479004, "learning_rate": 9.423809523809526e-06, "loss": 34.5054, "step": 12497 }, { "epoch": 297.5731343283582, "grad_norm": 21.17949104309082, "learning_rate": 9.423015873015873e-06, "loss": 33.2735, "step": 12498 }, { "epoch": 297.5970149253731, "grad_norm": 26.658388137817383, "learning_rate": 9.422222222222222e-06, "loss": 35.2951, "step": 12499 }, { "epoch": 297.6208955223881, "grad_norm": 19.48539924621582, "learning_rate": 9.421428571428573e-06, "loss": 34.0202, "step": 12500 }, { "epoch": 297.644776119403, "grad_norm": 33.12431335449219, "learning_rate": 9.420634920634922e-06, "loss": 34.7648, "step": 12501 }, { "epoch": 297.6686567164179, "grad_norm": 25.690244674682617, "learning_rate": 9.419841269841271e-06, "loss": 32.8813, "step": 12502 }, { "epoch": 297.6925373134328, "grad_norm": 34.491947174072266, "learning_rate": 9.41904761904762e-06, "loss": 34.0445, "step": 12503 }, { "epoch": 297.7164179104478, "grad_norm": 31.598169326782227, "learning_rate": 9.41825396825397e-06, "loss": 35.0873, "step": 12504 }, { "epoch": 297.7402985074627, "grad_norm": 29.2357177734375, "learning_rate": 9.417460317460318e-06, "loss": 33.3787, "step": 12505 }, { "epoch": 297.7641791044776, "grad_norm": 24.363779067993164, "learning_rate": 9.416666666666667e-06, "loss": 35.3853, "step": 12506 }, { "epoch": 297.78805970149256, "grad_norm": 28.963375091552734, "learning_rate": 9.415873015873017e-06, "loss": 34.1984, "step": 12507 }, { "epoch": 297.81194029850747, "grad_norm": 23.006086349487305, "learning_rate": 9.415079365079366e-06, "loss": 34.2327, "step": 12508 }, { "epoch": 297.8358208955224, "grad_norm": 39.04518508911133, "learning_rate": 9.414285714285715e-06, "loss": 32.6477, "step": 12509 }, { "epoch": 297.85970149253734, "grad_norm": 33.81496810913086, "learning_rate": 9.413492063492064e-06, "loss": 33.4368, "step": 12510 }, { "epoch": 297.88358208955225, "grad_norm": 32.090091705322266, "learning_rate": 9.412698412698413e-06, "loss": 34.1932, "step": 12511 }, { "epoch": 297.90746268656716, "grad_norm": 30.99967384338379, "learning_rate": 9.411904761904764e-06, "loss": 34.3895, "step": 12512 }, { "epoch": 297.93134328358207, "grad_norm": 28.09487533569336, "learning_rate": 9.411111111111113e-06, "loss": 34.3309, "step": 12513 }, { "epoch": 297.95522388059703, "grad_norm": 25.880022048950195, "learning_rate": 9.41031746031746e-06, "loss": 34.4944, "step": 12514 }, { "epoch": 297.97910447761194, "grad_norm": 30.924030303955078, "learning_rate": 9.40952380952381e-06, "loss": 34.2899, "step": 12515 }, { "epoch": 298.0, "grad_norm": 24.302059173583984, "learning_rate": 9.40873015873016e-06, "loss": 30.3359, "step": 12516 }, { "epoch": 298.0238805970149, "grad_norm": 29.897274017333984, "learning_rate": 9.407936507936509e-06, "loss": 34.8396, "step": 12517 }, { "epoch": 298.0477611940299, "grad_norm": 26.808429718017578, "learning_rate": 9.407142857142858e-06, "loss": 33.7162, "step": 12518 }, { "epoch": 298.0716417910448, "grad_norm": 29.109149932861328, "learning_rate": 9.406349206349207e-06, "loss": 34.579, "step": 12519 }, { "epoch": 298.0955223880597, "grad_norm": 27.8997745513916, "learning_rate": 9.405555555555556e-06, "loss": 34.2835, "step": 12520 }, { "epoch": 298.1194029850746, "grad_norm": 29.360164642333984, "learning_rate": 9.404761904761905e-06, "loss": 35.318, "step": 12521 }, { "epoch": 298.14328358208957, "grad_norm": 29.223485946655273, "learning_rate": 9.403968253968254e-06, "loss": 33.7443, "step": 12522 }, { "epoch": 298.1671641791045, "grad_norm": 28.036426544189453, "learning_rate": 9.403174603174603e-06, "loss": 34.0103, "step": 12523 }, { "epoch": 298.1910447761194, "grad_norm": 23.568742752075195, "learning_rate": 9.402380952380953e-06, "loss": 33.8043, "step": 12524 }, { "epoch": 298.21492537313435, "grad_norm": 32.28895568847656, "learning_rate": 9.401587301587303e-06, "loss": 33.552, "step": 12525 }, { "epoch": 298.23880597014926, "grad_norm": 28.25148582458496, "learning_rate": 9.40079365079365e-06, "loss": 35.2862, "step": 12526 }, { "epoch": 298.26268656716417, "grad_norm": 31.405595779418945, "learning_rate": 9.4e-06, "loss": 33.9998, "step": 12527 }, { "epoch": 298.28656716417913, "grad_norm": 28.964921951293945, "learning_rate": 9.39920634920635e-06, "loss": 33.4029, "step": 12528 }, { "epoch": 298.31044776119404, "grad_norm": 29.21355628967285, "learning_rate": 9.3984126984127e-06, "loss": 34.1061, "step": 12529 }, { "epoch": 298.33432835820895, "grad_norm": 27.861732482910156, "learning_rate": 9.397619047619049e-06, "loss": 34.6255, "step": 12530 }, { "epoch": 298.35820895522386, "grad_norm": 30.02569580078125, "learning_rate": 9.396825396825398e-06, "loss": 34.3636, "step": 12531 }, { "epoch": 298.3820895522388, "grad_norm": 26.379541397094727, "learning_rate": 9.396031746031747e-06, "loss": 33.8475, "step": 12532 }, { "epoch": 298.40597014925373, "grad_norm": 29.752214431762695, "learning_rate": 9.395238095238096e-06, "loss": 33.6342, "step": 12533 }, { "epoch": 298.42985074626864, "grad_norm": 26.6483211517334, "learning_rate": 9.394444444444445e-06, "loss": 33.9557, "step": 12534 }, { "epoch": 298.4537313432836, "grad_norm": 30.56734275817871, "learning_rate": 9.393650793650794e-06, "loss": 32.5809, "step": 12535 }, { "epoch": 298.4776119402985, "grad_norm": 25.45297622680664, "learning_rate": 9.392857142857143e-06, "loss": 34.1128, "step": 12536 }, { "epoch": 298.5014925373134, "grad_norm": 31.98762321472168, "learning_rate": 9.392063492063492e-06, "loss": 33.5747, "step": 12537 }, { "epoch": 298.52537313432833, "grad_norm": 28.247699737548828, "learning_rate": 9.391269841269843e-06, "loss": 34.8701, "step": 12538 }, { "epoch": 298.5492537313433, "grad_norm": 27.908994674682617, "learning_rate": 9.39047619047619e-06, "loss": 34.1208, "step": 12539 }, { "epoch": 298.5731343283582, "grad_norm": 30.79598045349121, "learning_rate": 9.389682539682541e-06, "loss": 35.3856, "step": 12540 }, { "epoch": 298.5970149253731, "grad_norm": 27.6414737701416, "learning_rate": 9.38888888888889e-06, "loss": 34.36, "step": 12541 }, { "epoch": 298.6208955223881, "grad_norm": 26.428972244262695, "learning_rate": 9.388095238095238e-06, "loss": 33.9819, "step": 12542 }, { "epoch": 298.644776119403, "grad_norm": 33.0452766418457, "learning_rate": 9.387301587301588e-06, "loss": 33.901, "step": 12543 }, { "epoch": 298.6686567164179, "grad_norm": 27.6353816986084, "learning_rate": 9.386507936507938e-06, "loss": 33.7931, "step": 12544 }, { "epoch": 298.6925373134328, "grad_norm": 27.583881378173828, "learning_rate": 9.385714285714287e-06, "loss": 33.8017, "step": 12545 }, { "epoch": 298.7164179104478, "grad_norm": 28.089115142822266, "learning_rate": 9.384920634920636e-06, "loss": 34.1774, "step": 12546 }, { "epoch": 298.7402985074627, "grad_norm": 24.383211135864258, "learning_rate": 9.384126984126985e-06, "loss": 34.4093, "step": 12547 }, { "epoch": 298.7641791044776, "grad_norm": 23.508140563964844, "learning_rate": 9.383333333333334e-06, "loss": 33.464, "step": 12548 }, { "epoch": 298.78805970149256, "grad_norm": 30.98582649230957, "learning_rate": 9.382539682539683e-06, "loss": 34.3285, "step": 12549 }, { "epoch": 298.81194029850747, "grad_norm": 24.830345153808594, "learning_rate": 9.381746031746034e-06, "loss": 34.1048, "step": 12550 }, { "epoch": 298.8358208955224, "grad_norm": 32.89422607421875, "learning_rate": 9.380952380952381e-06, "loss": 34.069, "step": 12551 }, { "epoch": 298.85970149253734, "grad_norm": 27.040531158447266, "learning_rate": 9.38015873015873e-06, "loss": 34.6239, "step": 12552 }, { "epoch": 298.88358208955225, "grad_norm": 30.2327938079834, "learning_rate": 9.379365079365081e-06, "loss": 34.2197, "step": 12553 }, { "epoch": 298.90746268656716, "grad_norm": 27.648653030395508, "learning_rate": 9.378571428571428e-06, "loss": 34.4678, "step": 12554 }, { "epoch": 298.93134328358207, "grad_norm": 28.97263526916504, "learning_rate": 9.377777777777779e-06, "loss": 33.818, "step": 12555 }, { "epoch": 298.95522388059703, "grad_norm": 26.15717315673828, "learning_rate": 9.376984126984128e-06, "loss": 34.3434, "step": 12556 }, { "epoch": 298.97910447761194, "grad_norm": 29.58806800842285, "learning_rate": 9.376190476190477e-06, "loss": 33.3861, "step": 12557 }, { "epoch": 299.0, "grad_norm": 22.08994483947754, "learning_rate": 9.375396825396826e-06, "loss": 29.2248, "step": 12558 }, { "epoch": 299.0238805970149, "grad_norm": 26.545316696166992, "learning_rate": 9.374603174603175e-06, "loss": 34.4542, "step": 12559 }, { "epoch": 299.0477611940299, "grad_norm": 22.460620880126953, "learning_rate": 9.373809523809524e-06, "loss": 33.5026, "step": 12560 }, { "epoch": 299.0716417910448, "grad_norm": 31.822744369506836, "learning_rate": 9.373015873015874e-06, "loss": 35.5345, "step": 12561 }, { "epoch": 299.0955223880597, "grad_norm": 26.98124885559082, "learning_rate": 9.372222222222223e-06, "loss": 34.5696, "step": 12562 }, { "epoch": 299.1194029850746, "grad_norm": 31.908985137939453, "learning_rate": 9.371428571428572e-06, "loss": 34.0203, "step": 12563 }, { "epoch": 299.14328358208957, "grad_norm": 27.260379791259766, "learning_rate": 9.37063492063492e-06, "loss": 34.0293, "step": 12564 }, { "epoch": 299.1671641791045, "grad_norm": 27.416215896606445, "learning_rate": 9.369841269841272e-06, "loss": 34.4288, "step": 12565 }, { "epoch": 299.1910447761194, "grad_norm": 22.469879150390625, "learning_rate": 9.36904761904762e-06, "loss": 34.167, "step": 12566 }, { "epoch": 299.21492537313435, "grad_norm": 29.78142738342285, "learning_rate": 9.368253968253968e-06, "loss": 33.9699, "step": 12567 }, { "epoch": 299.23880597014926, "grad_norm": 26.320222854614258, "learning_rate": 9.367460317460319e-06, "loss": 33.7922, "step": 12568 }, { "epoch": 299.26268656716417, "grad_norm": 31.120466232299805, "learning_rate": 9.366666666666668e-06, "loss": 34.5581, "step": 12569 }, { "epoch": 299.28656716417913, "grad_norm": 28.603199005126953, "learning_rate": 9.365873015873017e-06, "loss": 34.4757, "step": 12570 }, { "epoch": 299.31044776119404, "grad_norm": 25.24419403076172, "learning_rate": 9.365079365079366e-06, "loss": 34.3327, "step": 12571 }, { "epoch": 299.33432835820895, "grad_norm": 22.70826530456543, "learning_rate": 9.364285714285715e-06, "loss": 32.9677, "step": 12572 }, { "epoch": 299.35820895522386, "grad_norm": 27.052648544311523, "learning_rate": 9.363492063492064e-06, "loss": 33.9781, "step": 12573 }, { "epoch": 299.3820895522388, "grad_norm": 21.436756134033203, "learning_rate": 9.362698412698413e-06, "loss": 33.6712, "step": 12574 }, { "epoch": 299.40597014925373, "grad_norm": 32.069488525390625, "learning_rate": 9.361904761904762e-06, "loss": 33.2873, "step": 12575 }, { "epoch": 299.42985074626864, "grad_norm": 25.22380828857422, "learning_rate": 9.361111111111111e-06, "loss": 34.0121, "step": 12576 }, { "epoch": 299.4537313432836, "grad_norm": 27.944772720336914, "learning_rate": 9.36031746031746e-06, "loss": 34.542, "step": 12577 }, { "epoch": 299.4776119402985, "grad_norm": NaN, "learning_rate": 9.359523809523811e-06, "loss": 29.7356, "step": 12578 }, { "epoch": 299.5014925373134, "grad_norm": 27.564472198486328, "learning_rate": 9.359523809523811e-06, "loss": 34.4153, "step": 12579 }, { "epoch": 299.52537313432833, "grad_norm": 29.498014450073242, "learning_rate": 9.358730158730159e-06, "loss": 33.3341, "step": 12580 }, { "epoch": 299.5492537313433, "grad_norm": 23.420347213745117, "learning_rate": 9.35793650793651e-06, "loss": 34.2089, "step": 12581 }, { "epoch": 299.5731343283582, "grad_norm": 31.40555191040039, "learning_rate": 9.357142857142859e-06, "loss": 34.7442, "step": 12582 }, { "epoch": 299.5970149253731, "grad_norm": 27.798049926757812, "learning_rate": 9.356349206349206e-06, "loss": 33.1066, "step": 12583 }, { "epoch": 299.6208955223881, "grad_norm": 30.39460563659668, "learning_rate": 9.355555555555557e-06, "loss": 34.2612, "step": 12584 }, { "epoch": 299.644776119403, "grad_norm": 28.282512664794922, "learning_rate": 9.354761904761906e-06, "loss": 34.7089, "step": 12585 }, { "epoch": 299.6686567164179, "grad_norm": 25.006746292114258, "learning_rate": 9.353968253968255e-06, "loss": 33.8158, "step": 12586 }, { "epoch": 299.6925373134328, "grad_norm": 24.87606430053711, "learning_rate": 9.353174603174604e-06, "loss": 33.8517, "step": 12587 }, { "epoch": 299.7164179104478, "grad_norm": 28.879676818847656, "learning_rate": 9.352380952380953e-06, "loss": 33.885, "step": 12588 }, { "epoch": 299.7402985074627, "grad_norm": 23.082223892211914, "learning_rate": 9.351587301587302e-06, "loss": 35.6958, "step": 12589 }, { "epoch": 299.7641791044776, "grad_norm": 28.886215209960938, "learning_rate": 9.350793650793651e-06, "loss": 33.1954, "step": 12590 }, { "epoch": 299.78805970149256, "grad_norm": 22.496732711791992, "learning_rate": 9.350000000000002e-06, "loss": 34.3587, "step": 12591 }, { "epoch": 299.81194029850747, "grad_norm": 28.374666213989258, "learning_rate": 9.34920634920635e-06, "loss": 34.6024, "step": 12592 }, { "epoch": 299.8358208955224, "grad_norm": 23.658954620361328, "learning_rate": 9.348412698412698e-06, "loss": 33.771, "step": 12593 }, { "epoch": 299.85970149253734, "grad_norm": 29.819311141967773, "learning_rate": 9.34761904761905e-06, "loss": 34.2294, "step": 12594 }, { "epoch": 299.88358208955225, "grad_norm": 23.745912551879883, "learning_rate": 9.346825396825398e-06, "loss": 34.2839, "step": 12595 }, { "epoch": 299.90746268656716, "grad_norm": 31.37458038330078, "learning_rate": 9.346031746031747e-06, "loss": 34.0879, "step": 12596 }, { "epoch": 299.93134328358207, "grad_norm": 26.10491180419922, "learning_rate": 9.345238095238096e-06, "loss": 32.6205, "step": 12597 }, { "epoch": 299.95522388059703, "grad_norm": 26.289323806762695, "learning_rate": 9.344444444444446e-06, "loss": 34.3294, "step": 12598 }, { "epoch": 299.97910447761194, "grad_norm": NaN, "learning_rate": 9.343650793650795e-06, "loss": 42.9851, "step": 12599 }, { "epoch": 300.0, "grad_norm": 21.931875228881836, "learning_rate": 9.343650793650795e-06, "loss": 29.5696, "step": 12600 }, { "epoch": 300.0, "step": 12600, "total_flos": 6.194079361991644e+17, "train_loss": 2.3002554760282004, "train_runtime": 25617.7034, "train_samples_per_second": 62.675, "train_steps_per_second": 0.492 }, { "epoch": 300.0238805970149, "grad_norm": 26.171287536621094, "learning_rate": 1e-05, "loss": 35.2852, "step": 12601 }, { "epoch": 300.0477611940299, "grad_norm": Infinity, "learning_rate": 9.999255952380954e-06, "loss": 39.4791, "step": 12602 }, { "epoch": 300.0716417910448, "grad_norm": Infinity, "learning_rate": 9.999255952380954e-06, "loss": 41.0054, "step": 12603 }, { "epoch": 300.0955223880597, "grad_norm": 366.8433837890625, "learning_rate": 9.999255952380954e-06, "loss": 40.4224, "step": 12604 }, { "epoch": 300.1194029850746, "grad_norm": 206.70486450195312, "learning_rate": 9.998511904761904e-06, "loss": 38.5891, "step": 12605 }, { "epoch": 300.14328358208957, "grad_norm": 90.80491638183594, "learning_rate": 9.997767857142859e-06, "loss": 36.7585, "step": 12606 }, { "epoch": 300.1671641791045, "grad_norm": 69.32449340820312, "learning_rate": 9.99702380952381e-06, "loss": 36.3685, "step": 12607 }, { "epoch": 300.1910447761194, "grad_norm": 60.954158782958984, "learning_rate": 9.996279761904763e-06, "loss": 35.251, "step": 12608 }, { "epoch": 300.21492537313435, "grad_norm": 49.41020202636719, "learning_rate": 9.995535714285715e-06, "loss": 34.9641, "step": 12609 }, { "epoch": 300.23880597014926, "grad_norm": 48.63925552368164, "learning_rate": 9.994791666666668e-06, "loss": 35.2815, "step": 12610 }, { "epoch": 300.26268656716417, "grad_norm": 33.32303237915039, "learning_rate": 9.99404761904762e-06, "loss": 33.8647, "step": 12611 }, { "epoch": 300.28656716417913, "grad_norm": 34.75282669067383, "learning_rate": 9.993303571428572e-06, "loss": 33.7036, "step": 12612 }, { "epoch": 300.31044776119404, "grad_norm": 31.14154815673828, "learning_rate": 9.992559523809524e-06, "loss": 35.0993, "step": 12613 }, { "epoch": 300.33432835820895, "grad_norm": 25.557985305786133, "learning_rate": 9.991815476190477e-06, "loss": 34.5335, "step": 12614 }, { "epoch": 300.35820895522386, "grad_norm": 28.474023818969727, "learning_rate": 9.99107142857143e-06, "loss": 32.82, "step": 12615 }, { "epoch": 300.3820895522388, "grad_norm": 22.004846572875977, "learning_rate": 9.990327380952382e-06, "loss": 34.1407, "step": 12616 }, { "epoch": 300.40597014925373, "grad_norm": 22.144994735717773, "learning_rate": 9.989583333333333e-06, "loss": 33.5889, "step": 12617 }, { "epoch": 300.42985074626864, "grad_norm": 19.678911209106445, "learning_rate": 9.988839285714286e-06, "loss": 33.8089, "step": 12618 }, { "epoch": 300.4537313432836, "grad_norm": 18.216703414916992, "learning_rate": 9.988095238095239e-06, "loss": 34.4819, "step": 12619 }, { "epoch": 300.4776119402985, "grad_norm": 19.384990692138672, "learning_rate": 9.987351190476191e-06, "loss": 33.4617, "step": 12620 }, { "epoch": 300.5014925373134, "grad_norm": 19.665416717529297, "learning_rate": 9.986607142857142e-06, "loss": 33.7936, "step": 12621 }, { "epoch": 300.52537313432833, "grad_norm": 19.364046096801758, "learning_rate": 9.985863095238097e-06, "loss": 33.9958, "step": 12622 }, { "epoch": 300.5492537313433, "grad_norm": 16.984670639038086, "learning_rate": 9.985119047619048e-06, "loss": 34.3715, "step": 12623 }, { "epoch": 300.5731343283582, "grad_norm": 20.104597091674805, "learning_rate": 9.984375e-06, "loss": 34.824, "step": 12624 }, { "epoch": 300.5970149253731, "grad_norm": 19.9632511138916, "learning_rate": 9.983630952380953e-06, "loss": 34.0996, "step": 12625 }, { "epoch": 300.6208955223881, "grad_norm": 19.555803298950195, "learning_rate": 9.982886904761906e-06, "loss": 33.1285, "step": 12626 }, { "epoch": 300.644776119403, "grad_norm": 14.718427658081055, "learning_rate": 9.982142857142858e-06, "loss": 33.6884, "step": 12627 }, { "epoch": 300.6686567164179, "grad_norm": 18.065269470214844, "learning_rate": 9.98139880952381e-06, "loss": 32.808, "step": 12628 }, { "epoch": 300.6925373134328, "grad_norm": 27.190515518188477, "learning_rate": 9.980654761904762e-06, "loss": 33.6793, "step": 12629 }, { "epoch": 300.7164179104478, "grad_norm": 18.258726119995117, "learning_rate": 9.979910714285715e-06, "loss": 34.8021, "step": 12630 }, { "epoch": 300.7402985074627, "grad_norm": 19.380048751831055, "learning_rate": 9.979166666666668e-06, "loss": 34.4981, "step": 12631 }, { "epoch": 300.7641791044776, "grad_norm": 25.334543228149414, "learning_rate": 9.97842261904762e-06, "loss": 34.8994, "step": 12632 }, { "epoch": 300.78805970149256, "grad_norm": 15.441366195678711, "learning_rate": 9.977678571428571e-06, "loss": 33.9724, "step": 12633 }, { "epoch": 300.81194029850747, "grad_norm": 16.614748001098633, "learning_rate": 9.976934523809526e-06, "loss": 34.1694, "step": 12634 }, { "epoch": 300.8358208955224, "grad_norm": 16.37385368347168, "learning_rate": 9.976190476190477e-06, "loss": 33.9853, "step": 12635 }, { "epoch": 300.85970149253734, "grad_norm": 17.56476593017578, "learning_rate": 9.97544642857143e-06, "loss": 35.3559, "step": 12636 }, { "epoch": 300.88358208955225, "grad_norm": 15.733250617980957, "learning_rate": 9.974702380952382e-06, "loss": 33.4581, "step": 12637 }, { "epoch": 300.90746268656716, "grad_norm": 17.142967224121094, "learning_rate": 9.973958333333335e-06, "loss": 34.729, "step": 12638 }, { "epoch": 300.93134328358207, "grad_norm": 23.365276336669922, "learning_rate": 9.973214285714287e-06, "loss": 35.2895, "step": 12639 }, { "epoch": 300.95522388059703, "grad_norm": 16.783174514770508, "learning_rate": 9.972470238095238e-06, "loss": 34.4815, "step": 12640 }, { "epoch": 300.97910447761194, "grad_norm": 13.008834838867188, "learning_rate": 9.971726190476191e-06, "loss": 35.215, "step": 12641 }, { "epoch": 301.0, "grad_norm": 12.587215423583984, "learning_rate": 9.970982142857144e-06, "loss": 30.3054, "step": 12642 }, { "epoch": 301.0238805970149, "grad_norm": 15.930685043334961, "learning_rate": 9.970238095238096e-06, "loss": 33.6951, "step": 12643 }, { "epoch": 301.0477611940299, "grad_norm": 20.298192977905273, "learning_rate": 9.969494047619049e-06, "loss": 34.2815, "step": 12644 }, { "epoch": 301.0716417910448, "grad_norm": 17.51775360107422, "learning_rate": 9.96875e-06, "loss": 35.7121, "step": 12645 }, { "epoch": 301.0955223880597, "grad_norm": 15.353557586669922, "learning_rate": 9.968005952380953e-06, "loss": 33.3068, "step": 12646 }, { "epoch": 301.1194029850746, "grad_norm": 13.602364540100098, "learning_rate": 9.967261904761905e-06, "loss": 34.2159, "step": 12647 }, { "epoch": 301.14328358208957, "grad_norm": 16.48188591003418, "learning_rate": 9.966517857142858e-06, "loss": 33.9076, "step": 12648 }, { "epoch": 301.1671641791045, "grad_norm": 15.48665714263916, "learning_rate": 9.965773809523809e-06, "loss": 34.5498, "step": 12649 }, { "epoch": 301.1910447761194, "grad_norm": 14.90807819366455, "learning_rate": 9.965029761904763e-06, "loss": 33.397, "step": 12650 }, { "epoch": 301.21492537313435, "grad_norm": 15.84902286529541, "learning_rate": 9.964285714285714e-06, "loss": 34.2463, "step": 12651 }, { "epoch": 301.23880597014926, "grad_norm": 16.459049224853516, "learning_rate": 9.963541666666667e-06, "loss": 33.443, "step": 12652 }, { "epoch": 301.26268656716417, "grad_norm": 16.44605827331543, "learning_rate": 9.96279761904762e-06, "loss": 35.0376, "step": 12653 }, { "epoch": 301.28656716417913, "grad_norm": 27.001888275146484, "learning_rate": 9.962053571428573e-06, "loss": 34.9869, "step": 12654 }, { "epoch": 301.31044776119404, "grad_norm": 17.03106117248535, "learning_rate": 9.961309523809525e-06, "loss": 35.5666, "step": 12655 }, { "epoch": 301.33432835820895, "grad_norm": 19.105209350585938, "learning_rate": 9.960565476190476e-06, "loss": 33.8211, "step": 12656 }, { "epoch": 301.35820895522386, "grad_norm": 24.32528305053711, "learning_rate": 9.959821428571429e-06, "loss": 33.7321, "step": 12657 }, { "epoch": 301.3820895522388, "grad_norm": 14.261208534240723, "learning_rate": 9.959077380952382e-06, "loss": 33.4276, "step": 12658 }, { "epoch": 301.40597014925373, "grad_norm": 13.748629570007324, "learning_rate": 9.958333333333334e-06, "loss": 33.5701, "step": 12659 }, { "epoch": 301.42985074626864, "grad_norm": 18.491018295288086, "learning_rate": 9.957589285714287e-06, "loss": 32.7185, "step": 12660 }, { "epoch": 301.4537313432836, "grad_norm": 17.87461280822754, "learning_rate": 9.956845238095238e-06, "loss": 33.5648, "step": 12661 }, { "epoch": 301.4776119402985, "grad_norm": 19.160520553588867, "learning_rate": 9.956101190476192e-06, "loss": 34.6172, "step": 12662 }, { "epoch": 301.5014925373134, "grad_norm": 13.538578987121582, "learning_rate": 9.955357142857143e-06, "loss": 33.9981, "step": 12663 }, { "epoch": 301.52537313432833, "grad_norm": 20.332965850830078, "learning_rate": 9.954613095238096e-06, "loss": 34.503, "step": 12664 }, { "epoch": 301.5492537313433, "grad_norm": 22.29603385925293, "learning_rate": 9.953869047619049e-06, "loss": 33.5233, "step": 12665 }, { "epoch": 301.5731343283582, "grad_norm": 14.441452026367188, "learning_rate": 9.953125000000001e-06, "loss": 33.2648, "step": 12666 }, { "epoch": 301.5970149253731, "grad_norm": 16.77598762512207, "learning_rate": 9.952380952380954e-06, "loss": 33.8784, "step": 12667 }, { "epoch": 301.6208955223881, "grad_norm": NaN, "learning_rate": 9.951636904761905e-06, "loss": 45.9914, "step": 12668 }, { "epoch": 301.644776119403, "grad_norm": 18.28978729248047, "learning_rate": 9.951636904761905e-06, "loss": 34.5955, "step": 12669 }, { "epoch": 301.6686567164179, "grad_norm": 20.85295867919922, "learning_rate": 9.950892857142858e-06, "loss": 33.6025, "step": 12670 }, { "epoch": 301.6925373134328, "grad_norm": 15.833793640136719, "learning_rate": 9.95014880952381e-06, "loss": 34.9255, "step": 12671 }, { "epoch": 301.7164179104478, "grad_norm": 25.031152725219727, "learning_rate": 9.949404761904763e-06, "loss": 35.3748, "step": 12672 }, { "epoch": 301.7402985074627, "grad_norm": 18.836427688598633, "learning_rate": 9.948660714285716e-06, "loss": 32.7802, "step": 12673 }, { "epoch": 301.7641791044776, "grad_norm": 18.327163696289062, "learning_rate": 9.947916666666667e-06, "loss": 33.6246, "step": 12674 }, { "epoch": 301.78805970149256, "grad_norm": 24.15089988708496, "learning_rate": 9.947172619047621e-06, "loss": 34.5693, "step": 12675 }, { "epoch": 301.81194029850747, "grad_norm": 16.35633087158203, "learning_rate": 9.946428571428572e-06, "loss": 34.2978, "step": 12676 }, { "epoch": 301.8358208955224, "grad_norm": 20.777875900268555, "learning_rate": 9.945684523809525e-06, "loss": 34.7239, "step": 12677 }, { "epoch": 301.85970149253734, "grad_norm": 21.883655548095703, "learning_rate": 9.944940476190476e-06, "loss": 33.3973, "step": 12678 }, { "epoch": 301.88358208955225, "grad_norm": 19.19086265563965, "learning_rate": 9.94419642857143e-06, "loss": 33.5899, "step": 12679 }, { "epoch": 301.90746268656716, "grad_norm": 16.247377395629883, "learning_rate": 9.943452380952381e-06, "loss": 34.0119, "step": 12680 }, { "epoch": 301.93134328358207, "grad_norm": 25.994731903076172, "learning_rate": 9.942708333333334e-06, "loss": 34.4688, "step": 12681 }, { "epoch": 301.95522388059703, "grad_norm": 18.13869857788086, "learning_rate": 9.941964285714287e-06, "loss": 34.0545, "step": 12682 }, { "epoch": 301.97910447761194, "grad_norm": 20.08393096923828, "learning_rate": 9.94122023809524e-06, "loss": 33.2465, "step": 12683 }, { "epoch": 302.0, "grad_norm": 19.09702491760254, "learning_rate": 9.940476190476192e-06, "loss": 30.7813, "step": 12684 }, { "epoch": 302.0238805970149, "grad_norm": 19.38040542602539, "learning_rate": 9.939732142857143e-06, "loss": 34.5853, "step": 12685 }, { "epoch": 302.0477611940299, "grad_norm": 17.767541885375977, "learning_rate": 9.938988095238096e-06, "loss": 34.1928, "step": 12686 }, { "epoch": 302.0716417910448, "grad_norm": 26.805606842041016, "learning_rate": 9.938244047619048e-06, "loss": 33.806, "step": 12687 }, { "epoch": 302.0955223880597, "grad_norm": 15.128178596496582, "learning_rate": 9.937500000000001e-06, "loss": 33.5924, "step": 12688 }, { "epoch": 302.1194029850746, "grad_norm": 24.914600372314453, "learning_rate": 9.936755952380954e-06, "loss": 33.5394, "step": 12689 }, { "epoch": 302.14328358208957, "grad_norm": 19.96113395690918, "learning_rate": 9.936011904761905e-06, "loss": 34.0537, "step": 12690 }, { "epoch": 302.1671641791045, "grad_norm": 17.563623428344727, "learning_rate": 9.935267857142859e-06, "loss": 34.6207, "step": 12691 }, { "epoch": 302.1910447761194, "grad_norm": 28.396629333496094, "learning_rate": 9.93452380952381e-06, "loss": 34.8099, "step": 12692 }, { "epoch": 302.21492537313435, "grad_norm": 16.51083755493164, "learning_rate": 9.933779761904763e-06, "loss": 34.0849, "step": 12693 }, { "epoch": 302.23880597014926, "grad_norm": 27.13544273376465, "learning_rate": 9.933035714285715e-06, "loss": 34.1322, "step": 12694 }, { "epoch": 302.26268656716417, "grad_norm": 19.99069595336914, "learning_rate": 9.932291666666668e-06, "loss": 33.3611, "step": 12695 }, { "epoch": 302.28656716417913, "grad_norm": 21.026384353637695, "learning_rate": 9.93154761904762e-06, "loss": 34.6265, "step": 12696 }, { "epoch": 302.31044776119404, "grad_norm": 21.092233657836914, "learning_rate": 9.930803571428572e-06, "loss": 33.2195, "step": 12697 }, { "epoch": 302.33432835820895, "grad_norm": 18.910707473754883, "learning_rate": 9.930059523809524e-06, "loss": 34.1341, "step": 12698 }, { "epoch": 302.35820895522386, "grad_norm": 16.45378875732422, "learning_rate": 9.929315476190477e-06, "loss": 32.7027, "step": 12699 }, { "epoch": 302.3820895522388, "grad_norm": 22.312129974365234, "learning_rate": 9.92857142857143e-06, "loss": 35.3579, "step": 12700 }, { "epoch": 302.40597014925373, "grad_norm": 16.72593879699707, "learning_rate": 9.927827380952383e-06, "loss": 32.2487, "step": 12701 }, { "epoch": 302.42985074626864, "grad_norm": 17.502164840698242, "learning_rate": 9.927083333333334e-06, "loss": 34.8174, "step": 12702 }, { "epoch": 302.4537313432836, "grad_norm": 18.55446434020996, "learning_rate": 9.926339285714288e-06, "loss": 33.5066, "step": 12703 }, { "epoch": 302.4776119402985, "grad_norm": 14.626564979553223, "learning_rate": 9.925595238095239e-06, "loss": 33.6174, "step": 12704 }, { "epoch": 302.5014925373134, "grad_norm": 20.662425994873047, "learning_rate": 9.924851190476192e-06, "loss": 33.3769, "step": 12705 }, { "epoch": 302.52537313432833, "grad_norm": 17.78053092956543, "learning_rate": 9.924107142857143e-06, "loss": 35.3668, "step": 12706 }, { "epoch": 302.5492537313433, "grad_norm": 19.1500186920166, "learning_rate": 9.923363095238097e-06, "loss": 33.919, "step": 12707 }, { "epoch": 302.5731343283582, "grad_norm": 17.39780616760254, "learning_rate": 9.922619047619048e-06, "loss": 33.9475, "step": 12708 }, { "epoch": 302.5970149253731, "grad_norm": 15.932716369628906, "learning_rate": 9.921875e-06, "loss": 33.6818, "step": 12709 }, { "epoch": 302.6208955223881, "grad_norm": 13.43214225769043, "learning_rate": 9.921130952380953e-06, "loss": 35.5086, "step": 12710 }, { "epoch": 302.644776119403, "grad_norm": 18.646276473999023, "learning_rate": 9.920386904761904e-06, "loss": 33.3648, "step": 12711 }, { "epoch": 302.6686567164179, "grad_norm": 17.119834899902344, "learning_rate": 9.919642857142859e-06, "loss": 35.7617, "step": 12712 }, { "epoch": 302.6925373134328, "grad_norm": 17.717750549316406, "learning_rate": 9.91889880952381e-06, "loss": 33.1063, "step": 12713 }, { "epoch": 302.7164179104478, "grad_norm": 17.798465728759766, "learning_rate": 9.918154761904762e-06, "loss": 34.2558, "step": 12714 }, { "epoch": 302.7402985074627, "grad_norm": 15.104628562927246, "learning_rate": 9.917410714285715e-06, "loss": 32.9761, "step": 12715 }, { "epoch": 302.7641791044776, "grad_norm": 15.719858169555664, "learning_rate": 9.916666666666668e-06, "loss": 34.086, "step": 12716 }, { "epoch": 302.78805970149256, "grad_norm": 15.24967098236084, "learning_rate": 9.91592261904762e-06, "loss": 34.8758, "step": 12717 }, { "epoch": 302.81194029850747, "grad_norm": 14.304805755615234, "learning_rate": 9.915178571428571e-06, "loss": 33.4391, "step": 12718 }, { "epoch": 302.8358208955224, "grad_norm": 15.708247184753418, "learning_rate": 9.914434523809524e-06, "loss": 33.8727, "step": 12719 }, { "epoch": 302.85970149253734, "grad_norm": 17.25934410095215, "learning_rate": 9.913690476190477e-06, "loss": 33.8216, "step": 12720 }, { "epoch": 302.88358208955225, "grad_norm": 21.16497039794922, "learning_rate": 9.91294642857143e-06, "loss": 34.6096, "step": 12721 }, { "epoch": 302.90746268656716, "grad_norm": 15.628374099731445, "learning_rate": 9.912202380952382e-06, "loss": 34.5423, "step": 12722 }, { "epoch": 302.93134328358207, "grad_norm": 15.003438949584961, "learning_rate": 9.911458333333333e-06, "loss": 33.9223, "step": 12723 }, { "epoch": 302.95522388059703, "grad_norm": 16.933246612548828, "learning_rate": 9.910714285714288e-06, "loss": 33.9144, "step": 12724 }, { "epoch": 302.97910447761194, "grad_norm": 15.308454513549805, "learning_rate": 9.909970238095238e-06, "loss": 34.005, "step": 12725 }, { "epoch": 303.0, "grad_norm": 15.518234252929688, "learning_rate": 9.909226190476191e-06, "loss": 29.1557, "step": 12726 }, { "epoch": 303.0238805970149, "grad_norm": 14.188685417175293, "learning_rate": 9.908482142857144e-06, "loss": 35.1923, "step": 12727 }, { "epoch": 303.0477611940299, "grad_norm": 19.43871307373047, "learning_rate": 9.907738095238097e-06, "loss": 33.6881, "step": 12728 }, { "epoch": 303.0716417910448, "grad_norm": 14.773733139038086, "learning_rate": 9.90699404761905e-06, "loss": 34.4568, "step": 12729 }, { "epoch": 303.0955223880597, "grad_norm": 18.925222396850586, "learning_rate": 9.90625e-06, "loss": 34.5105, "step": 12730 }, { "epoch": 303.1194029850746, "grad_norm": 15.766783714294434, "learning_rate": 9.905505952380953e-06, "loss": 35.4248, "step": 12731 }, { "epoch": 303.14328358208957, "grad_norm": 16.977514266967773, "learning_rate": 9.904761904761906e-06, "loss": 33.9733, "step": 12732 }, { "epoch": 303.1671641791045, "grad_norm": 17.148256301879883, "learning_rate": 9.904017857142858e-06, "loss": 32.9881, "step": 12733 }, { "epoch": 303.1910447761194, "grad_norm": 16.359018325805664, "learning_rate": 9.90327380952381e-06, "loss": 34.7802, "step": 12734 }, { "epoch": 303.21492537313435, "grad_norm": 17.943899154663086, "learning_rate": 9.902529761904762e-06, "loss": 34.0458, "step": 12735 }, { "epoch": 303.23880597014926, "grad_norm": 19.585962295532227, "learning_rate": 9.901785714285715e-06, "loss": 34.242, "step": 12736 }, { "epoch": 303.26268656716417, "grad_norm": 21.159753799438477, "learning_rate": 9.901041666666667e-06, "loss": 34.6812, "step": 12737 }, { "epoch": 303.28656716417913, "grad_norm": 13.98138427734375, "learning_rate": 9.90029761904762e-06, "loss": 33.2287, "step": 12738 }, { "epoch": 303.31044776119404, "grad_norm": 18.36336326599121, "learning_rate": 9.899553571428571e-06, "loss": 33.3087, "step": 12739 }, { "epoch": 303.33432835820895, "grad_norm": 15.030476570129395, "learning_rate": 9.898809523809525e-06, "loss": 35.2234, "step": 12740 }, { "epoch": 303.35820895522386, "grad_norm": 17.081613540649414, "learning_rate": 9.898065476190476e-06, "loss": 33.2825, "step": 12741 }, { "epoch": 303.3820895522388, "grad_norm": 17.4920654296875, "learning_rate": 9.897321428571429e-06, "loss": 33.3747, "step": 12742 }, { "epoch": 303.40597014925373, "grad_norm": 16.359024047851562, "learning_rate": 9.896577380952382e-06, "loss": 33.502, "step": 12743 }, { "epoch": 303.42985074626864, "grad_norm": 24.50620460510254, "learning_rate": 9.895833333333334e-06, "loss": 33.6481, "step": 12744 }, { "epoch": 303.4537313432836, "grad_norm": 14.11179256439209, "learning_rate": 9.895089285714287e-06, "loss": 33.9526, "step": 12745 }, { "epoch": 303.4776119402985, "grad_norm": 16.194307327270508, "learning_rate": 9.894345238095238e-06, "loss": 33.9687, "step": 12746 }, { "epoch": 303.5014925373134, "grad_norm": 19.30034828186035, "learning_rate": 9.89360119047619e-06, "loss": 33.1953, "step": 12747 }, { "epoch": 303.52537313432833, "grad_norm": 20.41242790222168, "learning_rate": 9.892857142857143e-06, "loss": 35.2336, "step": 12748 }, { "epoch": 303.5492537313433, "grad_norm": 19.893640518188477, "learning_rate": 9.892113095238096e-06, "loss": 33.6545, "step": 12749 }, { "epoch": 303.5731343283582, "grad_norm": 15.920687675476074, "learning_rate": 9.891369047619049e-06, "loss": 33.4267, "step": 12750 }, { "epoch": 303.5970149253731, "grad_norm": 18.475324630737305, "learning_rate": 9.890625e-06, "loss": 33.6388, "step": 12751 }, { "epoch": 303.6208955223881, "grad_norm": 17.941503524780273, "learning_rate": 9.889880952380954e-06, "loss": 33.4139, "step": 12752 }, { "epoch": 303.644776119403, "grad_norm": 19.626501083374023, "learning_rate": 9.889136904761905e-06, "loss": 33.9111, "step": 12753 }, { "epoch": 303.6686567164179, "grad_norm": 16.618045806884766, "learning_rate": 9.888392857142858e-06, "loss": 32.954, "step": 12754 }, { "epoch": 303.6925373134328, "grad_norm": 17.80182456970215, "learning_rate": 9.88764880952381e-06, "loss": 34.252, "step": 12755 }, { "epoch": 303.7164179104478, "grad_norm": 14.318121910095215, "learning_rate": 9.886904761904763e-06, "loss": 34.4709, "step": 12756 }, { "epoch": 303.7402985074627, "grad_norm": 16.709972381591797, "learning_rate": 9.886160714285716e-06, "loss": 33.7177, "step": 12757 }, { "epoch": 303.7641791044776, "grad_norm": 21.392791748046875, "learning_rate": 9.885416666666667e-06, "loss": 32.6111, "step": 12758 }, { "epoch": 303.78805970149256, "grad_norm": 19.380441665649414, "learning_rate": 9.88467261904762e-06, "loss": 34.8239, "step": 12759 }, { "epoch": 303.81194029850747, "grad_norm": 14.385751724243164, "learning_rate": 9.883928571428572e-06, "loss": 33.4982, "step": 12760 }, { "epoch": 303.8358208955224, "grad_norm": 16.864112854003906, "learning_rate": 9.883184523809525e-06, "loss": 34.4644, "step": 12761 }, { "epoch": 303.85970149253734, "grad_norm": 19.41377830505371, "learning_rate": 9.882440476190478e-06, "loss": 32.4874, "step": 12762 }, { "epoch": 303.88358208955225, "grad_norm": 18.528348922729492, "learning_rate": 9.881696428571429e-06, "loss": 34.8511, "step": 12763 }, { "epoch": 303.90746268656716, "grad_norm": 15.660223960876465, "learning_rate": 9.880952380952381e-06, "loss": 34.883, "step": 12764 }, { "epoch": 303.93134328358207, "grad_norm": 12.949760437011719, "learning_rate": 9.880208333333334e-06, "loss": 33.6117, "step": 12765 }, { "epoch": 303.95522388059703, "grad_norm": 17.378768920898438, "learning_rate": 9.879464285714287e-06, "loss": 34.1157, "step": 12766 }, { "epoch": 303.97910447761194, "grad_norm": 15.662836074829102, "learning_rate": 9.878720238095238e-06, "loss": 34.0723, "step": 12767 }, { "epoch": 304.0, "grad_norm": 19.132266998291016, "learning_rate": 9.877976190476192e-06, "loss": 30.1438, "step": 12768 }, { "epoch": 304.0238805970149, "grad_norm": 14.296299934387207, "learning_rate": 9.877232142857143e-06, "loss": 33.8153, "step": 12769 }, { "epoch": 304.0477611940299, "grad_norm": 17.48133087158203, "learning_rate": 9.876488095238096e-06, "loss": 34.5658, "step": 12770 }, { "epoch": 304.0716417910448, "grad_norm": 18.91547203063965, "learning_rate": 9.875744047619048e-06, "loss": 33.7174, "step": 12771 }, { "epoch": 304.0955223880597, "grad_norm": 19.87838363647461, "learning_rate": 9.875000000000001e-06, "loss": 33.9482, "step": 12772 }, { "epoch": 304.1194029850746, "grad_norm": 16.255050659179688, "learning_rate": 9.874255952380954e-06, "loss": 33.1052, "step": 12773 }, { "epoch": 304.14328358208957, "grad_norm": 17.426010131835938, "learning_rate": 9.873511904761905e-06, "loss": 34.3741, "step": 12774 }, { "epoch": 304.1671641791045, "grad_norm": 13.147067070007324, "learning_rate": 9.872767857142858e-06, "loss": 33.9996, "step": 12775 }, { "epoch": 304.1910447761194, "grad_norm": 25.692888259887695, "learning_rate": 9.87202380952381e-06, "loss": 34.756, "step": 12776 }, { "epoch": 304.21492537313435, "grad_norm": 18.753189086914062, "learning_rate": 9.871279761904763e-06, "loss": 32.5578, "step": 12777 }, { "epoch": 304.23880597014926, "grad_norm": 18.00774383544922, "learning_rate": 9.870535714285716e-06, "loss": 33.9436, "step": 12778 }, { "epoch": 304.26268656716417, "grad_norm": 17.329633712768555, "learning_rate": 9.869791666666667e-06, "loss": 34.6306, "step": 12779 }, { "epoch": 304.28656716417913, "grad_norm": 19.246505737304688, "learning_rate": 9.869047619047621e-06, "loss": 34.655, "step": 12780 }, { "epoch": 304.31044776119404, "grad_norm": 19.427467346191406, "learning_rate": 9.868303571428572e-06, "loss": 32.5883, "step": 12781 }, { "epoch": 304.33432835820895, "grad_norm": 20.187152862548828, "learning_rate": 9.867559523809525e-06, "loss": 33.6937, "step": 12782 }, { "epoch": 304.35820895522386, "grad_norm": 15.276482582092285, "learning_rate": 9.866815476190477e-06, "loss": 35.0563, "step": 12783 }, { "epoch": 304.3820895522388, "grad_norm": 14.907896041870117, "learning_rate": 9.86607142857143e-06, "loss": 34.5241, "step": 12784 }, { "epoch": 304.40597014925373, "grad_norm": 18.98166847229004, "learning_rate": 9.865327380952383e-06, "loss": 34.6579, "step": 12785 }, { "epoch": 304.42985074626864, "grad_norm": 19.271556854248047, "learning_rate": 9.864583333333334e-06, "loss": 33.7333, "step": 12786 }, { "epoch": 304.4537313432836, "grad_norm": 13.590328216552734, "learning_rate": 9.863839285714286e-06, "loss": 34.1618, "step": 12787 }, { "epoch": 304.4776119402985, "grad_norm": 14.266732215881348, "learning_rate": 9.863095238095239e-06, "loss": 33.2994, "step": 12788 }, { "epoch": 304.5014925373134, "grad_norm": 15.15308666229248, "learning_rate": 9.862351190476192e-06, "loss": 34.8215, "step": 12789 }, { "epoch": 304.52537313432833, "grad_norm": 18.415672302246094, "learning_rate": 9.861607142857144e-06, "loss": 33.2897, "step": 12790 }, { "epoch": 304.5492537313433, "grad_norm": 17.03144645690918, "learning_rate": 9.860863095238095e-06, "loss": 34.4784, "step": 12791 }, { "epoch": 304.5731343283582, "grad_norm": 17.239591598510742, "learning_rate": 9.860119047619048e-06, "loss": 33.5741, "step": 12792 }, { "epoch": 304.5970149253731, "grad_norm": 17.369388580322266, "learning_rate": 9.859375e-06, "loss": 33.7227, "step": 12793 }, { "epoch": 304.6208955223881, "grad_norm": 14.208517074584961, "learning_rate": 9.858630952380953e-06, "loss": 34.1275, "step": 12794 }, { "epoch": 304.644776119403, "grad_norm": 22.755041122436523, "learning_rate": 9.857886904761904e-06, "loss": 34.2113, "step": 12795 }, { "epoch": 304.6686567164179, "grad_norm": 17.733745574951172, "learning_rate": 9.857142857142859e-06, "loss": 34.4321, "step": 12796 }, { "epoch": 304.6925373134328, "grad_norm": 17.67374610900879, "learning_rate": 9.85639880952381e-06, "loss": 34.5426, "step": 12797 }, { "epoch": 304.7164179104478, "grad_norm": 14.477204322814941, "learning_rate": 9.855654761904763e-06, "loss": 34.2791, "step": 12798 }, { "epoch": 304.7402985074627, "grad_norm": 16.99796485900879, "learning_rate": 9.854910714285715e-06, "loss": 34.0483, "step": 12799 }, { "epoch": 304.7641791044776, "grad_norm": 13.359634399414062, "learning_rate": 9.854166666666668e-06, "loss": 33.8718, "step": 12800 }, { "epoch": 304.78805970149256, "grad_norm": 14.59427547454834, "learning_rate": 9.85342261904762e-06, "loss": 33.2423, "step": 12801 }, { "epoch": 304.81194029850747, "grad_norm": 19.618736267089844, "learning_rate": 9.852678571428572e-06, "loss": 33.9496, "step": 12802 }, { "epoch": 304.8358208955224, "grad_norm": 23.237140655517578, "learning_rate": 9.851934523809524e-06, "loss": 33.6882, "step": 12803 }, { "epoch": 304.85970149253734, "grad_norm": 15.610248565673828, "learning_rate": 9.851190476190477e-06, "loss": 34.0138, "step": 12804 }, { "epoch": 304.88358208955225, "grad_norm": 15.84520149230957, "learning_rate": 9.85044642857143e-06, "loss": 34.5489, "step": 12805 }, { "epoch": 304.90746268656716, "grad_norm": 22.97906494140625, "learning_rate": 9.849702380952382e-06, "loss": 32.1467, "step": 12806 }, { "epoch": 304.93134328358207, "grad_norm": 19.967920303344727, "learning_rate": 9.848958333333333e-06, "loss": 32.5344, "step": 12807 }, { "epoch": 304.95522388059703, "grad_norm": 15.402724266052246, "learning_rate": 9.848214285714288e-06, "loss": 33.953, "step": 12808 }, { "epoch": 304.97910447761194, "grad_norm": 16.7278995513916, "learning_rate": 9.847470238095239e-06, "loss": 35.0912, "step": 12809 }, { "epoch": 305.0, "grad_norm": 12.981561660766602, "learning_rate": 9.846726190476191e-06, "loss": 29.2283, "step": 12810 }, { "epoch": 305.0238805970149, "grad_norm": 19.851594924926758, "learning_rate": 9.845982142857144e-06, "loss": 33.1445, "step": 12811 }, { "epoch": 305.0477611940299, "grad_norm": 19.49011993408203, "learning_rate": 9.845238095238097e-06, "loss": 34.982, "step": 12812 }, { "epoch": 305.0716417910448, "grad_norm": 25.895421981811523, "learning_rate": 9.84449404761905e-06, "loss": 32.9737, "step": 12813 }, { "epoch": 305.0955223880597, "grad_norm": 17.449796676635742, "learning_rate": 9.84375e-06, "loss": 33.482, "step": 12814 }, { "epoch": 305.1194029850746, "grad_norm": 24.824426651000977, "learning_rate": 9.843005952380953e-06, "loss": 33.8339, "step": 12815 }, { "epoch": 305.14328358208957, "grad_norm": 24.20445442199707, "learning_rate": 9.842261904761906e-06, "loss": 32.8542, "step": 12816 }, { "epoch": 305.1671641791045, "grad_norm": 18.673444747924805, "learning_rate": 9.841517857142858e-06, "loss": 34.1516, "step": 12817 }, { "epoch": 305.1910447761194, "grad_norm": 31.55808448791504, "learning_rate": 9.840773809523811e-06, "loss": 34.3214, "step": 12818 }, { "epoch": 305.21492537313435, "grad_norm": 20.21152114868164, "learning_rate": 9.840029761904762e-06, "loss": 33.559, "step": 12819 }, { "epoch": 305.23880597014926, "grad_norm": 39.6590576171875, "learning_rate": 9.839285714285715e-06, "loss": 34.5408, "step": 12820 }, { "epoch": 305.26268656716417, "grad_norm": 29.15034294128418, "learning_rate": 9.838541666666668e-06, "loss": 33.7678, "step": 12821 }, { "epoch": 305.28656716417913, "grad_norm": 39.511295318603516, "learning_rate": 9.83779761904762e-06, "loss": 34.1371, "step": 12822 }, { "epoch": 305.31044776119404, "grad_norm": 28.5534725189209, "learning_rate": 9.837053571428571e-06, "loss": 34.1853, "step": 12823 }, { "epoch": 305.33432835820895, "grad_norm": 42.570369720458984, "learning_rate": 9.836309523809524e-06, "loss": 34.2188, "step": 12824 }, { "epoch": 305.35820895522386, "grad_norm": 37.27431869506836, "learning_rate": 9.835565476190477e-06, "loss": 34.4188, "step": 12825 }, { "epoch": 305.3820895522388, "grad_norm": 31.43735694885254, "learning_rate": 9.83482142857143e-06, "loss": 33.9932, "step": 12826 }, { "epoch": 305.40597014925373, "grad_norm": 30.48042106628418, "learning_rate": 9.834077380952382e-06, "loss": 32.4454, "step": 12827 }, { "epoch": 305.42985074626864, "grad_norm": 28.137269973754883, "learning_rate": 9.833333333333333e-06, "loss": 34.293, "step": 12828 }, { "epoch": 305.4537313432836, "grad_norm": 24.35393524169922, "learning_rate": 9.832589285714287e-06, "loss": 34.0471, "step": 12829 }, { "epoch": 305.4776119402985, "grad_norm": 39.596553802490234, "learning_rate": 9.831845238095238e-06, "loss": 33.3478, "step": 12830 }, { "epoch": 305.5014925373134, "grad_norm": 33.761383056640625, "learning_rate": 9.831101190476191e-06, "loss": 34.2828, "step": 12831 }, { "epoch": 305.52537313432833, "grad_norm": 33.55476760864258, "learning_rate": 9.830357142857144e-06, "loss": 34.1581, "step": 12832 }, { "epoch": 305.5492537313433, "grad_norm": 30.091413497924805, "learning_rate": 9.829613095238096e-06, "loss": 35.2272, "step": 12833 }, { "epoch": 305.5731343283582, "grad_norm": 30.065752029418945, "learning_rate": 9.828869047619049e-06, "loss": 33.5706, "step": 12834 }, { "epoch": 305.5970149253731, "grad_norm": 28.13954734802246, "learning_rate": 9.828125e-06, "loss": 34.4559, "step": 12835 }, { "epoch": 305.6208955223881, "grad_norm": 33.12614822387695, "learning_rate": 9.827380952380953e-06, "loss": 33.4614, "step": 12836 }, { "epoch": 305.644776119403, "grad_norm": 29.598318099975586, "learning_rate": 9.826636904761905e-06, "loss": 34.0791, "step": 12837 }, { "epoch": 305.6686567164179, "grad_norm": 35.32413101196289, "learning_rate": 9.825892857142858e-06, "loss": 34.2338, "step": 12838 }, { "epoch": 305.6925373134328, "grad_norm": 33.113548278808594, "learning_rate": 9.82514880952381e-06, "loss": 34.2677, "step": 12839 }, { "epoch": 305.7164179104478, "grad_norm": 27.92148780822754, "learning_rate": 9.824404761904762e-06, "loss": 33.1426, "step": 12840 }, { "epoch": 305.7402985074627, "grad_norm": 26.550376892089844, "learning_rate": 9.823660714285716e-06, "loss": 33.0227, "step": 12841 }, { "epoch": 305.7641791044776, "grad_norm": 29.040504455566406, "learning_rate": 9.822916666666667e-06, "loss": 34.9542, "step": 12842 }, { "epoch": 305.78805970149256, "grad_norm": 25.413545608520508, "learning_rate": 9.82217261904762e-06, "loss": 33.7384, "step": 12843 }, { "epoch": 305.81194029850747, "grad_norm": 36.03944778442383, "learning_rate": 9.821428571428573e-06, "loss": 32.7667, "step": 12844 }, { "epoch": 305.8358208955224, "grad_norm": 31.59793472290039, "learning_rate": 9.820684523809525e-06, "loss": 34.1801, "step": 12845 }, { "epoch": 305.85970149253734, "grad_norm": 32.00392532348633, "learning_rate": 9.819940476190478e-06, "loss": 34.7444, "step": 12846 }, { "epoch": 305.88358208955225, "grad_norm": 29.623685836791992, "learning_rate": 9.819196428571429e-06, "loss": 33.2053, "step": 12847 }, { "epoch": 305.90746268656716, "grad_norm": 26.65155601501465, "learning_rate": 9.818452380952382e-06, "loss": 33.0502, "step": 12848 }, { "epoch": 305.93134328358207, "grad_norm": 26.55497932434082, "learning_rate": 9.817708333333334e-06, "loss": 34.7496, "step": 12849 }, { "epoch": 305.95522388059703, "grad_norm": 34.47642135620117, "learning_rate": 9.816964285714287e-06, "loss": 34.3054, "step": 12850 }, { "epoch": 305.97910447761194, "grad_norm": 26.32135772705078, "learning_rate": 9.816220238095238e-06, "loss": 34.1304, "step": 12851 }, { "epoch": 306.0, "grad_norm": 29.453325271606445, "learning_rate": 9.81547619047619e-06, "loss": 29.8504, "step": 12852 }, { "epoch": 306.0238805970149, "grad_norm": 31.310495376586914, "learning_rate": 9.814732142857143e-06, "loss": 34.8228, "step": 12853 }, { "epoch": 306.0477611940299, "grad_norm": 29.23021697998047, "learning_rate": 9.813988095238096e-06, "loss": 34.135, "step": 12854 }, { "epoch": 306.0716417910448, "grad_norm": 28.847148895263672, "learning_rate": 9.813244047619049e-06, "loss": 34.2013, "step": 12855 }, { "epoch": 306.0955223880597, "grad_norm": 32.356266021728516, "learning_rate": 9.8125e-06, "loss": 35.2559, "step": 12856 }, { "epoch": 306.1194029850746, "grad_norm": 27.931236267089844, "learning_rate": 9.811755952380954e-06, "loss": 33.5151, "step": 12857 }, { "epoch": 306.14328358208957, "grad_norm": 28.33109474182129, "learning_rate": 9.811011904761905e-06, "loss": 33.693, "step": 12858 }, { "epoch": 306.1671641791045, "grad_norm": 30.227746963500977, "learning_rate": 9.810267857142858e-06, "loss": 34.2395, "step": 12859 }, { "epoch": 306.1910447761194, "grad_norm": 30.9976749420166, "learning_rate": 9.80952380952381e-06, "loss": 35.1999, "step": 12860 }, { "epoch": 306.21492537313435, "grad_norm": 25.662109375, "learning_rate": 9.808779761904763e-06, "loss": 34.5391, "step": 12861 }, { "epoch": 306.23880597014926, "grad_norm": 31.476734161376953, "learning_rate": 9.808035714285716e-06, "loss": 34.0768, "step": 12862 }, { "epoch": 306.26268656716417, "grad_norm": 27.349348068237305, "learning_rate": 9.807291666666667e-06, "loss": 34.3984, "step": 12863 }, { "epoch": 306.28656716417913, "grad_norm": 32.680023193359375, "learning_rate": 9.80654761904762e-06, "loss": 34.6098, "step": 12864 }, { "epoch": 306.31044776119404, "grad_norm": 28.24233627319336, "learning_rate": 9.805803571428572e-06, "loss": 34.8734, "step": 12865 }, { "epoch": 306.33432835820895, "grad_norm": 29.77298927307129, "learning_rate": 9.805059523809525e-06, "loss": 32.5477, "step": 12866 }, { "epoch": 306.35820895522386, "grad_norm": 29.427587509155273, "learning_rate": 9.804315476190477e-06, "loss": 33.5228, "step": 12867 }, { "epoch": 306.3820895522388, "grad_norm": 30.593961715698242, "learning_rate": 9.803571428571428e-06, "loss": 34.325, "step": 12868 }, { "epoch": 306.40597014925373, "grad_norm": 28.113536834716797, "learning_rate": 9.802827380952383e-06, "loss": 32.7251, "step": 12869 }, { "epoch": 306.42985074626864, "grad_norm": 30.79559326171875, "learning_rate": 9.802083333333334e-06, "loss": 33.5964, "step": 12870 }, { "epoch": 306.4537313432836, "grad_norm": 29.22075653076172, "learning_rate": 9.801339285714287e-06, "loss": 32.6929, "step": 12871 }, { "epoch": 306.4776119402985, "grad_norm": 29.520912170410156, "learning_rate": 9.80059523809524e-06, "loss": 33.0633, "step": 12872 }, { "epoch": 306.5014925373134, "grad_norm": 27.607057571411133, "learning_rate": 9.799851190476192e-06, "loss": 35.2869, "step": 12873 }, { "epoch": 306.52537313432833, "grad_norm": 31.058963775634766, "learning_rate": 9.799107142857145e-06, "loss": 33.4048, "step": 12874 }, { "epoch": 306.5492537313433, "grad_norm": 26.349206924438477, "learning_rate": 9.798363095238096e-06, "loss": 33.7153, "step": 12875 }, { "epoch": 306.5731343283582, "grad_norm": 29.270294189453125, "learning_rate": 9.797619047619048e-06, "loss": 34.4316, "step": 12876 }, { "epoch": 306.5970149253731, "grad_norm": 25.742521286010742, "learning_rate": 9.796875000000001e-06, "loss": 33.7534, "step": 12877 }, { "epoch": 306.6208955223881, "grad_norm": 32.86083221435547, "learning_rate": 9.796130952380954e-06, "loss": 33.7219, "step": 12878 }, { "epoch": 306.644776119403, "grad_norm": 30.2369384765625, "learning_rate": 9.795386904761905e-06, "loss": 32.5957, "step": 12879 }, { "epoch": 306.6686567164179, "grad_norm": 30.621706008911133, "learning_rate": 9.794642857142857e-06, "loss": 33.9441, "step": 12880 }, { "epoch": 306.6925373134328, "grad_norm": 26.510602951049805, "learning_rate": 9.79389880952381e-06, "loss": 33.5628, "step": 12881 }, { "epoch": 306.7164179104478, "grad_norm": 28.74705696105957, "learning_rate": 9.793154761904763e-06, "loss": 33.8791, "step": 12882 }, { "epoch": 306.7402985074627, "grad_norm": 26.187252044677734, "learning_rate": 9.792410714285715e-06, "loss": 33.6656, "step": 12883 }, { "epoch": 306.7641791044776, "grad_norm": 30.24416160583496, "learning_rate": 9.791666666666666e-06, "loss": 33.413, "step": 12884 }, { "epoch": 306.78805970149256, "grad_norm": 27.702054977416992, "learning_rate": 9.79092261904762e-06, "loss": 33.824, "step": 12885 }, { "epoch": 306.81194029850747, "grad_norm": 29.828664779663086, "learning_rate": 9.790178571428572e-06, "loss": 33.0718, "step": 12886 }, { "epoch": 306.8358208955224, "grad_norm": 29.822650909423828, "learning_rate": 9.789434523809524e-06, "loss": 33.8282, "step": 12887 }, { "epoch": 306.85970149253734, "grad_norm": 31.442548751831055, "learning_rate": 9.788690476190477e-06, "loss": 33.5222, "step": 12888 }, { "epoch": 306.88358208955225, "grad_norm": 28.49906349182129, "learning_rate": 9.78794642857143e-06, "loss": 32.341, "step": 12889 }, { "epoch": 306.90746268656716, "grad_norm": 31.756093978881836, "learning_rate": 9.787202380952382e-06, "loss": 33.9306, "step": 12890 }, { "epoch": 306.93134328358207, "grad_norm": 28.60268211364746, "learning_rate": 9.786458333333333e-06, "loss": 34.6159, "step": 12891 }, { "epoch": 306.95522388059703, "grad_norm": 27.625715255737305, "learning_rate": 9.785714285714286e-06, "loss": 33.7364, "step": 12892 }, { "epoch": 306.97910447761194, "grad_norm": 27.08561897277832, "learning_rate": 9.784970238095239e-06, "loss": 34.2786, "step": 12893 }, { "epoch": 307.0, "grad_norm": 26.741044998168945, "learning_rate": 9.784226190476192e-06, "loss": 29.7322, "step": 12894 }, { "epoch": 307.0238805970149, "grad_norm": 29.69178581237793, "learning_rate": 9.783482142857144e-06, "loss": 33.1324, "step": 12895 }, { "epoch": 307.0477611940299, "grad_norm": 30.632442474365234, "learning_rate": 9.782738095238095e-06, "loss": 32.5834, "step": 12896 }, { "epoch": 307.0716417910448, "grad_norm": 28.92683982849121, "learning_rate": 9.78199404761905e-06, "loss": 34.3942, "step": 12897 }, { "epoch": 307.0955223880597, "grad_norm": 27.258102416992188, "learning_rate": 9.78125e-06, "loss": 34.2923, "step": 12898 }, { "epoch": 307.1194029850746, "grad_norm": 22.587934494018555, "learning_rate": 9.780505952380953e-06, "loss": 34.0427, "step": 12899 }, { "epoch": 307.14328358208957, "grad_norm": 29.33913803100586, "learning_rate": 9.779761904761906e-06, "loss": 34.9919, "step": 12900 }, { "epoch": 307.1671641791045, "grad_norm": 25.18365478515625, "learning_rate": 9.779017857142859e-06, "loss": 34.6795, "step": 12901 }, { "epoch": 307.1910447761194, "grad_norm": 31.733814239501953, "learning_rate": 9.778273809523811e-06, "loss": 33.973, "step": 12902 }, { "epoch": 307.21492537313435, "grad_norm": 28.06524085998535, "learning_rate": 9.777529761904762e-06, "loss": 34.0458, "step": 12903 }, { "epoch": 307.23880597014926, "grad_norm": 28.391054153442383, "learning_rate": 9.776785714285715e-06, "loss": 33.2156, "step": 12904 }, { "epoch": 307.26268656716417, "grad_norm": 26.460420608520508, "learning_rate": 9.776041666666668e-06, "loss": 33.1812, "step": 12905 }, { "epoch": 307.28656716417913, "grad_norm": 24.927038192749023, "learning_rate": 9.77529761904762e-06, "loss": 33.4322, "step": 12906 }, { "epoch": 307.31044776119404, "grad_norm": 24.9912052154541, "learning_rate": 9.774553571428571e-06, "loss": 34.2864, "step": 12907 }, { "epoch": 307.33432835820895, "grad_norm": 27.373680114746094, "learning_rate": 9.773809523809524e-06, "loss": 34.2386, "step": 12908 }, { "epoch": 307.35820895522386, "grad_norm": 21.200687408447266, "learning_rate": 9.773065476190477e-06, "loss": 35.2577, "step": 12909 }, { "epoch": 307.3820895522388, "grad_norm": 26.07646369934082, "learning_rate": 9.77232142857143e-06, "loss": 33.0107, "step": 12910 }, { "epoch": 307.40597014925373, "grad_norm": 18.282670974731445, "learning_rate": 9.771577380952382e-06, "loss": 32.8028, "step": 12911 }, { "epoch": 307.42985074626864, "grad_norm": 27.18486213684082, "learning_rate": 9.770833333333333e-06, "loss": 32.7475, "step": 12912 }, { "epoch": 307.4537313432836, "grad_norm": 24.014129638671875, "learning_rate": 9.770089285714287e-06, "loss": 34.0267, "step": 12913 }, { "epoch": 307.4776119402985, "grad_norm": 25.076383590698242, "learning_rate": 9.769345238095238e-06, "loss": 33.6766, "step": 12914 }, { "epoch": 307.5014925373134, "grad_norm": 23.164873123168945, "learning_rate": 9.768601190476191e-06, "loss": 34.2511, "step": 12915 }, { "epoch": 307.52537313432833, "grad_norm": 24.044761657714844, "learning_rate": 9.767857142857144e-06, "loss": 34.5234, "step": 12916 }, { "epoch": 307.5492537313433, "grad_norm": 21.319169998168945, "learning_rate": 9.767113095238097e-06, "loss": 33.1081, "step": 12917 }, { "epoch": 307.5731343283582, "grad_norm": 20.085777282714844, "learning_rate": 9.76636904761905e-06, "loss": 33.9557, "step": 12918 }, { "epoch": 307.5970149253731, "grad_norm": 18.691049575805664, "learning_rate": 9.765625e-06, "loss": 33.961, "step": 12919 }, { "epoch": 307.6208955223881, "grad_norm": 22.317243576049805, "learning_rate": 9.764880952380953e-06, "loss": 33.8924, "step": 12920 }, { "epoch": 307.644776119403, "grad_norm": 17.514055252075195, "learning_rate": 9.764136904761906e-06, "loss": 33.7417, "step": 12921 }, { "epoch": 307.6686567164179, "grad_norm": 23.589597702026367, "learning_rate": 9.763392857142858e-06, "loss": 33.7327, "step": 12922 }, { "epoch": 307.6925373134328, "grad_norm": 20.747957229614258, "learning_rate": 9.762648809523811e-06, "loss": 34.4243, "step": 12923 }, { "epoch": 307.7164179104478, "grad_norm": 20.31751251220703, "learning_rate": 9.761904761904762e-06, "loss": 34.9525, "step": 12924 }, { "epoch": 307.7402985074627, "grad_norm": 22.384435653686523, "learning_rate": 9.761160714285715e-06, "loss": 33.6509, "step": 12925 }, { "epoch": 307.7641791044776, "grad_norm": 19.30194664001465, "learning_rate": 9.760416666666667e-06, "loss": 34.2968, "step": 12926 }, { "epoch": 307.78805970149256, "grad_norm": 21.226215362548828, "learning_rate": 9.75967261904762e-06, "loss": 34.3109, "step": 12927 }, { "epoch": 307.81194029850747, "grad_norm": 20.73770523071289, "learning_rate": 9.758928571428573e-06, "loss": 32.4683, "step": 12928 }, { "epoch": 307.8358208955224, "grad_norm": NaN, "learning_rate": 9.758184523809524e-06, "loss": 43.1886, "step": 12929 }, { "epoch": 307.85970149253734, "grad_norm": 20.99942398071289, "learning_rate": 9.758184523809524e-06, "loss": 34.5113, "step": 12930 }, { "epoch": 307.88358208955225, "grad_norm": 16.915035247802734, "learning_rate": 9.757440476190478e-06, "loss": 33.5386, "step": 12931 }, { "epoch": 307.90746268656716, "grad_norm": 22.421545028686523, "learning_rate": 9.756696428571429e-06, "loss": 33.0041, "step": 12932 }, { "epoch": 307.93134328358207, "grad_norm": 18.106794357299805, "learning_rate": 9.755952380952382e-06, "loss": 33.063, "step": 12933 }, { "epoch": 307.95522388059703, "grad_norm": 19.077604293823242, "learning_rate": 9.755208333333334e-06, "loss": 33.5393, "step": 12934 }, { "epoch": 307.97910447761194, "grad_norm": 20.95818328857422, "learning_rate": 9.754464285714287e-06, "loss": 34.7901, "step": 12935 }, { "epoch": 308.0, "grad_norm": 17.7838191986084, "learning_rate": 9.753720238095238e-06, "loss": 28.7349, "step": 12936 }, { "epoch": 308.0238805970149, "grad_norm": 16.09705924987793, "learning_rate": 9.75297619047619e-06, "loss": 34.0701, "step": 12937 }, { "epoch": 308.0477611940299, "grad_norm": 23.41761589050293, "learning_rate": 9.752232142857143e-06, "loss": 34.0079, "step": 12938 }, { "epoch": 308.0716417910448, "grad_norm": 17.222984313964844, "learning_rate": 9.751488095238096e-06, "loss": 32.2955, "step": 12939 }, { "epoch": 308.0955223880597, "grad_norm": 19.617464065551758, "learning_rate": 9.750744047619049e-06, "loss": 33.196, "step": 12940 }, { "epoch": 308.1194029850746, "grad_norm": 17.333797454833984, "learning_rate": 9.75e-06, "loss": 33.097, "step": 12941 }, { "epoch": 308.14328358208957, "grad_norm": 20.236370086669922, "learning_rate": 9.749255952380953e-06, "loss": 34.5801, "step": 12942 }, { "epoch": 308.1671641791045, "grad_norm": 15.978302955627441, "learning_rate": 9.748511904761905e-06, "loss": 33.2588, "step": 12943 }, { "epoch": 308.1910447761194, "grad_norm": 18.41038703918457, "learning_rate": 9.747767857142858e-06, "loss": 33.9537, "step": 12944 }, { "epoch": 308.21492537313435, "grad_norm": 15.606754302978516, "learning_rate": 9.74702380952381e-06, "loss": 33.7234, "step": 12945 }, { "epoch": 308.23880597014926, "grad_norm": 19.32736587524414, "learning_rate": 9.746279761904762e-06, "loss": 33.4628, "step": 12946 }, { "epoch": 308.26268656716417, "grad_norm": 18.63032341003418, "learning_rate": 9.745535714285716e-06, "loss": 33.4484, "step": 12947 }, { "epoch": 308.28656716417913, "grad_norm": 16.7097110748291, "learning_rate": 9.744791666666667e-06, "loss": 33.3486, "step": 12948 }, { "epoch": 308.31044776119404, "grad_norm": 19.329191207885742, "learning_rate": 9.74404761904762e-06, "loss": 34.6827, "step": 12949 }, { "epoch": 308.33432835820895, "grad_norm": 14.51841926574707, "learning_rate": 9.743303571428572e-06, "loss": 34.5459, "step": 12950 }, { "epoch": 308.35820895522386, "grad_norm": 22.095539093017578, "learning_rate": 9.742559523809525e-06, "loss": 33.3448, "step": 12951 }, { "epoch": 308.3820895522388, "grad_norm": 17.434141159057617, "learning_rate": 9.741815476190478e-06, "loss": 33.8781, "step": 12952 }, { "epoch": 308.40597014925373, "grad_norm": 17.55922508239746, "learning_rate": 9.741071428571429e-06, "loss": 34.3081, "step": 12953 }, { "epoch": 308.42985074626864, "grad_norm": 17.018356323242188, "learning_rate": 9.740327380952381e-06, "loss": 33.655, "step": 12954 }, { "epoch": 308.4537313432836, "grad_norm": 17.0175724029541, "learning_rate": 9.739583333333334e-06, "loss": 32.7921, "step": 12955 }, { "epoch": 308.4776119402985, "grad_norm": NaN, "learning_rate": 9.738839285714287e-06, "loss": 58.4704, "step": 12956 }, { "epoch": 308.5014925373134, "grad_norm": 16.916526794433594, "learning_rate": 9.738839285714287e-06, "loss": 35.8149, "step": 12957 }, { "epoch": 308.52537313432833, "grad_norm": 18.18893051147461, "learning_rate": 9.73809523809524e-06, "loss": 32.7847, "step": 12958 }, { "epoch": 308.5492537313433, "grad_norm": 14.804588317871094, "learning_rate": 9.73735119047619e-06, "loss": 33.8423, "step": 12959 }, { "epoch": 308.5731343283582, "grad_norm": 16.292497634887695, "learning_rate": 9.736607142857145e-06, "loss": 34.3412, "step": 12960 }, { "epoch": 308.5970149253731, "grad_norm": 18.089969635009766, "learning_rate": 9.735863095238096e-06, "loss": 34.3364, "step": 12961 }, { "epoch": 308.6208955223881, "grad_norm": 20.194747924804688, "learning_rate": 9.735119047619048e-06, "loss": 34.4244, "step": 12962 }, { "epoch": 308.644776119403, "grad_norm": 15.840314865112305, "learning_rate": 9.734375000000001e-06, "loss": 33.2715, "step": 12963 }, { "epoch": 308.6686567164179, "grad_norm": 16.695568084716797, "learning_rate": 9.733630952380954e-06, "loss": 34.0288, "step": 12964 }, { "epoch": 308.6925373134328, "grad_norm": 17.103296279907227, "learning_rate": 9.732886904761907e-06, "loss": 33.7916, "step": 12965 }, { "epoch": 308.7164179104478, "grad_norm": 16.213998794555664, "learning_rate": 9.732142857142858e-06, "loss": 34.0483, "step": 12966 }, { "epoch": 308.7402985074627, "grad_norm": 20.33165740966797, "learning_rate": 9.73139880952381e-06, "loss": 33.7052, "step": 12967 }, { "epoch": 308.7641791044776, "grad_norm": 16.87818717956543, "learning_rate": 9.730654761904763e-06, "loss": 33.2133, "step": 12968 }, { "epoch": 308.78805970149256, "grad_norm": 14.578195571899414, "learning_rate": 9.729910714285716e-06, "loss": 35.137, "step": 12969 }, { "epoch": 308.81194029850747, "grad_norm": 16.438709259033203, "learning_rate": 9.729166666666667e-06, "loss": 33.9921, "step": 12970 }, { "epoch": 308.8358208955224, "grad_norm": 15.464425086975098, "learning_rate": 9.72842261904762e-06, "loss": 33.0498, "step": 12971 }, { "epoch": 308.85970149253734, "grad_norm": NaN, "learning_rate": 9.727678571428572e-06, "loss": 34.0863, "step": 12972 }, { "epoch": 308.88358208955225, "grad_norm": 19.971511840820312, "learning_rate": 9.727678571428572e-06, "loss": 34.1805, "step": 12973 }, { "epoch": 308.90746268656716, "grad_norm": 16.907794952392578, "learning_rate": 9.726934523809525e-06, "loss": 34.1479, "step": 12974 }, { "epoch": 308.93134328358207, "grad_norm": 16.841999053955078, "learning_rate": 9.726190476190477e-06, "loss": 33.5721, "step": 12975 }, { "epoch": 308.95522388059703, "grad_norm": 19.87732696533203, "learning_rate": 9.725446428571428e-06, "loss": 33.8914, "step": 12976 }, { "epoch": 308.97910447761194, "grad_norm": 15.79689884185791, "learning_rate": 9.724702380952383e-06, "loss": 34.1774, "step": 12977 }, { "epoch": 309.0, "grad_norm": 23.370094299316406, "learning_rate": 9.723958333333334e-06, "loss": 29.8413, "step": 12978 }, { "epoch": 309.0238805970149, "grad_norm": 17.6906681060791, "learning_rate": 9.723214285714286e-06, "loss": 34.4275, "step": 12979 }, { "epoch": 309.0477611940299, "grad_norm": 24.894100189208984, "learning_rate": 9.722470238095239e-06, "loss": 33.7288, "step": 12980 }, { "epoch": 309.0716417910448, "grad_norm": 18.9682559967041, "learning_rate": 9.721726190476192e-06, "loss": 34.7069, "step": 12981 }, { "epoch": 309.0955223880597, "grad_norm": 26.61629867553711, "learning_rate": 9.720982142857144e-06, "loss": 32.9412, "step": 12982 }, { "epoch": 309.1194029850746, "grad_norm": 21.343372344970703, "learning_rate": 9.720238095238095e-06, "loss": 33.177, "step": 12983 }, { "epoch": 309.14328358208957, "grad_norm": 20.692062377929688, "learning_rate": 9.719494047619048e-06, "loss": 33.9896, "step": 12984 }, { "epoch": 309.1671641791045, "grad_norm": 22.972320556640625, "learning_rate": 9.71875e-06, "loss": 34.4122, "step": 12985 }, { "epoch": 309.1910447761194, "grad_norm": 20.217164993286133, "learning_rate": 9.718005952380953e-06, "loss": 33.561, "step": 12986 }, { "epoch": 309.21492537313435, "grad_norm": 16.543354034423828, "learning_rate": 9.717261904761906e-06, "loss": 32.8687, "step": 12987 }, { "epoch": 309.23880597014926, "grad_norm": 26.884403228759766, "learning_rate": 9.716517857142857e-06, "loss": 33.1497, "step": 12988 }, { "epoch": 309.26268656716417, "grad_norm": 16.741165161132812, "learning_rate": 9.715773809523812e-06, "loss": 33.7586, "step": 12989 }, { "epoch": 309.28656716417913, "grad_norm": 32.45367431640625, "learning_rate": 9.715029761904762e-06, "loss": 32.623, "step": 12990 }, { "epoch": 309.31044776119404, "grad_norm": 21.32443618774414, "learning_rate": 9.714285714285715e-06, "loss": 32.6459, "step": 12991 }, { "epoch": 309.33432835820895, "grad_norm": 30.66120147705078, "learning_rate": 9.713541666666668e-06, "loss": 33.9185, "step": 12992 }, { "epoch": 309.35820895522386, "grad_norm": 27.259050369262695, "learning_rate": 9.71279761904762e-06, "loss": 35.1806, "step": 12993 }, { "epoch": 309.3820895522388, "grad_norm": 30.870134353637695, "learning_rate": 9.712053571428573e-06, "loss": 32.6699, "step": 12994 }, { "epoch": 309.40597014925373, "grad_norm": 25.539621353149414, "learning_rate": 9.711309523809524e-06, "loss": 35.1825, "step": 12995 }, { "epoch": 309.42985074626864, "grad_norm": 30.160842895507812, "learning_rate": 9.710565476190477e-06, "loss": 34.7504, "step": 12996 }, { "epoch": 309.4537313432836, "grad_norm": 22.481428146362305, "learning_rate": 9.70982142857143e-06, "loss": 33.4778, "step": 12997 }, { "epoch": 309.4776119402985, "grad_norm": 30.42070198059082, "learning_rate": 9.709077380952382e-06, "loss": 33.0725, "step": 12998 }, { "epoch": 309.5014925373134, "grad_norm": 20.800201416015625, "learning_rate": 9.708333333333333e-06, "loss": 35.0291, "step": 12999 }, { "epoch": 309.52537313432833, "grad_norm": 32.657894134521484, "learning_rate": 9.707589285714286e-06, "loss": 33.2058, "step": 13000 }, { "epoch": 309.5492537313433, "grad_norm": 27.442174911499023, "learning_rate": 9.706845238095239e-06, "loss": 33.5788, "step": 13001 }, { "epoch": 309.5731343283582, "grad_norm": 30.188657760620117, "learning_rate": 9.706101190476191e-06, "loss": 33.1575, "step": 13002 }, { "epoch": 309.5970149253731, "grad_norm": 25.580913543701172, "learning_rate": 9.705357142857144e-06, "loss": 33.398, "step": 13003 }, { "epoch": 309.6208955223881, "grad_norm": 29.054780960083008, "learning_rate": 9.704613095238095e-06, "loss": 33.1039, "step": 13004 }, { "epoch": 309.644776119403, "grad_norm": 25.801345825195312, "learning_rate": 9.70386904761905e-06, "loss": 34.3573, "step": 13005 }, { "epoch": 309.6686567164179, "grad_norm": 26.575672149658203, "learning_rate": 9.703125e-06, "loss": 34.532, "step": 13006 }, { "epoch": 309.6925373134328, "grad_norm": 23.786701202392578, "learning_rate": 9.702380952380953e-06, "loss": 32.8245, "step": 13007 }, { "epoch": 309.7164179104478, "grad_norm": 24.64287757873535, "learning_rate": 9.701636904761906e-06, "loss": 33.9079, "step": 13008 }, { "epoch": 309.7402985074627, "grad_norm": 25.367307662963867, "learning_rate": 9.700892857142858e-06, "loss": 32.5972, "step": 13009 }, { "epoch": 309.7641791044776, "grad_norm": 19.622753143310547, "learning_rate": 9.700148809523811e-06, "loss": 33.8491, "step": 13010 }, { "epoch": 309.78805970149256, "grad_norm": 23.75461196899414, "learning_rate": 9.699404761904762e-06, "loss": 34.1387, "step": 13011 }, { "epoch": 309.81194029850747, "grad_norm": 19.268911361694336, "learning_rate": 9.698660714285715e-06, "loss": 35.2761, "step": 13012 }, { "epoch": 309.8358208955224, "grad_norm": 16.754852294921875, "learning_rate": 9.697916666666667e-06, "loss": 33.3417, "step": 13013 }, { "epoch": 309.85970149253734, "grad_norm": 19.657302856445312, "learning_rate": 9.69717261904762e-06, "loss": 34.0011, "step": 13014 }, { "epoch": 309.88358208955225, "grad_norm": 19.572189331054688, "learning_rate": 9.696428571428573e-06, "loss": 33.6482, "step": 13015 }, { "epoch": 309.90746268656716, "grad_norm": 13.575444221496582, "learning_rate": 9.695684523809524e-06, "loss": 34.9708, "step": 13016 }, { "epoch": 309.93134328358207, "grad_norm": 21.723373413085938, "learning_rate": 9.694940476190478e-06, "loss": 35.3739, "step": 13017 }, { "epoch": 309.95522388059703, "grad_norm": 16.05686378479004, "learning_rate": 9.69419642857143e-06, "loss": 34.6028, "step": 13018 }, { "epoch": 309.97910447761194, "grad_norm": 19.51734733581543, "learning_rate": 9.693452380952382e-06, "loss": 33.1861, "step": 13019 }, { "epoch": 310.0, "grad_norm": 15.258634567260742, "learning_rate": 9.692708333333335e-06, "loss": 28.8878, "step": 13020 }, { "epoch": 310.0238805970149, "grad_norm": 21.517152786254883, "learning_rate": 9.691964285714287e-06, "loss": 33.3936, "step": 13021 }, { "epoch": 310.0477611940299, "grad_norm": 16.182579040527344, "learning_rate": 9.69122023809524e-06, "loss": 32.5093, "step": 13022 }, { "epoch": 310.0716417910448, "grad_norm": 20.20163917541504, "learning_rate": 9.690476190476191e-06, "loss": 32.5731, "step": 13023 }, { "epoch": 310.0955223880597, "grad_norm": 18.66176414489746, "learning_rate": 9.689732142857144e-06, "loss": 34.2317, "step": 13024 }, { "epoch": 310.1194029850746, "grad_norm": 19.497772216796875, "learning_rate": 9.688988095238096e-06, "loss": 34.5274, "step": 13025 }, { "epoch": 310.14328358208957, "grad_norm": 16.244461059570312, "learning_rate": 9.688244047619049e-06, "loss": 32.709, "step": 13026 }, { "epoch": 310.1671641791045, "grad_norm": 19.932151794433594, "learning_rate": 9.6875e-06, "loss": 32.2925, "step": 13027 }, { "epoch": 310.1910447761194, "grad_norm": 18.922943115234375, "learning_rate": 9.686755952380953e-06, "loss": 34.0793, "step": 13028 }, { "epoch": 310.21492537313435, "grad_norm": 18.103727340698242, "learning_rate": 9.686011904761905e-06, "loss": 33.6132, "step": 13029 }, { "epoch": 310.23880597014926, "grad_norm": 17.927507400512695, "learning_rate": 9.685267857142858e-06, "loss": 33.7991, "step": 13030 }, { "epoch": 310.26268656716417, "grad_norm": 16.29496192932129, "learning_rate": 9.68452380952381e-06, "loss": 32.8704, "step": 13031 }, { "epoch": 310.28656716417913, "grad_norm": 17.598722457885742, "learning_rate": 9.683779761904762e-06, "loss": 34.1045, "step": 13032 }, { "epoch": 310.31044776119404, "grad_norm": 15.629679679870605, "learning_rate": 9.683035714285714e-06, "loss": 33.9642, "step": 13033 }, { "epoch": 310.33432835820895, "grad_norm": 15.261300086975098, "learning_rate": 9.682291666666667e-06, "loss": 33.927, "step": 13034 }, { "epoch": 310.35820895522386, "grad_norm": 20.092823028564453, "learning_rate": 9.68154761904762e-06, "loss": 34.0795, "step": 13035 }, { "epoch": 310.3820895522388, "grad_norm": 19.83094596862793, "learning_rate": 9.680803571428572e-06, "loss": 33.7482, "step": 13036 }, { "epoch": 310.40597014925373, "grad_norm": 16.10625457763672, "learning_rate": 9.680059523809523e-06, "loss": 33.9186, "step": 13037 }, { "epoch": 310.42985074626864, "grad_norm": 17.777740478515625, "learning_rate": 9.679315476190478e-06, "loss": 33.5461, "step": 13038 }, { "epoch": 310.4537313432836, "grad_norm": 20.631752014160156, "learning_rate": 9.678571428571429e-06, "loss": 34.6323, "step": 13039 }, { "epoch": 310.4776119402985, "grad_norm": 18.21401023864746, "learning_rate": 9.677827380952382e-06, "loss": 34.0632, "step": 13040 }, { "epoch": 310.5014925373134, "grad_norm": 15.418707847595215, "learning_rate": 9.677083333333334e-06, "loss": 32.5218, "step": 13041 }, { "epoch": 310.52537313432833, "grad_norm": NaN, "learning_rate": 9.676339285714287e-06, "loss": 55.8785, "step": 13042 }, { "epoch": 310.5492537313433, "grad_norm": 13.48697280883789, "learning_rate": 9.676339285714287e-06, "loss": 33.587, "step": 13043 }, { "epoch": 310.5731343283582, "grad_norm": 20.246139526367188, "learning_rate": 9.67559523809524e-06, "loss": 34.176, "step": 13044 }, { "epoch": 310.5970149253731, "grad_norm": 14.433034896850586, "learning_rate": 9.67485119047619e-06, "loss": 32.9637, "step": 13045 }, { "epoch": 310.6208955223881, "grad_norm": 16.697980880737305, "learning_rate": 9.674107142857143e-06, "loss": 33.3235, "step": 13046 }, { "epoch": 310.644776119403, "grad_norm": 17.75384521484375, "learning_rate": 9.673363095238096e-06, "loss": 34.1102, "step": 13047 }, { "epoch": 310.6686567164179, "grad_norm": 23.510988235473633, "learning_rate": 9.672619047619049e-06, "loss": 33.492, "step": 13048 }, { "epoch": 310.6925373134328, "grad_norm": 20.356592178344727, "learning_rate": 9.671875000000001e-06, "loss": 35.2349, "step": 13049 }, { "epoch": 310.7164179104478, "grad_norm": 15.113434791564941, "learning_rate": 9.671130952380952e-06, "loss": 34.0031, "step": 13050 }, { "epoch": 310.7402985074627, "grad_norm": 21.017969131469727, "learning_rate": 9.670386904761907e-06, "loss": 33.8223, "step": 13051 }, { "epoch": 310.7641791044776, "grad_norm": 20.728343963623047, "learning_rate": 9.669642857142858e-06, "loss": 34.2523, "step": 13052 }, { "epoch": 310.78805970149256, "grad_norm": 14.433070182800293, "learning_rate": 9.66889880952381e-06, "loss": 34.6699, "step": 13053 }, { "epoch": 310.81194029850747, "grad_norm": 22.28704261779785, "learning_rate": 9.668154761904763e-06, "loss": 33.1882, "step": 13054 }, { "epoch": 310.8358208955224, "grad_norm": 19.53321647644043, "learning_rate": 9.667410714285716e-06, "loss": 33.8604, "step": 13055 }, { "epoch": 310.85970149253734, "grad_norm": 14.87886905670166, "learning_rate": 9.666666666666667e-06, "loss": 33.4077, "step": 13056 }, { "epoch": 310.88358208955225, "grad_norm": 19.086444854736328, "learning_rate": 9.66592261904762e-06, "loss": 33.7193, "step": 13057 }, { "epoch": 310.90746268656716, "grad_norm": 19.99090003967285, "learning_rate": 9.665178571428572e-06, "loss": 34.5341, "step": 13058 }, { "epoch": 310.93134328358207, "grad_norm": 18.049787521362305, "learning_rate": 9.664434523809525e-06, "loss": 34.3963, "step": 13059 }, { "epoch": 310.95522388059703, "grad_norm": 14.74404239654541, "learning_rate": 9.663690476190477e-06, "loss": 34.2839, "step": 13060 }, { "epoch": 310.97910447761194, "grad_norm": NaN, "learning_rate": 9.662946428571428e-06, "loss": 38.865, "step": 13061 }, { "epoch": 311.0, "grad_norm": 13.75111198425293, "learning_rate": 9.662946428571428e-06, "loss": 29.714, "step": 13062 }, { "epoch": 311.0238805970149, "grad_norm": 17.926097869873047, "learning_rate": 9.662202380952381e-06, "loss": 33.7766, "step": 13063 }, { "epoch": 311.0477611940299, "grad_norm": 14.747750282287598, "learning_rate": 9.661458333333334e-06, "loss": 33.3906, "step": 13064 }, { "epoch": 311.0716417910448, "grad_norm": 18.3504695892334, "learning_rate": 9.660714285714287e-06, "loss": 33.6988, "step": 13065 }, { "epoch": 311.0955223880597, "grad_norm": 17.141036987304688, "learning_rate": 9.65997023809524e-06, "loss": 34.888, "step": 13066 }, { "epoch": 311.1194029850746, "grad_norm": 20.44035530090332, "learning_rate": 9.65922619047619e-06, "loss": 32.4416, "step": 13067 }, { "epoch": 311.14328358208957, "grad_norm": 14.6190824508667, "learning_rate": 9.658482142857145e-06, "loss": 34.6001, "step": 13068 }, { "epoch": 311.1671641791045, "grad_norm": 18.415260314941406, "learning_rate": 9.657738095238096e-06, "loss": 34.7202, "step": 13069 }, { "epoch": 311.1910447761194, "grad_norm": 16.84659194946289, "learning_rate": 9.656994047619048e-06, "loss": 32.8358, "step": 13070 }, { "epoch": 311.21492537313435, "grad_norm": 16.88626480102539, "learning_rate": 9.656250000000001e-06, "loss": 34.8116, "step": 13071 }, { "epoch": 311.23880597014926, "grad_norm": 18.828983306884766, "learning_rate": 9.655505952380954e-06, "loss": 33.107, "step": 13072 }, { "epoch": 311.26268656716417, "grad_norm": 15.67547607421875, "learning_rate": 9.654761904761906e-06, "loss": 33.8678, "step": 13073 }, { "epoch": 311.28656716417913, "grad_norm": 16.83783721923828, "learning_rate": 9.654017857142857e-06, "loss": 33.0033, "step": 13074 }, { "epoch": 311.31044776119404, "grad_norm": 17.924421310424805, "learning_rate": 9.65327380952381e-06, "loss": 33.6108, "step": 13075 }, { "epoch": 311.33432835820895, "grad_norm": 17.57686424255371, "learning_rate": 9.652529761904763e-06, "loss": 33.5779, "step": 13076 }, { "epoch": 311.35820895522386, "grad_norm": 21.02503776550293, "learning_rate": 9.651785714285715e-06, "loss": 34.783, "step": 13077 }, { "epoch": 311.3820895522388, "grad_norm": 15.584885597229004, "learning_rate": 9.651041666666668e-06, "loss": 33.3379, "step": 13078 }, { "epoch": 311.40597014925373, "grad_norm": 14.647343635559082, "learning_rate": 9.650297619047619e-06, "loss": 34.2257, "step": 13079 }, { "epoch": 311.42985074626864, "grad_norm": 16.879671096801758, "learning_rate": 9.649553571428573e-06, "loss": 34.051, "step": 13080 }, { "epoch": 311.4537313432836, "grad_norm": 18.13416290283203, "learning_rate": 9.648809523809524e-06, "loss": 33.135, "step": 13081 }, { "epoch": 311.4776119402985, "grad_norm": 20.190101623535156, "learning_rate": 9.648065476190477e-06, "loss": 34.515, "step": 13082 }, { "epoch": 311.5014925373134, "grad_norm": 14.234061241149902, "learning_rate": 9.64732142857143e-06, "loss": 33.8728, "step": 13083 }, { "epoch": 311.52537313432833, "grad_norm": 18.39228630065918, "learning_rate": 9.646577380952382e-06, "loss": 34.3784, "step": 13084 }, { "epoch": 311.5492537313433, "grad_norm": 15.494501113891602, "learning_rate": 9.645833333333333e-06, "loss": 33.5588, "step": 13085 }, { "epoch": 311.5731343283582, "grad_norm": 20.299917221069336, "learning_rate": 9.645089285714286e-06, "loss": 33.2555, "step": 13086 }, { "epoch": 311.5970149253731, "grad_norm": 14.360240936279297, "learning_rate": 9.644345238095239e-06, "loss": 32.3675, "step": 13087 }, { "epoch": 311.6208955223881, "grad_norm": 20.752193450927734, "learning_rate": 9.643601190476192e-06, "loss": 34.6455, "step": 13088 }, { "epoch": 311.644776119403, "grad_norm": 18.01266860961914, "learning_rate": 9.642857142857144e-06, "loss": 33.6162, "step": 13089 }, { "epoch": 311.6686567164179, "grad_norm": 18.568958282470703, "learning_rate": 9.642113095238095e-06, "loss": 33.3481, "step": 13090 }, { "epoch": 311.6925373134328, "grad_norm": 17.04592514038086, "learning_rate": 9.641369047619048e-06, "loss": 33.1764, "step": 13091 }, { "epoch": 311.7164179104478, "grad_norm": 16.930374145507812, "learning_rate": 9.640625e-06, "loss": 34.5636, "step": 13092 }, { "epoch": 311.7402985074627, "grad_norm": 15.942654609680176, "learning_rate": 9.639880952380953e-06, "loss": 33.9937, "step": 13093 }, { "epoch": 311.7641791044776, "grad_norm": 14.966394424438477, "learning_rate": 9.639136904761906e-06, "loss": 32.1193, "step": 13094 }, { "epoch": 311.78805970149256, "grad_norm": 14.562058448791504, "learning_rate": 9.638392857142857e-06, "loss": 33.5501, "step": 13095 }, { "epoch": 311.81194029850747, "grad_norm": 14.743454933166504, "learning_rate": 9.637648809523811e-06, "loss": 34.1824, "step": 13096 }, { "epoch": 311.8358208955224, "grad_norm": 15.229239463806152, "learning_rate": 9.636904761904762e-06, "loss": 34.4654, "step": 13097 }, { "epoch": 311.85970149253734, "grad_norm": 18.067148208618164, "learning_rate": 9.636160714285715e-06, "loss": 33.4182, "step": 13098 }, { "epoch": 311.88358208955225, "grad_norm": 16.98919677734375, "learning_rate": 9.635416666666668e-06, "loss": 33.4746, "step": 13099 }, { "epoch": 311.90746268656716, "grad_norm": 18.955854415893555, "learning_rate": 9.63467261904762e-06, "loss": 33.9087, "step": 13100 }, { "epoch": 311.93134328358207, "grad_norm": 12.923398971557617, "learning_rate": 9.633928571428573e-06, "loss": 33.244, "step": 13101 }, { "epoch": 311.95522388059703, "grad_norm": 18.70138168334961, "learning_rate": 9.633184523809524e-06, "loss": 34.3946, "step": 13102 }, { "epoch": 311.97910447761194, "grad_norm": 15.275166511535645, "learning_rate": 9.632440476190477e-06, "loss": 33.0189, "step": 13103 }, { "epoch": 312.0, "grad_norm": 25.734006881713867, "learning_rate": 9.63169642857143e-06, "loss": 30.0553, "step": 13104 }, { "epoch": 312.0238805970149, "grad_norm": 21.362735748291016, "learning_rate": 9.630952380952382e-06, "loss": 33.4624, "step": 13105 }, { "epoch": 312.0477611940299, "grad_norm": 22.884366989135742, "learning_rate": 9.630208333333335e-06, "loss": 33.4485, "step": 13106 }, { "epoch": 312.0716417910448, "grad_norm": 26.826204299926758, "learning_rate": 9.629464285714286e-06, "loss": 33.3296, "step": 13107 }, { "epoch": 312.0955223880597, "grad_norm": 17.927284240722656, "learning_rate": 9.62872023809524e-06, "loss": 33.7918, "step": 13108 }, { "epoch": 312.1194029850746, "grad_norm": 32.59355163574219, "learning_rate": 9.627976190476191e-06, "loss": 33.1228, "step": 13109 }, { "epoch": 312.14328358208957, "grad_norm": 19.587862014770508, "learning_rate": 9.627232142857144e-06, "loss": 33.6721, "step": 13110 }, { "epoch": 312.1671641791045, "grad_norm": 32.56916427612305, "learning_rate": 9.626488095238096e-06, "loss": 34.2825, "step": 13111 }, { "epoch": 312.1910447761194, "grad_norm": 19.549453735351562, "learning_rate": 9.62574404761905e-06, "loss": 33.9742, "step": 13112 }, { "epoch": 312.21492537313435, "grad_norm": 35.69709014892578, "learning_rate": 9.625e-06, "loss": 33.0387, "step": 13113 }, { "epoch": 312.23880597014926, "grad_norm": 27.12348747253418, "learning_rate": 9.624255952380953e-06, "loss": 34.3725, "step": 13114 }, { "epoch": 312.26268656716417, "grad_norm": 36.873992919921875, "learning_rate": 9.623511904761906e-06, "loss": 33.9663, "step": 13115 }, { "epoch": 312.28656716417913, "grad_norm": 29.34783363342285, "learning_rate": 9.622767857142858e-06, "loss": 34.7257, "step": 13116 }, { "epoch": 312.31044776119404, "grad_norm": 33.33332443237305, "learning_rate": 9.622023809523811e-06, "loss": 33.4715, "step": 13117 }, { "epoch": 312.33432835820895, "grad_norm": 26.727169036865234, "learning_rate": 9.621279761904762e-06, "loss": 32.0446, "step": 13118 }, { "epoch": 312.35820895522386, "grad_norm": 42.13288497924805, "learning_rate": 9.620535714285715e-06, "loss": 32.7165, "step": 13119 }, { "epoch": 312.3820895522388, "grad_norm": 33.71300506591797, "learning_rate": 9.619791666666667e-06, "loss": 33.8545, "step": 13120 }, { "epoch": 312.40597014925373, "grad_norm": 32.45813751220703, "learning_rate": 9.61904761904762e-06, "loss": 34.1513, "step": 13121 }, { "epoch": 312.42985074626864, "grad_norm": 30.027828216552734, "learning_rate": 9.618303571428573e-06, "loss": 33.3435, "step": 13122 }, { "epoch": 312.4537313432836, "grad_norm": 33.02032470703125, "learning_rate": 9.617559523809524e-06, "loss": 33.3302, "step": 13123 }, { "epoch": 312.4776119402985, "grad_norm": 24.927526473999023, "learning_rate": 9.616815476190478e-06, "loss": 32.2108, "step": 13124 }, { "epoch": 312.5014925373134, "grad_norm": 39.30095672607422, "learning_rate": 9.616071428571429e-06, "loss": 34.9208, "step": 13125 }, { "epoch": 312.52537313432833, "grad_norm": 32.02609634399414, "learning_rate": 9.615327380952382e-06, "loss": 33.3949, "step": 13126 }, { "epoch": 312.5492537313433, "grad_norm": 31.11638832092285, "learning_rate": 9.614583333333334e-06, "loss": 33.3447, "step": 13127 }, { "epoch": 312.5731343283582, "grad_norm": 30.669084548950195, "learning_rate": 9.613839285714287e-06, "loss": 34.5168, "step": 13128 }, { "epoch": 312.5970149253731, "grad_norm": 29.634124755859375, "learning_rate": 9.61309523809524e-06, "loss": 34.6274, "step": 13129 }, { "epoch": 312.6208955223881, "grad_norm": 27.90740966796875, "learning_rate": 9.61235119047619e-06, "loss": 33.4124, "step": 13130 }, { "epoch": 312.644776119403, "grad_norm": 34.158634185791016, "learning_rate": 9.611607142857143e-06, "loss": 33.0961, "step": 13131 }, { "epoch": 312.6686567164179, "grad_norm": 32.520843505859375, "learning_rate": 9.610863095238096e-06, "loss": 33.2238, "step": 13132 }, { "epoch": 312.6925373134328, "grad_norm": 33.50034713745117, "learning_rate": 9.610119047619049e-06, "loss": 32.6968, "step": 13133 }, { "epoch": 312.7164179104478, "grad_norm": 30.337360382080078, "learning_rate": 9.609375000000001e-06, "loss": 34.2137, "step": 13134 }, { "epoch": 312.7402985074627, "grad_norm": 30.0190372467041, "learning_rate": 9.608630952380952e-06, "loss": 33.8018, "step": 13135 }, { "epoch": 312.7641791044776, "grad_norm": 29.226835250854492, "learning_rate": 9.607886904761905e-06, "loss": 32.7436, "step": 13136 }, { "epoch": 312.78805970149256, "grad_norm": 31.76357650756836, "learning_rate": 9.607142857142858e-06, "loss": 33.9807, "step": 13137 }, { "epoch": 312.81194029850747, "grad_norm": 28.754568099975586, "learning_rate": 9.60639880952381e-06, "loss": 34.8479, "step": 13138 }, { "epoch": 312.8358208955224, "grad_norm": 31.17036247253418, "learning_rate": 9.605654761904763e-06, "loss": 33.1828, "step": 13139 }, { "epoch": 312.85970149253734, "grad_norm": 25.772232055664062, "learning_rate": 9.604910714285714e-06, "loss": 33.6004, "step": 13140 }, { "epoch": 312.88358208955225, "grad_norm": 35.69013214111328, "learning_rate": 9.604166666666669e-06, "loss": 33.6171, "step": 13141 }, { "epoch": 312.90746268656716, "grad_norm": 32.91059112548828, "learning_rate": 9.60342261904762e-06, "loss": 33.9886, "step": 13142 }, { "epoch": 312.93134328358207, "grad_norm": 29.913450241088867, "learning_rate": 9.602678571428572e-06, "loss": 35.3789, "step": 13143 }, { "epoch": 312.95522388059703, "grad_norm": 26.861034393310547, "learning_rate": 9.601934523809523e-06, "loss": 34.2793, "step": 13144 }, { "epoch": 312.97910447761194, "grad_norm": 30.770784378051758, "learning_rate": 9.601190476190478e-06, "loss": 35.0147, "step": 13145 }, { "epoch": 313.0, "grad_norm": 23.079103469848633, "learning_rate": 9.600446428571429e-06, "loss": 28.4521, "step": 13146 }, { "epoch": 313.0238805970149, "grad_norm": 33.886207580566406, "learning_rate": 9.599702380952381e-06, "loss": 33.3916, "step": 13147 }, { "epoch": 313.0477611940299, "grad_norm": 31.83485221862793, "learning_rate": 9.598958333333334e-06, "loss": 33.3536, "step": 13148 }, { "epoch": 313.0716417910448, "grad_norm": 29.32632827758789, "learning_rate": 9.598214285714287e-06, "loss": 32.6224, "step": 13149 }, { "epoch": 313.0955223880597, "grad_norm": 24.941131591796875, "learning_rate": 9.59747023809524e-06, "loss": 34.0434, "step": 13150 }, { "epoch": 313.1194029850746, "grad_norm": 34.72981643676758, "learning_rate": 9.59672619047619e-06, "loss": 32.9394, "step": 13151 }, { "epoch": 313.14328358208957, "grad_norm": 29.2474365234375, "learning_rate": 9.595982142857143e-06, "loss": 33.6498, "step": 13152 }, { "epoch": 313.1671641791045, "grad_norm": 30.723867416381836, "learning_rate": 9.595238095238096e-06, "loss": 32.7678, "step": 13153 }, { "epoch": 313.1910447761194, "grad_norm": 25.35555648803711, "learning_rate": 9.594494047619048e-06, "loss": 33.7004, "step": 13154 }, { "epoch": 313.21492537313435, "grad_norm": 31.069316864013672, "learning_rate": 9.593750000000001e-06, "loss": 33.2519, "step": 13155 }, { "epoch": 313.23880597014926, "grad_norm": 25.28008460998535, "learning_rate": 9.593005952380952e-06, "loss": 33.5377, "step": 13156 }, { "epoch": 313.26268656716417, "grad_norm": 31.649578094482422, "learning_rate": 9.592261904761906e-06, "loss": 33.7276, "step": 13157 }, { "epoch": 313.28656716417913, "grad_norm": 28.521392822265625, "learning_rate": 9.591517857142857e-06, "loss": 35.3197, "step": 13158 }, { "epoch": 313.31044776119404, "grad_norm": 33.34397506713867, "learning_rate": 9.59077380952381e-06, "loss": 34.1836, "step": 13159 }, { "epoch": 313.33432835820895, "grad_norm": 27.889270782470703, "learning_rate": 9.590029761904763e-06, "loss": 33.4468, "step": 13160 }, { "epoch": 313.35820895522386, "grad_norm": 32.41287612915039, "learning_rate": 9.589285714285716e-06, "loss": 33.6495, "step": 13161 }, { "epoch": 313.3820895522388, "grad_norm": 26.815305709838867, "learning_rate": 9.588541666666668e-06, "loss": 35.5384, "step": 13162 }, { "epoch": 313.40597014925373, "grad_norm": 32.66407012939453, "learning_rate": 9.58779761904762e-06, "loss": 33.0279, "step": 13163 }, { "epoch": 313.42985074626864, "grad_norm": 30.003036499023438, "learning_rate": 9.587053571428572e-06, "loss": 33.8409, "step": 13164 }, { "epoch": 313.4537313432836, "grad_norm": 28.761077880859375, "learning_rate": 9.586309523809525e-06, "loss": 32.8447, "step": 13165 }, { "epoch": 313.4776119402985, "grad_norm": 27.731586456298828, "learning_rate": 9.585565476190477e-06, "loss": 33.9266, "step": 13166 }, { "epoch": 313.5014925373134, "grad_norm": 31.831533432006836, "learning_rate": 9.58482142857143e-06, "loss": 33.5968, "step": 13167 }, { "epoch": 313.52537313432833, "grad_norm": 28.910619735717773, "learning_rate": 9.584077380952381e-06, "loss": 33.6701, "step": 13168 }, { "epoch": 313.5492537313433, "grad_norm": 31.396425247192383, "learning_rate": 9.583333333333335e-06, "loss": 33.6382, "step": 13169 }, { "epoch": 313.5731343283582, "grad_norm": 28.483938217163086, "learning_rate": 9.582589285714286e-06, "loss": 33.671, "step": 13170 }, { "epoch": 313.5970149253731, "grad_norm": 32.068485260009766, "learning_rate": 9.581845238095239e-06, "loss": 33.3914, "step": 13171 }, { "epoch": 313.6208955223881, "grad_norm": 29.09919548034668, "learning_rate": 9.58110119047619e-06, "loss": 32.9918, "step": 13172 }, { "epoch": 313.644776119403, "grad_norm": 30.923269271850586, "learning_rate": 9.580357142857144e-06, "loss": 33.3338, "step": 13173 }, { "epoch": 313.6686567164179, "grad_norm": 26.374174118041992, "learning_rate": 9.579613095238095e-06, "loss": 33.6883, "step": 13174 }, { "epoch": 313.6925373134328, "grad_norm": 29.18936538696289, "learning_rate": 9.578869047619048e-06, "loss": 33.8253, "step": 13175 }, { "epoch": 313.7164179104478, "grad_norm": 25.20747947692871, "learning_rate": 9.578125e-06, "loss": 34.1158, "step": 13176 }, { "epoch": 313.7402985074627, "grad_norm": 32.462371826171875, "learning_rate": 9.577380952380953e-06, "loss": 34.0465, "step": 13177 }, { "epoch": 313.7641791044776, "grad_norm": 33.96836471557617, "learning_rate": 9.576636904761906e-06, "loss": 35.0779, "step": 13178 }, { "epoch": 313.78805970149256, "grad_norm": 27.8643856048584, "learning_rate": 9.575892857142857e-06, "loss": 33.4839, "step": 13179 }, { "epoch": 313.81194029850747, "grad_norm": 25.58667755126953, "learning_rate": 9.57514880952381e-06, "loss": 33.3427, "step": 13180 }, { "epoch": 313.8358208955224, "grad_norm": 31.390897750854492, "learning_rate": 9.574404761904762e-06, "loss": 32.8023, "step": 13181 }, { "epoch": 313.85970149253734, "grad_norm": 27.014434814453125, "learning_rate": 9.573660714285715e-06, "loss": 34.274, "step": 13182 }, { "epoch": 313.88358208955225, "grad_norm": 29.599308013916016, "learning_rate": 9.572916666666668e-06, "loss": 34.3252, "step": 13183 }, { "epoch": 313.90746268656716, "grad_norm": 27.959007263183594, "learning_rate": 9.572172619047619e-06, "loss": 34.3166, "step": 13184 }, { "epoch": 313.93134328358207, "grad_norm": 30.436656951904297, "learning_rate": 9.571428571428573e-06, "loss": 32.5167, "step": 13185 }, { "epoch": 313.95522388059703, "grad_norm": 26.427406311035156, "learning_rate": 9.570684523809524e-06, "loss": 33.1055, "step": 13186 }, { "epoch": 313.97910447761194, "grad_norm": 29.928449630737305, "learning_rate": 9.569940476190477e-06, "loss": 33.9663, "step": 13187 }, { "epoch": 314.0, "grad_norm": 24.019044876098633, "learning_rate": 9.56919642857143e-06, "loss": 29.408, "step": 13188 }, { "epoch": 314.0238805970149, "grad_norm": 30.695575714111328, "learning_rate": 9.568452380952382e-06, "loss": 33.567, "step": 13189 }, { "epoch": 314.0477611940299, "grad_norm": 26.884841918945312, "learning_rate": 9.567708333333335e-06, "loss": 32.6028, "step": 13190 }, { "epoch": 314.0716417910448, "grad_norm": 28.075511932373047, "learning_rate": 9.566964285714286e-06, "loss": 32.629, "step": 13191 }, { "epoch": 314.0955223880597, "grad_norm": 28.43887710571289, "learning_rate": 9.566220238095239e-06, "loss": 33.202, "step": 13192 }, { "epoch": 314.1194029850746, "grad_norm": 30.43881607055664, "learning_rate": 9.565476190476191e-06, "loss": 33.9846, "step": 13193 }, { "epoch": 314.14328358208957, "grad_norm": 28.45292854309082, "learning_rate": 9.564732142857144e-06, "loss": 33.7065, "step": 13194 }, { "epoch": 314.1671641791045, "grad_norm": 32.04459762573242, "learning_rate": 9.563988095238097e-06, "loss": 33.7555, "step": 13195 }, { "epoch": 314.1910447761194, "grad_norm": 25.435636520385742, "learning_rate": 9.563244047619048e-06, "loss": 33.9608, "step": 13196 }, { "epoch": 314.21492537313435, "grad_norm": 29.92577362060547, "learning_rate": 9.562500000000002e-06, "loss": 32.9624, "step": 13197 }, { "epoch": 314.23880597014926, "grad_norm": 26.913516998291016, "learning_rate": 9.561755952380953e-06, "loss": 33.7854, "step": 13198 }, { "epoch": 314.26268656716417, "grad_norm": 28.719717025756836, "learning_rate": 9.561011904761906e-06, "loss": 33.8993, "step": 13199 }, { "epoch": 314.28656716417913, "grad_norm": 25.073945999145508, "learning_rate": 9.560267857142857e-06, "loss": 32.7555, "step": 13200 }, { "epoch": 314.31044776119404, "grad_norm": 34.63137435913086, "learning_rate": 9.559523809523811e-06, "loss": 34.0187, "step": 13201 }, { "epoch": 314.33432835820895, "grad_norm": 28.46396827697754, "learning_rate": 9.558779761904762e-06, "loss": 34.6484, "step": 13202 }, { "epoch": 314.35820895522386, "grad_norm": 29.185827255249023, "learning_rate": 9.558035714285715e-06, "loss": 34.3272, "step": 13203 }, { "epoch": 314.3820895522388, "grad_norm": 30.818063735961914, "learning_rate": 9.557291666666667e-06, "loss": 34.4256, "step": 13204 }, { "epoch": 314.40597014925373, "grad_norm": 27.696474075317383, "learning_rate": 9.55654761904762e-06, "loss": 34.1674, "step": 13205 }, { "epoch": 314.42985074626864, "grad_norm": 24.56853675842285, "learning_rate": 9.555803571428573e-06, "loss": 34.6993, "step": 13206 }, { "epoch": 314.4537313432836, "grad_norm": 34.097747802734375, "learning_rate": 9.555059523809524e-06, "loss": 32.0478, "step": 13207 }, { "epoch": 314.4776119402985, "grad_norm": 30.866113662719727, "learning_rate": 9.554315476190477e-06, "loss": 34.5621, "step": 13208 }, { "epoch": 314.5014925373134, "grad_norm": 30.24768829345703, "learning_rate": 9.55357142857143e-06, "loss": 32.2858, "step": 13209 }, { "epoch": 314.52537313432833, "grad_norm": 27.956802368164062, "learning_rate": 9.552827380952382e-06, "loss": 32.5598, "step": 13210 }, { "epoch": 314.5492537313433, "grad_norm": 29.042564392089844, "learning_rate": 9.552083333333335e-06, "loss": 33.5197, "step": 13211 }, { "epoch": 314.5731343283582, "grad_norm": 25.140233993530273, "learning_rate": 9.551339285714286e-06, "loss": 34.1419, "step": 13212 }, { "epoch": 314.5970149253731, "grad_norm": 32.64944076538086, "learning_rate": 9.55059523809524e-06, "loss": 33.718, "step": 13213 }, { "epoch": 314.6208955223881, "grad_norm": 25.82027816772461, "learning_rate": 9.549851190476191e-06, "loss": 33.3611, "step": 13214 }, { "epoch": 314.644776119403, "grad_norm": 31.322126388549805, "learning_rate": 9.549107142857144e-06, "loss": 35.1478, "step": 13215 }, { "epoch": 314.6686567164179, "grad_norm": 29.41959571838379, "learning_rate": 9.548363095238096e-06, "loss": 33.1272, "step": 13216 }, { "epoch": 314.6925373134328, "grad_norm": 26.451223373413086, "learning_rate": 9.547619047619049e-06, "loss": 34.8698, "step": 13217 }, { "epoch": 314.7164179104478, "grad_norm": 24.3366756439209, "learning_rate": 9.546875000000002e-06, "loss": 33.8223, "step": 13218 }, { "epoch": 314.7402985074627, "grad_norm": 31.37820053100586, "learning_rate": 9.546130952380953e-06, "loss": 34.2753, "step": 13219 }, { "epoch": 314.7641791044776, "grad_norm": 26.769657135009766, "learning_rate": 9.545386904761905e-06, "loss": 33.1156, "step": 13220 }, { "epoch": 314.78805970149256, "grad_norm": 32.10585021972656, "learning_rate": 9.544642857142858e-06, "loss": 34.1055, "step": 13221 }, { "epoch": 314.81194029850747, "grad_norm": 30.26285171508789, "learning_rate": 9.54389880952381e-06, "loss": 33.7381, "step": 13222 }, { "epoch": 314.8358208955224, "grad_norm": 27.461288452148438, "learning_rate": 9.543154761904763e-06, "loss": 33.7444, "step": 13223 }, { "epoch": 314.85970149253734, "grad_norm": 26.743072509765625, "learning_rate": 9.542410714285714e-06, "loss": 34.2697, "step": 13224 }, { "epoch": 314.88358208955225, "grad_norm": 28.258312225341797, "learning_rate": 9.541666666666669e-06, "loss": 32.4133, "step": 13225 }, { "epoch": 314.90746268656716, "grad_norm": 22.394941329956055, "learning_rate": 9.54092261904762e-06, "loss": 32.903, "step": 13226 }, { "epoch": 314.93134328358207, "grad_norm": 30.91286277770996, "learning_rate": 9.540178571428572e-06, "loss": 33.2735, "step": 13227 }, { "epoch": 314.95522388059703, "grad_norm": 26.42424774169922, "learning_rate": 9.539434523809525e-06, "loss": 33.1956, "step": 13228 }, { "epoch": 314.97910447761194, "grad_norm": 27.133575439453125, "learning_rate": 9.538690476190478e-06, "loss": 33.8882, "step": 13229 }, { "epoch": 315.0, "grad_norm": 22.723310470581055, "learning_rate": 9.537946428571429e-06, "loss": 28.9, "step": 13230 }, { "epoch": 315.0238805970149, "grad_norm": 26.996562957763672, "learning_rate": 9.537202380952381e-06, "loss": 33.7979, "step": 13231 }, { "epoch": 315.0477611940299, "grad_norm": 23.435802459716797, "learning_rate": 9.536458333333334e-06, "loss": 34.8608, "step": 13232 }, { "epoch": 315.0716417910448, "grad_norm": 32.33258819580078, "learning_rate": 9.535714285714287e-06, "loss": 32.982, "step": 13233 }, { "epoch": 315.0955223880597, "grad_norm": 28.66262435913086, "learning_rate": 9.53497023809524e-06, "loss": 33.3808, "step": 13234 }, { "epoch": 315.1194029850746, "grad_norm": 31.232383728027344, "learning_rate": 9.53422619047619e-06, "loss": 33.6076, "step": 13235 }, { "epoch": 315.14328358208957, "grad_norm": 28.15165138244629, "learning_rate": 9.533482142857143e-06, "loss": 33.4063, "step": 13236 }, { "epoch": 315.1671641791045, "grad_norm": 26.927061080932617, "learning_rate": 9.532738095238096e-06, "loss": 34.2987, "step": 13237 }, { "epoch": 315.1910447761194, "grad_norm": 25.32600975036621, "learning_rate": 9.531994047619049e-06, "loss": 33.1051, "step": 13238 }, { "epoch": 315.21492537313435, "grad_norm": 27.133333206176758, "learning_rate": 9.531250000000001e-06, "loss": 34.715, "step": 13239 }, { "epoch": 315.23880597014926, "grad_norm": 23.734281539916992, "learning_rate": 9.530505952380952e-06, "loss": 32.7595, "step": 13240 }, { "epoch": 315.26268656716417, "grad_norm": 28.86510467529297, "learning_rate": 9.529761904761905e-06, "loss": 33.33, "step": 13241 }, { "epoch": 315.28656716417913, "grad_norm": 28.08711051940918, "learning_rate": 9.529017857142858e-06, "loss": 33.5788, "step": 13242 }, { "epoch": 315.31044776119404, "grad_norm": 30.653244018554688, "learning_rate": 9.52827380952381e-06, "loss": 33.0861, "step": 13243 }, { "epoch": 315.33432835820895, "grad_norm": 28.1591796875, "learning_rate": 9.527529761904763e-06, "loss": 34.6708, "step": 13244 }, { "epoch": 315.35820895522386, "grad_norm": 26.023433685302734, "learning_rate": 9.526785714285714e-06, "loss": 32.9737, "step": 13245 }, { "epoch": 315.3820895522388, "grad_norm": 20.71750831604004, "learning_rate": 9.526041666666668e-06, "loss": 32.6744, "step": 13246 }, { "epoch": 315.40597014925373, "grad_norm": 29.809507369995117, "learning_rate": 9.52529761904762e-06, "loss": 33.5887, "step": 13247 }, { "epoch": 315.42985074626864, "grad_norm": 24.837921142578125, "learning_rate": 9.524553571428572e-06, "loss": 33.7889, "step": 13248 }, { "epoch": 315.4537313432836, "grad_norm": 34.88161087036133, "learning_rate": 9.523809523809525e-06, "loss": 34.8549, "step": 13249 }, { "epoch": 315.4776119402985, "grad_norm": 28.89891242980957, "learning_rate": 9.523065476190477e-06, "loss": 32.6047, "step": 13250 }, { "epoch": 315.5014925373134, "grad_norm": 24.327743530273438, "learning_rate": 9.52232142857143e-06, "loss": 32.3216, "step": 13251 }, { "epoch": 315.52537313432833, "grad_norm": 25.089570999145508, "learning_rate": 9.521577380952381e-06, "loss": 33.4233, "step": 13252 }, { "epoch": 315.5492537313433, "grad_norm": 23.953487396240234, "learning_rate": 9.520833333333334e-06, "loss": 33.0815, "step": 13253 }, { "epoch": 315.5731343283582, "grad_norm": 20.837909698486328, "learning_rate": 9.520089285714286e-06, "loss": 34.3127, "step": 13254 }, { "epoch": 315.5970149253731, "grad_norm": 26.91604995727539, "learning_rate": 9.51934523809524e-06, "loss": 34.0851, "step": 13255 }, { "epoch": 315.6208955223881, "grad_norm": 20.253692626953125, "learning_rate": 9.518601190476192e-06, "loss": 33.4698, "step": 13256 }, { "epoch": 315.644776119403, "grad_norm": 31.84703826904297, "learning_rate": 9.517857142857143e-06, "loss": 33.5676, "step": 13257 }, { "epoch": 315.6686567164179, "grad_norm": 27.487943649291992, "learning_rate": 9.517113095238096e-06, "loss": 34.4207, "step": 13258 }, { "epoch": 315.6925373134328, "grad_norm": 33.138919830322266, "learning_rate": 9.516369047619048e-06, "loss": 34.3429, "step": 13259 }, { "epoch": 315.7164179104478, "grad_norm": 28.251028060913086, "learning_rate": 9.515625000000001e-06, "loss": 33.6487, "step": 13260 }, { "epoch": 315.7402985074627, "grad_norm": 27.63349151611328, "learning_rate": 9.514880952380952e-06, "loss": 32.2774, "step": 13261 }, { "epoch": 315.7641791044776, "grad_norm": 24.83226776123047, "learning_rate": 9.514136904761906e-06, "loss": 33.2531, "step": 13262 }, { "epoch": 315.78805970149256, "grad_norm": 26.027666091918945, "learning_rate": 9.513392857142857e-06, "loss": 33.6466, "step": 13263 }, { "epoch": 315.81194029850747, "grad_norm": 20.55242919921875, "learning_rate": 9.51264880952381e-06, "loss": 32.6482, "step": 13264 }, { "epoch": 315.8358208955224, "grad_norm": 25.64906120300293, "learning_rate": 9.511904761904763e-06, "loss": 35.2883, "step": 13265 }, { "epoch": 315.85970149253734, "grad_norm": 19.154644012451172, "learning_rate": 9.511160714285715e-06, "loss": 33.8606, "step": 13266 }, { "epoch": 315.88358208955225, "grad_norm": 26.634958267211914, "learning_rate": 9.510416666666668e-06, "loss": 33.1455, "step": 13267 }, { "epoch": 315.90746268656716, "grad_norm": 21.878103256225586, "learning_rate": 9.509672619047619e-06, "loss": 33.9436, "step": 13268 }, { "epoch": 315.93134328358207, "grad_norm": 27.495351791381836, "learning_rate": 9.508928571428572e-06, "loss": 34.0173, "step": 13269 }, { "epoch": 315.95522388059703, "grad_norm": 23.8179874420166, "learning_rate": 9.508184523809524e-06, "loss": 35.3989, "step": 13270 }, { "epoch": 315.97910447761194, "grad_norm": 24.536479949951172, "learning_rate": 9.507440476190477e-06, "loss": 32.9694, "step": 13271 }, { "epoch": 316.0, "grad_norm": 18.76787757873535, "learning_rate": 9.50669642857143e-06, "loss": 27.8328, "step": 13272 }, { "epoch": 316.0238805970149, "grad_norm": 25.226884841918945, "learning_rate": 9.50595238095238e-06, "loss": 32.6869, "step": 13273 }, { "epoch": 316.0477611940299, "grad_norm": 20.781539916992188, "learning_rate": 9.505208333333335e-06, "loss": 34.4851, "step": 13274 }, { "epoch": 316.0716417910448, "grad_norm": 25.45966148376465, "learning_rate": 9.504464285714286e-06, "loss": 32.5034, "step": 13275 }, { "epoch": 316.0955223880597, "grad_norm": 21.303007125854492, "learning_rate": 9.503720238095239e-06, "loss": 34.2349, "step": 13276 }, { "epoch": 316.1194029850746, "grad_norm": 19.09128761291504, "learning_rate": 9.502976190476191e-06, "loss": 33.4227, "step": 13277 }, { "epoch": 316.14328358208957, "grad_norm": 23.925003051757812, "learning_rate": 9.502232142857144e-06, "loss": 34.4937, "step": 13278 }, { "epoch": 316.1671641791045, "grad_norm": 17.679929733276367, "learning_rate": 9.501488095238097e-06, "loss": 33.3494, "step": 13279 }, { "epoch": 316.1910447761194, "grad_norm": 27.09799575805664, "learning_rate": 9.500744047619048e-06, "loss": 33.8014, "step": 13280 }, { "epoch": 316.21492537313435, "grad_norm": 18.58030128479004, "learning_rate": 9.5e-06, "loss": 33.3887, "step": 13281 }, { "epoch": 316.23880597014926, "grad_norm": 27.787328720092773, "learning_rate": 9.499255952380953e-06, "loss": 33.9049, "step": 13282 }, { "epoch": 316.26268656716417, "grad_norm": 24.678585052490234, "learning_rate": 9.498511904761906e-06, "loss": 33.7619, "step": 13283 }, { "epoch": 316.28656716417913, "grad_norm": 24.157333374023438, "learning_rate": 9.497767857142859e-06, "loss": 33.4499, "step": 13284 }, { "epoch": 316.31044776119404, "grad_norm": 21.003877639770508, "learning_rate": 9.49702380952381e-06, "loss": 33.6756, "step": 13285 }, { "epoch": 316.33432835820895, "grad_norm": 24.18219757080078, "learning_rate": 9.496279761904762e-06, "loss": 33.0057, "step": 13286 }, { "epoch": 316.35820895522386, "grad_norm": 19.8808650970459, "learning_rate": 9.495535714285715e-06, "loss": 34.1521, "step": 13287 }, { "epoch": 316.3820895522388, "grad_norm": 26.05632781982422, "learning_rate": 9.494791666666668e-06, "loss": 34.3963, "step": 13288 }, { "epoch": 316.40597014925373, "grad_norm": 21.351581573486328, "learning_rate": 9.494047619047619e-06, "loss": 34.2365, "step": 13289 }, { "epoch": 316.42985074626864, "grad_norm": 26.41434097290039, "learning_rate": 9.493303571428573e-06, "loss": 34.328, "step": 13290 }, { "epoch": 316.4537313432836, "grad_norm": 19.21407699584961, "learning_rate": 9.492559523809524e-06, "loss": 33.1316, "step": 13291 }, { "epoch": 316.4776119402985, "grad_norm": 25.514041900634766, "learning_rate": 9.491815476190477e-06, "loss": 32.9505, "step": 13292 }, { "epoch": 316.5014925373134, "grad_norm": 22.2117919921875, "learning_rate": 9.49107142857143e-06, "loss": 33.7875, "step": 13293 }, { "epoch": 316.52537313432833, "grad_norm": 22.043800354003906, "learning_rate": 9.490327380952382e-06, "loss": 34.766, "step": 13294 }, { "epoch": 316.5492537313433, "grad_norm": 19.45757293701172, "learning_rate": 9.489583333333335e-06, "loss": 33.4709, "step": 13295 }, { "epoch": 316.5731343283582, "grad_norm": 19.759675979614258, "learning_rate": 9.488839285714286e-06, "loss": 33.7494, "step": 13296 }, { "epoch": 316.5970149253731, "grad_norm": 19.454973220825195, "learning_rate": 9.488095238095238e-06, "loss": 34.3957, "step": 13297 }, { "epoch": 316.6208955223881, "grad_norm": 20.610828399658203, "learning_rate": 9.487351190476191e-06, "loss": 32.684, "step": 13298 }, { "epoch": 316.644776119403, "grad_norm": 17.435138702392578, "learning_rate": 9.486607142857144e-06, "loss": 32.6552, "step": 13299 }, { "epoch": 316.6686567164179, "grad_norm": 20.769853591918945, "learning_rate": 9.485863095238096e-06, "loss": 33.4368, "step": 13300 }, { "epoch": 316.6925373134328, "grad_norm": 16.646352767944336, "learning_rate": 9.485119047619047e-06, "loss": 33.1157, "step": 13301 }, { "epoch": 316.7164179104478, "grad_norm": 18.582733154296875, "learning_rate": 9.484375000000002e-06, "loss": 34.6879, "step": 13302 }, { "epoch": 316.7402985074627, "grad_norm": 17.33199119567871, "learning_rate": 9.483630952380953e-06, "loss": 34.0831, "step": 13303 }, { "epoch": 316.7641791044776, "grad_norm": 18.06012725830078, "learning_rate": 9.482886904761906e-06, "loss": 32.4395, "step": 13304 }, { "epoch": 316.78805970149256, "grad_norm": 16.276689529418945, "learning_rate": 9.482142857142858e-06, "loss": 33.2182, "step": 13305 }, { "epoch": 316.81194029850747, "grad_norm": 17.239099502563477, "learning_rate": 9.481398809523811e-06, "loss": 33.6361, "step": 13306 }, { "epoch": 316.8358208955224, "grad_norm": 14.224556922912598, "learning_rate": 9.480654761904764e-06, "loss": 33.7979, "step": 13307 }, { "epoch": 316.85970149253734, "grad_norm": 17.398347854614258, "learning_rate": 9.479910714285715e-06, "loss": 32.7602, "step": 13308 }, { "epoch": 316.88358208955225, "grad_norm": 16.360572814941406, "learning_rate": 9.479166666666667e-06, "loss": 32.7625, "step": 13309 }, { "epoch": 316.90746268656716, "grad_norm": 19.49298858642578, "learning_rate": 9.47842261904762e-06, "loss": 32.7342, "step": 13310 }, { "epoch": 316.93134328358207, "grad_norm": 16.53718376159668, "learning_rate": 9.477678571428573e-06, "loss": 34.6333, "step": 13311 }, { "epoch": 316.95522388059703, "grad_norm": 17.28750228881836, "learning_rate": 9.476934523809525e-06, "loss": 33.6695, "step": 13312 }, { "epoch": 316.97910447761194, "grad_norm": 17.403059005737305, "learning_rate": 9.476190476190476e-06, "loss": 33.5419, "step": 13313 }, { "epoch": 317.0, "grad_norm": 15.339287757873535, "learning_rate": 9.475446428571429e-06, "loss": 29.8176, "step": 13314 }, { "epoch": 317.0238805970149, "grad_norm": 16.032194137573242, "learning_rate": 9.474702380952382e-06, "loss": 33.0393, "step": 13315 }, { "epoch": 317.0477611940299, "grad_norm": 15.282236099243164, "learning_rate": 9.473958333333334e-06, "loss": 31.657, "step": 13316 }, { "epoch": 317.0716417910448, "grad_norm": 19.055511474609375, "learning_rate": 9.473214285714285e-06, "loss": 33.7761, "step": 13317 }, { "epoch": 317.0955223880597, "grad_norm": 17.56702423095703, "learning_rate": 9.47247023809524e-06, "loss": 32.8771, "step": 13318 }, { "epoch": 317.1194029850746, "grad_norm": 17.464839935302734, "learning_rate": 9.47172619047619e-06, "loss": 33.3261, "step": 13319 }, { "epoch": 317.14328358208957, "grad_norm": 13.382113456726074, "learning_rate": 9.470982142857143e-06, "loss": 31.7969, "step": 13320 }, { "epoch": 317.1671641791045, "grad_norm": 18.935489654541016, "learning_rate": 9.470238095238096e-06, "loss": 33.3907, "step": 13321 }, { "epoch": 317.1910447761194, "grad_norm": 15.993891716003418, "learning_rate": 9.469494047619049e-06, "loss": 33.4379, "step": 13322 }, { "epoch": 317.21492537313435, "grad_norm": 19.52393913269043, "learning_rate": 9.468750000000001e-06, "loss": 34.6754, "step": 13323 }, { "epoch": 317.23880597014926, "grad_norm": 15.924966812133789, "learning_rate": 9.468005952380952e-06, "loss": 33.9633, "step": 13324 }, { "epoch": 317.26268656716417, "grad_norm": 20.609539031982422, "learning_rate": 9.467261904761905e-06, "loss": 33.7411, "step": 13325 }, { "epoch": 317.28656716417913, "grad_norm": 15.784844398498535, "learning_rate": 9.466517857142858e-06, "loss": 34.4262, "step": 13326 }, { "epoch": 317.31044776119404, "grad_norm": 19.99517822265625, "learning_rate": 9.46577380952381e-06, "loss": 32.7825, "step": 13327 }, { "epoch": 317.33432835820895, "grad_norm": 18.543437957763672, "learning_rate": 9.465029761904763e-06, "loss": 34.3556, "step": 13328 }, { "epoch": 317.35820895522386, "grad_norm": 16.29489517211914, "learning_rate": 9.464285714285714e-06, "loss": 32.9021, "step": 13329 }, { "epoch": 317.3820895522388, "grad_norm": 20.580856323242188, "learning_rate": 9.463541666666669e-06, "loss": 33.8491, "step": 13330 }, { "epoch": 317.40597014925373, "grad_norm": 20.105560302734375, "learning_rate": 9.46279761904762e-06, "loss": 33.9351, "step": 13331 }, { "epoch": 317.42985074626864, "grad_norm": 16.851537704467773, "learning_rate": 9.462053571428572e-06, "loss": 33.1687, "step": 13332 }, { "epoch": 317.4537313432836, "grad_norm": 21.92867660522461, "learning_rate": 9.461309523809525e-06, "loss": 32.6336, "step": 13333 }, { "epoch": 317.4776119402985, "grad_norm": 16.571556091308594, "learning_rate": 9.460565476190478e-06, "loss": 34.0379, "step": 13334 }, { "epoch": 317.5014925373134, "grad_norm": NaN, "learning_rate": 9.45982142857143e-06, "loss": 30.4668, "step": 13335 }, { "epoch": 317.52537313432833, "grad_norm": 18.779396057128906, "learning_rate": 9.45982142857143e-06, "loss": 33.8917, "step": 13336 }, { "epoch": 317.5492537313433, "grad_norm": NaN, "learning_rate": 9.459077380952381e-06, "loss": 42.368, "step": 13337 }, { "epoch": 317.5731343283582, "grad_norm": 19.674638748168945, "learning_rate": 9.459077380952381e-06, "loss": 34.1599, "step": 13338 }, { "epoch": 317.5970149253731, "grad_norm": 19.24485206604004, "learning_rate": 9.458333333333334e-06, "loss": 32.0584, "step": 13339 }, { "epoch": 317.6208955223881, "grad_norm": 14.414884567260742, "learning_rate": 9.457589285714287e-06, "loss": 34.9061, "step": 13340 }, { "epoch": 317.644776119403, "grad_norm": 21.131359100341797, "learning_rate": 9.45684523809524e-06, "loss": 33.7017, "step": 13341 }, { "epoch": 317.6686567164179, "grad_norm": 15.800726890563965, "learning_rate": 9.456101190476192e-06, "loss": 33.9411, "step": 13342 }, { "epoch": 317.6925373134328, "grad_norm": 21.80306625366211, "learning_rate": 9.455357142857143e-06, "loss": 33.8175, "step": 13343 }, { "epoch": 317.7164179104478, "grad_norm": 16.657373428344727, "learning_rate": 9.454613095238097e-06, "loss": 33.26, "step": 13344 }, { "epoch": 317.7402985074627, "grad_norm": 17.922826766967773, "learning_rate": 9.453869047619048e-06, "loss": 34.078, "step": 13345 }, { "epoch": 317.7641791044776, "grad_norm": 19.15614891052246, "learning_rate": 9.453125000000001e-06, "loss": 33.5982, "step": 13346 }, { "epoch": 317.78805970149256, "grad_norm": 15.744733810424805, "learning_rate": 9.452380952380952e-06, "loss": 33.6735, "step": 13347 }, { "epoch": 317.81194029850747, "grad_norm": 16.33230209350586, "learning_rate": 9.451636904761905e-06, "loss": 33.8818, "step": 13348 }, { "epoch": 317.8358208955224, "grad_norm": 17.132356643676758, "learning_rate": 9.450892857142857e-06, "loss": 33.674, "step": 13349 }, { "epoch": 317.85970149253734, "grad_norm": 15.774198532104492, "learning_rate": 9.45014880952381e-06, "loss": 32.8711, "step": 13350 }, { "epoch": 317.88358208955225, "grad_norm": 16.86773109436035, "learning_rate": 9.449404761904763e-06, "loss": 32.9124, "step": 13351 }, { "epoch": 317.90746268656716, "grad_norm": 16.00055503845215, "learning_rate": 9.448660714285714e-06, "loss": 33.6463, "step": 13352 }, { "epoch": 317.93134328358207, "grad_norm": 18.052488327026367, "learning_rate": 9.447916666666668e-06, "loss": 33.5122, "step": 13353 }, { "epoch": 317.95522388059703, "grad_norm": 17.009675979614258, "learning_rate": 9.44717261904762e-06, "loss": 34.4861, "step": 13354 }, { "epoch": 317.97910447761194, "grad_norm": 16.055465698242188, "learning_rate": 9.446428571428572e-06, "loss": 34.481, "step": 13355 }, { "epoch": 318.0, "grad_norm": 15.88033390045166, "learning_rate": 9.445684523809525e-06, "loss": 28.3286, "step": 13356 }, { "epoch": 318.0238805970149, "grad_norm": 15.0000638961792, "learning_rate": 9.444940476190477e-06, "loss": 31.8116, "step": 13357 }, { "epoch": 318.0477611940299, "grad_norm": 18.877826690673828, "learning_rate": 9.44419642857143e-06, "loss": 33.1175, "step": 13358 }, { "epoch": 318.0716417910448, "grad_norm": 22.391193389892578, "learning_rate": 9.443452380952381e-06, "loss": 31.6244, "step": 13359 }, { "epoch": 318.0955223880597, "grad_norm": 20.479522705078125, "learning_rate": 9.442708333333334e-06, "loss": 32.5579, "step": 13360 }, { "epoch": 318.1194029850746, "grad_norm": 17.490135192871094, "learning_rate": 9.441964285714286e-06, "loss": 34.694, "step": 13361 }, { "epoch": 318.14328358208957, "grad_norm": 19.028148651123047, "learning_rate": 9.441220238095239e-06, "loss": 34.1734, "step": 13362 }, { "epoch": 318.1671641791045, "grad_norm": 17.874404907226562, "learning_rate": 9.440476190476192e-06, "loss": 34.1923, "step": 13363 }, { "epoch": 318.1910447761194, "grad_norm": 15.767306327819824, "learning_rate": 9.439732142857143e-06, "loss": 32.3749, "step": 13364 }, { "epoch": 318.21492537313435, "grad_norm": 24.332582473754883, "learning_rate": 9.438988095238097e-06, "loss": 33.3351, "step": 13365 }, { "epoch": 318.23880597014926, "grad_norm": 16.11850929260254, "learning_rate": 9.438244047619048e-06, "loss": 34.5669, "step": 13366 }, { "epoch": 318.26268656716417, "grad_norm": 24.260330200195312, "learning_rate": 9.4375e-06, "loss": 33.4101, "step": 13367 }, { "epoch": 318.28656716417913, "grad_norm": 17.738794326782227, "learning_rate": 9.436755952380953e-06, "loss": 34.4332, "step": 13368 }, { "epoch": 318.31044776119404, "grad_norm": 21.241287231445312, "learning_rate": 9.436011904761906e-06, "loss": 33.3193, "step": 13369 }, { "epoch": 318.33432835820895, "grad_norm": 23.853557586669922, "learning_rate": 9.435267857142859e-06, "loss": 33.697, "step": 13370 }, { "epoch": 318.35820895522386, "grad_norm": 17.392017364501953, "learning_rate": 9.43452380952381e-06, "loss": 33.655, "step": 13371 }, { "epoch": 318.3820895522388, "grad_norm": 16.22450065612793, "learning_rate": 9.433779761904762e-06, "loss": 34.6262, "step": 13372 }, { "epoch": 318.40597014925373, "grad_norm": 17.703052520751953, "learning_rate": 9.433035714285715e-06, "loss": 33.3349, "step": 13373 }, { "epoch": 318.42985074626864, "grad_norm": 19.10869598388672, "learning_rate": 9.432291666666668e-06, "loss": 33.0067, "step": 13374 }, { "epoch": 318.4537313432836, "grad_norm": 18.446027755737305, "learning_rate": 9.431547619047619e-06, "loss": 33.9296, "step": 13375 }, { "epoch": 318.4776119402985, "grad_norm": 15.902649879455566, "learning_rate": 9.430803571428571e-06, "loss": 32.2868, "step": 13376 }, { "epoch": 318.5014925373134, "grad_norm": 14.964925765991211, "learning_rate": 9.430059523809524e-06, "loss": 33.1198, "step": 13377 }, { "epoch": 318.52537313432833, "grad_norm": 19.35045623779297, "learning_rate": 9.429315476190477e-06, "loss": 34.2293, "step": 13378 }, { "epoch": 318.5492537313433, "grad_norm": 20.05290412902832, "learning_rate": 9.42857142857143e-06, "loss": 34.2937, "step": 13379 }, { "epoch": 318.5731343283582, "grad_norm": 16.73937225341797, "learning_rate": 9.42782738095238e-06, "loss": 33.7444, "step": 13380 }, { "epoch": 318.5970149253731, "grad_norm": 21.402694702148438, "learning_rate": 9.427083333333335e-06, "loss": 33.3548, "step": 13381 }, { "epoch": 318.6208955223881, "grad_norm": 17.070066452026367, "learning_rate": 9.426339285714286e-06, "loss": 31.5075, "step": 13382 }, { "epoch": 318.644776119403, "grad_norm": 17.344465255737305, "learning_rate": 9.425595238095239e-06, "loss": 32.3674, "step": 13383 }, { "epoch": 318.6686567164179, "grad_norm": 21.59908103942871, "learning_rate": 9.424851190476191e-06, "loss": 34.1737, "step": 13384 }, { "epoch": 318.6925373134328, "grad_norm": 18.76130485534668, "learning_rate": 9.424107142857144e-06, "loss": 34.4727, "step": 13385 }, { "epoch": 318.7164179104478, "grad_norm": 22.039154052734375, "learning_rate": 9.423363095238097e-06, "loss": 33.493, "step": 13386 }, { "epoch": 318.7402985074627, "grad_norm": 29.23167610168457, "learning_rate": 9.422619047619048e-06, "loss": 34.2462, "step": 13387 }, { "epoch": 318.7641791044776, "grad_norm": 18.083833694458008, "learning_rate": 9.421875e-06, "loss": 33.8711, "step": 13388 }, { "epoch": 318.78805970149256, "grad_norm": 39.312557220458984, "learning_rate": 9.421130952380953e-06, "loss": 33.8413, "step": 13389 }, { "epoch": 318.81194029850747, "grad_norm": 31.588090896606445, "learning_rate": 9.420386904761906e-06, "loss": 32.65, "step": 13390 }, { "epoch": 318.8358208955224, "grad_norm": 36.38351821899414, "learning_rate": 9.419642857142858e-06, "loss": 33.493, "step": 13391 }, { "epoch": 318.85970149253734, "grad_norm": 33.490604400634766, "learning_rate": 9.41889880952381e-06, "loss": 33.9192, "step": 13392 }, { "epoch": 318.88358208955225, "grad_norm": 29.810832977294922, "learning_rate": 9.418154761904764e-06, "loss": 35.1813, "step": 13393 }, { "epoch": 318.90746268656716, "grad_norm": 26.900054931640625, "learning_rate": 9.417410714285715e-06, "loss": 33.8841, "step": 13394 }, { "epoch": 318.93134328358207, "grad_norm": 30.67934226989746, "learning_rate": 9.416666666666667e-06, "loss": 33.7924, "step": 13395 }, { "epoch": 318.95522388059703, "grad_norm": 23.40369987487793, "learning_rate": 9.41592261904762e-06, "loss": 33.03, "step": 13396 }, { "epoch": 318.97910447761194, "grad_norm": 32.82838821411133, "learning_rate": 9.415178571428573e-06, "loss": 33.2242, "step": 13397 }, { "epoch": 319.0, "grad_norm": 22.915206909179688, "learning_rate": 9.414434523809525e-06, "loss": 29.838, "step": 13398 }, { "epoch": 319.0238805970149, "grad_norm": 35.52446746826172, "learning_rate": 9.413690476190476e-06, "loss": 33.485, "step": 13399 }, { "epoch": 319.0477611940299, "grad_norm": 29.487043380737305, "learning_rate": 9.41294642857143e-06, "loss": 34.3338, "step": 13400 }, { "epoch": 319.0716417910448, "grad_norm": 36.47984313964844, "learning_rate": 9.412202380952382e-06, "loss": 34.2982, "step": 13401 }, { "epoch": 319.0955223880597, "grad_norm": 33.790008544921875, "learning_rate": 9.411458333333335e-06, "loss": 34.5241, "step": 13402 }, { "epoch": 319.1194029850746, "grad_norm": 27.084001541137695, "learning_rate": 9.410714285714286e-06, "loss": 34.5703, "step": 13403 }, { "epoch": 319.14328358208957, "grad_norm": 26.034971237182617, "learning_rate": 9.409970238095238e-06, "loss": 33.2482, "step": 13404 }, { "epoch": 319.1671641791045, "grad_norm": 28.040359497070312, "learning_rate": 9.409226190476191e-06, "loss": 33.4084, "step": 13405 }, { "epoch": 319.1910447761194, "grad_norm": 23.912641525268555, "learning_rate": 9.408482142857144e-06, "loss": 33.0828, "step": 13406 }, { "epoch": 319.21492537313435, "grad_norm": 31.83680534362793, "learning_rate": 9.407738095238096e-06, "loss": 33.9288, "step": 13407 }, { "epoch": 319.23880597014926, "grad_norm": 28.342632293701172, "learning_rate": 9.406994047619047e-06, "loss": 33.3371, "step": 13408 }, { "epoch": 319.26268656716417, "grad_norm": 34.55799102783203, "learning_rate": 9.406250000000002e-06, "loss": 33.6974, "step": 13409 }, { "epoch": 319.28656716417913, "grad_norm": 30.760406494140625, "learning_rate": 9.405505952380953e-06, "loss": 32.5653, "step": 13410 }, { "epoch": 319.31044776119404, "grad_norm": 28.75543785095215, "learning_rate": 9.404761904761905e-06, "loss": 33.0336, "step": 13411 }, { "epoch": 319.33432835820895, "grad_norm": 26.500202178955078, "learning_rate": 9.404017857142858e-06, "loss": 32.265, "step": 13412 }, { "epoch": 319.35820895522386, "grad_norm": 29.624961853027344, "learning_rate": 9.40327380952381e-06, "loss": 34.9516, "step": 13413 }, { "epoch": 319.3820895522388, "grad_norm": 29.508689880371094, "learning_rate": 9.402529761904763e-06, "loss": 33.3541, "step": 13414 }, { "epoch": 319.40597014925373, "grad_norm": 27.894058227539062, "learning_rate": 9.401785714285714e-06, "loss": 33.3962, "step": 13415 }, { "epoch": 319.42985074626864, "grad_norm": 25.896587371826172, "learning_rate": 9.401041666666667e-06, "loss": 32.8412, "step": 13416 }, { "epoch": 319.4537313432836, "grad_norm": 31.310272216796875, "learning_rate": 9.40029761904762e-06, "loss": 33.6401, "step": 13417 }, { "epoch": 319.4776119402985, "grad_norm": 25.35869598388672, "learning_rate": 9.399553571428572e-06, "loss": 33.2427, "step": 13418 }, { "epoch": 319.5014925373134, "grad_norm": 34.811676025390625, "learning_rate": 9.398809523809525e-06, "loss": 32.974, "step": 13419 }, { "epoch": 319.52537313432833, "grad_norm": 33.234920501708984, "learning_rate": 9.398065476190476e-06, "loss": 32.5361, "step": 13420 }, { "epoch": 319.5492537313433, "grad_norm": 30.192657470703125, "learning_rate": 9.39732142857143e-06, "loss": 32.8948, "step": 13421 }, { "epoch": 319.5731343283582, "grad_norm": 28.203994750976562, "learning_rate": 9.396577380952381e-06, "loss": 34.2486, "step": 13422 }, { "epoch": 319.5970149253731, "grad_norm": 27.182098388671875, "learning_rate": 9.395833333333334e-06, "loss": 33.7982, "step": 13423 }, { "epoch": 319.6208955223881, "grad_norm": 22.803674697875977, "learning_rate": 9.395089285714287e-06, "loss": 32.5016, "step": 13424 }, { "epoch": 319.644776119403, "grad_norm": 35.993038177490234, "learning_rate": 9.39434523809524e-06, "loss": 32.6825, "step": 13425 }, { "epoch": 319.6686567164179, "grad_norm": 31.017728805541992, "learning_rate": 9.393601190476192e-06, "loss": 32.9535, "step": 13426 }, { "epoch": 319.6925373134328, "grad_norm": 27.446929931640625, "learning_rate": 9.392857142857143e-06, "loss": 31.9826, "step": 13427 }, { "epoch": 319.7164179104478, "grad_norm": 28.538211822509766, "learning_rate": 9.392113095238096e-06, "loss": 33.4122, "step": 13428 }, { "epoch": 319.7402985074627, "grad_norm": 30.983957290649414, "learning_rate": 9.391369047619049e-06, "loss": 33.2667, "step": 13429 }, { "epoch": 319.7641791044776, "grad_norm": 26.290386199951172, "learning_rate": 9.390625000000001e-06, "loss": 34.2779, "step": 13430 }, { "epoch": 319.78805970149256, "grad_norm": 32.49992370605469, "learning_rate": 9.389880952380954e-06, "loss": 34.1709, "step": 13431 }, { "epoch": 319.81194029850747, "grad_norm": 29.13446044921875, "learning_rate": 9.389136904761905e-06, "loss": 33.7185, "step": 13432 }, { "epoch": 319.8358208955224, "grad_norm": 28.562192916870117, "learning_rate": 9.388392857142858e-06, "loss": 33.248, "step": 13433 }, { "epoch": 319.85970149253734, "grad_norm": 28.523258209228516, "learning_rate": 9.38764880952381e-06, "loss": 34.4778, "step": 13434 }, { "epoch": 319.88358208955225, "grad_norm": 29.057281494140625, "learning_rate": 9.386904761904763e-06, "loss": 34.3028, "step": 13435 }, { "epoch": 319.90746268656716, "grad_norm": 25.81745147705078, "learning_rate": 9.386160714285714e-06, "loss": 32.741, "step": 13436 }, { "epoch": 319.93134328358207, "grad_norm": 30.437976837158203, "learning_rate": 9.385416666666668e-06, "loss": 33.759, "step": 13437 }, { "epoch": 319.95522388059703, "grad_norm": 28.955860137939453, "learning_rate": 9.38467261904762e-06, "loss": 33.6976, "step": 13438 }, { "epoch": 319.97910447761194, "grad_norm": 29.05436897277832, "learning_rate": 9.383928571428572e-06, "loss": 33.157, "step": 13439 }, { "epoch": 320.0, "grad_norm": 22.26370620727539, "learning_rate": 9.383184523809525e-06, "loss": 29.4054, "step": 13440 }, { "epoch": 320.0, "step": 13440, "total_flos": 6.60699028639923e+17, "train_loss": 2.113268469345002, "train_runtime": 25654.5719, "train_samples_per_second": 66.758, "train_steps_per_second": 0.524 }, { "epoch": 320.0238805970149, "grad_norm": 28.20069122314453, "learning_rate": 1e-05, "loss": 34.3924, "step": 13441 }, { "epoch": 320.0477611940299, "grad_norm": Infinity, "learning_rate": 9.999299719887955e-06, "loss": 41.6766, "step": 13442 }, { "epoch": 320.0716417910448, "grad_norm": Infinity, "learning_rate": 9.999299719887955e-06, "loss": 40.1499, "step": 13443 }, { "epoch": 320.0955223880597, "grad_norm": 457.32623291015625, "learning_rate": 9.999299719887955e-06, "loss": 41.1706, "step": 13444 }, { "epoch": 320.1194029850746, "grad_norm": 280.8936462402344, "learning_rate": 9.998599439775911e-06, "loss": 38.602, "step": 13445 }, { "epoch": 320.14328358208957, "grad_norm": 97.15399932861328, "learning_rate": 9.997899159663866e-06, "loss": 35.4426, "step": 13446 }, { "epoch": 320.1671641791045, "grad_norm": 90.87565612792969, "learning_rate": 9.997198879551822e-06, "loss": 35.2902, "step": 13447 }, { "epoch": 320.1910447761194, "grad_norm": 83.8344497680664, "learning_rate": 9.996498599439777e-06, "loss": 34.5836, "step": 13448 }, { "epoch": 320.21492537313435, "grad_norm": 49.05875778198242, "learning_rate": 9.995798319327733e-06, "loss": 34.2659, "step": 13449 }, { "epoch": 320.23880597014926, "grad_norm": 40.812782287597656, "learning_rate": 9.995098039215687e-06, "loss": 34.6545, "step": 13450 }, { "epoch": 320.26268656716417, "grad_norm": 36.72706604003906, "learning_rate": 9.994397759103642e-06, "loss": 33.7638, "step": 13451 }, { "epoch": 320.28656716417913, "grad_norm": 26.809864044189453, "learning_rate": 9.993697478991598e-06, "loss": 34.318, "step": 13452 }, { "epoch": 320.31044776119404, "grad_norm": 24.38344383239746, "learning_rate": 9.992997198879552e-06, "loss": 34.3693, "step": 13453 }, { "epoch": 320.33432835820895, "grad_norm": 23.269834518432617, "learning_rate": 9.992296918767508e-06, "loss": 33.8004, "step": 13454 }, { "epoch": 320.35820895522386, "grad_norm": 25.902122497558594, "learning_rate": 9.991596638655463e-06, "loss": 33.1348, "step": 13455 }, { "epoch": 320.3820895522388, "grad_norm": 24.044321060180664, "learning_rate": 9.990896358543417e-06, "loss": 33.7546, "step": 13456 }, { "epoch": 320.40597014925373, "grad_norm": 21.03065299987793, "learning_rate": 9.990196078431374e-06, "loss": 36.0693, "step": 13457 }, { "epoch": 320.42985074626864, "grad_norm": 18.3806209564209, "learning_rate": 9.989495798319328e-06, "loss": 33.4097, "step": 13458 }, { "epoch": 320.4537313432836, "grad_norm": 21.226511001586914, "learning_rate": 9.988795518207284e-06, "loss": 34.5919, "step": 13459 }, { "epoch": 320.4776119402985, "grad_norm": 19.58074188232422, "learning_rate": 9.988095238095239e-06, "loss": 34.0975, "step": 13460 }, { "epoch": 320.5014925373134, "grad_norm": 20.49414825439453, "learning_rate": 9.987394957983195e-06, "loss": 32.4821, "step": 13461 }, { "epoch": 320.52537313432833, "grad_norm": 15.913312911987305, "learning_rate": 9.98669467787115e-06, "loss": 33.5827, "step": 13462 }, { "epoch": 320.5492537313433, "grad_norm": 19.110130310058594, "learning_rate": 9.985994397759104e-06, "loss": 32.8113, "step": 13463 }, { "epoch": 320.5731343283582, "grad_norm": 17.153520584106445, "learning_rate": 9.98529411764706e-06, "loss": 33.1583, "step": 13464 }, { "epoch": 320.5970149253731, "grad_norm": 21.01837158203125, "learning_rate": 9.984593837535014e-06, "loss": 34.1792, "step": 13465 }, { "epoch": 320.6208955223881, "grad_norm": 19.879121780395508, "learning_rate": 9.98389355742297e-06, "loss": 32.8184, "step": 13466 }, { "epoch": 320.644776119403, "grad_norm": 18.8907470703125, "learning_rate": 9.983193277310925e-06, "loss": 34.2799, "step": 13467 }, { "epoch": 320.6686567164179, "grad_norm": 15.665071487426758, "learning_rate": 9.982492997198881e-06, "loss": 32.427, "step": 13468 }, { "epoch": 320.6925373134328, "grad_norm": 21.888671875, "learning_rate": 9.981792717086836e-06, "loss": 33.1517, "step": 13469 }, { "epoch": 320.7164179104478, "grad_norm": 24.189502716064453, "learning_rate": 9.98109243697479e-06, "loss": 33.1814, "step": 13470 }, { "epoch": 320.7402985074627, "grad_norm": 16.405902862548828, "learning_rate": 9.980392156862746e-06, "loss": 34.7406, "step": 13471 }, { "epoch": 320.7641791044776, "grad_norm": 20.98548698425293, "learning_rate": 9.9796918767507e-06, "loss": 33.1669, "step": 13472 }, { "epoch": 320.78805970149256, "grad_norm": 21.629098892211914, "learning_rate": 9.978991596638657e-06, "loss": 32.2965, "step": 13473 }, { "epoch": 320.81194029850747, "grad_norm": 15.458964347839355, "learning_rate": 9.978291316526611e-06, "loss": 33.3416, "step": 13474 }, { "epoch": 320.8358208955224, "grad_norm": 17.867895126342773, "learning_rate": 9.977591036414566e-06, "loss": 33.2184, "step": 13475 }, { "epoch": 320.85970149253734, "grad_norm": 20.45394515991211, "learning_rate": 9.976890756302522e-06, "loss": 34.4215, "step": 13476 }, { "epoch": 320.88358208955225, "grad_norm": 17.875850677490234, "learning_rate": 9.976190476190477e-06, "loss": 33.7006, "step": 13477 }, { "epoch": 320.90746268656716, "grad_norm": 15.243967056274414, "learning_rate": 9.975490196078433e-06, "loss": 34.8511, "step": 13478 }, { "epoch": 320.93134328358207, "grad_norm": 15.511274337768555, "learning_rate": 9.974789915966387e-06, "loss": 32.6283, "step": 13479 }, { "epoch": 320.95522388059703, "grad_norm": 16.13764762878418, "learning_rate": 9.974089635854343e-06, "loss": 33.5455, "step": 13480 }, { "epoch": 320.97910447761194, "grad_norm": 17.346710205078125, "learning_rate": 9.973389355742298e-06, "loss": 33.0761, "step": 13481 }, { "epoch": 321.0, "grad_norm": 17.808698654174805, "learning_rate": 9.972689075630252e-06, "loss": 28.4847, "step": 13482 }, { "epoch": 321.0238805970149, "grad_norm": 18.072603225708008, "learning_rate": 9.971988795518209e-06, "loss": 32.941, "step": 13483 }, { "epoch": 321.0477611940299, "grad_norm": 17.086442947387695, "learning_rate": 9.971288515406163e-06, "loss": 33.6863, "step": 13484 }, { "epoch": 321.0716417910448, "grad_norm": 16.671628952026367, "learning_rate": 9.970588235294119e-06, "loss": 33.9508, "step": 13485 }, { "epoch": 321.0955223880597, "grad_norm": 18.872169494628906, "learning_rate": 9.969887955182074e-06, "loss": 33.033, "step": 13486 }, { "epoch": 321.1194029850746, "grad_norm": 15.115374565124512, "learning_rate": 9.969187675070028e-06, "loss": 34.2851, "step": 13487 }, { "epoch": 321.14328358208957, "grad_norm": 20.84256362915039, "learning_rate": 9.968487394957984e-06, "loss": 34.6891, "step": 13488 }, { "epoch": 321.1671641791045, "grad_norm": 17.528242111206055, "learning_rate": 9.967787114845939e-06, "loss": 33.3708, "step": 13489 }, { "epoch": 321.1910447761194, "grad_norm": 20.999752044677734, "learning_rate": 9.967086834733895e-06, "loss": 32.796, "step": 13490 }, { "epoch": 321.21492537313435, "grad_norm": 19.596973419189453, "learning_rate": 9.96638655462185e-06, "loss": 34.272, "step": 13491 }, { "epoch": 321.23880597014926, "grad_norm": 14.58626651763916, "learning_rate": 9.965686274509806e-06, "loss": 33.3363, "step": 13492 }, { "epoch": 321.26268656716417, "grad_norm": 17.104713439941406, "learning_rate": 9.96498599439776e-06, "loss": 33.3183, "step": 13493 }, { "epoch": 321.28656716417913, "grad_norm": 20.347379684448242, "learning_rate": 9.964285714285714e-06, "loss": 32.8743, "step": 13494 }, { "epoch": 321.31044776119404, "grad_norm": 18.913585662841797, "learning_rate": 9.96358543417367e-06, "loss": 33.2669, "step": 13495 }, { "epoch": 321.33432835820895, "grad_norm": 14.634831428527832, "learning_rate": 9.962885154061625e-06, "loss": 31.4955, "step": 13496 }, { "epoch": 321.35820895522386, "grad_norm": 27.42943572998047, "learning_rate": 9.962184873949581e-06, "loss": 33.7415, "step": 13497 }, { "epoch": 321.3820895522388, "grad_norm": 17.25787925720215, "learning_rate": 9.961484593837536e-06, "loss": 33.0441, "step": 13498 }, { "epoch": 321.40597014925373, "grad_norm": 24.47115135192871, "learning_rate": 9.960784313725492e-06, "loss": 33.32, "step": 13499 }, { "epoch": 321.42985074626864, "grad_norm": 23.04132652282715, "learning_rate": 9.960084033613446e-06, "loss": 33.6568, "step": 13500 }, { "epoch": 321.4537313432836, "grad_norm": 17.9874324798584, "learning_rate": 9.959383753501401e-06, "loss": 33.5015, "step": 13501 }, { "epoch": 321.4776119402985, "grad_norm": 23.682321548461914, "learning_rate": 9.958683473389357e-06, "loss": 33.7339, "step": 13502 }, { "epoch": 321.5014925373134, "grad_norm": 18.65334129333496, "learning_rate": 9.957983193277312e-06, "loss": 33.5823, "step": 13503 }, { "epoch": 321.52537313432833, "grad_norm": 24.34517478942871, "learning_rate": 9.957282913165268e-06, "loss": 32.9567, "step": 13504 }, { "epoch": 321.5492537313433, "grad_norm": 23.741682052612305, "learning_rate": 9.956582633053222e-06, "loss": 33.0551, "step": 13505 }, { "epoch": 321.5731343283582, "grad_norm": 18.40947723388672, "learning_rate": 9.955882352941177e-06, "loss": 34.1149, "step": 13506 }, { "epoch": 321.5970149253731, "grad_norm": 22.952302932739258, "learning_rate": 9.955182072829133e-06, "loss": 32.7585, "step": 13507 }, { "epoch": 321.6208955223881, "grad_norm": NaN, "learning_rate": 9.954481792717087e-06, "loss": 54.3415, "step": 13508 }, { "epoch": 321.644776119403, "grad_norm": 18.612133026123047, "learning_rate": 9.954481792717087e-06, "loss": 32.2534, "step": 13509 }, { "epoch": 321.6686567164179, "grad_norm": 16.69934844970703, "learning_rate": 9.953781512605043e-06, "loss": 34.2393, "step": 13510 }, { "epoch": 321.6925373134328, "grad_norm": 24.02232551574707, "learning_rate": 9.953081232492998e-06, "loss": 33.7258, "step": 13511 }, { "epoch": 321.7164179104478, "grad_norm": 21.12546730041504, "learning_rate": 9.952380952380954e-06, "loss": 33.6632, "step": 13512 }, { "epoch": 321.7402985074627, "grad_norm": 13.482281684875488, "learning_rate": 9.951680672268909e-06, "loss": 33.2125, "step": 13513 }, { "epoch": 321.7641791044776, "grad_norm": 20.659025192260742, "learning_rate": 9.950980392156863e-06, "loss": 33.9939, "step": 13514 }, { "epoch": 321.78805970149256, "grad_norm": 22.202695846557617, "learning_rate": 9.95028011204482e-06, "loss": 34.4735, "step": 13515 }, { "epoch": 321.81194029850747, "grad_norm": 14.220898628234863, "learning_rate": 9.949579831932774e-06, "loss": 33.6932, "step": 13516 }, { "epoch": 321.8358208955224, "grad_norm": 17.50078773498535, "learning_rate": 9.94887955182073e-06, "loss": 33.6944, "step": 13517 }, { "epoch": 321.85970149253734, "grad_norm": 21.947223663330078, "learning_rate": 9.948179271708684e-06, "loss": 33.8986, "step": 13518 }, { "epoch": 321.88358208955225, "grad_norm": 18.7849063873291, "learning_rate": 9.94747899159664e-06, "loss": 33.6639, "step": 13519 }, { "epoch": 321.90746268656716, "grad_norm": 15.1717529296875, "learning_rate": 9.946778711484595e-06, "loss": 33.1748, "step": 13520 }, { "epoch": 321.93134328358207, "grad_norm": 15.6008939743042, "learning_rate": 9.94607843137255e-06, "loss": 32.5078, "step": 13521 }, { "epoch": 321.95522388059703, "grad_norm": 16.304113388061523, "learning_rate": 9.945378151260506e-06, "loss": 32.8308, "step": 13522 }, { "epoch": 321.97910447761194, "grad_norm": NaN, "learning_rate": 9.94467787114846e-06, "loss": 33.8362, "step": 13523 }, { "epoch": 322.0, "grad_norm": 15.540569305419922, "learning_rate": 9.94467787114846e-06, "loss": 29.535, "step": 13524 }, { "epoch": 322.0238805970149, "grad_norm": 14.937338829040527, "learning_rate": 9.943977591036416e-06, "loss": 33.0239, "step": 13525 }, { "epoch": 322.0477611940299, "grad_norm": 22.070587158203125, "learning_rate": 9.94327731092437e-06, "loss": 34.0065, "step": 13526 }, { "epoch": 322.0716417910448, "grad_norm": 18.071996688842773, "learning_rate": 9.942577030812325e-06, "loss": 33.5174, "step": 13527 }, { "epoch": 322.0955223880597, "grad_norm": 12.858171463012695, "learning_rate": 9.941876750700281e-06, "loss": 33.5769, "step": 13528 }, { "epoch": 322.1194029850746, "grad_norm": 16.61067771911621, "learning_rate": 9.941176470588236e-06, "loss": 33.72, "step": 13529 }, { "epoch": 322.14328358208957, "grad_norm": 20.197229385375977, "learning_rate": 9.940476190476192e-06, "loss": 34.2886, "step": 13530 }, { "epoch": 322.1671641791045, "grad_norm": 16.887353897094727, "learning_rate": 9.939775910364146e-06, "loss": 34.51, "step": 13531 }, { "epoch": 322.1910447761194, "grad_norm": 16.60744857788086, "learning_rate": 9.939075630252103e-06, "loss": 33.5272, "step": 13532 }, { "epoch": 322.21492537313435, "grad_norm": 15.348031997680664, "learning_rate": 9.938375350140057e-06, "loss": 32.1391, "step": 13533 }, { "epoch": 322.23880597014926, "grad_norm": 19.63516616821289, "learning_rate": 9.937675070028012e-06, "loss": 32.9583, "step": 13534 }, { "epoch": 322.26268656716417, "grad_norm": 15.16062068939209, "learning_rate": 9.936974789915968e-06, "loss": 33.1537, "step": 13535 }, { "epoch": 322.28656716417913, "grad_norm": 17.731372833251953, "learning_rate": 9.936274509803922e-06, "loss": 33.2881, "step": 13536 }, { "epoch": 322.31044776119404, "grad_norm": 17.786243438720703, "learning_rate": 9.935574229691878e-06, "loss": 33.0445, "step": 13537 }, { "epoch": 322.33432835820895, "grad_norm": 17.036073684692383, "learning_rate": 9.934873949579833e-06, "loss": 33.6986, "step": 13538 }, { "epoch": 322.35820895522386, "grad_norm": 14.746337890625, "learning_rate": 9.934173669467789e-06, "loss": 33.6975, "step": 13539 }, { "epoch": 322.3820895522388, "grad_norm": 22.118701934814453, "learning_rate": 9.933473389355743e-06, "loss": 32.4272, "step": 13540 }, { "epoch": 322.40597014925373, "grad_norm": 14.167494773864746, "learning_rate": 9.932773109243698e-06, "loss": 32.7008, "step": 13541 }, { "epoch": 322.42985074626864, "grad_norm": 24.237863540649414, "learning_rate": 9.932072829131654e-06, "loss": 32.5664, "step": 13542 }, { "epoch": 322.4537313432836, "grad_norm": 20.92695426940918, "learning_rate": 9.931372549019609e-06, "loss": 34.5714, "step": 13543 }, { "epoch": 322.4776119402985, "grad_norm": 30.745403289794922, "learning_rate": 9.930672268907565e-06, "loss": 34.4972, "step": 13544 }, { "epoch": 322.5014925373134, "grad_norm": 19.916486740112305, "learning_rate": 9.92997198879552e-06, "loss": 34.0381, "step": 13545 }, { "epoch": 322.52537313432833, "grad_norm": 36.06111145019531, "learning_rate": 9.929271708683474e-06, "loss": 33.7978, "step": 13546 }, { "epoch": 322.5492537313433, "grad_norm": 29.741985321044922, "learning_rate": 9.92857142857143e-06, "loss": 34.1062, "step": 13547 }, { "epoch": 322.5731343283582, "grad_norm": 29.04802131652832, "learning_rate": 9.927871148459384e-06, "loss": 32.8579, "step": 13548 }, { "epoch": 322.5970149253731, "grad_norm": 24.02836036682129, "learning_rate": 9.92717086834734e-06, "loss": 33.5527, "step": 13549 }, { "epoch": 322.6208955223881, "grad_norm": 28.645822525024414, "learning_rate": 9.926470588235295e-06, "loss": 32.353, "step": 13550 }, { "epoch": 322.644776119403, "grad_norm": 24.66539192199707, "learning_rate": 9.925770308123251e-06, "loss": 34.2256, "step": 13551 }, { "epoch": 322.6686567164179, "grad_norm": 29.890790939331055, "learning_rate": 9.925070028011206e-06, "loss": 34.1128, "step": 13552 }, { "epoch": 322.6925373134328, "grad_norm": 25.194040298461914, "learning_rate": 9.92436974789916e-06, "loss": 33.6929, "step": 13553 }, { "epoch": 322.7164179104478, "grad_norm": 24.205354690551758, "learning_rate": 9.923669467787116e-06, "loss": 33.0597, "step": 13554 }, { "epoch": 322.7402985074627, "grad_norm": 27.894758224487305, "learning_rate": 9.92296918767507e-06, "loss": 32.5365, "step": 13555 }, { "epoch": 322.7641791044776, "grad_norm": 18.561086654663086, "learning_rate": 9.922268907563027e-06, "loss": 34.658, "step": 13556 }, { "epoch": 322.78805970149256, "grad_norm": 32.595176696777344, "learning_rate": 9.921568627450981e-06, "loss": 33.1968, "step": 13557 }, { "epoch": 322.81194029850747, "grad_norm": 24.86639404296875, "learning_rate": 9.920868347338937e-06, "loss": 33.1497, "step": 13558 }, { "epoch": 322.8358208955224, "grad_norm": 34.50840377807617, "learning_rate": 9.920168067226892e-06, "loss": 33.9656, "step": 13559 }, { "epoch": 322.85970149253734, "grad_norm": 28.974563598632812, "learning_rate": 9.919467787114846e-06, "loss": 34.0148, "step": 13560 }, { "epoch": 322.88358208955225, "grad_norm": 33.857723236083984, "learning_rate": 9.918767507002803e-06, "loss": 32.444, "step": 13561 }, { "epoch": 322.90746268656716, "grad_norm": 24.41487693786621, "learning_rate": 9.918067226890757e-06, "loss": 32.2329, "step": 13562 }, { "epoch": 322.93134328358207, "grad_norm": 34.2807731628418, "learning_rate": 9.917366946778713e-06, "loss": 32.5119, "step": 13563 }, { "epoch": 322.95522388059703, "grad_norm": 29.484289169311523, "learning_rate": 9.916666666666668e-06, "loss": 33.5622, "step": 13564 }, { "epoch": 322.97910447761194, "grad_norm": 34.17534637451172, "learning_rate": 9.915966386554622e-06, "loss": 33.6399, "step": 13565 }, { "epoch": 323.0, "grad_norm": 25.70732879638672, "learning_rate": 9.915266106442578e-06, "loss": 28.7622, "step": 13566 }, { "epoch": 323.0238805970149, "grad_norm": 37.89349365234375, "learning_rate": 9.914565826330533e-06, "loss": 32.6216, "step": 13567 }, { "epoch": 323.0477611940299, "grad_norm": 31.649921417236328, "learning_rate": 9.913865546218489e-06, "loss": 33.972, "step": 13568 }, { "epoch": 323.0716417910448, "grad_norm": 29.75125503540039, "learning_rate": 9.913165266106443e-06, "loss": 34.0283, "step": 13569 }, { "epoch": 323.0955223880597, "grad_norm": 28.45866584777832, "learning_rate": 9.9124649859944e-06, "loss": 34.4295, "step": 13570 }, { "epoch": 323.1194029850746, "grad_norm": 32.22605895996094, "learning_rate": 9.911764705882354e-06, "loss": 33.6811, "step": 13571 }, { "epoch": 323.14328358208957, "grad_norm": 23.550739288330078, "learning_rate": 9.911064425770309e-06, "loss": 33.5812, "step": 13572 }, { "epoch": 323.1671641791045, "grad_norm": 36.9298210144043, "learning_rate": 9.910364145658265e-06, "loss": 32.711, "step": 13573 }, { "epoch": 323.1910447761194, "grad_norm": 30.044294357299805, "learning_rate": 9.90966386554622e-06, "loss": 33.1097, "step": 13574 }, { "epoch": 323.21492537313435, "grad_norm": 30.1539249420166, "learning_rate": 9.908963585434175e-06, "loss": 33.0006, "step": 13575 }, { "epoch": 323.23880597014926, "grad_norm": 29.418554306030273, "learning_rate": 9.90826330532213e-06, "loss": 34.5263, "step": 13576 }, { "epoch": 323.26268656716417, "grad_norm": 29.885913848876953, "learning_rate": 9.907563025210084e-06, "loss": 32.6311, "step": 13577 }, { "epoch": 323.28656716417913, "grad_norm": 27.49765968322754, "learning_rate": 9.90686274509804e-06, "loss": 32.1066, "step": 13578 }, { "epoch": 323.31044776119404, "grad_norm": 34.50300979614258, "learning_rate": 9.906162464985995e-06, "loss": 34.8589, "step": 13579 }, { "epoch": 323.33432835820895, "grad_norm": 30.008867263793945, "learning_rate": 9.905462184873951e-06, "loss": 34.358, "step": 13580 }, { "epoch": 323.35820895522386, "grad_norm": 25.91068458557129, "learning_rate": 9.904761904761906e-06, "loss": 32.0865, "step": 13581 }, { "epoch": 323.3820895522388, "grad_norm": 27.69209098815918, "learning_rate": 9.904061624649862e-06, "loss": 33.5425, "step": 13582 }, { "epoch": 323.40597014925373, "grad_norm": 31.449119567871094, "learning_rate": 9.903361344537816e-06, "loss": 32.9621, "step": 13583 }, { "epoch": 323.42985074626864, "grad_norm": 27.970502853393555, "learning_rate": 9.90266106442577e-06, "loss": 33.5539, "step": 13584 }, { "epoch": 323.4537313432836, "grad_norm": 34.021053314208984, "learning_rate": 9.901960784313727e-06, "loss": 34.3935, "step": 13585 }, { "epoch": 323.4776119402985, "grad_norm": 31.655317306518555, "learning_rate": 9.901260504201681e-06, "loss": 33.3564, "step": 13586 }, { "epoch": 323.5014925373134, "grad_norm": 31.00661849975586, "learning_rate": 9.900560224089638e-06, "loss": 34.4517, "step": 13587 }, { "epoch": 323.52537313432833, "grad_norm": 27.06855010986328, "learning_rate": 9.899859943977592e-06, "loss": 34.3873, "step": 13588 }, { "epoch": 323.5492537313433, "grad_norm": 28.78382682800293, "learning_rate": 9.899159663865548e-06, "loss": 34.0071, "step": 13589 }, { "epoch": 323.5731343283582, "grad_norm": 23.918489456176758, "learning_rate": 9.898459383753503e-06, "loss": 32.5166, "step": 13590 }, { "epoch": 323.5970149253731, "grad_norm": 31.071475982666016, "learning_rate": 9.897759103641457e-06, "loss": 32.4659, "step": 13591 }, { "epoch": 323.6208955223881, "grad_norm": NaN, "learning_rate": 9.897058823529413e-06, "loss": 33.3437, "step": 13592 }, { "epoch": 323.644776119403, "grad_norm": 27.127063751220703, "learning_rate": 9.897058823529413e-06, "loss": 33.7336, "step": 13593 }, { "epoch": 323.6686567164179, "grad_norm": 32.65732955932617, "learning_rate": 9.896358543417368e-06, "loss": 34.4095, "step": 13594 }, { "epoch": 323.6925373134328, "grad_norm": 65.4323501586914, "learning_rate": 9.895658263305324e-06, "loss": 33.8844, "step": 13595 }, { "epoch": 323.7164179104478, "grad_norm": 17.279874801635742, "learning_rate": 9.894957983193278e-06, "loss": 32.8755, "step": 13596 }, { "epoch": 323.7402985074627, "grad_norm": 25.479598999023438, "learning_rate": 9.894257703081233e-06, "loss": 31.697, "step": 13597 }, { "epoch": 323.7641791044776, "grad_norm": 21.34996795654297, "learning_rate": 9.893557422969189e-06, "loss": 33.1568, "step": 13598 }, { "epoch": 323.78805970149256, "grad_norm": 21.393423080444336, "learning_rate": 9.892857142857143e-06, "loss": 33.6244, "step": 13599 }, { "epoch": 323.81194029850747, "grad_norm": 19.496997833251953, "learning_rate": 9.8921568627451e-06, "loss": 33.3905, "step": 13600 }, { "epoch": 323.8358208955224, "grad_norm": 21.00841522216797, "learning_rate": 9.891456582633054e-06, "loss": 34.5918, "step": 13601 }, { "epoch": 323.85970149253734, "grad_norm": 16.694263458251953, "learning_rate": 9.89075630252101e-06, "loss": 33.3754, "step": 13602 }, { "epoch": 323.88358208955225, "grad_norm": 23.65617561340332, "learning_rate": 9.890056022408965e-06, "loss": 34.0507, "step": 13603 }, { "epoch": 323.90746268656716, "grad_norm": 17.38776969909668, "learning_rate": 9.88935574229692e-06, "loss": 32.1662, "step": 13604 }, { "epoch": 323.93134328358207, "grad_norm": 20.21595001220703, "learning_rate": 9.888655462184875e-06, "loss": 32.3216, "step": 13605 }, { "epoch": 323.95522388059703, "grad_norm": 16.476106643676758, "learning_rate": 9.88795518207283e-06, "loss": 31.9163, "step": 13606 }, { "epoch": 323.97910447761194, "grad_norm": 22.86574935913086, "learning_rate": 9.887254901960786e-06, "loss": 33.1187, "step": 13607 }, { "epoch": 324.0, "grad_norm": 16.346389770507812, "learning_rate": 9.88655462184874e-06, "loss": 28.4992, "step": 13608 }, { "epoch": 324.0238805970149, "grad_norm": 19.007625579833984, "learning_rate": 9.885854341736697e-06, "loss": 32.1862, "step": 13609 }, { "epoch": 324.0477611940299, "grad_norm": 19.5588321685791, "learning_rate": 9.885154061624651e-06, "loss": 33.376, "step": 13610 }, { "epoch": 324.0716417910448, "grad_norm": 18.324626922607422, "learning_rate": 9.884453781512606e-06, "loss": 34.0184, "step": 13611 }, { "epoch": 324.0955223880597, "grad_norm": 19.581687927246094, "learning_rate": 9.883753501400562e-06, "loss": 33.5424, "step": 13612 }, { "epoch": 324.1194029850746, "grad_norm": 17.476701736450195, "learning_rate": 9.883053221288516e-06, "loss": 34.3911, "step": 13613 }, { "epoch": 324.14328358208957, "grad_norm": 17.81161880493164, "learning_rate": 9.882352941176472e-06, "loss": 34.0169, "step": 13614 }, { "epoch": 324.1671641791045, "grad_norm": 17.989206314086914, "learning_rate": 9.881652661064427e-06, "loss": 33.5374, "step": 13615 }, { "epoch": 324.1910447761194, "grad_norm": 19.036617279052734, "learning_rate": 9.880952380952381e-06, "loss": 34.3941, "step": 13616 }, { "epoch": 324.21492537313435, "grad_norm": 16.212793350219727, "learning_rate": 9.880252100840338e-06, "loss": 33.5002, "step": 13617 }, { "epoch": 324.23880597014926, "grad_norm": 20.590904235839844, "learning_rate": 9.879551820728292e-06, "loss": 32.86, "step": 13618 }, { "epoch": 324.26268656716417, "grad_norm": 17.457944869995117, "learning_rate": 9.878851540616248e-06, "loss": 32.5841, "step": 13619 }, { "epoch": 324.28656716417913, "grad_norm": 18.962217330932617, "learning_rate": 9.878151260504203e-06, "loss": 33.2863, "step": 13620 }, { "epoch": 324.31044776119404, "grad_norm": 18.682661056518555, "learning_rate": 9.877450980392159e-06, "loss": 32.7218, "step": 13621 }, { "epoch": 324.33432835820895, "grad_norm": 17.554895401000977, "learning_rate": 9.876750700280113e-06, "loss": 33.2864, "step": 13622 }, { "epoch": 324.35820895522386, "grad_norm": 18.783573150634766, "learning_rate": 9.876050420168068e-06, "loss": 32.5933, "step": 13623 }, { "epoch": 324.3820895522388, "grad_norm": 23.039701461791992, "learning_rate": 9.875350140056024e-06, "loss": 33.9732, "step": 13624 }, { "epoch": 324.40597014925373, "grad_norm": 17.68995475769043, "learning_rate": 9.874649859943978e-06, "loss": 34.6603, "step": 13625 }, { "epoch": 324.42985074626864, "grad_norm": 14.835780143737793, "learning_rate": 9.873949579831935e-06, "loss": 32.5128, "step": 13626 }, { "epoch": 324.4537313432836, "grad_norm": 16.030521392822266, "learning_rate": 9.873249299719889e-06, "loss": 33.105, "step": 13627 }, { "epoch": 324.4776119402985, "grad_norm": 17.529977798461914, "learning_rate": 9.872549019607845e-06, "loss": 33.6703, "step": 13628 }, { "epoch": 324.5014925373134, "grad_norm": 16.216899871826172, "learning_rate": 9.8718487394958e-06, "loss": 33.3219, "step": 13629 }, { "epoch": 324.52537313432833, "grad_norm": 18.147930145263672, "learning_rate": 9.871148459383754e-06, "loss": 33.5631, "step": 13630 }, { "epoch": 324.5492537313433, "grad_norm": 15.612064361572266, "learning_rate": 9.87044817927171e-06, "loss": 33.8981, "step": 13631 }, { "epoch": 324.5731343283582, "grad_norm": 18.85101890563965, "learning_rate": 9.869747899159665e-06, "loss": 33.0225, "step": 13632 }, { "epoch": 324.5970149253731, "grad_norm": 18.226970672607422, "learning_rate": 9.869047619047621e-06, "loss": 34.2184, "step": 13633 }, { "epoch": 324.6208955223881, "grad_norm": 15.451777458190918, "learning_rate": 9.868347338935575e-06, "loss": 33.7866, "step": 13634 }, { "epoch": 324.644776119403, "grad_norm": 16.593460083007812, "learning_rate": 9.86764705882353e-06, "loss": 33.2074, "step": 13635 }, { "epoch": 324.6686567164179, "grad_norm": 21.80547523498535, "learning_rate": 9.866946778711486e-06, "loss": 33.3635, "step": 13636 }, { "epoch": 324.6925373134328, "grad_norm": 16.522993087768555, "learning_rate": 9.86624649859944e-06, "loss": 32.4248, "step": 13637 }, { "epoch": 324.7164179104478, "grad_norm": 19.181533813476562, "learning_rate": 9.865546218487397e-06, "loss": 32.4989, "step": 13638 }, { "epoch": 324.7402985074627, "grad_norm": 15.311877250671387, "learning_rate": 9.864845938375351e-06, "loss": 33.9752, "step": 13639 }, { "epoch": 324.7641791044776, "grad_norm": 16.906938552856445, "learning_rate": 9.864145658263307e-06, "loss": 32.8355, "step": 13640 }, { "epoch": 324.78805970149256, "grad_norm": 19.86159324645996, "learning_rate": 9.863445378151262e-06, "loss": 33.4738, "step": 13641 }, { "epoch": 324.81194029850747, "grad_norm": 16.217754364013672, "learning_rate": 9.862745098039216e-06, "loss": 33.3106, "step": 13642 }, { "epoch": 324.8358208955224, "grad_norm": 18.017282485961914, "learning_rate": 9.862044817927172e-06, "loss": 33.6728, "step": 13643 }, { "epoch": 324.85970149253734, "grad_norm": 19.675174713134766, "learning_rate": 9.861344537815127e-06, "loss": 33.3716, "step": 13644 }, { "epoch": 324.88358208955225, "grad_norm": 16.68932342529297, "learning_rate": 9.860644257703083e-06, "loss": 32.9598, "step": 13645 }, { "epoch": 324.90746268656716, "grad_norm": 19.709884643554688, "learning_rate": 9.859943977591038e-06, "loss": 33.3069, "step": 13646 }, { "epoch": 324.93134328358207, "grad_norm": 16.139236450195312, "learning_rate": 9.859243697478994e-06, "loss": 32.6742, "step": 13647 }, { "epoch": 324.95522388059703, "grad_norm": 19.281776428222656, "learning_rate": 9.858543417366948e-06, "loss": 33.8049, "step": 13648 }, { "epoch": 324.97910447761194, "grad_norm": 20.00028419494629, "learning_rate": 9.857843137254903e-06, "loss": 33.6957, "step": 13649 }, { "epoch": 325.0, "grad_norm": 15.398335456848145, "learning_rate": 9.857142857142859e-06, "loss": 28.2842, "step": 13650 }, { "epoch": 325.0238805970149, "grad_norm": 16.986392974853516, "learning_rate": 9.856442577030813e-06, "loss": 32.5149, "step": 13651 }, { "epoch": 325.0477611940299, "grad_norm": 18.45128631591797, "learning_rate": 9.85574229691877e-06, "loss": 33.4055, "step": 13652 }, { "epoch": 325.0716417910448, "grad_norm": 18.596572875976562, "learning_rate": 9.855042016806724e-06, "loss": 33.3133, "step": 13653 }, { "epoch": 325.0955223880597, "grad_norm": 15.175846099853516, "learning_rate": 9.854341736694678e-06, "loss": 34.0683, "step": 13654 }, { "epoch": 325.1194029850746, "grad_norm": NaN, "learning_rate": 9.853641456582635e-06, "loss": 53.2419, "step": 13655 }, { "epoch": 325.14328358208957, "grad_norm": 18.705032348632812, "learning_rate": 9.853641456582635e-06, "loss": 34.0698, "step": 13656 }, { "epoch": 325.1671641791045, "grad_norm": 15.651199340820312, "learning_rate": 9.852941176470589e-06, "loss": 32.5187, "step": 13657 }, { "epoch": 325.1910447761194, "grad_norm": 22.214773178100586, "learning_rate": 9.852240896358545e-06, "loss": 32.1164, "step": 13658 }, { "epoch": 325.21492537313435, "grad_norm": 17.206459045410156, "learning_rate": 9.8515406162465e-06, "loss": 33.3022, "step": 13659 }, { "epoch": 325.23880597014926, "grad_norm": 22.133628845214844, "learning_rate": 9.850840336134456e-06, "loss": 34.2905, "step": 13660 }, { "epoch": 325.26268656716417, "grad_norm": 21.9619140625, "learning_rate": 9.85014005602241e-06, "loss": 32.8755, "step": 13661 }, { "epoch": 325.28656716417913, "grad_norm": 17.542688369750977, "learning_rate": 9.849439775910365e-06, "loss": 33.2138, "step": 13662 }, { "epoch": 325.31044776119404, "grad_norm": 17.905460357666016, "learning_rate": 9.848739495798321e-06, "loss": 33.2268, "step": 13663 }, { "epoch": 325.33432835820895, "grad_norm": 22.583412170410156, "learning_rate": 9.848039215686275e-06, "loss": 33.402, "step": 13664 }, { "epoch": 325.35820895522386, "grad_norm": 16.1328067779541, "learning_rate": 9.847338935574232e-06, "loss": 33.3237, "step": 13665 }, { "epoch": 325.3820895522388, "grad_norm": 18.228809356689453, "learning_rate": 9.846638655462186e-06, "loss": 33.5428, "step": 13666 }, { "epoch": 325.40597014925373, "grad_norm": 31.189430236816406, "learning_rate": 9.84593837535014e-06, "loss": 32.7037, "step": 13667 }, { "epoch": 325.42985074626864, "grad_norm": 17.707508087158203, "learning_rate": 9.845238095238097e-06, "loss": 32.8267, "step": 13668 }, { "epoch": 325.4537313432836, "grad_norm": 41.8672981262207, "learning_rate": 9.844537815126051e-06, "loss": 34.5395, "step": 13669 }, { "epoch": 325.4776119402985, "grad_norm": 36.355098724365234, "learning_rate": 9.843837535014007e-06, "loss": 32.3588, "step": 13670 }, { "epoch": 325.5014925373134, "grad_norm": 33.80672073364258, "learning_rate": 9.843137254901962e-06, "loss": 34.3203, "step": 13671 }, { "epoch": 325.52537313432833, "grad_norm": 33.45317459106445, "learning_rate": 9.842436974789916e-06, "loss": 34.4399, "step": 13672 }, { "epoch": 325.5492537313433, "grad_norm": 30.496381759643555, "learning_rate": 9.84173669467787e-06, "loss": 32.9872, "step": 13673 }, { "epoch": 325.5731343283582, "grad_norm": 26.961637496948242, "learning_rate": 9.841036414565827e-06, "loss": 32.7022, "step": 13674 }, { "epoch": 325.5970149253731, "grad_norm": 38.1742057800293, "learning_rate": 9.840336134453781e-06, "loss": 33.1485, "step": 13675 }, { "epoch": 325.6208955223881, "grad_norm": 28.846521377563477, "learning_rate": 9.839635854341738e-06, "loss": 33.2062, "step": 13676 }, { "epoch": 325.644776119403, "grad_norm": 31.336265563964844, "learning_rate": 9.838935574229692e-06, "loss": 33.4373, "step": 13677 }, { "epoch": 325.6686567164179, "grad_norm": 29.466768264770508, "learning_rate": 9.838235294117647e-06, "loss": 33.34, "step": 13678 }, { "epoch": 325.6925373134328, "grad_norm": 29.28076171875, "learning_rate": 9.837535014005603e-06, "loss": 32.5852, "step": 13679 }, { "epoch": 325.7164179104478, "grad_norm": 25.039804458618164, "learning_rate": 9.836834733893557e-06, "loss": 32.6094, "step": 13680 }, { "epoch": 325.7402985074627, "grad_norm": 34.17839050292969, "learning_rate": 9.836134453781513e-06, "loss": 32.9673, "step": 13681 }, { "epoch": 325.7641791044776, "grad_norm": 32.4091682434082, "learning_rate": 9.835434173669468e-06, "loss": 34.6231, "step": 13682 }, { "epoch": 325.78805970149256, "grad_norm": 33.738441467285156, "learning_rate": 9.834733893557424e-06, "loss": 33.7207, "step": 13683 }, { "epoch": 325.81194029850747, "grad_norm": 32.711727142333984, "learning_rate": 9.834033613445378e-06, "loss": 33.501, "step": 13684 }, { "epoch": 325.8358208955224, "grad_norm": 28.681665420532227, "learning_rate": 9.833333333333333e-06, "loss": 31.8158, "step": 13685 }, { "epoch": 325.85970149253734, "grad_norm": 26.327085494995117, "learning_rate": 9.832633053221289e-06, "loss": 33.5188, "step": 13686 }, { "epoch": 325.88358208955225, "grad_norm": 32.601863861083984, "learning_rate": 9.831932773109244e-06, "loss": 33.1276, "step": 13687 }, { "epoch": 325.90746268656716, "grad_norm": 27.080595016479492, "learning_rate": 9.8312324929972e-06, "loss": 33.992, "step": 13688 }, { "epoch": 325.93134328358207, "grad_norm": 29.46283531188965, "learning_rate": 9.830532212885154e-06, "loss": 33.9901, "step": 13689 }, { "epoch": 325.95522388059703, "grad_norm": 30.93303680419922, "learning_rate": 9.82983193277311e-06, "loss": 33.834, "step": 13690 }, { "epoch": 325.97910447761194, "grad_norm": 30.247838973999023, "learning_rate": 9.829131652661065e-06, "loss": 33.9371, "step": 13691 }, { "epoch": 326.0, "grad_norm": 23.760398864746094, "learning_rate": 9.82843137254902e-06, "loss": 28.7255, "step": 13692 }, { "epoch": 326.0238805970149, "grad_norm": 33.12372589111328, "learning_rate": 9.827731092436975e-06, "loss": 33.5896, "step": 13693 }, { "epoch": 326.0477611940299, "grad_norm": 26.778270721435547, "learning_rate": 9.82703081232493e-06, "loss": 32.5469, "step": 13694 }, { "epoch": 326.0716417910448, "grad_norm": 33.43372344970703, "learning_rate": 9.826330532212886e-06, "loss": 33.4109, "step": 13695 }, { "epoch": 326.0955223880597, "grad_norm": 31.532108306884766, "learning_rate": 9.82563025210084e-06, "loss": 32.6422, "step": 13696 }, { "epoch": 326.1194029850746, "grad_norm": 27.20742416381836, "learning_rate": 9.824929971988795e-06, "loss": 32.4127, "step": 13697 }, { "epoch": 326.14328358208957, "grad_norm": 26.293964385986328, "learning_rate": 9.824229691876751e-06, "loss": 34.7321, "step": 13698 }, { "epoch": 326.1671641791045, "grad_norm": 29.879390716552734, "learning_rate": 9.823529411764706e-06, "loss": 33.0367, "step": 13699 }, { "epoch": 326.1910447761194, "grad_norm": 23.536033630371094, "learning_rate": 9.822829131652662e-06, "loss": 32.8617, "step": 13700 }, { "epoch": 326.21492537313435, "grad_norm": 33.06008529663086, "learning_rate": 9.822128851540616e-06, "loss": 33.4983, "step": 13701 }, { "epoch": 326.23880597014926, "grad_norm": 30.79986000061035, "learning_rate": 9.821428571428573e-06, "loss": 32.9738, "step": 13702 }, { "epoch": 326.26268656716417, "grad_norm": 27.89885139465332, "learning_rate": 9.820728291316527e-06, "loss": 33.0293, "step": 13703 }, { "epoch": 326.28656716417913, "grad_norm": 24.00870132446289, "learning_rate": 9.820028011204481e-06, "loss": 32.3513, "step": 13704 }, { "epoch": 326.31044776119404, "grad_norm": 30.02005386352539, "learning_rate": 9.819327731092438e-06, "loss": 33.1095, "step": 13705 }, { "epoch": 326.33432835820895, "grad_norm": 21.62329864501953, "learning_rate": 9.818627450980392e-06, "loss": 32.3877, "step": 13706 }, { "epoch": 326.35820895522386, "grad_norm": 37.7509651184082, "learning_rate": 9.817927170868348e-06, "loss": 33.7486, "step": 13707 }, { "epoch": 326.3820895522388, "grad_norm": 33.87962341308594, "learning_rate": 9.817226890756303e-06, "loss": 32.0312, "step": 13708 }, { "epoch": 326.40597014925373, "grad_norm": 29.85454559326172, "learning_rate": 9.816526610644259e-06, "loss": 34.6575, "step": 13709 }, { "epoch": 326.42985074626864, "grad_norm": 27.53980255126953, "learning_rate": 9.815826330532213e-06, "loss": 34.0853, "step": 13710 }, { "epoch": 326.4537313432836, "grad_norm": 28.751605987548828, "learning_rate": 9.815126050420168e-06, "loss": 35.1167, "step": 13711 }, { "epoch": 326.4776119402985, "grad_norm": 23.75326919555664, "learning_rate": 9.814425770308124e-06, "loss": 33.5894, "step": 13712 }, { "epoch": 326.5014925373134, "grad_norm": 34.8567008972168, "learning_rate": 9.813725490196078e-06, "loss": 32.5829, "step": 13713 }, { "epoch": 326.52537313432833, "grad_norm": 28.78684425354004, "learning_rate": 9.813025210084035e-06, "loss": 33.82, "step": 13714 }, { "epoch": 326.5492537313433, "grad_norm": 30.194087982177734, "learning_rate": 9.812324929971989e-06, "loss": 34.3857, "step": 13715 }, { "epoch": 326.5731343283582, "grad_norm": 29.297618865966797, "learning_rate": 9.811624649859944e-06, "loss": 33.5312, "step": 13716 }, { "epoch": 326.5970149253731, "grad_norm": 30.788827896118164, "learning_rate": 9.8109243697479e-06, "loss": 32.5731, "step": 13717 }, { "epoch": 326.6208955223881, "grad_norm": 25.011629104614258, "learning_rate": 9.810224089635854e-06, "loss": 31.8591, "step": 13718 }, { "epoch": 326.644776119403, "grad_norm": 33.271392822265625, "learning_rate": 9.80952380952381e-06, "loss": 33.0879, "step": 13719 }, { "epoch": 326.6686567164179, "grad_norm": 30.057992935180664, "learning_rate": 9.808823529411765e-06, "loss": 34.2234, "step": 13720 }, { "epoch": 326.6925373134328, "grad_norm": 31.202980041503906, "learning_rate": 9.808123249299721e-06, "loss": 32.5695, "step": 13721 }, { "epoch": 326.7164179104478, "grad_norm": 27.695194244384766, "learning_rate": 9.807422969187676e-06, "loss": 32.8393, "step": 13722 }, { "epoch": 326.7402985074627, "grad_norm": 26.714426040649414, "learning_rate": 9.80672268907563e-06, "loss": 33.0164, "step": 13723 }, { "epoch": 326.7641791044776, "grad_norm": 26.310474395751953, "learning_rate": 9.806022408963586e-06, "loss": 33.0049, "step": 13724 }, { "epoch": 326.78805970149256, "grad_norm": 27.75501823425293, "learning_rate": 9.80532212885154e-06, "loss": 33.4657, "step": 13725 }, { "epoch": 326.81194029850747, "grad_norm": 29.981639862060547, "learning_rate": 9.804621848739497e-06, "loss": 33.5849, "step": 13726 }, { "epoch": 326.8358208955224, "grad_norm": 30.551393508911133, "learning_rate": 9.803921568627451e-06, "loss": 34.0015, "step": 13727 }, { "epoch": 326.85970149253734, "grad_norm": 29.319795608520508, "learning_rate": 9.803221288515406e-06, "loss": 33.4938, "step": 13728 }, { "epoch": 326.88358208955225, "grad_norm": 27.391109466552734, "learning_rate": 9.802521008403362e-06, "loss": 32.328, "step": 13729 }, { "epoch": 326.90746268656716, "grad_norm": 26.53662109375, "learning_rate": 9.801820728291316e-06, "loss": 34.162, "step": 13730 }, { "epoch": 326.93134328358207, "grad_norm": 29.478504180908203, "learning_rate": 9.801120448179273e-06, "loss": 33.469, "step": 13731 }, { "epoch": 326.95522388059703, "grad_norm": 25.39511489868164, "learning_rate": 9.800420168067227e-06, "loss": 34.1298, "step": 13732 }, { "epoch": 326.97910447761194, "grad_norm": 29.726009368896484, "learning_rate": 9.799719887955183e-06, "loss": 31.8594, "step": 13733 }, { "epoch": 327.0, "grad_norm": 24.417423248291016, "learning_rate": 9.799019607843138e-06, "loss": 29.1477, "step": 13734 }, { "epoch": 327.0238805970149, "grad_norm": 29.06763458251953, "learning_rate": 9.798319327731092e-06, "loss": 33.1099, "step": 13735 }, { "epoch": 327.0477611940299, "grad_norm": 24.732807159423828, "learning_rate": 9.797619047619048e-06, "loss": 32.7671, "step": 13736 }, { "epoch": 327.0716417910448, "grad_norm": 33.24147033691406, "learning_rate": 9.796918767507003e-06, "loss": 33.5277, "step": 13737 }, { "epoch": 327.0955223880597, "grad_norm": 29.07042694091797, "learning_rate": 9.796218487394959e-06, "loss": 32.2064, "step": 13738 }, { "epoch": 327.1194029850746, "grad_norm": 26.222522735595703, "learning_rate": 9.795518207282913e-06, "loss": 32.2448, "step": 13739 }, { "epoch": 327.14328358208957, "grad_norm": 21.09087371826172, "learning_rate": 9.79481792717087e-06, "loss": 32.451, "step": 13740 }, { "epoch": 327.1671641791045, "grad_norm": 31.495807647705078, "learning_rate": 9.794117647058824e-06, "loss": 33.4598, "step": 13741 }, { "epoch": 327.1910447761194, "grad_norm": 24.461978912353516, "learning_rate": 9.793417366946778e-06, "loss": 33.8964, "step": 13742 }, { "epoch": 327.21492537313435, "grad_norm": 32.850738525390625, "learning_rate": 9.792717086834735e-06, "loss": 33.3842, "step": 13743 }, { "epoch": 327.23880597014926, "grad_norm": 30.899707794189453, "learning_rate": 9.792016806722689e-06, "loss": 34.2587, "step": 13744 }, { "epoch": 327.26268656716417, "grad_norm": 26.87644386291504, "learning_rate": 9.791316526610645e-06, "loss": 33.047, "step": 13745 }, { "epoch": 327.28656716417913, "grad_norm": 29.510034561157227, "learning_rate": 9.7906162464986e-06, "loss": 33.3014, "step": 13746 }, { "epoch": 327.31044776119404, "grad_norm": 27.24442481994629, "learning_rate": 9.789915966386554e-06, "loss": 33.0478, "step": 13747 }, { "epoch": 327.33432835820895, "grad_norm": 24.131513595581055, "learning_rate": 9.78921568627451e-06, "loss": 32.6325, "step": 13748 }, { "epoch": 327.35820895522386, "grad_norm": 30.028263092041016, "learning_rate": 9.788515406162465e-06, "loss": 33.5486, "step": 13749 }, { "epoch": 327.3820895522388, "grad_norm": 23.574739456176758, "learning_rate": 9.787815126050421e-06, "loss": 32.6552, "step": 13750 }, { "epoch": 327.40597014925373, "grad_norm": 32.62236022949219, "learning_rate": 9.787114845938376e-06, "loss": 34.3096, "step": 13751 }, { "epoch": 327.42985074626864, "grad_norm": 27.095157623291016, "learning_rate": 9.786414565826332e-06, "loss": 32.5015, "step": 13752 }, { "epoch": 327.4537313432836, "grad_norm": 30.924848556518555, "learning_rate": 9.785714285714286e-06, "loss": 34.0637, "step": 13753 }, { "epoch": 327.4776119402985, "grad_norm": 28.348159790039062, "learning_rate": 9.78501400560224e-06, "loss": 33.0623, "step": 13754 }, { "epoch": 327.5014925373134, "grad_norm": 27.94837188720703, "learning_rate": 9.784313725490197e-06, "loss": 31.8369, "step": 13755 }, { "epoch": 327.52537313432833, "grad_norm": 26.77708625793457, "learning_rate": 9.783613445378151e-06, "loss": 34.8802, "step": 13756 }, { "epoch": 327.5492537313433, "grad_norm": 29.679018020629883, "learning_rate": 9.782913165266107e-06, "loss": 32.9828, "step": 13757 }, { "epoch": 327.5731343283582, "grad_norm": 25.49074363708496, "learning_rate": 9.782212885154062e-06, "loss": 32.5008, "step": 13758 }, { "epoch": 327.5970149253731, "grad_norm": 28.3179988861084, "learning_rate": 9.781512605042018e-06, "loss": 34.165, "step": 13759 }, { "epoch": 327.6208955223881, "grad_norm": 25.559968948364258, "learning_rate": 9.780812324929973e-06, "loss": 32.8886, "step": 13760 }, { "epoch": 327.644776119403, "grad_norm": NaN, "learning_rate": 9.780112044817927e-06, "loss": 39.2756, "step": 13761 }, { "epoch": 327.6686567164179, "grad_norm": 27.406524658203125, "learning_rate": 9.780112044817927e-06, "loss": 32.9339, "step": 13762 }, { "epoch": 327.6925373134328, "grad_norm": 21.1428165435791, "learning_rate": 9.779411764705883e-06, "loss": 33.5437, "step": 13763 }, { "epoch": 327.7164179104478, "grad_norm": 26.37807273864746, "learning_rate": 9.778711484593838e-06, "loss": 33.3502, "step": 13764 }, { "epoch": 327.7402985074627, "grad_norm": 20.410871505737305, "learning_rate": 9.778011204481794e-06, "loss": 32.9569, "step": 13765 }, { "epoch": 327.7641791044776, "grad_norm": 33.719017028808594, "learning_rate": 9.777310924369748e-06, "loss": 33.8595, "step": 13766 }, { "epoch": 327.78805970149256, "grad_norm": 25.57400894165039, "learning_rate": 9.776610644257703e-06, "loss": 33.9709, "step": 13767 }, { "epoch": 327.81194029850747, "grad_norm": 30.30806541442871, "learning_rate": 9.775910364145659e-06, "loss": 34.6261, "step": 13768 }, { "epoch": 327.8358208955224, "grad_norm": 26.39168930053711, "learning_rate": 9.775210084033613e-06, "loss": 31.7733, "step": 13769 }, { "epoch": 327.85970149253734, "grad_norm": 27.448068618774414, "learning_rate": 9.77450980392157e-06, "loss": 34.3574, "step": 13770 }, { "epoch": 327.88358208955225, "grad_norm": 24.3183650970459, "learning_rate": 9.773809523809524e-06, "loss": 33.3622, "step": 13771 }, { "epoch": 327.90746268656716, "grad_norm": 24.722354888916016, "learning_rate": 9.77310924369748e-06, "loss": 33.3757, "step": 13772 }, { "epoch": 327.93134328358207, "grad_norm": 21.480276107788086, "learning_rate": 9.772408963585435e-06, "loss": 33.0721, "step": 13773 }, { "epoch": 327.95522388059703, "grad_norm": 24.6737003326416, "learning_rate": 9.77170868347339e-06, "loss": 33.1352, "step": 13774 }, { "epoch": 327.97910447761194, "grad_norm": 19.086183547973633, "learning_rate": 9.771008403361345e-06, "loss": 33.8907, "step": 13775 }, { "epoch": 328.0, "grad_norm": 21.551876068115234, "learning_rate": 9.7703081232493e-06, "loss": 29.3541, "step": 13776 }, { "epoch": 328.0238805970149, "grad_norm": 20.208740234375, "learning_rate": 9.769607843137256e-06, "loss": 34.1627, "step": 13777 }, { "epoch": 328.0477611940299, "grad_norm": 22.711566925048828, "learning_rate": 9.76890756302521e-06, "loss": 33.5005, "step": 13778 }, { "epoch": 328.0716417910448, "grad_norm": 16.841520309448242, "learning_rate": 9.768207282913167e-06, "loss": 33.1527, "step": 13779 }, { "epoch": 328.0955223880597, "grad_norm": 24.088666915893555, "learning_rate": 9.767507002801121e-06, "loss": 32.2791, "step": 13780 }, { "epoch": 328.1194029850746, "grad_norm": 19.65779685974121, "learning_rate": 9.766806722689076e-06, "loss": 32.6904, "step": 13781 }, { "epoch": 328.14328358208957, "grad_norm": 22.842262268066406, "learning_rate": 9.766106442577032e-06, "loss": 33.4977, "step": 13782 }, { "epoch": 328.1671641791045, "grad_norm": 20.877944946289062, "learning_rate": 9.765406162464986e-06, "loss": 33.1758, "step": 13783 }, { "epoch": 328.1910447761194, "grad_norm": 21.674705505371094, "learning_rate": 9.764705882352942e-06, "loss": 33.8479, "step": 13784 }, { "epoch": 328.21492537313435, "grad_norm": 21.891651153564453, "learning_rate": 9.764005602240897e-06, "loss": 33.2256, "step": 13785 }, { "epoch": 328.23880597014926, "grad_norm": 18.32332992553711, "learning_rate": 9.763305322128851e-06, "loss": 33.4584, "step": 13786 }, { "epoch": 328.26268656716417, "grad_norm": 24.69319725036621, "learning_rate": 9.762605042016807e-06, "loss": 33.13, "step": 13787 }, { "epoch": 328.28656716417913, "grad_norm": 17.938190460205078, "learning_rate": 9.761904761904762e-06, "loss": 32.9584, "step": 13788 }, { "epoch": 328.31044776119404, "grad_norm": 19.446102142333984, "learning_rate": 9.761204481792718e-06, "loss": 33.2534, "step": 13789 }, { "epoch": 328.33432835820895, "grad_norm": 21.508390426635742, "learning_rate": 9.760504201680673e-06, "loss": 33.1961, "step": 13790 }, { "epoch": 328.35820895522386, "grad_norm": 16.572961807250977, "learning_rate": 9.759803921568629e-06, "loss": 34.3795, "step": 13791 }, { "epoch": 328.3820895522388, "grad_norm": 18.777446746826172, "learning_rate": 9.759103641456583e-06, "loss": 33.2926, "step": 13792 }, { "epoch": 328.40597014925373, "grad_norm": 20.581424713134766, "learning_rate": 9.758403361344538e-06, "loss": 33.4043, "step": 13793 }, { "epoch": 328.42985074626864, "grad_norm": 17.72150421142578, "learning_rate": 9.757703081232494e-06, "loss": 32.8018, "step": 13794 }, { "epoch": 328.4537313432836, "grad_norm": 16.94236946105957, "learning_rate": 9.757002801120448e-06, "loss": 32.8077, "step": 13795 }, { "epoch": 328.4776119402985, "grad_norm": 14.381454467773438, "learning_rate": 9.756302521008404e-06, "loss": 33.0152, "step": 13796 }, { "epoch": 328.5014925373134, "grad_norm": 17.40692138671875, "learning_rate": 9.755602240896359e-06, "loss": 33.7176, "step": 13797 }, { "epoch": 328.52537313432833, "grad_norm": 16.10710334777832, "learning_rate": 9.754901960784315e-06, "loss": 32.749, "step": 13798 }, { "epoch": 328.5492537313433, "grad_norm": 18.579635620117188, "learning_rate": 9.75420168067227e-06, "loss": 34.2422, "step": 13799 }, { "epoch": 328.5731343283582, "grad_norm": 20.709930419921875, "learning_rate": 9.753501400560224e-06, "loss": 33.2791, "step": 13800 }, { "epoch": 328.5970149253731, "grad_norm": 15.478633880615234, "learning_rate": 9.75280112044818e-06, "loss": 32.2788, "step": 13801 }, { "epoch": 328.6208955223881, "grad_norm": 25.690916061401367, "learning_rate": 9.752100840336135e-06, "loss": 33.4642, "step": 13802 }, { "epoch": 328.644776119403, "grad_norm": 17.838212966918945, "learning_rate": 9.751400560224091e-06, "loss": 33.502, "step": 13803 }, { "epoch": 328.6686567164179, "grad_norm": 16.468379974365234, "learning_rate": 9.750700280112045e-06, "loss": 32.5036, "step": 13804 }, { "epoch": 328.6925373134328, "grad_norm": 21.25994873046875, "learning_rate": 9.75e-06, "loss": 31.7945, "step": 13805 }, { "epoch": 328.7164179104478, "grad_norm": 15.796037673950195, "learning_rate": 9.749299719887956e-06, "loss": 33.8214, "step": 13806 }, { "epoch": 328.7402985074627, "grad_norm": 25.20906639099121, "learning_rate": 9.74859943977591e-06, "loss": 33.6889, "step": 13807 }, { "epoch": 328.7641791044776, "grad_norm": 20.97492218017578, "learning_rate": 9.747899159663867e-06, "loss": 33.1668, "step": 13808 }, { "epoch": 328.78805970149256, "grad_norm": 24.81602668762207, "learning_rate": 9.747198879551821e-06, "loss": 31.7059, "step": 13809 }, { "epoch": 328.81194029850747, "grad_norm": 22.154918670654297, "learning_rate": 9.746498599439777e-06, "loss": 33.4172, "step": 13810 }, { "epoch": 328.8358208955224, "grad_norm": 21.796457290649414, "learning_rate": 9.745798319327732e-06, "loss": 33.4108, "step": 13811 }, { "epoch": 328.85970149253734, "grad_norm": 23.29953384399414, "learning_rate": 9.745098039215686e-06, "loss": 33.3716, "step": 13812 }, { "epoch": 328.88358208955225, "grad_norm": 18.363006591796875, "learning_rate": 9.744397759103642e-06, "loss": 32.8874, "step": 13813 }, { "epoch": 328.90746268656716, "grad_norm": 21.968074798583984, "learning_rate": 9.743697478991597e-06, "loss": 33.6003, "step": 13814 }, { "epoch": 328.93134328358207, "grad_norm": 23.111059188842773, "learning_rate": 9.742997198879553e-06, "loss": 34.0608, "step": 13815 }, { "epoch": 328.95522388059703, "grad_norm": 16.447967529296875, "learning_rate": 9.742296918767507e-06, "loss": 34.2898, "step": 13816 }, { "epoch": 328.97910447761194, "grad_norm": 26.502540588378906, "learning_rate": 9.741596638655462e-06, "loss": 32.8774, "step": 13817 }, { "epoch": 329.0, "grad_norm": 15.590022087097168, "learning_rate": 9.740896358543418e-06, "loss": 29.0381, "step": 13818 }, { "epoch": 329.0238805970149, "grad_norm": 26.835067749023438, "learning_rate": 9.740196078431373e-06, "loss": 35.393, "step": 13819 }, { "epoch": 329.0477611940299, "grad_norm": 20.022567749023438, "learning_rate": 9.739495798319329e-06, "loss": 31.9097, "step": 13820 }, { "epoch": 329.0716417910448, "grad_norm": 24.32552146911621, "learning_rate": 9.738795518207283e-06, "loss": 33.6466, "step": 13821 }, { "epoch": 329.0955223880597, "grad_norm": 21.088720321655273, "learning_rate": 9.73809523809524e-06, "loss": 34.5795, "step": 13822 }, { "epoch": 329.1194029850746, "grad_norm": 20.32341194152832, "learning_rate": 9.737394957983194e-06, "loss": 33.4106, "step": 13823 }, { "epoch": 329.14328358208957, "grad_norm": 21.68365478515625, "learning_rate": 9.736694677871148e-06, "loss": 33.1667, "step": 13824 }, { "epoch": 329.1671641791045, "grad_norm": 21.892330169677734, "learning_rate": 9.735994397759105e-06, "loss": 32.7454, "step": 13825 }, { "epoch": 329.1910447761194, "grad_norm": 16.508142471313477, "learning_rate": 9.735294117647059e-06, "loss": 33.4753, "step": 13826 }, { "epoch": 329.21492537313435, "grad_norm": 24.337032318115234, "learning_rate": 9.734593837535015e-06, "loss": 32.7196, "step": 13827 }, { "epoch": 329.23880597014926, "grad_norm": 18.94352912902832, "learning_rate": 9.73389355742297e-06, "loss": 33.6667, "step": 13828 }, { "epoch": 329.26268656716417, "grad_norm": 19.301166534423828, "learning_rate": 9.733193277310926e-06, "loss": 32.2252, "step": 13829 }, { "epoch": 329.28656716417913, "grad_norm": 17.05006217956543, "learning_rate": 9.73249299719888e-06, "loss": 31.9598, "step": 13830 }, { "epoch": 329.31044776119404, "grad_norm": 20.985246658325195, "learning_rate": 9.731792717086835e-06, "loss": 33.0741, "step": 13831 }, { "epoch": 329.33432835820895, "grad_norm": 16.112356185913086, "learning_rate": 9.731092436974791e-06, "loss": 33.7621, "step": 13832 }, { "epoch": 329.35820895522386, "grad_norm": 21.292627334594727, "learning_rate": 9.730392156862745e-06, "loss": 33.2529, "step": 13833 }, { "epoch": 329.3820895522388, "grad_norm": 17.180091857910156, "learning_rate": 9.729691876750702e-06, "loss": 33.0345, "step": 13834 }, { "epoch": 329.40597014925373, "grad_norm": 23.38367462158203, "learning_rate": 9.728991596638656e-06, "loss": 32.5008, "step": 13835 }, { "epoch": 329.42985074626864, "grad_norm": 21.075260162353516, "learning_rate": 9.72829131652661e-06, "loss": 34.2434, "step": 13836 }, { "epoch": 329.4537313432836, "grad_norm": 19.69710350036621, "learning_rate": 9.727591036414567e-06, "loss": 32.2472, "step": 13837 }, { "epoch": 329.4776119402985, "grad_norm": 20.674158096313477, "learning_rate": 9.726890756302521e-06, "loss": 33.478, "step": 13838 }, { "epoch": 329.5014925373134, "grad_norm": 18.335329055786133, "learning_rate": 9.726190476190477e-06, "loss": 33.369, "step": 13839 }, { "epoch": 329.52537313432833, "grad_norm": 19.51376724243164, "learning_rate": 9.725490196078432e-06, "loss": 32.905, "step": 13840 }, { "epoch": 329.5492537313433, "grad_norm": 15.209076881408691, "learning_rate": 9.724789915966388e-06, "loss": 33.9427, "step": 13841 }, { "epoch": 329.5731343283582, "grad_norm": 15.64625072479248, "learning_rate": 9.724089635854342e-06, "loss": 32.1097, "step": 13842 }, { "epoch": 329.5970149253731, "grad_norm": 16.06794548034668, "learning_rate": 9.723389355742297e-06, "loss": 33.6461, "step": 13843 }, { "epoch": 329.6208955223881, "grad_norm": 19.47937774658203, "learning_rate": 9.722689075630253e-06, "loss": 33.8203, "step": 13844 }, { "epoch": 329.644776119403, "grad_norm": 17.933704376220703, "learning_rate": 9.721988795518208e-06, "loss": 32.5787, "step": 13845 }, { "epoch": 329.6686567164179, "grad_norm": 16.346975326538086, "learning_rate": 9.721288515406164e-06, "loss": 32.7298, "step": 13846 }, { "epoch": 329.6925373134328, "grad_norm": 16.253942489624023, "learning_rate": 9.720588235294118e-06, "loss": 34.1091, "step": 13847 }, { "epoch": 329.7164179104478, "grad_norm": 14.6922607421875, "learning_rate": 9.719887955182074e-06, "loss": 33.5265, "step": 13848 }, { "epoch": 329.7402985074627, "grad_norm": 17.18062973022461, "learning_rate": 9.719187675070029e-06, "loss": 32.9379, "step": 13849 }, { "epoch": 329.7641791044776, "grad_norm": 16.036989212036133, "learning_rate": 9.718487394957983e-06, "loss": 33.8745, "step": 13850 }, { "epoch": 329.78805970149256, "grad_norm": 17.42502784729004, "learning_rate": 9.71778711484594e-06, "loss": 33.2884, "step": 13851 }, { "epoch": 329.81194029850747, "grad_norm": 15.50560474395752, "learning_rate": 9.717086834733894e-06, "loss": 33.3494, "step": 13852 }, { "epoch": 329.8358208955224, "grad_norm": 17.172216415405273, "learning_rate": 9.71638655462185e-06, "loss": 32.7325, "step": 13853 }, { "epoch": 329.85970149253734, "grad_norm": NaN, "learning_rate": 9.715686274509805e-06, "loss": 50.0166, "step": 13854 }, { "epoch": 329.88358208955225, "grad_norm": 17.617584228515625, "learning_rate": 9.715686274509805e-06, "loss": 33.122, "step": 13855 }, { "epoch": 329.90746268656716, "grad_norm": 14.241527557373047, "learning_rate": 9.714985994397759e-06, "loss": 33.2442, "step": 13856 }, { "epoch": 329.93134328358207, "grad_norm": 14.549175262451172, "learning_rate": 9.714285714285715e-06, "loss": 33.4723, "step": 13857 }, { "epoch": 329.95522388059703, "grad_norm": 19.67365837097168, "learning_rate": 9.71358543417367e-06, "loss": 31.7926, "step": 13858 }, { "epoch": 329.97910447761194, "grad_norm": 19.864776611328125, "learning_rate": 9.712885154061626e-06, "loss": 33.3995, "step": 13859 }, { "epoch": 330.0, "grad_norm": 14.34056282043457, "learning_rate": 9.71218487394958e-06, "loss": 28.802, "step": 13860 }, { "epoch": 330.0238805970149, "grad_norm": 14.028547286987305, "learning_rate": 9.711484593837536e-06, "loss": 31.7811, "step": 13861 }, { "epoch": 330.0477611940299, "grad_norm": 15.838154792785645, "learning_rate": 9.710784313725491e-06, "loss": 33.3897, "step": 13862 }, { "epoch": 330.0716417910448, "grad_norm": 15.39886474609375, "learning_rate": 9.710084033613445e-06, "loss": 33.2612, "step": 13863 }, { "epoch": 330.0955223880597, "grad_norm": 15.418465614318848, "learning_rate": 9.709383753501402e-06, "loss": 33.7369, "step": 13864 }, { "epoch": 330.1194029850746, "grad_norm": 16.513729095458984, "learning_rate": 9.708683473389356e-06, "loss": 32.0597, "step": 13865 }, { "epoch": 330.14328358208957, "grad_norm": 16.598331451416016, "learning_rate": 9.707983193277312e-06, "loss": 33.886, "step": 13866 }, { "epoch": 330.1671641791045, "grad_norm": 19.797420501708984, "learning_rate": 9.707282913165267e-06, "loss": 32.8522, "step": 13867 }, { "epoch": 330.1910447761194, "grad_norm": 20.554298400878906, "learning_rate": 9.706582633053223e-06, "loss": 33.5831, "step": 13868 }, { "epoch": 330.21492537313435, "grad_norm": 17.538660049438477, "learning_rate": 9.705882352941177e-06, "loss": 33.5932, "step": 13869 }, { "epoch": 330.23880597014926, "grad_norm": 20.958953857421875, "learning_rate": 9.705182072829132e-06, "loss": 33.0203, "step": 13870 }, { "epoch": 330.26268656716417, "grad_norm": 22.544586181640625, "learning_rate": 9.704481792717088e-06, "loss": 33.6431, "step": 13871 }, { "epoch": 330.28656716417913, "grad_norm": 14.928828239440918, "learning_rate": 9.703781512605042e-06, "loss": 32.5074, "step": 13872 }, { "epoch": 330.31044776119404, "grad_norm": 28.557313919067383, "learning_rate": 9.703081232492999e-06, "loss": 33.2598, "step": 13873 }, { "epoch": 330.33432835820895, "grad_norm": 17.667194366455078, "learning_rate": 9.702380952380953e-06, "loss": 33.1412, "step": 13874 }, { "epoch": 330.35820895522386, "grad_norm": 26.1396541595459, "learning_rate": 9.701680672268908e-06, "loss": 33.7127, "step": 13875 }, { "epoch": 330.3820895522388, "grad_norm": 22.766267776489258, "learning_rate": 9.700980392156864e-06, "loss": 32.7905, "step": 13876 }, { "epoch": 330.40597014925373, "grad_norm": 16.63962745666504, "learning_rate": 9.700280112044818e-06, "loss": 33.6493, "step": 13877 }, { "epoch": 330.42985074626864, "grad_norm": 26.941375732421875, "learning_rate": 9.699579831932774e-06, "loss": 33.0094, "step": 13878 }, { "epoch": 330.4537313432836, "grad_norm": 20.222993850708008, "learning_rate": 9.698879551820729e-06, "loss": 32.7451, "step": 13879 }, { "epoch": 330.4776119402985, "grad_norm": 21.765905380249023, "learning_rate": 9.698179271708685e-06, "loss": 33.3798, "step": 13880 }, { "epoch": 330.5014925373134, "grad_norm": 22.75497817993164, "learning_rate": 9.69747899159664e-06, "loss": 33.6727, "step": 13881 }, { "epoch": 330.52537313432833, "grad_norm": 16.854711532592773, "learning_rate": 9.696778711484594e-06, "loss": 33.3058, "step": 13882 }, { "epoch": 330.5492537313433, "grad_norm": 24.363628387451172, "learning_rate": 9.69607843137255e-06, "loss": 33.4192, "step": 13883 }, { "epoch": 330.5731343283582, "grad_norm": 20.040849685668945, "learning_rate": 9.695378151260505e-06, "loss": 33.0446, "step": 13884 }, { "epoch": 330.5970149253731, "grad_norm": 17.442806243896484, "learning_rate": 9.69467787114846e-06, "loss": 32.7335, "step": 13885 }, { "epoch": 330.6208955223881, "grad_norm": 19.68790626525879, "learning_rate": 9.693977591036415e-06, "loss": 31.7422, "step": 13886 }, { "epoch": 330.644776119403, "grad_norm": 20.931894302368164, "learning_rate": 9.693277310924371e-06, "loss": 33.383, "step": 13887 }, { "epoch": 330.6686567164179, "grad_norm": 18.293874740600586, "learning_rate": 9.692577030812326e-06, "loss": 33.3228, "step": 13888 }, { "epoch": 330.6925373134328, "grad_norm": 23.76629066467285, "learning_rate": 9.69187675070028e-06, "loss": 33.1281, "step": 13889 }, { "epoch": 330.7164179104478, "grad_norm": 22.490915298461914, "learning_rate": 9.691176470588236e-06, "loss": 34.066, "step": 13890 }, { "epoch": 330.7402985074627, "grad_norm": 17.901865005493164, "learning_rate": 9.690476190476191e-06, "loss": 32.9509, "step": 13891 }, { "epoch": 330.7641791044776, "grad_norm": 27.096599578857422, "learning_rate": 9.689775910364147e-06, "loss": 32.5744, "step": 13892 }, { "epoch": 330.78805970149256, "grad_norm": 18.732515335083008, "learning_rate": 9.689075630252102e-06, "loss": 32.6709, "step": 13893 }, { "epoch": 330.81194029850747, "grad_norm": 30.85783576965332, "learning_rate": 9.688375350140056e-06, "loss": 32.5919, "step": 13894 }, { "epoch": 330.8358208955224, "grad_norm": 18.67921257019043, "learning_rate": 9.687675070028012e-06, "loss": 32.8729, "step": 13895 }, { "epoch": 330.85970149253734, "grad_norm": 32.25803756713867, "learning_rate": 9.686974789915967e-06, "loss": 33.9697, "step": 13896 }, { "epoch": 330.88358208955225, "grad_norm": 16.962650299072266, "learning_rate": 9.686274509803923e-06, "loss": 33.5899, "step": 13897 }, { "epoch": 330.90746268656716, "grad_norm": 38.310264587402344, "learning_rate": 9.685574229691877e-06, "loss": 34.1434, "step": 13898 }, { "epoch": 330.93134328358207, "grad_norm": 25.830568313598633, "learning_rate": 9.684873949579834e-06, "loss": 33.5231, "step": 13899 }, { "epoch": 330.95522388059703, "grad_norm": 32.52248001098633, "learning_rate": 9.684173669467788e-06, "loss": 32.9083, "step": 13900 }, { "epoch": 330.97910447761194, "grad_norm": 28.310823440551758, "learning_rate": 9.683473389355742e-06, "loss": 32.5874, "step": 13901 }, { "epoch": 331.0, "grad_norm": 28.601337432861328, "learning_rate": 9.682773109243699e-06, "loss": 29.7366, "step": 13902 }, { "epoch": 331.0238805970149, "grad_norm": 30.404438018798828, "learning_rate": 9.682072829131653e-06, "loss": 34.2454, "step": 13903 }, { "epoch": 331.0477611940299, "grad_norm": 28.96394157409668, "learning_rate": 9.68137254901961e-06, "loss": 32.3148, "step": 13904 }, { "epoch": 331.0716417910448, "grad_norm": 25.191787719726562, "learning_rate": 9.680672268907564e-06, "loss": 33.5151, "step": 13905 }, { "epoch": 331.0955223880597, "grad_norm": 31.659870147705078, "learning_rate": 9.679971988795518e-06, "loss": 33.4593, "step": 13906 }, { "epoch": 331.1194029850746, "grad_norm": 23.066295623779297, "learning_rate": 9.679271708683474e-06, "loss": 33.3316, "step": 13907 }, { "epoch": 331.14328358208957, "grad_norm": 26.33380699157715, "learning_rate": 9.678571428571429e-06, "loss": 33.7243, "step": 13908 }, { "epoch": 331.1671641791045, "grad_norm": 23.34626579284668, "learning_rate": 9.677871148459385e-06, "loss": 34.1913, "step": 13909 }, { "epoch": 331.1910447761194, "grad_norm": 28.661367416381836, "learning_rate": 9.67717086834734e-06, "loss": 32.7433, "step": 13910 }, { "epoch": 331.21492537313435, "grad_norm": 24.23579216003418, "learning_rate": 9.676470588235296e-06, "loss": 33.5343, "step": 13911 }, { "epoch": 331.23880597014926, "grad_norm": 25.383020401000977, "learning_rate": 9.67577030812325e-06, "loss": 33.8556, "step": 13912 }, { "epoch": 331.26268656716417, "grad_norm": 26.202468872070312, "learning_rate": 9.675070028011205e-06, "loss": 33.7905, "step": 13913 }, { "epoch": 331.28656716417913, "grad_norm": 17.759017944335938, "learning_rate": 9.67436974789916e-06, "loss": 32.7533, "step": 13914 }, { "epoch": 331.31044776119404, "grad_norm": 27.195541381835938, "learning_rate": 9.673669467787115e-06, "loss": 33.0935, "step": 13915 }, { "epoch": 331.33432835820895, "grad_norm": 20.963619232177734, "learning_rate": 9.672969187675071e-06, "loss": 33.7381, "step": 13916 }, { "epoch": 331.35820895522386, "grad_norm": 28.524673461914062, "learning_rate": 9.672268907563026e-06, "loss": 34.3043, "step": 13917 }, { "epoch": 331.3820895522388, "grad_norm": 23.942951202392578, "learning_rate": 9.671568627450982e-06, "loss": 33.1677, "step": 13918 }, { "epoch": 331.40597014925373, "grad_norm": 20.868167877197266, "learning_rate": 9.670868347338937e-06, "loss": 31.3524, "step": 13919 }, { "epoch": 331.42985074626864, "grad_norm": 22.58417510986328, "learning_rate": 9.670168067226891e-06, "loss": 32.643, "step": 13920 }, { "epoch": 331.4537313432836, "grad_norm": 17.47084617614746, "learning_rate": 9.669467787114847e-06, "loss": 34.4185, "step": 13921 }, { "epoch": 331.4776119402985, "grad_norm": 25.291521072387695, "learning_rate": 9.668767507002802e-06, "loss": 32.9547, "step": 13922 }, { "epoch": 331.5014925373134, "grad_norm": 18.16826820373535, "learning_rate": 9.668067226890758e-06, "loss": 32.4785, "step": 13923 }, { "epoch": 331.52537313432833, "grad_norm": 22.666799545288086, "learning_rate": 9.667366946778712e-06, "loss": 33.1056, "step": 13924 }, { "epoch": 331.5492537313433, "grad_norm": 23.67050552368164, "learning_rate": 9.666666666666667e-06, "loss": 33.1822, "step": 13925 }, { "epoch": 331.5731343283582, "grad_norm": 15.939326286315918, "learning_rate": 9.665966386554623e-06, "loss": 33.1894, "step": 13926 }, { "epoch": 331.5970149253731, "grad_norm": 21.62961769104004, "learning_rate": 9.665266106442577e-06, "loss": 33.0778, "step": 13927 }, { "epoch": 331.6208955223881, "grad_norm": 18.050323486328125, "learning_rate": 9.664565826330534e-06, "loss": 31.8663, "step": 13928 }, { "epoch": 331.644776119403, "grad_norm": 20.63920783996582, "learning_rate": 9.663865546218488e-06, "loss": 32.4513, "step": 13929 }, { "epoch": 331.6686567164179, "grad_norm": 22.199052810668945, "learning_rate": 9.663165266106444e-06, "loss": 32.0772, "step": 13930 }, { "epoch": 331.6925373134328, "grad_norm": 15.793177604675293, "learning_rate": 9.662464985994399e-06, "loss": 33.7744, "step": 13931 }, { "epoch": 331.7164179104478, "grad_norm": 20.595054626464844, "learning_rate": 9.661764705882353e-06, "loss": 32.2003, "step": 13932 }, { "epoch": 331.7402985074627, "grad_norm": 17.664241790771484, "learning_rate": 9.66106442577031e-06, "loss": 33.8355, "step": 13933 }, { "epoch": 331.7641791044776, "grad_norm": 22.56214141845703, "learning_rate": 9.660364145658264e-06, "loss": 33.6375, "step": 13934 }, { "epoch": 331.78805970149256, "grad_norm": 16.93956756591797, "learning_rate": 9.65966386554622e-06, "loss": 32.1945, "step": 13935 }, { "epoch": 331.81194029850747, "grad_norm": 16.997602462768555, "learning_rate": 9.658963585434174e-06, "loss": 34.3467, "step": 13936 }, { "epoch": 331.8358208955224, "grad_norm": 17.921266555786133, "learning_rate": 9.65826330532213e-06, "loss": 33.2735, "step": 13937 }, { "epoch": 331.85970149253734, "grad_norm": 14.891024589538574, "learning_rate": 9.657563025210085e-06, "loss": 31.8917, "step": 13938 }, { "epoch": 331.88358208955225, "grad_norm": 20.693897247314453, "learning_rate": 9.65686274509804e-06, "loss": 32.9931, "step": 13939 }, { "epoch": 331.90746268656716, "grad_norm": 18.60137176513672, "learning_rate": 9.656162464985996e-06, "loss": 34.6641, "step": 13940 }, { "epoch": 331.93134328358207, "grad_norm": 20.24396514892578, "learning_rate": 9.65546218487395e-06, "loss": 33.1107, "step": 13941 }, { "epoch": 331.95522388059703, "grad_norm": 16.75682258605957, "learning_rate": 9.654761904761906e-06, "loss": 31.5462, "step": 13942 }, { "epoch": 331.97910447761194, "grad_norm": 15.7743501663208, "learning_rate": 9.65406162464986e-06, "loss": 33.2106, "step": 13943 }, { "epoch": 332.0, "grad_norm": 16.924671173095703, "learning_rate": 9.653361344537815e-06, "loss": 27.6228, "step": 13944 }, { "epoch": 332.0238805970149, "grad_norm": 16.688642501831055, "learning_rate": 9.652661064425771e-06, "loss": 33.0291, "step": 13945 }, { "epoch": 332.0477611940299, "grad_norm": 19.363203048706055, "learning_rate": 9.651960784313726e-06, "loss": 33.4778, "step": 13946 }, { "epoch": 332.0716417910448, "grad_norm": 17.70576286315918, "learning_rate": 9.651260504201682e-06, "loss": 33.9613, "step": 13947 }, { "epoch": 332.0955223880597, "grad_norm": 21.67820930480957, "learning_rate": 9.650560224089637e-06, "loss": 33.6494, "step": 13948 }, { "epoch": 332.1194029850746, "grad_norm": 24.726451873779297, "learning_rate": 9.649859943977593e-06, "loss": 32.8248, "step": 13949 }, { "epoch": 332.14328358208957, "grad_norm": 15.790563583374023, "learning_rate": 9.649159663865547e-06, "loss": 32.1063, "step": 13950 }, { "epoch": 332.1671641791045, "grad_norm": 19.399120330810547, "learning_rate": 9.648459383753502e-06, "loss": 32.1274, "step": 13951 }, { "epoch": 332.1910447761194, "grad_norm": 24.268129348754883, "learning_rate": 9.647759103641458e-06, "loss": 32.1004, "step": 13952 }, { "epoch": 332.21492537313435, "grad_norm": 16.836997985839844, "learning_rate": 9.647058823529412e-06, "loss": 32.8896, "step": 13953 }, { "epoch": 332.23880597014926, "grad_norm": 21.344093322753906, "learning_rate": 9.646358543417368e-06, "loss": 32.7662, "step": 13954 }, { "epoch": 332.26268656716417, "grad_norm": 16.73006820678711, "learning_rate": 9.645658263305323e-06, "loss": 33.0612, "step": 13955 }, { "epoch": 332.28656716417913, "grad_norm": 19.402740478515625, "learning_rate": 9.644957983193279e-06, "loss": 32.4261, "step": 13956 }, { "epoch": 332.31044776119404, "grad_norm": 17.733530044555664, "learning_rate": 9.644257703081234e-06, "loss": 33.215, "step": 13957 }, { "epoch": 332.33432835820895, "grad_norm": 16.590065002441406, "learning_rate": 9.643557422969188e-06, "loss": 33.7284, "step": 13958 }, { "epoch": 332.35820895522386, "grad_norm": 20.320560455322266, "learning_rate": 9.642857142857144e-06, "loss": 33.1951, "step": 13959 }, { "epoch": 332.3820895522388, "grad_norm": 15.218728065490723, "learning_rate": 9.642156862745099e-06, "loss": 33.8555, "step": 13960 }, { "epoch": 332.40597014925373, "grad_norm": 20.751352310180664, "learning_rate": 9.641456582633055e-06, "loss": 33.3031, "step": 13961 }, { "epoch": 332.42985074626864, "grad_norm": 16.567758560180664, "learning_rate": 9.64075630252101e-06, "loss": 33.3654, "step": 13962 }, { "epoch": 332.4537313432836, "grad_norm": 18.319110870361328, "learning_rate": 9.640056022408964e-06, "loss": 32.4031, "step": 13963 }, { "epoch": 332.4776119402985, "grad_norm": 15.850713729858398, "learning_rate": 9.63935574229692e-06, "loss": 33.133, "step": 13964 }, { "epoch": 332.5014925373134, "grad_norm": 19.366064071655273, "learning_rate": 9.638655462184874e-06, "loss": 33.4396, "step": 13965 }, { "epoch": 332.52537313432833, "grad_norm": 15.888068199157715, "learning_rate": 9.63795518207283e-06, "loss": 33.0262, "step": 13966 }, { "epoch": 332.5492537313433, "grad_norm": 18.64305305480957, "learning_rate": 9.637254901960785e-06, "loss": 32.2376, "step": 13967 }, { "epoch": 332.5731343283582, "grad_norm": 17.76449203491211, "learning_rate": 9.636554621848741e-06, "loss": 33.7193, "step": 13968 }, { "epoch": 332.5970149253731, "grad_norm": 16.376487731933594, "learning_rate": 9.635854341736696e-06, "loss": 33.5743, "step": 13969 }, { "epoch": 332.6208955223881, "grad_norm": 15.979970932006836, "learning_rate": 9.63515406162465e-06, "loss": 33.1573, "step": 13970 }, { "epoch": 332.644776119403, "grad_norm": 19.043180465698242, "learning_rate": 9.634453781512606e-06, "loss": 32.1751, "step": 13971 }, { "epoch": 332.6686567164179, "grad_norm": 20.143051147460938, "learning_rate": 9.63375350140056e-06, "loss": 32.593, "step": 13972 }, { "epoch": 332.6925373134328, "grad_norm": 18.239852905273438, "learning_rate": 9.633053221288517e-06, "loss": 33.3211, "step": 13973 }, { "epoch": 332.7164179104478, "grad_norm": 14.168808937072754, "learning_rate": 9.632352941176471e-06, "loss": 32.7175, "step": 13974 }, { "epoch": 332.7402985074627, "grad_norm": 14.109582901000977, "learning_rate": 9.631652661064426e-06, "loss": 33.0436, "step": 13975 }, { "epoch": 332.7641791044776, "grad_norm": 16.472972869873047, "learning_rate": 9.630952380952382e-06, "loss": 33.6842, "step": 13976 }, { "epoch": 332.78805970149256, "grad_norm": 18.264995574951172, "learning_rate": 9.630252100840337e-06, "loss": 33.4841, "step": 13977 }, { "epoch": 332.81194029850747, "grad_norm": 15.346653938293457, "learning_rate": 9.629551820728293e-06, "loss": 33.1951, "step": 13978 }, { "epoch": 332.8358208955224, "grad_norm": 16.865934371948242, "learning_rate": 9.628851540616247e-06, "loss": 33.9217, "step": 13979 }, { "epoch": 332.85970149253734, "grad_norm": 18.869901657104492, "learning_rate": 9.628151260504203e-06, "loss": 32.4363, "step": 13980 }, { "epoch": 332.88358208955225, "grad_norm": 23.717571258544922, "learning_rate": 9.627450980392158e-06, "loss": 33.8844, "step": 13981 }, { "epoch": 332.90746268656716, "grad_norm": 15.471491813659668, "learning_rate": 9.626750700280112e-06, "loss": 33.0996, "step": 13982 }, { "epoch": 332.93134328358207, "grad_norm": 16.660005569458008, "learning_rate": 9.626050420168068e-06, "loss": 33.1315, "step": 13983 }, { "epoch": 332.95522388059703, "grad_norm": 17.334712982177734, "learning_rate": 9.625350140056023e-06, "loss": 32.7679, "step": 13984 }, { "epoch": 332.97910447761194, "grad_norm": 22.827878952026367, "learning_rate": 9.624649859943979e-06, "loss": 33.2822, "step": 13985 }, { "epoch": 333.0, "grad_norm": 14.875100135803223, "learning_rate": 9.623949579831934e-06, "loss": 29.5673, "step": 13986 }, { "epoch": 333.0238805970149, "grad_norm": 22.145021438598633, "learning_rate": 9.62324929971989e-06, "loss": 31.9636, "step": 13987 }, { "epoch": 333.0477611940299, "grad_norm": 19.351001739501953, "learning_rate": 9.622549019607844e-06, "loss": 32.8588, "step": 13988 }, { "epoch": 333.0716417910448, "grad_norm": 18.321104049682617, "learning_rate": 9.621848739495799e-06, "loss": 33.3639, "step": 13989 }, { "epoch": 333.0955223880597, "grad_norm": 20.317712783813477, "learning_rate": 9.621148459383755e-06, "loss": 32.5452, "step": 13990 }, { "epoch": 333.1194029850746, "grad_norm": 23.717632293701172, "learning_rate": 9.62044817927171e-06, "loss": 33.1989, "step": 13991 }, { "epoch": 333.14328358208957, "grad_norm": 15.063462257385254, "learning_rate": 9.619747899159665e-06, "loss": 32.3719, "step": 13992 }, { "epoch": 333.1671641791045, "grad_norm": 18.103557586669922, "learning_rate": 9.61904761904762e-06, "loss": 33.6221, "step": 13993 }, { "epoch": 333.1910447761194, "grad_norm": 19.14454460144043, "learning_rate": 9.618347338935574e-06, "loss": 33.2016, "step": 13994 }, { "epoch": 333.21492537313435, "grad_norm": 18.08968734741211, "learning_rate": 9.61764705882353e-06, "loss": 33.3974, "step": 13995 }, { "epoch": 333.23880597014926, "grad_norm": 15.61563777923584, "learning_rate": 9.616946778711485e-06, "loss": 32.0759, "step": 13996 }, { "epoch": 333.26268656716417, "grad_norm": 16.365129470825195, "learning_rate": 9.616246498599441e-06, "loss": 33.7363, "step": 13997 }, { "epoch": 333.28656716417913, "grad_norm": 16.383771896362305, "learning_rate": 9.615546218487396e-06, "loss": 33.9902, "step": 13998 }, { "epoch": 333.31044776119404, "grad_norm": 18.144445419311523, "learning_rate": 9.614845938375352e-06, "loss": 32.1076, "step": 13999 }, { "epoch": 333.33432835820895, "grad_norm": 15.468334197998047, "learning_rate": 9.614145658263306e-06, "loss": 32.1578, "step": 14000 }, { "epoch": 333.35820895522386, "grad_norm": 16.751747131347656, "learning_rate": 9.61344537815126e-06, "loss": 33.2815, "step": 14001 }, { "epoch": 333.3820895522388, "grad_norm": 19.470293045043945, "learning_rate": 9.612745098039217e-06, "loss": 33.7582, "step": 14002 }, { "epoch": 333.40597014925373, "grad_norm": 16.38135528564453, "learning_rate": 9.612044817927171e-06, "loss": 33.5953, "step": 14003 }, { "epoch": 333.42985074626864, "grad_norm": 20.763050079345703, "learning_rate": 9.611344537815128e-06, "loss": 32.5819, "step": 14004 }, { "epoch": 333.4537313432836, "grad_norm": 16.893936157226562, "learning_rate": 9.610644257703082e-06, "loss": 33.2231, "step": 14005 }, { "epoch": 333.4776119402985, "grad_norm": 15.477121353149414, "learning_rate": 9.609943977591038e-06, "loss": 32.3317, "step": 14006 }, { "epoch": 333.5014925373134, "grad_norm": 16.337400436401367, "learning_rate": 9.609243697478993e-06, "loss": 33.1102, "step": 14007 }, { "epoch": 333.52537313432833, "grad_norm": 19.2496337890625, "learning_rate": 9.608543417366947e-06, "loss": 33.1967, "step": 14008 }, { "epoch": 333.5492537313433, "grad_norm": 18.249853134155273, "learning_rate": 9.607843137254903e-06, "loss": 33.4722, "step": 14009 }, { "epoch": 333.5731343283582, "grad_norm": 16.634292602539062, "learning_rate": 9.607142857142858e-06, "loss": 33.5561, "step": 14010 }, { "epoch": 333.5970149253731, "grad_norm": 18.253963470458984, "learning_rate": 9.606442577030814e-06, "loss": 34.2951, "step": 14011 }, { "epoch": 333.6208955223881, "grad_norm": 17.239595413208008, "learning_rate": 9.605742296918768e-06, "loss": 32.8808, "step": 14012 }, { "epoch": 333.644776119403, "grad_norm": 24.83218002319336, "learning_rate": 9.605042016806723e-06, "loss": 33.9055, "step": 14013 }, { "epoch": 333.6686567164179, "grad_norm": 17.564966201782227, "learning_rate": 9.604341736694679e-06, "loss": 32.2637, "step": 14014 }, { "epoch": 333.6925373134328, "grad_norm": 15.301651954650879, "learning_rate": 9.603641456582634e-06, "loss": 34.7816, "step": 14015 }, { "epoch": 333.7164179104478, "grad_norm": 14.405645370483398, "learning_rate": 9.60294117647059e-06, "loss": 32.6637, "step": 14016 }, { "epoch": 333.7402985074627, "grad_norm": 19.288917541503906, "learning_rate": 9.602240896358544e-06, "loss": 33.4437, "step": 14017 }, { "epoch": 333.7641791044776, "grad_norm": 21.326412200927734, "learning_rate": 9.6015406162465e-06, "loss": 32.9978, "step": 14018 }, { "epoch": 333.78805970149256, "grad_norm": 14.843213081359863, "learning_rate": 9.600840336134455e-06, "loss": 33.1911, "step": 14019 }, { "epoch": 333.81194029850747, "grad_norm": 15.07947826385498, "learning_rate": 9.60014005602241e-06, "loss": 33.2798, "step": 14020 }, { "epoch": 333.8358208955224, "grad_norm": 19.959087371826172, "learning_rate": 9.599439775910366e-06, "loss": 33.4415, "step": 14021 }, { "epoch": 333.85970149253734, "grad_norm": 13.796445846557617, "learning_rate": 9.59873949579832e-06, "loss": 32.0115, "step": 14022 }, { "epoch": 333.88358208955225, "grad_norm": 21.80230712890625, "learning_rate": 9.598039215686276e-06, "loss": 33.4, "step": 14023 }, { "epoch": 333.90746268656716, "grad_norm": 17.446523666381836, "learning_rate": 9.59733893557423e-06, "loss": 32.1369, "step": 14024 }, { "epoch": 333.93134328358207, "grad_norm": 18.63462257385254, "learning_rate": 9.596638655462187e-06, "loss": 32.3353, "step": 14025 }, { "epoch": 333.95522388059703, "grad_norm": 17.132659912109375, "learning_rate": 9.595938375350141e-06, "loss": 32.9065, "step": 14026 }, { "epoch": 333.97910447761194, "grad_norm": 15.178614616394043, "learning_rate": 9.595238095238096e-06, "loss": 32.0331, "step": 14027 }, { "epoch": 334.0, "grad_norm": 17.505582809448242, "learning_rate": 9.594537815126052e-06, "loss": 29.6946, "step": 14028 }, { "epoch": 334.0238805970149, "grad_norm": 15.85994815826416, "learning_rate": 9.593837535014006e-06, "loss": 32.9535, "step": 14029 }, { "epoch": 334.0477611940299, "grad_norm": 19.577545166015625, "learning_rate": 9.593137254901963e-06, "loss": 34.2717, "step": 14030 }, { "epoch": 334.0716417910448, "grad_norm": 16.205463409423828, "learning_rate": 9.592436974789917e-06, "loss": 32.1737, "step": 14031 }, { "epoch": 334.0955223880597, "grad_norm": 19.973770141601562, "learning_rate": 9.591736694677871e-06, "loss": 32.687, "step": 14032 }, { "epoch": 334.1194029850746, "grad_norm": 19.85550308227539, "learning_rate": 9.591036414565828e-06, "loss": 32.7678, "step": 14033 }, { "epoch": 334.14328358208957, "grad_norm": 16.340126037597656, "learning_rate": 9.590336134453782e-06, "loss": 33.6692, "step": 14034 }, { "epoch": 334.1671641791045, "grad_norm": 19.330669403076172, "learning_rate": 9.589635854341738e-06, "loss": 32.5186, "step": 14035 }, { "epoch": 334.1910447761194, "grad_norm": 21.676372528076172, "learning_rate": 9.588935574229693e-06, "loss": 32.613, "step": 14036 }, { "epoch": 334.21492537313435, "grad_norm": 17.480918884277344, "learning_rate": 9.588235294117649e-06, "loss": 32.9289, "step": 14037 }, { "epoch": 334.23880597014926, "grad_norm": 20.410520553588867, "learning_rate": 9.587535014005603e-06, "loss": 33.6284, "step": 14038 }, { "epoch": 334.26268656716417, "grad_norm": 26.22980499267578, "learning_rate": 9.586834733893558e-06, "loss": 33.986, "step": 14039 }, { "epoch": 334.28656716417913, "grad_norm": 17.150678634643555, "learning_rate": 9.586134453781514e-06, "loss": 33.1126, "step": 14040 }, { "epoch": 334.31044776119404, "grad_norm": 16.347063064575195, "learning_rate": 9.585434173669469e-06, "loss": 32.9667, "step": 14041 }, { "epoch": 334.33432835820895, "grad_norm": 18.706422805786133, "learning_rate": 9.584733893557425e-06, "loss": 33.6479, "step": 14042 }, { "epoch": 334.35820895522386, "grad_norm": 22.569150924682617, "learning_rate": 9.584033613445379e-06, "loss": 32.4412, "step": 14043 }, { "epoch": 334.3820895522388, "grad_norm": 16.099870681762695, "learning_rate": 9.583333333333335e-06, "loss": 32.0928, "step": 14044 }, { "epoch": 334.40597014925373, "grad_norm": NaN, "learning_rate": 9.58263305322129e-06, "loss": 52.6876, "step": 14045 }, { "epoch": 334.42985074626864, "grad_norm": 16.205289840698242, "learning_rate": 9.58263305322129e-06, "loss": 32.7782, "step": 14046 }, { "epoch": 334.4537313432836, "grad_norm": 15.66600513458252, "learning_rate": 9.581932773109244e-06, "loss": 34.0963, "step": 14047 }, { "epoch": 334.4776119402985, "grad_norm": 16.28512191772461, "learning_rate": 9.5812324929972e-06, "loss": 33.2025, "step": 14048 }, { "epoch": 334.5014925373134, "grad_norm": 18.70054817199707, "learning_rate": 9.580532212885155e-06, "loss": 32.3075, "step": 14049 }, { "epoch": 334.52537313432833, "grad_norm": 18.112051010131836, "learning_rate": 9.579831932773111e-06, "loss": 32.886, "step": 14050 }, { "epoch": 334.5492537313433, "grad_norm": 18.727407455444336, "learning_rate": 9.579131652661066e-06, "loss": 32.1961, "step": 14051 }, { "epoch": 334.5731343283582, "grad_norm": 15.974007606506348, "learning_rate": 9.57843137254902e-06, "loss": 33.9628, "step": 14052 }, { "epoch": 334.5970149253731, "grad_norm": 18.82468605041504, "learning_rate": 9.577731092436976e-06, "loss": 34.2077, "step": 14053 }, { "epoch": 334.6208955223881, "grad_norm": 16.585046768188477, "learning_rate": 9.57703081232493e-06, "loss": 32.2484, "step": 14054 }, { "epoch": 334.644776119403, "grad_norm": 21.884016036987305, "learning_rate": 9.576330532212887e-06, "loss": 32.4839, "step": 14055 }, { "epoch": 334.6686567164179, "grad_norm": 22.25446891784668, "learning_rate": 9.575630252100841e-06, "loss": 32.5081, "step": 14056 }, { "epoch": 334.6925373134328, "grad_norm": 18.503116607666016, "learning_rate": 9.574929971988797e-06, "loss": 33.2844, "step": 14057 }, { "epoch": 334.7164179104478, "grad_norm": 17.866487503051758, "learning_rate": 9.574229691876752e-06, "loss": 33.05, "step": 14058 }, { "epoch": 334.7402985074627, "grad_norm": 17.552989959716797, "learning_rate": 9.573529411764706e-06, "loss": 32.8999, "step": 14059 }, { "epoch": 334.7641791044776, "grad_norm": 18.457199096679688, "learning_rate": 9.572829131652663e-06, "loss": 33.4919, "step": 14060 }, { "epoch": 334.78805970149256, "grad_norm": 16.289291381835938, "learning_rate": 9.572128851540617e-06, "loss": 33.1962, "step": 14061 }, { "epoch": 334.81194029850747, "grad_norm": 17.95917320251465, "learning_rate": 9.571428571428573e-06, "loss": 33.0722, "step": 14062 }, { "epoch": 334.8358208955224, "grad_norm": 17.195363998413086, "learning_rate": 9.570728291316528e-06, "loss": 32.9456, "step": 14063 }, { "epoch": 334.85970149253734, "grad_norm": 19.218435287475586, "learning_rate": 9.570028011204482e-06, "loss": 33.409, "step": 14064 }, { "epoch": 334.88358208955225, "grad_norm": 20.15607261657715, "learning_rate": 9.569327731092438e-06, "loss": 31.6368, "step": 14065 }, { "epoch": 334.90746268656716, "grad_norm": 20.235383987426758, "learning_rate": 9.568627450980393e-06, "loss": 33.4735, "step": 14066 }, { "epoch": 334.93134328358207, "grad_norm": 15.10827350616455, "learning_rate": 9.567927170868349e-06, "loss": 32.3908, "step": 14067 }, { "epoch": 334.95522388059703, "grad_norm": 14.904987335205078, "learning_rate": 9.567226890756303e-06, "loss": 33.6434, "step": 14068 }, { "epoch": 334.97910447761194, "grad_norm": 16.30361557006836, "learning_rate": 9.56652661064426e-06, "loss": 33.929, "step": 14069 }, { "epoch": 335.0, "grad_norm": 14.410323143005371, "learning_rate": 9.565826330532214e-06, "loss": 28.253, "step": 14070 }, { "epoch": 335.0238805970149, "grad_norm": 16.226764678955078, "learning_rate": 9.565126050420169e-06, "loss": 32.8985, "step": 14071 }, { "epoch": 335.0477611940299, "grad_norm": 19.064477920532227, "learning_rate": 9.564425770308125e-06, "loss": 33.6443, "step": 14072 }, { "epoch": 335.0716417910448, "grad_norm": 20.508211135864258, "learning_rate": 9.56372549019608e-06, "loss": 31.2859, "step": 14073 }, { "epoch": 335.0955223880597, "grad_norm": 16.088598251342773, "learning_rate": 9.563025210084035e-06, "loss": 32.9429, "step": 14074 }, { "epoch": 335.1194029850746, "grad_norm": 17.129329681396484, "learning_rate": 9.56232492997199e-06, "loss": 32.0086, "step": 14075 }, { "epoch": 335.14328358208957, "grad_norm": 22.49455451965332, "learning_rate": 9.561624649859946e-06, "loss": 32.0695, "step": 14076 }, { "epoch": 335.1671641791045, "grad_norm": 22.862733840942383, "learning_rate": 9.5609243697479e-06, "loss": 32.9638, "step": 14077 }, { "epoch": 335.1910447761194, "grad_norm": 14.46081256866455, "learning_rate": 9.560224089635855e-06, "loss": 33.0671, "step": 14078 }, { "epoch": 335.21492537313435, "grad_norm": 22.365453720092773, "learning_rate": 9.559523809523811e-06, "loss": 33.8696, "step": 14079 }, { "epoch": 335.23880597014926, "grad_norm": 18.069684982299805, "learning_rate": 9.558823529411766e-06, "loss": 34.1228, "step": 14080 }, { "epoch": 335.26268656716417, "grad_norm": 19.54884147644043, "learning_rate": 9.558123249299722e-06, "loss": 32.6555, "step": 14081 }, { "epoch": 335.28656716417913, "grad_norm": 18.429580688476562, "learning_rate": 9.557422969187676e-06, "loss": 32.3539, "step": 14082 }, { "epoch": 335.31044776119404, "grad_norm": 20.241392135620117, "learning_rate": 9.55672268907563e-06, "loss": 33.1865, "step": 14083 }, { "epoch": 335.33432835820895, "grad_norm": 16.518409729003906, "learning_rate": 9.556022408963587e-06, "loss": 33.8972, "step": 14084 }, { "epoch": 335.35820895522386, "grad_norm": 18.1431884765625, "learning_rate": 9.555322128851541e-06, "loss": 33.9726, "step": 14085 }, { "epoch": 335.3820895522388, "grad_norm": 16.836904525756836, "learning_rate": 9.554621848739497e-06, "loss": 34.3594, "step": 14086 }, { "epoch": 335.40597014925373, "grad_norm": 23.994831085205078, "learning_rate": 9.553921568627452e-06, "loss": 32.9725, "step": 14087 }, { "epoch": 335.42985074626864, "grad_norm": 19.052099227905273, "learning_rate": 9.553221288515408e-06, "loss": 32.6177, "step": 14088 }, { "epoch": 335.4537313432836, "grad_norm": 21.062023162841797, "learning_rate": 9.552521008403363e-06, "loss": 34.386, "step": 14089 }, { "epoch": 335.4776119402985, "grad_norm": 20.814306259155273, "learning_rate": 9.551820728291317e-06, "loss": 32.3291, "step": 14090 }, { "epoch": 335.5014925373134, "grad_norm": 18.172229766845703, "learning_rate": 9.551120448179273e-06, "loss": 34.0514, "step": 14091 }, { "epoch": 335.52537313432833, "grad_norm": 15.15807056427002, "learning_rate": 9.550420168067228e-06, "loss": 32.2528, "step": 14092 }, { "epoch": 335.5492537313433, "grad_norm": 23.364456176757812, "learning_rate": 9.549719887955184e-06, "loss": 32.6772, "step": 14093 }, { "epoch": 335.5731343283582, "grad_norm": 20.49326515197754, "learning_rate": 9.549019607843138e-06, "loss": 32.9553, "step": 14094 }, { "epoch": 335.5970149253731, "grad_norm": 18.637468338012695, "learning_rate": 9.548319327731095e-06, "loss": 33.2998, "step": 14095 }, { "epoch": 335.6208955223881, "grad_norm": 16.121950149536133, "learning_rate": 9.547619047619049e-06, "loss": 32.7043, "step": 14096 }, { "epoch": 335.644776119403, "grad_norm": 15.5372314453125, "learning_rate": 9.546918767507003e-06, "loss": 33.3733, "step": 14097 }, { "epoch": 335.6686567164179, "grad_norm": 22.66282844543457, "learning_rate": 9.54621848739496e-06, "loss": 32.5467, "step": 14098 }, { "epoch": 335.6925373134328, "grad_norm": 17.79058265686035, "learning_rate": 9.545518207282914e-06, "loss": 31.6198, "step": 14099 }, { "epoch": 335.7164179104478, "grad_norm": 16.973054885864258, "learning_rate": 9.54481792717087e-06, "loss": 33.6689, "step": 14100 }, { "epoch": 335.7402985074627, "grad_norm": 17.641132354736328, "learning_rate": 9.544117647058825e-06, "loss": 32.5873, "step": 14101 }, { "epoch": 335.7641791044776, "grad_norm": 19.078208923339844, "learning_rate": 9.54341736694678e-06, "loss": 32.9321, "step": 14102 }, { "epoch": 335.78805970149256, "grad_norm": 16.99062156677246, "learning_rate": 9.542717086834735e-06, "loss": 33.0054, "step": 14103 }, { "epoch": 335.81194029850747, "grad_norm": 15.178858757019043, "learning_rate": 9.54201680672269e-06, "loss": 32.666, "step": 14104 }, { "epoch": 335.8358208955224, "grad_norm": 25.96713638305664, "learning_rate": 9.541316526610646e-06, "loss": 33.3611, "step": 14105 }, { "epoch": 335.85970149253734, "grad_norm": 19.31339454650879, "learning_rate": 9.5406162464986e-06, "loss": 34.1691, "step": 14106 }, { "epoch": 335.88358208955225, "grad_norm": 14.538851737976074, "learning_rate": 9.539915966386557e-06, "loss": 32.2317, "step": 14107 }, { "epoch": 335.90746268656716, "grad_norm": 20.270048141479492, "learning_rate": 9.539215686274511e-06, "loss": 31.934, "step": 14108 }, { "epoch": 335.93134328358207, "grad_norm": 15.543272972106934, "learning_rate": 9.538515406162466e-06, "loss": 32.8392, "step": 14109 }, { "epoch": 335.95522388059703, "grad_norm": 16.131620407104492, "learning_rate": 9.537815126050422e-06, "loss": 32.7216, "step": 14110 }, { "epoch": 335.97910447761194, "grad_norm": 17.379873275756836, "learning_rate": 9.537114845938376e-06, "loss": 33.9147, "step": 14111 }, { "epoch": 336.0, "grad_norm": 17.886241912841797, "learning_rate": 9.536414565826332e-06, "loss": 28.57, "step": 14112 }, { "epoch": 336.0238805970149, "grad_norm": 16.74209213256836, "learning_rate": 9.535714285714287e-06, "loss": 33.6038, "step": 14113 }, { "epoch": 336.0477611940299, "grad_norm": 18.454927444458008, "learning_rate": 9.535014005602243e-06, "loss": 33.8839, "step": 14114 }, { "epoch": 336.0716417910448, "grad_norm": 17.335752487182617, "learning_rate": 9.534313725490198e-06, "loss": 32.6463, "step": 14115 }, { "epoch": 336.0955223880597, "grad_norm": 14.936349868774414, "learning_rate": 9.533613445378152e-06, "loss": 32.6998, "step": 14116 }, { "epoch": 336.1194029850746, "grad_norm": 20.028823852539062, "learning_rate": 9.532913165266108e-06, "loss": 32.6714, "step": 14117 }, { "epoch": 336.14328358208957, "grad_norm": 19.465471267700195, "learning_rate": 9.532212885154063e-06, "loss": 32.4848, "step": 14118 }, { "epoch": 336.1671641791045, "grad_norm": 15.841813087463379, "learning_rate": 9.531512605042019e-06, "loss": 33.5726, "step": 14119 }, { "epoch": 336.1910447761194, "grad_norm": 20.18979835510254, "learning_rate": 9.530812324929972e-06, "loss": 33.4828, "step": 14120 }, { "epoch": 336.21492537313435, "grad_norm": 19.321622848510742, "learning_rate": 9.530112044817928e-06, "loss": 33.9114, "step": 14121 }, { "epoch": 336.23880597014926, "grad_norm": 20.468835830688477, "learning_rate": 9.529411764705882e-06, "loss": 34.7023, "step": 14122 }, { "epoch": 336.26268656716417, "grad_norm": 19.255521774291992, "learning_rate": 9.528711484593838e-06, "loss": 32.7568, "step": 14123 }, { "epoch": 336.28656716417913, "grad_norm": 15.941425323486328, "learning_rate": 9.528011204481793e-06, "loss": 32.7218, "step": 14124 }, { "epoch": 336.31044776119404, "grad_norm": 15.645880699157715, "learning_rate": 9.527310924369747e-06, "loss": 32.4347, "step": 14125 }, { "epoch": 336.33432835820895, "grad_norm": 18.97342872619629, "learning_rate": 9.526610644257703e-06, "loss": 32.9201, "step": 14126 }, { "epoch": 336.35820895522386, "grad_norm": 17.18836212158203, "learning_rate": 9.525910364145658e-06, "loss": 33.3319, "step": 14127 }, { "epoch": 336.3820895522388, "grad_norm": 21.085561752319336, "learning_rate": 9.525210084033614e-06, "loss": 33.0258, "step": 14128 }, { "epoch": 336.40597014925373, "grad_norm": 23.493581771850586, "learning_rate": 9.524509803921569e-06, "loss": 32.7748, "step": 14129 }, { "epoch": 336.42985074626864, "grad_norm": 23.530038833618164, "learning_rate": 9.523809523809525e-06, "loss": 32.6311, "step": 14130 }, { "epoch": 336.4537313432836, "grad_norm": 15.708246231079102, "learning_rate": 9.52310924369748e-06, "loss": 32.34, "step": 14131 }, { "epoch": 336.4776119402985, "grad_norm": 16.440576553344727, "learning_rate": 9.522408963585434e-06, "loss": 30.9526, "step": 14132 }, { "epoch": 336.5014925373134, "grad_norm": 19.99802017211914, "learning_rate": 9.52170868347339e-06, "loss": 34.0244, "step": 14133 }, { "epoch": 336.52537313432833, "grad_norm": 14.879878997802734, "learning_rate": 9.521008403361344e-06, "loss": 32.7524, "step": 14134 }, { "epoch": 336.5492537313433, "grad_norm": 18.270427703857422, "learning_rate": 9.5203081232493e-06, "loss": 32.9277, "step": 14135 }, { "epoch": 336.5731343283582, "grad_norm": 15.599655151367188, "learning_rate": 9.519607843137255e-06, "loss": 33.0719, "step": 14136 }, { "epoch": 336.5970149253731, "grad_norm": 20.563583374023438, "learning_rate": 9.518907563025211e-06, "loss": 32.2879, "step": 14137 }, { "epoch": 336.6208955223881, "grad_norm": 19.32246208190918, "learning_rate": 9.518207282913166e-06, "loss": 33.6501, "step": 14138 }, { "epoch": 336.644776119403, "grad_norm": 17.838897705078125, "learning_rate": 9.51750700280112e-06, "loss": 32.6958, "step": 14139 }, { "epoch": 336.6686567164179, "grad_norm": 17.472997665405273, "learning_rate": 9.516806722689076e-06, "loss": 31.9068, "step": 14140 }, { "epoch": 336.6925373134328, "grad_norm": 17.576868057250977, "learning_rate": 9.51610644257703e-06, "loss": 33.8977, "step": 14141 }, { "epoch": 336.7164179104478, "grad_norm": 17.706974029541016, "learning_rate": 9.515406162464987e-06, "loss": 32.6955, "step": 14142 }, { "epoch": 336.7402985074627, "grad_norm": 19.88766098022461, "learning_rate": 9.514705882352941e-06, "loss": 31.2888, "step": 14143 }, { "epoch": 336.7641791044776, "grad_norm": 18.691804885864258, "learning_rate": 9.514005602240896e-06, "loss": 32.2483, "step": 14144 }, { "epoch": 336.78805970149256, "grad_norm": 14.847087860107422, "learning_rate": 9.513305322128852e-06, "loss": 34.7386, "step": 14145 }, { "epoch": 336.81194029850747, "grad_norm": 16.232799530029297, "learning_rate": 9.512605042016806e-06, "loss": 31.5283, "step": 14146 }, { "epoch": 336.8358208955224, "grad_norm": 17.496410369873047, "learning_rate": 9.511904761904763e-06, "loss": 32.8851, "step": 14147 }, { "epoch": 336.85970149253734, "grad_norm": 16.615825653076172, "learning_rate": 9.511204481792717e-06, "loss": 32.3683, "step": 14148 }, { "epoch": 336.88358208955225, "grad_norm": NaN, "learning_rate": 9.510504201680673e-06, "loss": 59.2316, "step": 14149 }, { "epoch": 336.90746268656716, "grad_norm": 15.206809043884277, "learning_rate": 9.510504201680673e-06, "loss": 32.6558, "step": 14150 }, { "epoch": 336.93134328358207, "grad_norm": 18.026573181152344, "learning_rate": 9.509803921568628e-06, "loss": 34.1254, "step": 14151 }, { "epoch": 336.95522388059703, "grad_norm": 20.462858200073242, "learning_rate": 9.509103641456582e-06, "loss": 32.6371, "step": 14152 }, { "epoch": 336.97910447761194, "grad_norm": 18.992738723754883, "learning_rate": 9.508403361344538e-06, "loss": 33.2755, "step": 14153 }, { "epoch": 337.0, "grad_norm": 14.200139999389648, "learning_rate": 9.507703081232493e-06, "loss": 29.2591, "step": 14154 }, { "epoch": 337.0238805970149, "grad_norm": 14.875282287597656, "learning_rate": 9.507002801120449e-06, "loss": 32.4425, "step": 14155 }, { "epoch": 337.0477611940299, "grad_norm": 17.062175750732422, "learning_rate": 9.506302521008403e-06, "loss": 32.4821, "step": 14156 }, { "epoch": 337.0716417910448, "grad_norm": 19.384296417236328, "learning_rate": 9.50560224089636e-06, "loss": 32.3341, "step": 14157 }, { "epoch": 337.0955223880597, "grad_norm": 18.337078094482422, "learning_rate": 9.504901960784314e-06, "loss": 33.4632, "step": 14158 }, { "epoch": 337.1194029850746, "grad_norm": 17.43202781677246, "learning_rate": 9.504201680672269e-06, "loss": 33.616, "step": 14159 }, { "epoch": 337.14328358208957, "grad_norm": 15.656824111938477, "learning_rate": 9.503501400560225e-06, "loss": 32.8101, "step": 14160 }, { "epoch": 337.1671641791045, "grad_norm": 15.518818855285645, "learning_rate": 9.50280112044818e-06, "loss": 32.4492, "step": 14161 }, { "epoch": 337.1910447761194, "grad_norm": 16.95665168762207, "learning_rate": 9.502100840336135e-06, "loss": 32.6265, "step": 14162 }, { "epoch": 337.21492537313435, "grad_norm": 20.434589385986328, "learning_rate": 9.50140056022409e-06, "loss": 33.026, "step": 14163 }, { "epoch": 337.23880597014926, "grad_norm": 16.341533660888672, "learning_rate": 9.500700280112044e-06, "loss": 32.0338, "step": 14164 }, { "epoch": 337.26268656716417, "grad_norm": 17.249235153198242, "learning_rate": 9.5e-06, "loss": 32.2044, "step": 14165 }, { "epoch": 337.28656716417913, "grad_norm": 24.686254501342773, "learning_rate": 9.499299719887955e-06, "loss": 32.7926, "step": 14166 }, { "epoch": 337.31044776119404, "grad_norm": 21.519001007080078, "learning_rate": 9.498599439775911e-06, "loss": 32.7667, "step": 14167 }, { "epoch": 337.33432835820895, "grad_norm": 18.206024169921875, "learning_rate": 9.497899159663866e-06, "loss": 33.5748, "step": 14168 }, { "epoch": 337.35820895522386, "grad_norm": 18.19744110107422, "learning_rate": 9.497198879551822e-06, "loss": 32.5232, "step": 14169 }, { "epoch": 337.3820895522388, "grad_norm": 27.09054183959961, "learning_rate": 9.496498599439776e-06, "loss": 33.1358, "step": 14170 }, { "epoch": 337.40597014925373, "grad_norm": 14.919368743896484, "learning_rate": 9.49579831932773e-06, "loss": 33.5639, "step": 14171 }, { "epoch": 337.42985074626864, "grad_norm": 24.887348175048828, "learning_rate": 9.495098039215687e-06, "loss": 32.4732, "step": 14172 }, { "epoch": 337.4537313432836, "grad_norm": 21.802900314331055, "learning_rate": 9.494397759103641e-06, "loss": 32.8481, "step": 14173 }, { "epoch": 337.4776119402985, "grad_norm": 17.48831558227539, "learning_rate": 9.493697478991598e-06, "loss": 32.9001, "step": 14174 }, { "epoch": 337.5014925373134, "grad_norm": 29.25938606262207, "learning_rate": 9.492997198879552e-06, "loss": 32.6143, "step": 14175 }, { "epoch": 337.52537313432833, "grad_norm": 18.84782600402832, "learning_rate": 9.492296918767508e-06, "loss": 32.6812, "step": 14176 }, { "epoch": 337.5492537313433, "grad_norm": 26.44182586669922, "learning_rate": 9.491596638655463e-06, "loss": 32.6849, "step": 14177 }, { "epoch": 337.5731343283582, "grad_norm": 19.890064239501953, "learning_rate": 9.490896358543417e-06, "loss": 32.6251, "step": 14178 }, { "epoch": 337.5970149253731, "grad_norm": 24.663236618041992, "learning_rate": 9.490196078431373e-06, "loss": 33.1442, "step": 14179 }, { "epoch": 337.6208955223881, "grad_norm": 21.09729766845703, "learning_rate": 9.489495798319328e-06, "loss": 32.1798, "step": 14180 }, { "epoch": 337.644776119403, "grad_norm": 24.468088150024414, "learning_rate": 9.488795518207284e-06, "loss": 33.5802, "step": 14181 }, { "epoch": 337.6686567164179, "grad_norm": 19.231128692626953, "learning_rate": 9.488095238095238e-06, "loss": 32.989, "step": 14182 }, { "epoch": 337.6925373134328, "grad_norm": 28.940576553344727, "learning_rate": 9.487394957983193e-06, "loss": 34.0419, "step": 14183 }, { "epoch": 337.7164179104478, "grad_norm": 21.28771209716797, "learning_rate": 9.486694677871149e-06, "loss": 33.7646, "step": 14184 }, { "epoch": 337.7402985074627, "grad_norm": 31.944252014160156, "learning_rate": 9.485994397759104e-06, "loss": 33.3507, "step": 14185 }, { "epoch": 337.7641791044776, "grad_norm": 28.118261337280273, "learning_rate": 9.48529411764706e-06, "loss": 34.0282, "step": 14186 }, { "epoch": 337.78805970149256, "grad_norm": 27.06432342529297, "learning_rate": 9.484593837535014e-06, "loss": 31.9742, "step": 14187 }, { "epoch": 337.81194029850747, "grad_norm": 24.204050064086914, "learning_rate": 9.48389355742297e-06, "loss": 33.7301, "step": 14188 }, { "epoch": 337.8358208955224, "grad_norm": 22.309860229492188, "learning_rate": 9.483193277310925e-06, "loss": 34.0992, "step": 14189 }, { "epoch": 337.85970149253734, "grad_norm": 27.0042724609375, "learning_rate": 9.48249299719888e-06, "loss": 32.8996, "step": 14190 }, { "epoch": 337.88358208955225, "grad_norm": 20.163787841796875, "learning_rate": 9.481792717086835e-06, "loss": 31.9279, "step": 14191 }, { "epoch": 337.90746268656716, "grad_norm": 26.834156036376953, "learning_rate": 9.48109243697479e-06, "loss": 33.403, "step": 14192 }, { "epoch": 337.93134328358207, "grad_norm": 19.65085792541504, "learning_rate": 9.480392156862746e-06, "loss": 32.299, "step": 14193 }, { "epoch": 337.95522388059703, "grad_norm": 24.564346313476562, "learning_rate": 9.4796918767507e-06, "loss": 32.4174, "step": 14194 }, { "epoch": 337.97910447761194, "grad_norm": 25.409894943237305, "learning_rate": 9.478991596638657e-06, "loss": 32.9881, "step": 14195 }, { "epoch": 338.0, "grad_norm": 15.833084106445312, "learning_rate": 9.478291316526611e-06, "loss": 29.332, "step": 14196 }, { "epoch": 338.0238805970149, "grad_norm": 26.590150833129883, "learning_rate": 9.477591036414566e-06, "loss": 33.5099, "step": 14197 }, { "epoch": 338.0477611940299, "grad_norm": 20.627695083618164, "learning_rate": 9.476890756302522e-06, "loss": 33.9682, "step": 14198 }, { "epoch": 338.0716417910448, "grad_norm": 22.874095916748047, "learning_rate": 9.476190476190476e-06, "loss": 32.7461, "step": 14199 }, { "epoch": 338.0955223880597, "grad_norm": 24.205812454223633, "learning_rate": 9.475490196078432e-06, "loss": 32.2976, "step": 14200 }, { "epoch": 338.1194029850746, "grad_norm": 19.102245330810547, "learning_rate": 9.474789915966387e-06, "loss": 33.6175, "step": 14201 }, { "epoch": 338.14328358208957, "grad_norm": 24.30849266052246, "learning_rate": 9.474089635854341e-06, "loss": 32.922, "step": 14202 }, { "epoch": 338.1671641791045, "grad_norm": 21.702083587646484, "learning_rate": 9.473389355742298e-06, "loss": 32.5274, "step": 14203 }, { "epoch": 338.1910447761194, "grad_norm": 18.384666442871094, "learning_rate": 9.472689075630252e-06, "loss": 33.8106, "step": 14204 }, { "epoch": 338.21492537313435, "grad_norm": 26.47401237487793, "learning_rate": 9.471988795518208e-06, "loss": 31.6716, "step": 14205 }, { "epoch": 338.23880597014926, "grad_norm": 21.23504638671875, "learning_rate": 9.471288515406163e-06, "loss": 32.9033, "step": 14206 }, { "epoch": 338.26268656716417, "grad_norm": 19.06757354736328, "learning_rate": 9.470588235294119e-06, "loss": 32.307, "step": 14207 }, { "epoch": 338.28656716417913, "grad_norm": 28.329666137695312, "learning_rate": 9.469887955182073e-06, "loss": 32.2881, "step": 14208 }, { "epoch": 338.31044776119404, "grad_norm": 15.802681922912598, "learning_rate": 9.469187675070028e-06, "loss": 31.7809, "step": 14209 }, { "epoch": 338.33432835820895, "grad_norm": 27.86570167541504, "learning_rate": 9.468487394957984e-06, "loss": 31.7268, "step": 14210 }, { "epoch": 338.35820895522386, "grad_norm": 19.854049682617188, "learning_rate": 9.467787114845938e-06, "loss": 32.9964, "step": 14211 }, { "epoch": 338.3820895522388, "grad_norm": 21.1588134765625, "learning_rate": 9.467086834733895e-06, "loss": 31.9979, "step": 14212 }, { "epoch": 338.40597014925373, "grad_norm": 28.05729103088379, "learning_rate": 9.466386554621849e-06, "loss": 33.3669, "step": 14213 }, { "epoch": 338.42985074626864, "grad_norm": 17.63733673095703, "learning_rate": 9.465686274509804e-06, "loss": 32.252, "step": 14214 }, { "epoch": 338.4537313432836, "grad_norm": 35.62979507446289, "learning_rate": 9.46498599439776e-06, "loss": 34.5511, "step": 14215 }, { "epoch": 338.4776119402985, "grad_norm": 21.031435012817383, "learning_rate": 9.464285714285714e-06, "loss": 32.5291, "step": 14216 }, { "epoch": 338.5014925373134, "grad_norm": 34.751930236816406, "learning_rate": 9.46358543417367e-06, "loss": 33.8158, "step": 14217 }, { "epoch": 338.52537313432833, "grad_norm": 22.33123779296875, "learning_rate": 9.462885154061625e-06, "loss": 32.7993, "step": 14218 }, { "epoch": 338.5492537313433, "grad_norm": 40.59553527832031, "learning_rate": 9.462184873949581e-06, "loss": 32.3558, "step": 14219 }, { "epoch": 338.5731343283582, "grad_norm": 30.890233993530273, "learning_rate": 9.461484593837535e-06, "loss": 32.699, "step": 14220 }, { "epoch": 338.5970149253731, "grad_norm": 40.78409194946289, "learning_rate": 9.46078431372549e-06, "loss": 32.6561, "step": 14221 }, { "epoch": 338.6208955223881, "grad_norm": 33.0892333984375, "learning_rate": 9.460084033613446e-06, "loss": 31.7217, "step": 14222 }, { "epoch": 338.644776119403, "grad_norm": 32.41324996948242, "learning_rate": 9.4593837535014e-06, "loss": 34.3416, "step": 14223 }, { "epoch": 338.6686567164179, "grad_norm": 32.27386474609375, "learning_rate": 9.458683473389357e-06, "loss": 33.208, "step": 14224 }, { "epoch": 338.6925373134328, "grad_norm": 32.213863372802734, "learning_rate": 9.457983193277311e-06, "loss": 33.9612, "step": 14225 }, { "epoch": 338.7164179104478, "grad_norm": 25.6570987701416, "learning_rate": 9.457282913165267e-06, "loss": 33.7285, "step": 14226 }, { "epoch": 338.7402985074627, "grad_norm": NaN, "learning_rate": 9.456582633053222e-06, "loss": 56.5918, "step": 14227 }, { "epoch": 338.7641791044776, "grad_norm": 31.062545776367188, "learning_rate": 9.456582633053222e-06, "loss": 32.0069, "step": 14228 }, { "epoch": 338.78805970149256, "grad_norm": 24.51154327392578, "learning_rate": 9.455882352941176e-06, "loss": 32.0931, "step": 14229 }, { "epoch": 338.81194029850747, "grad_norm": 39.734127044677734, "learning_rate": 9.455182072829132e-06, "loss": 33.1966, "step": 14230 }, { "epoch": 338.8358208955224, "grad_norm": 33.052085876464844, "learning_rate": 9.454481792717087e-06, "loss": 32.4541, "step": 14231 }, { "epoch": 338.85970149253734, "grad_norm": 37.13149642944336, "learning_rate": 9.453781512605043e-06, "loss": 33.1593, "step": 14232 }, { "epoch": 338.88358208955225, "grad_norm": 35.28886413574219, "learning_rate": 9.453081232492998e-06, "loss": 34.4264, "step": 14233 }, { "epoch": 338.90746268656716, "grad_norm": 26.909751892089844, "learning_rate": 9.452380952380952e-06, "loss": 33.0516, "step": 14234 }, { "epoch": 338.93134328358207, "grad_norm": 28.23269271850586, "learning_rate": 9.451680672268908e-06, "loss": 33.2262, "step": 14235 }, { "epoch": 338.95522388059703, "grad_norm": NaN, "learning_rate": 9.450980392156863e-06, "loss": 55.4614, "step": 14236 }, { "epoch": 338.97910447761194, "grad_norm": 31.025379180908203, "learning_rate": 9.450980392156863e-06, "loss": 32.6424, "step": 14237 }, { "epoch": 339.0, "grad_norm": 22.70488929748535, "learning_rate": 9.450280112044819e-06, "loss": 29.9982, "step": 14238 }, { "epoch": 339.0238805970149, "grad_norm": 36.64794158935547, "learning_rate": 9.449579831932773e-06, "loss": 32.7851, "step": 14239 }, { "epoch": 339.0477611940299, "grad_norm": 33.03408432006836, "learning_rate": 9.44887955182073e-06, "loss": 34.3438, "step": 14240 }, { "epoch": 339.0716417910448, "grad_norm": 32.48908996582031, "learning_rate": 9.448179271708684e-06, "loss": 32.5063, "step": 14241 }, { "epoch": 339.0955223880597, "grad_norm": 28.791791915893555, "learning_rate": 9.447478991596638e-06, "loss": 32.1059, "step": 14242 }, { "epoch": 339.1194029850746, "grad_norm": 34.80320358276367, "learning_rate": 9.446778711484595e-06, "loss": 33.997, "step": 14243 }, { "epoch": 339.14328358208957, "grad_norm": 29.57621955871582, "learning_rate": 9.446078431372549e-06, "loss": 31.8533, "step": 14244 }, { "epoch": 339.1671641791045, "grad_norm": 35.61138916015625, "learning_rate": 9.445378151260505e-06, "loss": 33.6139, "step": 14245 }, { "epoch": 339.1910447761194, "grad_norm": 34.38545608520508, "learning_rate": 9.44467787114846e-06, "loss": 33.6592, "step": 14246 }, { "epoch": 339.21492537313435, "grad_norm": 26.435632705688477, "learning_rate": 9.443977591036416e-06, "loss": 32.1824, "step": 14247 }, { "epoch": 339.23880597014926, "grad_norm": 27.746368408203125, "learning_rate": 9.44327731092437e-06, "loss": 32.9943, "step": 14248 }, { "epoch": 339.26268656716417, "grad_norm": 33.04074478149414, "learning_rate": 9.442577030812325e-06, "loss": 33.0518, "step": 14249 }, { "epoch": 339.28656716417913, "grad_norm": 23.841264724731445, "learning_rate": 9.441876750700281e-06, "loss": 32.3814, "step": 14250 }, { "epoch": 339.31044776119404, "grad_norm": 39.37582778930664, "learning_rate": 9.441176470588235e-06, "loss": 33.3083, "step": 14251 }, { "epoch": 339.33432835820895, "grad_norm": 34.970680236816406, "learning_rate": 9.440476190476192e-06, "loss": 31.2412, "step": 14252 }, { "epoch": 339.35820895522386, "grad_norm": 30.473628997802734, "learning_rate": 9.439775910364146e-06, "loss": 33.0853, "step": 14253 }, { "epoch": 339.3820895522388, "grad_norm": 31.16111946105957, "learning_rate": 9.4390756302521e-06, "loss": 32.9067, "step": 14254 }, { "epoch": 339.40597014925373, "grad_norm": 30.015321731567383, "learning_rate": 9.438375350140057e-06, "loss": 33.0965, "step": 14255 }, { "epoch": 339.42985074626864, "grad_norm": 24.88060188293457, "learning_rate": 9.437675070028011e-06, "loss": 33.2003, "step": 14256 }, { "epoch": 339.4537313432836, "grad_norm": 36.02987289428711, "learning_rate": 9.436974789915967e-06, "loss": 33.1847, "step": 14257 }, { "epoch": 339.4776119402985, "grad_norm": 29.7424259185791, "learning_rate": 9.436274509803922e-06, "loss": 32.0017, "step": 14258 }, { "epoch": 339.5014925373134, "grad_norm": 33.275089263916016, "learning_rate": 9.435574229691878e-06, "loss": 32.6896, "step": 14259 }, { "epoch": 339.52537313432833, "grad_norm": 28.949687957763672, "learning_rate": 9.434873949579833e-06, "loss": 31.5255, "step": 14260 }, { "epoch": 339.5492537313433, "grad_norm": 31.03632926940918, "learning_rate": 9.434173669467787e-06, "loss": 31.9781, "step": 14261 }, { "epoch": 339.5731343283582, "grad_norm": 28.23992919921875, "learning_rate": 9.433473389355743e-06, "loss": 31.3936, "step": 14262 }, { "epoch": 339.5970149253731, "grad_norm": 29.979907989501953, "learning_rate": 9.432773109243698e-06, "loss": 33.0441, "step": 14263 }, { "epoch": 339.6208955223881, "grad_norm": 24.1731014251709, "learning_rate": 9.432072829131654e-06, "loss": 33.505, "step": 14264 }, { "epoch": 339.644776119403, "grad_norm": 31.158857345581055, "learning_rate": 9.431372549019608e-06, "loss": 32.4075, "step": 14265 }, { "epoch": 339.6686567164179, "grad_norm": 24.861671447753906, "learning_rate": 9.430672268907564e-06, "loss": 32.0919, "step": 14266 }, { "epoch": 339.6925373134328, "grad_norm": 36.93232727050781, "learning_rate": 9.429971988795519e-06, "loss": 33.1194, "step": 14267 }, { "epoch": 339.7164179104478, "grad_norm": 34.79704284667969, "learning_rate": 9.429271708683473e-06, "loss": 33.9816, "step": 14268 }, { "epoch": 339.7402985074627, "grad_norm": 24.96257209777832, "learning_rate": 9.42857142857143e-06, "loss": 32.5782, "step": 14269 }, { "epoch": 339.7641791044776, "grad_norm": 26.007413864135742, "learning_rate": 9.427871148459384e-06, "loss": 32.541, "step": 14270 }, { "epoch": 339.78805970149256, "grad_norm": 29.220415115356445, "learning_rate": 9.42717086834734e-06, "loss": 31.3541, "step": 14271 }, { "epoch": 339.81194029850747, "grad_norm": 21.893226623535156, "learning_rate": 9.426470588235295e-06, "loss": 33.4779, "step": 14272 }, { "epoch": 339.8358208955224, "grad_norm": 38.02252197265625, "learning_rate": 9.425770308123249e-06, "loss": 33.7362, "step": 14273 }, { "epoch": 339.85970149253734, "grad_norm": 31.56594467163086, "learning_rate": 9.425070028011205e-06, "loss": 33.641, "step": 14274 }, { "epoch": 339.88358208955225, "grad_norm": 28.384130477905273, "learning_rate": 9.42436974789916e-06, "loss": 32.6683, "step": 14275 }, { "epoch": 339.90746268656716, "grad_norm": 26.01172637939453, "learning_rate": 9.423669467787116e-06, "loss": 33.7216, "step": 14276 }, { "epoch": 339.93134328358207, "grad_norm": 29.72218894958496, "learning_rate": 9.42296918767507e-06, "loss": 32.7691, "step": 14277 }, { "epoch": 339.95522388059703, "grad_norm": 25.04932403564453, "learning_rate": 9.422268907563027e-06, "loss": 33.1087, "step": 14278 }, { "epoch": 339.97910447761194, "grad_norm": 35.899986267089844, "learning_rate": 9.421568627450981e-06, "loss": 33.5594, "step": 14279 }, { "epoch": 340.0, "grad_norm": 26.640682220458984, "learning_rate": 9.420868347338936e-06, "loss": 29.6241, "step": 14280 }, { "epoch": 340.0, "step": 14280, "total_flos": 7.019919021570625e+17, "train_loss": 1.9593938636512649, "train_runtime": 25678.8464, "train_samples_per_second": 70.863, "train_steps_per_second": 0.556 }, { "epoch": 340.0238805970149, "grad_norm": 27.959510803222656, "learning_rate": 1e-05, "loss": 32.6037, "step": 14281 }, { "epoch": 340.0477611940299, "grad_norm": Infinity, "learning_rate": 9.999338624338626e-06, "loss": 40.8788, "step": 14282 }, { "epoch": 340.0716417910448, "grad_norm": Infinity, "learning_rate": 9.999338624338626e-06, "loss": 39.7832, "step": 14283 }, { "epoch": 340.0955223880597, "grad_norm": 477.1483459472656, "learning_rate": 9.999338624338626e-06, "loss": 40.5832, "step": 14284 }, { "epoch": 340.1194029850746, "grad_norm": 299.5267028808594, "learning_rate": 9.99867724867725e-06, "loss": 38.0195, "step": 14285 }, { "epoch": 340.14328358208957, "grad_norm": 93.81217956542969, "learning_rate": 9.998015873015874e-06, "loss": 35.5388, "step": 14286 }, { "epoch": 340.1671641791045, "grad_norm": 118.56687927246094, "learning_rate": 9.997354497354498e-06, "loss": 35.5971, "step": 14287 }, { "epoch": 340.1910447761194, "grad_norm": 92.6313705444336, "learning_rate": 9.996693121693123e-06, "loss": 33.6818, "step": 14288 }, { "epoch": 340.21492537313435, "grad_norm": 62.361122131347656, "learning_rate": 9.996031746031746e-06, "loss": 32.9934, "step": 14289 }, { "epoch": 340.23880597014926, "grad_norm": 43.54608917236328, "learning_rate": 9.995370370370371e-06, "loss": 34.6618, "step": 14290 }, { "epoch": 340.26268656716417, "grad_norm": 38.393516540527344, "learning_rate": 9.994708994708996e-06, "loss": 32.9076, "step": 14291 }, { "epoch": 340.28656716417913, "grad_norm": 31.553220748901367, "learning_rate": 9.99404761904762e-06, "loss": 32.6114, "step": 14292 }, { "epoch": 340.31044776119404, "grad_norm": 30.89167022705078, "learning_rate": 9.993386243386244e-06, "loss": 34.1163, "step": 14293 }, { "epoch": 340.33432835820895, "grad_norm": 27.52607536315918, "learning_rate": 9.992724867724869e-06, "loss": 32.6266, "step": 14294 }, { "epoch": 340.35820895522386, "grad_norm": 26.616046905517578, "learning_rate": 9.992063492063493e-06, "loss": 33.7885, "step": 14295 }, { "epoch": 340.3820895522388, "grad_norm": 26.06792640686035, "learning_rate": 9.991402116402116e-06, "loss": 31.6442, "step": 14296 }, { "epoch": 340.40597014925373, "grad_norm": 20.864423751831055, "learning_rate": 9.990740740740741e-06, "loss": 33.6534, "step": 14297 }, { "epoch": 340.42985074626864, "grad_norm": 18.44462013244629, "learning_rate": 9.990079365079366e-06, "loss": 32.3902, "step": 14298 }, { "epoch": 340.4537313432836, "grad_norm": 22.97857666015625, "learning_rate": 9.989417989417989e-06, "loss": 33.2743, "step": 14299 }, { "epoch": 340.4776119402985, "grad_norm": 24.830507278442383, "learning_rate": 9.988756613756616e-06, "loss": 33.1315, "step": 14300 }, { "epoch": 340.5014925373134, "grad_norm": 18.54153060913086, "learning_rate": 9.988095238095239e-06, "loss": 33.2882, "step": 14301 }, { "epoch": 340.52537313432833, "grad_norm": 24.46211051940918, "learning_rate": 9.987433862433864e-06, "loss": 33.7494, "step": 14302 }, { "epoch": 340.5492537313433, "grad_norm": 19.64615821838379, "learning_rate": 9.986772486772488e-06, "loss": 33.2569, "step": 14303 }, { "epoch": 340.5731343283582, "grad_norm": 20.825637817382812, "learning_rate": 9.986111111111111e-06, "loss": 33.0892, "step": 14304 }, { "epoch": 340.5970149253731, "grad_norm": 15.84910774230957, "learning_rate": 9.985449735449736e-06, "loss": 33.0573, "step": 14305 }, { "epoch": 340.6208955223881, "grad_norm": 21.941417694091797, "learning_rate": 9.984788359788361e-06, "loss": 33.7433, "step": 14306 }, { "epoch": 340.644776119403, "grad_norm": 18.16843605041504, "learning_rate": 9.984126984126986e-06, "loss": 32.7713, "step": 14307 }, { "epoch": 340.6686567164179, "grad_norm": 19.012653350830078, "learning_rate": 9.983465608465609e-06, "loss": 33.2743, "step": 14308 }, { "epoch": 340.6925373134328, "grad_norm": 17.871700286865234, "learning_rate": 9.982804232804234e-06, "loss": 33.5691, "step": 14309 }, { "epoch": 340.7164179104478, "grad_norm": 17.754318237304688, "learning_rate": 9.982142857142858e-06, "loss": 32.1907, "step": 14310 }, { "epoch": 340.7402985074627, "grad_norm": 17.912324905395508, "learning_rate": 9.981481481481482e-06, "loss": 33.3217, "step": 14311 }, { "epoch": 340.7641791044776, "grad_norm": 17.187820434570312, "learning_rate": 9.980820105820106e-06, "loss": 32.7716, "step": 14312 }, { "epoch": 340.78805970149256, "grad_norm": 21.306100845336914, "learning_rate": 9.980158730158731e-06, "loss": 32.4835, "step": 14313 }, { "epoch": 340.81194029850747, "grad_norm": 19.078615188598633, "learning_rate": 9.979497354497354e-06, "loss": 33.5441, "step": 14314 }, { "epoch": 340.8358208955224, "grad_norm": 17.367889404296875, "learning_rate": 9.97883597883598e-06, "loss": 32.7492, "step": 14315 }, { "epoch": 340.85970149253734, "grad_norm": 18.947738647460938, "learning_rate": 9.978174603174604e-06, "loss": 33.1542, "step": 14316 }, { "epoch": 340.88358208955225, "grad_norm": 22.891326904296875, "learning_rate": 9.977513227513229e-06, "loss": 32.932, "step": 14317 }, { "epoch": 340.90746268656716, "grad_norm": 15.350235939025879, "learning_rate": 9.976851851851853e-06, "loss": 32.6713, "step": 14318 }, { "epoch": 340.93134328358207, "grad_norm": 26.475204467773438, "learning_rate": 9.976190476190477e-06, "loss": 33.5343, "step": 14319 }, { "epoch": 340.95522388059703, "grad_norm": 21.695802688598633, "learning_rate": 9.975529100529101e-06, "loss": 32.3089, "step": 14320 }, { "epoch": 340.97910447761194, "grad_norm": 20.613195419311523, "learning_rate": 9.974867724867726e-06, "loss": 33.2205, "step": 14321 }, { "epoch": 341.0, "grad_norm": 14.636250495910645, "learning_rate": 9.97420634920635e-06, "loss": 28.0786, "step": 14322 }, { "epoch": 341.0238805970149, "grad_norm": 20.600358963012695, "learning_rate": 9.973544973544974e-06, "loss": 32.294, "step": 14323 }, { "epoch": 341.0477611940299, "grad_norm": 16.827571868896484, "learning_rate": 9.972883597883599e-06, "loss": 32.5052, "step": 14324 }, { "epoch": 341.0716417910448, "grad_norm": 20.91819953918457, "learning_rate": 9.972222222222224e-06, "loss": 33.0947, "step": 14325 }, { "epoch": 341.0955223880597, "grad_norm": 21.71983528137207, "learning_rate": 9.971560846560847e-06, "loss": 32.3301, "step": 14326 }, { "epoch": 341.1194029850746, "grad_norm": 17.501211166381836, "learning_rate": 9.970899470899472e-06, "loss": 33.0806, "step": 14327 }, { "epoch": 341.14328358208957, "grad_norm": 21.825477600097656, "learning_rate": 9.970238095238096e-06, "loss": 33.318, "step": 14328 }, { "epoch": 341.1671641791045, "grad_norm": 19.944629669189453, "learning_rate": 9.96957671957672e-06, "loss": 32.1097, "step": 14329 }, { "epoch": 341.1910447761194, "grad_norm": 20.726354598999023, "learning_rate": 9.968915343915346e-06, "loss": 32.8561, "step": 14330 }, { "epoch": 341.21492537313435, "grad_norm": 15.42965030670166, "learning_rate": 9.968253968253969e-06, "loss": 34.1319, "step": 14331 }, { "epoch": 341.23880597014926, "grad_norm": 19.265260696411133, "learning_rate": 9.967592592592594e-06, "loss": 33.0066, "step": 14332 }, { "epoch": 341.26268656716417, "grad_norm": 15.428890228271484, "learning_rate": 9.966931216931219e-06, "loss": 30.7677, "step": 14333 }, { "epoch": 341.28656716417913, "grad_norm": 17.121864318847656, "learning_rate": 9.966269841269842e-06, "loss": 32.5364, "step": 14334 }, { "epoch": 341.31044776119404, "grad_norm": 17.536283493041992, "learning_rate": 9.965608465608467e-06, "loss": 34.0252, "step": 14335 }, { "epoch": 341.33432835820895, "grad_norm": 18.656221389770508, "learning_rate": 9.964947089947091e-06, "loss": 33.532, "step": 14336 }, { "epoch": 341.35820895522386, "grad_norm": 18.07448387145996, "learning_rate": 9.964285714285714e-06, "loss": 32.296, "step": 14337 }, { "epoch": 341.3820895522388, "grad_norm": 15.17371654510498, "learning_rate": 9.96362433862434e-06, "loss": 33.5718, "step": 14338 }, { "epoch": 341.40597014925373, "grad_norm": 21.652860641479492, "learning_rate": 9.962962962962964e-06, "loss": 33.834, "step": 14339 }, { "epoch": 341.42985074626864, "grad_norm": 20.939512252807617, "learning_rate": 9.962301587301589e-06, "loss": 31.4863, "step": 14340 }, { "epoch": 341.4537313432836, "grad_norm": 16.739261627197266, "learning_rate": 9.961640211640212e-06, "loss": 31.4979, "step": 14341 }, { "epoch": 341.4776119402985, "grad_norm": 15.421792984008789, "learning_rate": 9.960978835978837e-06, "loss": 33.6466, "step": 14342 }, { "epoch": 341.5014925373134, "grad_norm": 17.82432746887207, "learning_rate": 9.960317460317462e-06, "loss": 33.2464, "step": 14343 }, { "epoch": 341.52537313432833, "grad_norm": 20.29499053955078, "learning_rate": 9.959656084656085e-06, "loss": 32.4114, "step": 14344 }, { "epoch": 341.5492537313433, "grad_norm": 16.78494644165039, "learning_rate": 9.958994708994711e-06, "loss": 32.0153, "step": 14345 }, { "epoch": 341.5731343283582, "grad_norm": 17.644960403442383, "learning_rate": 9.958333333333334e-06, "loss": 32.7187, "step": 14346 }, { "epoch": 341.5970149253731, "grad_norm": 14.848749160766602, "learning_rate": 9.957671957671959e-06, "loss": 33.4641, "step": 14347 }, { "epoch": 341.6208955223881, "grad_norm": 16.474485397338867, "learning_rate": 9.957010582010584e-06, "loss": 33.8312, "step": 14348 }, { "epoch": 341.644776119403, "grad_norm": 18.354982376098633, "learning_rate": 9.956349206349207e-06, "loss": 33.069, "step": 14349 }, { "epoch": 341.6686567164179, "grad_norm": 15.309164047241211, "learning_rate": 9.955687830687832e-06, "loss": 31.9663, "step": 14350 }, { "epoch": 341.6925373134328, "grad_norm": 16.006591796875, "learning_rate": 9.955026455026457e-06, "loss": 32.8982, "step": 14351 }, { "epoch": 341.7164179104478, "grad_norm": 16.061914443969727, "learning_rate": 9.95436507936508e-06, "loss": 32.4113, "step": 14352 }, { "epoch": 341.7402985074627, "grad_norm": 22.423648834228516, "learning_rate": 9.953703703703704e-06, "loss": 33.2202, "step": 14353 }, { "epoch": 341.7641791044776, "grad_norm": 15.490160942077637, "learning_rate": 9.95304232804233e-06, "loss": 33.1065, "step": 14354 }, { "epoch": 341.78805970149256, "grad_norm": 18.584867477416992, "learning_rate": 9.952380952380954e-06, "loss": 34.0665, "step": 14355 }, { "epoch": 341.81194029850747, "grad_norm": 16.091171264648438, "learning_rate": 9.951719576719577e-06, "loss": 33.7751, "step": 14356 }, { "epoch": 341.8358208955224, "grad_norm": 19.1419734954834, "learning_rate": 9.951058201058202e-06, "loss": 32.8372, "step": 14357 }, { "epoch": 341.85970149253734, "grad_norm": 16.991975784301758, "learning_rate": 9.950396825396827e-06, "loss": 32.4311, "step": 14358 }, { "epoch": 341.88358208955225, "grad_norm": 21.18120765686035, "learning_rate": 9.94973544973545e-06, "loss": 33.2953, "step": 14359 }, { "epoch": 341.90746268656716, "grad_norm": 19.203752517700195, "learning_rate": 9.949074074074075e-06, "loss": 32.4372, "step": 14360 }, { "epoch": 341.93134328358207, "grad_norm": 19.6485595703125, "learning_rate": 9.9484126984127e-06, "loss": 32.4662, "step": 14361 }, { "epoch": 341.95522388059703, "grad_norm": 15.468148231506348, "learning_rate": 9.947751322751323e-06, "loss": 32.9375, "step": 14362 }, { "epoch": 341.97910447761194, "grad_norm": 15.480298042297363, "learning_rate": 9.947089947089947e-06, "loss": 32.8921, "step": 14363 }, { "epoch": 342.0, "grad_norm": 13.073140144348145, "learning_rate": 9.946428571428572e-06, "loss": 27.3888, "step": 14364 }, { "epoch": 342.0238805970149, "grad_norm": 21.75962257385254, "learning_rate": 9.945767195767197e-06, "loss": 33.6421, "step": 14365 }, { "epoch": 342.0477611940299, "grad_norm": 18.32163429260254, "learning_rate": 9.94510582010582e-06, "loss": 32.984, "step": 14366 }, { "epoch": 342.0716417910448, "grad_norm": 13.932046890258789, "learning_rate": 9.944444444444445e-06, "loss": 32.6151, "step": 14367 }, { "epoch": 342.0955223880597, "grad_norm": 26.55956268310547, "learning_rate": 9.94378306878307e-06, "loss": 32.9738, "step": 14368 }, { "epoch": 342.1194029850746, "grad_norm": 22.966833114624023, "learning_rate": 9.943121693121693e-06, "loss": 32.3167, "step": 14369 }, { "epoch": 342.14328358208957, "grad_norm": 16.46741485595703, "learning_rate": 9.94246031746032e-06, "loss": 32.2897, "step": 14370 }, { "epoch": 342.1671641791045, "grad_norm": 22.425783157348633, "learning_rate": 9.941798941798942e-06, "loss": 32.5772, "step": 14371 }, { "epoch": 342.1910447761194, "grad_norm": 18.857101440429688, "learning_rate": 9.941137566137567e-06, "loss": 33.0011, "step": 14372 }, { "epoch": 342.21492537313435, "grad_norm": 19.67682456970215, "learning_rate": 9.940476190476192e-06, "loss": 32.5695, "step": 14373 }, { "epoch": 342.23880597014926, "grad_norm": 20.178865432739258, "learning_rate": 9.939814814814815e-06, "loss": 33.4321, "step": 14374 }, { "epoch": 342.26268656716417, "grad_norm": 18.824743270874023, "learning_rate": 9.93915343915344e-06, "loss": 32.723, "step": 14375 }, { "epoch": 342.28656716417913, "grad_norm": 18.322490692138672, "learning_rate": 9.938492063492065e-06, "loss": 31.9804, "step": 14376 }, { "epoch": 342.31044776119404, "grad_norm": 14.578727722167969, "learning_rate": 9.937830687830688e-06, "loss": 32.5729, "step": 14377 }, { "epoch": 342.33432835820895, "grad_norm": 24.338285446166992, "learning_rate": 9.937169312169313e-06, "loss": 33.423, "step": 14378 }, { "epoch": 342.35820895522386, "grad_norm": 19.33673858642578, "learning_rate": 9.936507936507937e-06, "loss": 33.3762, "step": 14379 }, { "epoch": 342.3820895522388, "grad_norm": 18.73155975341797, "learning_rate": 9.935846560846562e-06, "loss": 32.4231, "step": 14380 }, { "epoch": 342.40597014925373, "grad_norm": 22.124692916870117, "learning_rate": 9.935185185185185e-06, "loss": 34.2178, "step": 14381 }, { "epoch": 342.42985074626864, "grad_norm": 14.40739631652832, "learning_rate": 9.93452380952381e-06, "loss": 34.4726, "step": 14382 }, { "epoch": 342.4537313432836, "grad_norm": 24.34845733642578, "learning_rate": 9.933862433862435e-06, "loss": 33.1978, "step": 14383 }, { "epoch": 342.4776119402985, "grad_norm": 21.977155685424805, "learning_rate": 9.933201058201058e-06, "loss": 33.5581, "step": 14384 }, { "epoch": 342.5014925373134, "grad_norm": 20.774227142333984, "learning_rate": 9.932539682539684e-06, "loss": 33.1747, "step": 14385 }, { "epoch": 342.52537313432833, "grad_norm": 18.0212345123291, "learning_rate": 9.931878306878308e-06, "loss": 33.0015, "step": 14386 }, { "epoch": 342.5492537313433, "grad_norm": 23.497987747192383, "learning_rate": 9.931216931216932e-06, "loss": 33.0074, "step": 14387 }, { "epoch": 342.5731343283582, "grad_norm": 21.486970901489258, "learning_rate": 9.930555555555557e-06, "loss": 33.8315, "step": 14388 }, { "epoch": 342.5970149253731, "grad_norm": 16.688438415527344, "learning_rate": 9.92989417989418e-06, "loss": 33.0534, "step": 14389 }, { "epoch": 342.6208955223881, "grad_norm": 20.908672332763672, "learning_rate": 9.929232804232805e-06, "loss": 32.9924, "step": 14390 }, { "epoch": 342.644776119403, "grad_norm": 18.626937866210938, "learning_rate": 9.92857142857143e-06, "loss": 33.0165, "step": 14391 }, { "epoch": 342.6686567164179, "grad_norm": 19.471647262573242, "learning_rate": 9.927910052910053e-06, "loss": 32.2877, "step": 14392 }, { "epoch": 342.6925373134328, "grad_norm": 18.289623260498047, "learning_rate": 9.927248677248678e-06, "loss": 30.9186, "step": 14393 }, { "epoch": 342.7164179104478, "grad_norm": 15.482297897338867, "learning_rate": 9.926587301587303e-06, "loss": 32.0131, "step": 14394 }, { "epoch": 342.7402985074627, "grad_norm": 17.17262840270996, "learning_rate": 9.925925925925927e-06, "loss": 32.9729, "step": 14395 }, { "epoch": 342.7641791044776, "grad_norm": 16.10808753967285, "learning_rate": 9.92526455026455e-06, "loss": 32.9315, "step": 14396 }, { "epoch": 342.78805970149256, "grad_norm": 17.409530639648438, "learning_rate": 9.924603174603175e-06, "loss": 32.7374, "step": 14397 }, { "epoch": 342.81194029850747, "grad_norm": 15.100672721862793, "learning_rate": 9.9239417989418e-06, "loss": 31.1945, "step": 14398 }, { "epoch": 342.8358208955224, "grad_norm": 19.96903419494629, "learning_rate": 9.923280423280423e-06, "loss": 31.4145, "step": 14399 }, { "epoch": 342.85970149253734, "grad_norm": 19.02230453491211, "learning_rate": 9.922619047619048e-06, "loss": 33.2449, "step": 14400 }, { "epoch": 342.88358208955225, "grad_norm": 16.819826126098633, "learning_rate": 9.921957671957673e-06, "loss": 33.9047, "step": 14401 }, { "epoch": 342.90746268656716, "grad_norm": 14.629315376281738, "learning_rate": 9.921296296296296e-06, "loss": 32.331, "step": 14402 }, { "epoch": 342.93134328358207, "grad_norm": 15.982880592346191, "learning_rate": 9.920634920634922e-06, "loss": 32.6046, "step": 14403 }, { "epoch": 342.95522388059703, "grad_norm": 14.962193489074707, "learning_rate": 9.919973544973545e-06, "loss": 32.3762, "step": 14404 }, { "epoch": 342.97910447761194, "grad_norm": 22.48440170288086, "learning_rate": 9.91931216931217e-06, "loss": 32.403, "step": 14405 }, { "epoch": 343.0, "grad_norm": 15.533278465270996, "learning_rate": 9.918650793650795e-06, "loss": 27.591, "step": 14406 }, { "epoch": 343.0238805970149, "grad_norm": 15.413907051086426, "learning_rate": 9.917989417989418e-06, "loss": 33.8138, "step": 14407 }, { "epoch": 343.0477611940299, "grad_norm": 19.02525520324707, "learning_rate": 9.917328042328043e-06, "loss": 32.8945, "step": 14408 }, { "epoch": 343.0716417910448, "grad_norm": 16.191198348999023, "learning_rate": 9.916666666666668e-06, "loss": 32.4773, "step": 14409 }, { "epoch": 343.0955223880597, "grad_norm": 17.758012771606445, "learning_rate": 9.916005291005293e-06, "loss": 32.2462, "step": 14410 }, { "epoch": 343.1194029850746, "grad_norm": 16.209293365478516, "learning_rate": 9.915343915343916e-06, "loss": 33.1102, "step": 14411 }, { "epoch": 343.14328358208957, "grad_norm": 17.826519012451172, "learning_rate": 9.91468253968254e-06, "loss": 32.0688, "step": 14412 }, { "epoch": 343.1671641791045, "grad_norm": 14.85556697845459, "learning_rate": 9.914021164021165e-06, "loss": 31.965, "step": 14413 }, { "epoch": 343.1910447761194, "grad_norm": 17.07773780822754, "learning_rate": 9.913359788359788e-06, "loss": 32.7075, "step": 14414 }, { "epoch": 343.21492537313435, "grad_norm": 13.941993713378906, "learning_rate": 9.912698412698413e-06, "loss": 31.4292, "step": 14415 }, { "epoch": 343.23880597014926, "grad_norm": 16.280799865722656, "learning_rate": 9.912037037037038e-06, "loss": 32.8106, "step": 14416 }, { "epoch": 343.26268656716417, "grad_norm": 14.638901710510254, "learning_rate": 9.911375661375661e-06, "loss": 32.9818, "step": 14417 }, { "epoch": 343.28656716417913, "grad_norm": 16.58753204345703, "learning_rate": 9.910714285714288e-06, "loss": 32.6556, "step": 14418 }, { "epoch": 343.31044776119404, "grad_norm": 15.208233833312988, "learning_rate": 9.91005291005291e-06, "loss": 33.0974, "step": 14419 }, { "epoch": 343.33432835820895, "grad_norm": 16.727413177490234, "learning_rate": 9.909391534391535e-06, "loss": 31.9921, "step": 14420 }, { "epoch": 343.35820895522386, "grad_norm": 14.190279006958008, "learning_rate": 9.90873015873016e-06, "loss": 32.3217, "step": 14421 }, { "epoch": 343.3820895522388, "grad_norm": 17.157922744750977, "learning_rate": 9.908068783068783e-06, "loss": 32.5896, "step": 14422 }, { "epoch": 343.40597014925373, "grad_norm": 16.291887283325195, "learning_rate": 9.907407407407408e-06, "loss": 32.3837, "step": 14423 }, { "epoch": 343.42985074626864, "grad_norm": 18.021459579467773, "learning_rate": 9.906746031746033e-06, "loss": 31.1202, "step": 14424 }, { "epoch": 343.4537313432836, "grad_norm": 19.96455955505371, "learning_rate": 9.906084656084658e-06, "loss": 32.5496, "step": 14425 }, { "epoch": 343.4776119402985, "grad_norm": 18.93027687072754, "learning_rate": 9.90542328042328e-06, "loss": 33.0246, "step": 14426 }, { "epoch": 343.5014925373134, "grad_norm": 18.157634735107422, "learning_rate": 9.904761904761906e-06, "loss": 31.5134, "step": 14427 }, { "epoch": 343.52537313432833, "grad_norm": 18.85066032409668, "learning_rate": 9.90410052910053e-06, "loss": 33.0202, "step": 14428 }, { "epoch": 343.5492537313433, "grad_norm": 19.623821258544922, "learning_rate": 9.903439153439154e-06, "loss": 32.3831, "step": 14429 }, { "epoch": 343.5731343283582, "grad_norm": 18.22905731201172, "learning_rate": 9.902777777777778e-06, "loss": 33.3364, "step": 14430 }, { "epoch": 343.5970149253731, "grad_norm": 17.250316619873047, "learning_rate": 9.902116402116403e-06, "loss": 31.8466, "step": 14431 }, { "epoch": 343.6208955223881, "grad_norm": 16.9425048828125, "learning_rate": 9.901455026455026e-06, "loss": 32.9163, "step": 14432 }, { "epoch": 343.644776119403, "grad_norm": 13.741183280944824, "learning_rate": 9.900793650793653e-06, "loss": 33.4662, "step": 14433 }, { "epoch": 343.6686567164179, "grad_norm": 15.247157096862793, "learning_rate": 9.900132275132276e-06, "loss": 33.4205, "step": 14434 }, { "epoch": 343.6925373134328, "grad_norm": 13.897851943969727, "learning_rate": 9.8994708994709e-06, "loss": 33.4061, "step": 14435 }, { "epoch": 343.7164179104478, "grad_norm": 16.523433685302734, "learning_rate": 9.898809523809525e-06, "loss": 32.8969, "step": 14436 }, { "epoch": 343.7402985074627, "grad_norm": 16.474576950073242, "learning_rate": 9.898148148148148e-06, "loss": 31.8715, "step": 14437 }, { "epoch": 343.7641791044776, "grad_norm": 21.49550437927246, "learning_rate": 9.897486772486773e-06, "loss": 34.0384, "step": 14438 }, { "epoch": 343.78805970149256, "grad_norm": 21.20180320739746, "learning_rate": 9.896825396825398e-06, "loss": 34.3682, "step": 14439 }, { "epoch": 343.81194029850747, "grad_norm": 14.964547157287598, "learning_rate": 9.896164021164021e-06, "loss": 32.9994, "step": 14440 }, { "epoch": 343.8358208955224, "grad_norm": 14.992241859436035, "learning_rate": 9.895502645502646e-06, "loss": 33.961, "step": 14441 }, { "epoch": 343.85970149253734, "grad_norm": 15.811175346374512, "learning_rate": 9.89484126984127e-06, "loss": 33.093, "step": 14442 }, { "epoch": 343.88358208955225, "grad_norm": 24.445816040039062, "learning_rate": 9.894179894179896e-06, "loss": 33.3162, "step": 14443 }, { "epoch": 343.90746268656716, "grad_norm": 16.5667781829834, "learning_rate": 9.893518518518519e-06, "loss": 32.5101, "step": 14444 }, { "epoch": 343.93134328358207, "grad_norm": 18.14990997314453, "learning_rate": 9.892857142857143e-06, "loss": 32.4721, "step": 14445 }, { "epoch": 343.95522388059703, "grad_norm": 18.30742645263672, "learning_rate": 9.892195767195768e-06, "loss": 33.2712, "step": 14446 }, { "epoch": 343.97910447761194, "grad_norm": 24.58142852783203, "learning_rate": 9.891534391534391e-06, "loss": 32.5449, "step": 14447 }, { "epoch": 344.0, "grad_norm": 19.144222259521484, "learning_rate": 9.890873015873018e-06, "loss": 29.1607, "step": 14448 }, { "epoch": 344.0238805970149, "grad_norm": 17.23626708984375, "learning_rate": 9.890211640211641e-06, "loss": 33.7711, "step": 14449 }, { "epoch": 344.0477611940299, "grad_norm": 23.77984046936035, "learning_rate": 9.889550264550266e-06, "loss": 32.5528, "step": 14450 }, { "epoch": 344.0716417910448, "grad_norm": 21.655672073364258, "learning_rate": 9.88888888888889e-06, "loss": 32.9569, "step": 14451 }, { "epoch": 344.0955223880597, "grad_norm": 18.255815505981445, "learning_rate": 9.888227513227514e-06, "loss": 32.7276, "step": 14452 }, { "epoch": 344.1194029850746, "grad_norm": 17.8723201751709, "learning_rate": 9.887566137566138e-06, "loss": 32.9329, "step": 14453 }, { "epoch": 344.14328358208957, "grad_norm": 25.975536346435547, "learning_rate": 9.886904761904763e-06, "loss": 32.0884, "step": 14454 }, { "epoch": 344.1671641791045, "grad_norm": 17.716848373413086, "learning_rate": 9.886243386243386e-06, "loss": 31.6352, "step": 14455 }, { "epoch": 344.1910447761194, "grad_norm": 25.284719467163086, "learning_rate": 9.885582010582011e-06, "loss": 32.7679, "step": 14456 }, { "epoch": 344.21492537313435, "grad_norm": 19.202789306640625, "learning_rate": 9.884920634920636e-06, "loss": 32.5243, "step": 14457 }, { "epoch": 344.23880597014926, "grad_norm": 26.566465377807617, "learning_rate": 9.88425925925926e-06, "loss": 33.6448, "step": 14458 }, { "epoch": 344.26268656716417, "grad_norm": 20.592832565307617, "learning_rate": 9.883597883597884e-06, "loss": 34.2431, "step": 14459 }, { "epoch": 344.28656716417913, "grad_norm": 26.993072509765625, "learning_rate": 9.882936507936509e-06, "loss": 32.3125, "step": 14460 }, { "epoch": 344.31044776119404, "grad_norm": 24.073251724243164, "learning_rate": 9.882275132275133e-06, "loss": 32.8236, "step": 14461 }, { "epoch": 344.33432835820895, "grad_norm": 24.742605209350586, "learning_rate": 9.881613756613757e-06, "loss": 32.8045, "step": 14462 }, { "epoch": 344.35820895522386, "grad_norm": 22.220346450805664, "learning_rate": 9.880952380952381e-06, "loss": 32.2585, "step": 14463 }, { "epoch": 344.3820895522388, "grad_norm": 22.460216522216797, "learning_rate": 9.880291005291006e-06, "loss": 32.2342, "step": 14464 }, { "epoch": 344.40597014925373, "grad_norm": 19.556997299194336, "learning_rate": 9.87962962962963e-06, "loss": 31.6273, "step": 14465 }, { "epoch": 344.42985074626864, "grad_norm": 21.854801177978516, "learning_rate": 9.878968253968256e-06, "loss": 32.9147, "step": 14466 }, { "epoch": 344.4537313432836, "grad_norm": 18.510866165161133, "learning_rate": 9.878306878306879e-06, "loss": 32.0147, "step": 14467 }, { "epoch": 344.4776119402985, "grad_norm": 18.42432975769043, "learning_rate": 9.877645502645504e-06, "loss": 31.8964, "step": 14468 }, { "epoch": 344.5014925373134, "grad_norm": 18.292217254638672, "learning_rate": 9.876984126984128e-06, "loss": 33.6321, "step": 14469 }, { "epoch": 344.52537313432833, "grad_norm": 18.253793716430664, "learning_rate": 9.876322751322752e-06, "loss": 33.6365, "step": 14470 }, { "epoch": 344.5492537313433, "grad_norm": 20.801607131958008, "learning_rate": 9.875661375661376e-06, "loss": 33.5187, "step": 14471 }, { "epoch": 344.5731343283582, "grad_norm": 15.532673835754395, "learning_rate": 9.875000000000001e-06, "loss": 31.9724, "step": 14472 }, { "epoch": 344.5970149253731, "grad_norm": 22.409029006958008, "learning_rate": 9.874338624338626e-06, "loss": 32.0922, "step": 14473 }, { "epoch": 344.6208955223881, "grad_norm": 15.50053596496582, "learning_rate": 9.873677248677249e-06, "loss": 31.9287, "step": 14474 }, { "epoch": 344.644776119403, "grad_norm": 22.280168533325195, "learning_rate": 9.873015873015874e-06, "loss": 33.626, "step": 14475 }, { "epoch": 344.6686567164179, "grad_norm": 18.608139038085938, "learning_rate": 9.872354497354499e-06, "loss": 32.9671, "step": 14476 }, { "epoch": 344.6925373134328, "grad_norm": 22.933162689208984, "learning_rate": 9.871693121693122e-06, "loss": 33.3424, "step": 14477 }, { "epoch": 344.7164179104478, "grad_norm": 19.895978927612305, "learning_rate": 9.871031746031747e-06, "loss": 31.7091, "step": 14478 }, { "epoch": 344.7402985074627, "grad_norm": 19.396108627319336, "learning_rate": 9.870370370370371e-06, "loss": 32.6533, "step": 14479 }, { "epoch": 344.7641791044776, "grad_norm": 19.97950553894043, "learning_rate": 9.869708994708994e-06, "loss": 32.891, "step": 14480 }, { "epoch": 344.78805970149256, "grad_norm": 19.812124252319336, "learning_rate": 9.869047619047621e-06, "loss": 32.8538, "step": 14481 }, { "epoch": 344.81194029850747, "grad_norm": 21.236356735229492, "learning_rate": 9.868386243386244e-06, "loss": 31.6757, "step": 14482 }, { "epoch": 344.8358208955224, "grad_norm": 21.389366149902344, "learning_rate": 9.867724867724869e-06, "loss": 32.5475, "step": 14483 }, { "epoch": 344.85970149253734, "grad_norm": 19.750301361083984, "learning_rate": 9.867063492063494e-06, "loss": 33.2045, "step": 14484 }, { "epoch": 344.88358208955225, "grad_norm": 20.80890655517578, "learning_rate": 9.866402116402117e-06, "loss": 32.949, "step": 14485 }, { "epoch": 344.90746268656716, "grad_norm": 16.11481285095215, "learning_rate": 9.865740740740742e-06, "loss": 33.3379, "step": 14486 }, { "epoch": 344.93134328358207, "grad_norm": 23.29161262512207, "learning_rate": 9.865079365079366e-06, "loss": 33.3099, "step": 14487 }, { "epoch": 344.95522388059703, "grad_norm": 18.72956657409668, "learning_rate": 9.864417989417991e-06, "loss": 32.5956, "step": 14488 }, { "epoch": 344.97910447761194, "grad_norm": 16.80988883972168, "learning_rate": 9.863756613756614e-06, "loss": 33.2447, "step": 14489 }, { "epoch": 345.0, "grad_norm": 13.624998092651367, "learning_rate": 9.863095238095239e-06, "loss": 27.8146, "step": 14490 }, { "epoch": 345.0238805970149, "grad_norm": 17.24331283569336, "learning_rate": 9.862433862433864e-06, "loss": 31.8899, "step": 14491 }, { "epoch": 345.0477611940299, "grad_norm": 18.86675262451172, "learning_rate": 9.861772486772487e-06, "loss": 33.113, "step": 14492 }, { "epoch": 345.0716417910448, "grad_norm": 16.538835525512695, "learning_rate": 9.861111111111112e-06, "loss": 32.6275, "step": 14493 }, { "epoch": 345.0955223880597, "grad_norm": 18.611021041870117, "learning_rate": 9.860449735449737e-06, "loss": 33.4015, "step": 14494 }, { "epoch": 345.1194029850746, "grad_norm": 15.672499656677246, "learning_rate": 9.85978835978836e-06, "loss": 32.9165, "step": 14495 }, { "epoch": 345.14328358208957, "grad_norm": 17.5117130279541, "learning_rate": 9.859126984126986e-06, "loss": 33.469, "step": 14496 }, { "epoch": 345.1671641791045, "grad_norm": 15.414146423339844, "learning_rate": 9.85846560846561e-06, "loss": 32.4254, "step": 14497 }, { "epoch": 345.1910447761194, "grad_norm": 24.0014591217041, "learning_rate": 9.857804232804234e-06, "loss": 32.4304, "step": 14498 }, { "epoch": 345.21492537313435, "grad_norm": 17.462278366088867, "learning_rate": 9.857142857142859e-06, "loss": 33.4874, "step": 14499 }, { "epoch": 345.23880597014926, "grad_norm": 15.571789741516113, "learning_rate": 9.856481481481482e-06, "loss": 32.2926, "step": 14500 }, { "epoch": 345.26268656716417, "grad_norm": 21.77423858642578, "learning_rate": 9.855820105820107e-06, "loss": 33.1389, "step": 14501 }, { "epoch": 345.28656716417913, "grad_norm": 18.773069381713867, "learning_rate": 9.855158730158732e-06, "loss": 32.1091, "step": 14502 }, { "epoch": 345.31044776119404, "grad_norm": 16.718849182128906, "learning_rate": 9.854497354497355e-06, "loss": 33.1977, "step": 14503 }, { "epoch": 345.33432835820895, "grad_norm": 16.68386459350586, "learning_rate": 9.85383597883598e-06, "loss": 32.0855, "step": 14504 }, { "epoch": 345.35820895522386, "grad_norm": 14.361015319824219, "learning_rate": 9.853174603174604e-06, "loss": 33.9809, "step": 14505 }, { "epoch": 345.3820895522388, "grad_norm": 16.64942169189453, "learning_rate": 9.852513227513229e-06, "loss": 32.5625, "step": 14506 }, { "epoch": 345.40597014925373, "grad_norm": 18.994455337524414, "learning_rate": 9.851851851851852e-06, "loss": 31.9444, "step": 14507 }, { "epoch": 345.42985074626864, "grad_norm": 21.159523010253906, "learning_rate": 9.851190476190477e-06, "loss": 32.7126, "step": 14508 }, { "epoch": 345.4537313432836, "grad_norm": 20.6182861328125, "learning_rate": 9.850529100529102e-06, "loss": 31.1335, "step": 14509 }, { "epoch": 345.4776119402985, "grad_norm": 16.21684455871582, "learning_rate": 9.849867724867725e-06, "loss": 33.4699, "step": 14510 }, { "epoch": 345.5014925373134, "grad_norm": 19.0106258392334, "learning_rate": 9.849206349206351e-06, "loss": 32.6018, "step": 14511 }, { "epoch": 345.52537313432833, "grad_norm": 16.533052444458008, "learning_rate": 9.848544973544974e-06, "loss": 32.9582, "step": 14512 }, { "epoch": 345.5492537313433, "grad_norm": 16.521297454833984, "learning_rate": 9.8478835978836e-06, "loss": 32.0621, "step": 14513 }, { "epoch": 345.5731343283582, "grad_norm": 17.655977249145508, "learning_rate": 9.847222222222224e-06, "loss": 32.5079, "step": 14514 }, { "epoch": 345.5970149253731, "grad_norm": 17.147764205932617, "learning_rate": 9.846560846560847e-06, "loss": 32.5903, "step": 14515 }, { "epoch": 345.6208955223881, "grad_norm": 15.925622940063477, "learning_rate": 9.845899470899472e-06, "loss": 31.5798, "step": 14516 }, { "epoch": 345.644776119403, "grad_norm": 15.333617210388184, "learning_rate": 9.845238095238097e-06, "loss": 33.0217, "step": 14517 }, { "epoch": 345.6686567164179, "grad_norm": 15.204148292541504, "learning_rate": 9.84457671957672e-06, "loss": 31.8359, "step": 14518 }, { "epoch": 345.6925373134328, "grad_norm": 14.474899291992188, "learning_rate": 9.843915343915345e-06, "loss": 33.1772, "step": 14519 }, { "epoch": 345.7164179104478, "grad_norm": 18.257648468017578, "learning_rate": 9.843253968253968e-06, "loss": 32.8769, "step": 14520 }, { "epoch": 345.7402985074627, "grad_norm": 23.30188751220703, "learning_rate": 9.842592592592594e-06, "loss": 31.956, "step": 14521 }, { "epoch": 345.7641791044776, "grad_norm": 17.30249786376953, "learning_rate": 9.841931216931217e-06, "loss": 33.4029, "step": 14522 }, { "epoch": 345.78805970149256, "grad_norm": 15.513245582580566, "learning_rate": 9.841269841269842e-06, "loss": 32.5606, "step": 14523 }, { "epoch": 345.81194029850747, "grad_norm": 26.08751678466797, "learning_rate": 9.840608465608467e-06, "loss": 32.9169, "step": 14524 }, { "epoch": 345.8358208955224, "grad_norm": NaN, "learning_rate": 9.83994708994709e-06, "loss": 33.0905, "step": 14525 }, { "epoch": 345.85970149253734, "grad_norm": 20.650205612182617, "learning_rate": 9.83994708994709e-06, "loss": 33.4088, "step": 14526 }, { "epoch": 345.88358208955225, "grad_norm": 17.080671310424805, "learning_rate": 9.839285714285715e-06, "loss": 32.482, "step": 14527 }, { "epoch": 345.90746268656716, "grad_norm": 19.794126510620117, "learning_rate": 9.83862433862434e-06, "loss": 33.2448, "step": 14528 }, { "epoch": 345.93134328358207, "grad_norm": 24.577285766601562, "learning_rate": 9.837962962962964e-06, "loss": 33.5371, "step": 14529 }, { "epoch": 345.95522388059703, "grad_norm": 16.948318481445312, "learning_rate": 9.837301587301588e-06, "loss": 32.265, "step": 14530 }, { "epoch": 345.97910447761194, "grad_norm": 17.720129013061523, "learning_rate": 9.836640211640212e-06, "loss": 33.6891, "step": 14531 }, { "epoch": 346.0, "grad_norm": 18.448644638061523, "learning_rate": 9.835978835978837e-06, "loss": 27.1259, "step": 14532 }, { "epoch": 346.0238805970149, "grad_norm": 20.12361717224121, "learning_rate": 9.83531746031746e-06, "loss": 32.4664, "step": 14533 }, { "epoch": 346.0477611940299, "grad_norm": 16.14518165588379, "learning_rate": 9.834656084656085e-06, "loss": 32.8733, "step": 14534 }, { "epoch": 346.0716417910448, "grad_norm": 17.641845703125, "learning_rate": 9.83399470899471e-06, "loss": 32.8976, "step": 14535 }, { "epoch": 346.0955223880597, "grad_norm": 18.597990036010742, "learning_rate": 9.833333333333333e-06, "loss": 31.9901, "step": 14536 }, { "epoch": 346.1194029850746, "grad_norm": 21.50041961669922, "learning_rate": 9.83267195767196e-06, "loss": 33.8644, "step": 14537 }, { "epoch": 346.14328358208957, "grad_norm": 18.245433807373047, "learning_rate": 9.832010582010583e-06, "loss": 32.4087, "step": 14538 }, { "epoch": 346.1671641791045, "grad_norm": 14.784394264221191, "learning_rate": 9.831349206349207e-06, "loss": 33.6864, "step": 14539 }, { "epoch": 346.1910447761194, "grad_norm": 19.643970489501953, "learning_rate": 9.830687830687832e-06, "loss": 32.7441, "step": 14540 }, { "epoch": 346.21492537313435, "grad_norm": 21.35646629333496, "learning_rate": 9.830026455026455e-06, "loss": 33.2991, "step": 14541 }, { "epoch": 346.23880597014926, "grad_norm": 20.9975528717041, "learning_rate": 9.82936507936508e-06, "loss": 33.2202, "step": 14542 }, { "epoch": 346.26268656716417, "grad_norm": 15.574922561645508, "learning_rate": 9.828703703703705e-06, "loss": 33.7688, "step": 14543 }, { "epoch": 346.28656716417913, "grad_norm": 30.816774368286133, "learning_rate": 9.828042328042328e-06, "loss": 33.074, "step": 14544 }, { "epoch": 346.31044776119404, "grad_norm": 21.059690475463867, "learning_rate": 9.827380952380953e-06, "loss": 32.5879, "step": 14545 }, { "epoch": 346.33432835820895, "grad_norm": 21.59850311279297, "learning_rate": 9.826719576719578e-06, "loss": 31.9833, "step": 14546 }, { "epoch": 346.35820895522386, "grad_norm": 25.910024642944336, "learning_rate": 9.826058201058202e-06, "loss": 33.3459, "step": 14547 }, { "epoch": 346.3820895522388, "grad_norm": 17.163654327392578, "learning_rate": 9.825396825396825e-06, "loss": 32.7096, "step": 14548 }, { "epoch": 346.40597014925373, "grad_norm": 17.937894821166992, "learning_rate": 9.82473544973545e-06, "loss": 33.802, "step": 14549 }, { "epoch": 346.42985074626864, "grad_norm": 20.08127784729004, "learning_rate": 9.824074074074075e-06, "loss": 32.4472, "step": 14550 }, { "epoch": 346.4537313432836, "grad_norm": 16.959835052490234, "learning_rate": 9.823412698412698e-06, "loss": 31.187, "step": 14551 }, { "epoch": 346.4776119402985, "grad_norm": 17.028697967529297, "learning_rate": 9.822751322751325e-06, "loss": 32.1524, "step": 14552 }, { "epoch": 346.5014925373134, "grad_norm": 18.651256561279297, "learning_rate": 9.822089947089948e-06, "loss": 31.7408, "step": 14553 }, { "epoch": 346.52537313432833, "grad_norm": 26.050403594970703, "learning_rate": 9.821428571428573e-06, "loss": 33.4135, "step": 14554 }, { "epoch": 346.5492537313433, "grad_norm": 17.289928436279297, "learning_rate": 9.820767195767197e-06, "loss": 32.3145, "step": 14555 }, { "epoch": 346.5731343283582, "grad_norm": 17.333473205566406, "learning_rate": 9.82010582010582e-06, "loss": 33.268, "step": 14556 }, { "epoch": 346.5970149253731, "grad_norm": 24.8438720703125, "learning_rate": 9.819444444444445e-06, "loss": 31.7599, "step": 14557 }, { "epoch": 346.6208955223881, "grad_norm": 19.637678146362305, "learning_rate": 9.81878306878307e-06, "loss": 32.0226, "step": 14558 }, { "epoch": 346.644776119403, "grad_norm": 14.42353343963623, "learning_rate": 9.818121693121693e-06, "loss": 32.3305, "step": 14559 }, { "epoch": 346.6686567164179, "grad_norm": 17.578075408935547, "learning_rate": 9.817460317460318e-06, "loss": 32.8084, "step": 14560 }, { "epoch": 346.6925373134328, "grad_norm": 14.608057975769043, "learning_rate": 9.816798941798943e-06, "loss": 32.1249, "step": 14561 }, { "epoch": 346.7164179104478, "grad_norm": 20.10359001159668, "learning_rate": 9.816137566137567e-06, "loss": 33.1616, "step": 14562 }, { "epoch": 346.7402985074627, "grad_norm": 18.161693572998047, "learning_rate": 9.81547619047619e-06, "loss": 31.9219, "step": 14563 }, { "epoch": 346.7641791044776, "grad_norm": 19.661170959472656, "learning_rate": 9.814814814814815e-06, "loss": 32.9629, "step": 14564 }, { "epoch": 346.78805970149256, "grad_norm": 17.32904052734375, "learning_rate": 9.81415343915344e-06, "loss": 32.985, "step": 14565 }, { "epoch": 346.81194029850747, "grad_norm": 20.90936851501465, "learning_rate": 9.813492063492063e-06, "loss": 32.7361, "step": 14566 }, { "epoch": 346.8358208955224, "grad_norm": 21.710256576538086, "learning_rate": 9.812830687830688e-06, "loss": 32.5502, "step": 14567 }, { "epoch": 346.85970149253734, "grad_norm": 17.692102432250977, "learning_rate": 9.812169312169313e-06, "loss": 33.4243, "step": 14568 }, { "epoch": 346.88358208955225, "grad_norm": 16.292560577392578, "learning_rate": 9.811507936507938e-06, "loss": 32.7452, "step": 14569 }, { "epoch": 346.90746268656716, "grad_norm": 14.09742259979248, "learning_rate": 9.810846560846562e-06, "loss": 32.486, "step": 14570 }, { "epoch": 346.93134328358207, "grad_norm": 17.492095947265625, "learning_rate": 9.810185185185186e-06, "loss": 31.9111, "step": 14571 }, { "epoch": 346.95522388059703, "grad_norm": 16.279251098632812, "learning_rate": 9.80952380952381e-06, "loss": 31.7253, "step": 14572 }, { "epoch": 346.97910447761194, "grad_norm": 20.205720901489258, "learning_rate": 9.808862433862435e-06, "loss": 32.4854, "step": 14573 }, { "epoch": 347.0, "grad_norm": 14.61187744140625, "learning_rate": 9.808201058201058e-06, "loss": 28.3053, "step": 14574 }, { "epoch": 347.0238805970149, "grad_norm": 19.006540298461914, "learning_rate": 9.807539682539683e-06, "loss": 32.7437, "step": 14575 }, { "epoch": 347.0477611940299, "grad_norm": 22.644012451171875, "learning_rate": 9.806878306878308e-06, "loss": 32.8132, "step": 14576 }, { "epoch": 347.0716417910448, "grad_norm": 20.906108856201172, "learning_rate": 9.806216931216933e-06, "loss": 31.0339, "step": 14577 }, { "epoch": 347.0955223880597, "grad_norm": 16.691179275512695, "learning_rate": 9.805555555555556e-06, "loss": 33.0482, "step": 14578 }, { "epoch": 347.1194029850746, "grad_norm": 15.065079689025879, "learning_rate": 9.80489417989418e-06, "loss": 33.9182, "step": 14579 }, { "epoch": 347.14328358208957, "grad_norm": 22.79800033569336, "learning_rate": 9.804232804232805e-06, "loss": 31.8172, "step": 14580 }, { "epoch": 347.1671641791045, "grad_norm": 17.989540100097656, "learning_rate": 9.803571428571428e-06, "loss": 33.3956, "step": 14581 }, { "epoch": 347.1910447761194, "grad_norm": 17.740514755249023, "learning_rate": 9.802910052910053e-06, "loss": 32.1055, "step": 14582 }, { "epoch": 347.21492537313435, "grad_norm": 26.839365005493164, "learning_rate": 9.802248677248678e-06, "loss": 33.0859, "step": 14583 }, { "epoch": 347.23880597014926, "grad_norm": 18.93688201904297, "learning_rate": 9.801587301587301e-06, "loss": 32.4117, "step": 14584 }, { "epoch": 347.26268656716417, "grad_norm": 15.530482292175293, "learning_rate": 9.800925925925928e-06, "loss": 31.8495, "step": 14585 }, { "epoch": 347.28656716417913, "grad_norm": 21.48668670654297, "learning_rate": 9.80026455026455e-06, "loss": 32.381, "step": 14586 }, { "epoch": 347.31044776119404, "grad_norm": 20.40810203552246, "learning_rate": 9.799603174603176e-06, "loss": 32.6337, "step": 14587 }, { "epoch": 347.33432835820895, "grad_norm": 16.721519470214844, "learning_rate": 9.7989417989418e-06, "loss": 32.8879, "step": 14588 }, { "epoch": 347.35820895522386, "grad_norm": 16.888235092163086, "learning_rate": 9.798280423280423e-06, "loss": 32.7801, "step": 14589 }, { "epoch": 347.3820895522388, "grad_norm": 17.261720657348633, "learning_rate": 9.797619047619048e-06, "loss": 33.2261, "step": 14590 }, { "epoch": 347.40597014925373, "grad_norm": 19.447309494018555, "learning_rate": 9.796957671957673e-06, "loss": 32.6286, "step": 14591 }, { "epoch": 347.42985074626864, "grad_norm": 15.644414901733398, "learning_rate": 9.796296296296298e-06, "loss": 32.3827, "step": 14592 }, { "epoch": 347.4537313432836, "grad_norm": 16.57158660888672, "learning_rate": 9.795634920634921e-06, "loss": 33.821, "step": 14593 }, { "epoch": 347.4776119402985, "grad_norm": 14.660304069519043, "learning_rate": 9.794973544973546e-06, "loss": 32.3864, "step": 14594 }, { "epoch": 347.5014925373134, "grad_norm": 16.841506958007812, "learning_rate": 9.79431216931217e-06, "loss": 32.6457, "step": 14595 }, { "epoch": 347.52537313432833, "grad_norm": 18.000137329101562, "learning_rate": 9.793650793650794e-06, "loss": 32.2096, "step": 14596 }, { "epoch": 347.5492537313433, "grad_norm": 19.90860939025879, "learning_rate": 9.792989417989418e-06, "loss": 33.2074, "step": 14597 }, { "epoch": 347.5731343283582, "grad_norm": 14.733869552612305, "learning_rate": 9.792328042328043e-06, "loss": 33.3162, "step": 14598 }, { "epoch": 347.5970149253731, "grad_norm": 16.11147689819336, "learning_rate": 9.791666666666666e-06, "loss": 33.028, "step": 14599 }, { "epoch": 347.6208955223881, "grad_norm": 16.726499557495117, "learning_rate": 9.791005291005293e-06, "loss": 31.7532, "step": 14600 }, { "epoch": 347.644776119403, "grad_norm": 15.86723804473877, "learning_rate": 9.790343915343916e-06, "loss": 31.5954, "step": 14601 }, { "epoch": 347.6686567164179, "grad_norm": 23.73784828186035, "learning_rate": 9.78968253968254e-06, "loss": 32.2351, "step": 14602 }, { "epoch": 347.6925373134328, "grad_norm": 18.645854949951172, "learning_rate": 9.789021164021166e-06, "loss": 32.3827, "step": 14603 }, { "epoch": 347.7164179104478, "grad_norm": 16.774049758911133, "learning_rate": 9.788359788359789e-06, "loss": 33.8348, "step": 14604 }, { "epoch": 347.7402985074627, "grad_norm": 18.117555618286133, "learning_rate": 9.787698412698413e-06, "loss": 31.7104, "step": 14605 }, { "epoch": 347.7641791044776, "grad_norm": 17.258329391479492, "learning_rate": 9.787037037037038e-06, "loss": 32.4091, "step": 14606 }, { "epoch": 347.78805970149256, "grad_norm": 19.49449920654297, "learning_rate": 9.786375661375661e-06, "loss": 33.4336, "step": 14607 }, { "epoch": 347.81194029850747, "grad_norm": 18.486204147338867, "learning_rate": 9.785714285714286e-06, "loss": 31.5633, "step": 14608 }, { "epoch": 347.8358208955224, "grad_norm": 16.00359344482422, "learning_rate": 9.785052910052911e-06, "loss": 31.9202, "step": 14609 }, { "epoch": 347.85970149253734, "grad_norm": 14.867855072021484, "learning_rate": 9.784391534391536e-06, "loss": 33.3096, "step": 14610 }, { "epoch": 347.88358208955225, "grad_norm": 21.041004180908203, "learning_rate": 9.783730158730159e-06, "loss": 33.7092, "step": 14611 }, { "epoch": 347.90746268656716, "grad_norm": 21.410348892211914, "learning_rate": 9.783068783068784e-06, "loss": 32.6474, "step": 14612 }, { "epoch": 347.93134328358207, "grad_norm": 21.48059844970703, "learning_rate": 9.782407407407408e-06, "loss": 32.0023, "step": 14613 }, { "epoch": 347.95522388059703, "grad_norm": 14.269651412963867, "learning_rate": 9.781746031746032e-06, "loss": 32.7085, "step": 14614 }, { "epoch": 347.97910447761194, "grad_norm": 22.927631378173828, "learning_rate": 9.781084656084658e-06, "loss": 33.4512, "step": 14615 }, { "epoch": 348.0, "grad_norm": 17.4822998046875, "learning_rate": 9.780423280423281e-06, "loss": 27.6399, "step": 14616 }, { "epoch": 348.0238805970149, "grad_norm": 16.791461944580078, "learning_rate": 9.779761904761906e-06, "loss": 33.8503, "step": 14617 }, { "epoch": 348.0477611940299, "grad_norm": 15.67280387878418, "learning_rate": 9.77910052910053e-06, "loss": 31.505, "step": 14618 }, { "epoch": 348.0716417910448, "grad_norm": 20.893356323242188, "learning_rate": 9.778439153439154e-06, "loss": 33.9971, "step": 14619 }, { "epoch": 348.0955223880597, "grad_norm": 21.14359474182129, "learning_rate": 9.777777777777779e-06, "loss": 32.555, "step": 14620 }, { "epoch": 348.1194029850746, "grad_norm": 16.92644691467285, "learning_rate": 9.777116402116403e-06, "loss": 32.2008, "step": 14621 }, { "epoch": 348.14328358208957, "grad_norm": 15.187609672546387, "learning_rate": 9.776455026455027e-06, "loss": 32.3186, "step": 14622 }, { "epoch": 348.1671641791045, "grad_norm": 17.225370407104492, "learning_rate": 9.775793650793651e-06, "loss": 32.3924, "step": 14623 }, { "epoch": 348.1910447761194, "grad_norm": 17.131383895874023, "learning_rate": 9.775132275132276e-06, "loss": 32.3736, "step": 14624 }, { "epoch": 348.21492537313435, "grad_norm": 15.347402572631836, "learning_rate": 9.774470899470901e-06, "loss": 33.362, "step": 14625 }, { "epoch": 348.23880597014926, "grad_norm": 17.12799644470215, "learning_rate": 9.773809523809524e-06, "loss": 32.2947, "step": 14626 }, { "epoch": 348.26268656716417, "grad_norm": 16.040346145629883, "learning_rate": 9.773148148148149e-06, "loss": 31.6114, "step": 14627 }, { "epoch": 348.28656716417913, "grad_norm": 15.4306001663208, "learning_rate": 9.772486772486774e-06, "loss": 33.3488, "step": 14628 }, { "epoch": 348.31044776119404, "grad_norm": 20.037302017211914, "learning_rate": 9.771825396825397e-06, "loss": 32.9626, "step": 14629 }, { "epoch": 348.33432835820895, "grad_norm": 16.938810348510742, "learning_rate": 9.771164021164023e-06, "loss": 32.3464, "step": 14630 }, { "epoch": 348.35820895522386, "grad_norm": 16.684492111206055, "learning_rate": 9.770502645502646e-06, "loss": 31.795, "step": 14631 }, { "epoch": 348.3820895522388, "grad_norm": 20.565393447875977, "learning_rate": 9.769841269841271e-06, "loss": 32.8336, "step": 14632 }, { "epoch": 348.40597014925373, "grad_norm": 19.35353660583496, "learning_rate": 9.769179894179896e-06, "loss": 32.5364, "step": 14633 }, { "epoch": 348.42985074626864, "grad_norm": 16.575313568115234, "learning_rate": 9.768518518518519e-06, "loss": 33.0467, "step": 14634 }, { "epoch": 348.4537313432836, "grad_norm": 18.80159568786621, "learning_rate": 9.767857142857144e-06, "loss": 33.4302, "step": 14635 }, { "epoch": 348.4776119402985, "grad_norm": 19.14153289794922, "learning_rate": 9.767195767195769e-06, "loss": 32.5667, "step": 14636 }, { "epoch": 348.5014925373134, "grad_norm": 18.234790802001953, "learning_rate": 9.766534391534392e-06, "loss": 32.2728, "step": 14637 }, { "epoch": 348.52537313432833, "grad_norm": 17.12832260131836, "learning_rate": 9.765873015873017e-06, "loss": 32.2903, "step": 14638 }, { "epoch": 348.5492537313433, "grad_norm": 19.046112060546875, "learning_rate": 9.765211640211641e-06, "loss": 32.9503, "step": 14639 }, { "epoch": 348.5731343283582, "grad_norm": 14.653257369995117, "learning_rate": 9.764550264550266e-06, "loss": 32.4275, "step": 14640 }, { "epoch": 348.5970149253731, "grad_norm": 21.176921844482422, "learning_rate": 9.76388888888889e-06, "loss": 33.7491, "step": 14641 }, { "epoch": 348.6208955223881, "grad_norm": 17.440414428710938, "learning_rate": 9.763227513227514e-06, "loss": 32.8406, "step": 14642 }, { "epoch": 348.644776119403, "grad_norm": 18.87099838256836, "learning_rate": 9.762566137566139e-06, "loss": 32.2602, "step": 14643 }, { "epoch": 348.6686567164179, "grad_norm": 18.95827293395996, "learning_rate": 9.761904761904762e-06, "loss": 32.3678, "step": 14644 }, { "epoch": 348.6925373134328, "grad_norm": 19.204607009887695, "learning_rate": 9.761243386243387e-06, "loss": 32.2119, "step": 14645 }, { "epoch": 348.7164179104478, "grad_norm": 18.41114044189453, "learning_rate": 9.760582010582012e-06, "loss": 32.9687, "step": 14646 }, { "epoch": 348.7402985074627, "grad_norm": 13.804076194763184, "learning_rate": 9.759920634920635e-06, "loss": 33.004, "step": 14647 }, { "epoch": 348.7641791044776, "grad_norm": 15.167065620422363, "learning_rate": 9.759259259259261e-06, "loss": 30.597, "step": 14648 }, { "epoch": 348.78805970149256, "grad_norm": 14.439536094665527, "learning_rate": 9.758597883597884e-06, "loss": 33.1182, "step": 14649 }, { "epoch": 348.81194029850747, "grad_norm": 16.821346282958984, "learning_rate": 9.757936507936509e-06, "loss": 33.4213, "step": 14650 }, { "epoch": 348.8358208955224, "grad_norm": 16.579566955566406, "learning_rate": 9.757275132275134e-06, "loss": 32.6801, "step": 14651 }, { "epoch": 348.85970149253734, "grad_norm": 19.574453353881836, "learning_rate": 9.756613756613757e-06, "loss": 32.377, "step": 14652 }, { "epoch": 348.88358208955225, "grad_norm": 19.954803466796875, "learning_rate": 9.755952380952382e-06, "loss": 31.6744, "step": 14653 }, { "epoch": 348.90746268656716, "grad_norm": 20.58474349975586, "learning_rate": 9.755291005291007e-06, "loss": 30.9358, "step": 14654 }, { "epoch": 348.93134328358207, "grad_norm": 18.414278030395508, "learning_rate": 9.754629629629631e-06, "loss": 33.2836, "step": 14655 }, { "epoch": 348.95522388059703, "grad_norm": 20.93559455871582, "learning_rate": 9.753968253968254e-06, "loss": 33.688, "step": 14656 }, { "epoch": 348.97910447761194, "grad_norm": 18.284198760986328, "learning_rate": 9.75330687830688e-06, "loss": 33.2417, "step": 14657 }, { "epoch": 349.0, "grad_norm": 18.535316467285156, "learning_rate": 9.752645502645504e-06, "loss": 26.9225, "step": 14658 }, { "epoch": 349.0238805970149, "grad_norm": 17.07691764831543, "learning_rate": 9.751984126984127e-06, "loss": 32.4279, "step": 14659 }, { "epoch": 349.0477611940299, "grad_norm": 21.65434455871582, "learning_rate": 9.751322751322752e-06, "loss": 33.2138, "step": 14660 }, { "epoch": 349.0716417910448, "grad_norm": 19.45406723022461, "learning_rate": 9.750661375661377e-06, "loss": 32.5876, "step": 14661 }, { "epoch": 349.0955223880597, "grad_norm": 17.751808166503906, "learning_rate": 9.75e-06, "loss": 31.7013, "step": 14662 }, { "epoch": 349.1194029850746, "grad_norm": NaN, "learning_rate": 9.749338624338626e-06, "loss": 27.7308, "step": 14663 }, { "epoch": 349.14328358208957, "grad_norm": 17.530637741088867, "learning_rate": 9.749338624338626e-06, "loss": 32.323, "step": 14664 }, { "epoch": 349.1671641791045, "grad_norm": 17.759815216064453, "learning_rate": 9.74867724867725e-06, "loss": 31.6154, "step": 14665 }, { "epoch": 349.1910447761194, "grad_norm": 22.851415634155273, "learning_rate": 9.748015873015874e-06, "loss": 32.3775, "step": 14666 }, { "epoch": 349.21492537313435, "grad_norm": 16.060312271118164, "learning_rate": 9.747354497354499e-06, "loss": 33.3385, "step": 14667 }, { "epoch": 349.23880597014926, "grad_norm": 18.590045928955078, "learning_rate": 9.746693121693122e-06, "loss": 31.1666, "step": 14668 }, { "epoch": 349.26268656716417, "grad_norm": 16.344722747802734, "learning_rate": 9.746031746031747e-06, "loss": 33.0891, "step": 14669 }, { "epoch": 349.28656716417913, "grad_norm": 20.865840911865234, "learning_rate": 9.745370370370372e-06, "loss": 32.9344, "step": 14670 }, { "epoch": 349.31044776119404, "grad_norm": 18.58599853515625, "learning_rate": 9.744708994708997e-06, "loss": 32.9551, "step": 14671 }, { "epoch": 349.33432835820895, "grad_norm": 20.075489044189453, "learning_rate": 9.74404761904762e-06, "loss": 32.1846, "step": 14672 }, { "epoch": 349.35820895522386, "grad_norm": 20.111770629882812, "learning_rate": 9.743386243386244e-06, "loss": 32.8219, "step": 14673 }, { "epoch": 349.3820895522388, "grad_norm": 18.247739791870117, "learning_rate": 9.74272486772487e-06, "loss": 31.3079, "step": 14674 }, { "epoch": 349.40597014925373, "grad_norm": 16.764577865600586, "learning_rate": 9.742063492063492e-06, "loss": 33.5343, "step": 14675 }, { "epoch": 349.42985074626864, "grad_norm": 19.740215301513672, "learning_rate": 9.741402116402117e-06, "loss": 32.6692, "step": 14676 }, { "epoch": 349.4537313432836, "grad_norm": 16.972673416137695, "learning_rate": 9.740740740740742e-06, "loss": 32.0592, "step": 14677 }, { "epoch": 349.4776119402985, "grad_norm": 16.89126205444336, "learning_rate": 9.740079365079365e-06, "loss": 32.9391, "step": 14678 }, { "epoch": 349.5014925373134, "grad_norm": 16.81437873840332, "learning_rate": 9.73941798941799e-06, "loss": 33.5152, "step": 14679 }, { "epoch": 349.52537313432833, "grad_norm": 14.742659568786621, "learning_rate": 9.738756613756615e-06, "loss": 31.4651, "step": 14680 }, { "epoch": 349.5492537313433, "grad_norm": 15.249462127685547, "learning_rate": 9.73809523809524e-06, "loss": 32.7273, "step": 14681 }, { "epoch": 349.5731343283582, "grad_norm": 17.645551681518555, "learning_rate": 9.737433862433863e-06, "loss": 33.063, "step": 14682 }, { "epoch": 349.5970149253731, "grad_norm": 19.15085792541504, "learning_rate": 9.736772486772487e-06, "loss": 33.7299, "step": 14683 }, { "epoch": 349.6208955223881, "grad_norm": 19.513601303100586, "learning_rate": 9.736111111111112e-06, "loss": 32.3101, "step": 14684 }, { "epoch": 349.644776119403, "grad_norm": 19.61646270751953, "learning_rate": 9.735449735449735e-06, "loss": 32.3686, "step": 14685 }, { "epoch": 349.6686567164179, "grad_norm": 14.699602127075195, "learning_rate": 9.73478835978836e-06, "loss": 32.0377, "step": 14686 }, { "epoch": 349.6925373134328, "grad_norm": 18.042131423950195, "learning_rate": 9.734126984126985e-06, "loss": 31.0546, "step": 14687 }, { "epoch": 349.7164179104478, "grad_norm": 17.329553604125977, "learning_rate": 9.733465608465608e-06, "loss": 32.3948, "step": 14688 }, { "epoch": 349.7402985074627, "grad_norm": 20.620149612426758, "learning_rate": 9.732804232804234e-06, "loss": 32.8701, "step": 14689 }, { "epoch": 349.7641791044776, "grad_norm": 15.625349044799805, "learning_rate": 9.732142857142858e-06, "loss": 32.0927, "step": 14690 }, { "epoch": 349.78805970149256, "grad_norm": 20.403350830078125, "learning_rate": 9.731481481481482e-06, "loss": 32.8683, "step": 14691 }, { "epoch": 349.81194029850747, "grad_norm": 16.423643112182617, "learning_rate": 9.730820105820107e-06, "loss": 32.6511, "step": 14692 }, { "epoch": 349.8358208955224, "grad_norm": 17.448591232299805, "learning_rate": 9.73015873015873e-06, "loss": 32.8787, "step": 14693 }, { "epoch": 349.85970149253734, "grad_norm": 18.498205184936523, "learning_rate": 9.729497354497355e-06, "loss": 33.0482, "step": 14694 }, { "epoch": 349.88358208955225, "grad_norm": 24.00061798095703, "learning_rate": 9.72883597883598e-06, "loss": 33.0809, "step": 14695 }, { "epoch": 349.90746268656716, "grad_norm": 18.76821517944336, "learning_rate": 9.728174603174605e-06, "loss": 32.7319, "step": 14696 }, { "epoch": 349.93134328358207, "grad_norm": 20.155773162841797, "learning_rate": 9.727513227513228e-06, "loss": 33.1262, "step": 14697 }, { "epoch": 349.95522388059703, "grad_norm": 23.968116760253906, "learning_rate": 9.726851851851852e-06, "loss": 32.8412, "step": 14698 }, { "epoch": 349.97910447761194, "grad_norm": 19.672527313232422, "learning_rate": 9.726190476190477e-06, "loss": 32.5545, "step": 14699 }, { "epoch": 350.0, "grad_norm": 13.98314094543457, "learning_rate": 9.7255291005291e-06, "loss": 28.4527, "step": 14700 }, { "epoch": 350.0238805970149, "grad_norm": 16.52663803100586, "learning_rate": 9.724867724867725e-06, "loss": 32.3273, "step": 14701 }, { "epoch": 350.0477611940299, "grad_norm": 16.786659240722656, "learning_rate": 9.72420634920635e-06, "loss": 32.8609, "step": 14702 }, { "epoch": 350.0716417910448, "grad_norm": 17.514373779296875, "learning_rate": 9.723544973544973e-06, "loss": 32.098, "step": 14703 }, { "epoch": 350.0955223880597, "grad_norm": 15.79894733428955, "learning_rate": 9.7228835978836e-06, "loss": 32.8837, "step": 14704 }, { "epoch": 350.1194029850746, "grad_norm": 21.42318344116211, "learning_rate": 9.722222222222223e-06, "loss": 32.7655, "step": 14705 }, { "epoch": 350.14328358208957, "grad_norm": 18.59439468383789, "learning_rate": 9.721560846560847e-06, "loss": 31.6826, "step": 14706 }, { "epoch": 350.1671641791045, "grad_norm": 14.719908714294434, "learning_rate": 9.720899470899472e-06, "loss": 31.9127, "step": 14707 }, { "epoch": 350.1910447761194, "grad_norm": 17.84770393371582, "learning_rate": 9.720238095238095e-06, "loss": 32.6657, "step": 14708 }, { "epoch": 350.21492537313435, "grad_norm": 13.739038467407227, "learning_rate": 9.71957671957672e-06, "loss": 31.439, "step": 14709 }, { "epoch": 350.23880597014926, "grad_norm": 17.086442947387695, "learning_rate": 9.718915343915345e-06, "loss": 32.8535, "step": 14710 }, { "epoch": 350.26268656716417, "grad_norm": 16.6844482421875, "learning_rate": 9.71825396825397e-06, "loss": 33.1887, "step": 14711 }, { "epoch": 350.28656716417913, "grad_norm": 18.202463150024414, "learning_rate": 9.717592592592593e-06, "loss": 32.3802, "step": 14712 }, { "epoch": 350.31044776119404, "grad_norm": 17.434301376342773, "learning_rate": 9.716931216931218e-06, "loss": 31.6296, "step": 14713 }, { "epoch": 350.33432835820895, "grad_norm": 18.2510929107666, "learning_rate": 9.716269841269842e-06, "loss": 32.1291, "step": 14714 }, { "epoch": 350.35820895522386, "grad_norm": 16.6622371673584, "learning_rate": 9.715608465608466e-06, "loss": 32.2846, "step": 14715 }, { "epoch": 350.3820895522388, "grad_norm": NaN, "learning_rate": 9.71494708994709e-06, "loss": 46.9879, "step": 14716 }, { "epoch": 350.40597014925373, "grad_norm": 18.69403648376465, "learning_rate": 9.71494708994709e-06, "loss": 31.8159, "step": 14717 }, { "epoch": 350.42985074626864, "grad_norm": 15.543919563293457, "learning_rate": 9.714285714285715e-06, "loss": 33.0337, "step": 14718 }, { "epoch": 350.4537313432836, "grad_norm": 20.756275177001953, "learning_rate": 9.713624338624338e-06, "loss": 33.46, "step": 14719 }, { "epoch": 350.4776119402985, "grad_norm": 14.887667655944824, "learning_rate": 9.712962962962965e-06, "loss": 31.1977, "step": 14720 }, { "epoch": 350.5014925373134, "grad_norm": 24.108150482177734, "learning_rate": 9.712301587301588e-06, "loss": 33.2452, "step": 14721 }, { "epoch": 350.52537313432833, "grad_norm": 18.020376205444336, "learning_rate": 9.711640211640213e-06, "loss": 33.3585, "step": 14722 }, { "epoch": 350.5492537313433, "grad_norm": 25.169206619262695, "learning_rate": 9.710978835978837e-06, "loss": 33.5354, "step": 14723 }, { "epoch": 350.5731343283582, "grad_norm": 23.525800704956055, "learning_rate": 9.71031746031746e-06, "loss": 32.5876, "step": 14724 }, { "epoch": 350.5970149253731, "grad_norm": 19.09469223022461, "learning_rate": 9.709656084656085e-06, "loss": 31.3368, "step": 14725 }, { "epoch": 350.6208955223881, "grad_norm": 19.304349899291992, "learning_rate": 9.70899470899471e-06, "loss": 32.4693, "step": 14726 }, { "epoch": 350.644776119403, "grad_norm": 24.641857147216797, "learning_rate": 9.708333333333333e-06, "loss": 31.8913, "step": 14727 }, { "epoch": 350.6686567164179, "grad_norm": 19.130863189697266, "learning_rate": 9.707671957671958e-06, "loss": 33.6099, "step": 14728 }, { "epoch": 350.6925373134328, "grad_norm": 22.144309997558594, "learning_rate": 9.707010582010583e-06, "loss": 32.9643, "step": 14729 }, { "epoch": 350.7164179104478, "grad_norm": 22.078603744506836, "learning_rate": 9.706349206349208e-06, "loss": 33.8755, "step": 14730 }, { "epoch": 350.7402985074627, "grad_norm": 19.594648361206055, "learning_rate": 9.70568783068783e-06, "loss": 32.0336, "step": 14731 }, { "epoch": 350.7641791044776, "grad_norm": 17.195146560668945, "learning_rate": 9.705026455026456e-06, "loss": 33.3585, "step": 14732 }, { "epoch": 350.78805970149256, "grad_norm": 24.541912078857422, "learning_rate": 9.70436507936508e-06, "loss": 32.4815, "step": 14733 }, { "epoch": 350.81194029850747, "grad_norm": 21.507633209228516, "learning_rate": 9.703703703703703e-06, "loss": 33.0714, "step": 14734 }, { "epoch": 350.8358208955224, "grad_norm": 15.345576286315918, "learning_rate": 9.70304232804233e-06, "loss": 32.1094, "step": 14735 }, { "epoch": 350.85970149253734, "grad_norm": 17.41144371032715, "learning_rate": 9.702380952380953e-06, "loss": 33.1787, "step": 14736 }, { "epoch": 350.88358208955225, "grad_norm": 14.923789024353027, "learning_rate": 9.701719576719578e-06, "loss": 32.0986, "step": 14737 }, { "epoch": 350.90746268656716, "grad_norm": 17.747167587280273, "learning_rate": 9.701058201058203e-06, "loss": 32.5708, "step": 14738 }, { "epoch": 350.93134328358207, "grad_norm": 19.74225616455078, "learning_rate": 9.700396825396826e-06, "loss": 31.5536, "step": 14739 }, { "epoch": 350.95522388059703, "grad_norm": 21.36675262451172, "learning_rate": 9.69973544973545e-06, "loss": 33.7125, "step": 14740 }, { "epoch": 350.97910447761194, "grad_norm": 21.644737243652344, "learning_rate": 9.699074074074075e-06, "loss": 32.2303, "step": 14741 }, { "epoch": 351.0, "grad_norm": 17.01936912536621, "learning_rate": 9.698412698412698e-06, "loss": 28.8026, "step": 14742 }, { "epoch": 351.0238805970149, "grad_norm": 17.195436477661133, "learning_rate": 9.697751322751323e-06, "loss": 32.2544, "step": 14743 }, { "epoch": 351.0477611940299, "grad_norm": 20.987239837646484, "learning_rate": 9.697089947089948e-06, "loss": 32.4869, "step": 14744 }, { "epoch": 351.0716417910448, "grad_norm": 16.024938583374023, "learning_rate": 9.696428571428573e-06, "loss": 31.1712, "step": 14745 }, { "epoch": 351.0955223880597, "grad_norm": 18.800140380859375, "learning_rate": 9.695767195767196e-06, "loss": 31.1837, "step": 14746 }, { "epoch": 351.1194029850746, "grad_norm": 18.07328224182129, "learning_rate": 9.69510582010582e-06, "loss": 33.2909, "step": 14747 }, { "epoch": 351.14328358208957, "grad_norm": 18.07699966430664, "learning_rate": 9.694444444444446e-06, "loss": 32.3287, "step": 14748 }, { "epoch": 351.1671641791045, "grad_norm": 21.439491271972656, "learning_rate": 9.693783068783069e-06, "loss": 32.8626, "step": 14749 }, { "epoch": 351.1910447761194, "grad_norm": 20.884723663330078, "learning_rate": 9.693121693121693e-06, "loss": 31.8939, "step": 14750 }, { "epoch": 351.21492537313435, "grad_norm": 17.19676971435547, "learning_rate": 9.692460317460318e-06, "loss": 32.0953, "step": 14751 }, { "epoch": 351.23880597014926, "grad_norm": 18.076078414916992, "learning_rate": 9.691798941798943e-06, "loss": 32.7481, "step": 14752 }, { "epoch": 351.26268656716417, "grad_norm": 17.14596939086914, "learning_rate": 9.691137566137568e-06, "loss": 32.3306, "step": 14753 }, { "epoch": 351.28656716417913, "grad_norm": 15.715779304504395, "learning_rate": 9.690476190476191e-06, "loss": 32.7884, "step": 14754 }, { "epoch": 351.31044776119404, "grad_norm": 17.186420440673828, "learning_rate": 9.689814814814816e-06, "loss": 32.7943, "step": 14755 }, { "epoch": 351.33432835820895, "grad_norm": 19.32579803466797, "learning_rate": 9.68915343915344e-06, "loss": 33.0978, "step": 14756 }, { "epoch": 351.35820895522386, "grad_norm": 14.649863243103027, "learning_rate": 9.688492063492064e-06, "loss": 30.6359, "step": 14757 }, { "epoch": 351.3820895522388, "grad_norm": 20.003084182739258, "learning_rate": 9.687830687830688e-06, "loss": 33.1828, "step": 14758 }, { "epoch": 351.40597014925373, "grad_norm": 22.395776748657227, "learning_rate": 9.687169312169313e-06, "loss": 32.9852, "step": 14759 }, { "epoch": 351.42985074626864, "grad_norm": 18.03234100341797, "learning_rate": 9.686507936507938e-06, "loss": 31.451, "step": 14760 }, { "epoch": 351.4537313432836, "grad_norm": 18.269107818603516, "learning_rate": 9.685846560846561e-06, "loss": 32.2838, "step": 14761 }, { "epoch": 351.4776119402985, "grad_norm": 18.537240982055664, "learning_rate": 9.685185185185186e-06, "loss": 32.2356, "step": 14762 }, { "epoch": 351.5014925373134, "grad_norm": 28.76149559020996, "learning_rate": 9.68452380952381e-06, "loss": 33.0583, "step": 14763 }, { "epoch": 351.52537313432833, "grad_norm": NaN, "learning_rate": 9.683862433862434e-06, "loss": 41.3145, "step": 14764 }, { "epoch": 351.5492537313433, "grad_norm": 17.292757034301758, "learning_rate": 9.683862433862434e-06, "loss": 32.472, "step": 14765 }, { "epoch": 351.5731343283582, "grad_norm": 23.66617774963379, "learning_rate": 9.683201058201059e-06, "loss": 32.015, "step": 14766 }, { "epoch": 351.5970149253731, "grad_norm": 26.030893325805664, "learning_rate": 9.682539682539683e-06, "loss": 31.8104, "step": 14767 }, { "epoch": 351.6208955223881, "grad_norm": 17.178730010986328, "learning_rate": 9.681878306878307e-06, "loss": 33.3612, "step": 14768 }, { "epoch": 351.644776119403, "grad_norm": 29.958009719848633, "learning_rate": 9.681216931216933e-06, "loss": 32.4896, "step": 14769 }, { "epoch": 351.6686567164179, "grad_norm": 19.59082794189453, "learning_rate": 9.680555555555556e-06, "loss": 33.4508, "step": 14770 }, { "epoch": 351.6925373134328, "grad_norm": 27.186344146728516, "learning_rate": 9.679894179894181e-06, "loss": 32.5828, "step": 14771 }, { "epoch": 351.7164179104478, "grad_norm": 24.611085891723633, "learning_rate": 9.679232804232806e-06, "loss": 33.0101, "step": 14772 }, { "epoch": 351.7402985074627, "grad_norm": 16.794775009155273, "learning_rate": 9.678571428571429e-06, "loss": 32.1631, "step": 14773 }, { "epoch": 351.7641791044776, "grad_norm": 27.443695068359375, "learning_rate": 9.677910052910054e-06, "loss": 33.0032, "step": 14774 }, { "epoch": 351.78805970149256, "grad_norm": 19.315256118774414, "learning_rate": 9.677248677248678e-06, "loss": 32.4942, "step": 14775 }, { "epoch": 351.81194029850747, "grad_norm": 19.994688034057617, "learning_rate": 9.676587301587303e-06, "loss": 31.8404, "step": 14776 }, { "epoch": 351.8358208955224, "grad_norm": 18.42207908630371, "learning_rate": 9.675925925925926e-06, "loss": 33.3821, "step": 14777 }, { "epoch": 351.85970149253734, "grad_norm": 15.855660438537598, "learning_rate": 9.675264550264551e-06, "loss": 32.95, "step": 14778 }, { "epoch": 351.88358208955225, "grad_norm": 18.28765869140625, "learning_rate": 9.674603174603176e-06, "loss": 32.3438, "step": 14779 }, { "epoch": 351.90746268656716, "grad_norm": 20.875213623046875, "learning_rate": 9.673941798941799e-06, "loss": 32.5501, "step": 14780 }, { "epoch": 351.93134328358207, "grad_norm": 19.235822677612305, "learning_rate": 9.673280423280424e-06, "loss": 32.7862, "step": 14781 }, { "epoch": 351.95522388059703, "grad_norm": 13.67168140411377, "learning_rate": 9.672619047619049e-06, "loss": 32.4176, "step": 14782 }, { "epoch": 351.97910447761194, "grad_norm": 15.900962829589844, "learning_rate": 9.671957671957672e-06, "loss": 32.442, "step": 14783 }, { "epoch": 352.0, "grad_norm": 21.373632431030273, "learning_rate": 9.671296296296298e-06, "loss": 29.1896, "step": 14784 }, { "epoch": 352.0238805970149, "grad_norm": 16.60938262939453, "learning_rate": 9.670634920634921e-06, "loss": 32.0761, "step": 14785 }, { "epoch": 352.0477611940299, "grad_norm": 19.753610610961914, "learning_rate": 9.669973544973546e-06, "loss": 32.3375, "step": 14786 }, { "epoch": 352.0716417910448, "grad_norm": 25.297861099243164, "learning_rate": 9.669312169312171e-06, "loss": 31.8711, "step": 14787 }, { "epoch": 352.0955223880597, "grad_norm": 17.906513214111328, "learning_rate": 9.668650793650794e-06, "loss": 32.5138, "step": 14788 }, { "epoch": 352.1194029850746, "grad_norm": 23.18354034423828, "learning_rate": 9.667989417989419e-06, "loss": 33.2922, "step": 14789 }, { "epoch": 352.14328358208957, "grad_norm": 22.812021255493164, "learning_rate": 9.667328042328044e-06, "loss": 32.5472, "step": 14790 }, { "epoch": 352.1671641791045, "grad_norm": 21.94281005859375, "learning_rate": 9.666666666666667e-06, "loss": 33.1584, "step": 14791 }, { "epoch": 352.1910447761194, "grad_norm": 16.490760803222656, "learning_rate": 9.666005291005292e-06, "loss": 32.7039, "step": 14792 }, { "epoch": 352.21492537313435, "grad_norm": 25.750459671020508, "learning_rate": 9.665343915343916e-06, "loss": 32.4468, "step": 14793 }, { "epoch": 352.23880597014926, "grad_norm": 17.788372039794922, "learning_rate": 9.664682539682541e-06, "loss": 31.177, "step": 14794 }, { "epoch": 352.26268656716417, "grad_norm": 17.548831939697266, "learning_rate": 9.664021164021164e-06, "loss": 31.8691, "step": 14795 }, { "epoch": 352.28656716417913, "grad_norm": 28.672712326049805, "learning_rate": 9.663359788359789e-06, "loss": 33.6552, "step": 14796 }, { "epoch": 352.31044776119404, "grad_norm": 17.572633743286133, "learning_rate": 9.662698412698414e-06, "loss": 32.707, "step": 14797 }, { "epoch": 352.33432835820895, "grad_norm": 22.447053909301758, "learning_rate": 9.662037037037037e-06, "loss": 31.766, "step": 14798 }, { "epoch": 352.35820895522386, "grad_norm": 25.783077239990234, "learning_rate": 9.661375661375663e-06, "loss": 31.7204, "step": 14799 }, { "epoch": 352.3820895522388, "grad_norm": 16.482032775878906, "learning_rate": 9.660714285714287e-06, "loss": 31.9653, "step": 14800 }, { "epoch": 352.40597014925373, "grad_norm": 28.299856185913086, "learning_rate": 9.660052910052911e-06, "loss": 32.7796, "step": 14801 }, { "epoch": 352.42985074626864, "grad_norm": 23.327533721923828, "learning_rate": 9.659391534391536e-06, "loss": 32.8088, "step": 14802 }, { "epoch": 352.4537313432836, "grad_norm": 18.625377655029297, "learning_rate": 9.65873015873016e-06, "loss": 32.1429, "step": 14803 }, { "epoch": 352.4776119402985, "grad_norm": NaN, "learning_rate": 9.658068783068784e-06, "loss": 44.9996, "step": 14804 }, { "epoch": 352.5014925373134, "grad_norm": 28.77292251586914, "learning_rate": 9.658068783068784e-06, "loss": 33.0945, "step": 14805 }, { "epoch": 352.52537313432833, "grad_norm": 19.699316024780273, "learning_rate": 9.657407407407409e-06, "loss": 32.5718, "step": 14806 }, { "epoch": 352.5492537313433, "grad_norm": 21.003192901611328, "learning_rate": 9.656746031746032e-06, "loss": 32.6405, "step": 14807 }, { "epoch": 352.5731343283582, "grad_norm": 24.010530471801758, "learning_rate": 9.656084656084657e-06, "loss": 32.8653, "step": 14808 }, { "epoch": 352.5970149253731, "grad_norm": 20.95771598815918, "learning_rate": 9.655423280423282e-06, "loss": 31.7435, "step": 14809 }, { "epoch": 352.6208955223881, "grad_norm": 16.861574172973633, "learning_rate": 9.654761904761906e-06, "loss": 31.9477, "step": 14810 }, { "epoch": 352.644776119403, "grad_norm": 29.624996185302734, "learning_rate": 9.65410052910053e-06, "loss": 32.9392, "step": 14811 }, { "epoch": 352.6686567164179, "grad_norm": 17.968767166137695, "learning_rate": 9.653439153439154e-06, "loss": 31.531, "step": 14812 }, { "epoch": 352.6925373134328, "grad_norm": 22.400312423706055, "learning_rate": 9.652777777777779e-06, "loss": 32.9245, "step": 14813 }, { "epoch": 352.7164179104478, "grad_norm": 27.22949981689453, "learning_rate": 9.652116402116402e-06, "loss": 33.7673, "step": 14814 }, { "epoch": 352.7402985074627, "grad_norm": 17.837738037109375, "learning_rate": 9.651455026455027e-06, "loss": 32.177, "step": 14815 }, { "epoch": 352.7641791044776, "grad_norm": 27.297584533691406, "learning_rate": 9.650793650793652e-06, "loss": 31.3903, "step": 14816 }, { "epoch": 352.78805970149256, "grad_norm": 19.96441078186035, "learning_rate": 9.650132275132276e-06, "loss": 33.6202, "step": 14817 }, { "epoch": 352.81194029850747, "grad_norm": 21.580228805541992, "learning_rate": 9.649470899470901e-06, "loss": 31.7302, "step": 14818 }, { "epoch": 352.8358208955224, "grad_norm": 24.934663772583008, "learning_rate": 9.648809523809524e-06, "loss": 32.5863, "step": 14819 }, { "epoch": 352.85970149253734, "grad_norm": 17.15755271911621, "learning_rate": 9.64814814814815e-06, "loss": 32.2025, "step": 14820 }, { "epoch": 352.88358208955225, "grad_norm": 30.80393409729004, "learning_rate": 9.647486772486774e-06, "loss": 31.6753, "step": 14821 }, { "epoch": 352.90746268656716, "grad_norm": 20.597753524780273, "learning_rate": 9.646825396825397e-06, "loss": 32.8839, "step": 14822 }, { "epoch": 352.93134328358207, "grad_norm": 27.592512130737305, "learning_rate": 9.646164021164022e-06, "loss": 33.2483, "step": 14823 }, { "epoch": 352.95522388059703, "grad_norm": 22.31157875061035, "learning_rate": 9.645502645502647e-06, "loss": 33.4955, "step": 14824 }, { "epoch": 352.97910447761194, "grad_norm": 24.850711822509766, "learning_rate": 9.644841269841271e-06, "loss": 32.5427, "step": 14825 }, { "epoch": 353.0, "grad_norm": 25.0136661529541, "learning_rate": 9.644179894179895e-06, "loss": 27.6829, "step": 14826 }, { "epoch": 353.0238805970149, "grad_norm": 18.588224411010742, "learning_rate": 9.64351851851852e-06, "loss": 32.0289, "step": 14827 }, { "epoch": 353.0477611940299, "grad_norm": 27.74751091003418, "learning_rate": 9.642857142857144e-06, "loss": 31.8052, "step": 14828 }, { "epoch": 353.0716417910448, "grad_norm": 19.6511173248291, "learning_rate": 9.642195767195767e-06, "loss": 31.3612, "step": 14829 }, { "epoch": 353.0955223880597, "grad_norm": 25.314407348632812, "learning_rate": 9.641534391534392e-06, "loss": 33.1017, "step": 14830 }, { "epoch": 353.1194029850746, "grad_norm": 25.031667709350586, "learning_rate": 9.640873015873017e-06, "loss": 32.6708, "step": 14831 }, { "epoch": 353.14328358208957, "grad_norm": 17.91745948791504, "learning_rate": 9.64021164021164e-06, "loss": 33.5659, "step": 14832 }, { "epoch": 353.1671641791045, "grad_norm": 33.04819869995117, "learning_rate": 9.639550264550266e-06, "loss": 33.0337, "step": 14833 }, { "epoch": 353.1910447761194, "grad_norm": 21.4101505279541, "learning_rate": 9.63888888888889e-06, "loss": 32.53, "step": 14834 }, { "epoch": 353.21492537313435, "grad_norm": 38.68268585205078, "learning_rate": 9.638227513227514e-06, "loss": 32.503, "step": 14835 }, { "epoch": 353.23880597014926, "grad_norm": 24.02882194519043, "learning_rate": 9.63756613756614e-06, "loss": 32.7662, "step": 14836 }, { "epoch": 353.26268656716417, "grad_norm": 41.039188385009766, "learning_rate": 9.636904761904762e-06, "loss": 33.1644, "step": 14837 }, { "epoch": 353.28656716417913, "grad_norm": 29.76303482055664, "learning_rate": 9.636243386243387e-06, "loss": 32.5574, "step": 14838 }, { "epoch": 353.31044776119404, "grad_norm": 43.067386627197266, "learning_rate": 9.635582010582012e-06, "loss": 31.4812, "step": 14839 }, { "epoch": 353.33432835820895, "grad_norm": 40.734962463378906, "learning_rate": 9.634920634920637e-06, "loss": 31.8917, "step": 14840 }, { "epoch": 353.35820895522386, "grad_norm": 25.5545597076416, "learning_rate": 9.63425925925926e-06, "loss": 32.9401, "step": 14841 }, { "epoch": 353.3820895522388, "grad_norm": 30.88152503967285, "learning_rate": 9.633597883597885e-06, "loss": 31.2318, "step": 14842 }, { "epoch": 353.40597014925373, "grad_norm": 28.586931228637695, "learning_rate": 9.63293650793651e-06, "loss": 32.8859, "step": 14843 }, { "epoch": 353.42985074626864, "grad_norm": 23.067806243896484, "learning_rate": 9.632275132275132e-06, "loss": 32.103, "step": 14844 }, { "epoch": 353.4537313432836, "grad_norm": 39.63471221923828, "learning_rate": 9.631613756613757e-06, "loss": 31.1948, "step": 14845 }, { "epoch": 353.4776119402985, "grad_norm": 29.925830841064453, "learning_rate": 9.630952380952382e-06, "loss": 32.4599, "step": 14846 }, { "epoch": 353.5014925373134, "grad_norm": 36.394508361816406, "learning_rate": 9.630291005291005e-06, "loss": 33.2981, "step": 14847 }, { "epoch": 353.52537313432833, "grad_norm": 34.93574523925781, "learning_rate": 9.62962962962963e-06, "loss": 32.0258, "step": 14848 }, { "epoch": 353.5492537313433, "grad_norm": 29.750062942504883, "learning_rate": 9.628968253968255e-06, "loss": 31.8738, "step": 14849 }, { "epoch": 353.5731343283582, "grad_norm": 26.949481964111328, "learning_rate": 9.62830687830688e-06, "loss": 32.2195, "step": 14850 }, { "epoch": 353.5970149253731, "grad_norm": 32.741798400878906, "learning_rate": 9.627645502645503e-06, "loss": 32.9702, "step": 14851 }, { "epoch": 353.6208955223881, "grad_norm": 29.208690643310547, "learning_rate": 9.626984126984127e-06, "loss": 32.2488, "step": 14852 }, { "epoch": 353.644776119403, "grad_norm": 40.328269958496094, "learning_rate": 9.626322751322752e-06, "loss": 32.2062, "step": 14853 }, { "epoch": 353.6686567164179, "grad_norm": 35.718238830566406, "learning_rate": 9.625661375661375e-06, "loss": 32.1761, "step": 14854 }, { "epoch": 353.6925373134328, "grad_norm": 32.72736740112305, "learning_rate": 9.625e-06, "loss": 32.0387, "step": 14855 }, { "epoch": 353.7164179104478, "grad_norm": 28.951156616210938, "learning_rate": 9.624338624338625e-06, "loss": 32.8885, "step": 14856 }, { "epoch": 353.7402985074627, "grad_norm": 30.452449798583984, "learning_rate": 9.62367724867725e-06, "loss": 31.3375, "step": 14857 }, { "epoch": 353.7641791044776, "grad_norm": 29.340457916259766, "learning_rate": 9.623015873015875e-06, "loss": 33.4326, "step": 14858 }, { "epoch": 353.78805970149256, "grad_norm": 36.30168151855469, "learning_rate": 9.622354497354498e-06, "loss": 32.6153, "step": 14859 }, { "epoch": 353.81194029850747, "grad_norm": 29.859689712524414, "learning_rate": 9.621693121693122e-06, "loss": 32.4116, "step": 14860 }, { "epoch": 353.8358208955224, "grad_norm": 30.943918228149414, "learning_rate": 9.621031746031747e-06, "loss": 31.7455, "step": 14861 }, { "epoch": 353.85970149253734, "grad_norm": 29.39823341369629, "learning_rate": 9.62037037037037e-06, "loss": 33.0786, "step": 14862 }, { "epoch": 353.88358208955225, "grad_norm": 34.88687515258789, "learning_rate": 9.619708994708995e-06, "loss": 32.0198, "step": 14863 }, { "epoch": 353.90746268656716, "grad_norm": 26.524593353271484, "learning_rate": 9.61904761904762e-06, "loss": 32.578, "step": 14864 }, { "epoch": 353.93134328358207, "grad_norm": 36.95452117919922, "learning_rate": 9.618386243386245e-06, "loss": 32.7933, "step": 14865 }, { "epoch": 353.95522388059703, "grad_norm": 31.791065216064453, "learning_rate": 9.617724867724868e-06, "loss": 32.5098, "step": 14866 }, { "epoch": 353.97910447761194, "grad_norm": 31.311098098754883, "learning_rate": 9.617063492063493e-06, "loss": 33.022, "step": 14867 }, { "epoch": 354.0, "grad_norm": 23.458200454711914, "learning_rate": 9.616402116402117e-06, "loss": 28.9878, "step": 14868 }, { "epoch": 354.0238805970149, "grad_norm": 32.75605010986328, "learning_rate": 9.61574074074074e-06, "loss": 31.8392, "step": 14869 }, { "epoch": 354.0477611940299, "grad_norm": 27.395606994628906, "learning_rate": 9.615079365079365e-06, "loss": 32.863, "step": 14870 }, { "epoch": 354.0716417910448, "grad_norm": 35.647464752197266, "learning_rate": 9.61441798941799e-06, "loss": 32.3482, "step": 14871 }, { "epoch": 354.0955223880597, "grad_norm": 32.841495513916016, "learning_rate": 9.613756613756613e-06, "loss": 32.7021, "step": 14872 }, { "epoch": 354.1194029850746, "grad_norm": 30.325456619262695, "learning_rate": 9.61309523809524e-06, "loss": 31.0254, "step": 14873 }, { "epoch": 354.14328358208957, "grad_norm": 30.15027618408203, "learning_rate": 9.612433862433863e-06, "loss": 32.3485, "step": 14874 }, { "epoch": 354.1671641791045, "grad_norm": 30.046512603759766, "learning_rate": 9.611772486772488e-06, "loss": 31.0794, "step": 14875 }, { "epoch": 354.1910447761194, "grad_norm": 28.54079246520996, "learning_rate": 9.611111111111112e-06, "loss": 32.5211, "step": 14876 }, { "epoch": 354.21492537313435, "grad_norm": 33.021080017089844, "learning_rate": 9.610449735449736e-06, "loss": 32.0495, "step": 14877 }, { "epoch": 354.23880597014926, "grad_norm": 28.060853958129883, "learning_rate": 9.60978835978836e-06, "loss": 32.697, "step": 14878 }, { "epoch": 354.26268656716417, "grad_norm": 35.05024719238281, "learning_rate": 9.609126984126985e-06, "loss": 31.7419, "step": 14879 }, { "epoch": 354.28656716417913, "grad_norm": 27.963586807250977, "learning_rate": 9.60846560846561e-06, "loss": 33.1233, "step": 14880 }, { "epoch": 354.31044776119404, "grad_norm": 32.25556945800781, "learning_rate": 9.607804232804233e-06, "loss": 32.6409, "step": 14881 }, { "epoch": 354.33432835820895, "grad_norm": 27.298051834106445, "learning_rate": 9.607142857142858e-06, "loss": 32.189, "step": 14882 }, { "epoch": 354.35820895522386, "grad_norm": 33.5682487487793, "learning_rate": 9.606481481481483e-06, "loss": 31.9503, "step": 14883 }, { "epoch": 354.3820895522388, "grad_norm": 27.984455108642578, "learning_rate": 9.605820105820106e-06, "loss": 32.4672, "step": 14884 }, { "epoch": 354.40597014925373, "grad_norm": 32.073486328125, "learning_rate": 9.60515873015873e-06, "loss": 32.324, "step": 14885 }, { "epoch": 354.42985074626864, "grad_norm": 29.038410186767578, "learning_rate": 9.604497354497355e-06, "loss": 31.9666, "step": 14886 }, { "epoch": 354.4537313432836, "grad_norm": 31.138967514038086, "learning_rate": 9.603835978835978e-06, "loss": 31.3937, "step": 14887 }, { "epoch": 354.4776119402985, "grad_norm": 28.261775970458984, "learning_rate": 9.603174603174605e-06, "loss": 31.8848, "step": 14888 }, { "epoch": 354.5014925373134, "grad_norm": 32.29547119140625, "learning_rate": 9.602513227513228e-06, "loss": 32.8057, "step": 14889 }, { "epoch": 354.52537313432833, "grad_norm": 28.475051879882812, "learning_rate": 9.601851851851853e-06, "loss": 33.2205, "step": 14890 }, { "epoch": 354.5492537313433, "grad_norm": 32.84428024291992, "learning_rate": 9.601190476190478e-06, "loss": 32.2397, "step": 14891 }, { "epoch": 354.5731343283582, "grad_norm": 28.925617218017578, "learning_rate": 9.6005291005291e-06, "loss": 32.1004, "step": 14892 }, { "epoch": 354.5970149253731, "grad_norm": 32.93207931518555, "learning_rate": 9.599867724867726e-06, "loss": 33.4937, "step": 14893 }, { "epoch": 354.6208955223881, "grad_norm": 25.15668487548828, "learning_rate": 9.59920634920635e-06, "loss": 33.2984, "step": 14894 }, { "epoch": 354.644776119403, "grad_norm": 31.57761001586914, "learning_rate": 9.598544973544973e-06, "loss": 31.2805, "step": 14895 }, { "epoch": 354.6686567164179, "grad_norm": 26.353530883789062, "learning_rate": 9.597883597883598e-06, "loss": 31.7676, "step": 14896 }, { "epoch": 354.6925373134328, "grad_norm": 29.290597915649414, "learning_rate": 9.597222222222223e-06, "loss": 31.8581, "step": 14897 }, { "epoch": 354.7164179104478, "grad_norm": 29.742828369140625, "learning_rate": 9.596560846560848e-06, "loss": 33.3169, "step": 14898 }, { "epoch": 354.7402985074627, "grad_norm": 28.00301170349121, "learning_rate": 9.595899470899471e-06, "loss": 34.1137, "step": 14899 }, { "epoch": 354.7641791044776, "grad_norm": 24.516002655029297, "learning_rate": 9.595238095238096e-06, "loss": 32.3348, "step": 14900 }, { "epoch": 354.78805970149256, "grad_norm": 29.55282974243164, "learning_rate": 9.59457671957672e-06, "loss": 32.2715, "step": 14901 }, { "epoch": 354.81194029850747, "grad_norm": 23.89673614501953, "learning_rate": 9.593915343915344e-06, "loss": 32.8552, "step": 14902 }, { "epoch": 354.8358208955224, "grad_norm": 33.77177429199219, "learning_rate": 9.59325396825397e-06, "loss": 32.3179, "step": 14903 }, { "epoch": 354.85970149253734, "grad_norm": 29.023235321044922, "learning_rate": 9.592592592592593e-06, "loss": 32.3885, "step": 14904 }, { "epoch": 354.88358208955225, "grad_norm": 29.500022888183594, "learning_rate": 9.591931216931218e-06, "loss": 32.2777, "step": 14905 }, { "epoch": 354.90746268656716, "grad_norm": 28.313243865966797, "learning_rate": 9.591269841269843e-06, "loss": 33.1268, "step": 14906 }, { "epoch": 354.93134328358207, "grad_norm": 28.699420928955078, "learning_rate": 9.590608465608466e-06, "loss": 32.0499, "step": 14907 }, { "epoch": 354.95522388059703, "grad_norm": 24.747024536132812, "learning_rate": 9.58994708994709e-06, "loss": 33.132, "step": 14908 }, { "epoch": 354.97910447761194, "grad_norm": 24.802593231201172, "learning_rate": 9.589285714285716e-06, "loss": 31.8087, "step": 14909 }, { "epoch": 355.0, "grad_norm": 19.17487335205078, "learning_rate": 9.588624338624339e-06, "loss": 28.8846, "step": 14910 }, { "epoch": 355.0238805970149, "grad_norm": 28.91864776611328, "learning_rate": 9.587962962962963e-06, "loss": 32.7224, "step": 14911 }, { "epoch": 355.0477611940299, "grad_norm": 21.518117904663086, "learning_rate": 9.587301587301588e-06, "loss": 33.0438, "step": 14912 }, { "epoch": 355.0716417910448, "grad_norm": 27.8236083984375, "learning_rate": 9.586640211640213e-06, "loss": 32.5284, "step": 14913 }, { "epoch": 355.0955223880597, "grad_norm": 24.90439224243164, "learning_rate": 9.585978835978836e-06, "loss": 32.3166, "step": 14914 }, { "epoch": 355.1194029850746, "grad_norm": 24.12470054626465, "learning_rate": 9.585317460317461e-06, "loss": 32.409, "step": 14915 }, { "epoch": 355.14328358208957, "grad_norm": 24.157527923583984, "learning_rate": 9.584656084656086e-06, "loss": 30.7544, "step": 14916 }, { "epoch": 355.1671641791045, "grad_norm": 22.023895263671875, "learning_rate": 9.583994708994709e-06, "loss": 31.9371, "step": 14917 }, { "epoch": 355.1910447761194, "grad_norm": 20.419158935546875, "learning_rate": 9.583333333333335e-06, "loss": 31.5149, "step": 14918 }, { "epoch": 355.21492537313435, "grad_norm": 19.5517520904541, "learning_rate": 9.582671957671958e-06, "loss": 31.7053, "step": 14919 }, { "epoch": 355.23880597014926, "grad_norm": 16.270599365234375, "learning_rate": 9.582010582010583e-06, "loss": 32.5354, "step": 14920 }, { "epoch": 355.26268656716417, "grad_norm": 27.07267189025879, "learning_rate": 9.581349206349208e-06, "loss": 33.035, "step": 14921 }, { "epoch": 355.28656716417913, "grad_norm": 18.50597381591797, "learning_rate": 9.580687830687831e-06, "loss": 32.2181, "step": 14922 }, { "epoch": 355.31044776119404, "grad_norm": 26.5690975189209, "learning_rate": 9.580026455026456e-06, "loss": 32.1257, "step": 14923 }, { "epoch": 355.33432835820895, "grad_norm": 21.633460998535156, "learning_rate": 9.57936507936508e-06, "loss": 31.5428, "step": 14924 }, { "epoch": 355.35820895522386, "grad_norm": 21.906354904174805, "learning_rate": 9.578703703703704e-06, "loss": 32.6405, "step": 14925 }, { "epoch": 355.3820895522388, "grad_norm": 20.9173641204834, "learning_rate": 9.578042328042329e-06, "loss": 32.5555, "step": 14926 }, { "epoch": 355.40597014925373, "grad_norm": 20.949565887451172, "learning_rate": 9.577380952380953e-06, "loss": 32.351, "step": 14927 }, { "epoch": 355.42985074626864, "grad_norm": 19.44186019897461, "learning_rate": 9.576719576719578e-06, "loss": 31.9537, "step": 14928 }, { "epoch": 355.4537313432836, "grad_norm": 20.792724609375, "learning_rate": 9.576058201058201e-06, "loss": 32.3724, "step": 14929 }, { "epoch": 355.4776119402985, "grad_norm": 17.375606536865234, "learning_rate": 9.575396825396826e-06, "loss": 32.4854, "step": 14930 }, { "epoch": 355.5014925373134, "grad_norm": 22.074893951416016, "learning_rate": 9.574735449735451e-06, "loss": 32.8543, "step": 14931 }, { "epoch": 355.52537313432833, "grad_norm": 15.514545440673828, "learning_rate": 9.574074074074074e-06, "loss": 32.9056, "step": 14932 }, { "epoch": 355.5492537313433, "grad_norm": 21.257991790771484, "learning_rate": 9.573412698412699e-06, "loss": 32.339, "step": 14933 }, { "epoch": 355.5731343283582, "grad_norm": 17.15261459350586, "learning_rate": 9.572751322751324e-06, "loss": 32.3944, "step": 14934 }, { "epoch": 355.5970149253731, "grad_norm": 21.776504516601562, "learning_rate": 9.572089947089947e-06, "loss": 32.9607, "step": 14935 }, { "epoch": 355.6208955223881, "grad_norm": 18.435537338256836, "learning_rate": 9.571428571428573e-06, "loss": 31.9289, "step": 14936 }, { "epoch": 355.644776119403, "grad_norm": 21.378620147705078, "learning_rate": 9.570767195767196e-06, "loss": 31.3542, "step": 14937 }, { "epoch": 355.6686567164179, "grad_norm": 20.051088333129883, "learning_rate": 9.570105820105821e-06, "loss": 33.0266, "step": 14938 }, { "epoch": 355.6925373134328, "grad_norm": 17.859416961669922, "learning_rate": 9.569444444444446e-06, "loss": 32.055, "step": 14939 }, { "epoch": 355.7164179104478, "grad_norm": 20.963573455810547, "learning_rate": 9.568783068783069e-06, "loss": 31.4541, "step": 14940 }, { "epoch": 355.7402985074627, "grad_norm": 19.61673355102539, "learning_rate": 9.568121693121694e-06, "loss": 32.1547, "step": 14941 }, { "epoch": 355.7641791044776, "grad_norm": 16.462936401367188, "learning_rate": 9.567460317460319e-06, "loss": 32.4982, "step": 14942 }, { "epoch": 355.78805970149256, "grad_norm": 17.522436141967773, "learning_rate": 9.566798941798943e-06, "loss": 32.8807, "step": 14943 }, { "epoch": 355.81194029850747, "grad_norm": 19.806222915649414, "learning_rate": 9.566137566137567e-06, "loss": 32.1656, "step": 14944 }, { "epoch": 355.8358208955224, "grad_norm": 21.00943374633789, "learning_rate": 9.565476190476191e-06, "loss": 33.3594, "step": 14945 }, { "epoch": 355.85970149253734, "grad_norm": 18.697425842285156, "learning_rate": 9.564814814814816e-06, "loss": 31.4778, "step": 14946 }, { "epoch": 355.88358208955225, "grad_norm": 16.346233367919922, "learning_rate": 9.56415343915344e-06, "loss": 32.8923, "step": 14947 }, { "epoch": 355.90746268656716, "grad_norm": 29.13170623779297, "learning_rate": 9.563492063492064e-06, "loss": 33.6557, "step": 14948 }, { "epoch": 355.93134328358207, "grad_norm": 19.528850555419922, "learning_rate": 9.562830687830689e-06, "loss": 33.0077, "step": 14949 }, { "epoch": 355.95522388059703, "grad_norm": 28.07571792602539, "learning_rate": 9.562169312169312e-06, "loss": 31.9175, "step": 14950 }, { "epoch": 355.97910447761194, "grad_norm": 21.989622116088867, "learning_rate": 9.561507936507938e-06, "loss": 33.2373, "step": 14951 }, { "epoch": 356.0, "grad_norm": 21.870967864990234, "learning_rate": 9.560846560846561e-06, "loss": 28.9643, "step": 14952 }, { "epoch": 356.0238805970149, "grad_norm": 20.74629020690918, "learning_rate": 9.560185185185186e-06, "loss": 31.936, "step": 14953 }, { "epoch": 356.0477611940299, "grad_norm": 20.529767990112305, "learning_rate": 9.559523809523811e-06, "loss": 32.4032, "step": 14954 }, { "epoch": 356.0716417910448, "grad_norm": 18.911712646484375, "learning_rate": 9.558862433862434e-06, "loss": 33.1696, "step": 14955 }, { "epoch": 356.0955223880597, "grad_norm": 22.036422729492188, "learning_rate": 9.558201058201059e-06, "loss": 32.2987, "step": 14956 }, { "epoch": 356.1194029850746, "grad_norm": 20.848241806030273, "learning_rate": 9.557539682539684e-06, "loss": 33.6154, "step": 14957 }, { "epoch": 356.14328358208957, "grad_norm": 18.03307342529297, "learning_rate": 9.556878306878309e-06, "loss": 32.6677, "step": 14958 }, { "epoch": 356.1671641791045, "grad_norm": 24.9796142578125, "learning_rate": 9.556216931216932e-06, "loss": 32.6979, "step": 14959 }, { "epoch": 356.1910447761194, "grad_norm": 18.944683074951172, "learning_rate": 9.555555555555556e-06, "loss": 32.6752, "step": 14960 }, { "epoch": 356.21492537313435, "grad_norm": 17.547496795654297, "learning_rate": 9.554894179894181e-06, "loss": 32.0206, "step": 14961 }, { "epoch": 356.23880597014926, "grad_norm": 24.731046676635742, "learning_rate": 9.554232804232804e-06, "loss": 32.2894, "step": 14962 }, { "epoch": 356.26268656716417, "grad_norm": 18.288780212402344, "learning_rate": 9.55357142857143e-06, "loss": 31.5285, "step": 14963 }, { "epoch": 356.28656716417913, "grad_norm": 20.468063354492188, "learning_rate": 9.552910052910054e-06, "loss": 31.8537, "step": 14964 }, { "epoch": 356.31044776119404, "grad_norm": 19.860734939575195, "learning_rate": 9.552248677248677e-06, "loss": 32.7907, "step": 14965 }, { "epoch": 356.33432835820895, "grad_norm": 16.64264678955078, "learning_rate": 9.551587301587304e-06, "loss": 30.7387, "step": 14966 }, { "epoch": 356.35820895522386, "grad_norm": 22.245464324951172, "learning_rate": 9.550925925925927e-06, "loss": 32.9044, "step": 14967 }, { "epoch": 356.3820895522388, "grad_norm": 16.65418815612793, "learning_rate": 9.550264550264551e-06, "loss": 31.781, "step": 14968 }, { "epoch": 356.40597014925373, "grad_norm": 24.147584915161133, "learning_rate": 9.549603174603176e-06, "loss": 32.6479, "step": 14969 }, { "epoch": 356.42985074626864, "grad_norm": 17.681825637817383, "learning_rate": 9.5489417989418e-06, "loss": 31.2675, "step": 14970 }, { "epoch": 356.4537313432836, "grad_norm": 23.947404861450195, "learning_rate": 9.548280423280424e-06, "loss": 31.1554, "step": 14971 }, { "epoch": 356.4776119402985, "grad_norm": 19.93479347229004, "learning_rate": 9.547619047619049e-06, "loss": 32.2155, "step": 14972 }, { "epoch": 356.5014925373134, "grad_norm": 21.344940185546875, "learning_rate": 9.546957671957672e-06, "loss": 31.7149, "step": 14973 }, { "epoch": 356.52537313432833, "grad_norm": 21.07832908630371, "learning_rate": 9.546296296296297e-06, "loss": 32.7319, "step": 14974 }, { "epoch": 356.5492537313433, "grad_norm": 21.95323371887207, "learning_rate": 9.545634920634922e-06, "loss": 32.2067, "step": 14975 }, { "epoch": 356.5731343283582, "grad_norm": 20.22541046142578, "learning_rate": 9.544973544973546e-06, "loss": 32.8922, "step": 14976 }, { "epoch": 356.5970149253731, "grad_norm": 22.408796310424805, "learning_rate": 9.54431216931217e-06, "loss": 33.4193, "step": 14977 }, { "epoch": 356.6208955223881, "grad_norm": 19.652568817138672, "learning_rate": 9.543650793650794e-06, "loss": 32.5599, "step": 14978 }, { "epoch": 356.644776119403, "grad_norm": 18.608518600463867, "learning_rate": 9.54298941798942e-06, "loss": 31.5812, "step": 14979 }, { "epoch": 356.6686567164179, "grad_norm": 20.28130340576172, "learning_rate": 9.542328042328042e-06, "loss": 33.1113, "step": 14980 }, { "epoch": 356.6925373134328, "grad_norm": 19.574302673339844, "learning_rate": 9.541666666666669e-06, "loss": 33.2153, "step": 14981 }, { "epoch": 356.7164179104478, "grad_norm": 18.869596481323242, "learning_rate": 9.541005291005292e-06, "loss": 32.5085, "step": 14982 }, { "epoch": 356.7402985074627, "grad_norm": 16.84361457824707, "learning_rate": 9.540343915343917e-06, "loss": 31.5301, "step": 14983 }, { "epoch": 356.7641791044776, "grad_norm": 22.837491989135742, "learning_rate": 9.539682539682541e-06, "loss": 33.0972, "step": 14984 }, { "epoch": 356.78805970149256, "grad_norm": 17.742788314819336, "learning_rate": 9.539021164021165e-06, "loss": 30.3043, "step": 14985 }, { "epoch": 356.81194029850747, "grad_norm": 21.453231811523438, "learning_rate": 9.53835978835979e-06, "loss": 32.3847, "step": 14986 }, { "epoch": 356.8358208955224, "grad_norm": 17.288801193237305, "learning_rate": 9.537698412698414e-06, "loss": 32.9337, "step": 14987 }, { "epoch": 356.85970149253734, "grad_norm": 23.859575271606445, "learning_rate": 9.537037037037037e-06, "loss": 32.251, "step": 14988 }, { "epoch": 356.88358208955225, "grad_norm": 19.979860305786133, "learning_rate": 9.536375661375662e-06, "loss": 32.9284, "step": 14989 }, { "epoch": 356.90746268656716, "grad_norm": 22.138931274414062, "learning_rate": 9.535714285714287e-06, "loss": 33.8049, "step": 14990 }, { "epoch": 356.93134328358207, "grad_norm": 20.50640296936035, "learning_rate": 9.535052910052912e-06, "loss": 32.672, "step": 14991 }, { "epoch": 356.95522388059703, "grad_norm": 16.06356430053711, "learning_rate": 9.534391534391535e-06, "loss": 32.0177, "step": 14992 }, { "epoch": 356.97910447761194, "grad_norm": 21.270681381225586, "learning_rate": 9.53373015873016e-06, "loss": 32.0279, "step": 14993 }, { "epoch": 357.0, "grad_norm": 16.916532516479492, "learning_rate": 9.533068783068784e-06, "loss": 28.727, "step": 14994 }, { "epoch": 357.0238805970149, "grad_norm": 18.77668571472168, "learning_rate": 9.532407407407407e-06, "loss": 32.0947, "step": 14995 }, { "epoch": 357.0477611940299, "grad_norm": 19.10407257080078, "learning_rate": 9.531746031746032e-06, "loss": 33.2131, "step": 14996 }, { "epoch": 357.0716417910448, "grad_norm": 16.22281837463379, "learning_rate": 9.531084656084657e-06, "loss": 32.4244, "step": 14997 }, { "epoch": 357.0955223880597, "grad_norm": 20.847332000732422, "learning_rate": 9.530423280423282e-06, "loss": 32.713, "step": 14998 }, { "epoch": 357.1194029850746, "grad_norm": 17.989212036132812, "learning_rate": 9.529761904761905e-06, "loss": 33.399, "step": 14999 }, { "epoch": 357.14328358208957, "grad_norm": 24.799148559570312, "learning_rate": 9.52910052910053e-06, "loss": 32.0786, "step": 15000 }, { "epoch": 357.1671641791045, "grad_norm": 21.575510025024414, "learning_rate": 9.528439153439155e-06, "loss": 32.4382, "step": 15001 }, { "epoch": 357.1910447761194, "grad_norm": 16.28173828125, "learning_rate": 9.527777777777778e-06, "loss": 31.9868, "step": 15002 }, { "epoch": 357.21492537313435, "grad_norm": 28.259313583374023, "learning_rate": 9.527116402116402e-06, "loss": 32.7567, "step": 15003 }, { "epoch": 357.23880597014926, "grad_norm": 18.24700927734375, "learning_rate": 9.526455026455027e-06, "loss": 31.7657, "step": 15004 }, { "epoch": 357.26268656716417, "grad_norm": 18.468481063842773, "learning_rate": 9.52579365079365e-06, "loss": 33.6397, "step": 15005 }, { "epoch": 357.28656716417913, "grad_norm": 25.735326766967773, "learning_rate": 9.525132275132277e-06, "loss": 32.1366, "step": 15006 }, { "epoch": 357.31044776119404, "grad_norm": 18.402223587036133, "learning_rate": 9.5244708994709e-06, "loss": 32.029, "step": 15007 }, { "epoch": 357.33432835820895, "grad_norm": 17.230976104736328, "learning_rate": 9.523809523809525e-06, "loss": 32.1935, "step": 15008 }, { "epoch": 357.35820895522386, "grad_norm": 23.654565811157227, "learning_rate": 9.52314814814815e-06, "loss": 31.0136, "step": 15009 }, { "epoch": 357.3820895522388, "grad_norm": 22.325735092163086, "learning_rate": 9.522486772486773e-06, "loss": 31.2313, "step": 15010 }, { "epoch": 357.40597014925373, "grad_norm": 14.84327220916748, "learning_rate": 9.521825396825397e-06, "loss": 32.0371, "step": 15011 }, { "epoch": 357.42985074626864, "grad_norm": 26.068971633911133, "learning_rate": 9.521164021164022e-06, "loss": 32.2656, "step": 15012 }, { "epoch": 357.4537313432836, "grad_norm": 22.828689575195312, "learning_rate": 9.520502645502645e-06, "loss": 32.0258, "step": 15013 }, { "epoch": 357.4776119402985, "grad_norm": 21.023258209228516, "learning_rate": 9.51984126984127e-06, "loss": 32.6881, "step": 15014 }, { "epoch": 357.5014925373134, "grad_norm": 25.693632125854492, "learning_rate": 9.519179894179895e-06, "loss": 32.1838, "step": 15015 }, { "epoch": 357.52537313432833, "grad_norm": 22.227737426757812, "learning_rate": 9.51851851851852e-06, "loss": 33.7588, "step": 15016 }, { "epoch": 357.5492537313433, "grad_norm": 23.099042892456055, "learning_rate": 9.517857142857143e-06, "loss": 32.1085, "step": 15017 }, { "epoch": 357.5731343283582, "grad_norm": 25.152877807617188, "learning_rate": 9.517195767195768e-06, "loss": 31.9446, "step": 15018 }, { "epoch": 357.5970149253731, "grad_norm": 18.60989761352539, "learning_rate": 9.516534391534392e-06, "loss": 33.194, "step": 15019 }, { "epoch": 357.6208955223881, "grad_norm": 21.53455924987793, "learning_rate": 9.515873015873016e-06, "loss": 31.9685, "step": 15020 }, { "epoch": 357.644776119403, "grad_norm": 23.571380615234375, "learning_rate": 9.515211640211642e-06, "loss": 32.6799, "step": 15021 }, { "epoch": 357.6686567164179, "grad_norm": 17.009485244750977, "learning_rate": 9.514550264550265e-06, "loss": 31.5809, "step": 15022 }, { "epoch": 357.6925373134328, "grad_norm": 20.138269424438477, "learning_rate": 9.51388888888889e-06, "loss": 32.7123, "step": 15023 }, { "epoch": 357.7164179104478, "grad_norm": 24.550018310546875, "learning_rate": 9.513227513227515e-06, "loss": 32.1209, "step": 15024 }, { "epoch": 357.7402985074627, "grad_norm": 15.26170825958252, "learning_rate": 9.512566137566138e-06, "loss": 31.6934, "step": 15025 }, { "epoch": 357.7641791044776, "grad_norm": 19.679811477661133, "learning_rate": 9.511904761904763e-06, "loss": 32.5993, "step": 15026 }, { "epoch": 357.78805970149256, "grad_norm": 24.347787857055664, "learning_rate": 9.511243386243387e-06, "loss": 32.2636, "step": 15027 }, { "epoch": 357.81194029850747, "grad_norm": 16.888120651245117, "learning_rate": 9.51058201058201e-06, "loss": 31.4244, "step": 15028 }, { "epoch": 357.8358208955224, "grad_norm": 17.593503952026367, "learning_rate": 9.509920634920635e-06, "loss": 32.2956, "step": 15029 }, { "epoch": 357.85970149253734, "grad_norm": 20.981962203979492, "learning_rate": 9.50925925925926e-06, "loss": 32.8367, "step": 15030 }, { "epoch": 357.88358208955225, "grad_norm": 19.466169357299805, "learning_rate": 9.508597883597885e-06, "loss": 32.9313, "step": 15031 }, { "epoch": 357.90746268656716, "grad_norm": NaN, "learning_rate": 9.507936507936508e-06, "loss": 44.3242, "step": 15032 }, { "epoch": 357.93134328358207, "grad_norm": 13.813207626342773, "learning_rate": 9.507936507936508e-06, "loss": 32.4276, "step": 15033 }, { "epoch": 357.95522388059703, "grad_norm": 24.915910720825195, "learning_rate": 9.507275132275133e-06, "loss": 32.183, "step": 15034 }, { "epoch": 357.97910447761194, "grad_norm": 17.859813690185547, "learning_rate": 9.506613756613758e-06, "loss": 32.1332, "step": 15035 }, { "epoch": 358.0, "grad_norm": 15.236315727233887, "learning_rate": 9.50595238095238e-06, "loss": 28.0183, "step": 15036 }, { "epoch": 358.0238805970149, "grad_norm": 24.482973098754883, "learning_rate": 9.505291005291006e-06, "loss": 31.2572, "step": 15037 }, { "epoch": 358.0477611940299, "grad_norm": 18.803373336791992, "learning_rate": 9.50462962962963e-06, "loss": 31.8213, "step": 15038 }, { "epoch": 358.0716417910448, "grad_norm": 18.289478302001953, "learning_rate": 9.503968253968255e-06, "loss": 31.7839, "step": 15039 }, { "epoch": 358.0955223880597, "grad_norm": 23.65920639038086, "learning_rate": 9.50330687830688e-06, "loss": 30.9145, "step": 15040 }, { "epoch": 358.1194029850746, "grad_norm": 18.217016220092773, "learning_rate": 9.502645502645503e-06, "loss": 31.6862, "step": 15041 }, { "epoch": 358.14328358208957, "grad_norm": 18.234375, "learning_rate": 9.501984126984128e-06, "loss": 31.6968, "step": 15042 }, { "epoch": 358.1671641791045, "grad_norm": 20.62415885925293, "learning_rate": 9.501322751322753e-06, "loss": 30.8351, "step": 15043 }, { "epoch": 358.1910447761194, "grad_norm": 15.774876594543457, "learning_rate": 9.500661375661376e-06, "loss": 31.4542, "step": 15044 }, { "epoch": 358.21492537313435, "grad_norm": 17.491928100585938, "learning_rate": 9.5e-06, "loss": 31.8844, "step": 15045 }, { "epoch": 358.23880597014926, "grad_norm": 13.995555877685547, "learning_rate": 9.499338624338625e-06, "loss": 33.1773, "step": 15046 }, { "epoch": 358.26268656716417, "grad_norm": 21.541467666625977, "learning_rate": 9.49867724867725e-06, "loss": 32.439, "step": 15047 }, { "epoch": 358.28656716417913, "grad_norm": 18.12356185913086, "learning_rate": 9.498015873015873e-06, "loss": 32.5128, "step": 15048 }, { "epoch": 358.31044776119404, "grad_norm": 19.17829132080078, "learning_rate": 9.497354497354498e-06, "loss": 32.4194, "step": 15049 }, { "epoch": 358.33432835820895, "grad_norm": 18.12895393371582, "learning_rate": 9.496693121693123e-06, "loss": 33.0875, "step": 15050 }, { "epoch": 358.35820895522386, "grad_norm": 17.93811798095703, "learning_rate": 9.496031746031746e-06, "loss": 33.0323, "step": 15051 }, { "epoch": 358.3820895522388, "grad_norm": 22.62415885925293, "learning_rate": 9.49537037037037e-06, "loss": 32.2606, "step": 15052 }, { "epoch": 358.40597014925373, "grad_norm": 20.261709213256836, "learning_rate": 9.494708994708996e-06, "loss": 30.8552, "step": 15053 }, { "epoch": 358.42985074626864, "grad_norm": 21.426820755004883, "learning_rate": 9.494047619047619e-06, "loss": 32.7622, "step": 15054 }, { "epoch": 358.4537313432836, "grad_norm": 19.726211547851562, "learning_rate": 9.493386243386245e-06, "loss": 32.8708, "step": 15055 }, { "epoch": 358.4776119402985, "grad_norm": 21.264524459838867, "learning_rate": 9.492724867724868e-06, "loss": 31.4836, "step": 15056 }, { "epoch": 358.5014925373134, "grad_norm": 21.882110595703125, "learning_rate": 9.492063492063493e-06, "loss": 32.7838, "step": 15057 }, { "epoch": 358.52537313432833, "grad_norm": 17.10589027404785, "learning_rate": 9.491402116402118e-06, "loss": 32.6003, "step": 15058 }, { "epoch": 358.5492537313433, "grad_norm": 20.396278381347656, "learning_rate": 9.490740740740741e-06, "loss": 32.3933, "step": 15059 }, { "epoch": 358.5731343283582, "grad_norm": 19.05466651916504, "learning_rate": 9.490079365079366e-06, "loss": 31.852, "step": 15060 }, { "epoch": 358.5970149253731, "grad_norm": 16.59945297241211, "learning_rate": 9.48941798941799e-06, "loss": 31.9461, "step": 15061 }, { "epoch": 358.6208955223881, "grad_norm": 19.087873458862305, "learning_rate": 9.488756613756615e-06, "loss": 31.1564, "step": 15062 }, { "epoch": 358.644776119403, "grad_norm": 19.61060905456543, "learning_rate": 9.488095238095238e-06, "loss": 32.3501, "step": 15063 }, { "epoch": 358.6686567164179, "grad_norm": 16.20462417602539, "learning_rate": 9.487433862433863e-06, "loss": 31.9814, "step": 15064 }, { "epoch": 358.6925373134328, "grad_norm": 15.829198837280273, "learning_rate": 9.486772486772488e-06, "loss": 33.2889, "step": 15065 }, { "epoch": 358.7164179104478, "grad_norm": 17.540855407714844, "learning_rate": 9.486111111111111e-06, "loss": 33.2141, "step": 15066 }, { "epoch": 358.7402985074627, "grad_norm": 15.826498031616211, "learning_rate": 9.485449735449736e-06, "loss": 33.7902, "step": 15067 }, { "epoch": 358.7641791044776, "grad_norm": 18.389429092407227, "learning_rate": 9.48478835978836e-06, "loss": 32.2526, "step": 15068 }, { "epoch": 358.78805970149256, "grad_norm": 15.800008773803711, "learning_rate": 9.484126984126984e-06, "loss": 32.2332, "step": 15069 }, { "epoch": 358.81194029850747, "grad_norm": 18.420787811279297, "learning_rate": 9.48346560846561e-06, "loss": 32.9463, "step": 15070 }, { "epoch": 358.8358208955224, "grad_norm": 20.59718132019043, "learning_rate": 9.482804232804233e-06, "loss": 33.6183, "step": 15071 }, { "epoch": 358.85970149253734, "grad_norm": 16.356536865234375, "learning_rate": 9.482142857142858e-06, "loss": 32.2114, "step": 15072 }, { "epoch": 358.88358208955225, "grad_norm": 14.856832504272461, "learning_rate": 9.481481481481483e-06, "loss": 32.3674, "step": 15073 }, { "epoch": 358.90746268656716, "grad_norm": 18.862398147583008, "learning_rate": 9.480820105820106e-06, "loss": 32.3993, "step": 15074 }, { "epoch": 358.93134328358207, "grad_norm": 19.472637176513672, "learning_rate": 9.480158730158731e-06, "loss": 32.7058, "step": 15075 }, { "epoch": 358.95522388059703, "grad_norm": 17.979694366455078, "learning_rate": 9.479497354497356e-06, "loss": 32.1515, "step": 15076 }, { "epoch": 358.97910447761194, "grad_norm": 15.228195190429688, "learning_rate": 9.478835978835979e-06, "loss": 32.6082, "step": 15077 }, { "epoch": 359.0, "grad_norm": 16.04807472229004, "learning_rate": 9.478174603174604e-06, "loss": 29.2817, "step": 15078 }, { "epoch": 359.0238805970149, "grad_norm": 17.568143844604492, "learning_rate": 9.477513227513228e-06, "loss": 32.422, "step": 15079 }, { "epoch": 359.0477611940299, "grad_norm": 18.491682052612305, "learning_rate": 9.476851851851853e-06, "loss": 32.4046, "step": 15080 }, { "epoch": 359.0716417910448, "grad_norm": 16.725357055664062, "learning_rate": 9.476190476190476e-06, "loss": 31.3566, "step": 15081 }, { "epoch": 359.0955223880597, "grad_norm": 14.86788558959961, "learning_rate": 9.475529100529101e-06, "loss": 31.243, "step": 15082 }, { "epoch": 359.1194029850746, "grad_norm": 18.561227798461914, "learning_rate": 9.474867724867726e-06, "loss": 31.8954, "step": 15083 }, { "epoch": 359.14328358208957, "grad_norm": 18.431364059448242, "learning_rate": 9.474206349206349e-06, "loss": 32.5703, "step": 15084 }, { "epoch": 359.1671641791045, "grad_norm": 19.41144371032715, "learning_rate": 9.473544973544975e-06, "loss": 32.4983, "step": 15085 }, { "epoch": 359.1910447761194, "grad_norm": 17.109878540039062, "learning_rate": 9.472883597883599e-06, "loss": 31.6643, "step": 15086 }, { "epoch": 359.21492537313435, "grad_norm": 16.63594627380371, "learning_rate": 9.472222222222223e-06, "loss": 32.945, "step": 15087 }, { "epoch": 359.23880597014926, "grad_norm": 15.511810302734375, "learning_rate": 9.471560846560848e-06, "loss": 32.0218, "step": 15088 }, { "epoch": 359.26268656716417, "grad_norm": 15.681384086608887, "learning_rate": 9.470899470899471e-06, "loss": 32.5628, "step": 15089 }, { "epoch": 359.28656716417913, "grad_norm": 15.733247756958008, "learning_rate": 9.470238095238096e-06, "loss": 32.2761, "step": 15090 }, { "epoch": 359.31044776119404, "grad_norm": 19.590049743652344, "learning_rate": 9.469576719576721e-06, "loss": 31.4616, "step": 15091 }, { "epoch": 359.33432835820895, "grad_norm": 20.36347198486328, "learning_rate": 9.468915343915344e-06, "loss": 32.6567, "step": 15092 }, { "epoch": 359.35820895522386, "grad_norm": 16.04654884338379, "learning_rate": 9.468253968253969e-06, "loss": 32.9795, "step": 15093 }, { "epoch": 359.3820895522388, "grad_norm": 15.172518730163574, "learning_rate": 9.467592592592594e-06, "loss": 31.9688, "step": 15094 }, { "epoch": 359.40597014925373, "grad_norm": 14.613859176635742, "learning_rate": 9.466931216931218e-06, "loss": 32.2976, "step": 15095 }, { "epoch": 359.42985074626864, "grad_norm": 20.026838302612305, "learning_rate": 9.466269841269841e-06, "loss": 32.5515, "step": 15096 }, { "epoch": 359.4537313432836, "grad_norm": 20.542890548706055, "learning_rate": 9.465608465608466e-06, "loss": 32.0271, "step": 15097 }, { "epoch": 359.4776119402985, "grad_norm": 16.293359756469727, "learning_rate": 9.464947089947091e-06, "loss": 32.4827, "step": 15098 }, { "epoch": 359.5014925373134, "grad_norm": 18.699499130249023, "learning_rate": 9.464285714285714e-06, "loss": 32.3706, "step": 15099 }, { "epoch": 359.52537313432833, "grad_norm": 18.629541397094727, "learning_rate": 9.463624338624339e-06, "loss": 31.9718, "step": 15100 }, { "epoch": 359.5492537313433, "grad_norm": 16.9014949798584, "learning_rate": 9.462962962962964e-06, "loss": 32.1638, "step": 15101 }, { "epoch": 359.5731343283582, "grad_norm": NaN, "learning_rate": 9.462301587301589e-06, "loss": 51.2537, "step": 15102 }, { "epoch": 359.5970149253731, "grad_norm": 20.027040481567383, "learning_rate": 9.462301587301589e-06, "loss": 32.284, "step": 15103 }, { "epoch": 359.6208955223881, "grad_norm": 20.181591033935547, "learning_rate": 9.461640211640213e-06, "loss": 31.7524, "step": 15104 }, { "epoch": 359.644776119403, "grad_norm": 16.75054359436035, "learning_rate": 9.460978835978836e-06, "loss": 31.7709, "step": 15105 }, { "epoch": 359.6686567164179, "grad_norm": 21.57439613342285, "learning_rate": 9.460317460317461e-06, "loss": 33.2595, "step": 15106 }, { "epoch": 359.6925373134328, "grad_norm": 15.870322227478027, "learning_rate": 9.459656084656086e-06, "loss": 32.8171, "step": 15107 }, { "epoch": 359.7164179104478, "grad_norm": 18.507720947265625, "learning_rate": 9.45899470899471e-06, "loss": 31.6635, "step": 15108 }, { "epoch": 359.7402985074627, "grad_norm": 19.092636108398438, "learning_rate": 9.458333333333334e-06, "loss": 32.6238, "step": 15109 }, { "epoch": 359.7641791044776, "grad_norm": 16.813720703125, "learning_rate": 9.457671957671959e-06, "loss": 33.4931, "step": 15110 }, { "epoch": 359.78805970149256, "grad_norm": 19.020538330078125, "learning_rate": 9.457010582010584e-06, "loss": 32.5465, "step": 15111 }, { "epoch": 359.81194029850747, "grad_norm": 17.173288345336914, "learning_rate": 9.456349206349207e-06, "loss": 30.842, "step": 15112 }, { "epoch": 359.8358208955224, "grad_norm": 16.530750274658203, "learning_rate": 9.455687830687831e-06, "loss": 32.7027, "step": 15113 }, { "epoch": 359.85970149253734, "grad_norm": 16.611957550048828, "learning_rate": 9.455026455026456e-06, "loss": 32.7209, "step": 15114 }, { "epoch": 359.88358208955225, "grad_norm": 20.9792423248291, "learning_rate": 9.45436507936508e-06, "loss": 32.247, "step": 15115 }, { "epoch": 359.90746268656716, "grad_norm": 17.569683074951172, "learning_rate": 9.453703703703704e-06, "loss": 32.5358, "step": 15116 }, { "epoch": 359.93134328358207, "grad_norm": 16.445842742919922, "learning_rate": 9.453042328042329e-06, "loss": 33.1839, "step": 15117 }, { "epoch": 359.95522388059703, "grad_norm": 17.492259979248047, "learning_rate": 9.452380952380952e-06, "loss": 31.6665, "step": 15118 }, { "epoch": 359.97910447761194, "grad_norm": 17.20362091064453, "learning_rate": 9.451719576719579e-06, "loss": 32.2951, "step": 15119 }, { "epoch": 360.0, "grad_norm": 15.607762336730957, "learning_rate": 9.451058201058202e-06, "loss": 27.6531, "step": 15120 }, { "epoch": 360.0, "step": 15120, "total_flos": 7.433098170764248e+17, "train_loss": 1.8099347542202662, "train_runtime": 25633.3904, "train_samples_per_second": 75.164, "train_steps_per_second": 0.59 }, { "epoch": 360.0238805970149, "grad_norm": 17.132539749145508, "learning_rate": 1e-05, "loss": 32.4364, "step": 15121 }, { "epoch": 360.0477611940299, "grad_norm": Infinity, "learning_rate": 9.99937343358396e-06, "loss": 36.1293, "step": 15122 }, { "epoch": 360.0716417910448, "grad_norm": 256.2436218261719, "learning_rate": 9.99937343358396e-06, "loss": 37.1061, "step": 15123 }, { "epoch": 360.0955223880597, "grad_norm": 126.70600128173828, "learning_rate": 9.99874686716792e-06, "loss": 35.8634, "step": 15124 }, { "epoch": 360.1194029850746, "grad_norm": 70.51016235351562, "learning_rate": 9.99812030075188e-06, "loss": 34.1869, "step": 15125 }, { "epoch": 360.14328358208957, "grad_norm": 51.900020599365234, "learning_rate": 9.99749373433584e-06, "loss": 33.8237, "step": 15126 }, { "epoch": 360.1671641791045, "grad_norm": 61.7481689453125, "learning_rate": 9.9968671679198e-06, "loss": 34.1598, "step": 15127 }, { "epoch": 360.1910447761194, "grad_norm": 47.25251007080078, "learning_rate": 9.996240601503761e-06, "loss": 32.6713, "step": 15128 }, { "epoch": 360.21492537313435, "grad_norm": 39.941184997558594, "learning_rate": 9.995614035087721e-06, "loss": 32.4269, "step": 15129 }, { "epoch": 360.23880597014926, "grad_norm": 28.25043296813965, "learning_rate": 9.99498746867168e-06, "loss": 34.0428, "step": 15130 }, { "epoch": 360.26268656716417, "grad_norm": 32.02793502807617, "learning_rate": 9.994360902255639e-06, "loss": 32.6393, "step": 15131 }, { "epoch": 360.28656716417913, "grad_norm": 30.364452362060547, "learning_rate": 9.9937343358396e-06, "loss": 32.22, "step": 15132 }, { "epoch": 360.31044776119404, "grad_norm": 24.977066040039062, "learning_rate": 9.99310776942356e-06, "loss": 32.555, "step": 15133 }, { "epoch": 360.33432835820895, "grad_norm": 20.768693923950195, "learning_rate": 9.992481203007518e-06, "loss": 31.885, "step": 15134 }, { "epoch": 360.35820895522386, "grad_norm": 21.985082626342773, "learning_rate": 9.99185463659148e-06, "loss": 32.3355, "step": 15135 }, { "epoch": 360.3820895522388, "grad_norm": 21.16714859008789, "learning_rate": 9.99122807017544e-06, "loss": 32.2855, "step": 15136 }, { "epoch": 360.40597014925373, "grad_norm": 25.63187026977539, "learning_rate": 9.9906015037594e-06, "loss": 31.8372, "step": 15137 }, { "epoch": 360.42985074626864, "grad_norm": 22.163450241088867, "learning_rate": 9.98997493734336e-06, "loss": 31.9503, "step": 15138 }, { "epoch": 360.4537313432836, "grad_norm": 22.825502395629883, "learning_rate": 9.989348370927319e-06, "loss": 31.8992, "step": 15139 }, { "epoch": 360.4776119402985, "grad_norm": 22.60196304321289, "learning_rate": 9.988721804511279e-06, "loss": 31.5811, "step": 15140 }, { "epoch": 360.5014925373134, "grad_norm": 21.542665481567383, "learning_rate": 9.988095238095239e-06, "loss": 32.0599, "step": 15141 }, { "epoch": 360.52537313432833, "grad_norm": 16.821033477783203, "learning_rate": 9.987468671679199e-06, "loss": 31.8262, "step": 15142 }, { "epoch": 360.5492537313433, "grad_norm": 17.12664222717285, "learning_rate": 9.98684210526316e-06, "loss": 32.5633, "step": 15143 }, { "epoch": 360.5731343283582, "grad_norm": 19.482683181762695, "learning_rate": 9.986215538847118e-06, "loss": 31.6738, "step": 15144 }, { "epoch": 360.5970149253731, "grad_norm": 21.525285720825195, "learning_rate": 9.985588972431078e-06, "loss": 32.8867, "step": 15145 }, { "epoch": 360.6208955223881, "grad_norm": 20.889223098754883, "learning_rate": 9.984962406015038e-06, "loss": 31.0746, "step": 15146 }, { "epoch": 360.644776119403, "grad_norm": 16.877544403076172, "learning_rate": 9.984335839599e-06, "loss": 31.6427, "step": 15147 }, { "epoch": 360.6686567164179, "grad_norm": 16.711463928222656, "learning_rate": 9.983709273182957e-06, "loss": 32.912, "step": 15148 }, { "epoch": 360.6925373134328, "grad_norm": 20.331371307373047, "learning_rate": 9.983082706766917e-06, "loss": 32.2909, "step": 15149 }, { "epoch": 360.7164179104478, "grad_norm": 24.232513427734375, "learning_rate": 9.982456140350879e-06, "loss": 33.2445, "step": 15150 }, { "epoch": 360.7402985074627, "grad_norm": 22.532785415649414, "learning_rate": 9.981829573934838e-06, "loss": 31.4148, "step": 15151 }, { "epoch": 360.7641791044776, "grad_norm": 16.70780372619629, "learning_rate": 9.981203007518798e-06, "loss": 32.9421, "step": 15152 }, { "epoch": 360.78805970149256, "grad_norm": 20.62165069580078, "learning_rate": 9.980576441102758e-06, "loss": 32.7361, "step": 15153 }, { "epoch": 360.81194029850747, "grad_norm": 22.46186637878418, "learning_rate": 9.979949874686718e-06, "loss": 31.7574, "step": 15154 }, { "epoch": 360.8358208955224, "grad_norm": 22.450260162353516, "learning_rate": 9.979323308270678e-06, "loss": 33.2606, "step": 15155 }, { "epoch": 360.85970149253734, "grad_norm": 15.548917770385742, "learning_rate": 9.978696741854637e-06, "loss": 32.1242, "step": 15156 }, { "epoch": 360.88358208955225, "grad_norm": 16.999727249145508, "learning_rate": 9.978070175438597e-06, "loss": 32.9468, "step": 15157 }, { "epoch": 360.90746268656716, "grad_norm": 16.765602111816406, "learning_rate": 9.977443609022557e-06, "loss": 32.8646, "step": 15158 }, { "epoch": 360.93134328358207, "grad_norm": 16.898029327392578, "learning_rate": 9.976817042606517e-06, "loss": 33.7647, "step": 15159 }, { "epoch": 360.95522388059703, "grad_norm": 15.133593559265137, "learning_rate": 9.976190476190477e-06, "loss": 32.4601, "step": 15160 }, { "epoch": 360.97910447761194, "grad_norm": 18.777118682861328, "learning_rate": 9.975563909774436e-06, "loss": 32.0783, "step": 15161 }, { "epoch": 361.0, "grad_norm": 17.703529357910156, "learning_rate": 9.974937343358396e-06, "loss": 27.7518, "step": 15162 }, { "epoch": 361.0238805970149, "grad_norm": 17.120885848999023, "learning_rate": 9.974310776942356e-06, "loss": 32.3899, "step": 15163 }, { "epoch": 361.0477611940299, "grad_norm": 18.410572052001953, "learning_rate": 9.973684210526316e-06, "loss": 32.0294, "step": 15164 }, { "epoch": 361.0716417910448, "grad_norm": 15.773614883422852, "learning_rate": 9.973057644110277e-06, "loss": 31.6015, "step": 15165 }, { "epoch": 361.0955223880597, "grad_norm": 25.361528396606445, "learning_rate": 9.972431077694237e-06, "loss": 32.1149, "step": 15166 }, { "epoch": 361.1194029850746, "grad_norm": 21.404897689819336, "learning_rate": 9.971804511278195e-06, "loss": 32.3963, "step": 15167 }, { "epoch": 361.14328358208957, "grad_norm": 15.300490379333496, "learning_rate": 9.971177944862157e-06, "loss": 31.4946, "step": 15168 }, { "epoch": 361.1671641791045, "grad_norm": 23.240764617919922, "learning_rate": 9.970551378446116e-06, "loss": 33.2305, "step": 15169 }, { "epoch": 361.1910447761194, "grad_norm": 23.104097366333008, "learning_rate": 9.969924812030076e-06, "loss": 31.5535, "step": 15170 }, { "epoch": 361.21492537313435, "grad_norm": 18.92438507080078, "learning_rate": 9.969298245614036e-06, "loss": 30.3473, "step": 15171 }, { "epoch": 361.23880597014926, "grad_norm": 17.293514251708984, "learning_rate": 9.968671679197996e-06, "loss": 32.2234, "step": 15172 }, { "epoch": 361.26268656716417, "grad_norm": 18.993066787719727, "learning_rate": 9.968045112781956e-06, "loss": 32.7033, "step": 15173 }, { "epoch": 361.28656716417913, "grad_norm": 19.64884376525879, "learning_rate": 9.967418546365915e-06, "loss": 32.5276, "step": 15174 }, { "epoch": 361.31044776119404, "grad_norm": 18.286048889160156, "learning_rate": 9.966791979949875e-06, "loss": 32.4066, "step": 15175 }, { "epoch": 361.33432835820895, "grad_norm": 15.611220359802246, "learning_rate": 9.966165413533837e-06, "loss": 32.4372, "step": 15176 }, { "epoch": 361.35820895522386, "grad_norm": 23.98234748840332, "learning_rate": 9.965538847117795e-06, "loss": 31.8022, "step": 15177 }, { "epoch": 361.3820895522388, "grad_norm": 20.376148223876953, "learning_rate": 9.964912280701755e-06, "loss": 32.0176, "step": 15178 }, { "epoch": 361.40597014925373, "grad_norm": 15.575143814086914, "learning_rate": 9.964285714285714e-06, "loss": 32.4731, "step": 15179 }, { "epoch": 361.42985074626864, "grad_norm": 24.95465087890625, "learning_rate": 9.963659147869676e-06, "loss": 33.1329, "step": 15180 }, { "epoch": 361.4537313432836, "grad_norm": 21.045879364013672, "learning_rate": 9.963032581453634e-06, "loss": 33.2694, "step": 15181 }, { "epoch": 361.4776119402985, "grad_norm": 14.200586318969727, "learning_rate": 9.962406015037594e-06, "loss": 32.3127, "step": 15182 }, { "epoch": 361.5014925373134, "grad_norm": 25.738948822021484, "learning_rate": 9.961779448621555e-06, "loss": 31.5539, "step": 15183 }, { "epoch": 361.52537313432833, "grad_norm": 21.601974487304688, "learning_rate": 9.961152882205515e-06, "loss": 30.7075, "step": 15184 }, { "epoch": 361.5492537313433, "grad_norm": 13.03670883178711, "learning_rate": 9.960526315789475e-06, "loss": 32.0896, "step": 15185 }, { "epoch": 361.5731343283582, "grad_norm": 23.764514923095703, "learning_rate": 9.959899749373435e-06, "loss": 32.5814, "step": 15186 }, { "epoch": 361.5970149253731, "grad_norm": 22.003652572631836, "learning_rate": 9.959273182957395e-06, "loss": 32.2197, "step": 15187 }, { "epoch": 361.6208955223881, "grad_norm": 17.986604690551758, "learning_rate": 9.958646616541354e-06, "loss": 32.1586, "step": 15188 }, { "epoch": 361.644776119403, "grad_norm": 19.210546493530273, "learning_rate": 9.958020050125314e-06, "loss": 32.8158, "step": 15189 }, { "epoch": 361.6686567164179, "grad_norm": 24.086362838745117, "learning_rate": 9.957393483709274e-06, "loss": 33.3453, "step": 15190 }, { "epoch": 361.6925373134328, "grad_norm": 19.50816535949707, "learning_rate": 9.956766917293234e-06, "loss": 31.7828, "step": 15191 }, { "epoch": 361.7164179104478, "grad_norm": 17.095325469970703, "learning_rate": 9.956140350877194e-06, "loss": 32.0506, "step": 15192 }, { "epoch": 361.7402985074627, "grad_norm": 22.07693862915039, "learning_rate": 9.955513784461153e-06, "loss": 31.8724, "step": 15193 }, { "epoch": 361.7641791044776, "grad_norm": 20.5903263092041, "learning_rate": 9.954887218045113e-06, "loss": 31.8127, "step": 15194 }, { "epoch": 361.78805970149256, "grad_norm": 16.421934127807617, "learning_rate": 9.954260651629073e-06, "loss": 31.5456, "step": 15195 }, { "epoch": 361.81194029850747, "grad_norm": 24.3125, "learning_rate": 9.953634085213033e-06, "loss": 31.9808, "step": 15196 }, { "epoch": 361.8358208955224, "grad_norm": 23.15384864807129, "learning_rate": 9.953007518796993e-06, "loss": 33.4299, "step": 15197 }, { "epoch": 361.85970149253734, "grad_norm": 18.186634063720703, "learning_rate": 9.952380952380954e-06, "loss": 34.1768, "step": 15198 }, { "epoch": 361.88358208955225, "grad_norm": 20.971479415893555, "learning_rate": 9.951754385964914e-06, "loss": 31.6073, "step": 15199 }, { "epoch": 361.90746268656716, "grad_norm": 21.818872451782227, "learning_rate": 9.951127819548872e-06, "loss": 32.7098, "step": 15200 }, { "epoch": 361.93134328358207, "grad_norm": 19.670656204223633, "learning_rate": 9.950501253132833e-06, "loss": 31.7838, "step": 15201 }, { "epoch": 361.95522388059703, "grad_norm": 17.483009338378906, "learning_rate": 9.949874686716793e-06, "loss": 32.1778, "step": 15202 }, { "epoch": 361.97910447761194, "grad_norm": 16.347700119018555, "learning_rate": 9.949248120300753e-06, "loss": 32.058, "step": 15203 }, { "epoch": 362.0, "grad_norm": 17.010881423950195, "learning_rate": 9.948621553884713e-06, "loss": 28.1426, "step": 15204 }, { "epoch": 362.0238805970149, "grad_norm": 19.743928909301758, "learning_rate": 9.947994987468673e-06, "loss": 32.6264, "step": 15205 }, { "epoch": 362.0477611940299, "grad_norm": 14.946175575256348, "learning_rate": 9.947368421052632e-06, "loss": 30.7231, "step": 15206 }, { "epoch": 362.0716417910448, "grad_norm": 17.401426315307617, "learning_rate": 9.946741854636592e-06, "loss": 32.1345, "step": 15207 }, { "epoch": 362.0955223880597, "grad_norm": 15.826093673706055, "learning_rate": 9.946115288220552e-06, "loss": 32.8926, "step": 15208 }, { "epoch": 362.1194029850746, "grad_norm": 16.590192794799805, "learning_rate": 9.945488721804512e-06, "loss": 31.0876, "step": 15209 }, { "epoch": 362.14328358208957, "grad_norm": 23.43697166442871, "learning_rate": 9.944862155388472e-06, "loss": 31.8363, "step": 15210 }, { "epoch": 362.1671641791045, "grad_norm": 18.69304084777832, "learning_rate": 9.944235588972431e-06, "loss": 32.4287, "step": 15211 }, { "epoch": 362.1910447761194, "grad_norm": 17.14446449279785, "learning_rate": 9.943609022556391e-06, "loss": 31.4971, "step": 15212 }, { "epoch": 362.21492537313435, "grad_norm": 16.17544937133789, "learning_rate": 9.942982456140353e-06, "loss": 33.4381, "step": 15213 }, { "epoch": 362.23880597014926, "grad_norm": 21.8199520111084, "learning_rate": 9.942355889724311e-06, "loss": 32.3361, "step": 15214 }, { "epoch": 362.26268656716417, "grad_norm": 23.29813003540039, "learning_rate": 9.94172932330827e-06, "loss": 32.6938, "step": 15215 }, { "epoch": 362.28656716417913, "grad_norm": 22.747772216796875, "learning_rate": 9.941102756892232e-06, "loss": 31.8064, "step": 15216 }, { "epoch": 362.31044776119404, "grad_norm": 17.181781768798828, "learning_rate": 9.940476190476192e-06, "loss": 31.7559, "step": 15217 }, { "epoch": 362.33432835820895, "grad_norm": 22.056598663330078, "learning_rate": 9.939849624060152e-06, "loss": 32.0445, "step": 15218 }, { "epoch": 362.35820895522386, "grad_norm": 20.21261978149414, "learning_rate": 9.93922305764411e-06, "loss": 33.0214, "step": 15219 }, { "epoch": 362.3820895522388, "grad_norm": 15.272744178771973, "learning_rate": 9.938596491228071e-06, "loss": 30.1479, "step": 15220 }, { "epoch": 362.40597014925373, "grad_norm": 17.363733291625977, "learning_rate": 9.937969924812031e-06, "loss": 32.0461, "step": 15221 }, { "epoch": 362.42985074626864, "grad_norm": 14.621134757995605, "learning_rate": 9.937343358395991e-06, "loss": 32.2182, "step": 15222 }, { "epoch": 362.4537313432836, "grad_norm": 19.389097213745117, "learning_rate": 9.93671679197995e-06, "loss": 32.7509, "step": 15223 }, { "epoch": 362.4776119402985, "grad_norm": 20.173542022705078, "learning_rate": 9.93609022556391e-06, "loss": 32.1246, "step": 15224 }, { "epoch": 362.5014925373134, "grad_norm": 18.420679092407227, "learning_rate": 9.93546365914787e-06, "loss": 33.4089, "step": 15225 }, { "epoch": 362.52537313432833, "grad_norm": 16.837743759155273, "learning_rate": 9.93483709273183e-06, "loss": 31.5921, "step": 15226 }, { "epoch": 362.5492537313433, "grad_norm": 15.984968185424805, "learning_rate": 9.93421052631579e-06, "loss": 32.4424, "step": 15227 }, { "epoch": 362.5731343283582, "grad_norm": 19.200674057006836, "learning_rate": 9.93358395989975e-06, "loss": 32.0312, "step": 15228 }, { "epoch": 362.5970149253731, "grad_norm": 19.186283111572266, "learning_rate": 9.93295739348371e-06, "loss": 30.7211, "step": 15229 }, { "epoch": 362.6208955223881, "grad_norm": 19.88683319091797, "learning_rate": 9.93233082706767e-06, "loss": 32.1018, "step": 15230 }, { "epoch": 362.644776119403, "grad_norm": 16.639360427856445, "learning_rate": 9.93170426065163e-06, "loss": 32.0127, "step": 15231 }, { "epoch": 362.6686567164179, "grad_norm": 24.132434844970703, "learning_rate": 9.93107769423559e-06, "loss": 32.8554, "step": 15232 }, { "epoch": 362.6925373134328, "grad_norm": 23.40017318725586, "learning_rate": 9.930451127819549e-06, "loss": 32.1667, "step": 15233 }, { "epoch": 362.7164179104478, "grad_norm": 20.206464767456055, "learning_rate": 9.929824561403509e-06, "loss": 31.7039, "step": 15234 }, { "epoch": 362.7402985074627, "grad_norm": 21.73969268798828, "learning_rate": 9.92919799498747e-06, "loss": 32.7147, "step": 15235 }, { "epoch": 362.7641791044776, "grad_norm": 18.947214126586914, "learning_rate": 9.92857142857143e-06, "loss": 33.2989, "step": 15236 }, { "epoch": 362.78805970149256, "grad_norm": 21.250171661376953, "learning_rate": 9.92794486215539e-06, "loss": 31.2918, "step": 15237 }, { "epoch": 362.81194029850747, "grad_norm": 17.559688568115234, "learning_rate": 9.92731829573935e-06, "loss": 32.6922, "step": 15238 }, { "epoch": 362.8358208955224, "grad_norm": 19.79376983642578, "learning_rate": 9.92669172932331e-06, "loss": 32.9719, "step": 15239 }, { "epoch": 362.85970149253734, "grad_norm": 21.98496437072754, "learning_rate": 9.926065162907269e-06, "loss": 32.4065, "step": 15240 }, { "epoch": 362.88358208955225, "grad_norm": 19.461105346679688, "learning_rate": 9.925438596491229e-06, "loss": 33.0088, "step": 15241 }, { "epoch": 362.90746268656716, "grad_norm": 21.3272705078125, "learning_rate": 9.924812030075189e-06, "loss": 32.0047, "step": 15242 }, { "epoch": 362.93134328358207, "grad_norm": 16.512256622314453, "learning_rate": 9.924185463659148e-06, "loss": 31.7071, "step": 15243 }, { "epoch": 362.95522388059703, "grad_norm": 24.910062789916992, "learning_rate": 9.923558897243108e-06, "loss": 32.8821, "step": 15244 }, { "epoch": 362.97910447761194, "grad_norm": 19.84198760986328, "learning_rate": 9.922932330827068e-06, "loss": 31.2735, "step": 15245 }, { "epoch": 363.0, "grad_norm": 17.119239807128906, "learning_rate": 9.92230576441103e-06, "loss": 28.0328, "step": 15246 }, { "epoch": 363.0238805970149, "grad_norm": 26.13155174255371, "learning_rate": 9.921679197994988e-06, "loss": 32.0354, "step": 15247 }, { "epoch": 363.0477611940299, "grad_norm": 22.28853416442871, "learning_rate": 9.921052631578947e-06, "loss": 32.1523, "step": 15248 }, { "epoch": 363.0716417910448, "grad_norm": 16.86628532409668, "learning_rate": 9.920426065162907e-06, "loss": 33.1073, "step": 15249 }, { "epoch": 363.0955223880597, "grad_norm": 20.905820846557617, "learning_rate": 9.919799498746869e-06, "loss": 32.6834, "step": 15250 }, { "epoch": 363.1194029850746, "grad_norm": 17.573211669921875, "learning_rate": 9.919172932330829e-06, "loss": 31.2877, "step": 15251 }, { "epoch": 363.14328358208957, "grad_norm": 16.618595123291016, "learning_rate": 9.918546365914787e-06, "loss": 31.8507, "step": 15252 }, { "epoch": 363.1671641791045, "grad_norm": 19.411659240722656, "learning_rate": 9.917919799498748e-06, "loss": 32.3002, "step": 15253 }, { "epoch": 363.1910447761194, "grad_norm": 18.91495704650879, "learning_rate": 9.917293233082708e-06, "loss": 32.5148, "step": 15254 }, { "epoch": 363.21492537313435, "grad_norm": 18.91404914855957, "learning_rate": 9.916666666666668e-06, "loss": 31.3953, "step": 15255 }, { "epoch": 363.23880597014926, "grad_norm": 19.728281021118164, "learning_rate": 9.916040100250628e-06, "loss": 32.4247, "step": 15256 }, { "epoch": 363.26268656716417, "grad_norm": 22.37010955810547, "learning_rate": 9.915413533834587e-06, "loss": 34.0721, "step": 15257 }, { "epoch": 363.28656716417913, "grad_norm": NaN, "learning_rate": 9.914786967418547e-06, "loss": 55.5244, "step": 15258 }, { "epoch": 363.31044776119404, "grad_norm": 19.158201217651367, "learning_rate": 9.914786967418547e-06, "loss": 32.7449, "step": 15259 }, { "epoch": 363.33432835820895, "grad_norm": 17.627317428588867, "learning_rate": 9.914160401002507e-06, "loss": 31.4861, "step": 15260 }, { "epoch": 363.35820895522386, "grad_norm": 16.3303279876709, "learning_rate": 9.913533834586467e-06, "loss": 32.5336, "step": 15261 }, { "epoch": 363.3820895522388, "grad_norm": 18.387176513671875, "learning_rate": 9.912907268170427e-06, "loss": 31.5398, "step": 15262 }, { "epoch": 363.40597014925373, "grad_norm": 20.99032211303711, "learning_rate": 9.912280701754386e-06, "loss": 31.3534, "step": 15263 }, { "epoch": 363.42985074626864, "grad_norm": 22.79098129272461, "learning_rate": 9.911654135338346e-06, "loss": 32.3939, "step": 15264 }, { "epoch": 363.4537313432836, "grad_norm": 17.615060806274414, "learning_rate": 9.911027568922308e-06, "loss": 32.314, "step": 15265 }, { "epoch": 363.4776119402985, "grad_norm": 16.081541061401367, "learning_rate": 9.910401002506267e-06, "loss": 32.4229, "step": 15266 }, { "epoch": 363.5014925373134, "grad_norm": 23.015844345092773, "learning_rate": 9.909774436090226e-06, "loss": 31.3226, "step": 15267 }, { "epoch": 363.52537313432833, "grad_norm": 25.138582229614258, "learning_rate": 9.909147869674185e-06, "loss": 31.9168, "step": 15268 }, { "epoch": 363.5492537313433, "grad_norm": 16.33774757385254, "learning_rate": 9.908521303258147e-06, "loss": 32.3593, "step": 15269 }, { "epoch": 363.5731343283582, "grad_norm": 18.47342300415039, "learning_rate": 9.907894736842107e-06, "loss": 31.7626, "step": 15270 }, { "epoch": 363.5970149253731, "grad_norm": 24.26117706298828, "learning_rate": 9.907268170426066e-06, "loss": 32.4874, "step": 15271 }, { "epoch": 363.6208955223881, "grad_norm": 19.61480712890625, "learning_rate": 9.906641604010026e-06, "loss": 31.4948, "step": 15272 }, { "epoch": 363.644776119403, "grad_norm": 19.58074951171875, "learning_rate": 9.906015037593986e-06, "loss": 31.8857, "step": 15273 }, { "epoch": 363.6686567164179, "grad_norm": 19.42598533630371, "learning_rate": 9.905388471177946e-06, "loss": 32.2685, "step": 15274 }, { "epoch": 363.6925373134328, "grad_norm": 21.81300163269043, "learning_rate": 9.904761904761906e-06, "loss": 32.3431, "step": 15275 }, { "epoch": 363.7164179104478, "grad_norm": 21.299245834350586, "learning_rate": 9.904135338345865e-06, "loss": 32.1986, "step": 15276 }, { "epoch": 363.7402985074627, "grad_norm": 16.842878341674805, "learning_rate": 9.903508771929825e-06, "loss": 33.189, "step": 15277 }, { "epoch": 363.7641791044776, "grad_norm": 22.124414443969727, "learning_rate": 9.902882205513785e-06, "loss": 32.4759, "step": 15278 }, { "epoch": 363.78805970149256, "grad_norm": 20.511117935180664, "learning_rate": 9.902255639097745e-06, "loss": 31.369, "step": 15279 }, { "epoch": 363.81194029850747, "grad_norm": 18.832853317260742, "learning_rate": 9.901629072681706e-06, "loss": 31.8353, "step": 15280 }, { "epoch": 363.8358208955224, "grad_norm": 20.221786499023438, "learning_rate": 9.901002506265664e-06, "loss": 30.8457, "step": 15281 }, { "epoch": 363.85970149253734, "grad_norm": 25.181447982788086, "learning_rate": 9.900375939849624e-06, "loss": 32.4595, "step": 15282 }, { "epoch": 363.88358208955225, "grad_norm": 16.329669952392578, "learning_rate": 9.899749373433584e-06, "loss": 31.1605, "step": 15283 }, { "epoch": 363.90746268656716, "grad_norm": 17.075014114379883, "learning_rate": 9.899122807017545e-06, "loss": 32.9915, "step": 15284 }, { "epoch": 363.93134328358207, "grad_norm": 23.282970428466797, "learning_rate": 9.898496240601505e-06, "loss": 31.0115, "step": 15285 }, { "epoch": 363.95522388059703, "grad_norm": 17.927663803100586, "learning_rate": 9.897869674185463e-06, "loss": 33.3532, "step": 15286 }, { "epoch": 363.97910447761194, "grad_norm": 17.54277992248535, "learning_rate": 9.897243107769425e-06, "loss": 31.9632, "step": 15287 }, { "epoch": 364.0, "grad_norm": 15.090412139892578, "learning_rate": 9.896616541353385e-06, "loss": 28.6456, "step": 15288 }, { "epoch": 364.0238805970149, "grad_norm": 17.34769630432129, "learning_rate": 9.895989974937344e-06, "loss": 31.6095, "step": 15289 }, { "epoch": 364.0477611940299, "grad_norm": 23.21035385131836, "learning_rate": 9.895363408521304e-06, "loss": 31.5968, "step": 15290 }, { "epoch": 364.0716417910448, "grad_norm": 21.537927627563477, "learning_rate": 9.894736842105264e-06, "loss": 32.173, "step": 15291 }, { "epoch": 364.0955223880597, "grad_norm": 18.95825958251953, "learning_rate": 9.894110275689224e-06, "loss": 32.0189, "step": 15292 }, { "epoch": 364.1194029850746, "grad_norm": 15.338730812072754, "learning_rate": 9.893483709273184e-06, "loss": 32.2987, "step": 15293 }, { "epoch": 364.14328358208957, "grad_norm": 21.401004791259766, "learning_rate": 9.892857142857143e-06, "loss": 30.9803, "step": 15294 }, { "epoch": 364.1671641791045, "grad_norm": 17.64253044128418, "learning_rate": 9.892230576441103e-06, "loss": 33.7867, "step": 15295 }, { "epoch": 364.1910447761194, "grad_norm": 21.7203311920166, "learning_rate": 9.891604010025063e-06, "loss": 31.395, "step": 15296 }, { "epoch": 364.21492537313435, "grad_norm": 22.429622650146484, "learning_rate": 9.890977443609023e-06, "loss": 31.6195, "step": 15297 }, { "epoch": 364.23880597014926, "grad_norm": 19.789377212524414, "learning_rate": 9.890350877192983e-06, "loss": 32.3123, "step": 15298 }, { "epoch": 364.26268656716417, "grad_norm": 16.696474075317383, "learning_rate": 9.889724310776944e-06, "loss": 31.7702, "step": 15299 }, { "epoch": 364.28656716417913, "grad_norm": 22.65117073059082, "learning_rate": 9.889097744360902e-06, "loss": 32.9917, "step": 15300 }, { "epoch": 364.31044776119404, "grad_norm": 18.027467727661133, "learning_rate": 9.888471177944862e-06, "loss": 31.2865, "step": 15301 }, { "epoch": 364.33432835820895, "grad_norm": 22.309864044189453, "learning_rate": 9.887844611528824e-06, "loss": 33.268, "step": 15302 }, { "epoch": 364.35820895522386, "grad_norm": 17.887165069580078, "learning_rate": 9.887218045112783e-06, "loss": 32.3819, "step": 15303 }, { "epoch": 364.3820895522388, "grad_norm": 20.570512771606445, "learning_rate": 9.886591478696743e-06, "loss": 32.2796, "step": 15304 }, { "epoch": 364.40597014925373, "grad_norm": 16.284791946411133, "learning_rate": 9.885964912280703e-06, "loss": 30.3214, "step": 15305 }, { "epoch": 364.42985074626864, "grad_norm": 20.710899353027344, "learning_rate": 9.885338345864663e-06, "loss": 32.5604, "step": 15306 }, { "epoch": 364.4537313432836, "grad_norm": 15.39677906036377, "learning_rate": 9.884711779448623e-06, "loss": 32.2054, "step": 15307 }, { "epoch": 364.4776119402985, "grad_norm": 17.5885009765625, "learning_rate": 9.884085213032582e-06, "loss": 32.0225, "step": 15308 }, { "epoch": 364.5014925373134, "grad_norm": 17.18393898010254, "learning_rate": 9.883458646616542e-06, "loss": 32.6099, "step": 15309 }, { "epoch": 364.52537313432833, "grad_norm": 22.8990421295166, "learning_rate": 9.882832080200502e-06, "loss": 31.5182, "step": 15310 }, { "epoch": 364.5492537313433, "grad_norm": 18.452173233032227, "learning_rate": 9.882205513784462e-06, "loss": 33.1553, "step": 15311 }, { "epoch": 364.5731343283582, "grad_norm": 18.07838249206543, "learning_rate": 9.881578947368422e-06, "loss": 31.8111, "step": 15312 }, { "epoch": 364.5970149253731, "grad_norm": 20.9888916015625, "learning_rate": 9.880952380952381e-06, "loss": 31.1067, "step": 15313 }, { "epoch": 364.6208955223881, "grad_norm": 20.998638153076172, "learning_rate": 9.880325814536341e-06, "loss": 32.2242, "step": 15314 }, { "epoch": 364.644776119403, "grad_norm": 18.86063003540039, "learning_rate": 9.879699248120301e-06, "loss": 31.5959, "step": 15315 }, { "epoch": 364.6686567164179, "grad_norm": 14.274968147277832, "learning_rate": 9.87907268170426e-06, "loss": 31.1165, "step": 15316 }, { "epoch": 364.6925373134328, "grad_norm": 15.340092658996582, "learning_rate": 9.878446115288222e-06, "loss": 31.7937, "step": 15317 }, { "epoch": 364.7164179104478, "grad_norm": 24.58694076538086, "learning_rate": 9.877819548872182e-06, "loss": 32.4217, "step": 15318 }, { "epoch": 364.7402985074627, "grad_norm": 20.13288688659668, "learning_rate": 9.87719298245614e-06, "loss": 32.5419, "step": 15319 }, { "epoch": 364.7641791044776, "grad_norm": 12.716923713684082, "learning_rate": 9.876566416040102e-06, "loss": 32.7927, "step": 15320 }, { "epoch": 364.78805970149256, "grad_norm": 18.81435775756836, "learning_rate": 9.875939849624061e-06, "loss": 31.619, "step": 15321 }, { "epoch": 364.81194029850747, "grad_norm": 25.388444900512695, "learning_rate": 9.875313283208021e-06, "loss": 33.0537, "step": 15322 }, { "epoch": 364.8358208955224, "grad_norm": 17.26911163330078, "learning_rate": 9.87468671679198e-06, "loss": 31.6359, "step": 15323 }, { "epoch": 364.85970149253734, "grad_norm": 15.614147186279297, "learning_rate": 9.874060150375941e-06, "loss": 32.3819, "step": 15324 }, { "epoch": 364.88358208955225, "grad_norm": 18.926908493041992, "learning_rate": 9.8734335839599e-06, "loss": 32.628, "step": 15325 }, { "epoch": 364.90746268656716, "grad_norm": 16.784101486206055, "learning_rate": 9.87280701754386e-06, "loss": 34.1808, "step": 15326 }, { "epoch": 364.93134328358207, "grad_norm": 21.025915145874023, "learning_rate": 9.87218045112782e-06, "loss": 31.5357, "step": 15327 }, { "epoch": 364.95522388059703, "grad_norm": 15.31556224822998, "learning_rate": 9.87155388471178e-06, "loss": 32.2547, "step": 15328 }, { "epoch": 364.97910447761194, "grad_norm": 24.622791290283203, "learning_rate": 9.87092731829574e-06, "loss": 31.9887, "step": 15329 }, { "epoch": 365.0, "grad_norm": 14.526107788085938, "learning_rate": 9.8703007518797e-06, "loss": 27.4657, "step": 15330 }, { "epoch": 365.0238805970149, "grad_norm": 19.568317413330078, "learning_rate": 9.86967418546366e-06, "loss": 31.4216, "step": 15331 }, { "epoch": 365.0477611940299, "grad_norm": 19.773418426513672, "learning_rate": 9.869047619047621e-06, "loss": 32.037, "step": 15332 }, { "epoch": 365.0716417910448, "grad_norm": 19.401296615600586, "learning_rate": 9.868421052631579e-06, "loss": 31.4882, "step": 15333 }, { "epoch": 365.0955223880597, "grad_norm": 22.989543914794922, "learning_rate": 9.867794486215539e-06, "loss": 30.8285, "step": 15334 }, { "epoch": 365.1194029850746, "grad_norm": 18.464834213256836, "learning_rate": 9.8671679197995e-06, "loss": 32.0074, "step": 15335 }, { "epoch": 365.14328358208957, "grad_norm": 19.494367599487305, "learning_rate": 9.86654135338346e-06, "loss": 32.3485, "step": 15336 }, { "epoch": 365.1671641791045, "grad_norm": 24.23535919189453, "learning_rate": 9.86591478696742e-06, "loss": 31.6978, "step": 15337 }, { "epoch": 365.1910447761194, "grad_norm": 19.43990135192871, "learning_rate": 9.86528822055138e-06, "loss": 31.9308, "step": 15338 }, { "epoch": 365.21492537313435, "grad_norm": 18.06524085998535, "learning_rate": 9.86466165413534e-06, "loss": 31.2191, "step": 15339 }, { "epoch": 365.23880597014926, "grad_norm": 17.638917922973633, "learning_rate": 9.8640350877193e-06, "loss": 32.2167, "step": 15340 }, { "epoch": 365.26268656716417, "grad_norm": 17.34427833557129, "learning_rate": 9.86340852130326e-06, "loss": 32.0774, "step": 15341 }, { "epoch": 365.28656716417913, "grad_norm": 22.937944412231445, "learning_rate": 9.862781954887219e-06, "loss": 31.2344, "step": 15342 }, { "epoch": 365.31044776119404, "grad_norm": 18.434293746948242, "learning_rate": 9.862155388471179e-06, "loss": 32.0202, "step": 15343 }, { "epoch": 365.33432835820895, "grad_norm": 17.503267288208008, "learning_rate": 9.861528822055139e-06, "loss": 31.5617, "step": 15344 }, { "epoch": 365.35820895522386, "grad_norm": 16.172712326049805, "learning_rate": 9.860902255639098e-06, "loss": 32.8991, "step": 15345 }, { "epoch": 365.3820895522388, "grad_norm": 20.303129196166992, "learning_rate": 9.860275689223058e-06, "loss": 32.6047, "step": 15346 }, { "epoch": 365.40597014925373, "grad_norm": 22.519086837768555, "learning_rate": 9.859649122807018e-06, "loss": 32.259, "step": 15347 }, { "epoch": 365.42985074626864, "grad_norm": 19.33053207397461, "learning_rate": 9.859022556390978e-06, "loss": 32.6486, "step": 15348 }, { "epoch": 365.4537313432836, "grad_norm": 18.114622116088867, "learning_rate": 9.858395989974938e-06, "loss": 32.7031, "step": 15349 }, { "epoch": 365.4776119402985, "grad_norm": 18.428302764892578, "learning_rate": 9.857769423558899e-06, "loss": 32.2902, "step": 15350 }, { "epoch": 365.5014925373134, "grad_norm": 14.867958068847656, "learning_rate": 9.857142857142859e-06, "loss": 33.123, "step": 15351 }, { "epoch": 365.52537313432833, "grad_norm": 20.644237518310547, "learning_rate": 9.856516290726817e-06, "loss": 32.3267, "step": 15352 }, { "epoch": 365.5492537313433, "grad_norm": 18.06245994567871, "learning_rate": 9.855889724310778e-06, "loss": 31.8889, "step": 15353 }, { "epoch": 365.5731343283582, "grad_norm": 17.88252830505371, "learning_rate": 9.855263157894738e-06, "loss": 31.7894, "step": 15354 }, { "epoch": 365.5970149253731, "grad_norm": NaN, "learning_rate": 9.854636591478698e-06, "loss": 27.8248, "step": 15355 }, { "epoch": 365.6208955223881, "grad_norm": 21.058269500732422, "learning_rate": 9.854636591478698e-06, "loss": 32.0298, "step": 15356 }, { "epoch": 365.644776119403, "grad_norm": 21.552438735961914, "learning_rate": 9.854010025062656e-06, "loss": 33.0316, "step": 15357 }, { "epoch": 365.6686567164179, "grad_norm": 16.302946090698242, "learning_rate": 9.853383458646618e-06, "loss": 31.7935, "step": 15358 }, { "epoch": 365.6925373134328, "grad_norm": 18.238101959228516, "learning_rate": 9.852756892230577e-06, "loss": 31.5756, "step": 15359 }, { "epoch": 365.7164179104478, "grad_norm": 17.57839012145996, "learning_rate": 9.852130325814537e-06, "loss": 32.1403, "step": 15360 }, { "epoch": 365.7402985074627, "grad_norm": 21.027997970581055, "learning_rate": 9.851503759398497e-06, "loss": 33.4299, "step": 15361 }, { "epoch": 365.7641791044776, "grad_norm": 23.699108123779297, "learning_rate": 9.850877192982457e-06, "loss": 32.4048, "step": 15362 }, { "epoch": 365.78805970149256, "grad_norm": 16.43022346496582, "learning_rate": 9.850250626566417e-06, "loss": 32.8855, "step": 15363 }, { "epoch": 365.81194029850747, "grad_norm": 21.189767837524414, "learning_rate": 9.849624060150376e-06, "loss": 30.8562, "step": 15364 }, { "epoch": 365.8358208955224, "grad_norm": 25.296646118164062, "learning_rate": 9.848997493734336e-06, "loss": 32.8415, "step": 15365 }, { "epoch": 365.85970149253734, "grad_norm": 23.162118911743164, "learning_rate": 9.848370927318298e-06, "loss": 31.0586, "step": 15366 }, { "epoch": 365.88358208955225, "grad_norm": 15.77981185913086, "learning_rate": 9.847744360902256e-06, "loss": 32.1768, "step": 15367 }, { "epoch": 365.90746268656716, "grad_norm": 25.820587158203125, "learning_rate": 9.847117794486216e-06, "loss": 31.7466, "step": 15368 }, { "epoch": 365.93134328358207, "grad_norm": 22.924257278442383, "learning_rate": 9.846491228070177e-06, "loss": 32.3555, "step": 15369 }, { "epoch": 365.95522388059703, "grad_norm": 20.160552978515625, "learning_rate": 9.845864661654137e-06, "loss": 31.8706, "step": 15370 }, { "epoch": 365.97910447761194, "grad_norm": 19.70369529724121, "learning_rate": 9.845238095238097e-06, "loss": 31.9827, "step": 15371 }, { "epoch": 366.0, "grad_norm": 19.337146759033203, "learning_rate": 9.844611528822055e-06, "loss": 28.0124, "step": 15372 }, { "epoch": 366.0238805970149, "grad_norm": 16.080902099609375, "learning_rate": 9.843984962406016e-06, "loss": 32.6589, "step": 15373 }, { "epoch": 366.0477611940299, "grad_norm": 20.87893295288086, "learning_rate": 9.843358395989976e-06, "loss": 32.5517, "step": 15374 }, { "epoch": 366.0716417910448, "grad_norm": 18.223037719726562, "learning_rate": 9.842731829573936e-06, "loss": 31.3611, "step": 15375 }, { "epoch": 366.0955223880597, "grad_norm": 19.568954467773438, "learning_rate": 9.842105263157896e-06, "loss": 32.4866, "step": 15376 }, { "epoch": 366.1194029850746, "grad_norm": 17.641977310180664, "learning_rate": 9.841478696741856e-06, "loss": 32.9956, "step": 15377 }, { "epoch": 366.14328358208957, "grad_norm": 18.96194076538086, "learning_rate": 9.840852130325815e-06, "loss": 32.7475, "step": 15378 }, { "epoch": 366.1671641791045, "grad_norm": 19.562108993530273, "learning_rate": 9.840225563909775e-06, "loss": 31.9512, "step": 15379 }, { "epoch": 366.1910447761194, "grad_norm": 20.464187622070312, "learning_rate": 9.839598997493735e-06, "loss": 32.5754, "step": 15380 }, { "epoch": 366.21492537313435, "grad_norm": 20.65941047668457, "learning_rate": 9.838972431077695e-06, "loss": 31.1942, "step": 15381 }, { "epoch": 366.23880597014926, "grad_norm": 18.312759399414062, "learning_rate": 9.838345864661655e-06, "loss": 31.6319, "step": 15382 }, { "epoch": 366.26268656716417, "grad_norm": 21.73177146911621, "learning_rate": 9.837719298245614e-06, "loss": 33.0737, "step": 15383 }, { "epoch": 366.28656716417913, "grad_norm": 19.632993698120117, "learning_rate": 9.837092731829576e-06, "loss": 31.7084, "step": 15384 }, { "epoch": 366.31044776119404, "grad_norm": 17.026018142700195, "learning_rate": 9.836466165413536e-06, "loss": 32.1507, "step": 15385 }, { "epoch": 366.33432835820895, "grad_norm": 27.969884872436523, "learning_rate": 9.835839598997494e-06, "loss": 31.6364, "step": 15386 }, { "epoch": 366.35820895522386, "grad_norm": 25.337657928466797, "learning_rate": 9.835213032581454e-06, "loss": 32.8782, "step": 15387 }, { "epoch": 366.3820895522388, "grad_norm": 18.363353729248047, "learning_rate": 9.834586466165415e-06, "loss": 30.9665, "step": 15388 }, { "epoch": 366.40597014925373, "grad_norm": 25.76776123046875, "learning_rate": 9.833959899749375e-06, "loss": 32.6728, "step": 15389 }, { "epoch": 366.42985074626864, "grad_norm": 23.761690139770508, "learning_rate": 9.833333333333333e-06, "loss": 31.9841, "step": 15390 }, { "epoch": 366.4537313432836, "grad_norm": 17.141319274902344, "learning_rate": 9.832706766917294e-06, "loss": 32.5077, "step": 15391 }, { "epoch": 366.4776119402985, "grad_norm": 25.57672691345215, "learning_rate": 9.832080200501254e-06, "loss": 31.7279, "step": 15392 }, { "epoch": 366.5014925373134, "grad_norm": 22.10552406311035, "learning_rate": 9.831453634085214e-06, "loss": 32.2644, "step": 15393 }, { "epoch": 366.52537313432833, "grad_norm": 16.392301559448242, "learning_rate": 9.830827067669174e-06, "loss": 31.6418, "step": 15394 }, { "epoch": 366.5492537313433, "grad_norm": 22.449726104736328, "learning_rate": 9.830200501253134e-06, "loss": 32.3267, "step": 15395 }, { "epoch": 366.5731343283582, "grad_norm": 22.13176727294922, "learning_rate": 9.829573934837093e-06, "loss": 32.8179, "step": 15396 }, { "epoch": 366.5970149253731, "grad_norm": 18.91019058227539, "learning_rate": 9.828947368421053e-06, "loss": 31.3633, "step": 15397 }, { "epoch": 366.6208955223881, "grad_norm": 15.951644897460938, "learning_rate": 9.828320802005013e-06, "loss": 31.5082, "step": 15398 }, { "epoch": 366.644776119403, "grad_norm": 28.609750747680664, "learning_rate": 9.827694235588975e-06, "loss": 32.2829, "step": 15399 }, { "epoch": 366.6686567164179, "grad_norm": 17.81928062438965, "learning_rate": 9.827067669172933e-06, "loss": 32.403, "step": 15400 }, { "epoch": 366.6925373134328, "grad_norm": 22.5119571685791, "learning_rate": 9.826441102756892e-06, "loss": 32.0073, "step": 15401 }, { "epoch": 366.7164179104478, "grad_norm": 26.724044799804688, "learning_rate": 9.825814536340852e-06, "loss": 31.6893, "step": 15402 }, { "epoch": 366.7402985074627, "grad_norm": 18.280824661254883, "learning_rate": 9.825187969924814e-06, "loss": 32.9523, "step": 15403 }, { "epoch": 366.7641791044776, "grad_norm": 29.23700523376465, "learning_rate": 9.824561403508772e-06, "loss": 32.6632, "step": 15404 }, { "epoch": 366.78805970149256, "grad_norm": 22.60460090637207, "learning_rate": 9.823934837092732e-06, "loss": 31.6254, "step": 15405 }, { "epoch": 366.81194029850747, "grad_norm": 24.471580505371094, "learning_rate": 9.823308270676693e-06, "loss": 32.4686, "step": 15406 }, { "epoch": 366.8358208955224, "grad_norm": 23.822912216186523, "learning_rate": 9.822681704260653e-06, "loss": 30.3543, "step": 15407 }, { "epoch": 366.85970149253734, "grad_norm": 20.79631805419922, "learning_rate": 9.822055137844613e-06, "loss": 30.3284, "step": 15408 }, { "epoch": 366.88358208955225, "grad_norm": 15.284931182861328, "learning_rate": 9.821428571428573e-06, "loss": 32.3226, "step": 15409 }, { "epoch": 366.90746268656716, "grad_norm": 17.51460838317871, "learning_rate": 9.820802005012532e-06, "loss": 32.3922, "step": 15410 }, { "epoch": 366.93134328358207, "grad_norm": 20.007862091064453, "learning_rate": 9.820175438596492e-06, "loss": 31.3975, "step": 15411 }, { "epoch": 366.95522388059703, "grad_norm": 19.774919509887695, "learning_rate": 9.819548872180452e-06, "loss": 31.7776, "step": 15412 }, { "epoch": 366.97910447761194, "grad_norm": 21.901992797851562, "learning_rate": 9.818922305764412e-06, "loss": 32.1234, "step": 15413 }, { "epoch": 367.0, "grad_norm": 16.642581939697266, "learning_rate": 9.818295739348372e-06, "loss": 26.9963, "step": 15414 }, { "epoch": 367.0238805970149, "grad_norm": 17.27568817138672, "learning_rate": 9.817669172932331e-06, "loss": 32.547, "step": 15415 }, { "epoch": 367.0477611940299, "grad_norm": 15.555951118469238, "learning_rate": 9.817042606516291e-06, "loss": 31.6967, "step": 15416 }, { "epoch": 367.0716417910448, "grad_norm": 15.493464469909668, "learning_rate": 9.816416040100251e-06, "loss": 31.5815, "step": 15417 }, { "epoch": 367.0955223880597, "grad_norm": 17.905778884887695, "learning_rate": 9.815789473684212e-06, "loss": 31.9997, "step": 15418 }, { "epoch": 367.1194029850746, "grad_norm": 18.846046447753906, "learning_rate": 9.81516290726817e-06, "loss": 31.3564, "step": 15419 }, { "epoch": 367.14328358208957, "grad_norm": 16.605934143066406, "learning_rate": 9.81453634085213e-06, "loss": 31.3784, "step": 15420 }, { "epoch": 367.1671641791045, "grad_norm": 17.084476470947266, "learning_rate": 9.813909774436092e-06, "loss": 31.1145, "step": 15421 }, { "epoch": 367.1910447761194, "grad_norm": 17.43899154663086, "learning_rate": 9.813283208020052e-06, "loss": 32.7294, "step": 15422 }, { "epoch": 367.21492537313435, "grad_norm": 24.862119674682617, "learning_rate": 9.81265664160401e-06, "loss": 30.8308, "step": 15423 }, { "epoch": 367.23880597014926, "grad_norm": 17.063512802124023, "learning_rate": 9.812030075187971e-06, "loss": 31.9816, "step": 15424 }, { "epoch": 367.26268656716417, "grad_norm": 17.12827491760254, "learning_rate": 9.811403508771931e-06, "loss": 32.8559, "step": 15425 }, { "epoch": 367.28656716417913, "grad_norm": 23.62639808654785, "learning_rate": 9.81077694235589e-06, "loss": 31.6583, "step": 15426 }, { "epoch": 367.31044776119404, "grad_norm": 22.921585083007812, "learning_rate": 9.81015037593985e-06, "loss": 31.7304, "step": 15427 }, { "epoch": 367.33432835820895, "grad_norm": 17.045209884643555, "learning_rate": 9.80952380952381e-06, "loss": 32.3467, "step": 15428 }, { "epoch": 367.35820895522386, "grad_norm": 29.46330451965332, "learning_rate": 9.80889724310777e-06, "loss": 31.5553, "step": 15429 }, { "epoch": 367.3820895522388, "grad_norm": 21.51797866821289, "learning_rate": 9.80827067669173e-06, "loss": 31.6182, "step": 15430 }, { "epoch": 367.40597014925373, "grad_norm": 18.44357681274414, "learning_rate": 9.80764411027569e-06, "loss": 31.3722, "step": 15431 }, { "epoch": 367.42985074626864, "grad_norm": 30.695262908935547, "learning_rate": 9.80701754385965e-06, "loss": 32.5535, "step": 15432 }, { "epoch": 367.4537313432836, "grad_norm": 17.565475463867188, "learning_rate": 9.80639097744361e-06, "loss": 32.4373, "step": 15433 }, { "epoch": 367.4776119402985, "grad_norm": 25.367204666137695, "learning_rate": 9.80576441102757e-06, "loss": 30.8352, "step": 15434 }, { "epoch": 367.5014925373134, "grad_norm": 23.94452667236328, "learning_rate": 9.805137844611529e-06, "loss": 32.6506, "step": 15435 }, { "epoch": 367.52537313432833, "grad_norm": 18.084810256958008, "learning_rate": 9.80451127819549e-06, "loss": 32.1855, "step": 15436 }, { "epoch": 367.5492537313433, "grad_norm": 31.32501792907715, "learning_rate": 9.803884711779449e-06, "loss": 32.3171, "step": 15437 }, { "epoch": 367.5731343283582, "grad_norm": 20.915185928344727, "learning_rate": 9.803258145363408e-06, "loss": 31.6824, "step": 15438 }, { "epoch": 367.5970149253731, "grad_norm": 21.990827560424805, "learning_rate": 9.80263157894737e-06, "loss": 32.0792, "step": 15439 }, { "epoch": 367.6208955223881, "grad_norm": 30.322816848754883, "learning_rate": 9.80200501253133e-06, "loss": 31.6604, "step": 15440 }, { "epoch": 367.644776119403, "grad_norm": 19.280437469482422, "learning_rate": 9.80137844611529e-06, "loss": 31.5484, "step": 15441 }, { "epoch": 367.6686567164179, "grad_norm": 30.336301803588867, "learning_rate": 9.80075187969925e-06, "loss": 31.835, "step": 15442 }, { "epoch": 367.6925373134328, "grad_norm": 25.579570770263672, "learning_rate": 9.800125313283209e-06, "loss": 31.8172, "step": 15443 }, { "epoch": 367.7164179104478, "grad_norm": 24.215070724487305, "learning_rate": 9.799498746867169e-06, "loss": 32.6645, "step": 15444 }, { "epoch": 367.7402985074627, "grad_norm": 32.025482177734375, "learning_rate": 9.798872180451129e-06, "loss": 32.8574, "step": 15445 }, { "epoch": 367.7641791044776, "grad_norm": 21.791353225708008, "learning_rate": 9.798245614035088e-06, "loss": 32.7522, "step": 15446 }, { "epoch": 367.78805970149256, "grad_norm": 41.8706169128418, "learning_rate": 9.797619047619048e-06, "loss": 32.2187, "step": 15447 }, { "epoch": 367.81194029850747, "grad_norm": 29.62680435180664, "learning_rate": 9.796992481203008e-06, "loss": 31.8115, "step": 15448 }, { "epoch": 367.8358208955224, "grad_norm": 44.39241409301758, "learning_rate": 9.796365914786968e-06, "loss": 32.425, "step": 15449 }, { "epoch": 367.85970149253734, "grad_norm": 40.109642028808594, "learning_rate": 9.795739348370928e-06, "loss": 32.9681, "step": 15450 }, { "epoch": 367.88358208955225, "grad_norm": 37.884368896484375, "learning_rate": 9.79511278195489e-06, "loss": 32.4673, "step": 15451 }, { "epoch": 367.90746268656716, "grad_norm": 36.89011001586914, "learning_rate": 9.794486215538847e-06, "loss": 32.4061, "step": 15452 }, { "epoch": 367.93134328358207, "grad_norm": 32.9327278137207, "learning_rate": 9.793859649122807e-06, "loss": 32.0519, "step": 15453 }, { "epoch": 367.95522388059703, "grad_norm": 26.91158676147461, "learning_rate": 9.793233082706769e-06, "loss": 32.754, "step": 15454 }, { "epoch": 367.97910447761194, "grad_norm": 42.379486083984375, "learning_rate": 9.792606516290728e-06, "loss": 31.3521, "step": 15455 }, { "epoch": 368.0, "grad_norm": 30.096933364868164, "learning_rate": 9.791979949874686e-06, "loss": 28.0914, "step": 15456 }, { "epoch": 368.0238805970149, "grad_norm": 36.74195861816406, "learning_rate": 9.791353383458648e-06, "loss": 30.9952, "step": 15457 }, { "epoch": 368.0477611940299, "grad_norm": 35.80557632446289, "learning_rate": 9.790726817042608e-06, "loss": 32.0456, "step": 15458 }, { "epoch": 368.0716417910448, "grad_norm": 30.618431091308594, "learning_rate": 9.790100250626568e-06, "loss": 32.3686, "step": 15459 }, { "epoch": 368.0955223880597, "grad_norm": 29.104036331176758, "learning_rate": 9.789473684210527e-06, "loss": 31.9376, "step": 15460 }, { "epoch": 368.1194029850746, "grad_norm": 34.558807373046875, "learning_rate": 9.788847117794487e-06, "loss": 30.0659, "step": 15461 }, { "epoch": 368.14328358208957, "grad_norm": 28.875043869018555, "learning_rate": 9.788220551378447e-06, "loss": 30.8547, "step": 15462 }, { "epoch": 368.1671641791045, "grad_norm": 38.92327117919922, "learning_rate": 9.787593984962407e-06, "loss": 31.9357, "step": 15463 }, { "epoch": 368.1910447761194, "grad_norm": 34.628334045410156, "learning_rate": 9.786967418546367e-06, "loss": 31.4637, "step": 15464 }, { "epoch": 368.21492537313435, "grad_norm": 36.315650939941406, "learning_rate": 9.786340852130326e-06, "loss": 31.6243, "step": 15465 }, { "epoch": 368.23880597014926, "grad_norm": 35.161319732666016, "learning_rate": 9.785714285714286e-06, "loss": 32.0192, "step": 15466 }, { "epoch": 368.26268656716417, "grad_norm": 31.11237335205078, "learning_rate": 9.785087719298246e-06, "loss": 32.99, "step": 15467 }, { "epoch": 368.28656716417913, "grad_norm": 29.740867614746094, "learning_rate": 9.784461152882206e-06, "loss": 31.46, "step": 15468 }, { "epoch": 368.31044776119404, "grad_norm": 35.33416748046875, "learning_rate": 9.783834586466167e-06, "loss": 32.2135, "step": 15469 }, { "epoch": 368.33432835820895, "grad_norm": 32.053707122802734, "learning_rate": 9.783208020050125e-06, "loss": 32.5681, "step": 15470 }, { "epoch": 368.35820895522386, "grad_norm": 32.8370361328125, "learning_rate": 9.782581453634085e-06, "loss": 32.3504, "step": 15471 }, { "epoch": 368.3820895522388, "grad_norm": 29.674190521240234, "learning_rate": 9.781954887218047e-06, "loss": 32.2585, "step": 15472 }, { "epoch": 368.40597014925373, "grad_norm": 36.9009895324707, "learning_rate": 9.781328320802006e-06, "loss": 32.2817, "step": 15473 }, { "epoch": 368.42985074626864, "grad_norm": 30.275714874267578, "learning_rate": 9.780701754385966e-06, "loss": 32.0449, "step": 15474 }, { "epoch": 368.4537313432836, "grad_norm": 34.580535888671875, "learning_rate": 9.780075187969924e-06, "loss": 32.2336, "step": 15475 }, { "epoch": 368.4776119402985, "grad_norm": 31.394521713256836, "learning_rate": 9.779448621553886e-06, "loss": 33.726, "step": 15476 }, { "epoch": 368.5014925373134, "grad_norm": 36.203285217285156, "learning_rate": 9.778822055137846e-06, "loss": 31.1039, "step": 15477 }, { "epoch": 368.52537313432833, "grad_norm": 29.48996353149414, "learning_rate": 9.778195488721805e-06, "loss": 31.3228, "step": 15478 }, { "epoch": 368.5492537313433, "grad_norm": 37.20621109008789, "learning_rate": 9.777568922305765e-06, "loss": 32.4839, "step": 15479 }, { "epoch": 368.5731343283582, "grad_norm": 31.56957244873047, "learning_rate": 9.776942355889725e-06, "loss": 31.7537, "step": 15480 }, { "epoch": 368.5970149253731, "grad_norm": 33.576663970947266, "learning_rate": 9.776315789473685e-06, "loss": 32.0297, "step": 15481 }, { "epoch": 368.6208955223881, "grad_norm": 30.444557189941406, "learning_rate": 9.775689223057645e-06, "loss": 31.9534, "step": 15482 }, { "epoch": 368.644776119403, "grad_norm": 34.19621658325195, "learning_rate": 9.775062656641604e-06, "loss": 31.6328, "step": 15483 }, { "epoch": 368.6686567164179, "grad_norm": 33.4950065612793, "learning_rate": 9.774436090225564e-06, "loss": 32.7578, "step": 15484 }, { "epoch": 368.6925373134328, "grad_norm": 31.00712776184082, "learning_rate": 9.773809523809524e-06, "loss": 32.7168, "step": 15485 }, { "epoch": 368.7164179104478, "grad_norm": 30.438560485839844, "learning_rate": 9.773182957393484e-06, "loss": 32.4116, "step": 15486 }, { "epoch": 368.7402985074627, "grad_norm": 35.17342758178711, "learning_rate": 9.772556390977445e-06, "loss": 32.0912, "step": 15487 }, { "epoch": 368.7641791044776, "grad_norm": 28.074779510498047, "learning_rate": 9.771929824561405e-06, "loss": 32.1994, "step": 15488 }, { "epoch": 368.78805970149256, "grad_norm": 35.372432708740234, "learning_rate": 9.771303258145363e-06, "loss": 31.5893, "step": 15489 }, { "epoch": 368.81194029850747, "grad_norm": 30.71903419494629, "learning_rate": 9.770676691729323e-06, "loss": 31.6006, "step": 15490 }, { "epoch": 368.8358208955224, "grad_norm": 33.70318603515625, "learning_rate": 9.770050125313285e-06, "loss": 31.2515, "step": 15491 }, { "epoch": 368.85970149253734, "grad_norm": 30.457311630249023, "learning_rate": 9.769423558897244e-06, "loss": 31.5946, "step": 15492 }, { "epoch": 368.88358208955225, "grad_norm": 34.40906524658203, "learning_rate": 9.768796992481204e-06, "loss": 32.6181, "step": 15493 }, { "epoch": 368.90746268656716, "grad_norm": 31.876022338867188, "learning_rate": 9.768170426065164e-06, "loss": 31.9773, "step": 15494 }, { "epoch": 368.93134328358207, "grad_norm": 34.63753128051758, "learning_rate": 9.767543859649124e-06, "loss": 32.5075, "step": 15495 }, { "epoch": 368.95522388059703, "grad_norm": 28.70196533203125, "learning_rate": 9.766917293233084e-06, "loss": 32.4539, "step": 15496 }, { "epoch": 368.97910447761194, "grad_norm": 33.3589973449707, "learning_rate": 9.766290726817043e-06, "loss": 30.9086, "step": 15497 }, { "epoch": 369.0, "grad_norm": 25.43604850769043, "learning_rate": 9.765664160401003e-06, "loss": 28.0329, "step": 15498 }, { "epoch": 369.0238805970149, "grad_norm": 35.03908920288086, "learning_rate": 9.765037593984963e-06, "loss": 33.3028, "step": 15499 }, { "epoch": 369.0477611940299, "grad_norm": 31.213130950927734, "learning_rate": 9.764411027568923e-06, "loss": 32.0712, "step": 15500 }, { "epoch": 369.0716417910448, "grad_norm": 34.93855285644531, "learning_rate": 9.763784461152883e-06, "loss": 33.2838, "step": 15501 }, { "epoch": 369.0955223880597, "grad_norm": 30.033199310302734, "learning_rate": 9.763157894736844e-06, "loss": 31.3056, "step": 15502 }, { "epoch": 369.1194029850746, "grad_norm": 31.440475463867188, "learning_rate": 9.762531328320802e-06, "loss": 32.4002, "step": 15503 }, { "epoch": 369.14328358208957, "grad_norm": 24.40007781982422, "learning_rate": 9.761904761904762e-06, "loss": 31.126, "step": 15504 }, { "epoch": 369.1671641791045, "grad_norm": 35.60890579223633, "learning_rate": 9.761278195488722e-06, "loss": 31.9125, "step": 15505 }, { "epoch": 369.1910447761194, "grad_norm": 30.02541160583496, "learning_rate": 9.760651629072683e-06, "loss": 31.2605, "step": 15506 }, { "epoch": 369.21492537313435, "grad_norm": 32.43452453613281, "learning_rate": 9.760025062656643e-06, "loss": 31.4892, "step": 15507 }, { "epoch": 369.23880597014926, "grad_norm": 31.58152198791504, "learning_rate": 9.759398496240601e-06, "loss": 31.267, "step": 15508 }, { "epoch": 369.26268656716417, "grad_norm": 31.37113380432129, "learning_rate": 9.758771929824563e-06, "loss": 32.1276, "step": 15509 }, { "epoch": 369.28656716417913, "grad_norm": 26.584365844726562, "learning_rate": 9.758145363408522e-06, "loss": 32.9294, "step": 15510 }, { "epoch": 369.31044776119404, "grad_norm": 30.561748504638672, "learning_rate": 9.757518796992482e-06, "loss": 32.3237, "step": 15511 }, { "epoch": 369.33432835820895, "grad_norm": 25.796714782714844, "learning_rate": 9.756892230576442e-06, "loss": 33.5997, "step": 15512 }, { "epoch": 369.35820895522386, "grad_norm": 35.041847229003906, "learning_rate": 9.756265664160402e-06, "loss": 31.7871, "step": 15513 }, { "epoch": 369.3820895522388, "grad_norm": 29.74791717529297, "learning_rate": 9.755639097744362e-06, "loss": 32.6398, "step": 15514 }, { "epoch": 369.40597014925373, "grad_norm": 34.51224899291992, "learning_rate": 9.755012531328321e-06, "loss": 31.3808, "step": 15515 }, { "epoch": 369.42985074626864, "grad_norm": 30.90576171875, "learning_rate": 9.754385964912281e-06, "loss": 32.1477, "step": 15516 }, { "epoch": 369.4537313432836, "grad_norm": 32.87046813964844, "learning_rate": 9.753759398496241e-06, "loss": 30.7815, "step": 15517 }, { "epoch": 369.4776119402985, "grad_norm": 31.34151268005371, "learning_rate": 9.7531328320802e-06, "loss": 32.4918, "step": 15518 }, { "epoch": 369.5014925373134, "grad_norm": 33.1551513671875, "learning_rate": 9.75250626566416e-06, "loss": 31.5781, "step": 15519 }, { "epoch": 369.52537313432833, "grad_norm": 30.585973739624023, "learning_rate": 9.751879699248122e-06, "loss": 31.218, "step": 15520 }, { "epoch": 369.5492537313433, "grad_norm": NaN, "learning_rate": 9.751253132832082e-06, "loss": 45.2538, "step": 15521 }, { "epoch": 369.5731343283582, "grad_norm": 30.716432571411133, "learning_rate": 9.751253132832082e-06, "loss": 31.3152, "step": 15522 }, { "epoch": 369.5970149253731, "grad_norm": 26.349685668945312, "learning_rate": 9.75062656641604e-06, "loss": 32.5287, "step": 15523 }, { "epoch": 369.6208955223881, "grad_norm": 35.84343338012695, "learning_rate": 9.75e-06, "loss": 33.6801, "step": 15524 }, { "epoch": 369.644776119403, "grad_norm": 29.796785354614258, "learning_rate": 9.749373433583961e-06, "loss": 31.9983, "step": 15525 }, { "epoch": 369.6686567164179, "grad_norm": 36.817138671875, "learning_rate": 9.748746867167921e-06, "loss": 31.5356, "step": 15526 }, { "epoch": 369.6925373134328, "grad_norm": 33.89388656616211, "learning_rate": 9.748120300751881e-06, "loss": 30.51, "step": 15527 }, { "epoch": 369.7164179104478, "grad_norm": 29.24090576171875, "learning_rate": 9.74749373433584e-06, "loss": 31.9835, "step": 15528 }, { "epoch": 369.7402985074627, "grad_norm": 30.145151138305664, "learning_rate": 9.7468671679198e-06, "loss": 31.6624, "step": 15529 }, { "epoch": 369.7641791044776, "grad_norm": 30.085168838500977, "learning_rate": 9.74624060150376e-06, "loss": 31.5058, "step": 15530 }, { "epoch": 369.78805970149256, "grad_norm": 25.119464874267578, "learning_rate": 9.74561403508772e-06, "loss": 31.6317, "step": 15531 }, { "epoch": 369.81194029850747, "grad_norm": 38.49943923950195, "learning_rate": 9.74498746867168e-06, "loss": 31.6026, "step": 15532 }, { "epoch": 369.8358208955224, "grad_norm": 30.145709991455078, "learning_rate": 9.74436090225564e-06, "loss": 30.7972, "step": 15533 }, { "epoch": 369.85970149253734, "grad_norm": 32.580440521240234, "learning_rate": 9.7437343358396e-06, "loss": 32.9526, "step": 15534 }, { "epoch": 369.88358208955225, "grad_norm": 32.929630279541016, "learning_rate": 9.74310776942356e-06, "loss": 31.8029, "step": 15535 }, { "epoch": 369.90746268656716, "grad_norm": 32.2010612487793, "learning_rate": 9.74248120300752e-06, "loss": 31.5956, "step": 15536 }, { "epoch": 369.93134328358207, "grad_norm": 26.953540802001953, "learning_rate": 9.741854636591479e-06, "loss": 32.5965, "step": 15537 }, { "epoch": 369.95522388059703, "grad_norm": 35.538597106933594, "learning_rate": 9.741228070175439e-06, "loss": 31.7904, "step": 15538 }, { "epoch": 369.97910447761194, "grad_norm": 30.791345596313477, "learning_rate": 9.740601503759399e-06, "loss": 31.886, "step": 15539 }, { "epoch": 370.0, "grad_norm": 26.10861587524414, "learning_rate": 9.73997493734336e-06, "loss": 26.5258, "step": 15540 }, { "epoch": 370.0238805970149, "grad_norm": 25.21748924255371, "learning_rate": 9.73934837092732e-06, "loss": 32.4107, "step": 15541 }, { "epoch": 370.0477611940299, "grad_norm": 33.55727767944336, "learning_rate": 9.738721804511278e-06, "loss": 31.2604, "step": 15542 }, { "epoch": 370.0716417910448, "grad_norm": 27.492733001708984, "learning_rate": 9.73809523809524e-06, "loss": 31.3192, "step": 15543 }, { "epoch": 370.0955223880597, "grad_norm": 35.26737976074219, "learning_rate": 9.7374686716792e-06, "loss": 32.7499, "step": 15544 }, { "epoch": 370.1194029850746, "grad_norm": 29.664501190185547, "learning_rate": 9.736842105263159e-06, "loss": 30.9097, "step": 15545 }, { "epoch": 370.14328358208957, "grad_norm": 29.96634292602539, "learning_rate": 9.736215538847119e-06, "loss": 32.626, "step": 15546 }, { "epoch": 370.1671641791045, "grad_norm": 26.555383682250977, "learning_rate": 9.735588972431079e-06, "loss": 31.5584, "step": 15547 }, { "epoch": 370.1910447761194, "grad_norm": 31.332529067993164, "learning_rate": 9.734962406015038e-06, "loss": 32.3188, "step": 15548 }, { "epoch": 370.21492537313435, "grad_norm": 24.305049896240234, "learning_rate": 9.734335839598998e-06, "loss": 31.6605, "step": 15549 }, { "epoch": 370.23880597014926, "grad_norm": 30.789113998413086, "learning_rate": 9.733709273182958e-06, "loss": 30.8001, "step": 15550 }, { "epoch": 370.26268656716417, "grad_norm": 25.05321502685547, "learning_rate": 9.733082706766918e-06, "loss": 32.8161, "step": 15551 }, { "epoch": 370.28656716417913, "grad_norm": 33.553836822509766, "learning_rate": 9.732456140350878e-06, "loss": 32.5293, "step": 15552 }, { "epoch": 370.31044776119404, "grad_norm": 30.329069137573242, "learning_rate": 9.731829573934837e-06, "loss": 32.8475, "step": 15553 }, { "epoch": 370.33432835820895, "grad_norm": 32.331119537353516, "learning_rate": 9.731203007518797e-06, "loss": 31.3911, "step": 15554 }, { "epoch": 370.35820895522386, "grad_norm": 26.98367691040039, "learning_rate": 9.730576441102759e-06, "loss": 31.6221, "step": 15555 }, { "epoch": 370.3820895522388, "grad_norm": 30.56206703186035, "learning_rate": 9.729949874686717e-06, "loss": 31.9551, "step": 15556 }, { "epoch": 370.40597014925373, "grad_norm": 27.096773147583008, "learning_rate": 9.729323308270677e-06, "loss": 31.6267, "step": 15557 }, { "epoch": 370.42985074626864, "grad_norm": 30.748807907104492, "learning_rate": 9.728696741854638e-06, "loss": 31.1861, "step": 15558 }, { "epoch": 370.4537313432836, "grad_norm": 27.02344512939453, "learning_rate": 9.728070175438598e-06, "loss": 31.6813, "step": 15559 }, { "epoch": 370.4776119402985, "grad_norm": 33.22273254394531, "learning_rate": 9.727443609022558e-06, "loss": 32.8792, "step": 15560 }, { "epoch": 370.5014925373134, "grad_norm": 27.815967559814453, "learning_rate": 9.726817042606517e-06, "loss": 31.3247, "step": 15561 }, { "epoch": 370.52537313432833, "grad_norm": 28.645957946777344, "learning_rate": 9.726190476190477e-06, "loss": 31.9764, "step": 15562 }, { "epoch": 370.5492537313433, "grad_norm": 22.48628807067871, "learning_rate": 9.725563909774437e-06, "loss": 30.3754, "step": 15563 }, { "epoch": 370.5731343283582, "grad_norm": 26.176103591918945, "learning_rate": 9.724937343358397e-06, "loss": 32.7292, "step": 15564 }, { "epoch": 370.5970149253731, "grad_norm": 22.917192459106445, "learning_rate": 9.724310776942357e-06, "loss": 31.6366, "step": 15565 }, { "epoch": 370.6208955223881, "grad_norm": 30.325681686401367, "learning_rate": 9.723684210526316e-06, "loss": 31.0988, "step": 15566 }, { "epoch": 370.644776119403, "grad_norm": 25.483360290527344, "learning_rate": 9.723057644110276e-06, "loss": 32.2486, "step": 15567 }, { "epoch": 370.6686567164179, "grad_norm": 29.788089752197266, "learning_rate": 9.722431077694236e-06, "loss": 32.6861, "step": 15568 }, { "epoch": 370.6925373134328, "grad_norm": 27.898733139038086, "learning_rate": 9.721804511278196e-06, "loss": 32.5099, "step": 15569 }, { "epoch": 370.7164179104478, "grad_norm": 23.3122615814209, "learning_rate": 9.721177944862156e-06, "loss": 30.6325, "step": 15570 }, { "epoch": 370.7402985074627, "grad_norm": 25.64780616760254, "learning_rate": 9.720551378446115e-06, "loss": 32.0149, "step": 15571 }, { "epoch": 370.7641791044776, "grad_norm": 24.018203735351562, "learning_rate": 9.719924812030075e-06, "loss": 31.7357, "step": 15572 }, { "epoch": 370.78805970149256, "grad_norm": 21.4426212310791, "learning_rate": 9.719298245614037e-06, "loss": 32.3928, "step": 15573 }, { "epoch": 370.81194029850747, "grad_norm": 22.026609420776367, "learning_rate": 9.718671679197997e-06, "loss": 31.4606, "step": 15574 }, { "epoch": 370.8358208955224, "grad_norm": 19.70415687561035, "learning_rate": 9.718045112781955e-06, "loss": 31.9559, "step": 15575 }, { "epoch": 370.85970149253734, "grad_norm": 19.977237701416016, "learning_rate": 9.717418546365916e-06, "loss": 32.4238, "step": 15576 }, { "epoch": 370.88358208955225, "grad_norm": 20.55643653869629, "learning_rate": 9.716791979949876e-06, "loss": 31.3513, "step": 15577 }, { "epoch": 370.90746268656716, "grad_norm": 18.112289428710938, "learning_rate": 9.716165413533836e-06, "loss": 33.0936, "step": 15578 }, { "epoch": 370.93134328358207, "grad_norm": 19.38970184326172, "learning_rate": 9.715538847117796e-06, "loss": 31.9432, "step": 15579 }, { "epoch": 370.95522388059703, "grad_norm": 19.477205276489258, "learning_rate": 9.714912280701755e-06, "loss": 32.2867, "step": 15580 }, { "epoch": 370.97910447761194, "grad_norm": 18.255844116210938, "learning_rate": 9.714285714285715e-06, "loss": 32.1032, "step": 15581 }, { "epoch": 371.0, "grad_norm": 17.171831130981445, "learning_rate": 9.713659147869675e-06, "loss": 28.3181, "step": 15582 }, { "epoch": 371.0238805970149, "grad_norm": 20.054550170898438, "learning_rate": 9.713032581453635e-06, "loss": 31.2504, "step": 15583 }, { "epoch": 371.0477611940299, "grad_norm": 16.24350357055664, "learning_rate": 9.712406015037595e-06, "loss": 30.9002, "step": 15584 }, { "epoch": 371.0716417910448, "grad_norm": 22.360483169555664, "learning_rate": 9.711779448621554e-06, "loss": 31.9409, "step": 15585 }, { "epoch": 371.0955223880597, "grad_norm": 15.936921119689941, "learning_rate": 9.711152882205514e-06, "loss": 32.561, "step": 15586 }, { "epoch": 371.1194029850746, "grad_norm": 24.830108642578125, "learning_rate": 9.710526315789474e-06, "loss": 32.1191, "step": 15587 }, { "epoch": 371.14328358208957, "grad_norm": 19.574199676513672, "learning_rate": 9.709899749373435e-06, "loss": 32.28, "step": 15588 }, { "epoch": 371.1671641791045, "grad_norm": 23.13572120666504, "learning_rate": 9.709273182957394e-06, "loss": 31.4571, "step": 15589 }, { "epoch": 371.1910447761194, "grad_norm": 18.860767364501953, "learning_rate": 9.708646616541353e-06, "loss": 32.0729, "step": 15590 }, { "epoch": 371.21492537313435, "grad_norm": 23.69676399230957, "learning_rate": 9.708020050125315e-06, "loss": 33.1218, "step": 15591 }, { "epoch": 371.23880597014926, "grad_norm": 23.62067222595215, "learning_rate": 9.707393483709275e-06, "loss": 31.9943, "step": 15592 }, { "epoch": 371.26268656716417, "grad_norm": 17.43523406982422, "learning_rate": 9.706766917293234e-06, "loss": 31.6582, "step": 15593 }, { "epoch": 371.28656716417913, "grad_norm": 23.645328521728516, "learning_rate": 9.706140350877193e-06, "loss": 32.9624, "step": 15594 }, { "epoch": 371.31044776119404, "grad_norm": 21.034313201904297, "learning_rate": 9.705513784461154e-06, "loss": 32.4597, "step": 15595 }, { "epoch": 371.33432835820895, "grad_norm": 21.051218032836914, "learning_rate": 9.704887218045114e-06, "loss": 31.1774, "step": 15596 }, { "epoch": 371.35820895522386, "grad_norm": 19.982168197631836, "learning_rate": 9.704260651629074e-06, "loss": 31.3354, "step": 15597 }, { "epoch": 371.3820895522388, "grad_norm": 18.409671783447266, "learning_rate": 9.703634085213033e-06, "loss": 31.4788, "step": 15598 }, { "epoch": 371.40597014925373, "grad_norm": 22.119007110595703, "learning_rate": 9.703007518796993e-06, "loss": 33.4, "step": 15599 }, { "epoch": 371.42985074626864, "grad_norm": 18.121231079101562, "learning_rate": 9.702380952380953e-06, "loss": 32.2274, "step": 15600 }, { "epoch": 371.4537313432836, "grad_norm": 20.418014526367188, "learning_rate": 9.701754385964913e-06, "loss": 31.1862, "step": 15601 }, { "epoch": 371.4776119402985, "grad_norm": 20.588905334472656, "learning_rate": 9.701127819548873e-06, "loss": 32.4219, "step": 15602 }, { "epoch": 371.5014925373134, "grad_norm": 19.312116622924805, "learning_rate": 9.700501253132832e-06, "loss": 32.1805, "step": 15603 }, { "epoch": 371.52537313432833, "grad_norm": 30.35874366760254, "learning_rate": 9.699874686716792e-06, "loss": 31.7602, "step": 15604 }, { "epoch": 371.5492537313433, "grad_norm": 20.58683204650879, "learning_rate": 9.699248120300752e-06, "loss": 32.6349, "step": 15605 }, { "epoch": 371.5731343283582, "grad_norm": 27.020854949951172, "learning_rate": 9.698621553884714e-06, "loss": 31.0244, "step": 15606 }, { "epoch": 371.5970149253731, "grad_norm": 19.93199348449707, "learning_rate": 9.697994987468673e-06, "loss": 31.4117, "step": 15607 }, { "epoch": 371.6208955223881, "grad_norm": 26.27665901184082, "learning_rate": 9.697368421052631e-06, "loss": 32.1331, "step": 15608 }, { "epoch": 371.644776119403, "grad_norm": 23.66201400756836, "learning_rate": 9.696741854636593e-06, "loss": 32.7146, "step": 15609 }, { "epoch": 371.6686567164179, "grad_norm": 22.81130027770996, "learning_rate": 9.696115288220553e-06, "loss": 31.8624, "step": 15610 }, { "epoch": 371.6925373134328, "grad_norm": 19.37236976623535, "learning_rate": 9.695488721804513e-06, "loss": 32.7146, "step": 15611 }, { "epoch": 371.7164179104478, "grad_norm": 25.079851150512695, "learning_rate": 9.69486215538847e-06, "loss": 31.806, "step": 15612 }, { "epoch": 371.7402985074627, "grad_norm": 21.69959831237793, "learning_rate": 9.694235588972432e-06, "loss": 31.6021, "step": 15613 }, { "epoch": 371.7641791044776, "grad_norm": 20.25904083251953, "learning_rate": 9.693609022556392e-06, "loss": 30.7198, "step": 15614 }, { "epoch": 371.78805970149256, "grad_norm": 20.798961639404297, "learning_rate": 9.692982456140352e-06, "loss": 32.8119, "step": 15615 }, { "epoch": 371.81194029850747, "grad_norm": 19.60314178466797, "learning_rate": 9.692355889724312e-06, "loss": 31.6579, "step": 15616 }, { "epoch": 371.8358208955224, "grad_norm": 18.63255500793457, "learning_rate": 9.691729323308271e-06, "loss": 31.6115, "step": 15617 }, { "epoch": 371.85970149253734, "grad_norm": 22.781856536865234, "learning_rate": 9.691102756892231e-06, "loss": 31.7108, "step": 15618 }, { "epoch": 371.88358208955225, "grad_norm": 18.63038444519043, "learning_rate": 9.690476190476191e-06, "loss": 30.5746, "step": 15619 }, { "epoch": 371.90746268656716, "grad_norm": 18.407224655151367, "learning_rate": 9.68984962406015e-06, "loss": 32.8008, "step": 15620 }, { "epoch": 371.93134328358207, "grad_norm": 15.794146537780762, "learning_rate": 9.689223057644112e-06, "loss": 31.8143, "step": 15621 }, { "epoch": 371.95522388059703, "grad_norm": 15.639073371887207, "learning_rate": 9.68859649122807e-06, "loss": 31.0757, "step": 15622 }, { "epoch": 371.97910447761194, "grad_norm": 18.33678436279297, "learning_rate": 9.68796992481203e-06, "loss": 32.9441, "step": 15623 }, { "epoch": 372.0, "grad_norm": 14.055055618286133, "learning_rate": 9.687343358395992e-06, "loss": 27.1119, "step": 15624 }, { "epoch": 372.0238805970149, "grad_norm": 20.265544891357422, "learning_rate": 9.686716791979951e-06, "loss": 32.5663, "step": 15625 }, { "epoch": 372.0477611940299, "grad_norm": 16.922901153564453, "learning_rate": 9.686090225563911e-06, "loss": 32.1715, "step": 15626 }, { "epoch": 372.0716417910448, "grad_norm": 17.308961868286133, "learning_rate": 9.68546365914787e-06, "loss": 31.0583, "step": 15627 }, { "epoch": 372.0955223880597, "grad_norm": 17.07660675048828, "learning_rate": 9.68483709273183e-06, "loss": 32.5213, "step": 15628 }, { "epoch": 372.1194029850746, "grad_norm": 19.201412200927734, "learning_rate": 9.68421052631579e-06, "loss": 32.5643, "step": 15629 }, { "epoch": 372.14328358208957, "grad_norm": 17.836727142333984, "learning_rate": 9.68358395989975e-06, "loss": 31.0902, "step": 15630 }, { "epoch": 372.1671641791045, "grad_norm": 18.333969116210938, "learning_rate": 9.68295739348371e-06, "loss": 31.5131, "step": 15631 }, { "epoch": 372.1910447761194, "grad_norm": 19.234643936157227, "learning_rate": 9.68233082706767e-06, "loss": 31.4236, "step": 15632 }, { "epoch": 372.21492537313435, "grad_norm": 19.713449478149414, "learning_rate": 9.68170426065163e-06, "loss": 31.9958, "step": 15633 }, { "epoch": 372.23880597014926, "grad_norm": 16.030332565307617, "learning_rate": 9.68107769423559e-06, "loss": 31.6064, "step": 15634 }, { "epoch": 372.26268656716417, "grad_norm": 19.08253288269043, "learning_rate": 9.68045112781955e-06, "loss": 32.1442, "step": 15635 }, { "epoch": 372.28656716417913, "grad_norm": 17.089033126831055, "learning_rate": 9.67982456140351e-06, "loss": 31.3209, "step": 15636 }, { "epoch": 372.31044776119404, "grad_norm": 20.829944610595703, "learning_rate": 9.679197994987469e-06, "loss": 31.4231, "step": 15637 }, { "epoch": 372.33432835820895, "grad_norm": 20.799938201904297, "learning_rate": 9.678571428571429e-06, "loss": 31.0764, "step": 15638 }, { "epoch": 372.35820895522386, "grad_norm": 16.779449462890625, "learning_rate": 9.67794486215539e-06, "loss": 31.2131, "step": 15639 }, { "epoch": 372.3820895522388, "grad_norm": 20.484376907348633, "learning_rate": 9.67731829573935e-06, "loss": 32.8235, "step": 15640 }, { "epoch": 372.40597014925373, "grad_norm": 17.515758514404297, "learning_rate": 9.676691729323308e-06, "loss": 31.6492, "step": 15641 }, { "epoch": 372.42985074626864, "grad_norm": 22.78826332092285, "learning_rate": 9.676065162907268e-06, "loss": 31.8365, "step": 15642 }, { "epoch": 372.4537313432836, "grad_norm": 19.350875854492188, "learning_rate": 9.67543859649123e-06, "loss": 31.5171, "step": 15643 }, { "epoch": 372.4776119402985, "grad_norm": 23.918296813964844, "learning_rate": 9.67481203007519e-06, "loss": 31.1798, "step": 15644 }, { "epoch": 372.5014925373134, "grad_norm": 18.345638275146484, "learning_rate": 9.674185463659147e-06, "loss": 33.647, "step": 15645 }, { "epoch": 372.52537313432833, "grad_norm": 17.2088565826416, "learning_rate": 9.673558897243109e-06, "loss": 30.8432, "step": 15646 }, { "epoch": 372.5492537313433, "grad_norm": 17.569700241088867, "learning_rate": 9.672932330827069e-06, "loss": 30.4097, "step": 15647 }, { "epoch": 372.5731343283582, "grad_norm": 19.014482498168945, "learning_rate": 9.672305764411029e-06, "loss": 31.684, "step": 15648 }, { "epoch": 372.5970149253731, "grad_norm": 18.570009231567383, "learning_rate": 9.671679197994988e-06, "loss": 32.3407, "step": 15649 }, { "epoch": 372.6208955223881, "grad_norm": 18.89525032043457, "learning_rate": 9.671052631578948e-06, "loss": 31.7127, "step": 15650 }, { "epoch": 372.644776119403, "grad_norm": 18.356260299682617, "learning_rate": 9.670426065162908e-06, "loss": 31.7904, "step": 15651 }, { "epoch": 372.6686567164179, "grad_norm": 17.954633712768555, "learning_rate": 9.669799498746868e-06, "loss": 31.2474, "step": 15652 }, { "epoch": 372.6925373134328, "grad_norm": 20.029273986816406, "learning_rate": 9.669172932330828e-06, "loss": 32.123, "step": 15653 }, { "epoch": 372.7164179104478, "grad_norm": 24.08721351623535, "learning_rate": 9.668546365914789e-06, "loss": 31.1802, "step": 15654 }, { "epoch": 372.7402985074627, "grad_norm": 19.90455436706543, "learning_rate": 9.667919799498747e-06, "loss": 32.2033, "step": 15655 }, { "epoch": 372.7641791044776, "grad_norm": 16.073408126831055, "learning_rate": 9.667293233082707e-06, "loss": 32.0278, "step": 15656 }, { "epoch": 372.78805970149256, "grad_norm": 19.517078399658203, "learning_rate": 9.666666666666667e-06, "loss": 31.9246, "step": 15657 }, { "epoch": 372.81194029850747, "grad_norm": 19.3916072845459, "learning_rate": 9.666040100250628e-06, "loss": 32.5814, "step": 15658 }, { "epoch": 372.8358208955224, "grad_norm": 19.744491577148438, "learning_rate": 9.665413533834588e-06, "loss": 32.2292, "step": 15659 }, { "epoch": 372.85970149253734, "grad_norm": 17.65901756286621, "learning_rate": 9.664786967418546e-06, "loss": 33.2274, "step": 15660 }, { "epoch": 372.88358208955225, "grad_norm": 16.525672912597656, "learning_rate": 9.664160401002508e-06, "loss": 32.1232, "step": 15661 }, { "epoch": 372.90746268656716, "grad_norm": 19.38490867614746, "learning_rate": 9.663533834586467e-06, "loss": 31.7979, "step": 15662 }, { "epoch": 372.93134328358207, "grad_norm": 19.699527740478516, "learning_rate": 9.662907268170427e-06, "loss": 31.924, "step": 15663 }, { "epoch": 372.95522388059703, "grad_norm": 16.93585777282715, "learning_rate": 9.662280701754387e-06, "loss": 33.2152, "step": 15664 }, { "epoch": 372.97910447761194, "grad_norm": 19.587158203125, "learning_rate": 9.661654135338347e-06, "loss": 31.9852, "step": 15665 }, { "epoch": 373.0, "grad_norm": 15.894246101379395, "learning_rate": 9.661027568922307e-06, "loss": 27.6835, "step": 15666 }, { "epoch": 373.0238805970149, "grad_norm": 21.86368751525879, "learning_rate": 9.660401002506266e-06, "loss": 32.8987, "step": 15667 }, { "epoch": 373.0477611940299, "grad_norm": 19.782215118408203, "learning_rate": 9.659774436090226e-06, "loss": 32.9165, "step": 15668 }, { "epoch": 373.0716417910448, "grad_norm": 21.27768325805664, "learning_rate": 9.659147869674186e-06, "loss": 32.9157, "step": 15669 }, { "epoch": 373.0955223880597, "grad_norm": 17.359445571899414, "learning_rate": 9.658521303258146e-06, "loss": 32.5595, "step": 15670 }, { "epoch": 373.1194029850746, "grad_norm": 15.899066925048828, "learning_rate": 9.657894736842106e-06, "loss": 31.5267, "step": 15671 }, { "epoch": 373.14328358208957, "grad_norm": 18.509159088134766, "learning_rate": 9.657268170426065e-06, "loss": 32.7882, "step": 15672 }, { "epoch": 373.1671641791045, "grad_norm": 17.6567440032959, "learning_rate": 9.656641604010027e-06, "loss": 29.9962, "step": 15673 }, { "epoch": 373.1910447761194, "grad_norm": 22.238590240478516, "learning_rate": 9.656015037593985e-06, "loss": 30.9683, "step": 15674 }, { "epoch": 373.21492537313435, "grad_norm": 18.666589736938477, "learning_rate": 9.655388471177945e-06, "loss": 31.52, "step": 15675 }, { "epoch": 373.23880597014926, "grad_norm": 15.589720726013184, "learning_rate": 9.654761904761906e-06, "loss": 32.5969, "step": 15676 }, { "epoch": 373.26268656716417, "grad_norm": 17.04974365234375, "learning_rate": 9.654135338345866e-06, "loss": 32.6573, "step": 15677 }, { "epoch": 373.28656716417913, "grad_norm": 17.244007110595703, "learning_rate": 9.653508771929824e-06, "loss": 31.636, "step": 15678 }, { "epoch": 373.31044776119404, "grad_norm": 20.437562942504883, "learning_rate": 9.652882205513786e-06, "loss": 31.147, "step": 15679 }, { "epoch": 373.33432835820895, "grad_norm": 17.9154109954834, "learning_rate": 9.652255639097746e-06, "loss": 32.4387, "step": 15680 }, { "epoch": 373.35820895522386, "grad_norm": 13.466055870056152, "learning_rate": 9.651629072681705e-06, "loss": 31.4355, "step": 15681 }, { "epoch": 373.3820895522388, "grad_norm": 19.356807708740234, "learning_rate": 9.651002506265665e-06, "loss": 33.0777, "step": 15682 }, { "epoch": 373.40597014925373, "grad_norm": 17.105945587158203, "learning_rate": 9.650375939849625e-06, "loss": 32.6633, "step": 15683 }, { "epoch": 373.42985074626864, "grad_norm": 23.539051055908203, "learning_rate": 9.649749373433585e-06, "loss": 31.5375, "step": 15684 }, { "epoch": 373.4537313432836, "grad_norm": 17.529346466064453, "learning_rate": 9.649122807017545e-06, "loss": 30.6411, "step": 15685 }, { "epoch": 373.4776119402985, "grad_norm": 22.33460235595703, "learning_rate": 9.648496240601504e-06, "loss": 32.7147, "step": 15686 }, { "epoch": 373.5014925373134, "grad_norm": 18.314443588256836, "learning_rate": 9.647869674185464e-06, "loss": 32.3927, "step": 15687 }, { "epoch": 373.52537313432833, "grad_norm": 23.103107452392578, "learning_rate": 9.647243107769424e-06, "loss": 31.2902, "step": 15688 }, { "epoch": 373.5492537313433, "grad_norm": 18.87906837463379, "learning_rate": 9.646616541353384e-06, "loss": 31.5671, "step": 15689 }, { "epoch": 373.5731343283582, "grad_norm": 20.747264862060547, "learning_rate": 9.645989974937343e-06, "loss": 31.3365, "step": 15690 }, { "epoch": 373.5970149253731, "grad_norm": 18.24740982055664, "learning_rate": 9.645363408521305e-06, "loss": 32.4551, "step": 15691 }, { "epoch": 373.6208955223881, "grad_norm": 25.681413650512695, "learning_rate": 9.644736842105263e-06, "loss": 32.2727, "step": 15692 }, { "epoch": 373.644776119403, "grad_norm": 19.273601531982422, "learning_rate": 9.644110275689223e-06, "loss": 30.9468, "step": 15693 }, { "epoch": 373.6686567164179, "grad_norm": 20.211137771606445, "learning_rate": 9.643483709273184e-06, "loss": 31.6142, "step": 15694 }, { "epoch": 373.6925373134328, "grad_norm": 21.14042091369629, "learning_rate": 9.642857142857144e-06, "loss": 32.7588, "step": 15695 }, { "epoch": 373.7164179104478, "grad_norm": 23.442846298217773, "learning_rate": 9.642230576441104e-06, "loss": 31.6502, "step": 15696 }, { "epoch": 373.7402985074627, "grad_norm": 18.872739791870117, "learning_rate": 9.641604010025064e-06, "loss": 31.2705, "step": 15697 }, { "epoch": 373.7641791044776, "grad_norm": 23.400981903076172, "learning_rate": 9.640977443609024e-06, "loss": 30.9387, "step": 15698 }, { "epoch": 373.78805970149256, "grad_norm": 25.57870101928711, "learning_rate": 9.640350877192983e-06, "loss": 32.485, "step": 15699 }, { "epoch": 373.81194029850747, "grad_norm": 17.87088394165039, "learning_rate": 9.639724310776943e-06, "loss": 32.1647, "step": 15700 }, { "epoch": 373.8358208955224, "grad_norm": 21.626007080078125, "learning_rate": 9.639097744360903e-06, "loss": 31.8758, "step": 15701 }, { "epoch": 373.85970149253734, "grad_norm": 21.196897506713867, "learning_rate": 9.638471177944863e-06, "loss": 32.1598, "step": 15702 }, { "epoch": 373.88358208955225, "grad_norm": 20.520910263061523, "learning_rate": 9.637844611528823e-06, "loss": 32.0218, "step": 15703 }, { "epoch": 373.90746268656716, "grad_norm": 17.130661010742188, "learning_rate": 9.637218045112782e-06, "loss": 31.28, "step": 15704 }, { "epoch": 373.93134328358207, "grad_norm": 22.795211791992188, "learning_rate": 9.636591478696742e-06, "loss": 30.8157, "step": 15705 }, { "epoch": 373.95522388059703, "grad_norm": 17.75351333618164, "learning_rate": 9.635964912280704e-06, "loss": 31.2262, "step": 15706 }, { "epoch": 373.97910447761194, "grad_norm": 19.189939498901367, "learning_rate": 9.635338345864662e-06, "loss": 30.2862, "step": 15707 }, { "epoch": 374.0, "grad_norm": 18.197975158691406, "learning_rate": 9.634711779448622e-06, "loss": 27.8654, "step": 15708 }, { "epoch": 374.0238805970149, "grad_norm": 20.827064514160156, "learning_rate": 9.634085213032583e-06, "loss": 32.659, "step": 15709 }, { "epoch": 374.0477611940299, "grad_norm": 16.823833465576172, "learning_rate": 9.633458646616543e-06, "loss": 31.2229, "step": 15710 }, { "epoch": 374.0716417910448, "grad_norm": 15.454418182373047, "learning_rate": 9.632832080200501e-06, "loss": 32.8586, "step": 15711 }, { "epoch": 374.0955223880597, "grad_norm": 15.673126220703125, "learning_rate": 9.632205513784462e-06, "loss": 31.9886, "step": 15712 }, { "epoch": 374.1194029850746, "grad_norm": 20.704084396362305, "learning_rate": 9.631578947368422e-06, "loss": 30.682, "step": 15713 }, { "epoch": 374.14328358208957, "grad_norm": 19.827491760253906, "learning_rate": 9.630952380952382e-06, "loss": 31.6092, "step": 15714 }, { "epoch": 374.1671641791045, "grad_norm": 20.26668930053711, "learning_rate": 9.630325814536342e-06, "loss": 30.9277, "step": 15715 }, { "epoch": 374.1910447761194, "grad_norm": 16.02630615234375, "learning_rate": 9.629699248120302e-06, "loss": 32.166, "step": 15716 }, { "epoch": 374.21492537313435, "grad_norm": 17.158720016479492, "learning_rate": 9.629072681704261e-06, "loss": 30.8832, "step": 15717 }, { "epoch": 374.23880597014926, "grad_norm": 18.231409072875977, "learning_rate": 9.628446115288221e-06, "loss": 31.5202, "step": 15718 }, { "epoch": 374.26268656716417, "grad_norm": 16.47313117980957, "learning_rate": 9.627819548872181e-06, "loss": 31.6077, "step": 15719 }, { "epoch": 374.28656716417913, "grad_norm": 19.122621536254883, "learning_rate": 9.627192982456141e-06, "loss": 32.1889, "step": 15720 }, { "epoch": 374.31044776119404, "grad_norm": 16.968183517456055, "learning_rate": 9.6265664160401e-06, "loss": 30.3898, "step": 15721 }, { "epoch": 374.33432835820895, "grad_norm": 17.954517364501953, "learning_rate": 9.62593984962406e-06, "loss": 30.8276, "step": 15722 }, { "epoch": 374.35820895522386, "grad_norm": 16.388755798339844, "learning_rate": 9.62531328320802e-06, "loss": 32.2938, "step": 15723 }, { "epoch": 374.3820895522388, "grad_norm": 16.530471801757812, "learning_rate": 9.624686716791982e-06, "loss": 31.9841, "step": 15724 }, { "epoch": 374.40597014925373, "grad_norm": 19.26321792602539, "learning_rate": 9.62406015037594e-06, "loss": 32.2286, "step": 15725 }, { "epoch": 374.42985074626864, "grad_norm": 15.312089920043945, "learning_rate": 9.6234335839599e-06, "loss": 31.0212, "step": 15726 }, { "epoch": 374.4537313432836, "grad_norm": 17.374874114990234, "learning_rate": 9.622807017543861e-06, "loss": 31.0968, "step": 15727 }, { "epoch": 374.4776119402985, "grad_norm": 17.588796615600586, "learning_rate": 9.622180451127821e-06, "loss": 32.8473, "step": 15728 }, { "epoch": 374.5014925373134, "grad_norm": 22.50474739074707, "learning_rate": 9.62155388471178e-06, "loss": 32.6445, "step": 15729 }, { "epoch": 374.52537313432833, "grad_norm": 20.12819480895996, "learning_rate": 9.620927318295739e-06, "loss": 32.2587, "step": 15730 }, { "epoch": 374.5492537313433, "grad_norm": 18.700468063354492, "learning_rate": 9.6203007518797e-06, "loss": 30.9637, "step": 15731 }, { "epoch": 374.5731343283582, "grad_norm": 16.99765396118164, "learning_rate": 9.61967418546366e-06, "loss": 33.1013, "step": 15732 }, { "epoch": 374.5970149253731, "grad_norm": 24.254283905029297, "learning_rate": 9.61904761904762e-06, "loss": 31.9349, "step": 15733 }, { "epoch": 374.6208955223881, "grad_norm": 22.096355438232422, "learning_rate": 9.61842105263158e-06, "loss": 31.8806, "step": 15734 }, { "epoch": 374.644776119403, "grad_norm": 15.354461669921875, "learning_rate": 9.61779448621554e-06, "loss": 32.1886, "step": 15735 }, { "epoch": 374.6686567164179, "grad_norm": 18.554244995117188, "learning_rate": 9.6171679197995e-06, "loss": 31.6642, "step": 15736 }, { "epoch": 374.6925373134328, "grad_norm": 25.11833381652832, "learning_rate": 9.61654135338346e-06, "loss": 31.1263, "step": 15737 }, { "epoch": 374.7164179104478, "grad_norm": 19.309633255004883, "learning_rate": 9.615914786967419e-06, "loss": 32.7638, "step": 15738 }, { "epoch": 374.7402985074627, "grad_norm": 17.912731170654297, "learning_rate": 9.61528822055138e-06, "loss": 32.5479, "step": 15739 }, { "epoch": 374.7641791044776, "grad_norm": 22.228496551513672, "learning_rate": 9.614661654135339e-06, "loss": 32.5027, "step": 15740 }, { "epoch": 374.78805970149256, "grad_norm": 14.644289016723633, "learning_rate": 9.614035087719298e-06, "loss": 31.0092, "step": 15741 }, { "epoch": 374.81194029850747, "grad_norm": 19.665285110473633, "learning_rate": 9.61340852130326e-06, "loss": 31.5028, "step": 15742 }, { "epoch": 374.8358208955224, "grad_norm": 17.751829147338867, "learning_rate": 9.61278195488722e-06, "loss": 32.241, "step": 15743 }, { "epoch": 374.85970149253734, "grad_norm": 21.286407470703125, "learning_rate": 9.612155388471178e-06, "loss": 31.8838, "step": 15744 }, { "epoch": 374.88358208955225, "grad_norm": 21.53445816040039, "learning_rate": 9.611528822055138e-06, "loss": 31.1677, "step": 15745 }, { "epoch": 374.90746268656716, "grad_norm": 22.49662971496582, "learning_rate": 9.610902255639099e-06, "loss": 33.3431, "step": 15746 }, { "epoch": 374.93134328358207, "grad_norm": 16.694021224975586, "learning_rate": 9.610275689223059e-06, "loss": 32.0352, "step": 15747 }, { "epoch": 374.95522388059703, "grad_norm": 31.050386428833008, "learning_rate": 9.609649122807019e-06, "loss": 30.8092, "step": 15748 }, { "epoch": 374.97910447761194, "grad_norm": 21.933225631713867, "learning_rate": 9.609022556390978e-06, "loss": 31.636, "step": 15749 }, { "epoch": 375.0, "grad_norm": 29.1340274810791, "learning_rate": 9.608395989974938e-06, "loss": 27.5906, "step": 15750 }, { "epoch": 375.0238805970149, "grad_norm": 25.01865577697754, "learning_rate": 9.607769423558898e-06, "loss": 32.5635, "step": 15751 }, { "epoch": 375.0477611940299, "grad_norm": 25.84816551208496, "learning_rate": 9.607142857142858e-06, "loss": 31.8677, "step": 15752 }, { "epoch": 375.0716417910448, "grad_norm": 24.823163986206055, "learning_rate": 9.606516290726818e-06, "loss": 31.0163, "step": 15753 }, { "epoch": 375.0955223880597, "grad_norm": 18.789514541625977, "learning_rate": 9.605889724310777e-06, "loss": 31.1502, "step": 15754 }, { "epoch": 375.1194029850746, "grad_norm": 32.61874771118164, "learning_rate": 9.605263157894737e-06, "loss": 32.6863, "step": 15755 }, { "epoch": 375.14328358208957, "grad_norm": 21.44687271118164, "learning_rate": 9.604636591478697e-06, "loss": 32.0578, "step": 15756 }, { "epoch": 375.1671641791045, "grad_norm": 32.22695541381836, "learning_rate": 9.604010025062659e-06, "loss": 33.2771, "step": 15757 }, { "epoch": 375.1910447761194, "grad_norm": 23.191083908081055, "learning_rate": 9.603383458646617e-06, "loss": 32.1562, "step": 15758 }, { "epoch": 375.21492537313435, "grad_norm": 30.16989517211914, "learning_rate": 9.602756892230576e-06, "loss": 31.4468, "step": 15759 }, { "epoch": 375.23880597014926, "grad_norm": 25.701231002807617, "learning_rate": 9.602130325814536e-06, "loss": 30.8878, "step": 15760 }, { "epoch": 375.26268656716417, "grad_norm": 20.336566925048828, "learning_rate": 9.601503759398498e-06, "loss": 30.8714, "step": 15761 }, { "epoch": 375.28656716417913, "grad_norm": 34.1949577331543, "learning_rate": 9.600877192982458e-06, "loss": 30.8927, "step": 15762 }, { "epoch": 375.31044776119404, "grad_norm": 24.111207962036133, "learning_rate": 9.600250626566416e-06, "loss": 31.5991, "step": 15763 }, { "epoch": 375.33432835820895, "grad_norm": 37.756351470947266, "learning_rate": 9.599624060150377e-06, "loss": 31.1229, "step": 15764 }, { "epoch": 375.35820895522386, "grad_norm": 24.177133560180664, "learning_rate": 9.598997493734337e-06, "loss": 31.7933, "step": 15765 }, { "epoch": 375.3820895522388, "grad_norm": 36.99074935913086, "learning_rate": 9.598370927318297e-06, "loss": 30.9657, "step": 15766 }, { "epoch": 375.40597014925373, "grad_norm": 27.523313522338867, "learning_rate": 9.597744360902257e-06, "loss": 32.164, "step": 15767 }, { "epoch": 375.42985074626864, "grad_norm": 46.68133544921875, "learning_rate": 9.597117794486216e-06, "loss": 30.1314, "step": 15768 }, { "epoch": 375.4537313432836, "grad_norm": 39.167694091796875, "learning_rate": 9.596491228070176e-06, "loss": 31.8979, "step": 15769 }, { "epoch": 375.4776119402985, "grad_norm": 36.873687744140625, "learning_rate": 9.595864661654136e-06, "loss": 31.4334, "step": 15770 }, { "epoch": 375.5014925373134, "grad_norm": 34.393184661865234, "learning_rate": 9.595238095238096e-06, "loss": 31.9177, "step": 15771 }, { "epoch": 375.52537313432833, "grad_norm": 32.181156158447266, "learning_rate": 9.594611528822056e-06, "loss": 32.4528, "step": 15772 }, { "epoch": 375.5492537313433, "grad_norm": 25.708959579467773, "learning_rate": 9.593984962406015e-06, "loss": 32.719, "step": 15773 }, { "epoch": 375.5731343283582, "grad_norm": 37.73577880859375, "learning_rate": 9.593358395989975e-06, "loss": 32.6842, "step": 15774 }, { "epoch": 375.5970149253731, "grad_norm": 30.247201919555664, "learning_rate": 9.592731829573937e-06, "loss": 31.812, "step": 15775 }, { "epoch": 375.6208955223881, "grad_norm": 40.35224533081055, "learning_rate": 9.592105263157896e-06, "loss": 31.7453, "step": 15776 }, { "epoch": 375.644776119403, "grad_norm": 35.667171478271484, "learning_rate": 9.591478696741855e-06, "loss": 32.4635, "step": 15777 }, { "epoch": 375.6686567164179, "grad_norm": 32.05931091308594, "learning_rate": 9.590852130325814e-06, "loss": 30.7586, "step": 15778 }, { "epoch": 375.6925373134328, "grad_norm": 29.923818588256836, "learning_rate": 9.590225563909776e-06, "loss": 31.6638, "step": 15779 }, { "epoch": 375.7164179104478, "grad_norm": 35.31935501098633, "learning_rate": 9.589598997493736e-06, "loss": 32.3195, "step": 15780 }, { "epoch": 375.7402985074627, "grad_norm": 28.098405838012695, "learning_rate": 9.588972431077695e-06, "loss": 32.5611, "step": 15781 }, { "epoch": 375.7641791044776, "grad_norm": 35.44413375854492, "learning_rate": 9.588345864661655e-06, "loss": 32.3807, "step": 15782 }, { "epoch": 375.78805970149256, "grad_norm": 35.95784378051758, "learning_rate": 9.587719298245615e-06, "loss": 31.6648, "step": 15783 }, { "epoch": 375.81194029850747, "grad_norm": 33.91968536376953, "learning_rate": 9.587092731829575e-06, "loss": 32.5861, "step": 15784 }, { "epoch": 375.8358208955224, "grad_norm": 34.18654251098633, "learning_rate": 9.586466165413535e-06, "loss": 31.0572, "step": 15785 }, { "epoch": 375.85970149253734, "grad_norm": 29.548994064331055, "learning_rate": 9.585839598997494e-06, "loss": 31.0244, "step": 15786 }, { "epoch": 375.88358208955225, "grad_norm": 26.251121520996094, "learning_rate": 9.585213032581454e-06, "loss": 31.1787, "step": 15787 }, { "epoch": 375.90746268656716, "grad_norm": 32.742435455322266, "learning_rate": 9.584586466165414e-06, "loss": 33.0015, "step": 15788 }, { "epoch": 375.93134328358207, "grad_norm": 30.169097900390625, "learning_rate": 9.583959899749374e-06, "loss": 32.0246, "step": 15789 }, { "epoch": 375.95522388059703, "grad_norm": 37.93808364868164, "learning_rate": 9.583333333333335e-06, "loss": 30.7534, "step": 15790 }, { "epoch": 375.97910447761194, "grad_norm": 33.832611083984375, "learning_rate": 9.582706766917293e-06, "loss": 31.6429, "step": 15791 }, { "epoch": 376.0, "grad_norm": 26.81100845336914, "learning_rate": 9.582080200501253e-06, "loss": 27.1944, "step": 15792 }, { "epoch": 376.0238805970149, "grad_norm": 31.282583236694336, "learning_rate": 9.581453634085213e-06, "loss": 31.937, "step": 15793 }, { "epoch": 376.0477611940299, "grad_norm": 31.36951446533203, "learning_rate": 9.580827067669175e-06, "loss": 31.5685, "step": 15794 }, { "epoch": 376.0716417910448, "grad_norm": 29.539073944091797, "learning_rate": 9.580200501253134e-06, "loss": 31.4499, "step": 15795 }, { "epoch": 376.0955223880597, "grad_norm": 35.193016052246094, "learning_rate": 9.579573934837092e-06, "loss": 32.0183, "step": 15796 }, { "epoch": 376.1194029850746, "grad_norm": 29.613739013671875, "learning_rate": 9.578947368421054e-06, "loss": 32.0695, "step": 15797 }, { "epoch": 376.14328358208957, "grad_norm": 34.028934478759766, "learning_rate": 9.578320802005014e-06, "loss": 32.6362, "step": 15798 }, { "epoch": 376.1671641791045, "grad_norm": 26.947418212890625, "learning_rate": 9.577694235588974e-06, "loss": 31.9747, "step": 15799 }, { "epoch": 376.1910447761194, "grad_norm": 34.34019088745117, "learning_rate": 9.577067669172933e-06, "loss": 31.2205, "step": 15800 }, { "epoch": 376.21492537313435, "grad_norm": 32.156585693359375, "learning_rate": 9.576441102756893e-06, "loss": 31.2934, "step": 15801 }, { "epoch": 376.23880597014926, "grad_norm": 32.523345947265625, "learning_rate": 9.575814536340853e-06, "loss": 30.6958, "step": 15802 }, { "epoch": 376.26268656716417, "grad_norm": 28.242023468017578, "learning_rate": 9.575187969924813e-06, "loss": 31.0181, "step": 15803 }, { "epoch": 376.28656716417913, "grad_norm": 34.437557220458984, "learning_rate": 9.574561403508773e-06, "loss": 31.7272, "step": 15804 }, { "epoch": 376.31044776119404, "grad_norm": 29.275766372680664, "learning_rate": 9.573934837092732e-06, "loss": 32.7677, "step": 15805 }, { "epoch": 376.33432835820895, "grad_norm": 32.32860565185547, "learning_rate": 9.573308270676692e-06, "loss": 31.3942, "step": 15806 }, { "epoch": 376.35820895522386, "grad_norm": 32.09244918823242, "learning_rate": 9.572681704260652e-06, "loss": 31.0439, "step": 15807 }, { "epoch": 376.3820895522388, "grad_norm": 34.416072845458984, "learning_rate": 9.572055137844612e-06, "loss": 31.642, "step": 15808 }, { "epoch": 376.40597014925373, "grad_norm": 28.986270904541016, "learning_rate": 9.571428571428573e-06, "loss": 31.3785, "step": 15809 }, { "epoch": 376.42985074626864, "grad_norm": 36.026180267333984, "learning_rate": 9.570802005012531e-06, "loss": 32.0038, "step": 15810 }, { "epoch": 376.4537313432836, "grad_norm": 30.704662322998047, "learning_rate": 9.570175438596491e-06, "loss": 32.4572, "step": 15811 }, { "epoch": 376.4776119402985, "grad_norm": 35.821624755859375, "learning_rate": 9.569548872180453e-06, "loss": 31.7397, "step": 15812 }, { "epoch": 376.5014925373134, "grad_norm": 30.665729522705078, "learning_rate": 9.568922305764412e-06, "loss": 32.1376, "step": 15813 }, { "epoch": 376.52537313432833, "grad_norm": 30.9582576751709, "learning_rate": 9.568295739348372e-06, "loss": 32.7472, "step": 15814 }, { "epoch": 376.5492537313433, "grad_norm": 27.143753051757812, "learning_rate": 9.567669172932332e-06, "loss": 31.5181, "step": 15815 }, { "epoch": 376.5731343283582, "grad_norm": 34.9139289855957, "learning_rate": 9.567042606516292e-06, "loss": 31.7119, "step": 15816 }, { "epoch": 376.5970149253731, "grad_norm": 30.557527542114258, "learning_rate": 9.566416040100252e-06, "loss": 32.5282, "step": 15817 }, { "epoch": 376.6208955223881, "grad_norm": 32.080833435058594, "learning_rate": 9.565789473684211e-06, "loss": 32.4353, "step": 15818 }, { "epoch": 376.644776119403, "grad_norm": 28.52341079711914, "learning_rate": 9.565162907268171e-06, "loss": 31.566, "step": 15819 }, { "epoch": 376.6686567164179, "grad_norm": 34.749046325683594, "learning_rate": 9.564536340852131e-06, "loss": 31.0211, "step": 15820 }, { "epoch": 376.6925373134328, "grad_norm": 28.574386596679688, "learning_rate": 9.56390977443609e-06, "loss": 31.212, "step": 15821 }, { "epoch": 376.7164179104478, "grad_norm": 31.630035400390625, "learning_rate": 9.56328320802005e-06, "loss": 31.7167, "step": 15822 }, { "epoch": 376.7402985074627, "grad_norm": 25.646991729736328, "learning_rate": 9.56265664160401e-06, "loss": 29.6201, "step": 15823 }, { "epoch": 376.7641791044776, "grad_norm": 32.602535247802734, "learning_rate": 9.56203007518797e-06, "loss": 32.3173, "step": 15824 }, { "epoch": 376.78805970149256, "grad_norm": 26.45676040649414, "learning_rate": 9.56140350877193e-06, "loss": 32.0254, "step": 15825 }, { "epoch": 376.81194029850747, "grad_norm": 33.60383987426758, "learning_rate": 9.56077694235589e-06, "loss": 31.7161, "step": 15826 }, { "epoch": 376.8358208955224, "grad_norm": 29.133054733276367, "learning_rate": 9.560150375939851e-06, "loss": 30.4912, "step": 15827 }, { "epoch": 376.85970149253734, "grad_norm": 34.06317901611328, "learning_rate": 9.559523809523811e-06, "loss": 32.2731, "step": 15828 }, { "epoch": 376.88358208955225, "grad_norm": 29.86116600036621, "learning_rate": 9.55889724310777e-06, "loss": 31.0667, "step": 15829 }, { "epoch": 376.90746268656716, "grad_norm": 28.151329040527344, "learning_rate": 9.55827067669173e-06, "loss": 31.4379, "step": 15830 }, { "epoch": 376.93134328358207, "grad_norm": 29.177358627319336, "learning_rate": 9.55764411027569e-06, "loss": 32.7566, "step": 15831 }, { "epoch": 376.95522388059703, "grad_norm": 30.21954917907715, "learning_rate": 9.55701754385965e-06, "loss": 31.9125, "step": 15832 }, { "epoch": 376.97910447761194, "grad_norm": 23.19196319580078, "learning_rate": 9.55639097744361e-06, "loss": 31.6665, "step": 15833 }, { "epoch": 377.0, "grad_norm": 29.176729202270508, "learning_rate": 9.55576441102757e-06, "loss": 27.8317, "step": 15834 }, { "epoch": 377.0238805970149, "grad_norm": 27.250619888305664, "learning_rate": 9.55513784461153e-06, "loss": 31.0447, "step": 15835 }, { "epoch": 377.0477611940299, "grad_norm": 32.7923698425293, "learning_rate": 9.55451127819549e-06, "loss": 30.9549, "step": 15836 }, { "epoch": 377.0716417910448, "grad_norm": 30.054567337036133, "learning_rate": 9.55388471177945e-06, "loss": 31.3272, "step": 15837 }, { "epoch": 377.0955223880597, "grad_norm": 30.509685516357422, "learning_rate": 9.553258145363409e-06, "loss": 31.5636, "step": 15838 }, { "epoch": 377.1194029850746, "grad_norm": 27.59921646118164, "learning_rate": 9.552631578947369e-06, "loss": 31.7532, "step": 15839 }, { "epoch": 377.14328358208957, "grad_norm": 26.052112579345703, "learning_rate": 9.552005012531329e-06, "loss": 31.6361, "step": 15840 }, { "epoch": 377.1671641791045, "grad_norm": 24.729148864746094, "learning_rate": 9.551378446115288e-06, "loss": 32.0229, "step": 15841 }, { "epoch": 377.1910447761194, "grad_norm": 25.934402465820312, "learning_rate": 9.55075187969925e-06, "loss": 31.8813, "step": 15842 }, { "epoch": 377.21492537313435, "grad_norm": 22.117822647094727, "learning_rate": 9.550125313283208e-06, "loss": 32.8609, "step": 15843 }, { "epoch": 377.23880597014926, "grad_norm": 30.511245727539062, "learning_rate": 9.549498746867168e-06, "loss": 31.8948, "step": 15844 }, { "epoch": 377.26268656716417, "grad_norm": 21.63555145263672, "learning_rate": 9.54887218045113e-06, "loss": 31.1702, "step": 15845 }, { "epoch": 377.28656716417913, "grad_norm": 29.63176155090332, "learning_rate": 9.54824561403509e-06, "loss": 31.411, "step": 15846 }, { "epoch": 377.31044776119404, "grad_norm": 22.744958877563477, "learning_rate": 9.547619047619049e-06, "loss": 31.5629, "step": 15847 }, { "epoch": 377.33432835820895, "grad_norm": 30.34880828857422, "learning_rate": 9.546992481203007e-06, "loss": 30.4462, "step": 15848 }, { "epoch": 377.35820895522386, "grad_norm": 26.772872924804688, "learning_rate": 9.546365914786969e-06, "loss": 31.9283, "step": 15849 }, { "epoch": 377.3820895522388, "grad_norm": 28.795146942138672, "learning_rate": 9.545739348370928e-06, "loss": 31.2553, "step": 15850 }, { "epoch": 377.40597014925373, "grad_norm": 26.94765281677246, "learning_rate": 9.545112781954888e-06, "loss": 32.3547, "step": 15851 }, { "epoch": 377.42985074626864, "grad_norm": 28.66550636291504, "learning_rate": 9.544486215538848e-06, "loss": 31.7639, "step": 15852 }, { "epoch": 377.4537313432836, "grad_norm": 23.641977310180664, "learning_rate": 9.543859649122808e-06, "loss": 31.9693, "step": 15853 }, { "epoch": 377.4776119402985, "grad_norm": 30.01308250427246, "learning_rate": 9.543233082706768e-06, "loss": 32.1116, "step": 15854 }, { "epoch": 377.5014925373134, "grad_norm": 22.378013610839844, "learning_rate": 9.542606516290727e-06, "loss": 32.2518, "step": 15855 }, { "epoch": 377.52537313432833, "grad_norm": 26.711362838745117, "learning_rate": 9.541979949874687e-06, "loss": 32.4148, "step": 15856 }, { "epoch": 377.5492537313433, "grad_norm": 22.41970443725586, "learning_rate": 9.541353383458647e-06, "loss": 30.8866, "step": 15857 }, { "epoch": 377.5731343283582, "grad_norm": 25.54366111755371, "learning_rate": 9.540726817042607e-06, "loss": 31.7993, "step": 15858 }, { "epoch": 377.5970149253731, "grad_norm": 22.378780364990234, "learning_rate": 9.540100250626567e-06, "loss": 32.0723, "step": 15859 }, { "epoch": 377.6208955223881, "grad_norm": 23.338041305541992, "learning_rate": 9.539473684210528e-06, "loss": 32.2472, "step": 15860 }, { "epoch": 377.644776119403, "grad_norm": 22.04779624938965, "learning_rate": 9.538847117794488e-06, "loss": 31.6033, "step": 15861 }, { "epoch": 377.6686567164179, "grad_norm": 23.686084747314453, "learning_rate": 9.538220551378446e-06, "loss": 31.2253, "step": 15862 }, { "epoch": 377.6925373134328, "grad_norm": 18.225317001342773, "learning_rate": 9.537593984962407e-06, "loss": 32.2873, "step": 15863 }, { "epoch": 377.7164179104478, "grad_norm": 19.244224548339844, "learning_rate": 9.536967418546367e-06, "loss": 31.3335, "step": 15864 }, { "epoch": 377.7402985074627, "grad_norm": 20.40607261657715, "learning_rate": 9.536340852130327e-06, "loss": 32.3113, "step": 15865 }, { "epoch": 377.7641791044776, "grad_norm": 17.419404983520508, "learning_rate": 9.535714285714287e-06, "loss": 32.3475, "step": 15866 }, { "epoch": 377.78805970149256, "grad_norm": 17.925275802612305, "learning_rate": 9.535087719298247e-06, "loss": 31.5005, "step": 15867 }, { "epoch": 377.81194029850747, "grad_norm": 17.21285057067871, "learning_rate": 9.534461152882206e-06, "loss": 32.8105, "step": 15868 }, { "epoch": 377.8358208955224, "grad_norm": NaN, "learning_rate": 9.533834586466166e-06, "loss": 50.8566, "step": 15869 }, { "epoch": 377.85970149253734, "grad_norm": 18.049938201904297, "learning_rate": 9.533834586466166e-06, "loss": 30.4199, "step": 15870 }, { "epoch": 377.88358208955225, "grad_norm": 17.153806686401367, "learning_rate": 9.533208020050126e-06, "loss": 31.5917, "step": 15871 }, { "epoch": 377.90746268656716, "grad_norm": 19.73189353942871, "learning_rate": 9.532581453634086e-06, "loss": 31.6223, "step": 15872 }, { "epoch": 377.93134328358207, "grad_norm": 15.848172187805176, "learning_rate": 9.531954887218046e-06, "loss": 31.8707, "step": 15873 }, { "epoch": 377.95522388059703, "grad_norm": 19.68431282043457, "learning_rate": 9.531328320802005e-06, "loss": 31.1429, "step": 15874 }, { "epoch": 377.97910447761194, "grad_norm": 16.735843658447266, "learning_rate": 9.530701754385965e-06, "loss": 31.9342, "step": 15875 }, { "epoch": 378.0, "grad_norm": NaN, "learning_rate": 9.530075187969927e-06, "loss": 35.8194, "step": 15876 }, { "epoch": 378.0238805970149, "grad_norm": 19.21783447265625, "learning_rate": 9.530075187969927e-06, "loss": 31.7106, "step": 15877 }, { "epoch": 378.0477611940299, "grad_norm": 17.19239044189453, "learning_rate": 9.529448621553885e-06, "loss": 31.4753, "step": 15878 }, { "epoch": 378.0716417910448, "grad_norm": 15.164022445678711, "learning_rate": 9.528822055137845e-06, "loss": 30.4925, "step": 15879 }, { "epoch": 378.0955223880597, "grad_norm": 18.01201820373535, "learning_rate": 9.528195488721806e-06, "loss": 31.7913, "step": 15880 }, { "epoch": 378.1194029850746, "grad_norm": 20.186861038208008, "learning_rate": 9.527568922305766e-06, "loss": 32.5956, "step": 15881 }, { "epoch": 378.14328358208957, "grad_norm": 20.592960357666016, "learning_rate": 9.526942355889726e-06, "loss": 32.0871, "step": 15882 }, { "epoch": 378.1671641791045, "grad_norm": 16.473604202270508, "learning_rate": 9.526315789473684e-06, "loss": 31.6023, "step": 15883 }, { "epoch": 378.1910447761194, "grad_norm": 22.3590087890625, "learning_rate": 9.525689223057645e-06, "loss": 33.0083, "step": 15884 }, { "epoch": 378.21492537313435, "grad_norm": 19.668336868286133, "learning_rate": 9.525062656641605e-06, "loss": 31.3263, "step": 15885 }, { "epoch": 378.23880597014926, "grad_norm": 19.205453872680664, "learning_rate": 9.524436090225565e-06, "loss": 31.2491, "step": 15886 }, { "epoch": 378.26268656716417, "grad_norm": 19.854339599609375, "learning_rate": 9.523809523809525e-06, "loss": 31.0131, "step": 15887 }, { "epoch": 378.28656716417913, "grad_norm": 27.01983070373535, "learning_rate": 9.523182957393485e-06, "loss": 30.894, "step": 15888 }, { "epoch": 378.31044776119404, "grad_norm": 19.966522216796875, "learning_rate": 9.522556390977444e-06, "loss": 31.726, "step": 15889 }, { "epoch": 378.33432835820895, "grad_norm": 19.188745498657227, "learning_rate": 9.521929824561404e-06, "loss": 31.0292, "step": 15890 }, { "epoch": 378.35820895522386, "grad_norm": 19.201169967651367, "learning_rate": 9.521303258145364e-06, "loss": 30.6026, "step": 15891 }, { "epoch": 378.3820895522388, "grad_norm": 22.15947723388672, "learning_rate": 9.520676691729324e-06, "loss": 32.1625, "step": 15892 }, { "epoch": 378.40597014925373, "grad_norm": 17.25479507446289, "learning_rate": 9.520050125313284e-06, "loss": 32.7679, "step": 15893 }, { "epoch": 378.42985074626864, "grad_norm": 25.67967987060547, "learning_rate": 9.519423558897243e-06, "loss": 32.2053, "step": 15894 }, { "epoch": 378.4537313432836, "grad_norm": 20.512603759765625, "learning_rate": 9.518796992481205e-06, "loss": 32.23, "step": 15895 }, { "epoch": 378.4776119402985, "grad_norm": 16.905094146728516, "learning_rate": 9.518170426065165e-06, "loss": 31.393, "step": 15896 }, { "epoch": 378.5014925373134, "grad_norm": 23.89948272705078, "learning_rate": 9.517543859649123e-06, "loss": 31.3879, "step": 15897 }, { "epoch": 378.52537313432833, "grad_norm": 17.382038116455078, "learning_rate": 9.516917293233083e-06, "loss": 31.3227, "step": 15898 }, { "epoch": 378.5492537313433, "grad_norm": 20.53512191772461, "learning_rate": 9.516290726817044e-06, "loss": 31.5636, "step": 15899 }, { "epoch": 378.5731343283582, "grad_norm": 17.52760124206543, "learning_rate": 9.515664160401004e-06, "loss": 31.7658, "step": 15900 }, { "epoch": 378.5970149253731, "grad_norm": 20.51746940612793, "learning_rate": 9.515037593984964e-06, "loss": 31.2039, "step": 15901 }, { "epoch": 378.6208955223881, "grad_norm": 18.75714874267578, "learning_rate": 9.514411027568923e-06, "loss": 32.9847, "step": 15902 }, { "epoch": 378.644776119403, "grad_norm": 17.246376037597656, "learning_rate": 9.513784461152883e-06, "loss": 31.9951, "step": 15903 }, { "epoch": 378.6686567164179, "grad_norm": 25.295597076416016, "learning_rate": 9.513157894736843e-06, "loss": 31.5805, "step": 15904 }, { "epoch": 378.6925373134328, "grad_norm": 18.569110870361328, "learning_rate": 9.512531328320803e-06, "loss": 31.6563, "step": 15905 }, { "epoch": 378.7164179104478, "grad_norm": 19.77447509765625, "learning_rate": 9.511904761904763e-06, "loss": 31.7076, "step": 15906 }, { "epoch": 378.7402985074627, "grad_norm": 22.064210891723633, "learning_rate": 9.511278195488722e-06, "loss": 31.7247, "step": 15907 }, { "epoch": 378.7641791044776, "grad_norm": 20.426063537597656, "learning_rate": 9.510651629072682e-06, "loss": 32.5137, "step": 15908 }, { "epoch": 378.78805970149256, "grad_norm": 16.537830352783203, "learning_rate": 9.510025062656642e-06, "loss": 31.0183, "step": 15909 }, { "epoch": 378.81194029850747, "grad_norm": 17.598913192749023, "learning_rate": 9.509398496240604e-06, "loss": 31.5591, "step": 15910 }, { "epoch": 378.8358208955224, "grad_norm": 18.24433135986328, "learning_rate": 9.508771929824562e-06, "loss": 30.3728, "step": 15911 }, { "epoch": 378.85970149253734, "grad_norm": 17.156044006347656, "learning_rate": 9.508145363408521e-06, "loss": 31.8933, "step": 15912 }, { "epoch": 378.88358208955225, "grad_norm": 19.96763801574707, "learning_rate": 9.507518796992481e-06, "loss": 31.8627, "step": 15913 }, { "epoch": 378.90746268656716, "grad_norm": 17.492136001586914, "learning_rate": 9.506892230576443e-06, "loss": 32.9634, "step": 15914 }, { "epoch": 378.93134328358207, "grad_norm": NaN, "learning_rate": 9.506265664160403e-06, "loss": 31.3351, "step": 15915 }, { "epoch": 378.95522388059703, "grad_norm": 22.429630279541016, "learning_rate": 9.506265664160403e-06, "loss": 32.1638, "step": 15916 }, { "epoch": 378.97910447761194, "grad_norm": 19.357730865478516, "learning_rate": 9.50563909774436e-06, "loss": 31.4918, "step": 15917 }, { "epoch": 379.0, "grad_norm": 17.785320281982422, "learning_rate": 9.505012531328322e-06, "loss": 27.6541, "step": 15918 }, { "epoch": 379.0238805970149, "grad_norm": 19.500259399414062, "learning_rate": 9.504385964912282e-06, "loss": 31.3774, "step": 15919 }, { "epoch": 379.0477611940299, "grad_norm": 21.011730194091797, "learning_rate": 9.503759398496242e-06, "loss": 31.8218, "step": 15920 }, { "epoch": 379.0716417910448, "grad_norm": 22.962175369262695, "learning_rate": 9.503132832080202e-06, "loss": 32.6586, "step": 15921 }, { "epoch": 379.0955223880597, "grad_norm": 19.155452728271484, "learning_rate": 9.502506265664161e-06, "loss": 32.2327, "step": 15922 }, { "epoch": 379.1194029850746, "grad_norm": 23.785982131958008, "learning_rate": 9.501879699248121e-06, "loss": 32.3278, "step": 15923 }, { "epoch": 379.14328358208957, "grad_norm": 24.060657501220703, "learning_rate": 9.501253132832081e-06, "loss": 31.0592, "step": 15924 }, { "epoch": 379.1671641791045, "grad_norm": 19.281307220458984, "learning_rate": 9.50062656641604e-06, "loss": 30.9093, "step": 15925 }, { "epoch": 379.1910447761194, "grad_norm": 21.47970199584961, "learning_rate": 9.5e-06, "loss": 30.323, "step": 15926 }, { "epoch": 379.21492537313435, "grad_norm": 18.192237854003906, "learning_rate": 9.49937343358396e-06, "loss": 30.895, "step": 15927 }, { "epoch": 379.23880597014926, "grad_norm": 24.31977653503418, "learning_rate": 9.49874686716792e-06, "loss": 29.9196, "step": 15928 }, { "epoch": 379.26268656716417, "grad_norm": 16.276206970214844, "learning_rate": 9.49812030075188e-06, "loss": 30.6136, "step": 15929 }, { "epoch": 379.28656716417913, "grad_norm": 23.11754035949707, "learning_rate": 9.497493734335841e-06, "loss": 31.0312, "step": 15930 }, { "epoch": 379.31044776119404, "grad_norm": 17.16490364074707, "learning_rate": 9.4968671679198e-06, "loss": 30.3436, "step": 15931 }, { "epoch": 379.33432835820895, "grad_norm": 20.933879852294922, "learning_rate": 9.49624060150376e-06, "loss": 31.749, "step": 15932 }, { "epoch": 379.35820895522386, "grad_norm": 22.423006057739258, "learning_rate": 9.49561403508772e-06, "loss": 32.1347, "step": 15933 }, { "epoch": 379.3820895522388, "grad_norm": 18.124557495117188, "learning_rate": 9.49498746867168e-06, "loss": 31.5066, "step": 15934 }, { "epoch": 379.40597014925373, "grad_norm": 22.877532958984375, "learning_rate": 9.494360902255639e-06, "loss": 31.8723, "step": 15935 }, { "epoch": 379.42985074626864, "grad_norm": 20.344417572021484, "learning_rate": 9.4937343358396e-06, "loss": 31.1676, "step": 15936 }, { "epoch": 379.4537313432836, "grad_norm": 26.261857986450195, "learning_rate": 9.49310776942356e-06, "loss": 31.6491, "step": 15937 }, { "epoch": 379.4776119402985, "grad_norm": 18.061038970947266, "learning_rate": 9.49248120300752e-06, "loss": 31.3321, "step": 15938 }, { "epoch": 379.5014925373134, "grad_norm": 23.36351203918457, "learning_rate": 9.49185463659148e-06, "loss": 31.2883, "step": 15939 }, { "epoch": 379.52537313432833, "grad_norm": 21.14858055114746, "learning_rate": 9.49122807017544e-06, "loss": 31.9338, "step": 15940 }, { "epoch": 379.5492537313433, "grad_norm": 21.407957077026367, "learning_rate": 9.4906015037594e-06, "loss": 31.6206, "step": 15941 }, { "epoch": 379.5731343283582, "grad_norm": 19.971158981323242, "learning_rate": 9.489974937343359e-06, "loss": 31.3695, "step": 15942 }, { "epoch": 379.5970149253731, "grad_norm": 23.457340240478516, "learning_rate": 9.489348370927319e-06, "loss": 30.0718, "step": 15943 }, { "epoch": 379.6208955223881, "grad_norm": 18.409650802612305, "learning_rate": 9.488721804511279e-06, "loss": 32.2795, "step": 15944 }, { "epoch": 379.644776119403, "grad_norm": 22.336862564086914, "learning_rate": 9.488095238095238e-06, "loss": 32.4552, "step": 15945 }, { "epoch": 379.6686567164179, "grad_norm": 21.455549240112305, "learning_rate": 9.487468671679198e-06, "loss": 32.0294, "step": 15946 }, { "epoch": 379.6925373134328, "grad_norm": 17.936628341674805, "learning_rate": 9.486842105263158e-06, "loss": 31.2746, "step": 15947 }, { "epoch": 379.7164179104478, "grad_norm": 16.496122360229492, "learning_rate": 9.48621553884712e-06, "loss": 32.2504, "step": 15948 }, { "epoch": 379.7402985074627, "grad_norm": 20.644977569580078, "learning_rate": 9.48558897243108e-06, "loss": 32.3653, "step": 15949 }, { "epoch": 379.7641791044776, "grad_norm": 20.545482635498047, "learning_rate": 9.484962406015037e-06, "loss": 32.4636, "step": 15950 }, { "epoch": 379.78805970149256, "grad_norm": 17.463809967041016, "learning_rate": 9.484335839598999e-06, "loss": 32.6064, "step": 15951 }, { "epoch": 379.81194029850747, "grad_norm": 14.48058032989502, "learning_rate": 9.483709273182959e-06, "loss": 30.6907, "step": 15952 }, { "epoch": 379.8358208955224, "grad_norm": 18.902481079101562, "learning_rate": 9.483082706766919e-06, "loss": 32.8878, "step": 15953 }, { "epoch": 379.85970149253734, "grad_norm": 15.660988807678223, "learning_rate": 9.482456140350878e-06, "loss": 32.0592, "step": 15954 }, { "epoch": 379.88358208955225, "grad_norm": 18.97564125061035, "learning_rate": 9.481829573934838e-06, "loss": 32.7558, "step": 15955 }, { "epoch": 379.90746268656716, "grad_norm": 20.05362892150879, "learning_rate": 9.481203007518798e-06, "loss": 32.3255, "step": 15956 }, { "epoch": 379.93134328358207, "grad_norm": 17.90984344482422, "learning_rate": 9.480576441102758e-06, "loss": 32.6887, "step": 15957 }, { "epoch": 379.95522388059703, "grad_norm": 18.937055587768555, "learning_rate": 9.479949874686717e-06, "loss": 31.7227, "step": 15958 }, { "epoch": 379.97910447761194, "grad_norm": 15.068392753601074, "learning_rate": 9.479323308270677e-06, "loss": 31.5952, "step": 15959 }, { "epoch": 380.0, "grad_norm": 17.841472625732422, "learning_rate": 9.478696741854637e-06, "loss": 27.6074, "step": 15960 }, { "epoch": 380.0, "step": 15960, "total_flos": 7.845630018318678e+17, "train_loss": 1.6809812497973142, "train_runtime": 25596.4735, "train_samples_per_second": 79.455, "train_steps_per_second": 0.624 }, { "epoch": 380.0238805970149, "grad_norm": 19.81171226501465, "learning_rate": 1e-05, "loss": 31.8237, "step": 15961 }, { "epoch": 380.0477611940299, "grad_norm": Infinity, "learning_rate": 9.999404761904763e-06, "loss": 37.0622, "step": 15962 }, { "epoch": 380.0716417910448, "grad_norm": 271.6819152832031, "learning_rate": 9.999404761904763e-06, "loss": 37.5461, "step": 15963 }, { "epoch": 380.0955223880597, "grad_norm": 141.90977478027344, "learning_rate": 9.998809523809524e-06, "loss": 34.3859, "step": 15964 }, { "epoch": 380.1194029850746, "grad_norm": 83.8337173461914, "learning_rate": 9.998214285714286e-06, "loss": 32.6125, "step": 15965 }, { "epoch": 380.14328358208957, "grad_norm": 77.03105163574219, "learning_rate": 9.997619047619048e-06, "loss": 33.4331, "step": 15966 }, { "epoch": 380.1671641791045, "grad_norm": 44.83564376831055, "learning_rate": 9.99702380952381e-06, "loss": 32.8591, "step": 15967 }, { "epoch": 380.1910447761194, "grad_norm": 65.24427032470703, "learning_rate": 9.996428571428572e-06, "loss": 32.9025, "step": 15968 }, { "epoch": 380.21492537313435, "grad_norm": 42.944244384765625, "learning_rate": 9.995833333333335e-06, "loss": 32.5031, "step": 15969 }, { "epoch": 380.23880597014926, "grad_norm": 47.43098831176758, "learning_rate": 9.995238095238095e-06, "loss": 32.2138, "step": 15970 }, { "epoch": 380.26268656716417, "grad_norm": 39.91004943847656, "learning_rate": 9.994642857142857e-06, "loss": 32.6998, "step": 15971 }, { "epoch": 380.28656716417913, "grad_norm": 29.325395584106445, "learning_rate": 9.99404761904762e-06, "loss": 32.2484, "step": 15972 }, { "epoch": 380.31044776119404, "grad_norm": 30.982845306396484, "learning_rate": 9.993452380952382e-06, "loss": 31.9461, "step": 15973 }, { "epoch": 380.33432835820895, "grad_norm": 30.943849563598633, "learning_rate": 9.992857142857144e-06, "loss": 31.8489, "step": 15974 }, { "epoch": 380.35820895522386, "grad_norm": 25.19125747680664, "learning_rate": 9.992261904761904e-06, "loss": 32.146, "step": 15975 }, { "epoch": 380.3820895522388, "grad_norm": 24.344026565551758, "learning_rate": 9.991666666666668e-06, "loss": 32.2715, "step": 15976 }, { "epoch": 380.40597014925373, "grad_norm": 26.513288497924805, "learning_rate": 9.99107142857143e-06, "loss": 32.0756, "step": 15977 }, { "epoch": 380.42985074626864, "grad_norm": 21.315874099731445, "learning_rate": 9.990476190476191e-06, "loss": 31.8548, "step": 15978 }, { "epoch": 380.4537313432836, "grad_norm": 20.785608291625977, "learning_rate": 9.989880952380953e-06, "loss": 31.4687, "step": 15979 }, { "epoch": 380.4776119402985, "grad_norm": 26.352123260498047, "learning_rate": 9.989285714285715e-06, "loss": 31.7443, "step": 15980 }, { "epoch": 380.5014925373134, "grad_norm": 21.69326400756836, "learning_rate": 9.988690476190477e-06, "loss": 31.3887, "step": 15981 }, { "epoch": 380.52537313432833, "grad_norm": 22.32582664489746, "learning_rate": 9.988095238095239e-06, "loss": 32.7526, "step": 15982 }, { "epoch": 380.5492537313433, "grad_norm": 18.500606536865234, "learning_rate": 9.9875e-06, "loss": 31.9896, "step": 15983 }, { "epoch": 380.5731343283582, "grad_norm": 23.360305786132812, "learning_rate": 9.986904761904764e-06, "loss": 31.7416, "step": 15984 }, { "epoch": 380.5970149253731, "grad_norm": 19.942039489746094, "learning_rate": 9.986309523809524e-06, "loss": 30.7637, "step": 15985 }, { "epoch": 380.6208955223881, "grad_norm": 17.783775329589844, "learning_rate": 9.985714285714286e-06, "loss": 32.0459, "step": 15986 }, { "epoch": 380.644776119403, "grad_norm": 17.822635650634766, "learning_rate": 9.985119047619048e-06, "loss": 31.7512, "step": 15987 }, { "epoch": 380.6686567164179, "grad_norm": 22.624650955200195, "learning_rate": 9.984523809523811e-06, "loss": 31.7233, "step": 15988 }, { "epoch": 380.6925373134328, "grad_norm": 20.165393829345703, "learning_rate": 9.983928571428573e-06, "loss": 32.0389, "step": 15989 }, { "epoch": 380.7164179104478, "grad_norm": 17.92303466796875, "learning_rate": 9.983333333333333e-06, "loss": 31.3312, "step": 15990 }, { "epoch": 380.7402985074627, "grad_norm": 16.85744285583496, "learning_rate": 9.982738095238097e-06, "loss": 30.7716, "step": 15991 }, { "epoch": 380.7641791044776, "grad_norm": 16.1578369140625, "learning_rate": 9.982142857142858e-06, "loss": 32.3187, "step": 15992 }, { "epoch": 380.78805970149256, "grad_norm": 15.582061767578125, "learning_rate": 9.98154761904762e-06, "loss": 31.0295, "step": 15993 }, { "epoch": 380.81194029850747, "grad_norm": 21.836551666259766, "learning_rate": 9.980952380952382e-06, "loss": 31.7255, "step": 15994 }, { "epoch": 380.8358208955224, "grad_norm": 16.40296173095703, "learning_rate": 9.980357142857144e-06, "loss": 30.8564, "step": 15995 }, { "epoch": 380.85970149253734, "grad_norm": 16.774675369262695, "learning_rate": 9.979761904761906e-06, "loss": 32.031, "step": 15996 }, { "epoch": 380.88358208955225, "grad_norm": 21.959762573242188, "learning_rate": 9.979166666666668e-06, "loss": 31.8713, "step": 15997 }, { "epoch": 380.90746268656716, "grad_norm": 16.705278396606445, "learning_rate": 9.97857142857143e-06, "loss": 31.5967, "step": 15998 }, { "epoch": 380.93134328358207, "grad_norm": 18.2200927734375, "learning_rate": 9.977976190476191e-06, "loss": 32.4937, "step": 15999 }, { "epoch": 380.95522388059703, "grad_norm": 19.012588500976562, "learning_rate": 9.977380952380953e-06, "loss": 30.3078, "step": 16000 }, { "epoch": 380.97910447761194, "grad_norm": 21.131380081176758, "learning_rate": 9.976785714285715e-06, "loss": 31.5742, "step": 16001 }, { "epoch": 381.0, "grad_norm": 17.37620735168457, "learning_rate": 9.976190476190477e-06, "loss": 27.9876, "step": 16002 }, { "epoch": 381.0238805970149, "grad_norm": NaN, "learning_rate": 9.975595238095238e-06, "loss": 39.6585, "step": 16003 }, { "epoch": 381.0477611940299, "grad_norm": 17.03713607788086, "learning_rate": 9.975595238095238e-06, "loss": 32.1147, "step": 16004 }, { "epoch": 381.0716417910448, "grad_norm": 20.864765167236328, "learning_rate": 9.975000000000002e-06, "loss": 31.7377, "step": 16005 }, { "epoch": 381.0955223880597, "grad_norm": 17.91866683959961, "learning_rate": 9.974404761904762e-06, "loss": 31.8208, "step": 16006 }, { "epoch": 381.1194029850746, "grad_norm": 19.627910614013672, "learning_rate": 9.973809523809524e-06, "loss": 31.6915, "step": 16007 }, { "epoch": 381.14328358208957, "grad_norm": 16.735336303710938, "learning_rate": 9.973214285714287e-06, "loss": 30.6379, "step": 16008 }, { "epoch": 381.1671641791045, "grad_norm": 18.15534782409668, "learning_rate": 9.972619047619049e-06, "loss": 32.1459, "step": 16009 }, { "epoch": 381.1910447761194, "grad_norm": 17.133621215820312, "learning_rate": 9.972023809523811e-06, "loss": 31.3118, "step": 16010 }, { "epoch": 381.21492537313435, "grad_norm": 18.383630752563477, "learning_rate": 9.971428571428571e-06, "loss": 31.7941, "step": 16011 }, { "epoch": 381.23880597014926, "grad_norm": 24.900360107421875, "learning_rate": 9.970833333333335e-06, "loss": 32.1381, "step": 16012 }, { "epoch": 381.26268656716417, "grad_norm": 17.420650482177734, "learning_rate": 9.970238095238096e-06, "loss": 30.8753, "step": 16013 }, { "epoch": 381.28656716417913, "grad_norm": 16.8143310546875, "learning_rate": 9.969642857142858e-06, "loss": 30.8699, "step": 16014 }, { "epoch": 381.31044776119404, "grad_norm": 19.68670082092285, "learning_rate": 9.96904761904762e-06, "loss": 31.2582, "step": 16015 }, { "epoch": 381.33432835820895, "grad_norm": 21.80328369140625, "learning_rate": 9.968452380952382e-06, "loss": 31.3006, "step": 16016 }, { "epoch": 381.35820895522386, "grad_norm": 17.671180725097656, "learning_rate": 9.967857142857144e-06, "loss": 31.4751, "step": 16017 }, { "epoch": 381.3820895522388, "grad_norm": 20.94354820251465, "learning_rate": 9.967261904761905e-06, "loss": 32.2476, "step": 16018 }, { "epoch": 381.40597014925373, "grad_norm": 20.885517120361328, "learning_rate": 9.966666666666667e-06, "loss": 31.1364, "step": 16019 }, { "epoch": 381.42985074626864, "grad_norm": 22.11631965637207, "learning_rate": 9.966071428571429e-06, "loss": 32.5009, "step": 16020 }, { "epoch": 381.4537313432836, "grad_norm": 19.251211166381836, "learning_rate": 9.965476190476191e-06, "loss": 31.3807, "step": 16021 }, { "epoch": 381.4776119402985, "grad_norm": 20.826810836791992, "learning_rate": 9.964880952380953e-06, "loss": 32.882, "step": 16022 }, { "epoch": 381.5014925373134, "grad_norm": 16.566261291503906, "learning_rate": 9.964285714285714e-06, "loss": 30.5622, "step": 16023 }, { "epoch": 381.52537313432833, "grad_norm": 19.458803176879883, "learning_rate": 9.963690476190478e-06, "loss": 31.2937, "step": 16024 }, { "epoch": 381.5492537313433, "grad_norm": 17.66868019104004, "learning_rate": 9.963095238095238e-06, "loss": 32.559, "step": 16025 }, { "epoch": 381.5731343283582, "grad_norm": 21.109437942504883, "learning_rate": 9.9625e-06, "loss": 32.6014, "step": 16026 }, { "epoch": 381.5970149253731, "grad_norm": 17.954509735107422, "learning_rate": 9.961904761904763e-06, "loss": 32.064, "step": 16027 }, { "epoch": 381.6208955223881, "grad_norm": 17.789915084838867, "learning_rate": 9.961309523809525e-06, "loss": 30.7316, "step": 16028 }, { "epoch": 381.644776119403, "grad_norm": 16.219350814819336, "learning_rate": 9.960714285714287e-06, "loss": 31.7867, "step": 16029 }, { "epoch": 381.6686567164179, "grad_norm": 20.78518295288086, "learning_rate": 9.960119047619047e-06, "loss": 30.9799, "step": 16030 }, { "epoch": 381.6925373134328, "grad_norm": 19.862668991088867, "learning_rate": 9.95952380952381e-06, "loss": 32.1743, "step": 16031 }, { "epoch": 381.7164179104478, "grad_norm": 18.970291137695312, "learning_rate": 9.958928571428572e-06, "loss": 32.6788, "step": 16032 }, { "epoch": 381.7402985074627, "grad_norm": 20.33418083190918, "learning_rate": 9.958333333333334e-06, "loss": 32.6219, "step": 16033 }, { "epoch": 381.7641791044776, "grad_norm": 19.823413848876953, "learning_rate": 9.957738095238096e-06, "loss": 31.3587, "step": 16034 }, { "epoch": 381.78805970149256, "grad_norm": 22.553861618041992, "learning_rate": 9.957142857142858e-06, "loss": 31.6313, "step": 16035 }, { "epoch": 381.81194029850747, "grad_norm": 21.496826171875, "learning_rate": 9.95654761904762e-06, "loss": 31.3406, "step": 16036 }, { "epoch": 381.8358208955224, "grad_norm": 17.12688446044922, "learning_rate": 9.955952380952382e-06, "loss": 31.0774, "step": 16037 }, { "epoch": 381.85970149253734, "grad_norm": 18.4323673248291, "learning_rate": 9.955357142857143e-06, "loss": 30.6829, "step": 16038 }, { "epoch": 381.88358208955225, "grad_norm": 18.555824279785156, "learning_rate": 9.954761904761905e-06, "loss": 31.2206, "step": 16039 }, { "epoch": 381.90746268656716, "grad_norm": 22.945341110229492, "learning_rate": 9.954166666666667e-06, "loss": 30.9577, "step": 16040 }, { "epoch": 381.93134328358207, "grad_norm": 21.579174041748047, "learning_rate": 9.953571428571429e-06, "loss": 32.3625, "step": 16041 }, { "epoch": 381.95522388059703, "grad_norm": 17.47161293029785, "learning_rate": 9.95297619047619e-06, "loss": 31.8817, "step": 16042 }, { "epoch": 381.97910447761194, "grad_norm": 22.1416072845459, "learning_rate": 9.952380952380954e-06, "loss": 30.065, "step": 16043 }, { "epoch": 382.0, "grad_norm": 17.564512252807617, "learning_rate": 9.951785714285716e-06, "loss": 28.0914, "step": 16044 }, { "epoch": 382.0238805970149, "grad_norm": 30.5643310546875, "learning_rate": 9.951190476190476e-06, "loss": 31.4639, "step": 16045 }, { "epoch": 382.0477611940299, "grad_norm": 20.0091609954834, "learning_rate": 9.95059523809524e-06, "loss": 32.4623, "step": 16046 }, { "epoch": 382.0716417910448, "grad_norm": 30.843324661254883, "learning_rate": 9.950000000000001e-06, "loss": 31.474, "step": 16047 }, { "epoch": 382.0955223880597, "grad_norm": 23.881017684936523, "learning_rate": 9.949404761904763e-06, "loss": 30.2202, "step": 16048 }, { "epoch": 382.1194029850746, "grad_norm": 27.615142822265625, "learning_rate": 9.948809523809525e-06, "loss": 30.1956, "step": 16049 }, { "epoch": 382.14328358208957, "grad_norm": 27.60844612121582, "learning_rate": 9.948214285714287e-06, "loss": 32.6057, "step": 16050 }, { "epoch": 382.1671641791045, "grad_norm": 18.87521743774414, "learning_rate": 9.947619047619049e-06, "loss": 30.0185, "step": 16051 }, { "epoch": 382.1910447761194, "grad_norm": 28.86406707763672, "learning_rate": 9.94702380952381e-06, "loss": 31.3004, "step": 16052 }, { "epoch": 382.21492537313435, "grad_norm": 20.739280700683594, "learning_rate": 9.946428571428572e-06, "loss": 30.4528, "step": 16053 }, { "epoch": 382.23880597014926, "grad_norm": 21.37101936340332, "learning_rate": 9.945833333333334e-06, "loss": 31.3095, "step": 16054 }, { "epoch": 382.26268656716417, "grad_norm": 20.987924575805664, "learning_rate": 9.945238095238096e-06, "loss": 31.601, "step": 16055 }, { "epoch": 382.28656716417913, "grad_norm": 22.637855529785156, "learning_rate": 9.944642857142858e-06, "loss": 32.697, "step": 16056 }, { "epoch": 382.31044776119404, "grad_norm": 21.173524856567383, "learning_rate": 9.94404761904762e-06, "loss": 31.425, "step": 16057 }, { "epoch": 382.33432835820895, "grad_norm": 16.42003631591797, "learning_rate": 9.943452380952381e-06, "loss": 30.9219, "step": 16058 }, { "epoch": 382.35820895522386, "grad_norm": 29.418729782104492, "learning_rate": 9.942857142857145e-06, "loss": 31.1095, "step": 16059 }, { "epoch": 382.3820895522388, "grad_norm": 18.15989875793457, "learning_rate": 9.942261904761905e-06, "loss": 31.6159, "step": 16060 }, { "epoch": 382.40597014925373, "grad_norm": 25.514652252197266, "learning_rate": 9.941666666666667e-06, "loss": 30.9924, "step": 16061 }, { "epoch": 382.42985074626864, "grad_norm": 26.61665916442871, "learning_rate": 9.94107142857143e-06, "loss": 30.3087, "step": 16062 }, { "epoch": 382.4537313432836, "grad_norm": 21.26274299621582, "learning_rate": 9.940476190476192e-06, "loss": 32.2182, "step": 16063 }, { "epoch": 382.4776119402985, "grad_norm": 17.34431266784668, "learning_rate": 9.939880952380954e-06, "loss": 30.6975, "step": 16064 }, { "epoch": 382.5014925373134, "grad_norm": 29.654024124145508, "learning_rate": 9.939285714285714e-06, "loss": 31.4566, "step": 16065 }, { "epoch": 382.52537313432833, "grad_norm": 19.027162551879883, "learning_rate": 9.938690476190477e-06, "loss": 31.0827, "step": 16066 }, { "epoch": 382.5492537313433, "grad_norm": 41.36701965332031, "learning_rate": 9.93809523809524e-06, "loss": 32.6753, "step": 16067 }, { "epoch": 382.5731343283582, "grad_norm": 31.004261016845703, "learning_rate": 9.937500000000001e-06, "loss": 32.7999, "step": 16068 }, { "epoch": 382.5970149253731, "grad_norm": 37.49189376831055, "learning_rate": 9.936904761904763e-06, "loss": 32.5376, "step": 16069 }, { "epoch": 382.6208955223881, "grad_norm": 27.8145751953125, "learning_rate": 9.936309523809525e-06, "loss": 32.1167, "step": 16070 }, { "epoch": 382.644776119403, "grad_norm": 42.59127426147461, "learning_rate": 9.935714285714286e-06, "loss": 31.4012, "step": 16071 }, { "epoch": 382.6686567164179, "grad_norm": 28.887666702270508, "learning_rate": 9.935119047619048e-06, "loss": 31.6601, "step": 16072 }, { "epoch": 382.6925373134328, "grad_norm": 40.80937194824219, "learning_rate": 9.93452380952381e-06, "loss": 31.5855, "step": 16073 }, { "epoch": 382.7164179104478, "grad_norm": 34.60734939575195, "learning_rate": 9.933928571428574e-06, "loss": 31.6968, "step": 16074 }, { "epoch": 382.7402985074627, "grad_norm": 37.64997863769531, "learning_rate": 9.933333333333334e-06, "loss": 30.9189, "step": 16075 }, { "epoch": 382.7641791044776, "grad_norm": 36.916038513183594, "learning_rate": 9.932738095238095e-06, "loss": 32.6017, "step": 16076 }, { "epoch": 382.78805970149256, "grad_norm": 35.18901062011719, "learning_rate": 9.932142857142857e-06, "loss": 31.8938, "step": 16077 }, { "epoch": 382.81194029850747, "grad_norm": 31.393796920776367, "learning_rate": 9.93154761904762e-06, "loss": 31.788, "step": 16078 }, { "epoch": 382.8358208955224, "grad_norm": 40.976837158203125, "learning_rate": 9.930952380952383e-06, "loss": 31.6394, "step": 16079 }, { "epoch": 382.85970149253734, "grad_norm": 31.78545379638672, "learning_rate": 9.930357142857143e-06, "loss": 32.4881, "step": 16080 }, { "epoch": 382.88358208955225, "grad_norm": 39.9713020324707, "learning_rate": 9.929761904761906e-06, "loss": 32.4877, "step": 16081 }, { "epoch": 382.90746268656716, "grad_norm": 35.2440185546875, "learning_rate": 9.929166666666668e-06, "loss": 31.6711, "step": 16082 }, { "epoch": 382.93134328358207, "grad_norm": 35.897239685058594, "learning_rate": 9.92857142857143e-06, "loss": 31.0469, "step": 16083 }, { "epoch": 382.95522388059703, "grad_norm": 32.89672088623047, "learning_rate": 9.927976190476192e-06, "loss": 31.9327, "step": 16084 }, { "epoch": 382.97910447761194, "grad_norm": 31.14480972290039, "learning_rate": 9.927380952380953e-06, "loss": 32.2283, "step": 16085 }, { "epoch": 383.0, "grad_norm": 23.345359802246094, "learning_rate": 9.926785714285715e-06, "loss": 27.4965, "step": 16086 }, { "epoch": 383.0238805970149, "grad_norm": 36.6619987487793, "learning_rate": 9.926190476190477e-06, "loss": 31.1128, "step": 16087 }, { "epoch": 383.0477611940299, "grad_norm": 29.31681251525879, "learning_rate": 9.925595238095239e-06, "loss": 31.9634, "step": 16088 }, { "epoch": 383.0716417910448, "grad_norm": 36.53302001953125, "learning_rate": 9.925e-06, "loss": 31.8536, "step": 16089 }, { "epoch": 383.0955223880597, "grad_norm": 35.891845703125, "learning_rate": 9.924404761904762e-06, "loss": 30.4767, "step": 16090 }, { "epoch": 383.1194029850746, "grad_norm": 36.07611846923828, "learning_rate": 9.923809523809524e-06, "loss": 32.2482, "step": 16091 }, { "epoch": 383.14328358208957, "grad_norm": 30.02564811706543, "learning_rate": 9.923214285714286e-06, "loss": 31.355, "step": 16092 }, { "epoch": 383.1671641791045, "grad_norm": 32.45337677001953, "learning_rate": 9.922619047619048e-06, "loss": 31.4327, "step": 16093 }, { "epoch": 383.1910447761194, "grad_norm": 29.737163543701172, "learning_rate": 9.922023809523811e-06, "loss": 30.0838, "step": 16094 }, { "epoch": 383.21492537313435, "grad_norm": 36.99846267700195, "learning_rate": 9.921428571428572e-06, "loss": 32.8689, "step": 16095 }, { "epoch": 383.23880597014926, "grad_norm": 33.277320861816406, "learning_rate": 9.920833333333333e-06, "loss": 30.6591, "step": 16096 }, { "epoch": 383.26268656716417, "grad_norm": 35.308570861816406, "learning_rate": 9.920238095238097e-06, "loss": 31.7971, "step": 16097 }, { "epoch": 383.28656716417913, "grad_norm": 32.15773391723633, "learning_rate": 9.919642857142859e-06, "loss": 30.9182, "step": 16098 }, { "epoch": 383.31044776119404, "grad_norm": 35.116065979003906, "learning_rate": 9.91904761904762e-06, "loss": 31.0777, "step": 16099 }, { "epoch": 383.33432835820895, "grad_norm": 32.25497817993164, "learning_rate": 9.91845238095238e-06, "loss": 31.5772, "step": 16100 }, { "epoch": 383.35820895522386, "grad_norm": 35.16862106323242, "learning_rate": 9.917857142857144e-06, "loss": 30.7156, "step": 16101 }, { "epoch": 383.3820895522388, "grad_norm": 32.17951583862305, "learning_rate": 9.917261904761906e-06, "loss": 31.9786, "step": 16102 }, { "epoch": 383.40597014925373, "grad_norm": NaN, "learning_rate": 9.916666666666668e-06, "loss": 52.2399, "step": 16103 }, { "epoch": 383.42985074626864, "grad_norm": 34.20357131958008, "learning_rate": 9.916666666666668e-06, "loss": 32.0766, "step": 16104 }, { "epoch": 383.4537313432836, "grad_norm": 27.18743896484375, "learning_rate": 9.91607142857143e-06, "loss": 31.8092, "step": 16105 }, { "epoch": 383.4776119402985, "grad_norm": 36.65757369995117, "learning_rate": 9.915476190476191e-06, "loss": 31.9558, "step": 16106 }, { "epoch": 383.5014925373134, "grad_norm": 30.82261848449707, "learning_rate": 9.914880952380953e-06, "loss": 31.7545, "step": 16107 }, { "epoch": 383.52537313432833, "grad_norm": 37.73588562011719, "learning_rate": 9.914285714285715e-06, "loss": 30.3501, "step": 16108 }, { "epoch": 383.5492537313433, "grad_norm": 31.677982330322266, "learning_rate": 9.913690476190477e-06, "loss": 31.3569, "step": 16109 }, { "epoch": 383.5731343283582, "grad_norm": 32.6319694519043, "learning_rate": 9.91309523809524e-06, "loss": 31.7832, "step": 16110 }, { "epoch": 383.5970149253731, "grad_norm": 27.835472106933594, "learning_rate": 9.9125e-06, "loss": 31.5501, "step": 16111 }, { "epoch": 383.6208955223881, "grad_norm": 33.154388427734375, "learning_rate": 9.911904761904762e-06, "loss": 31.3517, "step": 16112 }, { "epoch": 383.644776119403, "grad_norm": 30.630441665649414, "learning_rate": 9.911309523809524e-06, "loss": 31.8968, "step": 16113 }, { "epoch": 383.6686567164179, "grad_norm": 42.44910430908203, "learning_rate": 9.910714285714288e-06, "loss": 32.3752, "step": 16114 }, { "epoch": 383.6925373134328, "grad_norm": 35.22804641723633, "learning_rate": 9.91011904761905e-06, "loss": 31.7221, "step": 16115 }, { "epoch": 383.7164179104478, "grad_norm": 31.43850326538086, "learning_rate": 9.90952380952381e-06, "loss": 32.5193, "step": 16116 }, { "epoch": 383.7402985074627, "grad_norm": 27.685434341430664, "learning_rate": 9.908928571428573e-06, "loss": 31.3131, "step": 16117 }, { "epoch": 383.7641791044776, "grad_norm": 31.522960662841797, "learning_rate": 9.908333333333335e-06, "loss": 31.9096, "step": 16118 }, { "epoch": 383.78805970149256, "grad_norm": 27.524206161499023, "learning_rate": 9.907738095238097e-06, "loss": 31.0031, "step": 16119 }, { "epoch": 383.81194029850747, "grad_norm": 38.7270393371582, "learning_rate": 9.907142857142858e-06, "loss": 30.5195, "step": 16120 }, { "epoch": 383.8358208955224, "grad_norm": 33.950130462646484, "learning_rate": 9.90654761904762e-06, "loss": 31.3107, "step": 16121 }, { "epoch": 383.85970149253734, "grad_norm": 31.93348503112793, "learning_rate": 9.905952380952382e-06, "loss": 31.3036, "step": 16122 }, { "epoch": 383.88358208955225, "grad_norm": 31.724279403686523, "learning_rate": 9.905357142857144e-06, "loss": 30.6898, "step": 16123 }, { "epoch": 383.90746268656716, "grad_norm": 29.71946144104004, "learning_rate": 9.904761904761906e-06, "loss": 31.6857, "step": 16124 }, { "epoch": 383.93134328358207, "grad_norm": 26.805870056152344, "learning_rate": 9.904166666666667e-06, "loss": 31.5954, "step": 16125 }, { "epoch": 383.95522388059703, "grad_norm": 32.27679443359375, "learning_rate": 9.90357142857143e-06, "loss": 31.3116, "step": 16126 }, { "epoch": 383.97910447761194, "grad_norm": 30.086898803710938, "learning_rate": 9.902976190476191e-06, "loss": 31.8662, "step": 16127 }, { "epoch": 384.0, "grad_norm": 31.092683792114258, "learning_rate": 9.902380952380953e-06, "loss": 28.2241, "step": 16128 }, { "epoch": 384.0238805970149, "grad_norm": 32.8645133972168, "learning_rate": 9.901785714285715e-06, "loss": 31.9904, "step": 16129 }, { "epoch": 384.0477611940299, "grad_norm": 32.35710525512695, "learning_rate": 9.901190476190476e-06, "loss": 31.4219, "step": 16130 }, { "epoch": 384.0716417910448, "grad_norm": 27.111024856567383, "learning_rate": 9.900595238095238e-06, "loss": 31.3478, "step": 16131 }, { "epoch": 384.0955223880597, "grad_norm": 31.46786880493164, "learning_rate": 9.9e-06, "loss": 32.2751, "step": 16132 }, { "epoch": 384.1194029850746, "grad_norm": 30.0049991607666, "learning_rate": 9.899404761904764e-06, "loss": 32.7822, "step": 16133 }, { "epoch": 384.14328358208957, "grad_norm": 33.938175201416016, "learning_rate": 9.898809523809525e-06, "loss": 30.7121, "step": 16134 }, { "epoch": 384.1671641791045, "grad_norm": 28.707305908203125, "learning_rate": 9.898214285714286e-06, "loss": 30.6729, "step": 16135 }, { "epoch": 384.1910447761194, "grad_norm": 32.50811767578125, "learning_rate": 9.897619047619047e-06, "loss": 30.0735, "step": 16136 }, { "epoch": 384.21492537313435, "grad_norm": 30.479076385498047, "learning_rate": 9.89702380952381e-06, "loss": 31.5396, "step": 16137 }, { "epoch": 384.23880597014926, "grad_norm": 33.949214935302734, "learning_rate": 9.896428571428573e-06, "loss": 31.8274, "step": 16138 }, { "epoch": 384.26268656716417, "grad_norm": 26.714616775512695, "learning_rate": 9.895833333333334e-06, "loss": 31.9519, "step": 16139 }, { "epoch": 384.28656716417913, "grad_norm": 38.129310607910156, "learning_rate": 9.895238095238096e-06, "loss": 31.742, "step": 16140 }, { "epoch": 384.31044776119404, "grad_norm": 35.66838836669922, "learning_rate": 9.894642857142858e-06, "loss": 31.2153, "step": 16141 }, { "epoch": 384.33432835820895, "grad_norm": 30.687631607055664, "learning_rate": 9.89404761904762e-06, "loss": 31.3691, "step": 16142 }, { "epoch": 384.35820895522386, "grad_norm": 29.71688461303711, "learning_rate": 9.893452380952382e-06, "loss": 32.3767, "step": 16143 }, { "epoch": 384.3820895522388, "grad_norm": NaN, "learning_rate": 9.892857142857143e-06, "loss": 50.6448, "step": 16144 }, { "epoch": 384.40597014925373, "grad_norm": 31.855520248413086, "learning_rate": 9.892857142857143e-06, "loss": 30.7422, "step": 16145 }, { "epoch": 384.42985074626864, "grad_norm": 25.546953201293945, "learning_rate": 9.892261904761905e-06, "loss": 32.2336, "step": 16146 }, { "epoch": 384.4537313432836, "grad_norm": 32.33867263793945, "learning_rate": 9.891666666666667e-06, "loss": 30.6471, "step": 16147 }, { "epoch": 384.4776119402985, "grad_norm": 27.39624786376953, "learning_rate": 9.891071428571429e-06, "loss": 31.9699, "step": 16148 }, { "epoch": 384.5014925373134, "grad_norm": 34.7828369140625, "learning_rate": 9.89047619047619e-06, "loss": 30.8026, "step": 16149 }, { "epoch": 384.52537313432833, "grad_norm": 28.863527297973633, "learning_rate": 9.889880952380954e-06, "loss": 31.0636, "step": 16150 }, { "epoch": 384.5492537313433, "grad_norm": 30.425647735595703, "learning_rate": 9.889285714285714e-06, "loss": 32.5964, "step": 16151 }, { "epoch": 384.5731343283582, "grad_norm": 25.711910247802734, "learning_rate": 9.888690476190476e-06, "loss": 31.3709, "step": 16152 }, { "epoch": 384.5970149253731, "grad_norm": 30.859392166137695, "learning_rate": 9.88809523809524e-06, "loss": 32.0903, "step": 16153 }, { "epoch": 384.6208955223881, "grad_norm": 27.224456787109375, "learning_rate": 9.887500000000001e-06, "loss": 30.7084, "step": 16154 }, { "epoch": 384.644776119403, "grad_norm": 34.85042953491211, "learning_rate": 9.886904761904763e-06, "loss": 31.8646, "step": 16155 }, { "epoch": 384.6686567164179, "grad_norm": 31.036788940429688, "learning_rate": 9.886309523809523e-06, "loss": 31.217, "step": 16156 }, { "epoch": 384.6925373134328, "grad_norm": 34.45530319213867, "learning_rate": 9.885714285714287e-06, "loss": 32.0169, "step": 16157 }, { "epoch": 384.7164179104478, "grad_norm": 29.6302547454834, "learning_rate": 9.885119047619049e-06, "loss": 32.0262, "step": 16158 }, { "epoch": 384.7402985074627, "grad_norm": 27.671533584594727, "learning_rate": 9.88452380952381e-06, "loss": 31.8737, "step": 16159 }, { "epoch": 384.7641791044776, "grad_norm": 24.455589294433594, "learning_rate": 9.883928571428572e-06, "loss": 31.1658, "step": 16160 }, { "epoch": 384.78805970149256, "grad_norm": 25.79328727722168, "learning_rate": 9.883333333333334e-06, "loss": 29.9689, "step": 16161 }, { "epoch": 384.81194029850747, "grad_norm": 21.47579574584961, "learning_rate": 9.882738095238096e-06, "loss": 31.6123, "step": 16162 }, { "epoch": 384.8358208955224, "grad_norm": 28.515636444091797, "learning_rate": 9.882142857142858e-06, "loss": 30.9889, "step": 16163 }, { "epoch": 384.85970149253734, "grad_norm": 23.242191314697266, "learning_rate": 9.88154761904762e-06, "loss": 31.6707, "step": 16164 }, { "epoch": 384.88358208955225, "grad_norm": 29.00843620300293, "learning_rate": 9.880952380952381e-06, "loss": 31.4587, "step": 16165 }, { "epoch": 384.90746268656716, "grad_norm": 22.492422103881836, "learning_rate": 9.880357142857143e-06, "loss": 31.6438, "step": 16166 }, { "epoch": 384.93134328358207, "grad_norm": 27.189645767211914, "learning_rate": 9.879761904761905e-06, "loss": 31.5126, "step": 16167 }, { "epoch": 384.95522388059703, "grad_norm": 23.21908950805664, "learning_rate": 9.879166666666667e-06, "loss": 32.2981, "step": 16168 }, { "epoch": 384.97910447761194, "grad_norm": 27.132030487060547, "learning_rate": 9.87857142857143e-06, "loss": 32.1087, "step": 16169 }, { "epoch": 385.0, "grad_norm": 22.102291107177734, "learning_rate": 9.877976190476192e-06, "loss": 27.1166, "step": 16170 }, { "epoch": 385.0238805970149, "grad_norm": 23.863162994384766, "learning_rate": 9.877380952380952e-06, "loss": 31.7717, "step": 16171 }, { "epoch": 385.0477611940299, "grad_norm": 20.544803619384766, "learning_rate": 9.876785714285714e-06, "loss": 31.0464, "step": 16172 }, { "epoch": 385.0716417910448, "grad_norm": 22.5712947845459, "learning_rate": 9.876190476190478e-06, "loss": 32.1594, "step": 16173 }, { "epoch": 385.0955223880597, "grad_norm": 20.91865348815918, "learning_rate": 9.87559523809524e-06, "loss": 33.1558, "step": 16174 }, { "epoch": 385.1194029850746, "grad_norm": 21.566856384277344, "learning_rate": 9.875000000000001e-06, "loss": 31.3823, "step": 16175 }, { "epoch": 385.14328358208957, "grad_norm": 18.019092559814453, "learning_rate": 9.874404761904763e-06, "loss": 30.0196, "step": 16176 }, { "epoch": 385.1671641791045, "grad_norm": 21.332061767578125, "learning_rate": 9.873809523809525e-06, "loss": 30.5859, "step": 16177 }, { "epoch": 385.1910447761194, "grad_norm": 18.231172561645508, "learning_rate": 9.873214285714287e-06, "loss": 30.5717, "step": 16178 }, { "epoch": 385.21492537313435, "grad_norm": 20.333765029907227, "learning_rate": 9.872619047619048e-06, "loss": 31.2717, "step": 16179 }, { "epoch": 385.23880597014926, "grad_norm": NaN, "learning_rate": 9.87202380952381e-06, "loss": 50.9608, "step": 16180 }, { "epoch": 385.26268656716417, "grad_norm": 20.32228660583496, "learning_rate": 9.87202380952381e-06, "loss": 30.3185, "step": 16181 }, { "epoch": 385.28656716417913, "grad_norm": 18.662813186645508, "learning_rate": 9.871428571428572e-06, "loss": 31.3118, "step": 16182 }, { "epoch": 385.31044776119404, "grad_norm": 19.963438034057617, "learning_rate": 9.870833333333334e-06, "loss": 32.0171, "step": 16183 }, { "epoch": 385.33432835820895, "grad_norm": 20.611982345581055, "learning_rate": 9.870238095238096e-06, "loss": 31.6538, "step": 16184 }, { "epoch": 385.35820895522386, "grad_norm": 18.705074310302734, "learning_rate": 9.869642857142857e-06, "loss": 31.8616, "step": 16185 }, { "epoch": 385.3820895522388, "grad_norm": 20.165292739868164, "learning_rate": 9.869047619047621e-06, "loss": 32.7904, "step": 16186 }, { "epoch": 385.40597014925373, "grad_norm": 16.604703903198242, "learning_rate": 9.868452380952381e-06, "loss": 32.0385, "step": 16187 }, { "epoch": 385.42985074626864, "grad_norm": 19.941293716430664, "learning_rate": 9.867857142857143e-06, "loss": 31.5136, "step": 16188 }, { "epoch": 385.4537313432836, "grad_norm": 16.559743881225586, "learning_rate": 9.867261904761906e-06, "loss": 30.3442, "step": 16189 }, { "epoch": 385.4776119402985, "grad_norm": 21.531822204589844, "learning_rate": 9.866666666666668e-06, "loss": 32.8664, "step": 16190 }, { "epoch": 385.5014925373134, "grad_norm": 18.922697067260742, "learning_rate": 9.86607142857143e-06, "loss": 31.7802, "step": 16191 }, { "epoch": 385.52537313432833, "grad_norm": 18.97344207763672, "learning_rate": 9.86547619047619e-06, "loss": 31.3054, "step": 16192 }, { "epoch": 385.5492537313433, "grad_norm": 22.07037925720215, "learning_rate": 9.864880952380954e-06, "loss": 30.7271, "step": 16193 }, { "epoch": 385.5731343283582, "grad_norm": 19.429729461669922, "learning_rate": 9.864285714285715e-06, "loss": 31.6555, "step": 16194 }, { "epoch": 385.5970149253731, "grad_norm": 22.308879852294922, "learning_rate": 9.863690476190477e-06, "loss": 32.0755, "step": 16195 }, { "epoch": 385.6208955223881, "grad_norm": 20.618770599365234, "learning_rate": 9.863095238095239e-06, "loss": 31.2475, "step": 16196 }, { "epoch": 385.644776119403, "grad_norm": 18.875965118408203, "learning_rate": 9.862500000000001e-06, "loss": 32.2849, "step": 16197 }, { "epoch": 385.6686567164179, "grad_norm": 20.972700119018555, "learning_rate": 9.861904761904763e-06, "loss": 31.9794, "step": 16198 }, { "epoch": 385.6925373134328, "grad_norm": 20.304363250732422, "learning_rate": 9.861309523809524e-06, "loss": 32.1243, "step": 16199 }, { "epoch": 385.7164179104478, "grad_norm": 21.480579376220703, "learning_rate": 9.860714285714286e-06, "loss": 31.4321, "step": 16200 }, { "epoch": 385.7402985074627, "grad_norm": 19.26569175720215, "learning_rate": 9.860119047619048e-06, "loss": 31.5931, "step": 16201 }, { "epoch": 385.7641791044776, "grad_norm": 17.374101638793945, "learning_rate": 9.85952380952381e-06, "loss": 32.2479, "step": 16202 }, { "epoch": 385.78805970149256, "grad_norm": 22.569284439086914, "learning_rate": 9.858928571428572e-06, "loss": 31.0977, "step": 16203 }, { "epoch": 385.81194029850747, "grad_norm": 18.83555793762207, "learning_rate": 9.858333333333334e-06, "loss": 31.0797, "step": 16204 }, { "epoch": 385.8358208955224, "grad_norm": 18.740907669067383, "learning_rate": 9.857738095238097e-06, "loss": 30.9604, "step": 16205 }, { "epoch": 385.85970149253734, "grad_norm": 18.844552993774414, "learning_rate": 9.857142857142859e-06, "loss": 30.92, "step": 16206 }, { "epoch": 385.88358208955225, "grad_norm": 19.82196044921875, "learning_rate": 9.856547619047619e-06, "loss": 30.9999, "step": 16207 }, { "epoch": 385.90746268656716, "grad_norm": 17.69489860534668, "learning_rate": 9.85595238095238e-06, "loss": 32.1222, "step": 16208 }, { "epoch": 385.93134328358207, "grad_norm": 20.43714141845703, "learning_rate": 9.855357142857144e-06, "loss": 30.7134, "step": 16209 }, { "epoch": 385.95522388059703, "grad_norm": 19.41302490234375, "learning_rate": 9.854761904761906e-06, "loss": 32.0069, "step": 16210 }, { "epoch": 385.97910447761194, "grad_norm": 22.72101402282715, "learning_rate": 9.854166666666668e-06, "loss": 30.8264, "step": 16211 }, { "epoch": 386.0, "grad_norm": 15.351430892944336, "learning_rate": 9.85357142857143e-06, "loss": 27.5503, "step": 16212 }, { "epoch": 386.0238805970149, "grad_norm": 21.436670303344727, "learning_rate": 9.852976190476192e-06, "loss": 32.3433, "step": 16213 }, { "epoch": 386.0477611940299, "grad_norm": 16.964204788208008, "learning_rate": 9.852380952380953e-06, "loss": 29.4095, "step": 16214 }, { "epoch": 386.0716417910448, "grad_norm": 23.388107299804688, "learning_rate": 9.851785714285715e-06, "loss": 31.237, "step": 16215 }, { "epoch": 386.0955223880597, "grad_norm": 16.503149032592773, "learning_rate": 9.851190476190477e-06, "loss": 31.8172, "step": 16216 }, { "epoch": 386.1194029850746, "grad_norm": 24.22135353088379, "learning_rate": 9.850595238095239e-06, "loss": 29.8872, "step": 16217 }, { "epoch": 386.14328358208957, "grad_norm": 21.456871032714844, "learning_rate": 9.85e-06, "loss": 32.8561, "step": 16218 }, { "epoch": 386.1671641791045, "grad_norm": 20.144268035888672, "learning_rate": 9.849404761904762e-06, "loss": 32.1675, "step": 16219 }, { "epoch": 386.1910447761194, "grad_norm": 20.185773849487305, "learning_rate": 9.848809523809524e-06, "loss": 30.7105, "step": 16220 }, { "epoch": 386.21492537313435, "grad_norm": 18.498830795288086, "learning_rate": 9.848214285714288e-06, "loss": 31.1193, "step": 16221 }, { "epoch": 386.23880597014926, "grad_norm": 22.099266052246094, "learning_rate": 9.847619047619048e-06, "loss": 31.4756, "step": 16222 }, { "epoch": 386.26268656716417, "grad_norm": 20.17631721496582, "learning_rate": 9.84702380952381e-06, "loss": 31.4381, "step": 16223 }, { "epoch": 386.28656716417913, "grad_norm": 18.7167911529541, "learning_rate": 9.846428571428573e-06, "loss": 31.3872, "step": 16224 }, { "epoch": 386.31044776119404, "grad_norm": 18.53969383239746, "learning_rate": 9.845833333333335e-06, "loss": 31.404, "step": 16225 }, { "epoch": 386.33432835820895, "grad_norm": 19.445709228515625, "learning_rate": 9.845238095238097e-06, "loss": 29.5865, "step": 16226 }, { "epoch": 386.35820895522386, "grad_norm": 20.605234146118164, "learning_rate": 9.844642857142857e-06, "loss": 31.987, "step": 16227 }, { "epoch": 386.3820895522388, "grad_norm": 20.468053817749023, "learning_rate": 9.84404761904762e-06, "loss": 31.9659, "step": 16228 }, { "epoch": 386.40597014925373, "grad_norm": 21.1354923248291, "learning_rate": 9.843452380952382e-06, "loss": 31.8625, "step": 16229 }, { "epoch": 386.42985074626864, "grad_norm": 20.171497344970703, "learning_rate": 9.842857142857144e-06, "loss": 31.3537, "step": 16230 }, { "epoch": 386.4537313432836, "grad_norm": 25.776453018188477, "learning_rate": 9.842261904761906e-06, "loss": 31.3447, "step": 16231 }, { "epoch": 386.4776119402985, "grad_norm": 19.82379722595215, "learning_rate": 9.841666666666668e-06, "loss": 31.3169, "step": 16232 }, { "epoch": 386.5014925373134, "grad_norm": 23.489431381225586, "learning_rate": 9.84107142857143e-06, "loss": 31.9554, "step": 16233 }, { "epoch": 386.52537313432833, "grad_norm": 30.809371948242188, "learning_rate": 9.840476190476191e-06, "loss": 31.2285, "step": 16234 }, { "epoch": 386.5492537313433, "grad_norm": 22.05644989013672, "learning_rate": 9.839880952380953e-06, "loss": 32.0698, "step": 16235 }, { "epoch": 386.5731343283582, "grad_norm": 33.66168212890625, "learning_rate": 9.839285714285715e-06, "loss": 30.8832, "step": 16236 }, { "epoch": 386.5970149253731, "grad_norm": 25.675928115844727, "learning_rate": 9.838690476190477e-06, "loss": 31.5365, "step": 16237 }, { "epoch": 386.6208955223881, "grad_norm": 34.02192687988281, "learning_rate": 9.838095238095238e-06, "loss": 31.6749, "step": 16238 }, { "epoch": 386.644776119403, "grad_norm": 22.996620178222656, "learning_rate": 9.8375e-06, "loss": 31.2574, "step": 16239 }, { "epoch": 386.6686567164179, "grad_norm": 38.00943374633789, "learning_rate": 9.836904761904764e-06, "loss": 31.8429, "step": 16240 }, { "epoch": 386.6925373134328, "grad_norm": 26.652149200439453, "learning_rate": 9.836309523809524e-06, "loss": 32.4164, "step": 16241 }, { "epoch": 386.7164179104478, "grad_norm": 40.26640319824219, "learning_rate": 9.835714285714286e-06, "loss": 32.0015, "step": 16242 }, { "epoch": 386.7402985074627, "grad_norm": 30.79282569885254, "learning_rate": 9.83511904761905e-06, "loss": 31.1453, "step": 16243 }, { "epoch": 386.7641791044776, "grad_norm": 35.20528793334961, "learning_rate": 9.834523809523811e-06, "loss": 32.5224, "step": 16244 }, { "epoch": 386.78805970149256, "grad_norm": 30.823259353637695, "learning_rate": 9.833928571428573e-06, "loss": 31.7743, "step": 16245 }, { "epoch": 386.81194029850747, "grad_norm": 39.33624267578125, "learning_rate": 9.833333333333333e-06, "loss": 30.8932, "step": 16246 }, { "epoch": 386.8358208955224, "grad_norm": 31.379451751708984, "learning_rate": 9.832738095238096e-06, "loss": 31.0847, "step": 16247 }, { "epoch": 386.85970149253734, "grad_norm": 38.356510162353516, "learning_rate": 9.832142857142858e-06, "loss": 31.1917, "step": 16248 }, { "epoch": 386.88358208955225, "grad_norm": 35.24120330810547, "learning_rate": 9.83154761904762e-06, "loss": 31.9965, "step": 16249 }, { "epoch": 386.90746268656716, "grad_norm": 30.99354362487793, "learning_rate": 9.830952380952382e-06, "loss": 31.9035, "step": 16250 }, { "epoch": 386.93134328358207, "grad_norm": 33.42157745361328, "learning_rate": 9.830357142857144e-06, "loss": 31.9036, "step": 16251 }, { "epoch": 386.95522388059703, "grad_norm": 34.370689392089844, "learning_rate": 9.829761904761905e-06, "loss": 31.5059, "step": 16252 }, { "epoch": 386.97910447761194, "grad_norm": 26.31012725830078, "learning_rate": 9.829166666666667e-06, "loss": 31.8671, "step": 16253 }, { "epoch": 387.0, "grad_norm": 35.351680755615234, "learning_rate": 9.828571428571429e-06, "loss": 26.3941, "step": 16254 }, { "epoch": 387.0238805970149, "grad_norm": 36.14506530761719, "learning_rate": 9.827976190476191e-06, "loss": 32.1555, "step": 16255 }, { "epoch": 387.0477611940299, "grad_norm": 32.076114654541016, "learning_rate": 9.827380952380953e-06, "loss": 31.2854, "step": 16256 }, { "epoch": 387.0716417910448, "grad_norm": NaN, "learning_rate": 9.826785714285715e-06, "loss": 32.6211, "step": 16257 }, { "epoch": 387.0955223880597, "grad_norm": 33.81434631347656, "learning_rate": 9.826785714285715e-06, "loss": 31.3886, "step": 16258 }, { "epoch": 387.1194029850746, "grad_norm": 32.51563262939453, "learning_rate": 9.826190476190476e-06, "loss": 31.6933, "step": 16259 }, { "epoch": 387.14328358208957, "grad_norm": 27.100536346435547, "learning_rate": 9.82559523809524e-06, "loss": 31.3381, "step": 16260 }, { "epoch": 387.1671641791045, "grad_norm": 32.19075012207031, "learning_rate": 9.825000000000002e-06, "loss": 31.0665, "step": 16261 }, { "epoch": 387.1910447761194, "grad_norm": 29.4110050201416, "learning_rate": 9.824404761904762e-06, "loss": 31.431, "step": 16262 }, { "epoch": 387.21492537313435, "grad_norm": 34.70882797241211, "learning_rate": 9.823809523809524e-06, "loss": 32.1861, "step": 16263 }, { "epoch": 387.23880597014926, "grad_norm": 30.694459915161133, "learning_rate": 9.823214285714287e-06, "loss": 31.6779, "step": 16264 }, { "epoch": 387.26268656716417, "grad_norm": 33.5014533996582, "learning_rate": 9.822619047619049e-06, "loss": 31.0733, "step": 16265 }, { "epoch": 387.28656716417913, "grad_norm": 29.649723052978516, "learning_rate": 9.82202380952381e-06, "loss": 31.7184, "step": 16266 }, { "epoch": 387.31044776119404, "grad_norm": 33.55296325683594, "learning_rate": 9.821428571428573e-06, "loss": 31.9725, "step": 16267 }, { "epoch": 387.33432835820895, "grad_norm": 32.031715393066406, "learning_rate": 9.820833333333334e-06, "loss": 31.194, "step": 16268 }, { "epoch": 387.35820895522386, "grad_norm": 38.761688232421875, "learning_rate": 9.820238095238096e-06, "loss": 31.4994, "step": 16269 }, { "epoch": 387.3820895522388, "grad_norm": 37.0084342956543, "learning_rate": 9.819642857142858e-06, "loss": 32.1369, "step": 16270 }, { "epoch": 387.40597014925373, "grad_norm": 30.358522415161133, "learning_rate": 9.81904761904762e-06, "loss": 30.7688, "step": 16271 }, { "epoch": 387.42985074626864, "grad_norm": 24.442169189453125, "learning_rate": 9.818452380952382e-06, "loss": 30.1754, "step": 16272 }, { "epoch": 387.4537313432836, "grad_norm": 35.01533508300781, "learning_rate": 9.817857142857143e-06, "loss": 30.5212, "step": 16273 }, { "epoch": 387.4776119402985, "grad_norm": 32.43381118774414, "learning_rate": 9.817261904761905e-06, "loss": 32.2911, "step": 16274 }, { "epoch": 387.5014925373134, "grad_norm": 35.44935607910156, "learning_rate": 9.816666666666667e-06, "loss": 31.2107, "step": 16275 }, { "epoch": 387.52537313432833, "grad_norm": 35.55957794189453, "learning_rate": 9.81607142857143e-06, "loss": 32.334, "step": 16276 }, { "epoch": 387.5492537313433, "grad_norm": 29.662111282348633, "learning_rate": 9.81547619047619e-06, "loss": 31.8393, "step": 16277 }, { "epoch": 387.5731343283582, "grad_norm": 24.669198989868164, "learning_rate": 9.814880952380952e-06, "loss": 30.5643, "step": 16278 }, { "epoch": 387.5970149253731, "grad_norm": 33.51001739501953, "learning_rate": 9.814285714285716e-06, "loss": 30.8368, "step": 16279 }, { "epoch": 387.6208955223881, "grad_norm": 26.814899444580078, "learning_rate": 9.813690476190478e-06, "loss": 31.9893, "step": 16280 }, { "epoch": 387.644776119403, "grad_norm": 34.983394622802734, "learning_rate": 9.81309523809524e-06, "loss": 30.6433, "step": 16281 }, { "epoch": 387.6686567164179, "grad_norm": 30.86996841430664, "learning_rate": 9.8125e-06, "loss": 30.4695, "step": 16282 }, { "epoch": 387.6925373134328, "grad_norm": 33.628150939941406, "learning_rate": 9.811904761904763e-06, "loss": 31.8074, "step": 16283 }, { "epoch": 387.7164179104478, "grad_norm": 29.482221603393555, "learning_rate": 9.811309523809525e-06, "loss": 30.9069, "step": 16284 }, { "epoch": 387.7402985074627, "grad_norm": 34.709224700927734, "learning_rate": 9.810714285714287e-06, "loss": 30.5928, "step": 16285 }, { "epoch": 387.7641791044776, "grad_norm": 34.80800247192383, "learning_rate": 9.810119047619049e-06, "loss": 31.21, "step": 16286 }, { "epoch": 387.78805970149256, "grad_norm": 31.94898223876953, "learning_rate": 9.80952380952381e-06, "loss": 31.9303, "step": 16287 }, { "epoch": 387.81194029850747, "grad_norm": 32.53268051147461, "learning_rate": 9.808928571428572e-06, "loss": 32.5506, "step": 16288 }, { "epoch": 387.8358208955224, "grad_norm": 28.11086082458496, "learning_rate": 9.808333333333334e-06, "loss": 31.3513, "step": 16289 }, { "epoch": 387.85970149253734, "grad_norm": 23.117406845092773, "learning_rate": 9.807738095238096e-06, "loss": 30.733, "step": 16290 }, { "epoch": 387.88358208955225, "grad_norm": 34.6082649230957, "learning_rate": 9.807142857142858e-06, "loss": 30.7887, "step": 16291 }, { "epoch": 387.90746268656716, "grad_norm": 27.700349807739258, "learning_rate": 9.80654761904762e-06, "loss": 31.7152, "step": 16292 }, { "epoch": 387.93134328358207, "grad_norm": 39.212520599365234, "learning_rate": 9.805952380952381e-06, "loss": 31.4239, "step": 16293 }, { "epoch": 387.95522388059703, "grad_norm": 38.54717254638672, "learning_rate": 9.805357142857143e-06, "loss": 31.5663, "step": 16294 }, { "epoch": 387.97910447761194, "grad_norm": 28.900754928588867, "learning_rate": 9.804761904761907e-06, "loss": 31.3817, "step": 16295 }, { "epoch": 388.0, "grad_norm": 25.779775619506836, "learning_rate": 9.804166666666668e-06, "loss": 26.816, "step": 16296 }, { "epoch": 388.0238805970149, "grad_norm": 31.25226402282715, "learning_rate": 9.803571428571428e-06, "loss": 31.8264, "step": 16297 }, { "epoch": 388.0477611940299, "grad_norm": 25.336042404174805, "learning_rate": 9.80297619047619e-06, "loss": 30.7403, "step": 16298 }, { "epoch": 388.0716417910448, "grad_norm": 36.018619537353516, "learning_rate": 9.802380952380954e-06, "loss": 30.9437, "step": 16299 }, { "epoch": 388.0955223880597, "grad_norm": 28.848451614379883, "learning_rate": 9.801785714285716e-06, "loss": 31.3893, "step": 16300 }, { "epoch": 388.1194029850746, "grad_norm": 33.30680465698242, "learning_rate": 9.801190476190477e-06, "loss": 30.7243, "step": 16301 }, { "epoch": 388.14328358208957, "grad_norm": 28.282001495361328, "learning_rate": 9.80059523809524e-06, "loss": 29.7735, "step": 16302 }, { "epoch": 388.1671641791045, "grad_norm": 32.32350540161133, "learning_rate": 9.800000000000001e-06, "loss": 30.4583, "step": 16303 }, { "epoch": 388.1910447761194, "grad_norm": 28.372379302978516, "learning_rate": 9.799404761904763e-06, "loss": 31.7407, "step": 16304 }, { "epoch": 388.21492537313435, "grad_norm": 33.158390045166016, "learning_rate": 9.798809523809525e-06, "loss": 32.5392, "step": 16305 }, { "epoch": 388.23880597014926, "grad_norm": 27.013521194458008, "learning_rate": 9.798214285714286e-06, "loss": 30.975, "step": 16306 }, { "epoch": 388.26268656716417, "grad_norm": 33.014801025390625, "learning_rate": 9.797619047619048e-06, "loss": 30.342, "step": 16307 }, { "epoch": 388.28656716417913, "grad_norm": 28.111276626586914, "learning_rate": 9.79702380952381e-06, "loss": 30.6587, "step": 16308 }, { "epoch": 388.31044776119404, "grad_norm": 29.60170555114746, "learning_rate": 9.796428571428572e-06, "loss": 30.9499, "step": 16309 }, { "epoch": 388.33432835820895, "grad_norm": 25.55117416381836, "learning_rate": 9.795833333333334e-06, "loss": 29.306, "step": 16310 }, { "epoch": 388.35820895522386, "grad_norm": 32.451480865478516, "learning_rate": 9.795238095238097e-06, "loss": 32.0626, "step": 16311 }, { "epoch": 388.3820895522388, "grad_norm": 25.698986053466797, "learning_rate": 9.794642857142857e-06, "loss": 30.864, "step": 16312 }, { "epoch": 388.40597014925373, "grad_norm": 32.51515197753906, "learning_rate": 9.794047619047619e-06, "loss": 31.2816, "step": 16313 }, { "epoch": 388.42985074626864, "grad_norm": 30.733959197998047, "learning_rate": 9.793452380952383e-06, "loss": 32.5379, "step": 16314 }, { "epoch": 388.4537313432836, "grad_norm": 34.93669128417969, "learning_rate": 9.792857142857144e-06, "loss": 32.5641, "step": 16315 }, { "epoch": 388.4776119402985, "grad_norm": 33.55181121826172, "learning_rate": 9.792261904761906e-06, "loss": 32.4263, "step": 16316 }, { "epoch": 388.5014925373134, "grad_norm": 30.82851219177246, "learning_rate": 9.791666666666666e-06, "loss": 31.0749, "step": 16317 }, { "epoch": 388.52537313432833, "grad_norm": 27.4837646484375, "learning_rate": 9.79107142857143e-06, "loss": 31.9993, "step": 16318 }, { "epoch": 388.5492537313433, "grad_norm": 28.8560791015625, "learning_rate": 9.790476190476192e-06, "loss": 32.1957, "step": 16319 }, { "epoch": 388.5731343283582, "grad_norm": 26.107858657836914, "learning_rate": 9.789880952380953e-06, "loss": 31.3664, "step": 16320 }, { "epoch": 388.5970149253731, "grad_norm": 28.55649757385254, "learning_rate": 9.789285714285715e-06, "loss": 31.81, "step": 16321 }, { "epoch": 388.6208955223881, "grad_norm": 23.187219619750977, "learning_rate": 9.788690476190477e-06, "loss": 31.0063, "step": 16322 }, { "epoch": 388.644776119403, "grad_norm": 33.165069580078125, "learning_rate": 9.788095238095239e-06, "loss": 31.5465, "step": 16323 }, { "epoch": 388.6686567164179, "grad_norm": 24.102766036987305, "learning_rate": 9.7875e-06, "loss": 30.2347, "step": 16324 }, { "epoch": 388.6925373134328, "grad_norm": 29.791168212890625, "learning_rate": 9.786904761904763e-06, "loss": 30.8982, "step": 16325 }, { "epoch": 388.7164179104478, "grad_norm": 24.17734146118164, "learning_rate": 9.786309523809524e-06, "loss": 31.4424, "step": 16326 }, { "epoch": 388.7402985074627, "grad_norm": 27.78852081298828, "learning_rate": 9.785714285714286e-06, "loss": 31.2909, "step": 16327 }, { "epoch": 388.7641791044776, "grad_norm": 25.768030166625977, "learning_rate": 9.785119047619048e-06, "loss": 32.6746, "step": 16328 }, { "epoch": 388.78805970149256, "grad_norm": 27.267898559570312, "learning_rate": 9.78452380952381e-06, "loss": 31.4876, "step": 16329 }, { "epoch": 388.81194029850747, "grad_norm": 19.101016998291016, "learning_rate": 9.783928571428573e-06, "loss": 32.2565, "step": 16330 }, { "epoch": 388.8358208955224, "grad_norm": 28.757253646850586, "learning_rate": 9.783333333333335e-06, "loss": 32.2536, "step": 16331 }, { "epoch": 388.85970149253734, "grad_norm": 22.96048927307129, "learning_rate": 9.782738095238095e-06, "loss": 31.7659, "step": 16332 }, { "epoch": 388.88358208955225, "grad_norm": 26.02690315246582, "learning_rate": 9.782142857142857e-06, "loss": 31.5862, "step": 16333 }, { "epoch": 388.90746268656716, "grad_norm": 21.96470832824707, "learning_rate": 9.78154761904762e-06, "loss": 31.8278, "step": 16334 }, { "epoch": 388.93134328358207, "grad_norm": 26.473949432373047, "learning_rate": 9.780952380952382e-06, "loss": 30.8082, "step": 16335 }, { "epoch": 388.95522388059703, "grad_norm": 24.845199584960938, "learning_rate": 9.780357142857142e-06, "loss": 31.9927, "step": 16336 }, { "epoch": 388.97910447761194, "grad_norm": 21.908987045288086, "learning_rate": 9.779761904761906e-06, "loss": 31.3995, "step": 16337 }, { "epoch": 389.0, "grad_norm": 21.260526657104492, "learning_rate": 9.779166666666668e-06, "loss": 27.1762, "step": 16338 }, { "epoch": 389.0238805970149, "grad_norm": 23.290285110473633, "learning_rate": 9.77857142857143e-06, "loss": 32.5599, "step": 16339 }, { "epoch": 389.0477611940299, "grad_norm": 20.371511459350586, "learning_rate": 9.777976190476191e-06, "loss": 31.3901, "step": 16340 }, { "epoch": 389.0716417910448, "grad_norm": 24.474977493286133, "learning_rate": 9.777380952380953e-06, "loss": 30.5494, "step": 16341 }, { "epoch": 389.0955223880597, "grad_norm": 23.1691951751709, "learning_rate": 9.776785714285715e-06, "loss": 30.411, "step": 16342 }, { "epoch": 389.1194029850746, "grad_norm": 18.204683303833008, "learning_rate": 9.776190476190477e-06, "loss": 31.3261, "step": 16343 }, { "epoch": 389.14328358208957, "grad_norm": 21.73653793334961, "learning_rate": 9.775595238095239e-06, "loss": 31.2931, "step": 16344 }, { "epoch": 389.1671641791045, "grad_norm": 19.842016220092773, "learning_rate": 9.775e-06, "loss": 31.1381, "step": 16345 }, { "epoch": 389.1910447761194, "grad_norm": NaN, "learning_rate": 9.774404761904762e-06, "loss": 49.7906, "step": 16346 }, { "epoch": 389.21492537313435, "grad_norm": 21.1848201751709, "learning_rate": 9.774404761904762e-06, "loss": 30.2326, "step": 16347 }, { "epoch": 389.23880597014926, "grad_norm": 19.22425079345703, "learning_rate": 9.773809523809524e-06, "loss": 31.0529, "step": 16348 }, { "epoch": 389.26268656716417, "grad_norm": 17.01042366027832, "learning_rate": 9.773214285714286e-06, "loss": 31.4719, "step": 16349 }, { "epoch": 389.28656716417913, "grad_norm": 29.845762252807617, "learning_rate": 9.77261904761905e-06, "loss": 30.655, "step": 16350 }, { "epoch": 389.31044776119404, "grad_norm": 22.23818588256836, "learning_rate": 9.772023809523811e-06, "loss": 31.7885, "step": 16351 }, { "epoch": 389.33432835820895, "grad_norm": 27.320762634277344, "learning_rate": 9.771428571428571e-06, "loss": 30.5316, "step": 16352 }, { "epoch": 389.35820895522386, "grad_norm": 26.989601135253906, "learning_rate": 9.770833333333333e-06, "loss": 31.1129, "step": 16353 }, { "epoch": 389.3820895522388, "grad_norm": 21.75162696838379, "learning_rate": 9.770238095238097e-06, "loss": 31.9033, "step": 16354 }, { "epoch": 389.40597014925373, "grad_norm": 25.920270919799805, "learning_rate": 9.769642857142858e-06, "loss": 33.0053, "step": 16355 }, { "epoch": 389.42985074626864, "grad_norm": 21.13802719116211, "learning_rate": 9.76904761904762e-06, "loss": 32.3808, "step": 16356 }, { "epoch": 389.4537313432836, "grad_norm": 20.37413215637207, "learning_rate": 9.768452380952382e-06, "loss": 30.5768, "step": 16357 }, { "epoch": 389.4776119402985, "grad_norm": 26.006126403808594, "learning_rate": 9.767857142857144e-06, "loss": 32.0527, "step": 16358 }, { "epoch": 389.5014925373134, "grad_norm": 19.146987915039062, "learning_rate": 9.767261904761906e-06, "loss": 30.2903, "step": 16359 }, { "epoch": 389.52537313432833, "grad_norm": 25.838464736938477, "learning_rate": 9.766666666666667e-06, "loss": 31.661, "step": 16360 }, { "epoch": 389.5492537313433, "grad_norm": 23.29790496826172, "learning_rate": 9.76607142857143e-06, "loss": 32.0736, "step": 16361 }, { "epoch": 389.5731343283582, "grad_norm": 17.556703567504883, "learning_rate": 9.765476190476191e-06, "loss": 32.0744, "step": 16362 }, { "epoch": 389.5970149253731, "grad_norm": 26.709442138671875, "learning_rate": 9.764880952380953e-06, "loss": 29.9062, "step": 16363 }, { "epoch": 389.6208955223881, "grad_norm": 20.65360450744629, "learning_rate": 9.764285714285715e-06, "loss": 32.6888, "step": 16364 }, { "epoch": 389.644776119403, "grad_norm": 21.492149353027344, "learning_rate": 9.763690476190477e-06, "loss": 31.8798, "step": 16365 }, { "epoch": 389.6686567164179, "grad_norm": 24.044824600219727, "learning_rate": 9.76309523809524e-06, "loss": 31.6606, "step": 16366 }, { "epoch": 389.6925373134328, "grad_norm": 21.279258728027344, "learning_rate": 9.7625e-06, "loss": 31.6572, "step": 16367 }, { "epoch": 389.7164179104478, "grad_norm": 19.301111221313477, "learning_rate": 9.761904761904762e-06, "loss": 30.2098, "step": 16368 }, { "epoch": 389.7402985074627, "grad_norm": 27.06990623474121, "learning_rate": 9.761309523809524e-06, "loss": 30.784, "step": 16369 }, { "epoch": 389.7641791044776, "grad_norm": 17.46279525756836, "learning_rate": 9.760714285714287e-06, "loss": 29.7866, "step": 16370 }, { "epoch": 389.78805970149256, "grad_norm": 35.74827575683594, "learning_rate": 9.760119047619049e-06, "loss": 31.9352, "step": 16371 }, { "epoch": 389.81194029850747, "grad_norm": 20.53690528869629, "learning_rate": 9.75952380952381e-06, "loss": 30.6821, "step": 16372 }, { "epoch": 389.8358208955224, "grad_norm": 34.603843688964844, "learning_rate": 9.758928571428573e-06, "loss": 31.6675, "step": 16373 }, { "epoch": 389.85970149253734, "grad_norm": 25.659683227539062, "learning_rate": 9.758333333333334e-06, "loss": 31.1865, "step": 16374 }, { "epoch": 389.88358208955225, "grad_norm": 37.89887619018555, "learning_rate": 9.757738095238096e-06, "loss": 31.7436, "step": 16375 }, { "epoch": 389.90746268656716, "grad_norm": 31.363710403442383, "learning_rate": 9.757142857142858e-06, "loss": 33.2901, "step": 16376 }, { "epoch": 389.93134328358207, "grad_norm": 33.875370025634766, "learning_rate": 9.75654761904762e-06, "loss": 31.0055, "step": 16377 }, { "epoch": 389.95522388059703, "grad_norm": 31.001718521118164, "learning_rate": 9.755952380952382e-06, "loss": 33.0547, "step": 16378 }, { "epoch": 389.97910447761194, "grad_norm": 35.636051177978516, "learning_rate": 9.755357142857144e-06, "loss": 31.671, "step": 16379 }, { "epoch": 390.0, "grad_norm": 22.893800735473633, "learning_rate": 9.754761904761905e-06, "loss": 26.8595, "step": 16380 }, { "epoch": 390.0238805970149, "grad_norm": 29.35207748413086, "learning_rate": 9.754166666666667e-06, "loss": 29.6748, "step": 16381 }, { "epoch": 390.0477611940299, "grad_norm": 25.894886016845703, "learning_rate": 9.753571428571429e-06, "loss": 32.133, "step": 16382 }, { "epoch": 390.0716417910448, "grad_norm": 33.5870475769043, "learning_rate": 9.75297619047619e-06, "loss": 31.5938, "step": 16383 }, { "epoch": 390.0955223880597, "grad_norm": 26.548860549926758, "learning_rate": 9.752380952380953e-06, "loss": 30.8034, "step": 16384 }, { "epoch": 390.1194029850746, "grad_norm": 35.63127899169922, "learning_rate": 9.751785714285716e-06, "loss": 31.7315, "step": 16385 }, { "epoch": 390.14328358208957, "grad_norm": 28.513118743896484, "learning_rate": 9.751190476190478e-06, "loss": 31.5308, "step": 16386 }, { "epoch": 390.1671641791045, "grad_norm": 35.640899658203125, "learning_rate": 9.750595238095238e-06, "loss": 31.6477, "step": 16387 }, { "epoch": 390.1910447761194, "grad_norm": 26.226648330688477, "learning_rate": 9.75e-06, "loss": 30.4413, "step": 16388 }, { "epoch": 390.21492537313435, "grad_norm": 37.20478820800781, "learning_rate": 9.749404761904763e-06, "loss": 31.3381, "step": 16389 }, { "epoch": 390.23880597014926, "grad_norm": 29.90780258178711, "learning_rate": 9.748809523809525e-06, "loss": 30.9246, "step": 16390 }, { "epoch": 390.26268656716417, "grad_norm": 32.69132614135742, "learning_rate": 9.748214285714287e-06, "loss": 31.653, "step": 16391 }, { "epoch": 390.28656716417913, "grad_norm": 27.816987991333008, "learning_rate": 9.747619047619049e-06, "loss": 31.411, "step": 16392 }, { "epoch": 390.31044776119404, "grad_norm": 29.577363967895508, "learning_rate": 9.74702380952381e-06, "loss": 31.654, "step": 16393 }, { "epoch": 390.33432835820895, "grad_norm": 22.633867263793945, "learning_rate": 9.746428571428572e-06, "loss": 31.4019, "step": 16394 }, { "epoch": 390.35820895522386, "grad_norm": 39.5258903503418, "learning_rate": 9.745833333333334e-06, "loss": 29.8451, "step": 16395 }, { "epoch": 390.3820895522388, "grad_norm": 32.41792297363281, "learning_rate": 9.745238095238096e-06, "loss": 30.2906, "step": 16396 }, { "epoch": 390.40597014925373, "grad_norm": 37.13993835449219, "learning_rate": 9.744642857142858e-06, "loss": 30.4198, "step": 16397 }, { "epoch": 390.42985074626864, "grad_norm": 38.53489685058594, "learning_rate": 9.74404761904762e-06, "loss": 31.6754, "step": 16398 }, { "epoch": 390.4537313432836, "grad_norm": 27.62884521484375, "learning_rate": 9.743452380952381e-06, "loss": 31.3364, "step": 16399 }, { "epoch": 390.4776119402985, "grad_norm": 31.210311889648438, "learning_rate": 9.742857142857143e-06, "loss": 31.2983, "step": 16400 }, { "epoch": 390.5014925373134, "grad_norm": 29.904752731323242, "learning_rate": 9.742261904761907e-06, "loss": 32.132, "step": 16401 }, { "epoch": 390.52537313432833, "grad_norm": 25.746906280517578, "learning_rate": 9.741666666666667e-06, "loss": 31.2874, "step": 16402 }, { "epoch": 390.5492537313433, "grad_norm": 30.04813575744629, "learning_rate": 9.741071428571429e-06, "loss": 30.7112, "step": 16403 }, { "epoch": 390.5731343283582, "grad_norm": 24.54204750061035, "learning_rate": 9.74047619047619e-06, "loss": 32.5849, "step": 16404 }, { "epoch": 390.5970149253731, "grad_norm": 33.865020751953125, "learning_rate": 9.739880952380954e-06, "loss": 31.1069, "step": 16405 }, { "epoch": 390.6208955223881, "grad_norm": 30.01352882385254, "learning_rate": 9.739285714285716e-06, "loss": 32.3905, "step": 16406 }, { "epoch": 390.644776119403, "grad_norm": 34.7811393737793, "learning_rate": 9.738690476190476e-06, "loss": 30.8243, "step": 16407 }, { "epoch": 390.6686567164179, "grad_norm": 29.043743133544922, "learning_rate": 9.73809523809524e-06, "loss": 32.4975, "step": 16408 }, { "epoch": 390.6925373134328, "grad_norm": 30.478422164916992, "learning_rate": 9.737500000000001e-06, "loss": 31.3406, "step": 16409 }, { "epoch": 390.7164179104478, "grad_norm": 25.178617477416992, "learning_rate": 9.736904761904763e-06, "loss": 30.8249, "step": 16410 }, { "epoch": 390.7402985074627, "grad_norm": 30.27109146118164, "learning_rate": 9.736309523809525e-06, "loss": 32.6285, "step": 16411 }, { "epoch": 390.7641791044776, "grad_norm": 27.961475372314453, "learning_rate": 9.735714285714287e-06, "loss": 31.5123, "step": 16412 }, { "epoch": 390.78805970149256, "grad_norm": 30.353574752807617, "learning_rate": 9.735119047619048e-06, "loss": 31.6004, "step": 16413 }, { "epoch": 390.81194029850747, "grad_norm": 27.45003318786621, "learning_rate": 9.73452380952381e-06, "loss": 31.9344, "step": 16414 }, { "epoch": 390.8358208955224, "grad_norm": 30.07430648803711, "learning_rate": 9.733928571428572e-06, "loss": 32.1091, "step": 16415 }, { "epoch": 390.85970149253734, "grad_norm": 26.37547492980957, "learning_rate": 9.733333333333334e-06, "loss": 31.8889, "step": 16416 }, { "epoch": 390.88358208955225, "grad_norm": 30.38176155090332, "learning_rate": 9.732738095238096e-06, "loss": 30.271, "step": 16417 }, { "epoch": 390.90746268656716, "grad_norm": 27.770626068115234, "learning_rate": 9.732142857142858e-06, "loss": 31.9441, "step": 16418 }, { "epoch": 390.93134328358207, "grad_norm": 32.194908142089844, "learning_rate": 9.73154761904762e-06, "loss": 31.8723, "step": 16419 }, { "epoch": 390.95522388059703, "grad_norm": 28.570674896240234, "learning_rate": 9.730952380952383e-06, "loss": 31.6136, "step": 16420 }, { "epoch": 390.97910447761194, "grad_norm": 26.941953659057617, "learning_rate": 9.730357142857145e-06, "loss": 30.484, "step": 16421 }, { "epoch": 391.0, "grad_norm": 23.557344436645508, "learning_rate": 9.729761904761905e-06, "loss": 25.403, "step": 16422 }, { "epoch": 391.0238805970149, "grad_norm": 29.310739517211914, "learning_rate": 9.729166666666667e-06, "loss": 31.4626, "step": 16423 }, { "epoch": 391.0477611940299, "grad_norm": 24.535303115844727, "learning_rate": 9.72857142857143e-06, "loss": 31.0161, "step": 16424 }, { "epoch": 391.0716417910448, "grad_norm": 27.39189338684082, "learning_rate": 9.727976190476192e-06, "loss": 32.2284, "step": 16425 }, { "epoch": 391.0955223880597, "grad_norm": 20.453712463378906, "learning_rate": 9.727380952380954e-06, "loss": 31.7956, "step": 16426 }, { "epoch": 391.1194029850746, "grad_norm": NaN, "learning_rate": 9.726785714285715e-06, "loss": 51.877, "step": 16427 }, { "epoch": 391.14328358208957, "grad_norm": 26.24725914001465, "learning_rate": 9.726785714285715e-06, "loss": 30.8665, "step": 16428 }, { "epoch": 391.1671641791045, "grad_norm": 21.113723754882812, "learning_rate": 9.726190476190477e-06, "loss": 30.9764, "step": 16429 }, { "epoch": 391.1910447761194, "grad_norm": 25.212806701660156, "learning_rate": 9.725595238095239e-06, "loss": 31.2019, "step": 16430 }, { "epoch": 391.21492537313435, "grad_norm": 22.409730911254883, "learning_rate": 9.725000000000001e-06, "loss": 31.8457, "step": 16431 }, { "epoch": 391.23880597014926, "grad_norm": 24.708051681518555, "learning_rate": 9.724404761904763e-06, "loss": 32.0696, "step": 16432 }, { "epoch": 391.26268656716417, "grad_norm": 22.0627384185791, "learning_rate": 9.723809523809525e-06, "loss": 31.355, "step": 16433 }, { "epoch": 391.28656716417913, "grad_norm": 24.677188873291016, "learning_rate": 9.723214285714286e-06, "loss": 29.9739, "step": 16434 }, { "epoch": 391.31044776119404, "grad_norm": 20.176437377929688, "learning_rate": 9.722619047619048e-06, "loss": 32.4173, "step": 16435 }, { "epoch": 391.33432835820895, "grad_norm": 23.677534103393555, "learning_rate": 9.72202380952381e-06, "loss": 31.065, "step": 16436 }, { "epoch": 391.35820895522386, "grad_norm": 21.294153213500977, "learning_rate": 9.721428571428573e-06, "loss": 32.0519, "step": 16437 }, { "epoch": 391.3820895522388, "grad_norm": 18.736831665039062, "learning_rate": 9.720833333333334e-06, "loss": 30.2835, "step": 16438 }, { "epoch": 391.40597014925373, "grad_norm": 18.900318145751953, "learning_rate": 9.720238095238095e-06, "loss": 31.8502, "step": 16439 }, { "epoch": 391.42985074626864, "grad_norm": 19.314485549926758, "learning_rate": 9.719642857142859e-06, "loss": 31.2654, "step": 16440 }, { "epoch": 391.4537313432836, "grad_norm": 17.646129608154297, "learning_rate": 9.71904761904762e-06, "loss": 30.9776, "step": 16441 }, { "epoch": 391.4776119402985, "grad_norm": 19.75379180908203, "learning_rate": 9.71845238095238e-06, "loss": 30.3464, "step": 16442 }, { "epoch": 391.5014925373134, "grad_norm": 17.314546585083008, "learning_rate": 9.717857142857143e-06, "loss": 31.5072, "step": 16443 }, { "epoch": 391.52537313432833, "grad_norm": 24.70691680908203, "learning_rate": 9.717261904761906e-06, "loss": 30.76, "step": 16444 }, { "epoch": 391.5492537313433, "grad_norm": 21.464452743530273, "learning_rate": 9.716666666666668e-06, "loss": 31.7995, "step": 16445 }, { "epoch": 391.5731343283582, "grad_norm": 18.588397979736328, "learning_rate": 9.71607142857143e-06, "loss": 31.1106, "step": 16446 }, { "epoch": 391.5970149253731, "grad_norm": 20.397750854492188, "learning_rate": 9.715476190476192e-06, "loss": 31.4806, "step": 16447 }, { "epoch": 391.6208955223881, "grad_norm": 18.86182403564453, "learning_rate": 9.714880952380953e-06, "loss": 31.6908, "step": 16448 }, { "epoch": 391.644776119403, "grad_norm": 21.76464080810547, "learning_rate": 9.714285714285715e-06, "loss": 31.6445, "step": 16449 }, { "epoch": 391.6686567164179, "grad_norm": 17.58875274658203, "learning_rate": 9.713690476190477e-06, "loss": 32.1033, "step": 16450 }, { "epoch": 391.6925373134328, "grad_norm": 19.376646041870117, "learning_rate": 9.713095238095239e-06, "loss": 30.7335, "step": 16451 }, { "epoch": 391.7164179104478, "grad_norm": 25.674732208251953, "learning_rate": 9.7125e-06, "loss": 30.7509, "step": 16452 }, { "epoch": 391.7402985074627, "grad_norm": 18.902231216430664, "learning_rate": 9.711904761904762e-06, "loss": 32.1143, "step": 16453 }, { "epoch": 391.7641791044776, "grad_norm": 17.41876220703125, "learning_rate": 9.711309523809524e-06, "loss": 30.5472, "step": 16454 }, { "epoch": 391.78805970149256, "grad_norm": 18.64491844177246, "learning_rate": 9.710714285714286e-06, "loss": 31.4101, "step": 16455 }, { "epoch": 391.81194029850747, "grad_norm": 20.355897903442383, "learning_rate": 9.71011904761905e-06, "loss": 30.8667, "step": 16456 }, { "epoch": 391.8358208955224, "grad_norm": 17.163890838623047, "learning_rate": 9.70952380952381e-06, "loss": 30.9724, "step": 16457 }, { "epoch": 391.85970149253734, "grad_norm": 20.216176986694336, "learning_rate": 9.708928571428571e-06, "loss": 31.3025, "step": 16458 }, { "epoch": 391.88358208955225, "grad_norm": 20.08291244506836, "learning_rate": 9.708333333333333e-06, "loss": 33.5975, "step": 16459 }, { "epoch": 391.90746268656716, "grad_norm": 17.577011108398438, "learning_rate": 9.707738095238097e-06, "loss": 29.7382, "step": 16460 }, { "epoch": 391.93134328358207, "grad_norm": 17.22465705871582, "learning_rate": 9.707142857142859e-06, "loss": 31.1636, "step": 16461 }, { "epoch": 391.95522388059703, "grad_norm": 18.496328353881836, "learning_rate": 9.706547619047619e-06, "loss": 30.2448, "step": 16462 }, { "epoch": 391.97910447761194, "grad_norm": 17.75228500366211, "learning_rate": 9.705952380952382e-06, "loss": 30.703, "step": 16463 }, { "epoch": 392.0, "grad_norm": 18.882986068725586, "learning_rate": 9.705357142857144e-06, "loss": 28.359, "step": 16464 }, { "epoch": 392.0238805970149, "grad_norm": 17.181751251220703, "learning_rate": 9.704761904761906e-06, "loss": 31.6314, "step": 16465 }, { "epoch": 392.0477611940299, "grad_norm": 19.29618263244629, "learning_rate": 9.704166666666668e-06, "loss": 32.1336, "step": 16466 }, { "epoch": 392.0716417910448, "grad_norm": 19.281169891357422, "learning_rate": 9.70357142857143e-06, "loss": 31.0754, "step": 16467 }, { "epoch": 392.0955223880597, "grad_norm": 22.606828689575195, "learning_rate": 9.702976190476191e-06, "loss": 31.5669, "step": 16468 }, { "epoch": 392.1194029850746, "grad_norm": 19.79329490661621, "learning_rate": 9.702380952380953e-06, "loss": 31.5799, "step": 16469 }, { "epoch": 392.14328358208957, "grad_norm": 18.98135757446289, "learning_rate": 9.701785714285715e-06, "loss": 30.4859, "step": 16470 }, { "epoch": 392.1671641791045, "grad_norm": 20.63869285583496, "learning_rate": 9.701190476190477e-06, "loss": 32.6835, "step": 16471 }, { "epoch": 392.1910447761194, "grad_norm": 17.536327362060547, "learning_rate": 9.700595238095238e-06, "loss": 31.5062, "step": 16472 }, { "epoch": 392.21492537313435, "grad_norm": 18.95645523071289, "learning_rate": 9.7e-06, "loss": 31.2597, "step": 16473 }, { "epoch": 392.23880597014926, "grad_norm": 18.115530014038086, "learning_rate": 9.699404761904762e-06, "loss": 31.0092, "step": 16474 }, { "epoch": 392.26268656716417, "grad_norm": 17.346193313598633, "learning_rate": 9.698809523809526e-06, "loss": 31.391, "step": 16475 }, { "epoch": 392.28656716417913, "grad_norm": 20.828800201416016, "learning_rate": 9.698214285714287e-06, "loss": 31.3353, "step": 16476 }, { "epoch": 392.31044776119404, "grad_norm": 17.44601058959961, "learning_rate": 9.697619047619048e-06, "loss": 31.3625, "step": 16477 }, { "epoch": 392.33432835820895, "grad_norm": 18.767868041992188, "learning_rate": 9.69702380952381e-06, "loss": 31.5475, "step": 16478 }, { "epoch": 392.35820895522386, "grad_norm": 15.861065864562988, "learning_rate": 9.696428571428573e-06, "loss": 31.8539, "step": 16479 }, { "epoch": 392.3820895522388, "grad_norm": 21.5611515045166, "learning_rate": 9.695833333333335e-06, "loss": 32.9212, "step": 16480 }, { "epoch": 392.40597014925373, "grad_norm": 21.332988739013672, "learning_rate": 9.695238095238096e-06, "loss": 32.4479, "step": 16481 }, { "epoch": 392.42985074626864, "grad_norm": 23.342430114746094, "learning_rate": 9.694642857142858e-06, "loss": 31.6261, "step": 16482 }, { "epoch": 392.4537313432836, "grad_norm": 19.13523292541504, "learning_rate": 9.69404761904762e-06, "loss": 30.7945, "step": 16483 }, { "epoch": 392.4776119402985, "grad_norm": 16.57522201538086, "learning_rate": 9.693452380952382e-06, "loss": 30.6693, "step": 16484 }, { "epoch": 392.5014925373134, "grad_norm": 17.404447555541992, "learning_rate": 9.692857142857144e-06, "loss": 30.272, "step": 16485 }, { "epoch": 392.52537313432833, "grad_norm": 16.987762451171875, "learning_rate": 9.692261904761906e-06, "loss": 31.2822, "step": 16486 }, { "epoch": 392.5492537313433, "grad_norm": 20.205286026000977, "learning_rate": 9.691666666666667e-06, "loss": 31.4389, "step": 16487 }, { "epoch": 392.5731343283582, "grad_norm": 25.10284423828125, "learning_rate": 9.691071428571429e-06, "loss": 31.4368, "step": 16488 }, { "epoch": 392.5970149253731, "grad_norm": 20.7637939453125, "learning_rate": 9.690476190476191e-06, "loss": 32.2761, "step": 16489 }, { "epoch": 392.6208955223881, "grad_norm": 19.260000228881836, "learning_rate": 9.689880952380953e-06, "loss": 31.633, "step": 16490 }, { "epoch": 392.644776119403, "grad_norm": 30.960908889770508, "learning_rate": 9.689285714285716e-06, "loss": 30.5168, "step": 16491 }, { "epoch": 392.6686567164179, "grad_norm": 18.382041931152344, "learning_rate": 9.688690476190476e-06, "loss": 30.6955, "step": 16492 }, { "epoch": 392.6925373134328, "grad_norm": 33.018348693847656, "learning_rate": 9.688095238095238e-06, "loss": 30.4928, "step": 16493 }, { "epoch": 392.7164179104478, "grad_norm": 22.341228485107422, "learning_rate": 9.6875e-06, "loss": 30.7801, "step": 16494 }, { "epoch": 392.7402985074627, "grad_norm": 32.300472259521484, "learning_rate": 9.686904761904764e-06, "loss": 30.6573, "step": 16495 }, { "epoch": 392.7641791044776, "grad_norm": 25.2115421295166, "learning_rate": 9.686309523809525e-06, "loss": 31.3364, "step": 16496 }, { "epoch": 392.78805970149256, "grad_norm": 28.91110610961914, "learning_rate": 9.685714285714285e-06, "loss": 30.7299, "step": 16497 }, { "epoch": 392.81194029850747, "grad_norm": 24.175539016723633, "learning_rate": 9.685119047619049e-06, "loss": 30.2812, "step": 16498 }, { "epoch": 392.8358208955224, "grad_norm": 28.419218063354492, "learning_rate": 9.68452380952381e-06, "loss": 31.3165, "step": 16499 }, { "epoch": 392.85970149253734, "grad_norm": 26.446331024169922, "learning_rate": 9.683928571428573e-06, "loss": 32.2847, "step": 16500 }, { "epoch": 392.88358208955225, "grad_norm": 19.74005889892578, "learning_rate": 9.683333333333334e-06, "loss": 29.7685, "step": 16501 }, { "epoch": 392.90746268656716, "grad_norm": 24.966724395751953, "learning_rate": 9.682738095238096e-06, "loss": 30.26, "step": 16502 }, { "epoch": 392.93134328358207, "grad_norm": 19.56442642211914, "learning_rate": 9.682142857142858e-06, "loss": 31.7183, "step": 16503 }, { "epoch": 392.95522388059703, "grad_norm": 23.34406280517578, "learning_rate": 9.68154761904762e-06, "loss": 31.2273, "step": 16504 }, { "epoch": 392.97910447761194, "grad_norm": 20.585844039916992, "learning_rate": 9.680952380952382e-06, "loss": 31.4356, "step": 16505 }, { "epoch": 393.0, "grad_norm": 17.45985984802246, "learning_rate": 9.680357142857143e-06, "loss": 27.6711, "step": 16506 }, { "epoch": 393.0238805970149, "grad_norm": 18.485904693603516, "learning_rate": 9.679761904761905e-06, "loss": 30.6437, "step": 16507 }, { "epoch": 393.0477611940299, "grad_norm": 25.823047637939453, "learning_rate": 9.679166666666667e-06, "loss": 31.4364, "step": 16508 }, { "epoch": 393.0716417910448, "grad_norm": 19.41379737854004, "learning_rate": 9.678571428571429e-06, "loss": 31.3302, "step": 16509 }, { "epoch": 393.0955223880597, "grad_norm": 17.947589874267578, "learning_rate": 9.677976190476192e-06, "loss": 32.2393, "step": 16510 }, { "epoch": 393.1194029850746, "grad_norm": 21.385234832763672, "learning_rate": 9.677380952380954e-06, "loss": 30.9784, "step": 16511 }, { "epoch": 393.14328358208957, "grad_norm": 17.71302604675293, "learning_rate": 9.676785714285714e-06, "loss": 30.7161, "step": 16512 }, { "epoch": 393.1671641791045, "grad_norm": 23.4676456451416, "learning_rate": 9.676190476190476e-06, "loss": 30.3894, "step": 16513 }, { "epoch": 393.1910447761194, "grad_norm": 19.868331909179688, "learning_rate": 9.67559523809524e-06, "loss": 30.8656, "step": 16514 }, { "epoch": 393.21492537313435, "grad_norm": 22.355960845947266, "learning_rate": 9.675000000000001e-06, "loss": 31.1684, "step": 16515 }, { "epoch": 393.23880597014926, "grad_norm": 17.21105194091797, "learning_rate": 9.674404761904763e-06, "loss": 31.6164, "step": 16516 }, { "epoch": 393.26268656716417, "grad_norm": 21.77312469482422, "learning_rate": 9.673809523809525e-06, "loss": 31.5075, "step": 16517 }, { "epoch": 393.28656716417913, "grad_norm": 21.612089157104492, "learning_rate": 9.673214285714287e-06, "loss": 32.278, "step": 16518 }, { "epoch": 393.31044776119404, "grad_norm": 20.267621994018555, "learning_rate": 9.672619047619049e-06, "loss": 31.1718, "step": 16519 }, { "epoch": 393.33432835820895, "grad_norm": 18.022846221923828, "learning_rate": 9.67202380952381e-06, "loss": 31.1893, "step": 16520 }, { "epoch": 393.35820895522386, "grad_norm": 20.32938575744629, "learning_rate": 9.671428571428572e-06, "loss": 30.6629, "step": 16521 }, { "epoch": 393.3820895522388, "grad_norm": 21.79494857788086, "learning_rate": 9.670833333333334e-06, "loss": 31.9163, "step": 16522 }, { "epoch": 393.40597014925373, "grad_norm": 18.37645721435547, "learning_rate": 9.670238095238096e-06, "loss": 31.9426, "step": 16523 }, { "epoch": 393.42985074626864, "grad_norm": 20.292821884155273, "learning_rate": 9.669642857142858e-06, "loss": 31.664, "step": 16524 }, { "epoch": 393.4537313432836, "grad_norm": 21.310020446777344, "learning_rate": 9.66904761904762e-06, "loss": 31.7991, "step": 16525 }, { "epoch": 393.4776119402985, "grad_norm": 23.35830307006836, "learning_rate": 9.668452380952383e-06, "loss": 32.1127, "step": 16526 }, { "epoch": 393.5014925373134, "grad_norm": 20.169979095458984, "learning_rate": 9.667857142857143e-06, "loss": 30.6385, "step": 16527 }, { "epoch": 393.52537313432833, "grad_norm": 15.472123146057129, "learning_rate": 9.667261904761905e-06, "loss": 31.5138, "step": 16528 }, { "epoch": 393.5492537313433, "grad_norm": 19.554691314697266, "learning_rate": 9.666666666666667e-06, "loss": 30.9746, "step": 16529 }, { "epoch": 393.5731343283582, "grad_norm": 23.469707489013672, "learning_rate": 9.66607142857143e-06, "loss": 31.5301, "step": 16530 }, { "epoch": 393.5970149253731, "grad_norm": 21.12062644958496, "learning_rate": 9.665476190476192e-06, "loss": 29.9563, "step": 16531 }, { "epoch": 393.6208955223881, "grad_norm": 16.849655151367188, "learning_rate": 9.664880952380952e-06, "loss": 29.8341, "step": 16532 }, { "epoch": 393.644776119403, "grad_norm": 16.26079750061035, "learning_rate": 9.664285714285716e-06, "loss": 30.2833, "step": 16533 }, { "epoch": 393.6686567164179, "grad_norm": 17.466516494750977, "learning_rate": 9.663690476190477e-06, "loss": 31.4941, "step": 16534 }, { "epoch": 393.6925373134328, "grad_norm": 21.156137466430664, "learning_rate": 9.66309523809524e-06, "loss": 31.4996, "step": 16535 }, { "epoch": 393.7164179104478, "grad_norm": 21.75018882751465, "learning_rate": 9.662500000000001e-06, "loss": 30.7024, "step": 16536 }, { "epoch": 393.7402985074627, "grad_norm": 25.015518188476562, "learning_rate": 9.661904761904763e-06, "loss": 31.4117, "step": 16537 }, { "epoch": 393.7641791044776, "grad_norm": 16.228118896484375, "learning_rate": 9.661309523809525e-06, "loss": 32.1889, "step": 16538 }, { "epoch": 393.78805970149256, "grad_norm": 27.60285186767578, "learning_rate": 9.660714285714287e-06, "loss": 31.7577, "step": 16539 }, { "epoch": 393.81194029850747, "grad_norm": 20.638507843017578, "learning_rate": 9.660119047619048e-06, "loss": 31.9245, "step": 16540 }, { "epoch": 393.8358208955224, "grad_norm": 22.617639541625977, "learning_rate": 9.65952380952381e-06, "loss": 31.4598, "step": 16541 }, { "epoch": 393.85970149253734, "grad_norm": 24.75657081604004, "learning_rate": 9.658928571428572e-06, "loss": 30.8452, "step": 16542 }, { "epoch": 393.88358208955225, "grad_norm": 21.58110237121582, "learning_rate": 9.658333333333334e-06, "loss": 30.4439, "step": 16543 }, { "epoch": 393.90746268656716, "grad_norm": 18.69927978515625, "learning_rate": 9.657738095238096e-06, "loss": 30.6121, "step": 16544 }, { "epoch": 393.93134328358207, "grad_norm": 29.413753509521484, "learning_rate": 9.657142857142859e-06, "loss": 31.4029, "step": 16545 }, { "epoch": 393.95522388059703, "grad_norm": 19.673093795776367, "learning_rate": 9.656547619047621e-06, "loss": 31.0743, "step": 16546 }, { "epoch": 393.97910447761194, "grad_norm": 18.085433959960938, "learning_rate": 9.655952380952381e-06, "loss": 32.1028, "step": 16547 }, { "epoch": 394.0, "grad_norm": 23.433103561401367, "learning_rate": 9.655357142857143e-06, "loss": 27.2201, "step": 16548 }, { "epoch": 394.0238805970149, "grad_norm": 19.056753158569336, "learning_rate": 9.654761904761906e-06, "loss": 31.5069, "step": 16549 }, { "epoch": 394.0477611940299, "grad_norm": 18.356182098388672, "learning_rate": 9.654166666666668e-06, "loss": 30.1049, "step": 16550 }, { "epoch": 394.0716417910448, "grad_norm": 19.064579010009766, "learning_rate": 9.653571428571428e-06, "loss": 29.9186, "step": 16551 }, { "epoch": 394.0955223880597, "grad_norm": 19.439306259155273, "learning_rate": 9.652976190476192e-06, "loss": 30.6292, "step": 16552 }, { "epoch": 394.1194029850746, "grad_norm": 15.978523254394531, "learning_rate": 9.652380952380954e-06, "loss": 30.46, "step": 16553 }, { "epoch": 394.14328358208957, "grad_norm": 17.57875633239746, "learning_rate": 9.651785714285715e-06, "loss": 29.9802, "step": 16554 }, { "epoch": 394.1671641791045, "grad_norm": 17.737642288208008, "learning_rate": 9.651190476190477e-06, "loss": 31.1575, "step": 16555 }, { "epoch": 394.1910447761194, "grad_norm": 20.065645217895508, "learning_rate": 9.650595238095239e-06, "loss": 30.7144, "step": 16556 }, { "epoch": 394.21492537313435, "grad_norm": 21.767803192138672, "learning_rate": 9.65e-06, "loss": 29.3319, "step": 16557 }, { "epoch": 394.23880597014926, "grad_norm": 19.06543731689453, "learning_rate": 9.649404761904763e-06, "loss": 31.6025, "step": 16558 }, { "epoch": 394.26268656716417, "grad_norm": 22.794673919677734, "learning_rate": 9.648809523809524e-06, "loss": 32.2114, "step": 16559 }, { "epoch": 394.28656716417913, "grad_norm": 25.42393684387207, "learning_rate": 9.648214285714286e-06, "loss": 31.8628, "step": 16560 }, { "epoch": 394.31044776119404, "grad_norm": 20.32715606689453, "learning_rate": 9.647619047619048e-06, "loss": 32.0349, "step": 16561 }, { "epoch": 394.33432835820895, "grad_norm": 18.05319595336914, "learning_rate": 9.64702380952381e-06, "loss": 30.8603, "step": 16562 }, { "epoch": 394.35820895522386, "grad_norm": 23.052047729492188, "learning_rate": 9.646428571428572e-06, "loss": 31.2589, "step": 16563 }, { "epoch": 394.3820895522388, "grad_norm": 20.820653915405273, "learning_rate": 9.645833333333333e-06, "loss": 30.8611, "step": 16564 }, { "epoch": 394.40597014925373, "grad_norm": 16.364179611206055, "learning_rate": 9.645238095238097e-06, "loss": 31.3999, "step": 16565 }, { "epoch": 394.42985074626864, "grad_norm": 19.812480926513672, "learning_rate": 9.644642857142857e-06, "loss": 31.7923, "step": 16566 }, { "epoch": 394.4537313432836, "grad_norm": 21.836849212646484, "learning_rate": 9.644047619047619e-06, "loss": 33.2977, "step": 16567 }, { "epoch": 394.4776119402985, "grad_norm": 17.77836799621582, "learning_rate": 9.643452380952382e-06, "loss": 31.7209, "step": 16568 }, { "epoch": 394.5014925373134, "grad_norm": 22.15043067932129, "learning_rate": 9.642857142857144e-06, "loss": 32.0888, "step": 16569 }, { "epoch": 394.52537313432833, "grad_norm": 15.695204734802246, "learning_rate": 9.642261904761906e-06, "loss": 31.2755, "step": 16570 }, { "epoch": 394.5492537313433, "grad_norm": 26.243276596069336, "learning_rate": 9.641666666666666e-06, "loss": 31.3449, "step": 16571 }, { "epoch": 394.5731343283582, "grad_norm": 16.394758224487305, "learning_rate": 9.64107142857143e-06, "loss": 30.1301, "step": 16572 }, { "epoch": 394.5970149253731, "grad_norm": 22.238359451293945, "learning_rate": 9.640476190476191e-06, "loss": 31.6778, "step": 16573 }, { "epoch": 394.6208955223881, "grad_norm": 19.042591094970703, "learning_rate": 9.639880952380953e-06, "loss": 30.6233, "step": 16574 }, { "epoch": 394.644776119403, "grad_norm": 23.523181915283203, "learning_rate": 9.639285714285715e-06, "loss": 32.3055, "step": 16575 }, { "epoch": 394.6686567164179, "grad_norm": 25.505687713623047, "learning_rate": 9.638690476190477e-06, "loss": 30.1301, "step": 16576 }, { "epoch": 394.6925373134328, "grad_norm": 19.258235931396484, "learning_rate": 9.638095238095239e-06, "loss": 31.6026, "step": 16577 }, { "epoch": 394.7164179104478, "grad_norm": 24.81572914123535, "learning_rate": 9.6375e-06, "loss": 30.9966, "step": 16578 }, { "epoch": 394.7402985074627, "grad_norm": 24.1857967376709, "learning_rate": 9.636904761904762e-06, "loss": 32.3317, "step": 16579 }, { "epoch": 394.7641791044776, "grad_norm": 19.592126846313477, "learning_rate": 9.636309523809526e-06, "loss": 30.6968, "step": 16580 }, { "epoch": 394.78805970149256, "grad_norm": 19.79613494873047, "learning_rate": 9.635714285714286e-06, "loss": 30.9267, "step": 16581 }, { "epoch": 394.81194029850747, "grad_norm": 22.39429473876953, "learning_rate": 9.635119047619048e-06, "loss": 31.9018, "step": 16582 }, { "epoch": 394.8358208955224, "grad_norm": 16.94050407409668, "learning_rate": 9.63452380952381e-06, "loss": 31.5597, "step": 16583 }, { "epoch": 394.85970149253734, "grad_norm": 16.844341278076172, "learning_rate": 9.633928571428573e-06, "loss": 30.7179, "step": 16584 }, { "epoch": 394.88358208955225, "grad_norm": 19.728256225585938, "learning_rate": 9.633333333333335e-06, "loss": 31.5021, "step": 16585 }, { "epoch": 394.90746268656716, "grad_norm": 21.01833724975586, "learning_rate": 9.632738095238095e-06, "loss": 31.2969, "step": 16586 }, { "epoch": 394.93134328358207, "grad_norm": 16.520946502685547, "learning_rate": 9.632142857142858e-06, "loss": 31.9674, "step": 16587 }, { "epoch": 394.95522388059703, "grad_norm": 18.850202560424805, "learning_rate": 9.63154761904762e-06, "loss": 31.9042, "step": 16588 }, { "epoch": 394.97910447761194, "grad_norm": 15.861136436462402, "learning_rate": 9.630952380952382e-06, "loss": 30.7848, "step": 16589 }, { "epoch": 395.0, "grad_norm": 18.09653091430664, "learning_rate": 9.630357142857144e-06, "loss": 25.999, "step": 16590 }, { "epoch": 395.0238805970149, "grad_norm": 18.900066375732422, "learning_rate": 9.629761904761906e-06, "loss": 31.3835, "step": 16591 }, { "epoch": 395.0477611940299, "grad_norm": 18.146682739257812, "learning_rate": 9.629166666666668e-06, "loss": 31.2379, "step": 16592 }, { "epoch": 395.0716417910448, "grad_norm": 21.965797424316406, "learning_rate": 9.62857142857143e-06, "loss": 30.6798, "step": 16593 }, { "epoch": 395.0955223880597, "grad_norm": 22.59465217590332, "learning_rate": 9.627976190476191e-06, "loss": 30.2535, "step": 16594 }, { "epoch": 395.1194029850746, "grad_norm": 19.063858032226562, "learning_rate": 9.627380952380953e-06, "loss": 30.664, "step": 16595 }, { "epoch": 395.14328358208957, "grad_norm": 18.029199600219727, "learning_rate": 9.626785714285715e-06, "loss": 30.3232, "step": 16596 }, { "epoch": 395.1671641791045, "grad_norm": 17.064918518066406, "learning_rate": 9.626190476190477e-06, "loss": 31.4215, "step": 16597 }, { "epoch": 395.1910447761194, "grad_norm": 19.695556640625, "learning_rate": 9.625595238095238e-06, "loss": 30.1845, "step": 16598 }, { "epoch": 395.21492537313435, "grad_norm": 21.29120635986328, "learning_rate": 9.625e-06, "loss": 31.8835, "step": 16599 }, { "epoch": 395.23880597014926, "grad_norm": 20.6686954498291, "learning_rate": 9.624404761904764e-06, "loss": 31.8821, "step": 16600 }, { "epoch": 395.26268656716417, "grad_norm": 18.07406997680664, "learning_rate": 9.623809523809524e-06, "loss": 31.1092, "step": 16601 }, { "epoch": 395.28656716417913, "grad_norm": 17.966976165771484, "learning_rate": 9.623214285714286e-06, "loss": 30.3551, "step": 16602 }, { "epoch": 395.31044776119404, "grad_norm": 23.98894500732422, "learning_rate": 9.622619047619049e-06, "loss": 30.2829, "step": 16603 }, { "epoch": 395.33432835820895, "grad_norm": 17.835453033447266, "learning_rate": 9.622023809523811e-06, "loss": 30.4599, "step": 16604 }, { "epoch": 395.35820895522386, "grad_norm": 16.08890724182129, "learning_rate": 9.621428571428573e-06, "loss": 30.8228, "step": 16605 }, { "epoch": 395.3820895522388, "grad_norm": 26.890722274780273, "learning_rate": 9.620833333333335e-06, "loss": 31.7994, "step": 16606 }, { "epoch": 395.40597014925373, "grad_norm": 18.69220542907715, "learning_rate": 9.620238095238096e-06, "loss": 31.3295, "step": 16607 }, { "epoch": 395.42985074626864, "grad_norm": 19.22516441345215, "learning_rate": 9.619642857142858e-06, "loss": 31.2985, "step": 16608 }, { "epoch": 395.4537313432836, "grad_norm": 28.353347778320312, "learning_rate": 9.61904761904762e-06, "loss": 31.5362, "step": 16609 }, { "epoch": 395.4776119402985, "grad_norm": 17.4637508392334, "learning_rate": 9.618452380952382e-06, "loss": 30.8674, "step": 16610 }, { "epoch": 395.5014925373134, "grad_norm": 30.24287223815918, "learning_rate": 9.617857142857144e-06, "loss": 30.9374, "step": 16611 }, { "epoch": 395.52537313432833, "grad_norm": 23.158342361450195, "learning_rate": 9.617261904761905e-06, "loss": 31.9625, "step": 16612 }, { "epoch": 395.5492537313433, "grad_norm": 23.577930450439453, "learning_rate": 9.616666666666667e-06, "loss": 32.3882, "step": 16613 }, { "epoch": 395.5731343283582, "grad_norm": 28.652990341186523, "learning_rate": 9.616071428571429e-06, "loss": 31.3116, "step": 16614 }, { "epoch": 395.5970149253731, "grad_norm": 20.64177894592285, "learning_rate": 9.615476190476193e-06, "loss": 30.9665, "step": 16615 }, { "epoch": 395.6208955223881, "grad_norm": 37.987701416015625, "learning_rate": 9.614880952380953e-06, "loss": 31.1373, "step": 16616 }, { "epoch": 395.644776119403, "grad_norm": 27.08494758605957, "learning_rate": 9.614285714285714e-06, "loss": 31.2365, "step": 16617 }, { "epoch": 395.6686567164179, "grad_norm": 42.456336975097656, "learning_rate": 9.613690476190476e-06, "loss": 32.2337, "step": 16618 }, { "epoch": 395.6925373134328, "grad_norm": 32.765262603759766, "learning_rate": 9.61309523809524e-06, "loss": 31.1218, "step": 16619 }, { "epoch": 395.7164179104478, "grad_norm": 43.60858917236328, "learning_rate": 9.612500000000002e-06, "loss": 30.2923, "step": 16620 }, { "epoch": 395.7402985074627, "grad_norm": 42.65446853637695, "learning_rate": 9.611904761904762e-06, "loss": 31.4776, "step": 16621 }, { "epoch": 395.7641791044776, "grad_norm": 30.62103843688965, "learning_rate": 9.611309523809525e-06, "loss": 30.8037, "step": 16622 }, { "epoch": 395.78805970149256, "grad_norm": 31.356796264648438, "learning_rate": 9.610714285714287e-06, "loss": 32.1796, "step": 16623 }, { "epoch": 395.81194029850747, "grad_norm": 30.23118782043457, "learning_rate": 9.610119047619049e-06, "loss": 31.4694, "step": 16624 }, { "epoch": 395.8358208955224, "grad_norm": 25.252370834350586, "learning_rate": 9.60952380952381e-06, "loss": 31.9266, "step": 16625 }, { "epoch": 395.85970149253734, "grad_norm": 44.541969299316406, "learning_rate": 9.608928571428572e-06, "loss": 31.8544, "step": 16626 }, { "epoch": 395.88358208955225, "grad_norm": 35.47584533691406, "learning_rate": 9.608333333333334e-06, "loss": 30.4277, "step": 16627 }, { "epoch": 395.90746268656716, "grad_norm": 38.233604431152344, "learning_rate": 9.607738095238096e-06, "loss": 30.5344, "step": 16628 }, { "epoch": 395.93134328358207, "grad_norm": 37.811973571777344, "learning_rate": 9.607142857142858e-06, "loss": 30.0792, "step": 16629 }, { "epoch": 395.95522388059703, "grad_norm": 28.466703414916992, "learning_rate": 9.60654761904762e-06, "loss": 31.8081, "step": 16630 }, { "epoch": 395.97910447761194, "grad_norm": 25.1666259765625, "learning_rate": 9.605952380952381e-06, "loss": 32.0192, "step": 16631 }, { "epoch": 396.0, "grad_norm": 33.15395736694336, "learning_rate": 9.605357142857143e-06, "loss": 28.3371, "step": 16632 }, { "epoch": 396.0238805970149, "grad_norm": 31.699602127075195, "learning_rate": 9.604761904761905e-06, "loss": 30.8777, "step": 16633 }, { "epoch": 396.0477611940299, "grad_norm": 42.341495513916016, "learning_rate": 9.604166666666669e-06, "loss": 31.5053, "step": 16634 }, { "epoch": 396.0716417910448, "grad_norm": 34.36485290527344, "learning_rate": 9.60357142857143e-06, "loss": 32.9534, "step": 16635 }, { "epoch": 396.0955223880597, "grad_norm": 32.37449264526367, "learning_rate": 9.60297619047619e-06, "loss": 31.1525, "step": 16636 }, { "epoch": 396.1194029850746, "grad_norm": 27.004873275756836, "learning_rate": 9.602380952380952e-06, "loss": 30.6301, "step": 16637 }, { "epoch": 396.14328358208957, "grad_norm": 36.502906799316406, "learning_rate": 9.601785714285716e-06, "loss": 31.3992, "step": 16638 }, { "epoch": 396.1671641791045, "grad_norm": 29.16812515258789, "learning_rate": 9.601190476190478e-06, "loss": 30.064, "step": 16639 }, { "epoch": 396.1910447761194, "grad_norm": 34.83269500732422, "learning_rate": 9.60059523809524e-06, "loss": 31.2044, "step": 16640 }, { "epoch": 396.21492537313435, "grad_norm": 37.278987884521484, "learning_rate": 9.600000000000001e-06, "loss": 31.656, "step": 16641 }, { "epoch": 396.23880597014926, "grad_norm": 30.657453536987305, "learning_rate": 9.599404761904763e-06, "loss": 30.8146, "step": 16642 }, { "epoch": 396.26268656716417, "grad_norm": 27.678909301757812, "learning_rate": 9.598809523809525e-06, "loss": 30.6719, "step": 16643 }, { "epoch": 396.28656716417913, "grad_norm": 34.551876068115234, "learning_rate": 9.598214285714287e-06, "loss": 31.0905, "step": 16644 }, { "epoch": 396.31044776119404, "grad_norm": 28.76030731201172, "learning_rate": 9.597619047619048e-06, "loss": 31.1123, "step": 16645 }, { "epoch": 396.33432835820895, "grad_norm": 37.68404769897461, "learning_rate": 9.59702380952381e-06, "loss": 31.3855, "step": 16646 }, { "epoch": 396.35820895522386, "grad_norm": 33.67521667480469, "learning_rate": 9.596428571428572e-06, "loss": 30.5938, "step": 16647 }, { "epoch": 396.3820895522388, "grad_norm": 35.538673400878906, "learning_rate": 9.595833333333334e-06, "loss": 30.8305, "step": 16648 }, { "epoch": 396.40597014925373, "grad_norm": 30.006072998046875, "learning_rate": 9.595238095238096e-06, "loss": 31.4339, "step": 16649 }, { "epoch": 396.42985074626864, "grad_norm": 32.67222595214844, "learning_rate": 9.59464285714286e-06, "loss": 31.8208, "step": 16650 }, { "epoch": 396.4537313432836, "grad_norm": 27.725492477416992, "learning_rate": 9.59404761904762e-06, "loss": 30.8556, "step": 16651 }, { "epoch": 396.4776119402985, "grad_norm": 33.49855422973633, "learning_rate": 9.593452380952381e-06, "loss": 31.3964, "step": 16652 }, { "epoch": 396.5014925373134, "grad_norm": 27.76362419128418, "learning_rate": 9.592857142857143e-06, "loss": 31.0002, "step": 16653 }, { "epoch": 396.52537313432833, "grad_norm": 35.20942687988281, "learning_rate": 9.592261904761906e-06, "loss": 31.1557, "step": 16654 }, { "epoch": 396.5492537313433, "grad_norm": 33.215999603271484, "learning_rate": 9.591666666666667e-06, "loss": 31.4064, "step": 16655 }, { "epoch": 396.5731343283582, "grad_norm": 31.689847946166992, "learning_rate": 9.591071428571428e-06, "loss": 32.0374, "step": 16656 }, { "epoch": 396.5970149253731, "grad_norm": 27.90886878967285, "learning_rate": 9.590476190476192e-06, "loss": 30.0792, "step": 16657 }, { "epoch": 396.6208955223881, "grad_norm": 36.104854583740234, "learning_rate": 9.589880952380954e-06, "loss": 31.7099, "step": 16658 }, { "epoch": 396.644776119403, "grad_norm": 32.245887756347656, "learning_rate": 9.589285714285716e-06, "loss": 30.9505, "step": 16659 }, { "epoch": 396.6686567164179, "grad_norm": 34.97145080566406, "learning_rate": 9.588690476190476e-06, "loss": 31.2625, "step": 16660 }, { "epoch": 396.6925373134328, "grad_norm": 32.9593391418457, "learning_rate": 9.588095238095239e-06, "loss": 31.7602, "step": 16661 }, { "epoch": 396.7164179104478, "grad_norm": 29.946687698364258, "learning_rate": 9.587500000000001e-06, "loss": 30.1575, "step": 16662 }, { "epoch": 396.7402985074627, "grad_norm": 25.896459579467773, "learning_rate": 9.586904761904763e-06, "loss": 31.2349, "step": 16663 }, { "epoch": 396.7641791044776, "grad_norm": 32.72298812866211, "learning_rate": 9.586309523809525e-06, "loss": 30.6864, "step": 16664 }, { "epoch": 396.78805970149256, "grad_norm": 27.926958084106445, "learning_rate": 9.585714285714286e-06, "loss": 31.7503, "step": 16665 }, { "epoch": 396.81194029850747, "grad_norm": 34.445335388183594, "learning_rate": 9.585119047619048e-06, "loss": 31.3729, "step": 16666 }, { "epoch": 396.8358208955224, "grad_norm": 33.05970001220703, "learning_rate": 9.58452380952381e-06, "loss": 29.862, "step": 16667 }, { "epoch": 396.85970149253734, "grad_norm": 31.89533042907715, "learning_rate": 9.583928571428572e-06, "loss": 30.946, "step": 16668 }, { "epoch": 396.88358208955225, "grad_norm": 28.336143493652344, "learning_rate": 9.583333333333335e-06, "loss": 31.0802, "step": 16669 }, { "epoch": 396.90746268656716, "grad_norm": 35.388946533203125, "learning_rate": 9.582738095238095e-06, "loss": 31.4563, "step": 16670 }, { "epoch": 396.93134328358207, "grad_norm": 30.30738067626953, "learning_rate": 9.582142857142857e-06, "loss": 30.6146, "step": 16671 }, { "epoch": 396.95522388059703, "grad_norm": 33.11353302001953, "learning_rate": 9.581547619047619e-06, "loss": 31.227, "step": 16672 }, { "epoch": 396.97910447761194, "grad_norm": 31.145061492919922, "learning_rate": 9.580952380952383e-06, "loss": 31.3158, "step": 16673 }, { "epoch": 397.0, "grad_norm": 28.533903121948242, "learning_rate": 9.580357142857144e-06, "loss": 26.6179, "step": 16674 }, { "epoch": 397.0238805970149, "grad_norm": 27.6167049407959, "learning_rate": 9.579761904761904e-06, "loss": 31.3673, "step": 16675 }, { "epoch": 397.0477611940299, "grad_norm": 34.531558990478516, "learning_rate": 9.579166666666668e-06, "loss": 30.3208, "step": 16676 }, { "epoch": 397.0716417910448, "grad_norm": 31.28594398498535, "learning_rate": 9.57857142857143e-06, "loss": 31.1348, "step": 16677 }, { "epoch": 397.0955223880597, "grad_norm": 31.299909591674805, "learning_rate": 9.577976190476192e-06, "loss": 31.634, "step": 16678 }, { "epoch": 397.1194029850746, "grad_norm": 29.449188232421875, "learning_rate": 9.577380952380953e-06, "loss": 31.2931, "step": 16679 }, { "epoch": 397.14328358208957, "grad_norm": 31.746688842773438, "learning_rate": 9.576785714285715e-06, "loss": 30.6019, "step": 16680 }, { "epoch": 397.1671641791045, "grad_norm": 27.6197452545166, "learning_rate": 9.576190476190477e-06, "loss": 30.9896, "step": 16681 }, { "epoch": 397.1910447761194, "grad_norm": 34.12122344970703, "learning_rate": 9.575595238095239e-06, "loss": 30.9409, "step": 16682 }, { "epoch": 397.21492537313435, "grad_norm": 30.74228286743164, "learning_rate": 9.575e-06, "loss": 29.8835, "step": 16683 }, { "epoch": 397.23880597014926, "grad_norm": 34.26853942871094, "learning_rate": 9.574404761904762e-06, "loss": 30.3535, "step": 16684 }, { "epoch": 397.26268656716417, "grad_norm": 30.06424331665039, "learning_rate": 9.573809523809524e-06, "loss": 30.5033, "step": 16685 }, { "epoch": 397.28656716417913, "grad_norm": 29.320924758911133, "learning_rate": 9.573214285714286e-06, "loss": 30.938, "step": 16686 }, { "epoch": 397.31044776119404, "grad_norm": 24.558950424194336, "learning_rate": 9.572619047619048e-06, "loss": 31.675, "step": 16687 }, { "epoch": 397.33432835820895, "grad_norm": 35.58845138549805, "learning_rate": 9.57202380952381e-06, "loss": 31.0714, "step": 16688 }, { "epoch": 397.35820895522386, "grad_norm": 30.322538375854492, "learning_rate": 9.571428571428573e-06, "loss": 30.6867, "step": 16689 }, { "epoch": 397.3820895522388, "grad_norm": 33.869937896728516, "learning_rate": 9.570833333333333e-06, "loss": 31.3392, "step": 16690 }, { "epoch": 397.40597014925373, "grad_norm": 29.600238800048828, "learning_rate": 9.570238095238095e-06, "loss": 31.5124, "step": 16691 }, { "epoch": 397.42985074626864, "grad_norm": 30.72909927368164, "learning_rate": 9.569642857142859e-06, "loss": 30.4904, "step": 16692 }, { "epoch": 397.4537313432836, "grad_norm": 25.677860260009766, "learning_rate": 9.56904761904762e-06, "loss": 30.9418, "step": 16693 }, { "epoch": 397.4776119402985, "grad_norm": 35.1114616394043, "learning_rate": 9.568452380952382e-06, "loss": 31.4766, "step": 16694 }, { "epoch": 397.5014925373134, "grad_norm": 29.866853713989258, "learning_rate": 9.567857142857142e-06, "loss": 30.2016, "step": 16695 }, { "epoch": 397.52537313432833, "grad_norm": 33.91158676147461, "learning_rate": 9.567261904761906e-06, "loss": 31.032, "step": 16696 }, { "epoch": 397.5492537313433, "grad_norm": 31.699338912963867, "learning_rate": 9.566666666666668e-06, "loss": 30.5229, "step": 16697 }, { "epoch": 397.5731343283582, "grad_norm": 31.88851547241211, "learning_rate": 9.56607142857143e-06, "loss": 32.2403, "step": 16698 }, { "epoch": 397.5970149253731, "grad_norm": 29.413162231445312, "learning_rate": 9.565476190476191e-06, "loss": 32.0086, "step": 16699 }, { "epoch": 397.6208955223881, "grad_norm": 32.275978088378906, "learning_rate": 9.564880952380953e-06, "loss": 31.3495, "step": 16700 }, { "epoch": 397.644776119403, "grad_norm": 26.762638092041016, "learning_rate": 9.564285714285715e-06, "loss": 32.9203, "step": 16701 }, { "epoch": 397.6686567164179, "grad_norm": 33.741455078125, "learning_rate": 9.563690476190477e-06, "loss": 31.4001, "step": 16702 }, { "epoch": 397.6925373134328, "grad_norm": 30.879159927368164, "learning_rate": 9.563095238095239e-06, "loss": 32.1866, "step": 16703 }, { "epoch": 397.7164179104478, "grad_norm": 34.93317794799805, "learning_rate": 9.562500000000002e-06, "loss": 30.9474, "step": 16704 }, { "epoch": 397.7402985074627, "grad_norm": 27.91045379638672, "learning_rate": 9.561904761904762e-06, "loss": 30.6972, "step": 16705 }, { "epoch": 397.7641791044776, "grad_norm": 34.52175521850586, "learning_rate": 9.561309523809524e-06, "loss": 31.2406, "step": 16706 }, { "epoch": 397.78805970149256, "grad_norm": 28.630678176879883, "learning_rate": 9.560714285714286e-06, "loss": 30.4547, "step": 16707 }, { "epoch": 397.81194029850747, "grad_norm": 31.232086181640625, "learning_rate": 9.56011904761905e-06, "loss": 29.5587, "step": 16708 }, { "epoch": 397.8358208955224, "grad_norm": 27.394580841064453, "learning_rate": 9.559523809523811e-06, "loss": 31.1712, "step": 16709 }, { "epoch": 397.85970149253734, "grad_norm": 35.872867584228516, "learning_rate": 9.558928571428571e-06, "loss": 31.9228, "step": 16710 }, { "epoch": 397.88358208955225, "grad_norm": 26.121097564697266, "learning_rate": 9.558333333333335e-06, "loss": 31.7529, "step": 16711 }, { "epoch": 397.90746268656716, "grad_norm": 30.0105037689209, "learning_rate": 9.557738095238097e-06, "loss": 30.5681, "step": 16712 }, { "epoch": 397.93134328358207, "grad_norm": 27.735515594482422, "learning_rate": 9.557142857142858e-06, "loss": 31.976, "step": 16713 }, { "epoch": 397.95522388059703, "grad_norm": 33.157508850097656, "learning_rate": 9.55654761904762e-06, "loss": 31.5556, "step": 16714 }, { "epoch": 397.97910447761194, "grad_norm": 28.184017181396484, "learning_rate": 9.555952380952382e-06, "loss": 30.6026, "step": 16715 }, { "epoch": 398.0, "grad_norm": 29.705965042114258, "learning_rate": 9.555357142857144e-06, "loss": 27.0042, "step": 16716 }, { "epoch": 398.0238805970149, "grad_norm": 31.84282112121582, "learning_rate": 9.554761904761906e-06, "loss": 30.4317, "step": 16717 }, { "epoch": 398.0477611940299, "grad_norm": 35.78097915649414, "learning_rate": 9.554166666666667e-06, "loss": 31.5071, "step": 16718 }, { "epoch": 398.0716417910448, "grad_norm": 33.300533294677734, "learning_rate": 9.55357142857143e-06, "loss": 30.8626, "step": 16719 }, { "epoch": 398.0955223880597, "grad_norm": 29.378177642822266, "learning_rate": 9.552976190476191e-06, "loss": 29.6357, "step": 16720 }, { "epoch": 398.1194029850746, "grad_norm": 27.08431625366211, "learning_rate": 9.552380952380953e-06, "loss": 30.6963, "step": 16721 }, { "epoch": 398.14328358208957, "grad_norm": 32.26847457885742, "learning_rate": 9.551785714285715e-06, "loss": 31.8527, "step": 16722 }, { "epoch": 398.1671641791045, "grad_norm": 27.086942672729492, "learning_rate": 9.551190476190476e-06, "loss": 31.3711, "step": 16723 }, { "epoch": 398.1910447761194, "grad_norm": 35.80241775512695, "learning_rate": 9.55059523809524e-06, "loss": 31.0931, "step": 16724 }, { "epoch": 398.21492537313435, "grad_norm": 30.800640106201172, "learning_rate": 9.55e-06, "loss": 32.0653, "step": 16725 }, { "epoch": 398.23880597014926, "grad_norm": 32.04216003417969, "learning_rate": 9.549404761904762e-06, "loss": 30.3713, "step": 16726 }, { "epoch": 398.26268656716417, "grad_norm": 28.524978637695312, "learning_rate": 9.548809523809525e-06, "loss": 31.4776, "step": 16727 }, { "epoch": 398.28656716417913, "grad_norm": 32.854637145996094, "learning_rate": 9.548214285714287e-06, "loss": 31.4661, "step": 16728 }, { "epoch": 398.31044776119404, "grad_norm": 29.422914505004883, "learning_rate": 9.547619047619049e-06, "loss": 30.2009, "step": 16729 }, { "epoch": 398.33432835820895, "grad_norm": 34.19198226928711, "learning_rate": 9.547023809523809e-06, "loss": 31.805, "step": 16730 }, { "epoch": 398.35820895522386, "grad_norm": 25.075660705566406, "learning_rate": 9.546428571428573e-06, "loss": 31.1434, "step": 16731 }, { "epoch": 398.3820895522388, "grad_norm": 36.68419647216797, "learning_rate": 9.545833333333334e-06, "loss": 31.0459, "step": 16732 }, { "epoch": 398.40597014925373, "grad_norm": 31.8087215423584, "learning_rate": 9.545238095238096e-06, "loss": 31.1787, "step": 16733 }, { "epoch": 398.42985074626864, "grad_norm": 32.61941146850586, "learning_rate": 9.544642857142858e-06, "loss": 31.876, "step": 16734 }, { "epoch": 398.4537313432836, "grad_norm": 29.566112518310547, "learning_rate": 9.54404761904762e-06, "loss": 31.1784, "step": 16735 }, { "epoch": 398.4776119402985, "grad_norm": 30.375717163085938, "learning_rate": 9.543452380952382e-06, "loss": 31.0403, "step": 16736 }, { "epoch": 398.5014925373134, "grad_norm": 26.639244079589844, "learning_rate": 9.542857142857143e-06, "loss": 30.9947, "step": 16737 }, { "epoch": 398.52537313432833, "grad_norm": 31.981529235839844, "learning_rate": 9.542261904761905e-06, "loss": 31.2653, "step": 16738 }, { "epoch": 398.5492537313433, "grad_norm": 27.035037994384766, "learning_rate": 9.541666666666669e-06, "loss": 31.008, "step": 16739 }, { "epoch": 398.5731343283582, "grad_norm": 34.97123718261719, "learning_rate": 9.541071428571429e-06, "loss": 31.0956, "step": 16740 }, { "epoch": 398.5970149253731, "grad_norm": 31.80586814880371, "learning_rate": 9.54047619047619e-06, "loss": 32.1095, "step": 16741 }, { "epoch": 398.6208955223881, "grad_norm": 32.614200592041016, "learning_rate": 9.539880952380953e-06, "loss": 31.3034, "step": 16742 }, { "epoch": 398.644776119403, "grad_norm": 29.285017013549805, "learning_rate": 9.539285714285716e-06, "loss": 31.2842, "step": 16743 }, { "epoch": 398.6686567164179, "grad_norm": 32.6673583984375, "learning_rate": 9.538690476190478e-06, "loss": 30.9135, "step": 16744 }, { "epoch": 398.6925373134328, "grad_norm": 25.550617218017578, "learning_rate": 9.538095238095238e-06, "loss": 31.2939, "step": 16745 }, { "epoch": 398.7164179104478, "grad_norm": 29.780179977416992, "learning_rate": 9.537500000000001e-06, "loss": 29.9004, "step": 16746 }, { "epoch": 398.7402985074627, "grad_norm": 25.99339485168457, "learning_rate": 9.536904761904763e-06, "loss": 30.5079, "step": 16747 }, { "epoch": 398.7641791044776, "grad_norm": 34.089752197265625, "learning_rate": 9.536309523809525e-06, "loss": 30.3218, "step": 16748 }, { "epoch": 398.78805970149256, "grad_norm": 30.218477249145508, "learning_rate": 9.535714285714287e-06, "loss": 32.472, "step": 16749 }, { "epoch": 398.81194029850747, "grad_norm": 27.748638153076172, "learning_rate": 9.535119047619049e-06, "loss": 31.2098, "step": 16750 }, { "epoch": 398.8358208955224, "grad_norm": 25.21699333190918, "learning_rate": 9.53452380952381e-06, "loss": 30.4431, "step": 16751 }, { "epoch": 398.85970149253734, "grad_norm": 28.97269058227539, "learning_rate": 9.533928571428572e-06, "loss": 31.8221, "step": 16752 }, { "epoch": 398.88358208955225, "grad_norm": 24.254507064819336, "learning_rate": 9.533333333333334e-06, "loss": 30.3948, "step": 16753 }, { "epoch": 398.90746268656716, "grad_norm": 31.532323837280273, "learning_rate": 9.532738095238096e-06, "loss": 31.2338, "step": 16754 }, { "epoch": 398.93134328358207, "grad_norm": 24.351436614990234, "learning_rate": 9.532142857142858e-06, "loss": 30.3739, "step": 16755 }, { "epoch": 398.95522388059703, "grad_norm": 34.84754180908203, "learning_rate": 9.53154761904762e-06, "loss": 31.6995, "step": 16756 }, { "epoch": 398.97910447761194, "grad_norm": 23.07961082458496, "learning_rate": 9.530952380952381e-06, "loss": 30.4715, "step": 16757 }, { "epoch": 399.0, "grad_norm": 24.954120635986328, "learning_rate": 9.530357142857143e-06, "loss": 27.2571, "step": 16758 }, { "epoch": 399.0238805970149, "grad_norm": 27.035842895507812, "learning_rate": 9.529761904761905e-06, "loss": 31.2928, "step": 16759 }, { "epoch": 399.0477611940299, "grad_norm": 28.470870971679688, "learning_rate": 9.529166666666667e-06, "loss": 31.2636, "step": 16760 }, { "epoch": 399.0716417910448, "grad_norm": 21.631488800048828, "learning_rate": 9.528571428571429e-06, "loss": 30.8329, "step": 16761 }, { "epoch": 399.0955223880597, "grad_norm": 30.524328231811523, "learning_rate": 9.527976190476192e-06, "loss": 31.04, "step": 16762 }, { "epoch": 399.1194029850746, "grad_norm": 23.0170841217041, "learning_rate": 9.527380952380954e-06, "loss": 30.8095, "step": 16763 }, { "epoch": 399.14328358208957, "grad_norm": 32.84740447998047, "learning_rate": 9.526785714285714e-06, "loss": 31.5631, "step": 16764 }, { "epoch": 399.1671641791045, "grad_norm": 25.2762451171875, "learning_rate": 9.526190476190476e-06, "loss": 30.8093, "step": 16765 }, { "epoch": 399.1910447761194, "grad_norm": 31.601844787597656, "learning_rate": 9.52559523809524e-06, "loss": 30.4768, "step": 16766 }, { "epoch": 399.21492537313435, "grad_norm": 28.054853439331055, "learning_rate": 9.525000000000001e-06, "loss": 31.3206, "step": 16767 }, { "epoch": 399.23880597014926, "grad_norm": 25.569419860839844, "learning_rate": 9.524404761904763e-06, "loss": 31.7122, "step": 16768 }, { "epoch": 399.26268656716417, "grad_norm": 23.35809898376465, "learning_rate": 9.523809523809525e-06, "loss": 30.367, "step": 16769 }, { "epoch": 399.28656716417913, "grad_norm": 23.588653564453125, "learning_rate": 9.523214285714287e-06, "loss": 31.3669, "step": 16770 }, { "epoch": 399.31044776119404, "grad_norm": 23.405994415283203, "learning_rate": 9.522619047619048e-06, "loss": 31.4575, "step": 16771 }, { "epoch": 399.33432835820895, "grad_norm": 20.56930160522461, "learning_rate": 9.52202380952381e-06, "loss": 30.3676, "step": 16772 }, { "epoch": 399.35820895522386, "grad_norm": 18.158884048461914, "learning_rate": 9.521428571428572e-06, "loss": 30.0671, "step": 16773 }, { "epoch": 399.3820895522388, "grad_norm": 23.44738006591797, "learning_rate": 9.520833333333334e-06, "loss": 30.2954, "step": 16774 }, { "epoch": 399.40597014925373, "grad_norm": 19.364089965820312, "learning_rate": 9.520238095238096e-06, "loss": 31.8877, "step": 16775 }, { "epoch": 399.42985074626864, "grad_norm": 22.331945419311523, "learning_rate": 9.519642857142857e-06, "loss": 30.6507, "step": 16776 }, { "epoch": 399.4537313432836, "grad_norm": 19.487991333007812, "learning_rate": 9.51904761904762e-06, "loss": 31.6449, "step": 16777 }, { "epoch": 399.4776119402985, "grad_norm": 20.457239151000977, "learning_rate": 9.518452380952383e-06, "loss": 31.7808, "step": 16778 }, { "epoch": 399.5014925373134, "grad_norm": 15.00794792175293, "learning_rate": 9.517857142857143e-06, "loss": 29.7277, "step": 16779 }, { "epoch": 399.52537313432833, "grad_norm": 24.077810287475586, "learning_rate": 9.517261904761905e-06, "loss": 31.4422, "step": 16780 }, { "epoch": 399.5492537313433, "grad_norm": 18.673059463500977, "learning_rate": 9.516666666666668e-06, "loss": 30.5987, "step": 16781 }, { "epoch": 399.5731343283582, "grad_norm": 20.430227279663086, "learning_rate": 9.51607142857143e-06, "loss": 31.5846, "step": 16782 }, { "epoch": 399.5970149253731, "grad_norm": 19.923513412475586, "learning_rate": 9.515476190476192e-06, "loss": 31.11, "step": 16783 }, { "epoch": 399.6208955223881, "grad_norm": 23.15761375427246, "learning_rate": 9.514880952380952e-06, "loss": 30.6794, "step": 16784 }, { "epoch": 399.644776119403, "grad_norm": 20.51605987548828, "learning_rate": 9.514285714285715e-06, "loss": 31.3281, "step": 16785 }, { "epoch": 399.6686567164179, "grad_norm": 18.474712371826172, "learning_rate": 9.513690476190477e-06, "loss": 31.3219, "step": 16786 }, { "epoch": 399.6925373134328, "grad_norm": 20.799448013305664, "learning_rate": 9.513095238095239e-06, "loss": 31.7438, "step": 16787 }, { "epoch": 399.7164179104478, "grad_norm": 20.413267135620117, "learning_rate": 9.5125e-06, "loss": 31.2168, "step": 16788 }, { "epoch": 399.7402985074627, "grad_norm": 20.1457462310791, "learning_rate": 9.511904761904763e-06, "loss": 30.8774, "step": 16789 }, { "epoch": 399.7641791044776, "grad_norm": 20.93187713623047, "learning_rate": 9.511309523809524e-06, "loss": 31.4587, "step": 16790 }, { "epoch": 399.78805970149256, "grad_norm": 19.636737823486328, "learning_rate": 9.510714285714286e-06, "loss": 31.1715, "step": 16791 }, { "epoch": 399.81194029850747, "grad_norm": 19.83819580078125, "learning_rate": 9.510119047619048e-06, "loss": 31.0146, "step": 16792 }, { "epoch": 399.8358208955224, "grad_norm": 18.057538986206055, "learning_rate": 9.50952380952381e-06, "loss": 31.2029, "step": 16793 }, { "epoch": 399.85970149253734, "grad_norm": 25.070390701293945, "learning_rate": 9.508928571428572e-06, "loss": 32.3302, "step": 16794 }, { "epoch": 399.88358208955225, "grad_norm": 19.523630142211914, "learning_rate": 9.508333333333333e-06, "loss": 31.3145, "step": 16795 }, { "epoch": 399.90746268656716, "grad_norm": 19.136125564575195, "learning_rate": 9.507738095238095e-06, "loss": 30.6125, "step": 16796 }, { "epoch": 399.93134328358207, "grad_norm": 20.086763381958008, "learning_rate": 9.507142857142859e-06, "loss": 30.7412, "step": 16797 }, { "epoch": 399.95522388059703, "grad_norm": 24.598318099975586, "learning_rate": 9.50654761904762e-06, "loss": 30.579, "step": 16798 }, { "epoch": 399.97910447761194, "grad_norm": 19.404541015625, "learning_rate": 9.50595238095238e-06, "loss": 31.4261, "step": 16799 }, { "epoch": 400.0, "grad_norm": 17.034467697143555, "learning_rate": 9.505357142857144e-06, "loss": 26.7138, "step": 16800 }, { "epoch": 400.0, "step": 16800, "total_flos": 8.258106306923356e+17, "train_loss": 1.570451229867481, "train_runtime": 25616.4387, "train_samples_per_second": 83.571, "train_steps_per_second": 0.656 }, { "epoch": 400.0238805970149, "grad_norm": 19.236080169677734, "learning_rate": 1e-05, "loss": 30.7231, "step": 16801 }, { "epoch": 400.0477611940299, "grad_norm": Infinity, "learning_rate": 9.999433106575964e-06, "loss": 34.9673, "step": 16802 }, { "epoch": 400.0716417910448, "grad_norm": 311.71990966796875, "learning_rate": 9.999433106575964e-06, "loss": 37.6479, "step": 16803 }, { "epoch": 400.0955223880597, "grad_norm": 141.22789001464844, "learning_rate": 9.998866213151928e-06, "loss": 34.1772, "step": 16804 }, { "epoch": 400.1194029850746, "grad_norm": 84.35620880126953, "learning_rate": 9.998299319727893e-06, "loss": 35.0214, "step": 16805 }, { "epoch": 400.14328358208957, "grad_norm": 63.754234313964844, "learning_rate": 9.997732426303856e-06, "loss": 31.6235, "step": 16806 }, { "epoch": 400.1671641791045, "grad_norm": 46.571712493896484, "learning_rate": 9.997165532879819e-06, "loss": 31.1043, "step": 16807 }, { "epoch": 400.1910447761194, "grad_norm": 40.43476104736328, "learning_rate": 9.996598639455783e-06, "loss": 32.5885, "step": 16808 }, { "epoch": 400.21492537313435, "grad_norm": 42.0704345703125, "learning_rate": 9.996031746031746e-06, "loss": 31.767, "step": 16809 }, { "epoch": 400.23880597014926, "grad_norm": 36.093997955322266, "learning_rate": 9.99546485260771e-06, "loss": 32.3834, "step": 16810 }, { "epoch": 400.26268656716417, "grad_norm": 29.10378074645996, "learning_rate": 9.994897959183675e-06, "loss": 30.6534, "step": 16811 }, { "epoch": 400.28656716417913, "grad_norm": 27.766040802001953, "learning_rate": 9.994331065759638e-06, "loss": 30.9044, "step": 16812 }, { "epoch": 400.31044776119404, "grad_norm": 28.87425994873047, "learning_rate": 9.993764172335601e-06, "loss": 30.3871, "step": 16813 }, { "epoch": 400.33432835820895, "grad_norm": 33.46707534790039, "learning_rate": 9.993197278911566e-06, "loss": 31.9699, "step": 16814 }, { "epoch": 400.35820895522386, "grad_norm": 23.63628387451172, "learning_rate": 9.992630385487529e-06, "loss": 31.7989, "step": 16815 }, { "epoch": 400.3820895522388, "grad_norm": 26.49193572998047, "learning_rate": 9.992063492063493e-06, "loss": 30.8326, "step": 16816 }, { "epoch": 400.40597014925373, "grad_norm": 31.20513916015625, "learning_rate": 9.991496598639456e-06, "loss": 31.3319, "step": 16817 }, { "epoch": 400.42985074626864, "grad_norm": 26.55478286743164, "learning_rate": 9.99092970521542e-06, "loss": 31.0836, "step": 16818 }, { "epoch": 400.4537313432836, "grad_norm": 20.33902931213379, "learning_rate": 9.990362811791384e-06, "loss": 32.284, "step": 16819 }, { "epoch": 400.4776119402985, "grad_norm": 27.636608123779297, "learning_rate": 9.989795918367348e-06, "loss": 30.9584, "step": 16820 }, { "epoch": 400.5014925373134, "grad_norm": 25.69060516357422, "learning_rate": 9.989229024943311e-06, "loss": 31.1324, "step": 16821 }, { "epoch": 400.52537313432833, "grad_norm": 18.75033950805664, "learning_rate": 9.988662131519276e-06, "loss": 30.9402, "step": 16822 }, { "epoch": 400.5492537313433, "grad_norm": 23.087989807128906, "learning_rate": 9.988095238095239e-06, "loss": 31.4712, "step": 16823 }, { "epoch": 400.5731343283582, "grad_norm": 20.75522232055664, "learning_rate": 9.987528344671202e-06, "loss": 31.3741, "step": 16824 }, { "epoch": 400.5970149253731, "grad_norm": 17.48866844177246, "learning_rate": 9.986961451247166e-06, "loss": 30.8529, "step": 16825 }, { "epoch": 400.6208955223881, "grad_norm": 18.83658218383789, "learning_rate": 9.98639455782313e-06, "loss": 31.9591, "step": 16826 }, { "epoch": 400.644776119403, "grad_norm": 23.694780349731445, "learning_rate": 9.985827664399094e-06, "loss": 30.2027, "step": 16827 }, { "epoch": 400.6686567164179, "grad_norm": 22.643640518188477, "learning_rate": 9.985260770975057e-06, "loss": 29.3691, "step": 16828 }, { "epoch": 400.6925373134328, "grad_norm": 17.421306610107422, "learning_rate": 9.984693877551021e-06, "loss": 31.3057, "step": 16829 }, { "epoch": 400.7164179104478, "grad_norm": 21.523630142211914, "learning_rate": 9.984126984126986e-06, "loss": 31.839, "step": 16830 }, { "epoch": 400.7402985074627, "grad_norm": 21.368457794189453, "learning_rate": 9.983560090702949e-06, "loss": 31.2177, "step": 16831 }, { "epoch": 400.7641791044776, "grad_norm": 21.82390594482422, "learning_rate": 9.982993197278913e-06, "loss": 32.4152, "step": 16832 }, { "epoch": 400.78805970149256, "grad_norm": 17.901853561401367, "learning_rate": 9.982426303854876e-06, "loss": 30.6141, "step": 16833 }, { "epoch": 400.81194029850747, "grad_norm": 24.431535720825195, "learning_rate": 9.981859410430839e-06, "loss": 32.3977, "step": 16834 }, { "epoch": 400.8358208955224, "grad_norm": NaN, "learning_rate": 9.981292517006804e-06, "loss": 56.8003, "step": 16835 }, { "epoch": 400.85970149253734, "grad_norm": 17.254962921142578, "learning_rate": 9.981292517006804e-06, "loss": 30.2194, "step": 16836 }, { "epoch": 400.88358208955225, "grad_norm": 22.819360733032227, "learning_rate": 9.980725623582768e-06, "loss": 31.6324, "step": 16837 }, { "epoch": 400.90746268656716, "grad_norm": 19.341094970703125, "learning_rate": 9.980158730158731e-06, "loss": 30.3093, "step": 16838 }, { "epoch": 400.93134328358207, "grad_norm": 21.019248962402344, "learning_rate": 9.979591836734694e-06, "loss": 32.384, "step": 16839 }, { "epoch": 400.95522388059703, "grad_norm": 18.01608657836914, "learning_rate": 9.979024943310659e-06, "loss": 31.0385, "step": 16840 }, { "epoch": 400.97910447761194, "grad_norm": 21.27842903137207, "learning_rate": 9.978458049886622e-06, "loss": 30.5699, "step": 16841 }, { "epoch": 401.0, "grad_norm": 16.495031356811523, "learning_rate": 9.977891156462586e-06, "loss": 25.8223, "step": 16842 }, { "epoch": 401.0238805970149, "grad_norm": 18.222089767456055, "learning_rate": 9.977324263038549e-06, "loss": 32.0258, "step": 16843 }, { "epoch": 401.0477611940299, "grad_norm": 21.727060317993164, "learning_rate": 9.976757369614514e-06, "loss": 31.3581, "step": 16844 }, { "epoch": 401.0716417910448, "grad_norm": 20.427047729492188, "learning_rate": 9.976190476190477e-06, "loss": 30.6954, "step": 16845 }, { "epoch": 401.0955223880597, "grad_norm": 16.091264724731445, "learning_rate": 9.975623582766441e-06, "loss": 30.2019, "step": 16846 }, { "epoch": 401.1194029850746, "grad_norm": 16.839252471923828, "learning_rate": 9.975056689342404e-06, "loss": 30.8647, "step": 16847 }, { "epoch": 401.14328358208957, "grad_norm": 18.731321334838867, "learning_rate": 9.974489795918369e-06, "loss": 31.5526, "step": 16848 }, { "epoch": 401.1671641791045, "grad_norm": 17.480756759643555, "learning_rate": 9.973922902494332e-06, "loss": 30.8418, "step": 16849 }, { "epoch": 401.1910447761194, "grad_norm": 22.90028953552246, "learning_rate": 9.973356009070294e-06, "loss": 31.4254, "step": 16850 }, { "epoch": 401.21492537313435, "grad_norm": 17.838167190551758, "learning_rate": 9.972789115646259e-06, "loss": 31.2838, "step": 16851 }, { "epoch": 401.23880597014926, "grad_norm": 17.68524742126465, "learning_rate": 9.972222222222224e-06, "loss": 32.1313, "step": 16852 }, { "epoch": 401.26268656716417, "grad_norm": 15.703474998474121, "learning_rate": 9.971655328798187e-06, "loss": 31.302, "step": 16853 }, { "epoch": 401.28656716417913, "grad_norm": 16.799213409423828, "learning_rate": 9.97108843537415e-06, "loss": 30.9738, "step": 16854 }, { "epoch": 401.31044776119404, "grad_norm": 16.973718643188477, "learning_rate": 9.970521541950114e-06, "loss": 30.7046, "step": 16855 }, { "epoch": 401.33432835820895, "grad_norm": 17.80534553527832, "learning_rate": 9.969954648526079e-06, "loss": 31.1274, "step": 16856 }, { "epoch": 401.35820895522386, "grad_norm": 23.12386703491211, "learning_rate": 9.969387755102042e-06, "loss": 31.0468, "step": 16857 }, { "epoch": 401.3820895522388, "grad_norm": 17.315753936767578, "learning_rate": 9.968820861678006e-06, "loss": 30.9335, "step": 16858 }, { "epoch": 401.40597014925373, "grad_norm": 15.378706932067871, "learning_rate": 9.968253968253969e-06, "loss": 29.2263, "step": 16859 }, { "epoch": 401.42985074626864, "grad_norm": 17.434106826782227, "learning_rate": 9.967687074829932e-06, "loss": 30.0167, "step": 16860 }, { "epoch": 401.4537313432836, "grad_norm": 18.828147888183594, "learning_rate": 9.967120181405897e-06, "loss": 29.8575, "step": 16861 }, { "epoch": 401.4776119402985, "grad_norm": 19.391490936279297, "learning_rate": 9.966553287981861e-06, "loss": 31.5681, "step": 16862 }, { "epoch": 401.5014925373134, "grad_norm": 20.753772735595703, "learning_rate": 9.965986394557824e-06, "loss": 31.2438, "step": 16863 }, { "epoch": 401.52537313432833, "grad_norm": 19.081161499023438, "learning_rate": 9.965419501133787e-06, "loss": 30.9956, "step": 16864 }, { "epoch": 401.5492537313433, "grad_norm": 15.791476249694824, "learning_rate": 9.964852607709752e-06, "loss": 31.0609, "step": 16865 }, { "epoch": 401.5731343283582, "grad_norm": 26.492345809936523, "learning_rate": 9.964285714285714e-06, "loss": 30.9209, "step": 16866 }, { "epoch": 401.5970149253731, "grad_norm": 21.19564437866211, "learning_rate": 9.963718820861679e-06, "loss": 31.4632, "step": 16867 }, { "epoch": 401.6208955223881, "grad_norm": 22.00580596923828, "learning_rate": 9.963151927437642e-06, "loss": 31.5752, "step": 16868 }, { "epoch": 401.644776119403, "grad_norm": 24.39080810546875, "learning_rate": 9.962585034013607e-06, "loss": 31.3848, "step": 16869 }, { "epoch": 401.6686567164179, "grad_norm": 25.685808181762695, "learning_rate": 9.96201814058957e-06, "loss": 31.0735, "step": 16870 }, { "epoch": 401.6925373134328, "grad_norm": 17.7955379486084, "learning_rate": 9.961451247165534e-06, "loss": 31.1636, "step": 16871 }, { "epoch": 401.7164179104478, "grad_norm": 26.66135597229004, "learning_rate": 9.960884353741499e-06, "loss": 31.876, "step": 16872 }, { "epoch": 401.7402985074627, "grad_norm": 21.049110412597656, "learning_rate": 9.960317460317462e-06, "loss": 30.4948, "step": 16873 }, { "epoch": 401.7641791044776, "grad_norm": 18.58393096923828, "learning_rate": 9.959750566893424e-06, "loss": 30.592, "step": 16874 }, { "epoch": 401.78805970149256, "grad_norm": 22.98539161682129, "learning_rate": 9.959183673469387e-06, "loss": 29.9171, "step": 16875 }, { "epoch": 401.81194029850747, "grad_norm": 20.962472915649414, "learning_rate": 9.958616780045352e-06, "loss": 31.3894, "step": 16876 }, { "epoch": 401.8358208955224, "grad_norm": 18.580272674560547, "learning_rate": 9.958049886621317e-06, "loss": 31.0705, "step": 16877 }, { "epoch": 401.85970149253734, "grad_norm": 16.85481071472168, "learning_rate": 9.95748299319728e-06, "loss": 30.511, "step": 16878 }, { "epoch": 401.88358208955225, "grad_norm": 18.063079833984375, "learning_rate": 9.956916099773244e-06, "loss": 31.5687, "step": 16879 }, { "epoch": 401.90746268656716, "grad_norm": 16.066482543945312, "learning_rate": 9.956349206349207e-06, "loss": 30.6445, "step": 16880 }, { "epoch": 401.93134328358207, "grad_norm": 18.891555786132812, "learning_rate": 9.955782312925172e-06, "loss": 31.0396, "step": 16881 }, { "epoch": 401.95522388059703, "grad_norm": 19.531452178955078, "learning_rate": 9.955215419501134e-06, "loss": 31.6228, "step": 16882 }, { "epoch": 401.97910447761194, "grad_norm": 17.721233367919922, "learning_rate": 9.954648526077099e-06, "loss": 31.2063, "step": 16883 }, { "epoch": 402.0, "grad_norm": 14.193357467651367, "learning_rate": 9.954081632653062e-06, "loss": 27.6322, "step": 16884 }, { "epoch": 402.0238805970149, "grad_norm": 17.433488845825195, "learning_rate": 9.953514739229025e-06, "loss": 31.3941, "step": 16885 }, { "epoch": 402.0477611940299, "grad_norm": 18.68000602722168, "learning_rate": 9.95294784580499e-06, "loss": 31.8399, "step": 16886 }, { "epoch": 402.0716417910448, "grad_norm": 26.427698135375977, "learning_rate": 9.952380952380954e-06, "loss": 30.4522, "step": 16887 }, { "epoch": 402.0955223880597, "grad_norm": 22.38204002380371, "learning_rate": 9.951814058956917e-06, "loss": 30.7499, "step": 16888 }, { "epoch": 402.1194029850746, "grad_norm": 16.085290908813477, "learning_rate": 9.95124716553288e-06, "loss": 30.6177, "step": 16889 }, { "epoch": 402.14328358208957, "grad_norm": 34.455780029296875, "learning_rate": 9.950680272108844e-06, "loss": 31.4126, "step": 16890 }, { "epoch": 402.1671641791045, "grad_norm": 23.277976989746094, "learning_rate": 9.950113378684807e-06, "loss": 31.2081, "step": 16891 }, { "epoch": 402.1910447761194, "grad_norm": 29.880111694335938, "learning_rate": 9.949546485260772e-06, "loss": 31.1892, "step": 16892 }, { "epoch": 402.21492537313435, "grad_norm": 25.491518020629883, "learning_rate": 9.948979591836737e-06, "loss": 30.7009, "step": 16893 }, { "epoch": 402.23880597014926, "grad_norm": 22.70813751220703, "learning_rate": 9.9484126984127e-06, "loss": 30.9564, "step": 16894 }, { "epoch": 402.26268656716417, "grad_norm": 32.41717529296875, "learning_rate": 9.947845804988662e-06, "loss": 31.0667, "step": 16895 }, { "epoch": 402.28656716417913, "grad_norm": 18.49527359008789, "learning_rate": 9.947278911564627e-06, "loss": 31.9441, "step": 16896 }, { "epoch": 402.31044776119404, "grad_norm": 35.96527099609375, "learning_rate": 9.946712018140592e-06, "loss": 30.9551, "step": 16897 }, { "epoch": 402.33432835820895, "grad_norm": 24.00267791748047, "learning_rate": 9.946145124716554e-06, "loss": 32.069, "step": 16898 }, { "epoch": 402.35820895522386, "grad_norm": 27.444225311279297, "learning_rate": 9.945578231292517e-06, "loss": 30.53, "step": 16899 }, { "epoch": 402.3820895522388, "grad_norm": 26.21395492553711, "learning_rate": 9.945011337868482e-06, "loss": 30.4552, "step": 16900 }, { "epoch": 402.40597014925373, "grad_norm": 21.139373779296875, "learning_rate": 9.944444444444445e-06, "loss": 31.3025, "step": 16901 }, { "epoch": 402.42985074626864, "grad_norm": 33.18841552734375, "learning_rate": 9.94387755102041e-06, "loss": 29.6865, "step": 16902 }, { "epoch": 402.4537313432836, "grad_norm": 21.520919799804688, "learning_rate": 9.943310657596372e-06, "loss": 30.8559, "step": 16903 }, { "epoch": 402.4776119402985, "grad_norm": 37.26947784423828, "learning_rate": 9.942743764172337e-06, "loss": 30.3123, "step": 16904 }, { "epoch": 402.5014925373134, "grad_norm": 28.98073387145996, "learning_rate": 9.9421768707483e-06, "loss": 30.9669, "step": 16905 }, { "epoch": 402.52537313432833, "grad_norm": 34.28809356689453, "learning_rate": 9.941609977324263e-06, "loss": 31.4575, "step": 16906 }, { "epoch": 402.5492537313433, "grad_norm": 25.58087158203125, "learning_rate": 9.941043083900227e-06, "loss": 31.7515, "step": 16907 }, { "epoch": 402.5731343283582, "grad_norm": 30.296483993530273, "learning_rate": 9.940476190476192e-06, "loss": 31.4462, "step": 16908 }, { "epoch": 402.5970149253731, "grad_norm": 20.742158889770508, "learning_rate": 9.939909297052155e-06, "loss": 29.5965, "step": 16909 }, { "epoch": 402.6208955223881, "grad_norm": 32.30087661743164, "learning_rate": 9.939342403628118e-06, "loss": 31.6445, "step": 16910 }, { "epoch": 402.644776119403, "grad_norm": 23.726449966430664, "learning_rate": 9.938775510204082e-06, "loss": 30.9719, "step": 16911 }, { "epoch": 402.6686567164179, "grad_norm": 28.384923934936523, "learning_rate": 9.938208616780047e-06, "loss": 31.3353, "step": 16912 }, { "epoch": 402.6925373134328, "grad_norm": 25.4388370513916, "learning_rate": 9.93764172335601e-06, "loss": 30.8237, "step": 16913 }, { "epoch": 402.7164179104478, "grad_norm": 27.198352813720703, "learning_rate": 9.937074829931974e-06, "loss": 31.0413, "step": 16914 }, { "epoch": 402.7402985074627, "grad_norm": 25.418916702270508, "learning_rate": 9.936507936507937e-06, "loss": 30.7082, "step": 16915 }, { "epoch": 402.7641791044776, "grad_norm": 28.595592498779297, "learning_rate": 9.9359410430839e-06, "loss": 31.3851, "step": 16916 }, { "epoch": 402.78805970149256, "grad_norm": 24.97838020324707, "learning_rate": 9.935374149659865e-06, "loss": 31.1574, "step": 16917 }, { "epoch": 402.81194029850747, "grad_norm": 27.653484344482422, "learning_rate": 9.93480725623583e-06, "loss": 31.1965, "step": 16918 }, { "epoch": 402.8358208955224, "grad_norm": 23.469223022460938, "learning_rate": 9.934240362811792e-06, "loss": 31.0656, "step": 16919 }, { "epoch": 402.85970149253734, "grad_norm": 27.161710739135742, "learning_rate": 9.933673469387755e-06, "loss": 31.3268, "step": 16920 }, { "epoch": 402.88358208955225, "grad_norm": 23.48308753967285, "learning_rate": 9.93310657596372e-06, "loss": 31.0566, "step": 16921 }, { "epoch": 402.90746268656716, "grad_norm": 23.823659896850586, "learning_rate": 9.932539682539684e-06, "loss": 30.3514, "step": 16922 }, { "epoch": 402.93134328358207, "grad_norm": 20.73362159729004, "learning_rate": 9.931972789115647e-06, "loss": 29.3461, "step": 16923 }, { "epoch": 402.95522388059703, "grad_norm": 21.970233917236328, "learning_rate": 9.93140589569161e-06, "loss": 31.0073, "step": 16924 }, { "epoch": 402.97910447761194, "grad_norm": NaN, "learning_rate": 9.930839002267575e-06, "loss": 51.1836, "step": 16925 }, { "epoch": 403.0, "grad_norm": 18.906526565551758, "learning_rate": 9.930839002267575e-06, "loss": 27.3897, "step": 16926 }, { "epoch": 403.0238805970149, "grad_norm": 22.018394470214844, "learning_rate": 9.930272108843538e-06, "loss": 29.3437, "step": 16927 }, { "epoch": 403.0477611940299, "grad_norm": 21.189476013183594, "learning_rate": 9.929705215419502e-06, "loss": 31.4675, "step": 16928 }, { "epoch": 403.0716417910448, "grad_norm": 22.949079513549805, "learning_rate": 9.929138321995465e-06, "loss": 31.5757, "step": 16929 }, { "epoch": 403.0955223880597, "grad_norm": 20.074411392211914, "learning_rate": 9.92857142857143e-06, "loss": 30.3693, "step": 16930 }, { "epoch": 403.1194029850746, "grad_norm": 22.19842529296875, "learning_rate": 9.928004535147393e-06, "loss": 31.1422, "step": 16931 }, { "epoch": 403.14328358208957, "grad_norm": 19.386714935302734, "learning_rate": 9.927437641723356e-06, "loss": 29.6273, "step": 16932 }, { "epoch": 403.1671641791045, "grad_norm": 22.950700759887695, "learning_rate": 9.92687074829932e-06, "loss": 30.1628, "step": 16933 }, { "epoch": 403.1910447761194, "grad_norm": 18.49382209777832, "learning_rate": 9.926303854875285e-06, "loss": 31.9745, "step": 16934 }, { "epoch": 403.21492537313435, "grad_norm": 25.086666107177734, "learning_rate": 9.925736961451248e-06, "loss": 30.9489, "step": 16935 }, { "epoch": 403.23880597014926, "grad_norm": 21.343698501586914, "learning_rate": 9.92517006802721e-06, "loss": 31.0264, "step": 16936 }, { "epoch": 403.26268656716417, "grad_norm": 23.167051315307617, "learning_rate": 9.924603174603175e-06, "loss": 30.348, "step": 16937 }, { "epoch": 403.28656716417913, "grad_norm": 21.987293243408203, "learning_rate": 9.92403628117914e-06, "loss": 32.2085, "step": 16938 }, { "epoch": 403.31044776119404, "grad_norm": 21.484840393066406, "learning_rate": 9.923469387755103e-06, "loss": 32.1084, "step": 16939 }, { "epoch": 403.33432835820895, "grad_norm": 22.352279663085938, "learning_rate": 9.922902494331067e-06, "loss": 31.2459, "step": 16940 }, { "epoch": 403.35820895522386, "grad_norm": 18.30046844482422, "learning_rate": 9.92233560090703e-06, "loss": 30.7262, "step": 16941 }, { "epoch": 403.3820895522388, "grad_norm": 21.983285903930664, "learning_rate": 9.921768707482993e-06, "loss": 31.6438, "step": 16942 }, { "epoch": 403.40597014925373, "grad_norm": 18.815380096435547, "learning_rate": 9.921201814058958e-06, "loss": 30.3058, "step": 16943 }, { "epoch": 403.42985074626864, "grad_norm": 17.1711368560791, "learning_rate": 9.920634920634922e-06, "loss": 31.1327, "step": 16944 }, { "epoch": 403.4537313432836, "grad_norm": 18.554462432861328, "learning_rate": 9.920068027210885e-06, "loss": 30.6388, "step": 16945 }, { "epoch": 403.4776119402985, "grad_norm": 17.991119384765625, "learning_rate": 9.919501133786848e-06, "loss": 31.1263, "step": 16946 }, { "epoch": 403.5014925373134, "grad_norm": 17.035293579101562, "learning_rate": 9.918934240362813e-06, "loss": 30.9584, "step": 16947 }, { "epoch": 403.52537313432833, "grad_norm": 17.614078521728516, "learning_rate": 9.918367346938776e-06, "loss": 31.2794, "step": 16948 }, { "epoch": 403.5492537313433, "grad_norm": 19.074827194213867, "learning_rate": 9.91780045351474e-06, "loss": 31.2004, "step": 16949 }, { "epoch": 403.5731343283582, "grad_norm": 20.858890533447266, "learning_rate": 9.917233560090703e-06, "loss": 31.1087, "step": 16950 }, { "epoch": 403.5970149253731, "grad_norm": 17.543880462646484, "learning_rate": 9.916666666666668e-06, "loss": 31.233, "step": 16951 }, { "epoch": 403.6208955223881, "grad_norm": 16.920913696289062, "learning_rate": 9.91609977324263e-06, "loss": 31.0293, "step": 16952 }, { "epoch": 403.644776119403, "grad_norm": 19.769798278808594, "learning_rate": 9.915532879818595e-06, "loss": 31.1607, "step": 16953 }, { "epoch": 403.6686567164179, "grad_norm": 20.002456665039062, "learning_rate": 9.91496598639456e-06, "loss": 30.9741, "step": 16954 }, { "epoch": 403.6925373134328, "grad_norm": 17.78573989868164, "learning_rate": 9.914399092970523e-06, "loss": 29.1995, "step": 16955 }, { "epoch": 403.7164179104478, "grad_norm": 15.988506317138672, "learning_rate": 9.913832199546486e-06, "loss": 30.9922, "step": 16956 }, { "epoch": 403.7402985074627, "grad_norm": 16.265708923339844, "learning_rate": 9.913265306122449e-06, "loss": 32.3465, "step": 16957 }, { "epoch": 403.7641791044776, "grad_norm": 18.048246383666992, "learning_rate": 9.912698412698413e-06, "loss": 31.4117, "step": 16958 }, { "epoch": 403.78805970149256, "grad_norm": 27.613380432128906, "learning_rate": 9.912131519274378e-06, "loss": 30.1975, "step": 16959 }, { "epoch": 403.81194029850747, "grad_norm": 21.412445068359375, "learning_rate": 9.91156462585034e-06, "loss": 31.1824, "step": 16960 }, { "epoch": 403.8358208955224, "grad_norm": 15.627758026123047, "learning_rate": 9.910997732426305e-06, "loss": 29.8711, "step": 16961 }, { "epoch": 403.85970149253734, "grad_norm": 17.799707412719727, "learning_rate": 9.910430839002268e-06, "loss": 31.122, "step": 16962 }, { "epoch": 403.88358208955225, "grad_norm": 24.470787048339844, "learning_rate": 9.909863945578233e-06, "loss": 30.9715, "step": 16963 }, { "epoch": 403.90746268656716, "grad_norm": 23.644081115722656, "learning_rate": 9.909297052154196e-06, "loss": 29.733, "step": 16964 }, { "epoch": 403.93134328358207, "grad_norm": 17.018402099609375, "learning_rate": 9.90873015873016e-06, "loss": 31.1744, "step": 16965 }, { "epoch": 403.95522388059703, "grad_norm": 17.959978103637695, "learning_rate": 9.908163265306123e-06, "loss": 31.198, "step": 16966 }, { "epoch": 403.97910447761194, "grad_norm": 15.238065719604492, "learning_rate": 9.907596371882086e-06, "loss": 31.2774, "step": 16967 }, { "epoch": 404.0, "grad_norm": NaN, "learning_rate": 9.90702947845805e-06, "loss": 27.5237, "step": 16968 }, { "epoch": 404.0238805970149, "grad_norm": 21.82721710205078, "learning_rate": 9.90702947845805e-06, "loss": 31.1855, "step": 16969 }, { "epoch": 404.0477611940299, "grad_norm": 22.108537673950195, "learning_rate": 9.906462585034015e-06, "loss": 29.9266, "step": 16970 }, { "epoch": 404.0716417910448, "grad_norm": 18.823047637939453, "learning_rate": 9.905895691609978e-06, "loss": 31.2415, "step": 16971 }, { "epoch": 404.0955223880597, "grad_norm": 19.243526458740234, "learning_rate": 9.905328798185941e-06, "loss": 29.7391, "step": 16972 }, { "epoch": 404.1194029850746, "grad_norm": 18.996889114379883, "learning_rate": 9.904761904761906e-06, "loss": 31.3894, "step": 16973 }, { "epoch": 404.14328358208957, "grad_norm": NaN, "learning_rate": 9.904195011337869e-06, "loss": 31.1024, "step": 16974 }, { "epoch": 404.1671641791045, "grad_norm": 21.685443878173828, "learning_rate": 9.904195011337869e-06, "loss": 31.273, "step": 16975 }, { "epoch": 404.1910447761194, "grad_norm": 17.583675384521484, "learning_rate": 9.903628117913833e-06, "loss": 31.0446, "step": 16976 }, { "epoch": 404.21492537313435, "grad_norm": 22.83050537109375, "learning_rate": 9.903061224489798e-06, "loss": 30.3282, "step": 16977 }, { "epoch": 404.23880597014926, "grad_norm": NaN, "learning_rate": 9.90249433106576e-06, "loss": 42.5643, "step": 16978 }, { "epoch": 404.26268656716417, "grad_norm": 25.750978469848633, "learning_rate": 9.90249433106576e-06, "loss": 31.4035, "step": 16979 }, { "epoch": 404.28656716417913, "grad_norm": 18.988866806030273, "learning_rate": 9.901927437641724e-06, "loss": 31.6592, "step": 16980 }, { "epoch": 404.31044776119404, "grad_norm": 17.866527557373047, "learning_rate": 9.901360544217688e-06, "loss": 29.8526, "step": 16981 }, { "epoch": 404.33432835820895, "grad_norm": 17.667320251464844, "learning_rate": 9.900793650793653e-06, "loss": 30.7798, "step": 16982 }, { "epoch": 404.35820895522386, "grad_norm": 21.57267951965332, "learning_rate": 9.900226757369616e-06, "loss": 31.132, "step": 16983 }, { "epoch": 404.3820895522388, "grad_norm": 21.000722885131836, "learning_rate": 9.899659863945579e-06, "loss": 31.3184, "step": 16984 }, { "epoch": 404.40597014925373, "grad_norm": 17.419523239135742, "learning_rate": 9.899092970521543e-06, "loss": 31.3596, "step": 16985 }, { "epoch": 404.42985074626864, "grad_norm": 18.504884719848633, "learning_rate": 9.898526077097506e-06, "loss": 30.8438, "step": 16986 }, { "epoch": 404.4537313432836, "grad_norm": 18.881832122802734, "learning_rate": 9.89795918367347e-06, "loss": 30.2917, "step": 16987 }, { "epoch": 404.4776119402985, "grad_norm": 14.251657485961914, "learning_rate": 9.897392290249433e-06, "loss": 30.8155, "step": 16988 }, { "epoch": 404.5014925373134, "grad_norm": 19.814865112304688, "learning_rate": 9.896825396825398e-06, "loss": 31.6332, "step": 16989 }, { "epoch": 404.52537313432833, "grad_norm": 19.100130081176758, "learning_rate": 9.896258503401361e-06, "loss": 30.5632, "step": 16990 }, { "epoch": 404.5492537313433, "grad_norm": 22.569988250732422, "learning_rate": 9.895691609977326e-06, "loss": 30.6678, "step": 16991 }, { "epoch": 404.5731343283582, "grad_norm": 22.602933883666992, "learning_rate": 9.895124716553288e-06, "loss": 31.5504, "step": 16992 }, { "epoch": 404.5970149253731, "grad_norm": 18.80979347229004, "learning_rate": 9.894557823129253e-06, "loss": 31.4335, "step": 16993 }, { "epoch": 404.6208955223881, "grad_norm": 17.524200439453125, "learning_rate": 9.893990929705216e-06, "loss": 31.1216, "step": 16994 }, { "epoch": 404.644776119403, "grad_norm": 16.63074493408203, "learning_rate": 9.893424036281179e-06, "loss": 30.694, "step": 16995 }, { "epoch": 404.6686567164179, "grad_norm": 17.04694366455078, "learning_rate": 9.892857142857143e-06, "loss": 31.897, "step": 16996 }, { "epoch": 404.6925373134328, "grad_norm": 16.07754135131836, "learning_rate": 9.892290249433108e-06, "loss": 31.4077, "step": 16997 }, { "epoch": 404.7164179104478, "grad_norm": 17.20372200012207, "learning_rate": 9.891723356009071e-06, "loss": 30.5793, "step": 16998 }, { "epoch": 404.7402985074627, "grad_norm": 18.453689575195312, "learning_rate": 9.891156462585036e-06, "loss": 30.3079, "step": 16999 }, { "epoch": 404.7641791044776, "grad_norm": 17.784910202026367, "learning_rate": 9.890589569160998e-06, "loss": 30.5712, "step": 17000 }, { "epoch": 404.78805970149256, "grad_norm": 19.61137580871582, "learning_rate": 9.890022675736961e-06, "loss": 30.6252, "step": 17001 }, { "epoch": 404.81194029850747, "grad_norm": 19.092275619506836, "learning_rate": 9.889455782312926e-06, "loss": 30.228, "step": 17002 }, { "epoch": 404.8358208955224, "grad_norm": 22.4578857421875, "learning_rate": 9.88888888888889e-06, "loss": 29.8657, "step": 17003 }, { "epoch": 404.85970149253734, "grad_norm": 21.06572723388672, "learning_rate": 9.888321995464853e-06, "loss": 30.2763, "step": 17004 }, { "epoch": 404.88358208955225, "grad_norm": 22.17323112487793, "learning_rate": 9.887755102040816e-06, "loss": 31.2704, "step": 17005 }, { "epoch": 404.90746268656716, "grad_norm": 18.96662139892578, "learning_rate": 9.887188208616781e-06, "loss": 30.9814, "step": 17006 }, { "epoch": 404.93134328358207, "grad_norm": 21.42250633239746, "learning_rate": 9.886621315192746e-06, "loss": 31.4689, "step": 17007 }, { "epoch": 404.95522388059703, "grad_norm": 23.838497161865234, "learning_rate": 9.886054421768708e-06, "loss": 31.8555, "step": 17008 }, { "epoch": 404.97910447761194, "grad_norm": 19.52179718017578, "learning_rate": 9.885487528344671e-06, "loss": 31.0752, "step": 17009 }, { "epoch": 405.0, "grad_norm": 17.59729766845703, "learning_rate": 9.884920634920636e-06, "loss": 26.1024, "step": 17010 }, { "epoch": 405.0238805970149, "grad_norm": 18.205644607543945, "learning_rate": 9.884353741496599e-06, "loss": 30.9611, "step": 17011 }, { "epoch": 405.0477611940299, "grad_norm": NaN, "learning_rate": 9.883786848072563e-06, "loss": 45.3269, "step": 17012 }, { "epoch": 405.0716417910448, "grad_norm": 19.729263305664062, "learning_rate": 9.883786848072563e-06, "loss": 31.0441, "step": 17013 }, { "epoch": 405.0955223880597, "grad_norm": 19.006351470947266, "learning_rate": 9.883219954648526e-06, "loss": 31.161, "step": 17014 }, { "epoch": 405.1194029850746, "grad_norm": 18.71268653869629, "learning_rate": 9.882653061224491e-06, "loss": 32.3478, "step": 17015 }, { "epoch": 405.14328358208957, "grad_norm": 18.07982063293457, "learning_rate": 9.882086167800454e-06, "loss": 32.0008, "step": 17016 }, { "epoch": 405.1671641791045, "grad_norm": 23.311058044433594, "learning_rate": 9.881519274376418e-06, "loss": 29.8327, "step": 17017 }, { "epoch": 405.1910447761194, "grad_norm": 19.43598175048828, "learning_rate": 9.880952380952381e-06, "loss": 30.2444, "step": 17018 }, { "epoch": 405.21492537313435, "grad_norm": 20.689617156982422, "learning_rate": 9.880385487528346e-06, "loss": 30.5942, "step": 17019 }, { "epoch": 405.23880597014926, "grad_norm": 16.17937469482422, "learning_rate": 9.879818594104309e-06, "loss": 30.325, "step": 17020 }, { "epoch": 405.26268656716417, "grad_norm": 21.156057357788086, "learning_rate": 9.879251700680272e-06, "loss": 32.2917, "step": 17021 }, { "epoch": 405.28656716417913, "grad_norm": 22.3360595703125, "learning_rate": 9.878684807256236e-06, "loss": 31.0669, "step": 17022 }, { "epoch": 405.31044776119404, "grad_norm": 19.197404861450195, "learning_rate": 9.878117913832201e-06, "loss": 31.7879, "step": 17023 }, { "epoch": 405.33432835820895, "grad_norm": 19.768911361694336, "learning_rate": 9.877551020408164e-06, "loss": 30.5908, "step": 17024 }, { "epoch": 405.35820895522386, "grad_norm": 26.982616424560547, "learning_rate": 9.876984126984128e-06, "loss": 31.0877, "step": 17025 }, { "epoch": 405.3820895522388, "grad_norm": 19.847026824951172, "learning_rate": 9.876417233560091e-06, "loss": 30.4891, "step": 17026 }, { "epoch": 405.40597014925373, "grad_norm": 15.999998092651367, "learning_rate": 9.875850340136054e-06, "loss": 31.2124, "step": 17027 }, { "epoch": 405.42985074626864, "grad_norm": 15.895896911621094, "learning_rate": 9.875283446712019e-06, "loss": 30.2103, "step": 17028 }, { "epoch": 405.4537313432836, "grad_norm": 15.329496383666992, "learning_rate": 9.874716553287983e-06, "loss": 30.0036, "step": 17029 }, { "epoch": 405.4776119402985, "grad_norm": 16.12322235107422, "learning_rate": 9.874149659863946e-06, "loss": 29.7037, "step": 17030 }, { "epoch": 405.5014925373134, "grad_norm": 16.98004722595215, "learning_rate": 9.87358276643991e-06, "loss": 30.9011, "step": 17031 }, { "epoch": 405.52537313432833, "grad_norm": 19.170299530029297, "learning_rate": 9.873015873015874e-06, "loss": 30.192, "step": 17032 }, { "epoch": 405.5492537313433, "grad_norm": 17.483421325683594, "learning_rate": 9.872448979591838e-06, "loss": 30.5255, "step": 17033 }, { "epoch": 405.5731343283582, "grad_norm": 20.62899398803711, "learning_rate": 9.871882086167801e-06, "loss": 30.8915, "step": 17034 }, { "epoch": 405.5970149253731, "grad_norm": 17.30540657043457, "learning_rate": 9.871315192743764e-06, "loss": 31.0716, "step": 17035 }, { "epoch": 405.6208955223881, "grad_norm": 18.22004508972168, "learning_rate": 9.870748299319729e-06, "loss": 31.9494, "step": 17036 }, { "epoch": 405.644776119403, "grad_norm": 16.671100616455078, "learning_rate": 9.870181405895692e-06, "loss": 30.6172, "step": 17037 }, { "epoch": 405.6686567164179, "grad_norm": NaN, "learning_rate": 9.869614512471656e-06, "loss": 45.332, "step": 17038 }, { "epoch": 405.6925373134328, "grad_norm": 17.983890533447266, "learning_rate": 9.869614512471656e-06, "loss": 30.4078, "step": 17039 }, { "epoch": 405.7164179104478, "grad_norm": 21.861013412475586, "learning_rate": 9.869047619047621e-06, "loss": 30.7618, "step": 17040 }, { "epoch": 405.7402985074627, "grad_norm": 22.274782180786133, "learning_rate": 9.868480725623584e-06, "loss": 31.8571, "step": 17041 }, { "epoch": 405.7641791044776, "grad_norm": 15.382903099060059, "learning_rate": 9.867913832199547e-06, "loss": 31.0003, "step": 17042 }, { "epoch": 405.78805970149256, "grad_norm": 24.925888061523438, "learning_rate": 9.867346938775511e-06, "loss": 31.1132, "step": 17043 }, { "epoch": 405.81194029850747, "grad_norm": 21.787288665771484, "learning_rate": 9.866780045351474e-06, "loss": 31.2963, "step": 17044 }, { "epoch": 405.8358208955224, "grad_norm": 18.153474807739258, "learning_rate": 9.866213151927439e-06, "loss": 30.8227, "step": 17045 }, { "epoch": 405.85970149253734, "grad_norm": 25.045469284057617, "learning_rate": 9.865646258503402e-06, "loss": 30.42, "step": 17046 }, { "epoch": 405.88358208955225, "grad_norm": 23.71382713317871, "learning_rate": 9.865079365079366e-06, "loss": 31.5095, "step": 17047 }, { "epoch": 405.90746268656716, "grad_norm": 19.81368637084961, "learning_rate": 9.86451247165533e-06, "loss": 31.6538, "step": 17048 }, { "epoch": 405.93134328358207, "grad_norm": 19.46569061279297, "learning_rate": 9.863945578231294e-06, "loss": 31.2084, "step": 17049 }, { "epoch": 405.95522388059703, "grad_norm": 16.42058563232422, "learning_rate": 9.863378684807257e-06, "loss": 30.8875, "step": 17050 }, { "epoch": 405.97910447761194, "grad_norm": 23.119794845581055, "learning_rate": 9.862811791383221e-06, "loss": 30.2666, "step": 17051 }, { "epoch": 406.0, "grad_norm": 15.268242835998535, "learning_rate": 9.862244897959184e-06, "loss": 26.7074, "step": 17052 }, { "epoch": 406.0238805970149, "grad_norm": 22.865819931030273, "learning_rate": 9.861678004535147e-06, "loss": 30.5228, "step": 17053 }, { "epoch": 406.0477611940299, "grad_norm": 17.812450408935547, "learning_rate": 9.861111111111112e-06, "loss": 30.5346, "step": 17054 }, { "epoch": 406.0716417910448, "grad_norm": 25.216026306152344, "learning_rate": 9.860544217687076e-06, "loss": 30.5902, "step": 17055 }, { "epoch": 406.0955223880597, "grad_norm": 20.24772834777832, "learning_rate": 9.85997732426304e-06, "loss": 30.2208, "step": 17056 }, { "epoch": 406.1194029850746, "grad_norm": NaN, "learning_rate": 9.859410430839002e-06, "loss": 52.1957, "step": 17057 }, { "epoch": 406.14328358208957, "grad_norm": 19.716794967651367, "learning_rate": 9.859410430839002e-06, "loss": 30.7717, "step": 17058 }, { "epoch": 406.1671641791045, "grad_norm": 18.68526268005371, "learning_rate": 9.858843537414967e-06, "loss": 30.2788, "step": 17059 }, { "epoch": 406.1910447761194, "grad_norm": 19.732078552246094, "learning_rate": 9.858276643990931e-06, "loss": 31.6837, "step": 17060 }, { "epoch": 406.21492537313435, "grad_norm": 21.107961654663086, "learning_rate": 9.857709750566894e-06, "loss": 30.4119, "step": 17061 }, { "epoch": 406.23880597014926, "grad_norm": 18.77997398376465, "learning_rate": 9.857142857142859e-06, "loss": 30.8591, "step": 17062 }, { "epoch": 406.26268656716417, "grad_norm": 17.919878005981445, "learning_rate": 9.856575963718822e-06, "loss": 32.7938, "step": 17063 }, { "epoch": 406.28656716417913, "grad_norm": 21.54792594909668, "learning_rate": 9.856009070294785e-06, "loss": 30.6374, "step": 17064 }, { "epoch": 406.31044776119404, "grad_norm": 17.233793258666992, "learning_rate": 9.85544217687075e-06, "loss": 31.1178, "step": 17065 }, { "epoch": 406.33432835820895, "grad_norm": 20.764001846313477, "learning_rate": 9.854875283446714e-06, "loss": 31.244, "step": 17066 }, { "epoch": 406.35820895522386, "grad_norm": 16.163888931274414, "learning_rate": 9.854308390022677e-06, "loss": 30.951, "step": 17067 }, { "epoch": 406.3820895522388, "grad_norm": 20.328454971313477, "learning_rate": 9.85374149659864e-06, "loss": 31.2245, "step": 17068 }, { "epoch": 406.40597014925373, "grad_norm": 19.258499145507812, "learning_rate": 9.853174603174604e-06, "loss": 30.5636, "step": 17069 }, { "epoch": 406.42985074626864, "grad_norm": 19.76752281188965, "learning_rate": 9.852607709750567e-06, "loss": 28.7173, "step": 17070 }, { "epoch": 406.4537313432836, "grad_norm": 24.23910903930664, "learning_rate": 9.852040816326532e-06, "loss": 32.9057, "step": 17071 }, { "epoch": 406.4776119402985, "grad_norm": 20.138818740844727, "learning_rate": 9.851473922902495e-06, "loss": 31.6671, "step": 17072 }, { "epoch": 406.5014925373134, "grad_norm": 18.762805938720703, "learning_rate": 9.85090702947846e-06, "loss": 31.1464, "step": 17073 }, { "epoch": 406.52537313432833, "grad_norm": 16.108400344848633, "learning_rate": 9.850340136054422e-06, "loss": 30.4823, "step": 17074 }, { "epoch": 406.5492537313433, "grad_norm": 20.8375244140625, "learning_rate": 9.849773242630387e-06, "loss": 30.6664, "step": 17075 }, { "epoch": 406.5731343283582, "grad_norm": 26.487838745117188, "learning_rate": 9.849206349206351e-06, "loss": 30.1852, "step": 17076 }, { "epoch": 406.5970149253731, "grad_norm": 18.415733337402344, "learning_rate": 9.848639455782314e-06, "loss": 30.3024, "step": 17077 }, { "epoch": 406.6208955223881, "grad_norm": 21.358409881591797, "learning_rate": 9.848072562358277e-06, "loss": 30.8718, "step": 17078 }, { "epoch": 406.644776119403, "grad_norm": 21.802520751953125, "learning_rate": 9.84750566893424e-06, "loss": 29.3102, "step": 17079 }, { "epoch": 406.6686567164179, "grad_norm": 22.83088493347168, "learning_rate": 9.846938775510205e-06, "loss": 31.9597, "step": 17080 }, { "epoch": 406.6925373134328, "grad_norm": 17.50154685974121, "learning_rate": 9.84637188208617e-06, "loss": 32.1017, "step": 17081 }, { "epoch": 406.7164179104478, "grad_norm": 20.930034637451172, "learning_rate": 9.845804988662132e-06, "loss": 31.1494, "step": 17082 }, { "epoch": 406.7402985074627, "grad_norm": 24.347320556640625, "learning_rate": 9.845238095238097e-06, "loss": 31.3371, "step": 17083 }, { "epoch": 406.7641791044776, "grad_norm": 20.09071922302246, "learning_rate": 9.84467120181406e-06, "loss": 30.4365, "step": 17084 }, { "epoch": 406.78805970149256, "grad_norm": 18.996219635009766, "learning_rate": 9.844104308390024e-06, "loss": 31.5441, "step": 17085 }, { "epoch": 406.81194029850747, "grad_norm": 18.961217880249023, "learning_rate": 9.843537414965987e-06, "loss": 30.1595, "step": 17086 }, { "epoch": 406.8358208955224, "grad_norm": 20.991409301757812, "learning_rate": 9.842970521541952e-06, "loss": 31.6773, "step": 17087 }, { "epoch": 406.85970149253734, "grad_norm": 17.288860321044922, "learning_rate": 9.842403628117915e-06, "loss": 30.7588, "step": 17088 }, { "epoch": 406.88358208955225, "grad_norm": 18.68027114868164, "learning_rate": 9.841836734693878e-06, "loss": 30.3646, "step": 17089 }, { "epoch": 406.90746268656716, "grad_norm": 18.632766723632812, "learning_rate": 9.841269841269842e-06, "loss": 31.178, "step": 17090 }, { "epoch": 406.93134328358207, "grad_norm": 20.20024299621582, "learning_rate": 9.840702947845807e-06, "loss": 31.5622, "step": 17091 }, { "epoch": 406.95522388059703, "grad_norm": 19.179670333862305, "learning_rate": 9.84013605442177e-06, "loss": 31.0176, "step": 17092 }, { "epoch": 406.97910447761194, "grad_norm": 23.177379608154297, "learning_rate": 9.839569160997733e-06, "loss": 30.1033, "step": 17093 }, { "epoch": 407.0, "grad_norm": 18.88588523864746, "learning_rate": 9.839002267573697e-06, "loss": 26.1528, "step": 17094 }, { "epoch": 407.0238805970149, "grad_norm": 22.23098373413086, "learning_rate": 9.83843537414966e-06, "loss": 30.0929, "step": 17095 }, { "epoch": 407.0477611940299, "grad_norm": 18.42059326171875, "learning_rate": 9.837868480725625e-06, "loss": 30.8388, "step": 17096 }, { "epoch": 407.0716417910448, "grad_norm": 20.517749786376953, "learning_rate": 9.837301587301588e-06, "loss": 30.4366, "step": 17097 }, { "epoch": 407.0955223880597, "grad_norm": 20.482519149780273, "learning_rate": 9.836734693877552e-06, "loss": 30.1441, "step": 17098 }, { "epoch": 407.1194029850746, "grad_norm": 19.885637283325195, "learning_rate": 9.836167800453515e-06, "loss": 31.2451, "step": 17099 }, { "epoch": 407.14328358208957, "grad_norm": 17.006126403808594, "learning_rate": 9.83560090702948e-06, "loss": 30.3939, "step": 17100 }, { "epoch": 407.1671641791045, "grad_norm": 18.33284568786621, "learning_rate": 9.835034013605444e-06, "loss": 29.9203, "step": 17101 }, { "epoch": 407.1910447761194, "grad_norm": 23.153682708740234, "learning_rate": 9.834467120181407e-06, "loss": 30.9469, "step": 17102 }, { "epoch": 407.21492537313435, "grad_norm": 20.087182998657227, "learning_rate": 9.83390022675737e-06, "loss": 30.6324, "step": 17103 }, { "epoch": 407.23880597014926, "grad_norm": 19.266353607177734, "learning_rate": 9.833333333333333e-06, "loss": 29.798, "step": 17104 }, { "epoch": 407.26268656716417, "grad_norm": 16.637983322143555, "learning_rate": 9.832766439909298e-06, "loss": 31.0228, "step": 17105 }, { "epoch": 407.28656716417913, "grad_norm": 20.662580490112305, "learning_rate": 9.832199546485262e-06, "loss": 31.0412, "step": 17106 }, { "epoch": 407.31044776119404, "grad_norm": 24.179311752319336, "learning_rate": 9.831632653061225e-06, "loss": 31.3721, "step": 17107 }, { "epoch": 407.33432835820895, "grad_norm": 18.55788803100586, "learning_rate": 9.83106575963719e-06, "loss": 30.7381, "step": 17108 }, { "epoch": 407.35820895522386, "grad_norm": 20.20111083984375, "learning_rate": 9.830498866213153e-06, "loss": 30.8676, "step": 17109 }, { "epoch": 407.3820895522388, "grad_norm": 17.19261932373047, "learning_rate": 9.829931972789115e-06, "loss": 31.3582, "step": 17110 }, { "epoch": 407.40597014925373, "grad_norm": 19.75364875793457, "learning_rate": 9.82936507936508e-06, "loss": 29.8319, "step": 17111 }, { "epoch": 407.42985074626864, "grad_norm": 19.894805908203125, "learning_rate": 9.828798185941045e-06, "loss": 31.0655, "step": 17112 }, { "epoch": 407.4537313432836, "grad_norm": 20.540538787841797, "learning_rate": 9.828231292517008e-06, "loss": 31.0926, "step": 17113 }, { "epoch": 407.4776119402985, "grad_norm": 18.901662826538086, "learning_rate": 9.82766439909297e-06, "loss": 30.6047, "step": 17114 }, { "epoch": 407.5014925373134, "grad_norm": 18.279712677001953, "learning_rate": 9.827097505668935e-06, "loss": 30.435, "step": 17115 }, { "epoch": 407.52537313432833, "grad_norm": 19.222557067871094, "learning_rate": 9.8265306122449e-06, "loss": 31.2252, "step": 17116 }, { "epoch": 407.5492537313433, "grad_norm": 20.993911743164062, "learning_rate": 9.825963718820863e-06, "loss": 31.9963, "step": 17117 }, { "epoch": 407.5731343283582, "grad_norm": 19.337543487548828, "learning_rate": 9.825396825396825e-06, "loss": 29.7285, "step": 17118 }, { "epoch": 407.5970149253731, "grad_norm": 22.092134475708008, "learning_rate": 9.82482993197279e-06, "loss": 30.4033, "step": 17119 }, { "epoch": 407.6208955223881, "grad_norm": 16.811466217041016, "learning_rate": 9.824263038548753e-06, "loss": 30.9516, "step": 17120 }, { "epoch": 407.644776119403, "grad_norm": 23.478229522705078, "learning_rate": 9.823696145124718e-06, "loss": 30.6678, "step": 17121 }, { "epoch": 407.6686567164179, "grad_norm": 22.2537841796875, "learning_rate": 9.823129251700682e-06, "loss": 31.1874, "step": 17122 }, { "epoch": 407.6925373134328, "grad_norm": 24.31056022644043, "learning_rate": 9.822562358276645e-06, "loss": 29.8586, "step": 17123 }, { "epoch": 407.7164179104478, "grad_norm": 20.723657608032227, "learning_rate": 9.821995464852608e-06, "loss": 30.6623, "step": 17124 }, { "epoch": 407.7402985074627, "grad_norm": 17.87926483154297, "learning_rate": 9.821428571428573e-06, "loss": 30.9176, "step": 17125 }, { "epoch": 407.7641791044776, "grad_norm": 20.93740463256836, "learning_rate": 9.820861678004537e-06, "loss": 30.8719, "step": 17126 }, { "epoch": 407.78805970149256, "grad_norm": 22.472055435180664, "learning_rate": 9.8202947845805e-06, "loss": 32.0218, "step": 17127 }, { "epoch": 407.81194029850747, "grad_norm": 18.69917106628418, "learning_rate": 9.819727891156463e-06, "loss": 31.4956, "step": 17128 }, { "epoch": 407.8358208955224, "grad_norm": 20.267702102661133, "learning_rate": 9.819160997732428e-06, "loss": 31.8654, "step": 17129 }, { "epoch": 407.85970149253734, "grad_norm": 16.86379051208496, "learning_rate": 9.81859410430839e-06, "loss": 32.0695, "step": 17130 }, { "epoch": 407.88358208955225, "grad_norm": 18.317886352539062, "learning_rate": 9.818027210884355e-06, "loss": 30.0415, "step": 17131 }, { "epoch": 407.90746268656716, "grad_norm": 18.96920394897461, "learning_rate": 9.817460317460318e-06, "loss": 30.5568, "step": 17132 }, { "epoch": 407.93134328358207, "grad_norm": 18.7996883392334, "learning_rate": 9.816893424036282e-06, "loss": 30.9763, "step": 17133 }, { "epoch": 407.95522388059703, "grad_norm": 21.947856903076172, "learning_rate": 9.816326530612245e-06, "loss": 31.0727, "step": 17134 }, { "epoch": 407.97910447761194, "grad_norm": 19.53034210205078, "learning_rate": 9.815759637188208e-06, "loss": 31.3669, "step": 17135 }, { "epoch": 408.0, "grad_norm": 14.76440715789795, "learning_rate": 9.815192743764173e-06, "loss": 27.1142, "step": 17136 }, { "epoch": 408.0238805970149, "grad_norm": 18.433521270751953, "learning_rate": 9.814625850340137e-06, "loss": 30.4153, "step": 17137 }, { "epoch": 408.0477611940299, "grad_norm": 18.82404899597168, "learning_rate": 9.8140589569161e-06, "loss": 30.9344, "step": 17138 }, { "epoch": 408.0716417910448, "grad_norm": 23.097026824951172, "learning_rate": 9.813492063492063e-06, "loss": 30.7174, "step": 17139 }, { "epoch": 408.0955223880597, "grad_norm": 19.073869705200195, "learning_rate": 9.812925170068028e-06, "loss": 30.063, "step": 17140 }, { "epoch": 408.1194029850746, "grad_norm": 17.834747314453125, "learning_rate": 9.812358276643992e-06, "loss": 30.1185, "step": 17141 }, { "epoch": 408.14328358208957, "grad_norm": 17.94736671447754, "learning_rate": 9.811791383219955e-06, "loss": 29.8447, "step": 17142 }, { "epoch": 408.1671641791045, "grad_norm": 28.02310562133789, "learning_rate": 9.81122448979592e-06, "loss": 31.3027, "step": 17143 }, { "epoch": 408.1910447761194, "grad_norm": 18.83740997314453, "learning_rate": 9.810657596371883e-06, "loss": 30.637, "step": 17144 }, { "epoch": 408.21492537313435, "grad_norm": 19.65155029296875, "learning_rate": 9.810090702947846e-06, "loss": 30.2705, "step": 17145 }, { "epoch": 408.23880597014926, "grad_norm": 20.439847946166992, "learning_rate": 9.80952380952381e-06, "loss": 31.1955, "step": 17146 }, { "epoch": 408.26268656716417, "grad_norm": 22.33255386352539, "learning_rate": 9.808956916099775e-06, "loss": 31.1991, "step": 17147 }, { "epoch": 408.28656716417913, "grad_norm": 19.935897827148438, "learning_rate": 9.808390022675738e-06, "loss": 29.7388, "step": 17148 }, { "epoch": 408.31044776119404, "grad_norm": 17.66071891784668, "learning_rate": 9.8078231292517e-06, "loss": 29.9912, "step": 17149 }, { "epoch": 408.33432835820895, "grad_norm": 16.160856246948242, "learning_rate": 9.807256235827665e-06, "loss": 31.067, "step": 17150 }, { "epoch": 408.35820895522386, "grad_norm": 18.06080436706543, "learning_rate": 9.806689342403628e-06, "loss": 31.404, "step": 17151 }, { "epoch": 408.3820895522388, "grad_norm": 19.497299194335938, "learning_rate": 9.806122448979593e-06, "loss": 29.9847, "step": 17152 }, { "epoch": 408.40597014925373, "grad_norm": 25.528413772583008, "learning_rate": 9.805555555555556e-06, "loss": 30.5061, "step": 17153 }, { "epoch": 408.42985074626864, "grad_norm": 19.330944061279297, "learning_rate": 9.80498866213152e-06, "loss": 29.9116, "step": 17154 }, { "epoch": 408.4537313432836, "grad_norm": 17.243127822875977, "learning_rate": 9.804421768707483e-06, "loss": 30.5956, "step": 17155 }, { "epoch": 408.4776119402985, "grad_norm": 25.73492431640625, "learning_rate": 9.803854875283448e-06, "loss": 31.494, "step": 17156 }, { "epoch": 408.5014925373134, "grad_norm": 28.76915740966797, "learning_rate": 9.803287981859412e-06, "loss": 31.9458, "step": 17157 }, { "epoch": 408.52537313432833, "grad_norm": 17.02314567565918, "learning_rate": 9.802721088435375e-06, "loss": 31.0822, "step": 17158 }, { "epoch": 408.5492537313433, "grad_norm": 31.693359375, "learning_rate": 9.802154195011338e-06, "loss": 31.2315, "step": 17159 }, { "epoch": 408.5731343283582, "grad_norm": 22.58902931213379, "learning_rate": 9.801587301587301e-06, "loss": 31.5996, "step": 17160 }, { "epoch": 408.5970149253731, "grad_norm": 25.078304290771484, "learning_rate": 9.801020408163266e-06, "loss": 30.4479, "step": 17161 }, { "epoch": 408.6208955223881, "grad_norm": 32.10065841674805, "learning_rate": 9.80045351473923e-06, "loss": 31.7707, "step": 17162 }, { "epoch": 408.644776119403, "grad_norm": 21.00263786315918, "learning_rate": 9.799886621315193e-06, "loss": 32.3937, "step": 17163 }, { "epoch": 408.6686567164179, "grad_norm": 38.225982666015625, "learning_rate": 9.799319727891158e-06, "loss": 30.241, "step": 17164 }, { "epoch": 408.6925373134328, "grad_norm": 25.415969848632812, "learning_rate": 9.79875283446712e-06, "loss": 30.0834, "step": 17165 }, { "epoch": 408.7164179104478, "grad_norm": 37.347129821777344, "learning_rate": 9.798185941043085e-06, "loss": 31.426, "step": 17166 }, { "epoch": 408.7402985074627, "grad_norm": 23.168502807617188, "learning_rate": 9.797619047619048e-06, "loss": 30.2809, "step": 17167 }, { "epoch": 408.7641791044776, "grad_norm": 39.452091217041016, "learning_rate": 9.797052154195013e-06, "loss": 31.4917, "step": 17168 }, { "epoch": 408.78805970149256, "grad_norm": 28.55925750732422, "learning_rate": 9.796485260770976e-06, "loss": 31.6921, "step": 17169 }, { "epoch": 408.81194029850747, "grad_norm": 47.2205696105957, "learning_rate": 9.795918367346939e-06, "loss": 30.9541, "step": 17170 }, { "epoch": 408.8358208955224, "grad_norm": 38.546993255615234, "learning_rate": 9.795351473922903e-06, "loss": 30.3784, "step": 17171 }, { "epoch": 408.85970149253734, "grad_norm": 40.99946975708008, "learning_rate": 9.794784580498868e-06, "loss": 31.1373, "step": 17172 }, { "epoch": 408.88358208955225, "grad_norm": 37.15016174316406, "learning_rate": 9.79421768707483e-06, "loss": 30.7365, "step": 17173 }, { "epoch": 408.90746268656716, "grad_norm": 36.37284851074219, "learning_rate": 9.793650793650794e-06, "loss": 30.6809, "step": 17174 }, { "epoch": 408.93134328358207, "grad_norm": 36.006309509277344, "learning_rate": 9.793083900226758e-06, "loss": 30.8276, "step": 17175 }, { "epoch": 408.95522388059703, "grad_norm": 35.93528366088867, "learning_rate": 9.792517006802721e-06, "loss": 31.5256, "step": 17176 }, { "epoch": 408.97910447761194, "grad_norm": 25.682090759277344, "learning_rate": 9.791950113378686e-06, "loss": 29.6271, "step": 17177 }, { "epoch": 409.0, "grad_norm": 34.678077697753906, "learning_rate": 9.791383219954649e-06, "loss": 26.7581, "step": 17178 }, { "epoch": 409.0238805970149, "grad_norm": 32.7984504699707, "learning_rate": 9.790816326530613e-06, "loss": 30.9675, "step": 17179 }, { "epoch": 409.0477611940299, "grad_norm": 43.166133880615234, "learning_rate": 9.790249433106576e-06, "loss": 29.4506, "step": 17180 }, { "epoch": 409.0716417910448, "grad_norm": 40.31956481933594, "learning_rate": 9.78968253968254e-06, "loss": 32.4348, "step": 17181 }, { "epoch": 409.0955223880597, "grad_norm": 32.789756774902344, "learning_rate": 9.789115646258505e-06, "loss": 30.5454, "step": 17182 }, { "epoch": 409.1194029850746, "grad_norm": 32.10710525512695, "learning_rate": 9.788548752834468e-06, "loss": 29.9216, "step": 17183 }, { "epoch": 409.14328358208957, "grad_norm": 34.188148498535156, "learning_rate": 9.787981859410431e-06, "loss": 30.9924, "step": 17184 }, { "epoch": 409.1671641791045, "grad_norm": 30.383773803710938, "learning_rate": 9.787414965986394e-06, "loss": 30.9493, "step": 17185 }, { "epoch": 409.1910447761194, "grad_norm": 40.345176696777344, "learning_rate": 9.786848072562359e-06, "loss": 30.5554, "step": 17186 }, { "epoch": 409.21492537313435, "grad_norm": 34.366477966308594, "learning_rate": 9.786281179138323e-06, "loss": 31.3236, "step": 17187 }, { "epoch": 409.23880597014926, "grad_norm": 37.019676208496094, "learning_rate": 9.785714285714286e-06, "loss": 30.2368, "step": 17188 }, { "epoch": 409.26268656716417, "grad_norm": 33.16834259033203, "learning_rate": 9.78514739229025e-06, "loss": 30.4291, "step": 17189 }, { "epoch": 409.28656716417913, "grad_norm": 36.23292922973633, "learning_rate": 9.784580498866214e-06, "loss": 30.3375, "step": 17190 }, { "epoch": 409.31044776119404, "grad_norm": 29.69635772705078, "learning_rate": 9.784013605442178e-06, "loss": 31.311, "step": 17191 }, { "epoch": 409.33432835820895, "grad_norm": 36.75214767456055, "learning_rate": 9.783446712018141e-06, "loss": 31.749, "step": 17192 }, { "epoch": 409.35820895522386, "grad_norm": 34.37923049926758, "learning_rate": 9.782879818594106e-06, "loss": 31.6592, "step": 17193 }, { "epoch": 409.3820895522388, "grad_norm": 36.90751647949219, "learning_rate": 9.782312925170069e-06, "loss": 31.6242, "step": 17194 }, { "epoch": 409.40597014925373, "grad_norm": 33.20652389526367, "learning_rate": 9.781746031746032e-06, "loss": 31.5448, "step": 17195 }, { "epoch": 409.42985074626864, "grad_norm": 33.465938568115234, "learning_rate": 9.781179138321996e-06, "loss": 30.9569, "step": 17196 }, { "epoch": 409.4537313432836, "grad_norm": 33.31385040283203, "learning_rate": 9.78061224489796e-06, "loss": 31.5872, "step": 17197 }, { "epoch": 409.4776119402985, "grad_norm": 34.62633514404297, "learning_rate": 9.780045351473924e-06, "loss": 30.7983, "step": 17198 }, { "epoch": 409.5014925373134, "grad_norm": 31.84848976135254, "learning_rate": 9.779478458049887e-06, "loss": 31.574, "step": 17199 }, { "epoch": 409.52537313432833, "grad_norm": 33.264076232910156, "learning_rate": 9.778911564625851e-06, "loss": 29.8799, "step": 17200 }, { "epoch": 409.5492537313433, "grad_norm": 29.371370315551758, "learning_rate": 9.778344671201814e-06, "loss": 31.3128, "step": 17201 }, { "epoch": 409.5731343283582, "grad_norm": 38.70520782470703, "learning_rate": 9.777777777777779e-06, "loss": 31.7127, "step": 17202 }, { "epoch": 409.5970149253731, "grad_norm": 34.72576141357422, "learning_rate": 9.777210884353743e-06, "loss": 29.2374, "step": 17203 }, { "epoch": 409.6208955223881, "grad_norm": 33.42527770996094, "learning_rate": 9.776643990929706e-06, "loss": 30.6626, "step": 17204 }, { "epoch": 409.644776119403, "grad_norm": 32.23186492919922, "learning_rate": 9.776077097505669e-06, "loss": 30.1451, "step": 17205 }, { "epoch": 409.6686567164179, "grad_norm": 34.285430908203125, "learning_rate": 9.775510204081634e-06, "loss": 30.9031, "step": 17206 }, { "epoch": 409.6925373134328, "grad_norm": 30.343807220458984, "learning_rate": 9.774943310657598e-06, "loss": 30.7695, "step": 17207 }, { "epoch": 409.7164179104478, "grad_norm": 42.4433479309082, "learning_rate": 9.774376417233561e-06, "loss": 31.0233, "step": 17208 }, { "epoch": 409.7402985074627, "grad_norm": 33.58607864379883, "learning_rate": 9.773809523809524e-06, "loss": 30.2251, "step": 17209 }, { "epoch": 409.7641791044776, "grad_norm": 34.856666564941406, "learning_rate": 9.773242630385489e-06, "loss": 30.0397, "step": 17210 }, { "epoch": 409.78805970149256, "grad_norm": 32.365806579589844, "learning_rate": 9.772675736961452e-06, "loss": 29.791, "step": 17211 }, { "epoch": 409.81194029850747, "grad_norm": 31.12361717224121, "learning_rate": 9.772108843537416e-06, "loss": 30.6748, "step": 17212 }, { "epoch": 409.8358208955224, "grad_norm": 27.62432098388672, "learning_rate": 9.771541950113379e-06, "loss": 30.7766, "step": 17213 }, { "epoch": 409.85970149253734, "grad_norm": 36.967857360839844, "learning_rate": 9.770975056689344e-06, "loss": 30.597, "step": 17214 }, { "epoch": 409.88358208955225, "grad_norm": 33.09959411621094, "learning_rate": 9.770408163265307e-06, "loss": 29.5581, "step": 17215 }, { "epoch": 409.90746268656716, "grad_norm": 32.458499908447266, "learning_rate": 9.769841269841271e-06, "loss": 30.0254, "step": 17216 }, { "epoch": 409.93134328358207, "grad_norm": 32.26570510864258, "learning_rate": 9.769274376417234e-06, "loss": 30.1378, "step": 17217 }, { "epoch": 409.95522388059703, "grad_norm": 32.063602447509766, "learning_rate": 9.768707482993199e-06, "loss": 30.5175, "step": 17218 }, { "epoch": 409.97910447761194, "grad_norm": 29.157814025878906, "learning_rate": 9.768140589569162e-06, "loss": 30.7194, "step": 17219 }, { "epoch": 410.0, "grad_norm": 29.813369750976562, "learning_rate": 9.767573696145124e-06, "loss": 27.861, "step": 17220 }, { "epoch": 410.0238805970149, "grad_norm": 31.18349266052246, "learning_rate": 9.767006802721089e-06, "loss": 30.2601, "step": 17221 }, { "epoch": 410.0477611940299, "grad_norm": 33.58921813964844, "learning_rate": 9.766439909297054e-06, "loss": 30.5417, "step": 17222 }, { "epoch": 410.0716417910448, "grad_norm": 31.673707962036133, "learning_rate": 9.765873015873017e-06, "loss": 30.812, "step": 17223 }, { "epoch": 410.0955223880597, "grad_norm": 31.584043502807617, "learning_rate": 9.765306122448981e-06, "loss": 31.2592, "step": 17224 }, { "epoch": 410.1194029850746, "grad_norm": 30.22162628173828, "learning_rate": 9.764739229024944e-06, "loss": 31.577, "step": 17225 }, { "epoch": 410.14328358208957, "grad_norm": 33.900306701660156, "learning_rate": 9.764172335600907e-06, "loss": 29.0864, "step": 17226 }, { "epoch": 410.1671641791045, "grad_norm": 30.085092544555664, "learning_rate": 9.763605442176872e-06, "loss": 30.5391, "step": 17227 }, { "epoch": 410.1910447761194, "grad_norm": 38.079933166503906, "learning_rate": 9.763038548752836e-06, "loss": 31.1666, "step": 17228 }, { "epoch": 410.21492537313435, "grad_norm": 34.481910705566406, "learning_rate": 9.762471655328799e-06, "loss": 31.2171, "step": 17229 }, { "epoch": 410.23880597014926, "grad_norm": 34.52700424194336, "learning_rate": 9.761904761904762e-06, "loss": 30.8267, "step": 17230 }, { "epoch": 410.26268656716417, "grad_norm": 31.537492752075195, "learning_rate": 9.761337868480727e-06, "loss": 30.8598, "step": 17231 }, { "epoch": 410.28656716417913, "grad_norm": 33.93818283081055, "learning_rate": 9.760770975056691e-06, "loss": 30.6294, "step": 17232 }, { "epoch": 410.31044776119404, "grad_norm": 29.543317794799805, "learning_rate": 9.760204081632654e-06, "loss": 30.3695, "step": 17233 }, { "epoch": 410.33432835820895, "grad_norm": 37.45649719238281, "learning_rate": 9.759637188208617e-06, "loss": 31.0027, "step": 17234 }, { "epoch": 410.35820895522386, "grad_norm": NaN, "learning_rate": 9.759070294784582e-06, "loss": 35.4775, "step": 17235 }, { "epoch": 410.3820895522388, "grad_norm": 30.60056495666504, "learning_rate": 9.759070294784582e-06, "loss": 30.9545, "step": 17236 }, { "epoch": 410.40597014925373, "grad_norm": 32.86592102050781, "learning_rate": 9.758503401360544e-06, "loss": 30.482, "step": 17237 }, { "epoch": 410.42985074626864, "grad_norm": 28.09597396850586, "learning_rate": 9.757936507936509e-06, "loss": 30.1862, "step": 17238 }, { "epoch": 410.4537313432836, "grad_norm": 34.24605941772461, "learning_rate": 9.757369614512474e-06, "loss": 29.7122, "step": 17239 }, { "epoch": 410.4776119402985, "grad_norm": 26.68988800048828, "learning_rate": 9.756802721088437e-06, "loss": 29.6089, "step": 17240 }, { "epoch": 410.5014925373134, "grad_norm": 35.57308578491211, "learning_rate": 9.7562358276644e-06, "loss": 32.0771, "step": 17241 }, { "epoch": 410.52537313432833, "grad_norm": 31.854652404785156, "learning_rate": 9.755668934240364e-06, "loss": 30.3433, "step": 17242 }, { "epoch": 410.5492537313433, "grad_norm": 32.33788299560547, "learning_rate": 9.755102040816327e-06, "loss": 29.581, "step": 17243 }, { "epoch": 410.5731343283582, "grad_norm": 32.283477783203125, "learning_rate": 9.754535147392292e-06, "loss": 32.0348, "step": 17244 }, { "epoch": 410.5970149253731, "grad_norm": 29.719423294067383, "learning_rate": 9.753968253968254e-06, "loss": 30.9744, "step": 17245 }, { "epoch": 410.6208955223881, "grad_norm": 28.74759864807129, "learning_rate": 9.753401360544217e-06, "loss": 31.0377, "step": 17246 }, { "epoch": 410.644776119403, "grad_norm": 31.67352867126465, "learning_rate": 9.752834467120182e-06, "loss": 30.4061, "step": 17247 }, { "epoch": 410.6686567164179, "grad_norm": 25.129446029663086, "learning_rate": 9.752267573696147e-06, "loss": 31.7588, "step": 17248 }, { "epoch": 410.6925373134328, "grad_norm": 37.61256408691406, "learning_rate": 9.75170068027211e-06, "loss": 31.9124, "step": 17249 }, { "epoch": 410.7164179104478, "grad_norm": 29.709802627563477, "learning_rate": 9.751133786848074e-06, "loss": 31.7182, "step": 17250 }, { "epoch": 410.7402985074627, "grad_norm": 38.39265823364258, "learning_rate": 9.750566893424037e-06, "loss": 30.8116, "step": 17251 }, { "epoch": 410.7641791044776, "grad_norm": 33.163082122802734, "learning_rate": 9.75e-06, "loss": 29.819, "step": 17252 }, { "epoch": 410.78805970149256, "grad_norm": 31.229955673217773, "learning_rate": 9.749433106575964e-06, "loss": 29.8216, "step": 17253 }, { "epoch": 410.81194029850747, "grad_norm": 30.527732849121094, "learning_rate": 9.748866213151929e-06, "loss": 30.9318, "step": 17254 }, { "epoch": 410.8358208955224, "grad_norm": 31.837890625, "learning_rate": 9.748299319727892e-06, "loss": 31.0031, "step": 17255 }, { "epoch": 410.85970149253734, "grad_norm": 30.608051300048828, "learning_rate": 9.747732426303855e-06, "loss": 30.6614, "step": 17256 }, { "epoch": 410.88358208955225, "grad_norm": NaN, "learning_rate": 9.74716553287982e-06, "loss": 27.1812, "step": 17257 }, { "epoch": 410.90746268656716, "grad_norm": 36.195335388183594, "learning_rate": 9.74716553287982e-06, "loss": 29.9308, "step": 17258 }, { "epoch": 410.93134328358207, "grad_norm": 30.972122192382812, "learning_rate": 9.746598639455784e-06, "loss": 30.4884, "step": 17259 }, { "epoch": 410.95522388059703, "grad_norm": 32.893672943115234, "learning_rate": 9.746031746031747e-06, "loss": 29.9437, "step": 17260 }, { "epoch": 410.97910447761194, "grad_norm": 29.859777450561523, "learning_rate": 9.74546485260771e-06, "loss": 30.0954, "step": 17261 }, { "epoch": 411.0, "grad_norm": 28.657880783081055, "learning_rate": 9.744897959183674e-06, "loss": 27.0012, "step": 17262 }, { "epoch": 411.0238805970149, "grad_norm": 30.218544006347656, "learning_rate": 9.744331065759637e-06, "loss": 29.9479, "step": 17263 }, { "epoch": 411.0477611940299, "grad_norm": 32.60407257080078, "learning_rate": 9.743764172335602e-06, "loss": 30.6546, "step": 17264 }, { "epoch": 411.0716417910448, "grad_norm": 27.861570358276367, "learning_rate": 9.743197278911567e-06, "loss": 31.1357, "step": 17265 }, { "epoch": 411.0955223880597, "grad_norm": 32.95627975463867, "learning_rate": 9.74263038548753e-06, "loss": 30.2324, "step": 17266 }, { "epoch": 411.1194029850746, "grad_norm": 29.979084014892578, "learning_rate": 9.742063492063492e-06, "loss": 30.0651, "step": 17267 }, { "epoch": 411.14328358208957, "grad_norm": 32.46349334716797, "learning_rate": 9.741496598639457e-06, "loss": 31.6332, "step": 17268 }, { "epoch": 411.1671641791045, "grad_norm": 27.955581665039062, "learning_rate": 9.74092970521542e-06, "loss": 30.7639, "step": 17269 }, { "epoch": 411.1910447761194, "grad_norm": 31.141822814941406, "learning_rate": 9.740362811791384e-06, "loss": 30.5363, "step": 17270 }, { "epoch": 411.21492537313435, "grad_norm": 29.90436553955078, "learning_rate": 9.739795918367347e-06, "loss": 30.9016, "step": 17271 }, { "epoch": 411.23880597014926, "grad_norm": 30.68499755859375, "learning_rate": 9.739229024943312e-06, "loss": 30.5875, "step": 17272 }, { "epoch": 411.26268656716417, "grad_norm": NaN, "learning_rate": 9.738662131519275e-06, "loss": 39.0752, "step": 17273 }, { "epoch": 411.28656716417913, "grad_norm": 25.61927032470703, "learning_rate": 9.738662131519275e-06, "loss": 29.7111, "step": 17274 }, { "epoch": 411.31044776119404, "grad_norm": 33.73572540283203, "learning_rate": 9.73809523809524e-06, "loss": 29.6255, "step": 17275 }, { "epoch": 411.33432835820895, "grad_norm": 30.84865379333496, "learning_rate": 9.737528344671202e-06, "loss": 32.1993, "step": 17276 }, { "epoch": 411.35820895522386, "grad_norm": 34.85017776489258, "learning_rate": 9.736961451247167e-06, "loss": 30.1535, "step": 17277 }, { "epoch": 411.3820895522388, "grad_norm": 30.275859832763672, "learning_rate": 9.73639455782313e-06, "loss": 30.5525, "step": 17278 }, { "epoch": 411.40597014925373, "grad_norm": 33.74799728393555, "learning_rate": 9.735827664399093e-06, "loss": 30.5777, "step": 17279 }, { "epoch": 411.42985074626864, "grad_norm": NaN, "learning_rate": 9.735260770975057e-06, "loss": 49.9628, "step": 17280 }, { "epoch": 411.4537313432836, "grad_norm": 30.752450942993164, "learning_rate": 9.735260770975057e-06, "loss": 30.3917, "step": 17281 }, { "epoch": 411.4776119402985, "grad_norm": 32.22258377075195, "learning_rate": 9.734693877551022e-06, "loss": 31.2265, "step": 17282 }, { "epoch": 411.5014925373134, "grad_norm": 27.925674438476562, "learning_rate": 9.734126984126985e-06, "loss": 30.0399, "step": 17283 }, { "epoch": 411.52537313432833, "grad_norm": 34.14848327636719, "learning_rate": 9.733560090702948e-06, "loss": 29.8929, "step": 17284 }, { "epoch": 411.5492537313433, "grad_norm": 28.29414939880371, "learning_rate": 9.732993197278912e-06, "loss": 30.0548, "step": 17285 }, { "epoch": 411.5731343283582, "grad_norm": 33.69083023071289, "learning_rate": 9.732426303854877e-06, "loss": 31.5717, "step": 17286 }, { "epoch": 411.5970149253731, "grad_norm": 29.985658645629883, "learning_rate": 9.73185941043084e-06, "loss": 29.5741, "step": 17287 }, { "epoch": 411.6208955223881, "grad_norm": 29.38248634338379, "learning_rate": 9.731292517006804e-06, "loss": 30.3663, "step": 17288 }, { "epoch": 411.644776119403, "grad_norm": 28.989734649658203, "learning_rate": 9.730725623582767e-06, "loss": 32.1202, "step": 17289 }, { "epoch": 411.6686567164179, "grad_norm": 30.57007598876953, "learning_rate": 9.73015873015873e-06, "loss": 29.8059, "step": 17290 }, { "epoch": 411.6925373134328, "grad_norm": 27.80312728881836, "learning_rate": 9.729591836734695e-06, "loss": 30.9309, "step": 17291 }, { "epoch": 411.7164179104478, "grad_norm": 35.20885467529297, "learning_rate": 9.72902494331066e-06, "loss": 31.8309, "step": 17292 }, { "epoch": 411.7402985074627, "grad_norm": 29.32598304748535, "learning_rate": 9.728458049886622e-06, "loss": 31.491, "step": 17293 }, { "epoch": 411.7641791044776, "grad_norm": 41.09993362426758, "learning_rate": 9.727891156462585e-06, "loss": 31.7247, "step": 17294 }, { "epoch": 411.78805970149256, "grad_norm": 34.537803649902344, "learning_rate": 9.72732426303855e-06, "loss": 31.3353, "step": 17295 }, { "epoch": 411.81194029850747, "grad_norm": 27.136680603027344, "learning_rate": 9.726757369614513e-06, "loss": 31.4219, "step": 17296 }, { "epoch": 411.8358208955224, "grad_norm": 28.26192855834961, "learning_rate": 9.726190476190477e-06, "loss": 30.8183, "step": 17297 }, { "epoch": 411.85970149253734, "grad_norm": 29.68960189819336, "learning_rate": 9.72562358276644e-06, "loss": 31.601, "step": 17298 }, { "epoch": 411.88358208955225, "grad_norm": 25.01177406311035, "learning_rate": 9.725056689342405e-06, "loss": 30.5706, "step": 17299 }, { "epoch": 411.90746268656716, "grad_norm": 30.833648681640625, "learning_rate": 9.724489795918368e-06, "loss": 30.0223, "step": 17300 }, { "epoch": 411.93134328358207, "grad_norm": 24.258073806762695, "learning_rate": 9.723922902494332e-06, "loss": 29.7812, "step": 17301 }, { "epoch": 411.95522388059703, "grad_norm": 34.464168548583984, "learning_rate": 9.723356009070297e-06, "loss": 31.7863, "step": 17302 }, { "epoch": 411.97910447761194, "grad_norm": 29.224231719970703, "learning_rate": 9.72278911564626e-06, "loss": 30.4418, "step": 17303 }, { "epoch": 412.0, "grad_norm": 26.98820686340332, "learning_rate": 9.722222222222223e-06, "loss": 26.6036, "step": 17304 }, { "epoch": 412.0238805970149, "grad_norm": 30.626056671142578, "learning_rate": 9.721655328798186e-06, "loss": 30.981, "step": 17305 }, { "epoch": 412.0477611940299, "grad_norm": 26.96034049987793, "learning_rate": 9.72108843537415e-06, "loss": 30.8802, "step": 17306 }, { "epoch": 412.0716417910448, "grad_norm": 24.572372436523438, "learning_rate": 9.720521541950115e-06, "loss": 31.7014, "step": 17307 }, { "epoch": 412.0955223880597, "grad_norm": 27.470827102661133, "learning_rate": 9.719954648526078e-06, "loss": 30.3461, "step": 17308 }, { "epoch": 412.1194029850746, "grad_norm": 25.224088668823242, "learning_rate": 9.719387755102042e-06, "loss": 30.95, "step": 17309 }, { "epoch": 412.14328358208957, "grad_norm": 30.355358123779297, "learning_rate": 9.718820861678005e-06, "loss": 30.4449, "step": 17310 }, { "epoch": 412.1671641791045, "grad_norm": 25.076364517211914, "learning_rate": 9.71825396825397e-06, "loss": 30.5565, "step": 17311 }, { "epoch": 412.1910447761194, "grad_norm": 30.030237197875977, "learning_rate": 9.717687074829933e-06, "loss": 30.5954, "step": 17312 }, { "epoch": 412.21492537313435, "grad_norm": 25.06392478942871, "learning_rate": 9.717120181405897e-06, "loss": 30.1632, "step": 17313 }, { "epoch": 412.23880597014926, "grad_norm": 25.10326385498047, "learning_rate": 9.71655328798186e-06, "loss": 30.322, "step": 17314 }, { "epoch": 412.26268656716417, "grad_norm": 24.228214263916016, "learning_rate": 9.715986394557823e-06, "loss": 31.4099, "step": 17315 }, { "epoch": 412.28656716417913, "grad_norm": 21.156068801879883, "learning_rate": 9.715419501133788e-06, "loss": 31.4968, "step": 17316 }, { "epoch": 412.31044776119404, "grad_norm": 20.491891860961914, "learning_rate": 9.714852607709752e-06, "loss": 30.2069, "step": 17317 }, { "epoch": 412.33432835820895, "grad_norm": 18.79143524169922, "learning_rate": 9.714285714285715e-06, "loss": 30.6204, "step": 17318 }, { "epoch": 412.35820895522386, "grad_norm": 23.70470428466797, "learning_rate": 9.713718820861678e-06, "loss": 31.1247, "step": 17319 }, { "epoch": 412.3820895522388, "grad_norm": 19.9675235748291, "learning_rate": 9.713151927437643e-06, "loss": 30.9067, "step": 17320 }, { "epoch": 412.40597014925373, "grad_norm": 22.93436050415039, "learning_rate": 9.712585034013606e-06, "loss": 30.1828, "step": 17321 }, { "epoch": 412.42985074626864, "grad_norm": 19.510936737060547, "learning_rate": 9.71201814058957e-06, "loss": 31.448, "step": 17322 }, { "epoch": 412.4537313432836, "grad_norm": 25.782569885253906, "learning_rate": 9.711451247165535e-06, "loss": 31.1077, "step": 17323 }, { "epoch": 412.4776119402985, "grad_norm": 19.96767234802246, "learning_rate": 9.710884353741498e-06, "loss": 30.7972, "step": 17324 }, { "epoch": 412.5014925373134, "grad_norm": 26.440196990966797, "learning_rate": 9.71031746031746e-06, "loss": 30.4361, "step": 17325 }, { "epoch": 412.52537313432833, "grad_norm": 21.148910522460938, "learning_rate": 9.709750566893425e-06, "loss": 31.4516, "step": 17326 }, { "epoch": 412.5492537313433, "grad_norm": 25.684194564819336, "learning_rate": 9.70918367346939e-06, "loss": 29.7638, "step": 17327 }, { "epoch": 412.5731343283582, "grad_norm": 22.77691650390625, "learning_rate": 9.708616780045353e-06, "loss": 30.8605, "step": 17328 }, { "epoch": 412.5970149253731, "grad_norm": 23.238990783691406, "learning_rate": 9.708049886621316e-06, "loss": 30.6097, "step": 17329 }, { "epoch": 412.6208955223881, "grad_norm": 22.082658767700195, "learning_rate": 9.707482993197278e-06, "loss": 30.4965, "step": 17330 }, { "epoch": 412.644776119403, "grad_norm": 23.30976676940918, "learning_rate": 9.706916099773243e-06, "loss": 30.0028, "step": 17331 }, { "epoch": 412.6686567164179, "grad_norm": 18.33295440673828, "learning_rate": 9.706349206349208e-06, "loss": 31.2935, "step": 17332 }, { "epoch": 412.6925373134328, "grad_norm": 25.902048110961914, "learning_rate": 9.70578231292517e-06, "loss": 30.2697, "step": 17333 }, { "epoch": 412.7164179104478, "grad_norm": 21.660961151123047, "learning_rate": 9.705215419501135e-06, "loss": 29.2878, "step": 17334 }, { "epoch": 412.7402985074627, "grad_norm": 20.73514175415039, "learning_rate": 9.704648526077098e-06, "loss": 30.9861, "step": 17335 }, { "epoch": 412.7641791044776, "grad_norm": NaN, "learning_rate": 9.704081632653061e-06, "loss": 52.321, "step": 17336 }, { "epoch": 412.78805970149256, "grad_norm": 19.39436149597168, "learning_rate": 9.704081632653061e-06, "loss": 30.16, "step": 17337 }, { "epoch": 412.81194029850747, "grad_norm": 22.03596305847168, "learning_rate": 9.703514739229026e-06, "loss": 30.3655, "step": 17338 }, { "epoch": 412.8358208955224, "grad_norm": 24.618896484375, "learning_rate": 9.70294784580499e-06, "loss": 31.0424, "step": 17339 }, { "epoch": 412.85970149253734, "grad_norm": 17.97269630432129, "learning_rate": 9.702380952380953e-06, "loss": 30.5927, "step": 17340 }, { "epoch": 412.88358208955225, "grad_norm": NaN, "learning_rate": 9.701814058956916e-06, "loss": 27.2081, "step": 17341 }, { "epoch": 412.90746268656716, "grad_norm": 28.49968147277832, "learning_rate": 9.701814058956916e-06, "loss": 30.9395, "step": 17342 }, { "epoch": 412.93134328358207, "grad_norm": 20.1293888092041, "learning_rate": 9.70124716553288e-06, "loss": 31.8925, "step": 17343 }, { "epoch": 412.95522388059703, "grad_norm": 26.639421463012695, "learning_rate": 9.700680272108845e-06, "loss": 30.8541, "step": 17344 }, { "epoch": 412.97910447761194, "grad_norm": 23.90354347229004, "learning_rate": 9.700113378684808e-06, "loss": 32.278, "step": 17345 }, { "epoch": 413.0, "grad_norm": 20.97551918029785, "learning_rate": 9.699546485260771e-06, "loss": 25.9998, "step": 17346 }, { "epoch": 413.0238805970149, "grad_norm": 24.772329330444336, "learning_rate": 9.698979591836736e-06, "loss": 31.5759, "step": 17347 }, { "epoch": 413.0477611940299, "grad_norm": 20.000246047973633, "learning_rate": 9.698412698412698e-06, "loss": 30.7338, "step": 17348 }, { "epoch": 413.0716417910448, "grad_norm": 22.413970947265625, "learning_rate": 9.697845804988663e-06, "loss": 30.6352, "step": 17349 }, { "epoch": 413.0955223880597, "grad_norm": 22.880908966064453, "learning_rate": 9.697278911564628e-06, "loss": 31.6199, "step": 17350 }, { "epoch": 413.1194029850746, "grad_norm": 21.30689239501953, "learning_rate": 9.69671201814059e-06, "loss": 30.6978, "step": 17351 }, { "epoch": 413.14328358208957, "grad_norm": 20.144020080566406, "learning_rate": 9.696145124716553e-06, "loss": 31.9464, "step": 17352 }, { "epoch": 413.1671641791045, "grad_norm": 25.37303924560547, "learning_rate": 9.695578231292518e-06, "loss": 31.5355, "step": 17353 }, { "epoch": 413.1910447761194, "grad_norm": 17.912534713745117, "learning_rate": 9.695011337868483e-06, "loss": 31.2457, "step": 17354 }, { "epoch": 413.21492537313435, "grad_norm": 23.097381591796875, "learning_rate": 9.694444444444446e-06, "loss": 30.5069, "step": 17355 }, { "epoch": 413.23880597014926, "grad_norm": 22.0723819732666, "learning_rate": 9.693877551020408e-06, "loss": 31.1957, "step": 17356 }, { "epoch": 413.26268656716417, "grad_norm": 18.241931915283203, "learning_rate": 9.693310657596373e-06, "loss": 31.3098, "step": 17357 }, { "epoch": 413.28656716417913, "grad_norm": 24.820484161376953, "learning_rate": 9.692743764172336e-06, "loss": 30.3309, "step": 17358 }, { "epoch": 413.31044776119404, "grad_norm": 16.013769149780273, "learning_rate": 9.6921768707483e-06, "loss": 30.5326, "step": 17359 }, { "epoch": 413.33432835820895, "grad_norm": 27.219940185546875, "learning_rate": 9.691609977324263e-06, "loss": 31.3708, "step": 17360 }, { "epoch": 413.35820895522386, "grad_norm": 21.605802536010742, "learning_rate": 9.691043083900228e-06, "loss": 30.8502, "step": 17361 }, { "epoch": 413.3820895522388, "grad_norm": 30.45836639404297, "learning_rate": 9.690476190476191e-06, "loss": 32.564, "step": 17362 }, { "epoch": 413.40597014925373, "grad_norm": 24.562942504882812, "learning_rate": 9.689909297052154e-06, "loss": 31.0815, "step": 17363 }, { "epoch": 413.42985074626864, "grad_norm": 24.780115127563477, "learning_rate": 9.689342403628118e-06, "loss": 31.9012, "step": 17364 }, { "epoch": 413.4537313432836, "grad_norm": 19.684261322021484, "learning_rate": 9.688775510204083e-06, "loss": 30.6087, "step": 17365 }, { "epoch": 413.4776119402985, "grad_norm": 20.546043395996094, "learning_rate": 9.688208616780046e-06, "loss": 31.1515, "step": 17366 }, { "epoch": 413.5014925373134, "grad_norm": 21.40205192565918, "learning_rate": 9.687641723356009e-06, "loss": 29.8446, "step": 17367 }, { "epoch": 413.52537313432833, "grad_norm": 17.48446273803711, "learning_rate": 9.687074829931973e-06, "loss": 31.8636, "step": 17368 }, { "epoch": 413.5492537313433, "grad_norm": 20.376388549804688, "learning_rate": 9.686507936507938e-06, "loss": 30.2396, "step": 17369 }, { "epoch": 413.5731343283582, "grad_norm": 19.347999572753906, "learning_rate": 9.685941043083901e-06, "loss": 31.5867, "step": 17370 }, { "epoch": 413.5970149253731, "grad_norm": 16.06380271911621, "learning_rate": 9.685374149659866e-06, "loss": 31.7397, "step": 17371 }, { "epoch": 413.6208955223881, "grad_norm": 18.676008224487305, "learning_rate": 9.684807256235828e-06, "loss": 31.9843, "step": 17372 }, { "epoch": 413.644776119403, "grad_norm": 17.208215713500977, "learning_rate": 9.684240362811791e-06, "loss": 31.5737, "step": 17373 }, { "epoch": 413.6686567164179, "grad_norm": 20.85196304321289, "learning_rate": 9.683673469387756e-06, "loss": 31.6395, "step": 17374 }, { "epoch": 413.6925373134328, "grad_norm": 19.394147872924805, "learning_rate": 9.68310657596372e-06, "loss": 31.1919, "step": 17375 }, { "epoch": 413.7164179104478, "grad_norm": 20.282413482666016, "learning_rate": 9.682539682539683e-06, "loss": 30.9567, "step": 17376 }, { "epoch": 413.7402985074627, "grad_norm": 20.27549934387207, "learning_rate": 9.681972789115646e-06, "loss": 31.5665, "step": 17377 }, { "epoch": 413.7641791044776, "grad_norm": 19.347043991088867, "learning_rate": 9.681405895691611e-06, "loss": 30.8904, "step": 17378 }, { "epoch": 413.78805970149256, "grad_norm": 16.298198699951172, "learning_rate": 9.680839002267574e-06, "loss": 31.6256, "step": 17379 }, { "epoch": 413.81194029850747, "grad_norm": 17.747581481933594, "learning_rate": 9.680272108843538e-06, "loss": 31.5865, "step": 17380 }, { "epoch": 413.8358208955224, "grad_norm": 18.13039779663086, "learning_rate": 9.679705215419501e-06, "loss": 31.9384, "step": 17381 }, { "epoch": 413.85970149253734, "grad_norm": 23.176939010620117, "learning_rate": 9.679138321995466e-06, "loss": 30.9652, "step": 17382 }, { "epoch": 413.88358208955225, "grad_norm": 15.896499633789062, "learning_rate": 9.678571428571429e-06, "loss": 31.0066, "step": 17383 }, { "epoch": 413.90746268656716, "grad_norm": 16.574430465698242, "learning_rate": 9.678004535147393e-06, "loss": 30.3854, "step": 17384 }, { "epoch": 413.93134328358207, "grad_norm": 16.726947784423828, "learning_rate": 9.677437641723358e-06, "loss": 30.866, "step": 17385 }, { "epoch": 413.95522388059703, "grad_norm": 20.015026092529297, "learning_rate": 9.676870748299321e-06, "loss": 31.0936, "step": 17386 }, { "epoch": 413.97910447761194, "grad_norm": 21.819541931152344, "learning_rate": 9.676303854875284e-06, "loss": 31.6246, "step": 17387 }, { "epoch": 414.0, "grad_norm": 14.112858772277832, "learning_rate": 9.675736961451247e-06, "loss": 26.3616, "step": 17388 }, { "epoch": 414.0238805970149, "grad_norm": 17.154964447021484, "learning_rate": 9.675170068027211e-06, "loss": 31.003, "step": 17389 }, { "epoch": 414.0477611940299, "grad_norm": 30.1414737701416, "learning_rate": 9.674603174603176e-06, "loss": 30.0583, "step": 17390 }, { "epoch": 414.0716417910448, "grad_norm": 18.26875877380371, "learning_rate": 9.674036281179139e-06, "loss": 30.8035, "step": 17391 }, { "epoch": 414.0955223880597, "grad_norm": 16.537519454956055, "learning_rate": 9.673469387755103e-06, "loss": 30.3484, "step": 17392 }, { "epoch": 414.1194029850746, "grad_norm": 35.826725006103516, "learning_rate": 9.672902494331066e-06, "loss": 32.044, "step": 17393 }, { "epoch": 414.14328358208957, "grad_norm": 20.51215934753418, "learning_rate": 9.672335600907031e-06, "loss": 30.8656, "step": 17394 }, { "epoch": 414.1671641791045, "grad_norm": 41.638153076171875, "learning_rate": 9.671768707482994e-06, "loss": 31.5341, "step": 17395 }, { "epoch": 414.1910447761194, "grad_norm": 32.373390197753906, "learning_rate": 9.671201814058958e-06, "loss": 31.1785, "step": 17396 }, { "epoch": 414.21492537313435, "grad_norm": 45.31845474243164, "learning_rate": 9.670634920634921e-06, "loss": 31.3905, "step": 17397 }, { "epoch": 414.23880597014926, "grad_norm": 41.73866271972656, "learning_rate": 9.670068027210884e-06, "loss": 32.4249, "step": 17398 }, { "epoch": 414.26268656716417, "grad_norm": 34.975154876708984, "learning_rate": 9.669501133786849e-06, "loss": 31.9652, "step": 17399 }, { "epoch": 414.28656716417913, "grad_norm": 36.46391296386719, "learning_rate": 9.668934240362813e-06, "loss": 30.2904, "step": 17400 }, { "epoch": 414.31044776119404, "grad_norm": 32.26873779296875, "learning_rate": 9.668367346938776e-06, "loss": 31.0885, "step": 17401 }, { "epoch": 414.33432835820895, "grad_norm": 28.08407974243164, "learning_rate": 9.66780045351474e-06, "loss": 30.2394, "step": 17402 }, { "epoch": 414.35820895522386, "grad_norm": 38.191524505615234, "learning_rate": 9.667233560090704e-06, "loss": 30.2707, "step": 17403 }, { "epoch": 414.3820895522388, "grad_norm": 30.715505599975586, "learning_rate": 9.666666666666667e-06, "loss": 30.857, "step": 17404 }, { "epoch": 414.40597014925373, "grad_norm": 39.77232360839844, "learning_rate": 9.666099773242631e-06, "loss": 31.821, "step": 17405 }, { "epoch": 414.42985074626864, "grad_norm": 35.4797477722168, "learning_rate": 9.665532879818596e-06, "loss": 31.302, "step": 17406 }, { "epoch": 414.4537313432836, "grad_norm": 34.66422653198242, "learning_rate": 9.664965986394559e-06, "loss": 32.2161, "step": 17407 }, { "epoch": 414.4776119402985, "grad_norm": 32.85184097290039, "learning_rate": 9.664399092970522e-06, "loss": 31.9949, "step": 17408 }, { "epoch": 414.5014925373134, "grad_norm": 35.33075714111328, "learning_rate": 9.663832199546486e-06, "loss": 31.0031, "step": 17409 }, { "epoch": 414.52537313432833, "grad_norm": 32.316551208496094, "learning_rate": 9.663265306122451e-06, "loss": 31.0723, "step": 17410 }, { "epoch": 414.5492537313433, "grad_norm": 34.94147872924805, "learning_rate": 9.662698412698414e-06, "loss": 30.4708, "step": 17411 }, { "epoch": 414.5731343283582, "grad_norm": 32.667728424072266, "learning_rate": 9.662131519274377e-06, "loss": 30.697, "step": 17412 }, { "epoch": 414.5970149253731, "grad_norm": 35.34757995605469, "learning_rate": 9.66156462585034e-06, "loss": 30.566, "step": 17413 }, { "epoch": 414.6208955223881, "grad_norm": 32.55601501464844, "learning_rate": 9.660997732426304e-06, "loss": 31.046, "step": 17414 }, { "epoch": 414.644776119403, "grad_norm": 35.4473762512207, "learning_rate": 9.660430839002269e-06, "loss": 31.1124, "step": 17415 }, { "epoch": 414.6686567164179, "grad_norm": 32.57688903808594, "learning_rate": 9.659863945578232e-06, "loss": 31.5619, "step": 17416 }, { "epoch": 414.6925373134328, "grad_norm": 38.10930633544922, "learning_rate": 9.659297052154196e-06, "loss": 31.9327, "step": 17417 }, { "epoch": 414.7164179104478, "grad_norm": 33.61649703979492, "learning_rate": 9.65873015873016e-06, "loss": 31.3994, "step": 17418 }, { "epoch": 414.7402985074627, "grad_norm": 34.291107177734375, "learning_rate": 9.658163265306124e-06, "loss": 32.1362, "step": 17419 }, { "epoch": 414.7641791044776, "grad_norm": 32.92462158203125, "learning_rate": 9.657596371882087e-06, "loss": 31.3864, "step": 17420 }, { "epoch": 414.78805970149256, "grad_norm": 38.45814895629883, "learning_rate": 9.657029478458051e-06, "loss": 30.5475, "step": 17421 }, { "epoch": 414.81194029850747, "grad_norm": 31.68741226196289, "learning_rate": 9.656462585034014e-06, "loss": 32.3403, "step": 17422 }, { "epoch": 414.8358208955224, "grad_norm": 36.55650329589844, "learning_rate": 9.655895691609977e-06, "loss": 32.0996, "step": 17423 }, { "epoch": 414.85970149253734, "grad_norm": 33.3495979309082, "learning_rate": 9.655328798185942e-06, "loss": 30.9624, "step": 17424 }, { "epoch": 414.88358208955225, "grad_norm": 35.57025146484375, "learning_rate": 9.654761904761906e-06, "loss": 31.1892, "step": 17425 }, { "epoch": 414.90746268656716, "grad_norm": 32.37010955810547, "learning_rate": 9.65419501133787e-06, "loss": 30.7616, "step": 17426 }, { "epoch": 414.93134328358207, "grad_norm": 33.7585334777832, "learning_rate": 9.653628117913832e-06, "loss": 31.299, "step": 17427 }, { "epoch": 414.95522388059703, "grad_norm": 30.659799575805664, "learning_rate": 9.653061224489797e-06, "loss": 30.9077, "step": 17428 }, { "epoch": 414.97910447761194, "grad_norm": 35.51494598388672, "learning_rate": 9.65249433106576e-06, "loss": 31.9317, "step": 17429 }, { "epoch": 415.0, "grad_norm": 30.698129653930664, "learning_rate": 9.651927437641724e-06, "loss": 27.1297, "step": 17430 }, { "epoch": 415.0238805970149, "grad_norm": 34.81195831298828, "learning_rate": 9.651360544217689e-06, "loss": 31.1253, "step": 17431 }, { "epoch": 415.0477611940299, "grad_norm": 31.048845291137695, "learning_rate": 9.650793650793652e-06, "loss": 30.1795, "step": 17432 }, { "epoch": 415.0716417910448, "grad_norm": 35.35676956176758, "learning_rate": 9.650226757369615e-06, "loss": 30.8869, "step": 17433 }, { "epoch": 415.0955223880597, "grad_norm": 29.95574378967285, "learning_rate": 9.64965986394558e-06, "loss": 31.0124, "step": 17434 }, { "epoch": 415.1194029850746, "grad_norm": 37.70972442626953, "learning_rate": 9.649092970521544e-06, "loss": 30.3196, "step": 17435 }, { "epoch": 415.14328358208957, "grad_norm": 34.77582931518555, "learning_rate": 9.648526077097507e-06, "loss": 30.9152, "step": 17436 }, { "epoch": 415.1671641791045, "grad_norm": 37.33811950683594, "learning_rate": 9.64795918367347e-06, "loss": 31.0816, "step": 17437 }, { "epoch": 415.1910447761194, "grad_norm": 29.557003021240234, "learning_rate": 9.647392290249434e-06, "loss": 31.382, "step": 17438 }, { "epoch": 415.21492537313435, "grad_norm": 36.27533721923828, "learning_rate": 9.646825396825397e-06, "loss": 30.9413, "step": 17439 }, { "epoch": 415.23880597014926, "grad_norm": 32.80736541748047, "learning_rate": 9.646258503401362e-06, "loss": 31.7801, "step": 17440 }, { "epoch": 415.26268656716417, "grad_norm": 36.021240234375, "learning_rate": 9.645691609977325e-06, "loss": 31.8343, "step": 17441 }, { "epoch": 415.28656716417913, "grad_norm": 32.6784553527832, "learning_rate": 9.64512471655329e-06, "loss": 30.9252, "step": 17442 }, { "epoch": 415.31044776119404, "grad_norm": 32.43506622314453, "learning_rate": 9.644557823129252e-06, "loss": 30.8383, "step": 17443 }, { "epoch": 415.33432835820895, "grad_norm": 28.829185485839844, "learning_rate": 9.643990929705217e-06, "loss": 31.4669, "step": 17444 }, { "epoch": 415.35820895522386, "grad_norm": 40.13490295410156, "learning_rate": 9.64342403628118e-06, "loss": 32.4948, "step": 17445 }, { "epoch": 415.3820895522388, "grad_norm": 34.88079833984375, "learning_rate": 9.642857142857144e-06, "loss": 30.145, "step": 17446 }, { "epoch": 415.40597014925373, "grad_norm": 35.491676330566406, "learning_rate": 9.642290249433107e-06, "loss": 31.2718, "step": 17447 }, { "epoch": 415.42985074626864, "grad_norm": 30.531219482421875, "learning_rate": 9.64172335600907e-06, "loss": 31.199, "step": 17448 }, { "epoch": 415.4537313432836, "grad_norm": 34.6240348815918, "learning_rate": 9.641156462585035e-06, "loss": 31.5692, "step": 17449 }, { "epoch": 415.4776119402985, "grad_norm": NaN, "learning_rate": 9.640589569161e-06, "loss": 46.3923, "step": 17450 }, { "epoch": 415.5014925373134, "grad_norm": 19.935775756835938, "learning_rate": 9.640589569161e-06, "loss": 31.8499, "step": 17451 }, { "epoch": 415.52537313432833, "grad_norm": 34.28929901123047, "learning_rate": 9.640022675736962e-06, "loss": 30.9777, "step": 17452 }, { "epoch": 415.5492537313433, "grad_norm": 23.856491088867188, "learning_rate": 9.639455782312927e-06, "loss": 30.8576, "step": 17453 }, { "epoch": 415.5731343283582, "grad_norm": 44.480350494384766, "learning_rate": 9.63888888888889e-06, "loss": 31.3179, "step": 17454 }, { "epoch": 415.5970149253731, "grad_norm": 40.33920669555664, "learning_rate": 9.638321995464852e-06, "loss": 31.7506, "step": 17455 }, { "epoch": 415.6208955223881, "grad_norm": 31.928173065185547, "learning_rate": 9.637755102040817e-06, "loss": 32.2406, "step": 17456 }, { "epoch": 415.644776119403, "grad_norm": 31.577682495117188, "learning_rate": 9.637188208616782e-06, "loss": 30.344, "step": 17457 }, { "epoch": 415.6686567164179, "grad_norm": 33.42020034790039, "learning_rate": 9.636621315192745e-06, "loss": 32.0592, "step": 17458 }, { "epoch": 415.6925373134328, "grad_norm": 24.131744384765625, "learning_rate": 9.636054421768707e-06, "loss": 32.8307, "step": 17459 }, { "epoch": 415.7164179104478, "grad_norm": 42.14389419555664, "learning_rate": 9.635487528344672e-06, "loss": 31.7787, "step": 17460 }, { "epoch": 415.7402985074627, "grad_norm": 33.02387619018555, "learning_rate": 9.634920634920637e-06, "loss": 31.4886, "step": 17461 }, { "epoch": 415.7641791044776, "grad_norm": 40.44780731201172, "learning_rate": 9.6343537414966e-06, "loss": 33.1966, "step": 17462 }, { "epoch": 415.78805970149256, "grad_norm": 38.408538818359375, "learning_rate": 9.633786848072562e-06, "loss": 31.0887, "step": 17463 }, { "epoch": 415.81194029850747, "grad_norm": 33.120426177978516, "learning_rate": 9.633219954648527e-06, "loss": 31.4848, "step": 17464 }, { "epoch": 415.8358208955224, "grad_norm": 30.846548080444336, "learning_rate": 9.63265306122449e-06, "loss": 32.0165, "step": 17465 }, { "epoch": 415.85970149253734, "grad_norm": 39.12036895751953, "learning_rate": 9.632086167800455e-06, "loss": 30.8918, "step": 17466 }, { "epoch": 415.88358208955225, "grad_norm": 34.815101623535156, "learning_rate": 9.63151927437642e-06, "loss": 30.5831, "step": 17467 }, { "epoch": 415.90746268656716, "grad_norm": 37.425941467285156, "learning_rate": 9.630952380952382e-06, "loss": 31.4369, "step": 17468 }, { "epoch": 415.93134328358207, "grad_norm": 33.379005432128906, "learning_rate": 9.630385487528345e-06, "loss": 32.4893, "step": 17469 }, { "epoch": 415.95522388059703, "grad_norm": 34.791534423828125, "learning_rate": 9.62981859410431e-06, "loss": 32.1588, "step": 17470 }, { "epoch": 415.97910447761194, "grad_norm": 34.959320068359375, "learning_rate": 9.629251700680272e-06, "loss": 32.3794, "step": 17471 }, { "epoch": 416.0, "grad_norm": 28.786602020263672, "learning_rate": 9.628684807256237e-06, "loss": 28.1386, "step": 17472 }, { "epoch": 416.0238805970149, "grad_norm": 33.026161193847656, "learning_rate": 9.6281179138322e-06, "loss": 31.1314, "step": 17473 }, { "epoch": 416.0477611940299, "grad_norm": 38.329368591308594, "learning_rate": 9.627551020408165e-06, "loss": 32.9591, "step": 17474 }, { "epoch": 416.0716417910448, "grad_norm": 31.09491729736328, "learning_rate": 9.626984126984127e-06, "loss": 31.3726, "step": 17475 }, { "epoch": 416.0955223880597, "grad_norm": 38.20236587524414, "learning_rate": 9.626417233560092e-06, "loss": 31.2644, "step": 17476 }, { "epoch": 416.1194029850746, "grad_norm": 33.05231857299805, "learning_rate": 9.625850340136055e-06, "loss": 31.2694, "step": 17477 }, { "epoch": 416.14328358208957, "grad_norm": 33.7283821105957, "learning_rate": 9.62528344671202e-06, "loss": 31.4671, "step": 17478 }, { "epoch": 416.1671641791045, "grad_norm": 29.768062591552734, "learning_rate": 9.624716553287982e-06, "loss": 31.5029, "step": 17479 }, { "epoch": 416.1910447761194, "grad_norm": 34.26103973388672, "learning_rate": 9.624149659863945e-06, "loss": 31.6965, "step": 17480 }, { "epoch": 416.21492537313435, "grad_norm": 29.112293243408203, "learning_rate": 9.62358276643991e-06, "loss": 31.0562, "step": 17481 }, { "epoch": 416.23880597014926, "grad_norm": 36.47053527832031, "learning_rate": 9.623015873015875e-06, "loss": 32.4062, "step": 17482 }, { "epoch": 416.26268656716417, "grad_norm": 33.5345458984375, "learning_rate": 9.622448979591837e-06, "loss": 31.8404, "step": 17483 }, { "epoch": 416.28656716417913, "grad_norm": 37.95174026489258, "learning_rate": 9.6218820861678e-06, "loss": 30.7303, "step": 17484 }, { "epoch": 416.31044776119404, "grad_norm": 33.3171272277832, "learning_rate": 9.621315192743765e-06, "loss": 31.9348, "step": 17485 }, { "epoch": 416.33432835820895, "grad_norm": 36.97134780883789, "learning_rate": 9.62074829931973e-06, "loss": 30.7094, "step": 17486 }, { "epoch": 416.35820895522386, "grad_norm": 33.6683349609375, "learning_rate": 9.620181405895692e-06, "loss": 32.4069, "step": 17487 }, { "epoch": 416.3820895522388, "grad_norm": 36.61471939086914, "learning_rate": 9.619614512471655e-06, "loss": 31.9634, "step": 17488 }, { "epoch": 416.40597014925373, "grad_norm": 30.84718894958496, "learning_rate": 9.61904761904762e-06, "loss": 31.6744, "step": 17489 }, { "epoch": 416.42985074626864, "grad_norm": 39.370994567871094, "learning_rate": 9.618480725623583e-06, "loss": 32.897, "step": 17490 }, { "epoch": 416.4537313432836, "grad_norm": 32.75835418701172, "learning_rate": 9.617913832199547e-06, "loss": 32.2125, "step": 17491 }, { "epoch": 416.4776119402985, "grad_norm": 38.812191009521484, "learning_rate": 9.617346938775512e-06, "loss": 33.1373, "step": 17492 }, { "epoch": 416.5014925373134, "grad_norm": 35.7237548828125, "learning_rate": 9.616780045351475e-06, "loss": 31.8632, "step": 17493 }, { "epoch": 416.52537313432833, "grad_norm": 32.696044921875, "learning_rate": 9.616213151927438e-06, "loss": 31.1517, "step": 17494 }, { "epoch": 416.5492537313433, "grad_norm": 31.310306549072266, "learning_rate": 9.6156462585034e-06, "loss": 32.4938, "step": 17495 }, { "epoch": 416.5731343283582, "grad_norm": 33.53468704223633, "learning_rate": 9.615079365079365e-06, "loss": 31.6092, "step": 17496 }, { "epoch": 416.5970149253731, "grad_norm": 31.257722854614258, "learning_rate": 9.61451247165533e-06, "loss": 32.7622, "step": 17497 }, { "epoch": 416.6208955223881, "grad_norm": 41.01079177856445, "learning_rate": 9.613945578231293e-06, "loss": 30.3508, "step": 17498 }, { "epoch": 416.644776119403, "grad_norm": 34.838714599609375, "learning_rate": 9.613378684807257e-06, "loss": 30.8629, "step": 17499 }, { "epoch": 416.6686567164179, "grad_norm": 35.80812072753906, "learning_rate": 9.61281179138322e-06, "loss": 31.4608, "step": 17500 }, { "epoch": 416.6925373134328, "grad_norm": 29.86716651916504, "learning_rate": 9.612244897959185e-06, "loss": 32.0256, "step": 17501 }, { "epoch": 416.7164179104478, "grad_norm": 36.53352355957031, "learning_rate": 9.611678004535148e-06, "loss": 31.7998, "step": 17502 }, { "epoch": 416.7402985074627, "grad_norm": 33.87714385986328, "learning_rate": 9.611111111111112e-06, "loss": 31.5293, "step": 17503 }, { "epoch": 416.7641791044776, "grad_norm": 34.24754333496094, "learning_rate": 9.610544217687075e-06, "loss": 30.5463, "step": 17504 }, { "epoch": 416.78805970149256, "grad_norm": 26.537628173828125, "learning_rate": 9.609977324263038e-06, "loss": 31.6937, "step": 17505 }, { "epoch": 416.81194029850747, "grad_norm": 35.77519989013672, "learning_rate": 9.609410430839003e-06, "loss": 33.0816, "step": 17506 }, { "epoch": 416.8358208955224, "grad_norm": 32.720829010009766, "learning_rate": 9.608843537414967e-06, "loss": 32.7679, "step": 17507 }, { "epoch": 416.85970149253734, "grad_norm": 40.37900161743164, "learning_rate": 9.60827664399093e-06, "loss": 31.6627, "step": 17508 }, { "epoch": 416.88358208955225, "grad_norm": 39.46938705444336, "learning_rate": 9.607709750566893e-06, "loss": 33.3254, "step": 17509 }, { "epoch": 416.90746268656716, "grad_norm": 33.46904754638672, "learning_rate": 9.607142857142858e-06, "loss": 31.2637, "step": 17510 }, { "epoch": 416.93134328358207, "grad_norm": 34.10995101928711, "learning_rate": 9.606575963718822e-06, "loss": 31.9394, "step": 17511 }, { "epoch": 416.95522388059703, "grad_norm": 29.791414260864258, "learning_rate": 9.606009070294785e-06, "loss": 31.2692, "step": 17512 }, { "epoch": 416.97910447761194, "grad_norm": 26.669784545898438, "learning_rate": 9.60544217687075e-06, "loss": 31.9486, "step": 17513 }, { "epoch": 417.0, "grad_norm": 28.887584686279297, "learning_rate": 9.604875283446713e-06, "loss": 27.0846, "step": 17514 }, { "epoch": 417.0238805970149, "grad_norm": 26.53117561340332, "learning_rate": 9.604308390022676e-06, "loss": 32.2599, "step": 17515 }, { "epoch": 417.0477611940299, "grad_norm": 30.18800163269043, "learning_rate": 9.60374149659864e-06, "loss": 32.1433, "step": 17516 }, { "epoch": 417.0716417910448, "grad_norm": 25.092082977294922, "learning_rate": 9.603174603174605e-06, "loss": 31.7247, "step": 17517 }, { "epoch": 417.0955223880597, "grad_norm": 31.795455932617188, "learning_rate": 9.602607709750568e-06, "loss": 30.6835, "step": 17518 }, { "epoch": 417.1194029850746, "grad_norm": 22.675058364868164, "learning_rate": 9.60204081632653e-06, "loss": 32.5906, "step": 17519 }, { "epoch": 417.14328358208957, "grad_norm": 35.6273307800293, "learning_rate": 9.601473922902495e-06, "loss": 31.4875, "step": 17520 }, { "epoch": 417.1671641791045, "grad_norm": 32.19929122924805, "learning_rate": 9.600907029478458e-06, "loss": 30.7397, "step": 17521 }, { "epoch": 417.1910447761194, "grad_norm": 31.30029296875, "learning_rate": 9.600340136054423e-06, "loss": 32.4994, "step": 17522 }, { "epoch": 417.21492537313435, "grad_norm": 27.148357391357422, "learning_rate": 9.599773242630386e-06, "loss": 32.9034, "step": 17523 }, { "epoch": 417.23880597014926, "grad_norm": 29.497865676879883, "learning_rate": 9.59920634920635e-06, "loss": 31.6199, "step": 17524 }, { "epoch": 417.26268656716417, "grad_norm": 24.3176326751709, "learning_rate": 9.598639455782313e-06, "loss": 33.2101, "step": 17525 }, { "epoch": 417.28656716417913, "grad_norm": 31.636337280273438, "learning_rate": 9.598072562358278e-06, "loss": 30.4667, "step": 17526 }, { "epoch": 417.31044776119404, "grad_norm": 24.830875396728516, "learning_rate": 9.597505668934242e-06, "loss": 32.5534, "step": 17527 }, { "epoch": 417.33432835820895, "grad_norm": 31.565763473510742, "learning_rate": 9.596938775510205e-06, "loss": 30.5732, "step": 17528 }, { "epoch": 417.35820895522386, "grad_norm": 27.42149543762207, "learning_rate": 9.596371882086168e-06, "loss": 31.6573, "step": 17529 }, { "epoch": 417.3820895522388, "grad_norm": 29.571035385131836, "learning_rate": 9.595804988662131e-06, "loss": 32.4568, "step": 17530 }, { "epoch": 417.40597014925373, "grad_norm": 27.271560668945312, "learning_rate": 9.595238095238096e-06, "loss": 32.0478, "step": 17531 }, { "epoch": 417.42985074626864, "grad_norm": 25.561492919921875, "learning_rate": 9.59467120181406e-06, "loss": 32.1874, "step": 17532 }, { "epoch": 417.4537313432836, "grad_norm": 25.692110061645508, "learning_rate": 9.594104308390023e-06, "loss": 31.4429, "step": 17533 }, { "epoch": 417.4776119402985, "grad_norm": 26.35320472717285, "learning_rate": 9.593537414965988e-06, "loss": 31.4279, "step": 17534 }, { "epoch": 417.5014925373134, "grad_norm": 25.281557083129883, "learning_rate": 9.59297052154195e-06, "loss": 33.0092, "step": 17535 }, { "epoch": 417.52537313432833, "grad_norm": 26.904966354370117, "learning_rate": 9.592403628117914e-06, "loss": 31.7924, "step": 17536 }, { "epoch": 417.5492537313433, "grad_norm": 22.45372772216797, "learning_rate": 9.591836734693878e-06, "loss": 32.2811, "step": 17537 }, { "epoch": 417.5731343283582, "grad_norm": 24.859397888183594, "learning_rate": 9.591269841269843e-06, "loss": 31.921, "step": 17538 }, { "epoch": 417.5970149253731, "grad_norm": 23.877729415893555, "learning_rate": 9.590702947845806e-06, "loss": 32.5846, "step": 17539 }, { "epoch": 417.6208955223881, "grad_norm": 18.87540626525879, "learning_rate": 9.590136054421769e-06, "loss": 33.1223, "step": 17540 }, { "epoch": 417.644776119403, "grad_norm": 23.214746475219727, "learning_rate": 9.589569160997733e-06, "loss": 33.1679, "step": 17541 }, { "epoch": 417.6686567164179, "grad_norm": 21.429454803466797, "learning_rate": 9.589002267573698e-06, "loss": 31.8201, "step": 17542 }, { "epoch": 417.6925373134328, "grad_norm": 21.93021011352539, "learning_rate": 9.58843537414966e-06, "loss": 29.5693, "step": 17543 }, { "epoch": 417.7164179104478, "grad_norm": 16.797672271728516, "learning_rate": 9.587868480725624e-06, "loss": 31.6084, "step": 17544 }, { "epoch": 417.7402985074627, "grad_norm": 29.99384880065918, "learning_rate": 9.587301587301588e-06, "loss": 31.7087, "step": 17545 }, { "epoch": 417.7641791044776, "grad_norm": 22.556325912475586, "learning_rate": 9.586734693877551e-06, "loss": 32.217, "step": 17546 }, { "epoch": 417.78805970149256, "grad_norm": 25.481576919555664, "learning_rate": 9.586167800453516e-06, "loss": 31.3533, "step": 17547 }, { "epoch": 417.81194029850747, "grad_norm": 25.59836769104004, "learning_rate": 9.58560090702948e-06, "loss": 31.2115, "step": 17548 }, { "epoch": 417.8358208955224, "grad_norm": 19.862539291381836, "learning_rate": 9.585034013605443e-06, "loss": 31.1257, "step": 17549 }, { "epoch": 417.85970149253734, "grad_norm": 22.07025146484375, "learning_rate": 9.584467120181406e-06, "loss": 32.9178, "step": 17550 }, { "epoch": 417.88358208955225, "grad_norm": 24.650352478027344, "learning_rate": 9.58390022675737e-06, "loss": 32.2181, "step": 17551 }, { "epoch": 417.90746268656716, "grad_norm": 17.13593292236328, "learning_rate": 9.583333333333335e-06, "loss": 32.0811, "step": 17552 }, { "epoch": 417.93134328358207, "grad_norm": 22.94606590270996, "learning_rate": 9.582766439909298e-06, "loss": 31.1929, "step": 17553 }, { "epoch": 417.95522388059703, "grad_norm": 24.301525115966797, "learning_rate": 9.582199546485261e-06, "loss": 32.6324, "step": 17554 }, { "epoch": 417.97910447761194, "grad_norm": 18.32398796081543, "learning_rate": 9.581632653061226e-06, "loss": 31.1325, "step": 17555 }, { "epoch": 418.0, "grad_norm": 15.554393768310547, "learning_rate": 9.581065759637189e-06, "loss": 27.5425, "step": 17556 }, { "epoch": 418.0238805970149, "grad_norm": 25.79958152770996, "learning_rate": 9.580498866213153e-06, "loss": 32.2033, "step": 17557 }, { "epoch": 418.0477611940299, "grad_norm": 22.983232498168945, "learning_rate": 9.579931972789116e-06, "loss": 31.7035, "step": 17558 }, { "epoch": 418.0716417910448, "grad_norm": 15.403076171875, "learning_rate": 9.57936507936508e-06, "loss": 32.0717, "step": 17559 }, { "epoch": 418.0955223880597, "grad_norm": 18.59060287475586, "learning_rate": 9.578798185941044e-06, "loss": 30.2652, "step": 17560 }, { "epoch": 418.1194029850746, "grad_norm": 24.422931671142578, "learning_rate": 9.578231292517007e-06, "loss": 32.4902, "step": 17561 }, { "epoch": 418.14328358208957, "grad_norm": 16.37408447265625, "learning_rate": 9.577664399092971e-06, "loss": 30.9031, "step": 17562 }, { "epoch": 418.1671641791045, "grad_norm": 22.187536239624023, "learning_rate": 9.577097505668936e-06, "loss": 31.2213, "step": 17563 }, { "epoch": 418.1910447761194, "grad_norm": NaN, "learning_rate": 9.576530612244899e-06, "loss": 45.3476, "step": 17564 }, { "epoch": 418.21492537313435, "grad_norm": 22.222942352294922, "learning_rate": 9.576530612244899e-06, "loss": 31.6182, "step": 17565 }, { "epoch": 418.23880597014926, "grad_norm": 48.21788024902344, "learning_rate": 9.575963718820862e-06, "loss": 32.9053, "step": 17566 }, { "epoch": 418.26268656716417, "grad_norm": 38.83982849121094, "learning_rate": 9.575396825396826e-06, "loss": 33.7198, "step": 17567 }, { "epoch": 418.28656716417913, "grad_norm": 40.27055740356445, "learning_rate": 9.57482993197279e-06, "loss": 31.2131, "step": 17568 }, { "epoch": 418.31044776119404, "grad_norm": 31.61357307434082, "learning_rate": 9.574263038548754e-06, "loss": 32.2099, "step": 17569 }, { "epoch": 418.33432835820895, "grad_norm": 40.3348388671875, "learning_rate": 9.573696145124717e-06, "loss": 31.9725, "step": 17570 }, { "epoch": 418.35820895522386, "grad_norm": 31.171201705932617, "learning_rate": 9.573129251700681e-06, "loss": 33.3625, "step": 17571 }, { "epoch": 418.3820895522388, "grad_norm": 44.89733123779297, "learning_rate": 9.572562358276644e-06, "loss": 32.2646, "step": 17572 }, { "epoch": 418.40597014925373, "grad_norm": 38.07866668701172, "learning_rate": 9.571995464852609e-06, "loss": 32.5972, "step": 17573 }, { "epoch": 418.42985074626864, "grad_norm": 49.71159362792969, "learning_rate": 9.571428571428573e-06, "loss": 33.2418, "step": 17574 }, { "epoch": 418.4537313432836, "grad_norm": 41.45730972290039, "learning_rate": 9.570861678004536e-06, "loss": 33.2561, "step": 17575 }, { "epoch": 418.4776119402985, "grad_norm": 41.64115905761719, "learning_rate": 9.570294784580499e-06, "loss": 32.1524, "step": 17576 }, { "epoch": 418.5014925373134, "grad_norm": 37.11415481567383, "learning_rate": 9.569727891156464e-06, "loss": 33.4341, "step": 17577 }, { "epoch": 418.52537313432833, "grad_norm": 41.89173889160156, "learning_rate": 9.569160997732427e-06, "loss": 32.5726, "step": 17578 }, { "epoch": 418.5492537313433, "grad_norm": 34.52882385253906, "learning_rate": 9.568594104308391e-06, "loss": 33.1209, "step": 17579 }, { "epoch": 418.5731343283582, "grad_norm": 40.373287200927734, "learning_rate": 9.568027210884354e-06, "loss": 33.3066, "step": 17580 }, { "epoch": 418.5970149253731, "grad_norm": 32.57235336303711, "learning_rate": 9.567460317460319e-06, "loss": 32.788, "step": 17581 }, { "epoch": 418.6208955223881, "grad_norm": 48.87415313720703, "learning_rate": 9.566893424036282e-06, "loss": 33.2523, "step": 17582 }, { "epoch": 418.644776119403, "grad_norm": 39.380393981933594, "learning_rate": 9.566326530612246e-06, "loss": 33.0736, "step": 17583 }, { "epoch": 418.6686567164179, "grad_norm": 36.41569900512695, "learning_rate": 9.565759637188209e-06, "loss": 33.0713, "step": 17584 }, { "epoch": 418.6925373134328, "grad_norm": 33.75836181640625, "learning_rate": 9.565192743764174e-06, "loss": 32.9451, "step": 17585 }, { "epoch": 418.7164179104478, "grad_norm": 36.547908782958984, "learning_rate": 9.564625850340137e-06, "loss": 31.6862, "step": 17586 }, { "epoch": 418.7402985074627, "grad_norm": 32.715938568115234, "learning_rate": 9.5640589569161e-06, "loss": 32.9217, "step": 17587 }, { "epoch": 418.7641791044776, "grad_norm": 44.12977981567383, "learning_rate": 9.563492063492064e-06, "loss": 32.9309, "step": 17588 }, { "epoch": 418.78805970149256, "grad_norm": 38.17497634887695, "learning_rate": 9.562925170068029e-06, "loss": 32.5588, "step": 17589 }, { "epoch": 418.81194029850747, "grad_norm": 41.1082649230957, "learning_rate": 9.562358276643991e-06, "loss": 33.0644, "step": 17590 }, { "epoch": 418.8358208955224, "grad_norm": 35.24775314331055, "learning_rate": 9.561791383219954e-06, "loss": 33.8196, "step": 17591 }, { "epoch": 418.85970149253734, "grad_norm": 37.03262710571289, "learning_rate": 9.561224489795919e-06, "loss": 32.1539, "step": 17592 }, { "epoch": 418.88358208955225, "grad_norm": 33.93702697753906, "learning_rate": 9.560657596371884e-06, "loss": 32.4094, "step": 17593 }, { "epoch": 418.90746268656716, "grad_norm": 44.84053421020508, "learning_rate": 9.560090702947846e-06, "loss": 33.1483, "step": 17594 }, { "epoch": 418.93134328358207, "grad_norm": 42.49712371826172, "learning_rate": 9.559523809523811e-06, "loss": 33.1746, "step": 17595 }, { "epoch": 418.95522388059703, "grad_norm": 36.922584533691406, "learning_rate": 9.558956916099774e-06, "loss": 33.5949, "step": 17596 }, { "epoch": 418.97910447761194, "grad_norm": 36.12288284301758, "learning_rate": 9.558390022675737e-06, "loss": 32.6467, "step": 17597 }, { "epoch": 419.0, "grad_norm": 37.515892028808594, "learning_rate": 9.557823129251701e-06, "loss": 28.5415, "step": 17598 }, { "epoch": 419.0238805970149, "grad_norm": 34.370445251464844, "learning_rate": 9.557256235827666e-06, "loss": 33.8195, "step": 17599 }, { "epoch": 419.0477611940299, "grad_norm": 41.703155517578125, "learning_rate": 9.556689342403629e-06, "loss": 32.9765, "step": 17600 }, { "epoch": 419.0716417910448, "grad_norm": 39.18743896484375, "learning_rate": 9.556122448979592e-06, "loss": 31.2492, "step": 17601 }, { "epoch": 419.0955223880597, "grad_norm": 39.28944778442383, "learning_rate": 9.555555555555556e-06, "loss": 32.1479, "step": 17602 }, { "epoch": 419.1194029850746, "grad_norm": 35.05729675292969, "learning_rate": 9.55498866213152e-06, "loss": 32.1249, "step": 17603 }, { "epoch": 419.14328358208957, "grad_norm": 40.646728515625, "learning_rate": 9.554421768707484e-06, "loss": 33.3527, "step": 17604 }, { "epoch": 419.1671641791045, "grad_norm": 35.161800384521484, "learning_rate": 9.553854875283447e-06, "loss": 31.7491, "step": 17605 }, { "epoch": 419.1910447761194, "grad_norm": 41.97065353393555, "learning_rate": 9.553287981859411e-06, "loss": 33.5065, "step": 17606 }, { "epoch": 419.21492537313435, "grad_norm": 39.23545455932617, "learning_rate": 9.552721088435374e-06, "loss": 33.4734, "step": 17607 }, { "epoch": 419.23880597014926, "grad_norm": 43.604305267333984, "learning_rate": 9.552154195011339e-06, "loss": 32.548, "step": 17608 }, { "epoch": 419.26268656716417, "grad_norm": 36.96836853027344, "learning_rate": 9.551587301587304e-06, "loss": 33.0143, "step": 17609 }, { "epoch": 419.28656716417913, "grad_norm": 40.2878532409668, "learning_rate": 9.551020408163266e-06, "loss": 31.9072, "step": 17610 }, { "epoch": 419.31044776119404, "grad_norm": 35.68461990356445, "learning_rate": 9.55045351473923e-06, "loss": 32.882, "step": 17611 }, { "epoch": 419.33432835820895, "grad_norm": 41.77913284301758, "learning_rate": 9.549886621315192e-06, "loss": 31.5042, "step": 17612 }, { "epoch": 419.35820895522386, "grad_norm": 35.19708251953125, "learning_rate": 9.549319727891157e-06, "loss": 32.4653, "step": 17613 }, { "epoch": 419.3820895522388, "grad_norm": 41.49123764038086, "learning_rate": 9.548752834467121e-06, "loss": 31.4312, "step": 17614 }, { "epoch": 419.40597014925373, "grad_norm": 32.89275360107422, "learning_rate": 9.548185941043084e-06, "loss": 33.208, "step": 17615 }, { "epoch": 419.42985074626864, "grad_norm": 40.672149658203125, "learning_rate": 9.547619047619049e-06, "loss": 34.5644, "step": 17616 }, { "epoch": 419.4537313432836, "grad_norm": 35.49092483520508, "learning_rate": 9.547052154195012e-06, "loss": 33.4505, "step": 17617 }, { "epoch": 419.4776119402985, "grad_norm": 43.761478424072266, "learning_rate": 9.546485260770976e-06, "loss": 32.4576, "step": 17618 }, { "epoch": 419.5014925373134, "grad_norm": 41.06398010253906, "learning_rate": 9.54591836734694e-06, "loss": 32.5994, "step": 17619 }, { "epoch": 419.52537313432833, "grad_norm": 40.87247085571289, "learning_rate": 9.545351473922904e-06, "loss": 33.1939, "step": 17620 }, { "epoch": 419.5492537313433, "grad_norm": 38.59591293334961, "learning_rate": 9.544784580498867e-06, "loss": 33.1224, "step": 17621 }, { "epoch": 419.5731343283582, "grad_norm": 39.23366928100586, "learning_rate": 9.54421768707483e-06, "loss": 33.0341, "step": 17622 }, { "epoch": 419.5970149253731, "grad_norm": 36.526763916015625, "learning_rate": 9.543650793650794e-06, "loss": 31.8997, "step": 17623 }, { "epoch": 419.6208955223881, "grad_norm": 40.675716400146484, "learning_rate": 9.543083900226759e-06, "loss": 32.2546, "step": 17624 }, { "epoch": 419.644776119403, "grad_norm": 37.90499496459961, "learning_rate": 9.542517006802722e-06, "loss": 32.4246, "step": 17625 }, { "epoch": 419.6686567164179, "grad_norm": NaN, "learning_rate": 9.541950113378685e-06, "loss": 51.5688, "step": 17626 }, { "epoch": 419.6925373134328, "grad_norm": 23.699739456176758, "learning_rate": 9.541950113378685e-06, "loss": 33.5107, "step": 17627 }, { "epoch": 419.7164179104478, "grad_norm": 49.75596237182617, "learning_rate": 9.54138321995465e-06, "loss": 33.7643, "step": 17628 }, { "epoch": 419.7402985074627, "grad_norm": 34.00912094116211, "learning_rate": 9.540816326530612e-06, "loss": 34.7164, "step": 17629 }, { "epoch": 419.7641791044776, "grad_norm": 36.16143035888672, "learning_rate": 9.540249433106577e-06, "loss": 34.8689, "step": 17630 }, { "epoch": 419.78805970149256, "grad_norm": 30.307167053222656, "learning_rate": 9.539682539682541e-06, "loss": 33.6437, "step": 17631 }, { "epoch": 419.81194029850747, "grad_norm": 29.870691299438477, "learning_rate": 9.539115646258504e-06, "loss": 33.9267, "step": 17632 }, { "epoch": 419.8358208955224, "grad_norm": 30.42253875732422, "learning_rate": 9.538548752834467e-06, "loss": 33.6009, "step": 17633 }, { "epoch": 419.85970149253734, "grad_norm": 26.70201873779297, "learning_rate": 9.537981859410432e-06, "loss": 34.1938, "step": 17634 }, { "epoch": 419.88358208955225, "grad_norm": 38.59564208984375, "learning_rate": 9.537414965986396e-06, "loss": 33.16, "step": 17635 }, { "epoch": 419.90746268656716, "grad_norm": 25.168344497680664, "learning_rate": 9.53684807256236e-06, "loss": 34.469, "step": 17636 }, { "epoch": 419.93134328358207, "grad_norm": 52.04273986816406, "learning_rate": 9.536281179138322e-06, "loss": 33.9904, "step": 17637 }, { "epoch": 419.95522388059703, "grad_norm": 37.60790252685547, "learning_rate": 9.535714285714287e-06, "loss": 34.7973, "step": 17638 }, { "epoch": 419.97910447761194, "grad_norm": 55.68663024902344, "learning_rate": 9.53514739229025e-06, "loss": 35.7397, "step": 17639 }, { "epoch": 420.0, "grad_norm": 51.74850845336914, "learning_rate": 9.534580498866214e-06, "loss": 31.1879, "step": 17640 }, { "epoch": 420.0, "step": 17640, "total_flos": 8.671153071633885e+17, "train_loss": 1.4952584476427697, "train_runtime": 25672.4343, "train_samples_per_second": 87.559, "train_steps_per_second": 0.687 }, { "epoch": 420.0238805970149, "grad_norm": 112.91419219970703, "learning_rate": 1e-05, "loss": 33.9151, "step": 17641 }, { "epoch": 420.0477611940299, "grad_norm": Infinity, "learning_rate": 9.999458874458874e-06, "loss": 40.5489, "step": 17642 }, { "epoch": 420.0716417910448, "grad_norm": Infinity, "learning_rate": 9.999458874458874e-06, "loss": 39.5182, "step": 17643 }, { "epoch": 420.0955223880597, "grad_norm": 402.99609375, "learning_rate": 9.999458874458874e-06, "loss": 38.8643, "step": 17644 }, { "epoch": 420.1194029850746, "grad_norm": 222.9229736328125, "learning_rate": 9.99891774891775e-06, "loss": 36.7847, "step": 17645 }, { "epoch": 420.14328358208957, "grad_norm": 73.6364974975586, "learning_rate": 9.998376623376625e-06, "loss": 34.5185, "step": 17646 }, { "epoch": 420.1671641791045, "grad_norm": 98.36174011230469, "learning_rate": 9.997835497835499e-06, "loss": 34.3682, "step": 17647 }, { "epoch": 420.1910447761194, "grad_norm": 56.974830627441406, "learning_rate": 9.997294372294372e-06, "loss": 33.9145, "step": 17648 }, { "epoch": 420.21492537313435, "grad_norm": 47.68419647216797, "learning_rate": 9.996753246753248e-06, "loss": 32.737, "step": 17649 }, { "epoch": 420.23880597014926, "grad_norm": 38.63371276855469, "learning_rate": 9.996212121212123e-06, "loss": 32.4193, "step": 17650 }, { "epoch": 420.26268656716417, "grad_norm": 35.25867462158203, "learning_rate": 9.995670995670996e-06, "loss": 33.1348, "step": 17651 }, { "epoch": 420.28656716417913, "grad_norm": 31.282548904418945, "learning_rate": 9.99512987012987e-06, "loss": 32.6214, "step": 17652 }, { "epoch": 420.31044776119404, "grad_norm": 29.480207443237305, "learning_rate": 9.994588744588745e-06, "loss": 31.8834, "step": 17653 }, { "epoch": 420.33432835820895, "grad_norm": 33.77840042114258, "learning_rate": 9.99404761904762e-06, "loss": 31.733, "step": 17654 }, { "epoch": 420.35820895522386, "grad_norm": 34.8023567199707, "learning_rate": 9.993506493506494e-06, "loss": 31.583, "step": 17655 }, { "epoch": 420.3820895522388, "grad_norm": 23.891918182373047, "learning_rate": 9.99296536796537e-06, "loss": 31.7572, "step": 17656 }, { "epoch": 420.40597014925373, "grad_norm": 24.45165252685547, "learning_rate": 9.992424242424243e-06, "loss": 30.9273, "step": 17657 }, { "epoch": 420.42985074626864, "grad_norm": 31.686626434326172, "learning_rate": 9.991883116883118e-06, "loss": 31.7009, "step": 17658 }, { "epoch": 420.4537313432836, "grad_norm": 28.03240203857422, "learning_rate": 9.991341991341992e-06, "loss": 32.3674, "step": 17659 }, { "epoch": 420.4776119402985, "grad_norm": 19.185731887817383, "learning_rate": 9.990800865800867e-06, "loss": 31.2072, "step": 17660 }, { "epoch": 420.5014925373134, "grad_norm": 21.57698631286621, "learning_rate": 9.990259740259741e-06, "loss": 30.589, "step": 17661 }, { "epoch": 420.52537313432833, "grad_norm": 26.052366256713867, "learning_rate": 9.989718614718616e-06, "loss": 31.491, "step": 17662 }, { "epoch": 420.5492537313433, "grad_norm": 17.546358108520508, "learning_rate": 9.98917748917749e-06, "loss": 30.1266, "step": 17663 }, { "epoch": 420.5731343283582, "grad_norm": 20.863521575927734, "learning_rate": 9.988636363636365e-06, "loss": 31.4594, "step": 17664 }, { "epoch": 420.5970149253731, "grad_norm": 21.719436645507812, "learning_rate": 9.988095238095239e-06, "loss": 30.7668, "step": 17665 }, { "epoch": 420.6208955223881, "grad_norm": 20.507556915283203, "learning_rate": 9.987554112554112e-06, "loss": 31.5237, "step": 17666 }, { "epoch": 420.644776119403, "grad_norm": 16.809471130371094, "learning_rate": 9.987012987012988e-06, "loss": 31.3119, "step": 17667 }, { "epoch": 420.6686567164179, "grad_norm": 25.894765853881836, "learning_rate": 9.986471861471863e-06, "loss": 31.2617, "step": 17668 }, { "epoch": 420.6925373134328, "grad_norm": 22.232994079589844, "learning_rate": 9.985930735930737e-06, "loss": 30.2337, "step": 17669 }, { "epoch": 420.7164179104478, "grad_norm": 19.489809036254883, "learning_rate": 9.98538961038961e-06, "loss": 32.0539, "step": 17670 }, { "epoch": 420.7402985074627, "grad_norm": 21.0067195892334, "learning_rate": 9.984848484848485e-06, "loss": 30.9518, "step": 17671 }, { "epoch": 420.7641791044776, "grad_norm": 25.96683120727539, "learning_rate": 9.98430735930736e-06, "loss": 30.3011, "step": 17672 }, { "epoch": 420.78805970149256, "grad_norm": 18.228660583496094, "learning_rate": 9.983766233766234e-06, "loss": 30.681, "step": 17673 }, { "epoch": 420.81194029850747, "grad_norm": 20.957002639770508, "learning_rate": 9.983225108225108e-06, "loss": 31.5611, "step": 17674 }, { "epoch": 420.8358208955224, "grad_norm": 19.175439834594727, "learning_rate": 9.982683982683983e-06, "loss": 31.0533, "step": 17675 }, { "epoch": 420.85970149253734, "grad_norm": 20.846750259399414, "learning_rate": 9.982142857142858e-06, "loss": 30.1827, "step": 17676 }, { "epoch": 420.88358208955225, "grad_norm": 17.5195369720459, "learning_rate": 9.981601731601732e-06, "loss": 30.4414, "step": 17677 }, { "epoch": 420.90746268656716, "grad_norm": 18.69231605529785, "learning_rate": 9.981060606060606e-06, "loss": 30.6528, "step": 17678 }, { "epoch": 420.93134328358207, "grad_norm": 19.49618911743164, "learning_rate": 9.980519480519481e-06, "loss": 30.482, "step": 17679 }, { "epoch": 420.95522388059703, "grad_norm": 19.205490112304688, "learning_rate": 9.979978354978356e-06, "loss": 29.6978, "step": 17680 }, { "epoch": 420.97910447761194, "grad_norm": 19.25636100769043, "learning_rate": 9.97943722943723e-06, "loss": 30.7216, "step": 17681 }, { "epoch": 421.0, "grad_norm": 17.168264389038086, "learning_rate": 9.978896103896105e-06, "loss": 28.0752, "step": 17682 }, { "epoch": 421.0238805970149, "grad_norm": 21.812082290649414, "learning_rate": 9.978354978354979e-06, "loss": 31.1343, "step": 17683 }, { "epoch": 421.0477611940299, "grad_norm": 17.323999404907227, "learning_rate": 9.977813852813854e-06, "loss": 31.3371, "step": 17684 }, { "epoch": 421.0716417910448, "grad_norm": 19.48267936706543, "learning_rate": 9.977272727272728e-06, "loss": 30.5882, "step": 17685 }, { "epoch": 421.0955223880597, "grad_norm": 17.70973014831543, "learning_rate": 9.976731601731603e-06, "loss": 31.1488, "step": 17686 }, { "epoch": 421.1194029850746, "grad_norm": 19.419677734375, "learning_rate": 9.976190476190477e-06, "loss": 29.9038, "step": 17687 }, { "epoch": 421.14328358208957, "grad_norm": 18.19388198852539, "learning_rate": 9.975649350649352e-06, "loss": 30.389, "step": 17688 }, { "epoch": 421.1671641791045, "grad_norm": 16.10342025756836, "learning_rate": 9.975108225108225e-06, "loss": 30.582, "step": 17689 }, { "epoch": 421.1910447761194, "grad_norm": 19.07910919189453, "learning_rate": 9.9745670995671e-06, "loss": 29.9752, "step": 17690 }, { "epoch": 421.21492537313435, "grad_norm": 18.458965301513672, "learning_rate": 9.974025974025974e-06, "loss": 31.2172, "step": 17691 }, { "epoch": 421.23880597014926, "grad_norm": 18.080291748046875, "learning_rate": 9.97348484848485e-06, "loss": 30.7753, "step": 17692 }, { "epoch": 421.26268656716417, "grad_norm": 23.084182739257812, "learning_rate": 9.972943722943725e-06, "loss": 31.4107, "step": 17693 }, { "epoch": 421.28656716417913, "grad_norm": 16.90616798400879, "learning_rate": 9.972402597402599e-06, "loss": 30.7728, "step": 17694 }, { "epoch": 421.31044776119404, "grad_norm": 19.12290382385254, "learning_rate": 9.971861471861472e-06, "loss": 31.1052, "step": 17695 }, { "epoch": 421.33432835820895, "grad_norm": 22.74007225036621, "learning_rate": 9.971320346320347e-06, "loss": 31.7474, "step": 17696 }, { "epoch": 421.35820895522386, "grad_norm": 17.926822662353516, "learning_rate": 9.970779220779223e-06, "loss": 29.8698, "step": 17697 }, { "epoch": 421.3820895522388, "grad_norm": 20.645263671875, "learning_rate": 9.970238095238096e-06, "loss": 30.3212, "step": 17698 }, { "epoch": 421.40597014925373, "grad_norm": 22.62677574157715, "learning_rate": 9.96969696969697e-06, "loss": 31.3098, "step": 17699 }, { "epoch": 421.42985074626864, "grad_norm": 16.30235481262207, "learning_rate": 9.969155844155845e-06, "loss": 30.6176, "step": 17700 }, { "epoch": 421.4537313432836, "grad_norm": 16.102262496948242, "learning_rate": 9.96861471861472e-06, "loss": 30.7111, "step": 17701 }, { "epoch": 421.4776119402985, "grad_norm": 16.76392936706543, "learning_rate": 9.968073593073594e-06, "loss": 31.0407, "step": 17702 }, { "epoch": 421.5014925373134, "grad_norm": 20.659801483154297, "learning_rate": 9.967532467532468e-06, "loss": 31.4066, "step": 17703 }, { "epoch": 421.52537313432833, "grad_norm": 20.50784683227539, "learning_rate": 9.966991341991343e-06, "loss": 29.9563, "step": 17704 }, { "epoch": 421.5492537313433, "grad_norm": 19.23495864868164, "learning_rate": 9.966450216450217e-06, "loss": 31.179, "step": 17705 }, { "epoch": 421.5731343283582, "grad_norm": 18.347591400146484, "learning_rate": 9.965909090909092e-06, "loss": 30.8491, "step": 17706 }, { "epoch": 421.5970149253731, "grad_norm": 17.33802604675293, "learning_rate": 9.965367965367966e-06, "loss": 31.1655, "step": 17707 }, { "epoch": 421.6208955223881, "grad_norm": 17.760305404663086, "learning_rate": 9.964826839826841e-06, "loss": 30.5194, "step": 17708 }, { "epoch": 421.644776119403, "grad_norm": 23.50550079345703, "learning_rate": 9.964285714285714e-06, "loss": 31.2406, "step": 17709 }, { "epoch": 421.6686567164179, "grad_norm": 19.194673538208008, "learning_rate": 9.96374458874459e-06, "loss": 30.7916, "step": 17710 }, { "epoch": 421.6925373134328, "grad_norm": 17.148178100585938, "learning_rate": 9.963203463203463e-06, "loss": 28.8889, "step": 17711 }, { "epoch": 421.7164179104478, "grad_norm": 20.253847122192383, "learning_rate": 9.962662337662339e-06, "loss": 30.4494, "step": 17712 }, { "epoch": 421.7402985074627, "grad_norm": 21.947084426879883, "learning_rate": 9.962121212121212e-06, "loss": 31.4266, "step": 17713 }, { "epoch": 421.7641791044776, "grad_norm": 20.87897491455078, "learning_rate": 9.961580086580088e-06, "loss": 31.3864, "step": 17714 }, { "epoch": 421.78805970149256, "grad_norm": 17.126819610595703, "learning_rate": 9.961038961038963e-06, "loss": 30.0833, "step": 17715 }, { "epoch": 421.81194029850747, "grad_norm": 19.545730590820312, "learning_rate": 9.960497835497836e-06, "loss": 30.3197, "step": 17716 }, { "epoch": 421.8358208955224, "grad_norm": 20.319454193115234, "learning_rate": 9.95995670995671e-06, "loss": 30.0552, "step": 17717 }, { "epoch": 421.85970149253734, "grad_norm": 17.654926300048828, "learning_rate": 9.959415584415585e-06, "loss": 30.7586, "step": 17718 }, { "epoch": 421.88358208955225, "grad_norm": 15.611412048339844, "learning_rate": 9.95887445887446e-06, "loss": 30.4199, "step": 17719 }, { "epoch": 421.90746268656716, "grad_norm": NaN, "learning_rate": 9.958333333333334e-06, "loss": 29.7574, "step": 17720 }, { "epoch": 421.93134328358207, "grad_norm": 15.934208869934082, "learning_rate": 9.958333333333334e-06, "loss": 30.6046, "step": 17721 }, { "epoch": 421.95522388059703, "grad_norm": 17.66332244873047, "learning_rate": 9.957792207792208e-06, "loss": 30.948, "step": 17722 }, { "epoch": 421.97910447761194, "grad_norm": 15.901494979858398, "learning_rate": 9.957251082251083e-06, "loss": 31.1169, "step": 17723 }, { "epoch": 422.0, "grad_norm": 18.879749298095703, "learning_rate": 9.956709956709958e-06, "loss": 27.1391, "step": 17724 }, { "epoch": 422.0238805970149, "grad_norm": 22.32988739013672, "learning_rate": 9.956168831168832e-06, "loss": 31.3596, "step": 17725 }, { "epoch": 422.0477611940299, "grad_norm": 20.225967407226562, "learning_rate": 9.955627705627706e-06, "loss": 31.0335, "step": 17726 }, { "epoch": 422.0716417910448, "grad_norm": 19.20271110534668, "learning_rate": 9.955086580086581e-06, "loss": 30.2488, "step": 17727 }, { "epoch": 422.0955223880597, "grad_norm": 19.8067626953125, "learning_rate": 9.954545454545456e-06, "loss": 31.0425, "step": 17728 }, { "epoch": 422.1194029850746, "grad_norm": 22.654584884643555, "learning_rate": 9.95400432900433e-06, "loss": 30.037, "step": 17729 }, { "epoch": 422.14328358208957, "grad_norm": 19.72269058227539, "learning_rate": 9.953463203463203e-06, "loss": 31.6525, "step": 17730 }, { "epoch": 422.1671641791045, "grad_norm": 18.585247039794922, "learning_rate": 9.952922077922079e-06, "loss": 30.8078, "step": 17731 }, { "epoch": 422.1910447761194, "grad_norm": 16.761127471923828, "learning_rate": 9.952380952380954e-06, "loss": 31.3979, "step": 17732 }, { "epoch": 422.21492537313435, "grad_norm": 20.538742065429688, "learning_rate": 9.951839826839828e-06, "loss": 29.8246, "step": 17733 }, { "epoch": 422.23880597014926, "grad_norm": 20.14104461669922, "learning_rate": 9.951298701298701e-06, "loss": 29.3158, "step": 17734 }, { "epoch": 422.26268656716417, "grad_norm": 15.591094017028809, "learning_rate": 9.950757575757577e-06, "loss": 31.2207, "step": 17735 }, { "epoch": 422.28656716417913, "grad_norm": 16.661462783813477, "learning_rate": 9.950216450216452e-06, "loss": 32.3801, "step": 17736 }, { "epoch": 422.31044776119404, "grad_norm": 16.15082359313965, "learning_rate": 9.949675324675325e-06, "loss": 30.3467, "step": 17737 }, { "epoch": 422.33432835820895, "grad_norm": 18.46958351135254, "learning_rate": 9.949134199134199e-06, "loss": 29.7602, "step": 17738 }, { "epoch": 422.35820895522386, "grad_norm": 20.684654235839844, "learning_rate": 9.948593073593074e-06, "loss": 31.3437, "step": 17739 }, { "epoch": 422.3820895522388, "grad_norm": 16.216947555541992, "learning_rate": 9.94805194805195e-06, "loss": 29.3093, "step": 17740 }, { "epoch": 422.40597014925373, "grad_norm": 18.468616485595703, "learning_rate": 9.947510822510823e-06, "loss": 30.6165, "step": 17741 }, { "epoch": 422.42985074626864, "grad_norm": 17.252256393432617, "learning_rate": 9.946969696969699e-06, "loss": 31.1957, "step": 17742 }, { "epoch": 422.4537313432836, "grad_norm": 18.978557586669922, "learning_rate": 9.946428571428572e-06, "loss": 30.4812, "step": 17743 }, { "epoch": 422.4776119402985, "grad_norm": 23.810266494750977, "learning_rate": 9.945887445887446e-06, "loss": 31.4226, "step": 17744 }, { "epoch": 422.5014925373134, "grad_norm": 21.530662536621094, "learning_rate": 9.945346320346321e-06, "loss": 29.7931, "step": 17745 }, { "epoch": 422.52537313432833, "grad_norm": 17.577211380004883, "learning_rate": 9.944805194805196e-06, "loss": 30.1175, "step": 17746 }, { "epoch": 422.5492537313433, "grad_norm": 17.01324462890625, "learning_rate": 9.94426406926407e-06, "loss": 30.0424, "step": 17747 }, { "epoch": 422.5731343283582, "grad_norm": NaN, "learning_rate": 9.943722943722944e-06, "loss": 38.9605, "step": 17748 }, { "epoch": 422.5970149253731, "grad_norm": 22.119617462158203, "learning_rate": 9.943722943722944e-06, "loss": 30.4234, "step": 17749 }, { "epoch": 422.6208955223881, "grad_norm": 19.939777374267578, "learning_rate": 9.943181818181819e-06, "loss": 30.3345, "step": 17750 }, { "epoch": 422.644776119403, "grad_norm": 15.90744400024414, "learning_rate": 9.942640692640694e-06, "loss": 31.2696, "step": 17751 }, { "epoch": 422.6686567164179, "grad_norm": 19.290239334106445, "learning_rate": 9.942099567099568e-06, "loss": 30.276, "step": 17752 }, { "epoch": 422.6925373134328, "grad_norm": 30.07827377319336, "learning_rate": 9.941558441558441e-06, "loss": 30.3613, "step": 17753 }, { "epoch": 422.7164179104478, "grad_norm": 17.388504028320312, "learning_rate": 9.941017316017317e-06, "loss": 30.0253, "step": 17754 }, { "epoch": 422.7402985074627, "grad_norm": 21.055727005004883, "learning_rate": 9.940476190476192e-06, "loss": 30.8306, "step": 17755 }, { "epoch": 422.7641791044776, "grad_norm": 25.98221206665039, "learning_rate": 9.939935064935066e-06, "loss": 30.4435, "step": 17756 }, { "epoch": 422.78805970149256, "grad_norm": 17.11972427368164, "learning_rate": 9.939393939393939e-06, "loss": 30.8077, "step": 17757 }, { "epoch": 422.81194029850747, "grad_norm": 20.359371185302734, "learning_rate": 9.938852813852814e-06, "loss": 29.795, "step": 17758 }, { "epoch": 422.8358208955224, "grad_norm": 25.94600486755371, "learning_rate": 9.93831168831169e-06, "loss": 31.2549, "step": 17759 }, { "epoch": 422.85970149253734, "grad_norm": 16.56966781616211, "learning_rate": 9.937770562770563e-06, "loss": 29.1916, "step": 17760 }, { "epoch": 422.88358208955225, "grad_norm": 31.18387222290039, "learning_rate": 9.937229437229437e-06, "loss": 30.9677, "step": 17761 }, { "epoch": 422.90746268656716, "grad_norm": 18.86530303955078, "learning_rate": 9.936688311688312e-06, "loss": 29.9417, "step": 17762 }, { "epoch": 422.93134328358207, "grad_norm": 30.287748336791992, "learning_rate": 9.936147186147188e-06, "loss": 31.2346, "step": 17763 }, { "epoch": 422.95522388059703, "grad_norm": 22.606487274169922, "learning_rate": 9.935606060606061e-06, "loss": 30.5415, "step": 17764 }, { "epoch": 422.97910447761194, "grad_norm": 22.81735610961914, "learning_rate": 9.935064935064936e-06, "loss": 30.0585, "step": 17765 }, { "epoch": 423.0, "grad_norm": 24.056591033935547, "learning_rate": 9.93452380952381e-06, "loss": 26.9734, "step": 17766 }, { "epoch": 423.0238805970149, "grad_norm": 19.35657501220703, "learning_rate": 9.933982683982685e-06, "loss": 29.8204, "step": 17767 }, { "epoch": 423.0477611940299, "grad_norm": 30.62700843811035, "learning_rate": 9.933441558441559e-06, "loss": 31.759, "step": 17768 }, { "epoch": 423.0716417910448, "grad_norm": 22.67262840270996, "learning_rate": 9.932900432900434e-06, "loss": 29.912, "step": 17769 }, { "epoch": 423.0955223880597, "grad_norm": 19.989831924438477, "learning_rate": 9.932359307359308e-06, "loss": 30.7858, "step": 17770 }, { "epoch": 423.1194029850746, "grad_norm": 31.967315673828125, "learning_rate": 9.931818181818183e-06, "loss": 30.787, "step": 17771 }, { "epoch": 423.14328358208957, "grad_norm": 21.397275924682617, "learning_rate": 9.931277056277057e-06, "loss": 30.4314, "step": 17772 }, { "epoch": 423.1671641791045, "grad_norm": 35.429481506347656, "learning_rate": 9.930735930735932e-06, "loss": 29.9019, "step": 17773 }, { "epoch": 423.1910447761194, "grad_norm": 25.119787216186523, "learning_rate": 9.930194805194806e-06, "loss": 30.2204, "step": 17774 }, { "epoch": 423.21492537313435, "grad_norm": 28.95484733581543, "learning_rate": 9.929653679653681e-06, "loss": 30.3761, "step": 17775 }, { "epoch": 423.23880597014926, "grad_norm": 27.173364639282227, "learning_rate": 9.929112554112556e-06, "loss": 29.5577, "step": 17776 }, { "epoch": 423.26268656716417, "grad_norm": 28.711408615112305, "learning_rate": 9.92857142857143e-06, "loss": 31.2918, "step": 17777 }, { "epoch": 423.28656716417913, "grad_norm": 31.441675186157227, "learning_rate": 9.928030303030303e-06, "loss": 31.5406, "step": 17778 }, { "epoch": 423.31044776119404, "grad_norm": 23.445634841918945, "learning_rate": 9.927489177489179e-06, "loss": 30.5159, "step": 17779 }, { "epoch": 423.33432835820895, "grad_norm": 32.985877990722656, "learning_rate": 9.926948051948054e-06, "loss": 30.7025, "step": 17780 }, { "epoch": 423.35820895522386, "grad_norm": 22.22218894958496, "learning_rate": 9.926406926406928e-06, "loss": 28.941, "step": 17781 }, { "epoch": 423.3820895522388, "grad_norm": 34.304080963134766, "learning_rate": 9.925865800865801e-06, "loss": 30.4497, "step": 17782 }, { "epoch": 423.40597014925373, "grad_norm": 25.405498504638672, "learning_rate": 9.925324675324677e-06, "loss": 31.6932, "step": 17783 }, { "epoch": 423.42985074626864, "grad_norm": 32.971370697021484, "learning_rate": 9.92478354978355e-06, "loss": 30.6665, "step": 17784 }, { "epoch": 423.4537313432836, "grad_norm": 21.468421936035156, "learning_rate": 9.924242424242425e-06, "loss": 29.4264, "step": 17785 }, { "epoch": 423.4776119402985, "grad_norm": 31.87540626525879, "learning_rate": 9.923701298701299e-06, "loss": 30.6439, "step": 17786 }, { "epoch": 423.5014925373134, "grad_norm": 21.964067459106445, "learning_rate": 9.923160173160173e-06, "loss": 31.0809, "step": 17787 }, { "epoch": 423.52537313432833, "grad_norm": 34.96088409423828, "learning_rate": 9.922619047619048e-06, "loss": 29.8497, "step": 17788 }, { "epoch": 423.5492537313433, "grad_norm": 24.022890090942383, "learning_rate": 9.922077922077923e-06, "loss": 30.354, "step": 17789 }, { "epoch": 423.5731343283582, "grad_norm": 39.60622787475586, "learning_rate": 9.921536796536797e-06, "loss": 29.1475, "step": 17790 }, { "epoch": 423.5970149253731, "grad_norm": 28.349515914916992, "learning_rate": 9.920995670995672e-06, "loss": 31.3577, "step": 17791 }, { "epoch": 423.6208955223881, "grad_norm": 33.882591247558594, "learning_rate": 9.920454545454546e-06, "loss": 29.6474, "step": 17792 }, { "epoch": 423.644776119403, "grad_norm": 27.70705223083496, "learning_rate": 9.919913419913421e-06, "loss": 31.2927, "step": 17793 }, { "epoch": 423.6686567164179, "grad_norm": 35.434532165527344, "learning_rate": 9.919372294372295e-06, "loss": 29.8524, "step": 17794 }, { "epoch": 423.6925373134328, "grad_norm": NaN, "learning_rate": 9.91883116883117e-06, "loss": 52.2435, "step": 17795 }, { "epoch": 423.7164179104478, "grad_norm": 26.123586654663086, "learning_rate": 9.91883116883117e-06, "loss": 31.4211, "step": 17796 }, { "epoch": 423.7402985074627, "grad_norm": 38.33211135864258, "learning_rate": 9.918290043290044e-06, "loss": 30.4635, "step": 17797 }, { "epoch": 423.7641791044776, "grad_norm": 30.718141555786133, "learning_rate": 9.917748917748919e-06, "loss": 29.4099, "step": 17798 }, { "epoch": 423.78805970149256, "grad_norm": 34.1279411315918, "learning_rate": 9.917207792207792e-06, "loss": 30.2232, "step": 17799 }, { "epoch": 423.81194029850747, "grad_norm": 30.384740829467773, "learning_rate": 9.916666666666668e-06, "loss": 30.7991, "step": 17800 }, { "epoch": 423.8358208955224, "grad_norm": 29.308504104614258, "learning_rate": 9.916125541125541e-06, "loss": 31.0372, "step": 17801 }, { "epoch": 423.85970149253734, "grad_norm": 27.05010223388672, "learning_rate": 9.915584415584417e-06, "loss": 30.4624, "step": 17802 }, { "epoch": 423.88358208955225, "grad_norm": 26.148622512817383, "learning_rate": 9.915043290043292e-06, "loss": 30.948, "step": 17803 }, { "epoch": 423.90746268656716, "grad_norm": 26.995044708251953, "learning_rate": 9.914502164502166e-06, "loss": 30.5287, "step": 17804 }, { "epoch": 423.93134328358207, "grad_norm": 18.846092224121094, "learning_rate": 9.913961038961039e-06, "loss": 30.8986, "step": 17805 }, { "epoch": 423.95522388059703, "grad_norm": 37.53959274291992, "learning_rate": 9.913419913419914e-06, "loss": 30.9469, "step": 17806 }, { "epoch": 423.97910447761194, "grad_norm": 27.74464988708496, "learning_rate": 9.91287878787879e-06, "loss": 30.7162, "step": 17807 }, { "epoch": 424.0, "grad_norm": 35.00090789794922, "learning_rate": 9.912337662337663e-06, "loss": 27.6168, "step": 17808 }, { "epoch": 424.0238805970149, "grad_norm": 33.48642349243164, "learning_rate": 9.911796536796537e-06, "loss": 29.8008, "step": 17809 }, { "epoch": 424.0477611940299, "grad_norm": 35.65671157836914, "learning_rate": 9.911255411255412e-06, "loss": 29.1094, "step": 17810 }, { "epoch": 424.0716417910448, "grad_norm": 30.726531982421875, "learning_rate": 9.910714285714288e-06, "loss": 31.0795, "step": 17811 }, { "epoch": 424.0955223880597, "grad_norm": 35.1904411315918, "learning_rate": 9.910173160173161e-06, "loss": 30.6741, "step": 17812 }, { "epoch": 424.1194029850746, "grad_norm": 26.820903778076172, "learning_rate": 9.909632034632035e-06, "loss": 31.4292, "step": 17813 }, { "epoch": 424.14328358208957, "grad_norm": 39.136512756347656, "learning_rate": 9.90909090909091e-06, "loss": 30.6442, "step": 17814 }, { "epoch": 424.1671641791045, "grad_norm": 33.135902404785156, "learning_rate": 9.908549783549785e-06, "loss": 31.1797, "step": 17815 }, { "epoch": 424.1910447761194, "grad_norm": 33.562984466552734, "learning_rate": 9.908008658008659e-06, "loss": 31.4799, "step": 17816 }, { "epoch": 424.21492537313435, "grad_norm": 31.27215576171875, "learning_rate": 9.907467532467533e-06, "loss": 30.1781, "step": 17817 }, { "epoch": 424.23880597014926, "grad_norm": 31.56283950805664, "learning_rate": 9.906926406926408e-06, "loss": 30.6477, "step": 17818 }, { "epoch": 424.26268656716417, "grad_norm": 29.7825927734375, "learning_rate": 9.906385281385283e-06, "loss": 28.9348, "step": 17819 }, { "epoch": 424.28656716417913, "grad_norm": 35.68899917602539, "learning_rate": 9.905844155844157e-06, "loss": 30.0048, "step": 17820 }, { "epoch": 424.31044776119404, "grad_norm": 30.14948844909668, "learning_rate": 9.90530303030303e-06, "loss": 30.2308, "step": 17821 }, { "epoch": 424.33432835820895, "grad_norm": 38.211917877197266, "learning_rate": 9.904761904761906e-06, "loss": 31.5832, "step": 17822 }, { "epoch": 424.35820895522386, "grad_norm": 34.006839752197266, "learning_rate": 9.904220779220781e-06, "loss": 29.7783, "step": 17823 }, { "epoch": 424.3820895522388, "grad_norm": 34.897071838378906, "learning_rate": 9.903679653679655e-06, "loss": 29.6908, "step": 17824 }, { "epoch": 424.40597014925373, "grad_norm": 34.3736572265625, "learning_rate": 9.90313852813853e-06, "loss": 30.3572, "step": 17825 }, { "epoch": 424.42985074626864, "grad_norm": 31.942798614501953, "learning_rate": 9.902597402597403e-06, "loss": 30.1564, "step": 17826 }, { "epoch": 424.4537313432836, "grad_norm": 28.030567169189453, "learning_rate": 9.902056277056277e-06, "loss": 31.4503, "step": 17827 }, { "epoch": 424.4776119402985, "grad_norm": 34.852108001708984, "learning_rate": 9.901515151515152e-06, "loss": 30.7671, "step": 17828 }, { "epoch": 424.5014925373134, "grad_norm": 31.828214645385742, "learning_rate": 9.900974025974028e-06, "loss": 30.4066, "step": 17829 }, { "epoch": 424.52537313432833, "grad_norm": 35.93915557861328, "learning_rate": 9.900432900432901e-06, "loss": 30.2484, "step": 17830 }, { "epoch": 424.5492537313433, "grad_norm": 31.057619094848633, "learning_rate": 9.899891774891775e-06, "loss": 29.3562, "step": 17831 }, { "epoch": 424.5731343283582, "grad_norm": 32.44815444946289, "learning_rate": 9.89935064935065e-06, "loss": 30.7158, "step": 17832 }, { "epoch": 424.5970149253731, "grad_norm": 29.92171859741211, "learning_rate": 9.898809523809525e-06, "loss": 31.3564, "step": 17833 }, { "epoch": 424.6208955223881, "grad_norm": 35.34208679199219, "learning_rate": 9.898268398268399e-06, "loss": 30.372, "step": 17834 }, { "epoch": 424.644776119403, "grad_norm": 32.69564437866211, "learning_rate": 9.897727272727273e-06, "loss": 31.1831, "step": 17835 }, { "epoch": 424.6686567164179, "grad_norm": 32.14631652832031, "learning_rate": 9.897186147186148e-06, "loss": 30.3639, "step": 17836 }, { "epoch": 424.6925373134328, "grad_norm": 28.59247398376465, "learning_rate": 9.896645021645023e-06, "loss": 30.2063, "step": 17837 }, { "epoch": 424.7164179104478, "grad_norm": 37.333717346191406, "learning_rate": 9.896103896103897e-06, "loss": 30.7617, "step": 17838 }, { "epoch": 424.7402985074627, "grad_norm": NaN, "learning_rate": 9.89556277056277e-06, "loss": 52.2572, "step": 17839 }, { "epoch": 424.7641791044776, "grad_norm": 30.809633255004883, "learning_rate": 9.89556277056277e-06, "loss": 29.9285, "step": 17840 }, { "epoch": 424.78805970149256, "grad_norm": 31.57054901123047, "learning_rate": 9.895021645021646e-06, "loss": 30.6814, "step": 17841 }, { "epoch": 424.81194029850747, "grad_norm": 27.44460105895996, "learning_rate": 9.894480519480521e-06, "loss": 29.5906, "step": 17842 }, { "epoch": 424.8358208955224, "grad_norm": 35.57521057128906, "learning_rate": 9.893939393939395e-06, "loss": 29.9667, "step": 17843 }, { "epoch": 424.85970149253734, "grad_norm": 31.476011276245117, "learning_rate": 9.893398268398268e-06, "loss": 30.0742, "step": 17844 }, { "epoch": 424.88358208955225, "grad_norm": 33.79240036010742, "learning_rate": 9.892857142857143e-06, "loss": 31.2717, "step": 17845 }, { "epoch": 424.90746268656716, "grad_norm": 29.751890182495117, "learning_rate": 9.892316017316019e-06, "loss": 30.2951, "step": 17846 }, { "epoch": 424.93134328358207, "grad_norm": 32.82600402832031, "learning_rate": 9.891774891774892e-06, "loss": 31.0297, "step": 17847 }, { "epoch": 424.95522388059703, "grad_norm": 29.788423538208008, "learning_rate": 9.891233766233766e-06, "loss": 31.3126, "step": 17848 }, { "epoch": 424.97910447761194, "grad_norm": 32.77175521850586, "learning_rate": 9.890692640692641e-06, "loss": 29.8603, "step": 17849 }, { "epoch": 425.0, "grad_norm": 26.2161922454834, "learning_rate": 9.890151515151517e-06, "loss": 26.7776, "step": 17850 }, { "epoch": 425.0238805970149, "grad_norm": 34.68898391723633, "learning_rate": 9.88961038961039e-06, "loss": 30.9171, "step": 17851 }, { "epoch": 425.0477611940299, "grad_norm": 30.301311492919922, "learning_rate": 9.889069264069265e-06, "loss": 30.7368, "step": 17852 }, { "epoch": 425.0716417910448, "grad_norm": 37.42303466796875, "learning_rate": 9.888528138528139e-06, "loss": 30.7461, "step": 17853 }, { "epoch": 425.0955223880597, "grad_norm": 34.15085983276367, "learning_rate": 9.887987012987014e-06, "loss": 30.8722, "step": 17854 }, { "epoch": 425.1194029850746, "grad_norm": 28.95452880859375, "learning_rate": 9.887445887445888e-06, "loss": 29.8098, "step": 17855 }, { "epoch": 425.14328358208957, "grad_norm": 30.111637115478516, "learning_rate": 9.886904761904763e-06, "loss": 30.7687, "step": 17856 }, { "epoch": 425.1671641791045, "grad_norm": 31.200292587280273, "learning_rate": 9.886363636363637e-06, "loss": 29.3525, "step": 17857 }, { "epoch": 425.1910447761194, "grad_norm": 25.285808563232422, "learning_rate": 9.885822510822512e-06, "loss": 29.628, "step": 17858 }, { "epoch": 425.21492537313435, "grad_norm": 36.19496536254883, "learning_rate": 9.885281385281386e-06, "loss": 30.054, "step": 17859 }, { "epoch": 425.23880597014926, "grad_norm": 29.875398635864258, "learning_rate": 9.884740259740261e-06, "loss": 31.2073, "step": 17860 }, { "epoch": 425.26268656716417, "grad_norm": 33.963462829589844, "learning_rate": 9.884199134199135e-06, "loss": 30.4521, "step": 17861 }, { "epoch": 425.28656716417913, "grad_norm": 33.140625, "learning_rate": 9.88365800865801e-06, "loss": 30.9789, "step": 17862 }, { "epoch": 425.31044776119404, "grad_norm": 33.85870361328125, "learning_rate": 9.883116883116885e-06, "loss": 30.4421, "step": 17863 }, { "epoch": 425.33432835820895, "grad_norm": 29.22146987915039, "learning_rate": 9.882575757575759e-06, "loss": 30.8357, "step": 17864 }, { "epoch": 425.35820895522386, "grad_norm": 30.87940788269043, "learning_rate": 9.882034632034632e-06, "loss": 29.9467, "step": 17865 }, { "epoch": 425.3820895522388, "grad_norm": 27.88400650024414, "learning_rate": 9.881493506493506e-06, "loss": 29.1242, "step": 17866 }, { "epoch": 425.40597014925373, "grad_norm": 32.36547088623047, "learning_rate": 9.880952380952381e-06, "loss": 30.4286, "step": 17867 }, { "epoch": 425.42985074626864, "grad_norm": 31.14236068725586, "learning_rate": 9.880411255411257e-06, "loss": 30.7921, "step": 17868 }, { "epoch": 425.4537313432836, "grad_norm": 34.59557342529297, "learning_rate": 9.87987012987013e-06, "loss": 30.5107, "step": 17869 }, { "epoch": 425.4776119402985, "grad_norm": 29.180438995361328, "learning_rate": 9.879329004329004e-06, "loss": 30.3648, "step": 17870 }, { "epoch": 425.5014925373134, "grad_norm": 39.44462966918945, "learning_rate": 9.87878787878788e-06, "loss": 30.5324, "step": 17871 }, { "epoch": 425.52537313432833, "grad_norm": 34.72891616821289, "learning_rate": 9.878246753246754e-06, "loss": 30.7565, "step": 17872 }, { "epoch": 425.5492537313433, "grad_norm": 30.731353759765625, "learning_rate": 9.877705627705628e-06, "loss": 30.7171, "step": 17873 }, { "epoch": 425.5731343283582, "grad_norm": 30.493850708007812, "learning_rate": 9.877164502164503e-06, "loss": 30.4339, "step": 17874 }, { "epoch": 425.5970149253731, "grad_norm": 32.21706008911133, "learning_rate": 9.876623376623377e-06, "loss": 30.1464, "step": 17875 }, { "epoch": 425.6208955223881, "grad_norm": 30.330780029296875, "learning_rate": 9.876082251082252e-06, "loss": 30.5009, "step": 17876 }, { "epoch": 425.644776119403, "grad_norm": 32.3734016418457, "learning_rate": 9.875541125541126e-06, "loss": 30.3951, "step": 17877 }, { "epoch": 425.6686567164179, "grad_norm": 29.343202590942383, "learning_rate": 9.875000000000001e-06, "loss": 30.2334, "step": 17878 }, { "epoch": 425.6925373134328, "grad_norm": 29.87128257751465, "learning_rate": 9.874458874458875e-06, "loss": 30.1809, "step": 17879 }, { "epoch": 425.7164179104478, "grad_norm": 27.3814754486084, "learning_rate": 9.87391774891775e-06, "loss": 30.4723, "step": 17880 }, { "epoch": 425.7402985074627, "grad_norm": 33.278907775878906, "learning_rate": 9.873376623376624e-06, "loss": 29.45, "step": 17881 }, { "epoch": 425.7641791044776, "grad_norm": 28.267608642578125, "learning_rate": 9.872835497835499e-06, "loss": 30.5861, "step": 17882 }, { "epoch": 425.78805970149256, "grad_norm": 33.01785659790039, "learning_rate": 9.872294372294373e-06, "loss": 30.4724, "step": 17883 }, { "epoch": 425.81194029850747, "grad_norm": 29.440507888793945, "learning_rate": 9.871753246753248e-06, "loss": 29.078, "step": 17884 }, { "epoch": 425.8358208955224, "grad_norm": 31.783506393432617, "learning_rate": 9.871212121212121e-06, "loss": 30.31, "step": 17885 }, { "epoch": 425.85970149253734, "grad_norm": 27.963035583496094, "learning_rate": 9.870670995670997e-06, "loss": 31.2888, "step": 17886 }, { "epoch": 425.88358208955225, "grad_norm": 32.07109069824219, "learning_rate": 9.87012987012987e-06, "loss": 31.0465, "step": 17887 }, { "epoch": 425.90746268656716, "grad_norm": 27.297204971313477, "learning_rate": 9.869588744588746e-06, "loss": 30.2904, "step": 17888 }, { "epoch": 425.93134328358207, "grad_norm": 38.54228210449219, "learning_rate": 9.869047619047621e-06, "loss": 31.458, "step": 17889 }, { "epoch": 425.95522388059703, "grad_norm": 31.75337028503418, "learning_rate": 9.868506493506495e-06, "loss": 29.7941, "step": 17890 }, { "epoch": 425.97910447761194, "grad_norm": 34.617706298828125, "learning_rate": 9.867965367965368e-06, "loss": 30.7023, "step": 17891 }, { "epoch": 426.0, "grad_norm": 26.294721603393555, "learning_rate": 9.867424242424243e-06, "loss": 26.6909, "step": 17892 }, { "epoch": 426.0238805970149, "grad_norm": 30.75958824157715, "learning_rate": 9.866883116883119e-06, "loss": 31.2395, "step": 17893 }, { "epoch": 426.0477611940299, "grad_norm": 25.127758026123047, "learning_rate": 9.866341991341992e-06, "loss": 29.0264, "step": 17894 }, { "epoch": 426.0716417910448, "grad_norm": 33.18397903442383, "learning_rate": 9.865800865800866e-06, "loss": 30.1247, "step": 17895 }, { "epoch": 426.0955223880597, "grad_norm": 29.049959182739258, "learning_rate": 9.865259740259741e-06, "loss": 30.354, "step": 17896 }, { "epoch": 426.1194029850746, "grad_norm": 31.164453506469727, "learning_rate": 9.864718614718617e-06, "loss": 30.3468, "step": 17897 }, { "epoch": 426.14328358208957, "grad_norm": 28.106958389282227, "learning_rate": 9.86417748917749e-06, "loss": 30.1245, "step": 17898 }, { "epoch": 426.1671641791045, "grad_norm": 36.262516021728516, "learning_rate": 9.863636363636364e-06, "loss": 30.0848, "step": 17899 }, { "epoch": 426.1910447761194, "grad_norm": 28.52217674255371, "learning_rate": 9.863095238095239e-06, "loss": 30.2682, "step": 17900 }, { "epoch": 426.21492537313435, "grad_norm": 34.440555572509766, "learning_rate": 9.862554112554114e-06, "loss": 29.7051, "step": 17901 }, { "epoch": 426.23880597014926, "grad_norm": 28.033153533935547, "learning_rate": 9.862012987012988e-06, "loss": 30.5083, "step": 17902 }, { "epoch": 426.26268656716417, "grad_norm": 34.20317459106445, "learning_rate": 9.861471861471862e-06, "loss": 31.9734, "step": 17903 }, { "epoch": 426.28656716417913, "grad_norm": 27.240867614746094, "learning_rate": 9.860930735930737e-06, "loss": 30.4648, "step": 17904 }, { "epoch": 426.31044776119404, "grad_norm": 33.89701461791992, "learning_rate": 9.86038961038961e-06, "loss": 31.649, "step": 17905 }, { "epoch": 426.33432835820895, "grad_norm": 31.373794555664062, "learning_rate": 9.859848484848486e-06, "loss": 29.3744, "step": 17906 }, { "epoch": 426.35820895522386, "grad_norm": 34.37152099609375, "learning_rate": 9.85930735930736e-06, "loss": 29.9243, "step": 17907 }, { "epoch": 426.3820895522388, "grad_norm": 28.36467170715332, "learning_rate": 9.858766233766235e-06, "loss": 31.0966, "step": 17908 }, { "epoch": 426.40597014925373, "grad_norm": 30.59943962097168, "learning_rate": 9.858225108225108e-06, "loss": 29.8116, "step": 17909 }, { "epoch": 426.42985074626864, "grad_norm": 26.82291030883789, "learning_rate": 9.857683982683984e-06, "loss": 30.8499, "step": 17910 }, { "epoch": 426.4537313432836, "grad_norm": 31.674299240112305, "learning_rate": 9.857142857142859e-06, "loss": 30.462, "step": 17911 }, { "epoch": 426.4776119402985, "grad_norm": 19.16553497314453, "learning_rate": 9.856601731601732e-06, "loss": 29.5794, "step": 17912 }, { "epoch": 426.5014925373134, "grad_norm": 34.04121780395508, "learning_rate": 9.856060606060606e-06, "loss": 30.4825, "step": 17913 }, { "epoch": 426.52537313432833, "grad_norm": 25.557682037353516, "learning_rate": 9.855519480519481e-06, "loss": 30.9062, "step": 17914 }, { "epoch": 426.5492537313433, "grad_norm": 36.60789108276367, "learning_rate": 9.854978354978357e-06, "loss": 29.3358, "step": 17915 }, { "epoch": 426.5731343283582, "grad_norm": 37.37985610961914, "learning_rate": 9.85443722943723e-06, "loss": 30.7294, "step": 17916 }, { "epoch": 426.5970149253731, "grad_norm": 26.243133544921875, "learning_rate": 9.853896103896104e-06, "loss": 30.779, "step": 17917 }, { "epoch": 426.6208955223881, "grad_norm": 27.727338790893555, "learning_rate": 9.853354978354979e-06, "loss": 30.1231, "step": 17918 }, { "epoch": 426.644776119403, "grad_norm": 29.845748901367188, "learning_rate": 9.852813852813854e-06, "loss": 29.3638, "step": 17919 }, { "epoch": 426.6686567164179, "grad_norm": 23.25604820251465, "learning_rate": 9.852272727272728e-06, "loss": 30.5057, "step": 17920 }, { "epoch": 426.6925373134328, "grad_norm": 30.109285354614258, "learning_rate": 9.851731601731602e-06, "loss": 31.1297, "step": 17921 }, { "epoch": 426.7164179104478, "grad_norm": 22.888425827026367, "learning_rate": 9.851190476190477e-06, "loss": 30.2449, "step": 17922 }, { "epoch": 426.7402985074627, "grad_norm": 28.549480438232422, "learning_rate": 9.850649350649352e-06, "loss": 29.7814, "step": 17923 }, { "epoch": 426.7641791044776, "grad_norm": 26.971939086914062, "learning_rate": 9.850108225108226e-06, "loss": 30.6323, "step": 17924 }, { "epoch": 426.78805970149256, "grad_norm": 31.966575622558594, "learning_rate": 9.8495670995671e-06, "loss": 30.7119, "step": 17925 }, { "epoch": 426.81194029850747, "grad_norm": 27.02059555053711, "learning_rate": 9.849025974025975e-06, "loss": 31.3519, "step": 17926 }, { "epoch": 426.8358208955224, "grad_norm": 27.787981033325195, "learning_rate": 9.84848484848485e-06, "loss": 30.2269, "step": 17927 }, { "epoch": 426.85970149253734, "grad_norm": 26.651683807373047, "learning_rate": 9.847943722943724e-06, "loss": 31.4751, "step": 17928 }, { "epoch": 426.88358208955225, "grad_norm": 24.847896575927734, "learning_rate": 9.847402597402597e-06, "loss": 29.7752, "step": 17929 }, { "epoch": 426.90746268656716, "grad_norm": 19.923368453979492, "learning_rate": 9.846861471861473e-06, "loss": 30.3849, "step": 17930 }, { "epoch": 426.93134328358207, "grad_norm": 19.710582733154297, "learning_rate": 9.846320346320348e-06, "loss": 30.8312, "step": 17931 }, { "epoch": 426.95522388059703, "grad_norm": 23.509376525878906, "learning_rate": 9.845779220779221e-06, "loss": 30.1011, "step": 17932 }, { "epoch": 426.97910447761194, "grad_norm": 20.309120178222656, "learning_rate": 9.845238095238097e-06, "loss": 30.4874, "step": 17933 }, { "epoch": 427.0, "grad_norm": 17.632957458496094, "learning_rate": 9.84469696969697e-06, "loss": 26.1494, "step": 17934 }, { "epoch": 427.0238805970149, "grad_norm": 22.372276306152344, "learning_rate": 9.844155844155846e-06, "loss": 30.3935, "step": 17935 }, { "epoch": 427.0477611940299, "grad_norm": 15.630749702453613, "learning_rate": 9.84361471861472e-06, "loss": 29.0292, "step": 17936 }, { "epoch": 427.0716417910448, "grad_norm": 22.172382354736328, "learning_rate": 9.843073593073595e-06, "loss": 31.2715, "step": 17937 }, { "epoch": 427.0955223880597, "grad_norm": 23.19870376586914, "learning_rate": 9.842532467532468e-06, "loss": 30.2725, "step": 17938 }, { "epoch": 427.1194029850746, "grad_norm": 18.850271224975586, "learning_rate": 9.841991341991343e-06, "loss": 30.0919, "step": 17939 }, { "epoch": 427.14328358208957, "grad_norm": 25.05748748779297, "learning_rate": 9.841450216450217e-06, "loss": 30.5763, "step": 17940 }, { "epoch": 427.1671641791045, "grad_norm": 16.73858070373535, "learning_rate": 9.840909090909092e-06, "loss": 29.7672, "step": 17941 }, { "epoch": 427.1910447761194, "grad_norm": 28.96164894104004, "learning_rate": 9.840367965367966e-06, "loss": 30.946, "step": 17942 }, { "epoch": 427.21492537313435, "grad_norm": 22.287185668945312, "learning_rate": 9.839826839826841e-06, "loss": 29.6005, "step": 17943 }, { "epoch": 427.23880597014926, "grad_norm": 27.769054412841797, "learning_rate": 9.839285714285715e-06, "loss": 30.3361, "step": 17944 }, { "epoch": 427.26268656716417, "grad_norm": 24.134944915771484, "learning_rate": 9.83874458874459e-06, "loss": 30.3562, "step": 17945 }, { "epoch": 427.28656716417913, "grad_norm": 22.095304489135742, "learning_rate": 9.838203463203464e-06, "loss": 29.4175, "step": 17946 }, { "epoch": 427.31044776119404, "grad_norm": 23.975479125976562, "learning_rate": 9.837662337662337e-06, "loss": 30.8257, "step": 17947 }, { "epoch": 427.33432835820895, "grad_norm": 20.785980224609375, "learning_rate": 9.837121212121213e-06, "loss": 29.4275, "step": 17948 }, { "epoch": 427.35820895522386, "grad_norm": 22.19607162475586, "learning_rate": 9.836580086580088e-06, "loss": 29.9369, "step": 17949 }, { "epoch": 427.3820895522388, "grad_norm": 21.125015258789062, "learning_rate": 9.836038961038962e-06, "loss": 31.9682, "step": 17950 }, { "epoch": 427.40597014925373, "grad_norm": 21.566129684448242, "learning_rate": 9.835497835497835e-06, "loss": 29.7118, "step": 17951 }, { "epoch": 427.42985074626864, "grad_norm": 18.172327041625977, "learning_rate": 9.83495670995671e-06, "loss": 29.7498, "step": 17952 }, { "epoch": 427.4537313432836, "grad_norm": 24.794818878173828, "learning_rate": 9.834415584415586e-06, "loss": 30.1251, "step": 17953 }, { "epoch": 427.4776119402985, "grad_norm": 18.634353637695312, "learning_rate": 9.83387445887446e-06, "loss": 30.5711, "step": 17954 }, { "epoch": 427.5014925373134, "grad_norm": 22.368017196655273, "learning_rate": 9.833333333333333e-06, "loss": 30.6314, "step": 17955 }, { "epoch": 427.52537313432833, "grad_norm": 21.320226669311523, "learning_rate": 9.832792207792208e-06, "loss": 31.0989, "step": 17956 }, { "epoch": 427.5492537313433, "grad_norm": 18.811485290527344, "learning_rate": 9.832251082251084e-06, "loss": 29.3738, "step": 17957 }, { "epoch": 427.5731343283582, "grad_norm": 19.80241584777832, "learning_rate": 9.831709956709957e-06, "loss": 30.3017, "step": 17958 }, { "epoch": 427.5970149253731, "grad_norm": 18.42851448059082, "learning_rate": 9.831168831168832e-06, "loss": 30.0681, "step": 17959 }, { "epoch": 427.6208955223881, "grad_norm": 22.667186737060547, "learning_rate": 9.830627705627706e-06, "loss": 31.2866, "step": 17960 }, { "epoch": 427.644776119403, "grad_norm": 16.108610153198242, "learning_rate": 9.830086580086581e-06, "loss": 29.6498, "step": 17961 }, { "epoch": 427.6686567164179, "grad_norm": 22.690263748168945, "learning_rate": 9.829545454545455e-06, "loss": 30.835, "step": 17962 }, { "epoch": 427.6925373134328, "grad_norm": 18.636512756347656, "learning_rate": 9.82900432900433e-06, "loss": 30.6828, "step": 17963 }, { "epoch": 427.7164179104478, "grad_norm": 19.8270263671875, "learning_rate": 9.828463203463204e-06, "loss": 30.089, "step": 17964 }, { "epoch": 427.7402985074627, "grad_norm": 20.09429359436035, "learning_rate": 9.827922077922079e-06, "loss": 30.4413, "step": 17965 }, { "epoch": 427.7641791044776, "grad_norm": 22.37393569946289, "learning_rate": 9.827380952380953e-06, "loss": 30.876, "step": 17966 }, { "epoch": 427.78805970149256, "grad_norm": 18.90312957763672, "learning_rate": 9.826839826839828e-06, "loss": 30.6051, "step": 17967 }, { "epoch": 427.81194029850747, "grad_norm": 23.74074935913086, "learning_rate": 9.826298701298702e-06, "loss": 30.7434, "step": 17968 }, { "epoch": 427.8358208955224, "grad_norm": 20.368953704833984, "learning_rate": 9.825757575757577e-06, "loss": 29.9398, "step": 17969 }, { "epoch": 427.85970149253734, "grad_norm": 20.267969131469727, "learning_rate": 9.825216450216452e-06, "loss": 30.475, "step": 17970 }, { "epoch": 427.88358208955225, "grad_norm": 20.809078216552734, "learning_rate": 9.824675324675326e-06, "loss": 31.1628, "step": 17971 }, { "epoch": 427.90746268656716, "grad_norm": 22.63227653503418, "learning_rate": 9.8241341991342e-06, "loss": 30.6279, "step": 17972 }, { "epoch": 427.93134328358207, "grad_norm": 22.542566299438477, "learning_rate": 9.823593073593075e-06, "loss": 29.9502, "step": 17973 }, { "epoch": 427.95522388059703, "grad_norm": 18.110464096069336, "learning_rate": 9.82305194805195e-06, "loss": 31.4847, "step": 17974 }, { "epoch": 427.97910447761194, "grad_norm": 25.007631301879883, "learning_rate": 9.822510822510824e-06, "loss": 31.5311, "step": 17975 }, { "epoch": 428.0, "grad_norm": 18.795978546142578, "learning_rate": 9.821969696969697e-06, "loss": 26.3636, "step": 17976 }, { "epoch": 428.0238805970149, "grad_norm": 18.240968704223633, "learning_rate": 9.821428571428573e-06, "loss": 31.3659, "step": 17977 }, { "epoch": 428.0477611940299, "grad_norm": 17.354169845581055, "learning_rate": 9.820887445887448e-06, "loss": 30.6544, "step": 17978 }, { "epoch": 428.0716417910448, "grad_norm": 18.9918270111084, "learning_rate": 9.820346320346321e-06, "loss": 30.1045, "step": 17979 }, { "epoch": 428.0955223880597, "grad_norm": 16.270517349243164, "learning_rate": 9.819805194805195e-06, "loss": 30.5998, "step": 17980 }, { "epoch": 428.1194029850746, "grad_norm": 16.979324340820312, "learning_rate": 9.81926406926407e-06, "loss": 30.3645, "step": 17981 }, { "epoch": 428.14328358208957, "grad_norm": 19.20657730102539, "learning_rate": 9.818722943722946e-06, "loss": 31.0501, "step": 17982 }, { "epoch": 428.1671641791045, "grad_norm": 18.77733612060547, "learning_rate": 9.81818181818182e-06, "loss": 30.3856, "step": 17983 }, { "epoch": 428.1910447761194, "grad_norm": 18.703338623046875, "learning_rate": 9.817640692640693e-06, "loss": 29.618, "step": 17984 }, { "epoch": 428.21492537313435, "grad_norm": 20.7010440826416, "learning_rate": 9.817099567099568e-06, "loss": 30.8886, "step": 17985 }, { "epoch": 428.23880597014926, "grad_norm": 17.985546112060547, "learning_rate": 9.816558441558442e-06, "loss": 30.6722, "step": 17986 }, { "epoch": 428.26268656716417, "grad_norm": 19.62334632873535, "learning_rate": 9.816017316017317e-06, "loss": 29.9925, "step": 17987 }, { "epoch": 428.28656716417913, "grad_norm": 18.24506950378418, "learning_rate": 9.81547619047619e-06, "loss": 31.1573, "step": 17988 }, { "epoch": 428.31044776119404, "grad_norm": 18.331899642944336, "learning_rate": 9.814935064935066e-06, "loss": 30.4798, "step": 17989 }, { "epoch": 428.33432835820895, "grad_norm": 20.825767517089844, "learning_rate": 9.81439393939394e-06, "loss": 30.6141, "step": 17990 }, { "epoch": 428.35820895522386, "grad_norm": 17.477787017822266, "learning_rate": 9.813852813852815e-06, "loss": 29.8966, "step": 17991 }, { "epoch": 428.3820895522388, "grad_norm": 17.410789489746094, "learning_rate": 9.813311688311688e-06, "loss": 30.443, "step": 17992 }, { "epoch": 428.40597014925373, "grad_norm": 21.99690818786621, "learning_rate": 9.812770562770564e-06, "loss": 30.2403, "step": 17993 }, { "epoch": 428.42985074626864, "grad_norm": 18.052631378173828, "learning_rate": 9.812229437229437e-06, "loss": 30.7037, "step": 17994 }, { "epoch": 428.4537313432836, "grad_norm": 19.822654724121094, "learning_rate": 9.811688311688313e-06, "loss": 29.4578, "step": 17995 }, { "epoch": 428.4776119402985, "grad_norm": 18.63534164428711, "learning_rate": 9.811147186147188e-06, "loss": 30.2552, "step": 17996 }, { "epoch": 428.5014925373134, "grad_norm": 22.506925582885742, "learning_rate": 9.810606060606061e-06, "loss": 30.118, "step": 17997 }, { "epoch": 428.52537313432833, "grad_norm": 19.12542152404785, "learning_rate": 9.810064935064935e-06, "loss": 29.1208, "step": 17998 }, { "epoch": 428.5492537313433, "grad_norm": 20.821836471557617, "learning_rate": 9.80952380952381e-06, "loss": 30.1457, "step": 17999 }, { "epoch": 428.5731343283582, "grad_norm": 18.658109664916992, "learning_rate": 9.808982683982686e-06, "loss": 28.8632, "step": 18000 }, { "epoch": 428.5970149253731, "grad_norm": 22.922346115112305, "learning_rate": 9.80844155844156e-06, "loss": 30.2293, "step": 18001 }, { "epoch": 428.6208955223881, "grad_norm": 16.36787223815918, "learning_rate": 9.807900432900433e-06, "loss": 29.8542, "step": 18002 }, { "epoch": 428.644776119403, "grad_norm": 22.274993896484375, "learning_rate": 9.807359307359308e-06, "loss": 30.4504, "step": 18003 }, { "epoch": 428.6686567164179, "grad_norm": NaN, "learning_rate": 9.806818181818183e-06, "loss": 30.018, "step": 18004 }, { "epoch": 428.6925373134328, "grad_norm": 17.916748046875, "learning_rate": 9.806818181818183e-06, "loss": 30.5479, "step": 18005 }, { "epoch": 428.7164179104478, "grad_norm": 23.17523765563965, "learning_rate": 9.806277056277057e-06, "loss": 30.4771, "step": 18006 }, { "epoch": 428.7402985074627, "grad_norm": 19.569843292236328, "learning_rate": 9.80573593073593e-06, "loss": 30.4651, "step": 18007 }, { "epoch": 428.7641791044776, "grad_norm": 21.045124053955078, "learning_rate": 9.805194805194806e-06, "loss": 29.7906, "step": 18008 }, { "epoch": 428.78805970149256, "grad_norm": 22.470186233520508, "learning_rate": 9.804653679653681e-06, "loss": 31.1689, "step": 18009 }, { "epoch": 428.81194029850747, "grad_norm": 18.39364242553711, "learning_rate": 9.804112554112555e-06, "loss": 30.7684, "step": 18010 }, { "epoch": 428.8358208955224, "grad_norm": 20.105195999145508, "learning_rate": 9.803571428571428e-06, "loss": 30.9044, "step": 18011 }, { "epoch": 428.85970149253734, "grad_norm": 16.686826705932617, "learning_rate": 9.803030303030304e-06, "loss": 30.3485, "step": 18012 }, { "epoch": 428.88358208955225, "grad_norm": 21.4024600982666, "learning_rate": 9.802489177489179e-06, "loss": 30.5889, "step": 18013 }, { "epoch": 428.90746268656716, "grad_norm": NaN, "learning_rate": 9.801948051948053e-06, "loss": 26.1148, "step": 18014 }, { "epoch": 428.93134328358207, "grad_norm": 20.984010696411133, "learning_rate": 9.801948051948053e-06, "loss": 29.6254, "step": 18015 }, { "epoch": 428.95522388059703, "grad_norm": 19.516313552856445, "learning_rate": 9.801406926406926e-06, "loss": 31.5041, "step": 18016 }, { "epoch": 428.97910447761194, "grad_norm": 22.676307678222656, "learning_rate": 9.800865800865802e-06, "loss": 30.2982, "step": 18017 }, { "epoch": 429.0, "grad_norm": 16.253156661987305, "learning_rate": 9.800324675324677e-06, "loss": 26.3313, "step": 18018 }, { "epoch": 429.0238805970149, "grad_norm": 18.35772705078125, "learning_rate": 9.79978354978355e-06, "loss": 28.7806, "step": 18019 }, { "epoch": 429.0477611940299, "grad_norm": 20.161684036254883, "learning_rate": 9.799242424242426e-06, "loss": 30.2068, "step": 18020 }, { "epoch": 429.0716417910448, "grad_norm": 18.514429092407227, "learning_rate": 9.7987012987013e-06, "loss": 30.3083, "step": 18021 }, { "epoch": 429.0955223880597, "grad_norm": 21.278491973876953, "learning_rate": 9.798160173160175e-06, "loss": 31.3024, "step": 18022 }, { "epoch": 429.1194029850746, "grad_norm": 19.238842010498047, "learning_rate": 9.797619047619048e-06, "loss": 30.5968, "step": 18023 }, { "epoch": 429.14328358208957, "grad_norm": 19.597755432128906, "learning_rate": 9.797077922077924e-06, "loss": 30.1577, "step": 18024 }, { "epoch": 429.1671641791045, "grad_norm": 18.52017593383789, "learning_rate": 9.796536796536797e-06, "loss": 31.0317, "step": 18025 }, { "epoch": 429.1910447761194, "grad_norm": 19.330345153808594, "learning_rate": 9.79599567099567e-06, "loss": 30.1967, "step": 18026 }, { "epoch": 429.21492537313435, "grad_norm": 20.696184158325195, "learning_rate": 9.795454545454546e-06, "loss": 29.8116, "step": 18027 }, { "epoch": 429.23880597014926, "grad_norm": 23.277179718017578, "learning_rate": 9.794913419913421e-06, "loss": 31.3263, "step": 18028 }, { "epoch": 429.26268656716417, "grad_norm": 17.244430541992188, "learning_rate": 9.794372294372295e-06, "loss": 29.495, "step": 18029 }, { "epoch": 429.28656716417913, "grad_norm": 28.546722412109375, "learning_rate": 9.793831168831169e-06, "loss": 30.5444, "step": 18030 }, { "epoch": 429.31044776119404, "grad_norm": 21.881196975708008, "learning_rate": 9.793290043290044e-06, "loss": 30.0764, "step": 18031 }, { "epoch": 429.33432835820895, "grad_norm": 20.317873001098633, "learning_rate": 9.79274891774892e-06, "loss": 30.2842, "step": 18032 }, { "epoch": 429.35820895522386, "grad_norm": 25.365812301635742, "learning_rate": 9.792207792207793e-06, "loss": 29.8889, "step": 18033 }, { "epoch": 429.3820895522388, "grad_norm": 21.7113037109375, "learning_rate": 9.791666666666666e-06, "loss": 31.2148, "step": 18034 }, { "epoch": 429.40597014925373, "grad_norm": 19.957426071166992, "learning_rate": 9.791125541125542e-06, "loss": 30.9801, "step": 18035 }, { "epoch": 429.42985074626864, "grad_norm": 28.227706909179688, "learning_rate": 9.790584415584417e-06, "loss": 29.9109, "step": 18036 }, { "epoch": 429.4537313432836, "grad_norm": 16.694480895996094, "learning_rate": 9.79004329004329e-06, "loss": 30.586, "step": 18037 }, { "epoch": 429.4776119402985, "grad_norm": 25.612768173217773, "learning_rate": 9.789502164502164e-06, "loss": 30.0729, "step": 18038 }, { "epoch": 429.5014925373134, "grad_norm": 19.783058166503906, "learning_rate": 9.78896103896104e-06, "loss": 30.5687, "step": 18039 }, { "epoch": 429.52537313432833, "grad_norm": 21.934432983398438, "learning_rate": 9.788419913419915e-06, "loss": 30.241, "step": 18040 }, { "epoch": 429.5492537313433, "grad_norm": 22.333999633789062, "learning_rate": 9.787878787878788e-06, "loss": 30.1466, "step": 18041 }, { "epoch": 429.5731343283582, "grad_norm": 22.895530700683594, "learning_rate": 9.787337662337664e-06, "loss": 29.9201, "step": 18042 }, { "epoch": 429.5970149253731, "grad_norm": 17.72591209411621, "learning_rate": 9.786796536796537e-06, "loss": 30.3627, "step": 18043 }, { "epoch": 429.6208955223881, "grad_norm": 28.418760299682617, "learning_rate": 9.786255411255413e-06, "loss": 30.4524, "step": 18044 }, { "epoch": 429.644776119403, "grad_norm": 20.38718032836914, "learning_rate": 9.785714285714286e-06, "loss": 29.7648, "step": 18045 }, { "epoch": 429.6686567164179, "grad_norm": 22.938461303710938, "learning_rate": 9.785173160173161e-06, "loss": 29.9673, "step": 18046 }, { "epoch": 429.6925373134328, "grad_norm": 27.806175231933594, "learning_rate": 9.784632034632035e-06, "loss": 29.2941, "step": 18047 }, { "epoch": 429.7164179104478, "grad_norm": 19.876920700073242, "learning_rate": 9.78409090909091e-06, "loss": 30.2551, "step": 18048 }, { "epoch": 429.7402985074627, "grad_norm": 27.202096939086914, "learning_rate": 9.783549783549784e-06, "loss": 29.598, "step": 18049 }, { "epoch": 429.7641791044776, "grad_norm": 22.663503646850586, "learning_rate": 9.78300865800866e-06, "loss": 30.6346, "step": 18050 }, { "epoch": 429.78805970149256, "grad_norm": 19.912015914916992, "learning_rate": 9.782467532467533e-06, "loss": 30.3092, "step": 18051 }, { "epoch": 429.81194029850747, "grad_norm": 25.106313705444336, "learning_rate": 9.781926406926408e-06, "loss": 30.9421, "step": 18052 }, { "epoch": 429.8358208955224, "grad_norm": 20.329668045043945, "learning_rate": 9.781385281385282e-06, "loss": 30.6523, "step": 18053 }, { "epoch": 429.85970149253734, "grad_norm": 18.845617294311523, "learning_rate": 9.780844155844157e-06, "loss": 30.7676, "step": 18054 }, { "epoch": 429.88358208955225, "grad_norm": 26.7338809967041, "learning_rate": 9.78030303030303e-06, "loss": 30.6298, "step": 18055 }, { "epoch": 429.90746268656716, "grad_norm": 17.853437423706055, "learning_rate": 9.779761904761906e-06, "loss": 30.2678, "step": 18056 }, { "epoch": 429.93134328358207, "grad_norm": 20.483516693115234, "learning_rate": 9.779220779220781e-06, "loss": 29.3205, "step": 18057 }, { "epoch": 429.95522388059703, "grad_norm": 21.360971450805664, "learning_rate": 9.778679653679655e-06, "loss": 30.1287, "step": 18058 }, { "epoch": 429.97910447761194, "grad_norm": 21.779895782470703, "learning_rate": 9.778138528138528e-06, "loss": 30.0274, "step": 18059 }, { "epoch": 430.0, "grad_norm": 21.29844093322754, "learning_rate": 9.777597402597404e-06, "loss": 27.3231, "step": 18060 }, { "epoch": 430.0238805970149, "grad_norm": 19.95744514465332, "learning_rate": 9.777056277056279e-06, "loss": 29.8417, "step": 18061 }, { "epoch": 430.0477611940299, "grad_norm": 18.681852340698242, "learning_rate": 9.776515151515153e-06, "loss": 29.8086, "step": 18062 }, { "epoch": 430.0716417910448, "grad_norm": 27.81563377380371, "learning_rate": 9.775974025974026e-06, "loss": 30.0835, "step": 18063 }, { "epoch": 430.0955223880597, "grad_norm": 20.64076805114746, "learning_rate": 9.775432900432902e-06, "loss": 31.3017, "step": 18064 }, { "epoch": 430.1194029850746, "grad_norm": 17.689733505249023, "learning_rate": 9.774891774891775e-06, "loss": 29.8934, "step": 18065 }, { "epoch": 430.14328358208957, "grad_norm": 18.394147872924805, "learning_rate": 9.77435064935065e-06, "loss": 30.7361, "step": 18066 }, { "epoch": 430.1671641791045, "grad_norm": 22.4614315032959, "learning_rate": 9.773809523809524e-06, "loss": 30.3341, "step": 18067 }, { "epoch": 430.1910447761194, "grad_norm": 19.07718849182129, "learning_rate": 9.7732683982684e-06, "loss": 29.995, "step": 18068 }, { "epoch": 430.21492537313435, "grad_norm": 19.843769073486328, "learning_rate": 9.772727272727273e-06, "loss": 29.9518, "step": 18069 }, { "epoch": 430.23880597014926, "grad_norm": 19.379444122314453, "learning_rate": 9.772186147186148e-06, "loss": 31.0508, "step": 18070 }, { "epoch": 430.26268656716417, "grad_norm": 25.515151977539062, "learning_rate": 9.771645021645022e-06, "loss": 30.3901, "step": 18071 }, { "epoch": 430.28656716417913, "grad_norm": 19.96921157836914, "learning_rate": 9.771103896103897e-06, "loss": 29.9685, "step": 18072 }, { "epoch": 430.31044776119404, "grad_norm": 17.278278350830078, "learning_rate": 9.77056277056277e-06, "loss": 30.2587, "step": 18073 }, { "epoch": 430.33432835820895, "grad_norm": 18.309537887573242, "learning_rate": 9.770021645021646e-06, "loss": 29.8807, "step": 18074 }, { "epoch": 430.35820895522386, "grad_norm": 21.972036361694336, "learning_rate": 9.76948051948052e-06, "loss": 29.7726, "step": 18075 }, { "epoch": 430.3820895522388, "grad_norm": 17.125043869018555, "learning_rate": 9.768939393939395e-06, "loss": 30.4803, "step": 18076 }, { "epoch": 430.40597014925373, "grad_norm": 20.584535598754883, "learning_rate": 9.768398268398269e-06, "loss": 29.1054, "step": 18077 }, { "epoch": 430.42985074626864, "grad_norm": 21.71653938293457, "learning_rate": 9.767857142857144e-06, "loss": 30.7956, "step": 18078 }, { "epoch": 430.4537313432836, "grad_norm": 22.659194946289062, "learning_rate": 9.767316017316019e-06, "loss": 31.0582, "step": 18079 }, { "epoch": 430.4776119402985, "grad_norm": 23.04308319091797, "learning_rate": 9.766774891774893e-06, "loss": 30.7528, "step": 18080 }, { "epoch": 430.5014925373134, "grad_norm": 18.38223648071289, "learning_rate": 9.766233766233766e-06, "loss": 29.7243, "step": 18081 }, { "epoch": 430.52537313432833, "grad_norm": 21.17744255065918, "learning_rate": 9.765692640692642e-06, "loss": 29.9287, "step": 18082 }, { "epoch": 430.5492537313433, "grad_norm": 23.022775650024414, "learning_rate": 9.765151515151517e-06, "loss": 30.4872, "step": 18083 }, { "epoch": 430.5731343283582, "grad_norm": 17.25739860534668, "learning_rate": 9.76461038961039e-06, "loss": 30.0265, "step": 18084 }, { "epoch": 430.5970149253731, "grad_norm": 19.320589065551758, "learning_rate": 9.764069264069264e-06, "loss": 29.8083, "step": 18085 }, { "epoch": 430.6208955223881, "grad_norm": 18.31188201904297, "learning_rate": 9.76352813852814e-06, "loss": 29.8401, "step": 18086 }, { "epoch": 430.644776119403, "grad_norm": 21.063247680664062, "learning_rate": 9.762987012987015e-06, "loss": 30.7492, "step": 18087 }, { "epoch": 430.6686567164179, "grad_norm": 17.298439025878906, "learning_rate": 9.762445887445888e-06, "loss": 31.1181, "step": 18088 }, { "epoch": 430.6925373134328, "grad_norm": 19.951534271240234, "learning_rate": 9.761904761904762e-06, "loss": 30.5114, "step": 18089 }, { "epoch": 430.7164179104478, "grad_norm": 17.378904342651367, "learning_rate": 9.761363636363637e-06, "loss": 30.5523, "step": 18090 }, { "epoch": 430.7402985074627, "grad_norm": 19.628141403198242, "learning_rate": 9.760822510822513e-06, "loss": 30.5202, "step": 18091 }, { "epoch": 430.7641791044776, "grad_norm": 18.593860626220703, "learning_rate": 9.760281385281386e-06, "loss": 30.9547, "step": 18092 }, { "epoch": 430.78805970149256, "grad_norm": 21.787429809570312, "learning_rate": 9.75974025974026e-06, "loss": 30.2461, "step": 18093 }, { "epoch": 430.81194029850747, "grad_norm": 19.232810974121094, "learning_rate": 9.759199134199135e-06, "loss": 29.9098, "step": 18094 }, { "epoch": 430.8358208955224, "grad_norm": 20.132171630859375, "learning_rate": 9.75865800865801e-06, "loss": 30.2782, "step": 18095 }, { "epoch": 430.85970149253734, "grad_norm": 21.953615188598633, "learning_rate": 9.758116883116884e-06, "loss": 30.2047, "step": 18096 }, { "epoch": 430.88358208955225, "grad_norm": 19.692167282104492, "learning_rate": 9.757575757575758e-06, "loss": 29.5148, "step": 18097 }, { "epoch": 430.90746268656716, "grad_norm": 19.286014556884766, "learning_rate": 9.757034632034633e-06, "loss": 30.6875, "step": 18098 }, { "epoch": 430.93134328358207, "grad_norm": 22.44078254699707, "learning_rate": 9.756493506493508e-06, "loss": 30.1331, "step": 18099 }, { "epoch": 430.95522388059703, "grad_norm": 25.140562057495117, "learning_rate": 9.755952380952382e-06, "loss": 30.417, "step": 18100 }, { "epoch": 430.97910447761194, "grad_norm": 19.807554244995117, "learning_rate": 9.755411255411255e-06, "loss": 29.6433, "step": 18101 }, { "epoch": 431.0, "grad_norm": 15.321440696716309, "learning_rate": 9.75487012987013e-06, "loss": 26.0771, "step": 18102 }, { "epoch": 431.0238805970149, "grad_norm": 21.101791381835938, "learning_rate": 9.754329004329006e-06, "loss": 29.9268, "step": 18103 }, { "epoch": 431.0477611940299, "grad_norm": 19.69845199584961, "learning_rate": 9.75378787878788e-06, "loss": 29.4053, "step": 18104 }, { "epoch": 431.0716417910448, "grad_norm": 22.732641220092773, "learning_rate": 9.753246753246755e-06, "loss": 30.7583, "step": 18105 }, { "epoch": 431.0955223880597, "grad_norm": 15.956716537475586, "learning_rate": 9.752705627705628e-06, "loss": 30.2716, "step": 18106 }, { "epoch": 431.1194029850746, "grad_norm": 28.313772201538086, "learning_rate": 9.752164502164502e-06, "loss": 31.3226, "step": 18107 }, { "epoch": 431.14328358208957, "grad_norm": 24.60181999206543, "learning_rate": 9.751623376623377e-06, "loss": 31.0822, "step": 18108 }, { "epoch": 431.1671641791045, "grad_norm": 18.626888275146484, "learning_rate": 9.751082251082253e-06, "loss": 30.4575, "step": 18109 }, { "epoch": 431.1910447761194, "grad_norm": 20.24835968017578, "learning_rate": 9.750541125541126e-06, "loss": 30.0248, "step": 18110 }, { "epoch": 431.21492537313435, "grad_norm": 22.693408966064453, "learning_rate": 9.75e-06, "loss": 31.5596, "step": 18111 }, { "epoch": 431.23880597014926, "grad_norm": 17.155376434326172, "learning_rate": 9.749458874458875e-06, "loss": 30.8917, "step": 18112 }, { "epoch": 431.26268656716417, "grad_norm": 21.3492431640625, "learning_rate": 9.74891774891775e-06, "loss": 29.6096, "step": 18113 }, { "epoch": 431.28656716417913, "grad_norm": 19.785463333129883, "learning_rate": 9.748376623376624e-06, "loss": 31.0222, "step": 18114 }, { "epoch": 431.31044776119404, "grad_norm": 20.90851402282715, "learning_rate": 9.747835497835498e-06, "loss": 30.3411, "step": 18115 }, { "epoch": 431.33432835820895, "grad_norm": 19.563947677612305, "learning_rate": 9.747294372294373e-06, "loss": 30.4188, "step": 18116 }, { "epoch": 431.35820895522386, "grad_norm": 16.971725463867188, "learning_rate": 9.746753246753248e-06, "loss": 29.6125, "step": 18117 }, { "epoch": 431.3820895522388, "grad_norm": 30.47445297241211, "learning_rate": 9.746212121212122e-06, "loss": 30.3827, "step": 18118 }, { "epoch": 431.40597014925373, "grad_norm": 19.661272048950195, "learning_rate": 9.745670995670995e-06, "loss": 30.1986, "step": 18119 }, { "epoch": 431.42985074626864, "grad_norm": 24.344816207885742, "learning_rate": 9.74512987012987e-06, "loss": 30.7661, "step": 18120 }, { "epoch": 431.4537313432836, "grad_norm": 28.426176071166992, "learning_rate": 9.744588744588746e-06, "loss": 30.1947, "step": 18121 }, { "epoch": 431.4776119402985, "grad_norm": 17.48659324645996, "learning_rate": 9.74404761904762e-06, "loss": 30.8871, "step": 18122 }, { "epoch": 431.5014925373134, "grad_norm": 29.077180862426758, "learning_rate": 9.743506493506493e-06, "loss": 29.2371, "step": 18123 }, { "epoch": 431.52537313432833, "grad_norm": 25.1240177154541, "learning_rate": 9.742965367965369e-06, "loss": 30.2949, "step": 18124 }, { "epoch": 431.5492537313433, "grad_norm": 20.506816864013672, "learning_rate": 9.742424242424244e-06, "loss": 30.0029, "step": 18125 }, { "epoch": 431.5731343283582, "grad_norm": 29.383607864379883, "learning_rate": 9.741883116883117e-06, "loss": 30.1708, "step": 18126 }, { "epoch": 431.5970149253731, "grad_norm": 21.535627365112305, "learning_rate": 9.741341991341993e-06, "loss": 29.6401, "step": 18127 }, { "epoch": 431.6208955223881, "grad_norm": 26.823631286621094, "learning_rate": 9.740800865800866e-06, "loss": 29.6246, "step": 18128 }, { "epoch": 431.644776119403, "grad_norm": 23.506425857543945, "learning_rate": 9.740259740259742e-06, "loss": 29.7555, "step": 18129 }, { "epoch": 431.6686567164179, "grad_norm": 26.477182388305664, "learning_rate": 9.739718614718615e-06, "loss": 29.9764, "step": 18130 }, { "epoch": 431.6925373134328, "grad_norm": 27.343353271484375, "learning_rate": 9.73917748917749e-06, "loss": 30.8947, "step": 18131 }, { "epoch": 431.7164179104478, "grad_norm": 20.136842727661133, "learning_rate": 9.738636363636364e-06, "loss": 29.4448, "step": 18132 }, { "epoch": 431.7402985074627, "grad_norm": 23.874711990356445, "learning_rate": 9.73809523809524e-06, "loss": 30.4971, "step": 18133 }, { "epoch": 431.7641791044776, "grad_norm": 22.99772071838379, "learning_rate": 9.737554112554113e-06, "loss": 28.7115, "step": 18134 }, { "epoch": 431.78805970149256, "grad_norm": 17.651897430419922, "learning_rate": 9.737012987012988e-06, "loss": 28.9517, "step": 18135 }, { "epoch": 431.81194029850747, "grad_norm": 23.423057556152344, "learning_rate": 9.736471861471862e-06, "loss": 30.3671, "step": 18136 }, { "epoch": 431.8358208955224, "grad_norm": 22.226015090942383, "learning_rate": 9.735930735930737e-06, "loss": 31.7865, "step": 18137 }, { "epoch": 431.85970149253734, "grad_norm": 19.68935203552246, "learning_rate": 9.735389610389612e-06, "loss": 29.694, "step": 18138 }, { "epoch": 431.88358208955225, "grad_norm": 18.63748550415039, "learning_rate": 9.734848484848486e-06, "loss": 29.9531, "step": 18139 }, { "epoch": 431.90746268656716, "grad_norm": 24.25529670715332, "learning_rate": 9.73430735930736e-06, "loss": 29.8732, "step": 18140 }, { "epoch": 431.93134328358207, "grad_norm": 19.358558654785156, "learning_rate": 9.733766233766235e-06, "loss": 30.0011, "step": 18141 }, { "epoch": 431.95522388059703, "grad_norm": 18.492074966430664, "learning_rate": 9.73322510822511e-06, "loss": 30.1113, "step": 18142 }, { "epoch": 431.97910447761194, "grad_norm": 23.94791030883789, "learning_rate": 9.732683982683984e-06, "loss": 30.768, "step": 18143 }, { "epoch": 432.0, "grad_norm": 18.57393455505371, "learning_rate": 9.732142857142858e-06, "loss": 26.8599, "step": 18144 }, { "epoch": 432.0238805970149, "grad_norm": 18.835695266723633, "learning_rate": 9.731601731601731e-06, "loss": 29.5854, "step": 18145 }, { "epoch": 432.0477611940299, "grad_norm": 26.365015029907227, "learning_rate": 9.731060606060606e-06, "loss": 30.2654, "step": 18146 }, { "epoch": 432.0716417910448, "grad_norm": 18.96073341369629, "learning_rate": 9.730519480519482e-06, "loss": 29.6751, "step": 18147 }, { "epoch": 432.0955223880597, "grad_norm": 19.330810546875, "learning_rate": 9.729978354978355e-06, "loss": 30.1745, "step": 18148 }, { "epoch": 432.1194029850746, "grad_norm": 20.16083526611328, "learning_rate": 9.729437229437229e-06, "loss": 29.659, "step": 18149 }, { "epoch": 432.14328358208957, "grad_norm": 21.80440330505371, "learning_rate": 9.728896103896104e-06, "loss": 29.8091, "step": 18150 }, { "epoch": 432.1671641791045, "grad_norm": 17.65022850036621, "learning_rate": 9.72835497835498e-06, "loss": 30.9192, "step": 18151 }, { "epoch": 432.1910447761194, "grad_norm": 17.394853591918945, "learning_rate": 9.727813852813853e-06, "loss": 29.6661, "step": 18152 }, { "epoch": 432.21492537313435, "grad_norm": 18.526281356811523, "learning_rate": 9.727272727272728e-06, "loss": 30.1869, "step": 18153 }, { "epoch": 432.23880597014926, "grad_norm": 16.71364974975586, "learning_rate": 9.726731601731602e-06, "loss": 29.687, "step": 18154 }, { "epoch": 432.26268656716417, "grad_norm": 23.47662925720215, "learning_rate": 9.726190476190477e-06, "loss": 29.7638, "step": 18155 }, { "epoch": 432.28656716417913, "grad_norm": 17.683979034423828, "learning_rate": 9.725649350649351e-06, "loss": 29.8452, "step": 18156 }, { "epoch": 432.31044776119404, "grad_norm": 21.617971420288086, "learning_rate": 9.725108225108226e-06, "loss": 30.203, "step": 18157 }, { "epoch": 432.33432835820895, "grad_norm": 19.951866149902344, "learning_rate": 9.7245670995671e-06, "loss": 30.7912, "step": 18158 }, { "epoch": 432.35820895522386, "grad_norm": 19.99698257446289, "learning_rate": 9.724025974025975e-06, "loss": 30.8737, "step": 18159 }, { "epoch": 432.3820895522388, "grad_norm": 24.1381893157959, "learning_rate": 9.723484848484849e-06, "loss": 30.2432, "step": 18160 }, { "epoch": 432.40597014925373, "grad_norm": 19.578128814697266, "learning_rate": 9.722943722943724e-06, "loss": 29.848, "step": 18161 }, { "epoch": 432.42985074626864, "grad_norm": 21.668193817138672, "learning_rate": 9.722402597402598e-06, "loss": 31.3999, "step": 18162 }, { "epoch": 432.4537313432836, "grad_norm": 20.279748916625977, "learning_rate": 9.721861471861473e-06, "loss": 31.1406, "step": 18163 }, { "epoch": 432.4776119402985, "grad_norm": 20.21373748779297, "learning_rate": 9.721320346320348e-06, "loss": 30.5899, "step": 18164 }, { "epoch": 432.5014925373134, "grad_norm": 18.572547912597656, "learning_rate": 9.720779220779222e-06, "loss": 29.4998, "step": 18165 }, { "epoch": 432.52537313432833, "grad_norm": 19.156526565551758, "learning_rate": 9.720238095238095e-06, "loss": 30.9866, "step": 18166 }, { "epoch": 432.5492537313433, "grad_norm": 20.265737533569336, "learning_rate": 9.71969696969697e-06, "loss": 30.1894, "step": 18167 }, { "epoch": 432.5731343283582, "grad_norm": 19.298349380493164, "learning_rate": 9.719155844155846e-06, "loss": 30.6742, "step": 18168 }, { "epoch": 432.5970149253731, "grad_norm": 20.337852478027344, "learning_rate": 9.71861471861472e-06, "loss": 30.027, "step": 18169 }, { "epoch": 432.6208955223881, "grad_norm": 18.822757720947266, "learning_rate": 9.718073593073593e-06, "loss": 29.6242, "step": 18170 }, { "epoch": 432.644776119403, "grad_norm": 17.615934371948242, "learning_rate": 9.717532467532468e-06, "loss": 29.7349, "step": 18171 }, { "epoch": 432.6686567164179, "grad_norm": 19.46221923828125, "learning_rate": 9.716991341991344e-06, "loss": 29.5274, "step": 18172 }, { "epoch": 432.6925373134328, "grad_norm": 23.778234481811523, "learning_rate": 9.716450216450217e-06, "loss": 30.3217, "step": 18173 }, { "epoch": 432.7164179104478, "grad_norm": 19.636152267456055, "learning_rate": 9.715909090909091e-06, "loss": 31.0492, "step": 18174 }, { "epoch": 432.7402985074627, "grad_norm": 17.6784725189209, "learning_rate": 9.715367965367966e-06, "loss": 29.4998, "step": 18175 }, { "epoch": 432.7641791044776, "grad_norm": 19.695819854736328, "learning_rate": 9.714826839826842e-06, "loss": 30.7269, "step": 18176 }, { "epoch": 432.78805970149256, "grad_norm": 19.45430564880371, "learning_rate": 9.714285714285715e-06, "loss": 30.1602, "step": 18177 }, { "epoch": 432.81194029850747, "grad_norm": 21.0908260345459, "learning_rate": 9.713744588744589e-06, "loss": 29.7193, "step": 18178 }, { "epoch": 432.8358208955224, "grad_norm": 19.038637161254883, "learning_rate": 9.713203463203464e-06, "loss": 30.0591, "step": 18179 }, { "epoch": 432.85970149253734, "grad_norm": 17.280475616455078, "learning_rate": 9.71266233766234e-06, "loss": 29.9562, "step": 18180 }, { "epoch": 432.88358208955225, "grad_norm": 18.024349212646484, "learning_rate": 9.712121212121213e-06, "loss": 30.2917, "step": 18181 }, { "epoch": 432.90746268656716, "grad_norm": 17.661876678466797, "learning_rate": 9.711580086580087e-06, "loss": 30.0967, "step": 18182 }, { "epoch": 432.93134328358207, "grad_norm": 18.027271270751953, "learning_rate": 9.711038961038962e-06, "loss": 29.4842, "step": 18183 }, { "epoch": 432.95522388059703, "grad_norm": 18.41741943359375, "learning_rate": 9.710497835497835e-06, "loss": 30.4195, "step": 18184 }, { "epoch": 432.97910447761194, "grad_norm": 16.7603816986084, "learning_rate": 9.70995670995671e-06, "loss": 30.8362, "step": 18185 }, { "epoch": 433.0, "grad_norm": 16.588382720947266, "learning_rate": 9.709415584415586e-06, "loss": 26.8038, "step": 18186 }, { "epoch": 433.0238805970149, "grad_norm": 15.491820335388184, "learning_rate": 9.70887445887446e-06, "loss": 29.728, "step": 18187 }, { "epoch": 433.0477611940299, "grad_norm": 18.844192504882812, "learning_rate": 9.708333333333333e-06, "loss": 29.4772, "step": 18188 }, { "epoch": 433.0716417910448, "grad_norm": 18.78489112854004, "learning_rate": 9.707792207792209e-06, "loss": 29.0074, "step": 18189 }, { "epoch": 433.0955223880597, "grad_norm": 19.88336181640625, "learning_rate": 9.707251082251084e-06, "loss": 29.2426, "step": 18190 }, { "epoch": 433.1194029850746, "grad_norm": 24.643024444580078, "learning_rate": 9.706709956709957e-06, "loss": 29.6507, "step": 18191 }, { "epoch": 433.14328358208957, "grad_norm": 16.000728607177734, "learning_rate": 9.706168831168831e-06, "loss": 30.8033, "step": 18192 }, { "epoch": 433.1671641791045, "grad_norm": 25.131465911865234, "learning_rate": 9.705627705627706e-06, "loss": 28.96, "step": 18193 }, { "epoch": 433.1910447761194, "grad_norm": 23.845787048339844, "learning_rate": 9.705086580086582e-06, "loss": 29.8061, "step": 18194 }, { "epoch": 433.21492537313435, "grad_norm": 17.705398559570312, "learning_rate": 9.704545454545455e-06, "loss": 29.806, "step": 18195 }, { "epoch": 433.23880597014926, "grad_norm": 18.651830673217773, "learning_rate": 9.704004329004329e-06, "loss": 30.2466, "step": 18196 }, { "epoch": 433.26268656716417, "grad_norm": 21.60057830810547, "learning_rate": 9.703463203463204e-06, "loss": 31.1439, "step": 18197 }, { "epoch": 433.28656716417913, "grad_norm": 19.81477928161621, "learning_rate": 9.70292207792208e-06, "loss": 31.1098, "step": 18198 }, { "epoch": 433.31044776119404, "grad_norm": 18.393125534057617, "learning_rate": 9.702380952380953e-06, "loss": 29.9146, "step": 18199 }, { "epoch": 433.33432835820895, "grad_norm": 17.704788208007812, "learning_rate": 9.701839826839827e-06, "loss": 29.6932, "step": 18200 }, { "epoch": 433.35820895522386, "grad_norm": 19.540067672729492, "learning_rate": 9.701298701298702e-06, "loss": 29.1834, "step": 18201 }, { "epoch": 433.3820895522388, "grad_norm": 24.0918025970459, "learning_rate": 9.700757575757577e-06, "loss": 30.6327, "step": 18202 }, { "epoch": 433.40597014925373, "grad_norm": 17.470829010009766, "learning_rate": 9.700216450216451e-06, "loss": 29.4752, "step": 18203 }, { "epoch": 433.42985074626864, "grad_norm": 22.728925704956055, "learning_rate": 9.699675324675324e-06, "loss": 30.8575, "step": 18204 }, { "epoch": 433.4537313432836, "grad_norm": 23.7744140625, "learning_rate": 9.6991341991342e-06, "loss": 29.8983, "step": 18205 }, { "epoch": 433.4776119402985, "grad_norm": 19.178041458129883, "learning_rate": 9.698593073593075e-06, "loss": 30.1342, "step": 18206 }, { "epoch": 433.5014925373134, "grad_norm": 20.79680061340332, "learning_rate": 9.698051948051949e-06, "loss": 30.2384, "step": 18207 }, { "epoch": 433.52537313432833, "grad_norm": 22.13588523864746, "learning_rate": 9.697510822510822e-06, "loss": 30.4178, "step": 18208 }, { "epoch": 433.5492537313433, "grad_norm": 22.770479202270508, "learning_rate": 9.696969696969698e-06, "loss": 30.1464, "step": 18209 }, { "epoch": 433.5731343283582, "grad_norm": 20.7957706451416, "learning_rate": 9.696428571428573e-06, "loss": 30.8754, "step": 18210 }, { "epoch": 433.5970149253731, "grad_norm": 18.539743423461914, "learning_rate": 9.695887445887446e-06, "loss": 30.1513, "step": 18211 }, { "epoch": 433.6208955223881, "grad_norm": 29.045120239257812, "learning_rate": 9.695346320346322e-06, "loss": 30.3363, "step": 18212 }, { "epoch": 433.644776119403, "grad_norm": 20.481473922729492, "learning_rate": 9.694805194805195e-06, "loss": 29.9034, "step": 18213 }, { "epoch": 433.6686567164179, "grad_norm": 21.91567611694336, "learning_rate": 9.69426406926407e-06, "loss": 30.7069, "step": 18214 }, { "epoch": 433.6925373134328, "grad_norm": 27.261484146118164, "learning_rate": 9.693722943722944e-06, "loss": 30.8677, "step": 18215 }, { "epoch": 433.7164179104478, "grad_norm": 18.287307739257812, "learning_rate": 9.69318181818182e-06, "loss": 29.6516, "step": 18216 }, { "epoch": 433.7402985074627, "grad_norm": 27.005582809448242, "learning_rate": 9.692640692640693e-06, "loss": 30.5841, "step": 18217 }, { "epoch": 433.7641791044776, "grad_norm": 23.46379852294922, "learning_rate": 9.692099567099568e-06, "loss": 29.9727, "step": 18218 }, { "epoch": 433.78805970149256, "grad_norm": 19.012388229370117, "learning_rate": 9.691558441558442e-06, "loss": 29.4702, "step": 18219 }, { "epoch": 433.81194029850747, "grad_norm": 21.882553100585938, "learning_rate": 9.691017316017317e-06, "loss": 31.2094, "step": 18220 }, { "epoch": 433.8358208955224, "grad_norm": 19.79618263244629, "learning_rate": 9.690476190476191e-06, "loss": 30.3362, "step": 18221 }, { "epoch": 433.85970149253734, "grad_norm": 23.287639617919922, "learning_rate": 9.689935064935066e-06, "loss": 31.2182, "step": 18222 }, { "epoch": 433.88358208955225, "grad_norm": 19.262718200683594, "learning_rate": 9.68939393939394e-06, "loss": 29.0019, "step": 18223 }, { "epoch": 433.90746268656716, "grad_norm": 18.80190658569336, "learning_rate": 9.688852813852815e-06, "loss": 31.3132, "step": 18224 }, { "epoch": 433.93134328358207, "grad_norm": 20.80198860168457, "learning_rate": 9.688311688311689e-06, "loss": 30.3108, "step": 18225 }, { "epoch": 433.95522388059703, "grad_norm": 22.15380859375, "learning_rate": 9.687770562770562e-06, "loss": 31.2515, "step": 18226 }, { "epoch": 433.97910447761194, "grad_norm": 25.34805679321289, "learning_rate": 9.687229437229438e-06, "loss": 29.3708, "step": 18227 }, { "epoch": 434.0, "grad_norm": 19.4454402923584, "learning_rate": 9.686688311688313e-06, "loss": 26.7535, "step": 18228 }, { "epoch": 434.0238805970149, "grad_norm": 16.465877532958984, "learning_rate": 9.686147186147187e-06, "loss": 30.0039, "step": 18229 }, { "epoch": 434.0477611940299, "grad_norm": 22.68767547607422, "learning_rate": 9.68560606060606e-06, "loss": 29.7874, "step": 18230 }, { "epoch": 434.0716417910448, "grad_norm": 16.684471130371094, "learning_rate": 9.685064935064935e-06, "loss": 29.8318, "step": 18231 }, { "epoch": 434.0955223880597, "grad_norm": 23.836278915405273, "learning_rate": 9.68452380952381e-06, "loss": 30.5512, "step": 18232 }, { "epoch": 434.1194029850746, "grad_norm": 19.90594482421875, "learning_rate": 9.683982683982684e-06, "loss": 30.1523, "step": 18233 }, { "epoch": 434.14328358208957, "grad_norm": 18.027976989746094, "learning_rate": 9.68344155844156e-06, "loss": 30.4587, "step": 18234 }, { "epoch": 434.1671641791045, "grad_norm": 20.156003952026367, "learning_rate": 9.682900432900433e-06, "loss": 30.6802, "step": 18235 }, { "epoch": 434.1910447761194, "grad_norm": 16.125776290893555, "learning_rate": 9.682359307359309e-06, "loss": 30.2559, "step": 18236 }, { "epoch": 434.21492537313435, "grad_norm": 22.57579231262207, "learning_rate": 9.681818181818182e-06, "loss": 28.9034, "step": 18237 }, { "epoch": 434.23880597014926, "grad_norm": 18.740280151367188, "learning_rate": 9.681277056277057e-06, "loss": 31.3436, "step": 18238 }, { "epoch": 434.26268656716417, "grad_norm": 15.311004638671875, "learning_rate": 9.680735930735931e-06, "loss": 29.4365, "step": 18239 }, { "epoch": 434.28656716417913, "grad_norm": 19.666488647460938, "learning_rate": 9.680194805194806e-06, "loss": 31.0185, "step": 18240 }, { "epoch": 434.31044776119404, "grad_norm": 17.850126266479492, "learning_rate": 9.67965367965368e-06, "loss": 30.8535, "step": 18241 }, { "epoch": 434.33432835820895, "grad_norm": 21.057401657104492, "learning_rate": 9.679112554112555e-06, "loss": 30.4327, "step": 18242 }, { "epoch": 434.35820895522386, "grad_norm": 23.181318283081055, "learning_rate": 9.678571428571429e-06, "loss": 30.5596, "step": 18243 }, { "epoch": 434.3820895522388, "grad_norm": 22.088865280151367, "learning_rate": 9.678030303030304e-06, "loss": 29.6078, "step": 18244 }, { "epoch": 434.40597014925373, "grad_norm": 21.577547073364258, "learning_rate": 9.67748917748918e-06, "loss": 29.4521, "step": 18245 }, { "epoch": 434.42985074626864, "grad_norm": 17.609233856201172, "learning_rate": 9.676948051948053e-06, "loss": 31.0021, "step": 18246 }, { "epoch": 434.4537313432836, "grad_norm": 21.627946853637695, "learning_rate": 9.676406926406927e-06, "loss": 29.8607, "step": 18247 }, { "epoch": 434.4776119402985, "grad_norm": 17.885469436645508, "learning_rate": 9.675865800865802e-06, "loss": 30.6589, "step": 18248 }, { "epoch": 434.5014925373134, "grad_norm": 20.681833267211914, "learning_rate": 9.675324675324677e-06, "loss": 29.7241, "step": 18249 }, { "epoch": 434.52537313432833, "grad_norm": 18.545494079589844, "learning_rate": 9.67478354978355e-06, "loss": 29.7875, "step": 18250 }, { "epoch": 434.5492537313433, "grad_norm": 19.230512619018555, "learning_rate": 9.674242424242424e-06, "loss": 30.675, "step": 18251 }, { "epoch": 434.5731343283582, "grad_norm": 20.685983657836914, "learning_rate": 9.6737012987013e-06, "loss": 30.303, "step": 18252 }, { "epoch": 434.5970149253731, "grad_norm": 21.225440979003906, "learning_rate": 9.673160173160175e-06, "loss": 29.8067, "step": 18253 }, { "epoch": 434.6208955223881, "grad_norm": 22.626157760620117, "learning_rate": 9.672619047619049e-06, "loss": 29.4272, "step": 18254 }, { "epoch": 434.644776119403, "grad_norm": 19.518709182739258, "learning_rate": 9.672077922077922e-06, "loss": 29.5052, "step": 18255 }, { "epoch": 434.6686567164179, "grad_norm": 20.40300750732422, "learning_rate": 9.671536796536798e-06, "loss": 30.9477, "step": 18256 }, { "epoch": 434.6925373134328, "grad_norm": 23.23400115966797, "learning_rate": 9.670995670995673e-06, "loss": 31.575, "step": 18257 }, { "epoch": 434.7164179104478, "grad_norm": 19.318143844604492, "learning_rate": 9.670454545454546e-06, "loss": 31.069, "step": 18258 }, { "epoch": 434.7402985074627, "grad_norm": 21.579816818237305, "learning_rate": 9.66991341991342e-06, "loss": 30.5433, "step": 18259 }, { "epoch": 434.7641791044776, "grad_norm": 20.505245208740234, "learning_rate": 9.669372294372295e-06, "loss": 29.7707, "step": 18260 }, { "epoch": 434.78805970149256, "grad_norm": 18.020780563354492, "learning_rate": 9.66883116883117e-06, "loss": 30.7001, "step": 18261 }, { "epoch": 434.81194029850747, "grad_norm": 19.13140869140625, "learning_rate": 9.668290043290044e-06, "loss": 29.109, "step": 18262 }, { "epoch": 434.8358208955224, "grad_norm": 20.555435180664062, "learning_rate": 9.667748917748918e-06, "loss": 29.5558, "step": 18263 }, { "epoch": 434.85970149253734, "grad_norm": 18.302505493164062, "learning_rate": 9.667207792207793e-06, "loss": 29.3802, "step": 18264 }, { "epoch": 434.88358208955225, "grad_norm": 17.136194229125977, "learning_rate": 9.666666666666667e-06, "loss": 30.112, "step": 18265 }, { "epoch": 434.90746268656716, "grad_norm": 20.43988609313965, "learning_rate": 9.666125541125542e-06, "loss": 29.7273, "step": 18266 }, { "epoch": 434.93134328358207, "grad_norm": 21.64653968811035, "learning_rate": 9.665584415584416e-06, "loss": 29.9953, "step": 18267 }, { "epoch": 434.95522388059703, "grad_norm": 19.094741821289062, "learning_rate": 9.665043290043291e-06, "loss": 28.9775, "step": 18268 }, { "epoch": 434.97910447761194, "grad_norm": 20.54570960998535, "learning_rate": 9.664502164502165e-06, "loss": 30.3792, "step": 18269 }, { "epoch": 435.0, "grad_norm": 14.584463119506836, "learning_rate": 9.66396103896104e-06, "loss": 24.9574, "step": 18270 }, { "epoch": 435.0238805970149, "grad_norm": 15.791644096374512, "learning_rate": 9.663419913419915e-06, "loss": 30.1044, "step": 18271 }, { "epoch": 435.0477611940299, "grad_norm": 18.99878692626953, "learning_rate": 9.662878787878789e-06, "loss": 30.5295, "step": 18272 }, { "epoch": 435.0716417910448, "grad_norm": 17.93902587890625, "learning_rate": 9.662337662337662e-06, "loss": 31.205, "step": 18273 }, { "epoch": 435.0955223880597, "grad_norm": 20.62776756286621, "learning_rate": 9.661796536796538e-06, "loss": 30.331, "step": 18274 }, { "epoch": 435.1194029850746, "grad_norm": 24.08873176574707, "learning_rate": 9.661255411255413e-06, "loss": 30.398, "step": 18275 }, { "epoch": 435.14328358208957, "grad_norm": 19.241039276123047, "learning_rate": 9.660714285714287e-06, "loss": 29.3642, "step": 18276 }, { "epoch": 435.1671641791045, "grad_norm": 20.880123138427734, "learning_rate": 9.66017316017316e-06, "loss": 29.915, "step": 18277 }, { "epoch": 435.1910447761194, "grad_norm": 22.662073135375977, "learning_rate": 9.659632034632035e-06, "loss": 31.5605, "step": 18278 }, { "epoch": 435.21492537313435, "grad_norm": 18.814071655273438, "learning_rate": 9.65909090909091e-06, "loss": 29.1728, "step": 18279 }, { "epoch": 435.23880597014926, "grad_norm": 15.919159889221191, "learning_rate": 9.658549783549784e-06, "loss": 29.6376, "step": 18280 }, { "epoch": 435.26268656716417, "grad_norm": 17.640968322753906, "learning_rate": 9.658008658008658e-06, "loss": 30.3631, "step": 18281 }, { "epoch": 435.28656716417913, "grad_norm": 25.971193313598633, "learning_rate": 9.657467532467533e-06, "loss": 29.6467, "step": 18282 }, { "epoch": 435.31044776119404, "grad_norm": 20.070676803588867, "learning_rate": 9.656926406926409e-06, "loss": 30.4805, "step": 18283 }, { "epoch": 435.33432835820895, "grad_norm": 17.20748519897461, "learning_rate": 9.656385281385282e-06, "loss": 30.0635, "step": 18284 }, { "epoch": 435.35820895522386, "grad_norm": 20.692230224609375, "learning_rate": 9.655844155844156e-06, "loss": 30.4306, "step": 18285 }, { "epoch": 435.3820895522388, "grad_norm": 14.956451416015625, "learning_rate": 9.655303030303031e-06, "loss": 29.3072, "step": 18286 }, { "epoch": 435.40597014925373, "grad_norm": 18.895109176635742, "learning_rate": 9.654761904761906e-06, "loss": 29.1034, "step": 18287 }, { "epoch": 435.42985074626864, "grad_norm": 22.296415328979492, "learning_rate": 9.65422077922078e-06, "loss": 30.1436, "step": 18288 }, { "epoch": 435.4537313432836, "grad_norm": 21.924699783325195, "learning_rate": 9.653679653679654e-06, "loss": 30.0738, "step": 18289 }, { "epoch": 435.4776119402985, "grad_norm": 27.33721160888672, "learning_rate": 9.653138528138529e-06, "loss": 29.8749, "step": 18290 }, { "epoch": 435.5014925373134, "grad_norm": 18.547534942626953, "learning_rate": 9.652597402597404e-06, "loss": 28.7964, "step": 18291 }, { "epoch": 435.52537313432833, "grad_norm": 17.842880249023438, "learning_rate": 9.652056277056278e-06, "loss": 30.13, "step": 18292 }, { "epoch": 435.5492537313433, "grad_norm": 16.652873992919922, "learning_rate": 9.651515151515153e-06, "loss": 29.7252, "step": 18293 }, { "epoch": 435.5731343283582, "grad_norm": 17.452665328979492, "learning_rate": 9.650974025974027e-06, "loss": 29.0173, "step": 18294 }, { "epoch": 435.5970149253731, "grad_norm": 16.44731330871582, "learning_rate": 9.650432900432902e-06, "loss": 29.189, "step": 18295 }, { "epoch": 435.6208955223881, "grad_norm": 16.534452438354492, "learning_rate": 9.649891774891776e-06, "loss": 31.1884, "step": 18296 }, { "epoch": 435.644776119403, "grad_norm": 17.389270782470703, "learning_rate": 9.64935064935065e-06, "loss": 30.8596, "step": 18297 }, { "epoch": 435.6686567164179, "grad_norm": 23.34886360168457, "learning_rate": 9.648809523809524e-06, "loss": 30.7862, "step": 18298 }, { "epoch": 435.6925373134328, "grad_norm": 23.544044494628906, "learning_rate": 9.6482683982684e-06, "loss": 29.7926, "step": 18299 }, { "epoch": 435.7164179104478, "grad_norm": 17.028202056884766, "learning_rate": 9.647727272727273e-06, "loss": 30.1643, "step": 18300 }, { "epoch": 435.7402985074627, "grad_norm": 19.95630645751953, "learning_rate": 9.647186147186149e-06, "loss": 29.3429, "step": 18301 }, { "epoch": 435.7641791044776, "grad_norm": 25.058000564575195, "learning_rate": 9.646645021645022e-06, "loss": 30.7787, "step": 18302 }, { "epoch": 435.78805970149256, "grad_norm": 22.101835250854492, "learning_rate": 9.646103896103896e-06, "loss": 30.2931, "step": 18303 }, { "epoch": 435.81194029850747, "grad_norm": 22.743885040283203, "learning_rate": 9.645562770562771e-06, "loss": 30.3098, "step": 18304 }, { "epoch": 435.8358208955224, "grad_norm": 27.39186668395996, "learning_rate": 9.645021645021646e-06, "loss": 29.4458, "step": 18305 }, { "epoch": 435.85970149253734, "grad_norm": 20.444841384887695, "learning_rate": 9.64448051948052e-06, "loss": 29.3536, "step": 18306 }, { "epoch": 435.88358208955225, "grad_norm": 17.648893356323242, "learning_rate": 9.643939393939394e-06, "loss": 30.3493, "step": 18307 }, { "epoch": 435.90746268656716, "grad_norm": 20.359098434448242, "learning_rate": 9.643398268398269e-06, "loss": 30.1989, "step": 18308 }, { "epoch": 435.93134328358207, "grad_norm": 20.681459426879883, "learning_rate": 9.642857142857144e-06, "loss": 30.9317, "step": 18309 }, { "epoch": 435.95522388059703, "grad_norm": 20.243064880371094, "learning_rate": 9.642316017316018e-06, "loss": 30.7878, "step": 18310 }, { "epoch": 435.97910447761194, "grad_norm": 16.853389739990234, "learning_rate": 9.641774891774891e-06, "loss": 30.89, "step": 18311 }, { "epoch": 436.0, "grad_norm": 18.18767547607422, "learning_rate": 9.641233766233767e-06, "loss": 25.6831, "step": 18312 }, { "epoch": 436.0238805970149, "grad_norm": 20.59931182861328, "learning_rate": 9.640692640692642e-06, "loss": 30.2498, "step": 18313 }, { "epoch": 436.0477611940299, "grad_norm": 22.943437576293945, "learning_rate": 9.640151515151516e-06, "loss": 29.6802, "step": 18314 }, { "epoch": 436.0716417910448, "grad_norm": 18.298830032348633, "learning_rate": 9.63961038961039e-06, "loss": 29.9832, "step": 18315 }, { "epoch": 436.0955223880597, "grad_norm": 20.217575073242188, "learning_rate": 9.639069264069264e-06, "loss": 30.7649, "step": 18316 }, { "epoch": 436.1194029850746, "grad_norm": 20.93951416015625, "learning_rate": 9.63852813852814e-06, "loss": 30.559, "step": 18317 }, { "epoch": 436.14328358208957, "grad_norm": 23.427631378173828, "learning_rate": 9.637987012987013e-06, "loss": 28.9487, "step": 18318 }, { "epoch": 436.1671641791045, "grad_norm": 20.74120330810547, "learning_rate": 9.637445887445889e-06, "loss": 28.8305, "step": 18319 }, { "epoch": 436.1910447761194, "grad_norm": 20.74155616760254, "learning_rate": 9.636904761904762e-06, "loss": 30.583, "step": 18320 }, { "epoch": 436.21492537313435, "grad_norm": 29.1689453125, "learning_rate": 9.636363636363638e-06, "loss": 29.8797, "step": 18321 }, { "epoch": 436.23880597014926, "grad_norm": 22.114608764648438, "learning_rate": 9.635822510822511e-06, "loss": 29.4548, "step": 18322 }, { "epoch": 436.26268656716417, "grad_norm": 18.739072799682617, "learning_rate": 9.635281385281386e-06, "loss": 30.5758, "step": 18323 }, { "epoch": 436.28656716417913, "grad_norm": 21.38836669921875, "learning_rate": 9.63474025974026e-06, "loss": 29.4342, "step": 18324 }, { "epoch": 436.31044776119404, "grad_norm": 19.67865753173828, "learning_rate": 9.634199134199135e-06, "loss": 30.2517, "step": 18325 }, { "epoch": 436.33432835820895, "grad_norm": 19.225650787353516, "learning_rate": 9.633658008658009e-06, "loss": 30.7116, "step": 18326 }, { "epoch": 436.35820895522386, "grad_norm": 22.08695411682129, "learning_rate": 9.633116883116884e-06, "loss": 30.3946, "step": 18327 }, { "epoch": 436.3820895522388, "grad_norm": 26.342357635498047, "learning_rate": 9.632575757575758e-06, "loss": 30.4322, "step": 18328 }, { "epoch": 436.40597014925373, "grad_norm": 18.675661087036133, "learning_rate": 9.632034632034633e-06, "loss": 30.3804, "step": 18329 }, { "epoch": 436.42985074626864, "grad_norm": 22.840425491333008, "learning_rate": 9.631493506493508e-06, "loss": 29.9916, "step": 18330 }, { "epoch": 436.4537313432836, "grad_norm": 18.92214584350586, "learning_rate": 9.630952380952382e-06, "loss": 30.1596, "step": 18331 }, { "epoch": 436.4776119402985, "grad_norm": 18.427722930908203, "learning_rate": 9.630411255411256e-06, "loss": 30.9355, "step": 18332 }, { "epoch": 436.5014925373134, "grad_norm": 23.86881446838379, "learning_rate": 9.629870129870131e-06, "loss": 30.8778, "step": 18333 }, { "epoch": 436.52537313432833, "grad_norm": 22.45794105529785, "learning_rate": 9.629329004329006e-06, "loss": 30.3567, "step": 18334 }, { "epoch": 436.5492537313433, "grad_norm": 21.06729507446289, "learning_rate": 9.62878787878788e-06, "loss": 29.3477, "step": 18335 }, { "epoch": 436.5731343283582, "grad_norm": 18.070526123046875, "learning_rate": 9.628246753246753e-06, "loss": 30.2371, "step": 18336 }, { "epoch": 436.5970149253731, "grad_norm": 21.866718292236328, "learning_rate": 9.627705627705629e-06, "loss": 30.5973, "step": 18337 }, { "epoch": 436.6208955223881, "grad_norm": 20.23525619506836, "learning_rate": 9.627164502164504e-06, "loss": 29.9108, "step": 18338 }, { "epoch": 436.644776119403, "grad_norm": 20.61037826538086, "learning_rate": 9.626623376623378e-06, "loss": 29.1173, "step": 18339 }, { "epoch": 436.6686567164179, "grad_norm": 16.306133270263672, "learning_rate": 9.626082251082251e-06, "loss": 30.1601, "step": 18340 }, { "epoch": 436.6925373134328, "grad_norm": 19.701372146606445, "learning_rate": 9.625541125541127e-06, "loss": 29.3894, "step": 18341 }, { "epoch": 436.7164179104478, "grad_norm": 17.36764144897461, "learning_rate": 9.625e-06, "loss": 28.3988, "step": 18342 }, { "epoch": 436.7402985074627, "grad_norm": 23.294689178466797, "learning_rate": 9.624458874458875e-06, "loss": 30.0732, "step": 18343 }, { "epoch": 436.7641791044776, "grad_norm": 18.175935745239258, "learning_rate": 9.623917748917749e-06, "loss": 30.4979, "step": 18344 }, { "epoch": 436.78805970149256, "grad_norm": 18.192787170410156, "learning_rate": 9.623376623376624e-06, "loss": 30.0431, "step": 18345 }, { "epoch": 436.81194029850747, "grad_norm": 21.695138931274414, "learning_rate": 9.622835497835498e-06, "loss": 29.922, "step": 18346 }, { "epoch": 436.8358208955224, "grad_norm": 19.197233200073242, "learning_rate": 9.622294372294373e-06, "loss": 30.2292, "step": 18347 }, { "epoch": 436.85970149253734, "grad_norm": 19.522125244140625, "learning_rate": 9.621753246753247e-06, "loss": 29.7332, "step": 18348 }, { "epoch": 436.88358208955225, "grad_norm": 17.038297653198242, "learning_rate": 9.621212121212122e-06, "loss": 29.5148, "step": 18349 }, { "epoch": 436.90746268656716, "grad_norm": 21.62288475036621, "learning_rate": 9.620670995670996e-06, "loss": 29.949, "step": 18350 }, { "epoch": 436.93134328358207, "grad_norm": 19.052358627319336, "learning_rate": 9.620129870129871e-06, "loss": 29.7353, "step": 18351 }, { "epoch": 436.95522388059703, "grad_norm": 24.851343154907227, "learning_rate": 9.619588744588746e-06, "loss": 30.6338, "step": 18352 }, { "epoch": 436.97910447761194, "grad_norm": 19.704662322998047, "learning_rate": 9.61904761904762e-06, "loss": 31.0321, "step": 18353 }, { "epoch": 437.0, "grad_norm": 19.7940673828125, "learning_rate": 9.618506493506494e-06, "loss": 26.4347, "step": 18354 }, { "epoch": 437.0238805970149, "grad_norm": 21.809568405151367, "learning_rate": 9.617965367965369e-06, "loss": 30.4459, "step": 18355 }, { "epoch": 437.0477611940299, "grad_norm": 21.622299194335938, "learning_rate": 9.617424242424244e-06, "loss": 30.5263, "step": 18356 }, { "epoch": 437.0716417910448, "grad_norm": 19.945377349853516, "learning_rate": 9.616883116883118e-06, "loss": 30.7836, "step": 18357 }, { "epoch": 437.0955223880597, "grad_norm": 19.13798713684082, "learning_rate": 9.616341991341991e-06, "loss": 29.5093, "step": 18358 }, { "epoch": 437.1194029850746, "grad_norm": 19.33329200744629, "learning_rate": 9.615800865800867e-06, "loss": 31.2368, "step": 18359 }, { "epoch": 437.14328358208957, "grad_norm": 23.51718521118164, "learning_rate": 9.615259740259742e-06, "loss": 30.6811, "step": 18360 }, { "epoch": 437.1671641791045, "grad_norm": 22.49342918395996, "learning_rate": 9.614718614718616e-06, "loss": 30.6678, "step": 18361 }, { "epoch": 437.1910447761194, "grad_norm": 18.669591903686523, "learning_rate": 9.61417748917749e-06, "loss": 30.1849, "step": 18362 }, { "epoch": 437.21492537313435, "grad_norm": 19.08563995361328, "learning_rate": 9.613636363636364e-06, "loss": 30.6485, "step": 18363 }, { "epoch": 437.23880597014926, "grad_norm": 16.72998809814453, "learning_rate": 9.61309523809524e-06, "loss": 30.2999, "step": 18364 }, { "epoch": 437.26268656716417, "grad_norm": 15.711103439331055, "learning_rate": 9.612554112554113e-06, "loss": 31.2417, "step": 18365 }, { "epoch": 437.28656716417913, "grad_norm": 18.274826049804688, "learning_rate": 9.612012987012987e-06, "loss": 29.0119, "step": 18366 }, { "epoch": 437.31044776119404, "grad_norm": 16.25044059753418, "learning_rate": 9.611471861471862e-06, "loss": 29.3701, "step": 18367 }, { "epoch": 437.33432835820895, "grad_norm": 19.717958450317383, "learning_rate": 9.610930735930738e-06, "loss": 29.8097, "step": 18368 }, { "epoch": 437.35820895522386, "grad_norm": 24.66046714782715, "learning_rate": 9.610389610389611e-06, "loss": 29.3774, "step": 18369 }, { "epoch": 437.3820895522388, "grad_norm": 22.514968872070312, "learning_rate": 9.609848484848485e-06, "loss": 30.8877, "step": 18370 }, { "epoch": 437.40597014925373, "grad_norm": 16.56135368347168, "learning_rate": 9.60930735930736e-06, "loss": 29.161, "step": 18371 }, { "epoch": 437.42985074626864, "grad_norm": 22.930313110351562, "learning_rate": 9.608766233766235e-06, "loss": 30.1345, "step": 18372 }, { "epoch": 437.4537313432836, "grad_norm": 21.518224716186523, "learning_rate": 9.608225108225109e-06, "loss": 29.7609, "step": 18373 }, { "epoch": 437.4776119402985, "grad_norm": 18.309608459472656, "learning_rate": 9.607683982683983e-06, "loss": 30.1276, "step": 18374 }, { "epoch": 437.5014925373134, "grad_norm": 16.86956787109375, "learning_rate": 9.607142857142858e-06, "loss": 29.9542, "step": 18375 }, { "epoch": 437.52537313432833, "grad_norm": 16.955245971679688, "learning_rate": 9.606601731601733e-06, "loss": 28.8259, "step": 18376 }, { "epoch": 437.5492537313433, "grad_norm": 15.334726333618164, "learning_rate": 9.606060606060607e-06, "loss": 29.7862, "step": 18377 }, { "epoch": 437.5731343283582, "grad_norm": 16.968448638916016, "learning_rate": 9.605519480519482e-06, "loss": 28.543, "step": 18378 }, { "epoch": 437.5970149253731, "grad_norm": 18.797088623046875, "learning_rate": 9.604978354978356e-06, "loss": 29.9734, "step": 18379 }, { "epoch": 437.6208955223881, "grad_norm": 20.425338745117188, "learning_rate": 9.604437229437231e-06, "loss": 30.6261, "step": 18380 }, { "epoch": 437.644776119403, "grad_norm": 23.71156120300293, "learning_rate": 9.603896103896105e-06, "loss": 29.6452, "step": 18381 }, { "epoch": 437.6686567164179, "grad_norm": 19.44552230834961, "learning_rate": 9.60335497835498e-06, "loss": 29.8348, "step": 18382 }, { "epoch": 437.6925373134328, "grad_norm": 19.09593963623047, "learning_rate": 9.602813852813853e-06, "loss": 30.0302, "step": 18383 }, { "epoch": 437.7164179104478, "grad_norm": 16.287437438964844, "learning_rate": 9.602272727272727e-06, "loss": 30.1343, "step": 18384 }, { "epoch": 437.7402985074627, "grad_norm": 22.482288360595703, "learning_rate": 9.601731601731602e-06, "loss": 30.1525, "step": 18385 }, { "epoch": 437.7641791044776, "grad_norm": 21.083412170410156, "learning_rate": 9.601190476190478e-06, "loss": 30.0119, "step": 18386 }, { "epoch": 437.78805970149256, "grad_norm": 16.497600555419922, "learning_rate": 9.600649350649351e-06, "loss": 29.4542, "step": 18387 }, { "epoch": 437.81194029850747, "grad_norm": 19.62682342529297, "learning_rate": 9.600108225108225e-06, "loss": 28.9712, "step": 18388 }, { "epoch": 437.8358208955224, "grad_norm": 24.665424346923828, "learning_rate": 9.5995670995671e-06, "loss": 29.6004, "step": 18389 }, { "epoch": 437.85970149253734, "grad_norm": 24.325775146484375, "learning_rate": 9.599025974025975e-06, "loss": 29.7811, "step": 18390 }, { "epoch": 437.88358208955225, "grad_norm": 14.164239883422852, "learning_rate": 9.598484848484849e-06, "loss": 29.9186, "step": 18391 }, { "epoch": 437.90746268656716, "grad_norm": 20.377151489257812, "learning_rate": 9.597943722943723e-06, "loss": 30.8108, "step": 18392 }, { "epoch": 437.93134328358207, "grad_norm": 19.745681762695312, "learning_rate": 9.597402597402598e-06, "loss": 30.8965, "step": 18393 }, { "epoch": 437.95522388059703, "grad_norm": 23.006057739257812, "learning_rate": 9.596861471861473e-06, "loss": 30.3527, "step": 18394 }, { "epoch": 437.97910447761194, "grad_norm": 21.118085861206055, "learning_rate": 9.596320346320347e-06, "loss": 29.401, "step": 18395 }, { "epoch": 438.0, "grad_norm": 16.79197120666504, "learning_rate": 9.59577922077922e-06, "loss": 26.6836, "step": 18396 }, { "epoch": 438.0238805970149, "grad_norm": 24.08824348449707, "learning_rate": 9.595238095238096e-06, "loss": 29.4007, "step": 18397 }, { "epoch": 438.0477611940299, "grad_norm": 21.36703109741211, "learning_rate": 9.594696969696971e-06, "loss": 29.648, "step": 18398 }, { "epoch": 438.0716417910448, "grad_norm": 23.28516387939453, "learning_rate": 9.594155844155845e-06, "loss": 30.2528, "step": 18399 }, { "epoch": 438.0955223880597, "grad_norm": 19.036104202270508, "learning_rate": 9.59361471861472e-06, "loss": 30.0436, "step": 18400 }, { "epoch": 438.1194029850746, "grad_norm": 21.310630798339844, "learning_rate": 9.593073593073594e-06, "loss": 28.7011, "step": 18401 }, { "epoch": 438.14328358208957, "grad_norm": 17.626867294311523, "learning_rate": 9.592532467532469e-06, "loss": 29.9864, "step": 18402 }, { "epoch": 438.1671641791045, "grad_norm": 25.529451370239258, "learning_rate": 9.591991341991342e-06, "loss": 29.9949, "step": 18403 }, { "epoch": 438.1910447761194, "grad_norm": 20.48516845703125, "learning_rate": 9.591450216450218e-06, "loss": 29.5868, "step": 18404 }, { "epoch": 438.21492537313435, "grad_norm": 21.072383880615234, "learning_rate": 9.590909090909091e-06, "loss": 29.6007, "step": 18405 }, { "epoch": 438.23880597014926, "grad_norm": 18.2504825592041, "learning_rate": 9.590367965367967e-06, "loss": 30.4394, "step": 18406 }, { "epoch": 438.26268656716417, "grad_norm": 21.870798110961914, "learning_rate": 9.58982683982684e-06, "loss": 29.6911, "step": 18407 }, { "epoch": 438.28656716417913, "grad_norm": 21.054988861083984, "learning_rate": 9.589285714285716e-06, "loss": 31.2226, "step": 18408 }, { "epoch": 438.31044776119404, "grad_norm": 23.002134323120117, "learning_rate": 9.588744588744589e-06, "loss": 30.6077, "step": 18409 }, { "epoch": 438.33432835820895, "grad_norm": 20.597837448120117, "learning_rate": 9.588203463203464e-06, "loss": 30.642, "step": 18410 }, { "epoch": 438.35820895522386, "grad_norm": 20.26851463317871, "learning_rate": 9.587662337662338e-06, "loss": 31.7231, "step": 18411 }, { "epoch": 438.3820895522388, "grad_norm": 22.513456344604492, "learning_rate": 9.587121212121213e-06, "loss": 30.6977, "step": 18412 }, { "epoch": 438.40597014925373, "grad_norm": 24.793535232543945, "learning_rate": 9.586580086580087e-06, "loss": 31.4542, "step": 18413 }, { "epoch": 438.42985074626864, "grad_norm": 20.16357421875, "learning_rate": 9.586038961038962e-06, "loss": 30.6742, "step": 18414 }, { "epoch": 438.4537313432836, "grad_norm": 24.922685623168945, "learning_rate": 9.585497835497838e-06, "loss": 29.9368, "step": 18415 }, { "epoch": 438.4776119402985, "grad_norm": 26.20820426940918, "learning_rate": 9.584956709956711e-06, "loss": 30.7537, "step": 18416 }, { "epoch": 438.5014925373134, "grad_norm": 19.87091827392578, "learning_rate": 9.584415584415585e-06, "loss": 29.8322, "step": 18417 }, { "epoch": 438.52537313432833, "grad_norm": 21.4395809173584, "learning_rate": 9.58387445887446e-06, "loss": 29.5324, "step": 18418 }, { "epoch": 438.5492537313433, "grad_norm": 19.096750259399414, "learning_rate": 9.583333333333335e-06, "loss": 29.9812, "step": 18419 }, { "epoch": 438.5731343283582, "grad_norm": 18.143898010253906, "learning_rate": 9.582792207792209e-06, "loss": 29.1294, "step": 18420 }, { "epoch": 438.5970149253731, "grad_norm": 19.896244049072266, "learning_rate": 9.582251082251083e-06, "loss": 30.8624, "step": 18421 }, { "epoch": 438.6208955223881, "grad_norm": 16.79835319519043, "learning_rate": 9.581709956709956e-06, "loss": 30.0049, "step": 18422 }, { "epoch": 438.644776119403, "grad_norm": 24.06325912475586, "learning_rate": 9.581168831168831e-06, "loss": 29.8186, "step": 18423 }, { "epoch": 438.6686567164179, "grad_norm": 19.912395477294922, "learning_rate": 9.580627705627707e-06, "loss": 30.1886, "step": 18424 }, { "epoch": 438.6925373134328, "grad_norm": 23.333765029907227, "learning_rate": 9.58008658008658e-06, "loss": 28.5251, "step": 18425 }, { "epoch": 438.7164179104478, "grad_norm": 18.128990173339844, "learning_rate": 9.579545454545456e-06, "loss": 29.2279, "step": 18426 }, { "epoch": 438.7402985074627, "grad_norm": 23.521347045898438, "learning_rate": 9.57900432900433e-06, "loss": 29.4037, "step": 18427 }, { "epoch": 438.7641791044776, "grad_norm": 21.70577621459961, "learning_rate": 9.578463203463205e-06, "loss": 29.4827, "step": 18428 }, { "epoch": 438.78805970149256, "grad_norm": 19.254405975341797, "learning_rate": 9.577922077922078e-06, "loss": 29.448, "step": 18429 }, { "epoch": 438.81194029850747, "grad_norm": 18.917478561401367, "learning_rate": 9.577380952380953e-06, "loss": 28.6075, "step": 18430 }, { "epoch": 438.8358208955224, "grad_norm": 21.376449584960938, "learning_rate": 9.576839826839827e-06, "loss": 29.4114, "step": 18431 }, { "epoch": 438.85970149253734, "grad_norm": 21.97930908203125, "learning_rate": 9.576298701298702e-06, "loss": 30.9656, "step": 18432 }, { "epoch": 438.88358208955225, "grad_norm": 16.989635467529297, "learning_rate": 9.575757575757576e-06, "loss": 30.2065, "step": 18433 }, { "epoch": 438.90746268656716, "grad_norm": 19.979799270629883, "learning_rate": 9.575216450216451e-06, "loss": 29.4274, "step": 18434 }, { "epoch": 438.93134328358207, "grad_norm": 20.198780059814453, "learning_rate": 9.574675324675325e-06, "loss": 29.6797, "step": 18435 }, { "epoch": 438.95522388059703, "grad_norm": 23.670854568481445, "learning_rate": 9.5741341991342e-06, "loss": 29.9383, "step": 18436 }, { "epoch": 438.97910447761194, "grad_norm": 20.571134567260742, "learning_rate": 9.573593073593075e-06, "loss": 30.4454, "step": 18437 }, { "epoch": 439.0, "grad_norm": 19.257028579711914, "learning_rate": 9.573051948051949e-06, "loss": 26.2818, "step": 18438 }, { "epoch": 439.0238805970149, "grad_norm": 17.581405639648438, "learning_rate": 9.572510822510823e-06, "loss": 31.1575, "step": 18439 }, { "epoch": 439.0477611940299, "grad_norm": 19.168819427490234, "learning_rate": 9.571969696969698e-06, "loss": 30.432, "step": 18440 }, { "epoch": 439.0716417910448, "grad_norm": 25.43934440612793, "learning_rate": 9.571428571428573e-06, "loss": 29.1514, "step": 18441 }, { "epoch": 439.0955223880597, "grad_norm": 22.299837112426758, "learning_rate": 9.570887445887447e-06, "loss": 28.0818, "step": 18442 }, { "epoch": 439.1194029850746, "grad_norm": 19.19082260131836, "learning_rate": 9.57034632034632e-06, "loss": 30.8908, "step": 18443 }, { "epoch": 439.14328358208957, "grad_norm": 23.17669105529785, "learning_rate": 9.569805194805196e-06, "loss": 29.4613, "step": 18444 }, { "epoch": 439.1671641791045, "grad_norm": 23.474390029907227, "learning_rate": 9.569264069264071e-06, "loss": 30.4825, "step": 18445 }, { "epoch": 439.1910447761194, "grad_norm": 17.151443481445312, "learning_rate": 9.568722943722945e-06, "loss": 30.5358, "step": 18446 }, { "epoch": 439.21492537313435, "grad_norm": 21.566646575927734, "learning_rate": 9.568181818181818e-06, "loss": 30.2673, "step": 18447 }, { "epoch": 439.23880597014926, "grad_norm": 24.813852310180664, "learning_rate": 9.567640692640694e-06, "loss": 31.1043, "step": 18448 }, { "epoch": 439.26268656716417, "grad_norm": 20.139890670776367, "learning_rate": 9.567099567099569e-06, "loss": 29.8795, "step": 18449 }, { "epoch": 439.28656716417913, "grad_norm": 17.006986618041992, "learning_rate": 9.566558441558442e-06, "loss": 29.6531, "step": 18450 }, { "epoch": 439.31044776119404, "grad_norm": 28.538646697998047, "learning_rate": 9.566017316017316e-06, "loss": 29.801, "step": 18451 }, { "epoch": 439.33432835820895, "grad_norm": 21.307741165161133, "learning_rate": 9.565476190476191e-06, "loss": 29.3162, "step": 18452 }, { "epoch": 439.35820895522386, "grad_norm": 17.67075538635254, "learning_rate": 9.564935064935067e-06, "loss": 28.564, "step": 18453 }, { "epoch": 439.3820895522388, "grad_norm": 23.861326217651367, "learning_rate": 9.56439393939394e-06, "loss": 29.5506, "step": 18454 }, { "epoch": 439.40597014925373, "grad_norm": 20.954423904418945, "learning_rate": 9.563852813852814e-06, "loss": 30.6395, "step": 18455 }, { "epoch": 439.42985074626864, "grad_norm": 20.9737491607666, "learning_rate": 9.563311688311689e-06, "loss": 30.9972, "step": 18456 }, { "epoch": 439.4537313432836, "grad_norm": 20.269493103027344, "learning_rate": 9.562770562770564e-06, "loss": 29.935, "step": 18457 }, { "epoch": 439.4776119402985, "grad_norm": 29.25650405883789, "learning_rate": 9.562229437229438e-06, "loss": 31.0753, "step": 18458 }, { "epoch": 439.5014925373134, "grad_norm": 22.108583450317383, "learning_rate": 9.561688311688313e-06, "loss": 30.9956, "step": 18459 }, { "epoch": 439.52537313432833, "grad_norm": 17.152618408203125, "learning_rate": 9.561147186147187e-06, "loss": 28.8859, "step": 18460 }, { "epoch": 439.5492537313433, "grad_norm": 29.345670700073242, "learning_rate": 9.56060606060606e-06, "loss": 29.3872, "step": 18461 }, { "epoch": 439.5731343283582, "grad_norm": 21.24360466003418, "learning_rate": 9.560064935064936e-06, "loss": 29.3939, "step": 18462 }, { "epoch": 439.5970149253731, "grad_norm": 20.873172760009766, "learning_rate": 9.559523809523811e-06, "loss": 30.2505, "step": 18463 }, { "epoch": 439.6208955223881, "grad_norm": 18.2236270904541, "learning_rate": 9.558982683982685e-06, "loss": 30.2748, "step": 18464 }, { "epoch": 439.644776119403, "grad_norm": 23.175804138183594, "learning_rate": 9.558441558441558e-06, "loss": 28.8336, "step": 18465 }, { "epoch": 439.6686567164179, "grad_norm": 21.064016342163086, "learning_rate": 9.557900432900434e-06, "loss": 29.8691, "step": 18466 }, { "epoch": 439.6925373134328, "grad_norm": 17.81121253967285, "learning_rate": 9.557359307359309e-06, "loss": 30.5635, "step": 18467 }, { "epoch": 439.7164179104478, "grad_norm": 18.693376541137695, "learning_rate": 9.556818181818182e-06, "loss": 28.9394, "step": 18468 }, { "epoch": 439.7402985074627, "grad_norm": 20.317508697509766, "learning_rate": 9.556277056277056e-06, "loss": 29.9811, "step": 18469 }, { "epoch": 439.7641791044776, "grad_norm": 24.36268424987793, "learning_rate": 9.555735930735931e-06, "loss": 29.8501, "step": 18470 }, { "epoch": 439.78805970149256, "grad_norm": 18.132827758789062, "learning_rate": 9.555194805194807e-06, "loss": 30.5219, "step": 18471 }, { "epoch": 439.81194029850747, "grad_norm": 20.71527099609375, "learning_rate": 9.55465367965368e-06, "loss": 29.5205, "step": 18472 }, { "epoch": 439.8358208955224, "grad_norm": 17.375009536743164, "learning_rate": 9.554112554112554e-06, "loss": 30.0705, "step": 18473 }, { "epoch": 439.85970149253734, "grad_norm": 18.961162567138672, "learning_rate": 9.55357142857143e-06, "loss": 30.8293, "step": 18474 }, { "epoch": 439.88358208955225, "grad_norm": 17.276344299316406, "learning_rate": 9.553030303030304e-06, "loss": 29.609, "step": 18475 }, { "epoch": 439.90746268656716, "grad_norm": 20.65808868408203, "learning_rate": 9.552489177489178e-06, "loss": 30.5208, "step": 18476 }, { "epoch": 439.93134328358207, "grad_norm": 28.521509170532227, "learning_rate": 9.551948051948052e-06, "loss": 29.6507, "step": 18477 }, { "epoch": 439.95522388059703, "grad_norm": 17.221881866455078, "learning_rate": 9.551406926406927e-06, "loss": 29.306, "step": 18478 }, { "epoch": 439.97910447761194, "grad_norm": 16.699331283569336, "learning_rate": 9.550865800865802e-06, "loss": 28.9915, "step": 18479 }, { "epoch": 440.0, "grad_norm": 22.226022720336914, "learning_rate": 9.550324675324676e-06, "loss": 27.0842, "step": 18480 }, { "epoch": 440.0, "step": 18480, "total_flos": 9.084245825331505e+17, "train_loss": 1.3789456827815993, "train_runtime": 25688.7761, "train_samples_per_second": 91.67, "train_steps_per_second": 0.719 }, { "epoch": 440.0238805970149, "grad_norm": 17.3594970703125, "learning_rate": 1e-05, "loss": 28.5431, "step": 18481 }, { "epoch": 440.0477611940299, "grad_norm": Infinity, "learning_rate": 9.9994708994709e-06, "loss": 34.6472, "step": 18482 }, { "epoch": 440.0716417910448, "grad_norm": 216.82839965820312, "learning_rate": 9.9994708994709e-06, "loss": 34.2689, "step": 18483 }, { "epoch": 440.0955223880597, "grad_norm": 99.35746765136719, "learning_rate": 9.9989417989418e-06, "loss": 32.1615, "step": 18484 }, { "epoch": 440.1194029850746, "grad_norm": 67.5246353149414, "learning_rate": 9.998412698412699e-06, "loss": 31.404, "step": 18485 }, { "epoch": 440.14328358208957, "grad_norm": 54.333831787109375, "learning_rate": 9.997883597883598e-06, "loss": 31.1933, "step": 18486 }, { "epoch": 440.1671641791045, "grad_norm": 67.78981018066406, "learning_rate": 9.997354497354498e-06, "loss": 30.1989, "step": 18487 }, { "epoch": 440.1910447761194, "grad_norm": 64.62834167480469, "learning_rate": 9.996825396825399e-06, "loss": 31.7329, "step": 18488 }, { "epoch": 440.21492537313435, "grad_norm": 38.224002838134766, "learning_rate": 9.996296296296298e-06, "loss": 30.013, "step": 18489 }, { "epoch": 440.23880597014926, "grad_norm": 51.87199401855469, "learning_rate": 9.995767195767196e-06, "loss": 30.6313, "step": 18490 }, { "epoch": 440.26268656716417, "grad_norm": 35.34834289550781, "learning_rate": 9.995238095238095e-06, "loss": 30.4587, "step": 18491 }, { "epoch": 440.28656716417913, "grad_norm": 32.24353790283203, "learning_rate": 9.994708994708996e-06, "loss": 29.857, "step": 18492 }, { "epoch": 440.31044776119404, "grad_norm": 45.48128890991211, "learning_rate": 9.994179894179895e-06, "loss": 30.6734, "step": 18493 }, { "epoch": 440.33432835820895, "grad_norm": 29.150545120239258, "learning_rate": 9.993650793650793e-06, "loss": 29.4115, "step": 18494 }, { "epoch": 440.35820895522386, "grad_norm": 31.93963050842285, "learning_rate": 9.993121693121694e-06, "loss": 29.6597, "step": 18495 }, { "epoch": 440.3820895522388, "grad_norm": 37.35916519165039, "learning_rate": 9.992592592592594e-06, "loss": 29.9122, "step": 18496 }, { "epoch": 440.40597014925373, "grad_norm": 21.346900939941406, "learning_rate": 9.992063492063493e-06, "loss": 29.7236, "step": 18497 }, { "epoch": 440.42985074626864, "grad_norm": 30.50381851196289, "learning_rate": 9.991534391534392e-06, "loss": 29.7606, "step": 18498 }, { "epoch": 440.4537313432836, "grad_norm": 31.944915771484375, "learning_rate": 9.991005291005293e-06, "loss": 31.5071, "step": 18499 }, { "epoch": 440.4776119402985, "grad_norm": 24.084373474121094, "learning_rate": 9.990476190476191e-06, "loss": 30.5081, "step": 18500 }, { "epoch": 440.5014925373134, "grad_norm": 31.495773315429688, "learning_rate": 9.98994708994709e-06, "loss": 31.0624, "step": 18501 }, { "epoch": 440.52537313432833, "grad_norm": 25.130971908569336, "learning_rate": 9.989417989417989e-06, "loss": 29.6397, "step": 18502 }, { "epoch": 440.5492537313433, "grad_norm": 27.502403259277344, "learning_rate": 9.98888888888889e-06, "loss": 29.7606, "step": 18503 }, { "epoch": 440.5731343283582, "grad_norm": 24.509063720703125, "learning_rate": 9.98835978835979e-06, "loss": 30.8578, "step": 18504 }, { "epoch": 440.5970149253731, "grad_norm": 25.166122436523438, "learning_rate": 9.987830687830689e-06, "loss": 30.5301, "step": 18505 }, { "epoch": 440.6208955223881, "grad_norm": 22.01051139831543, "learning_rate": 9.987301587301588e-06, "loss": 30.5776, "step": 18506 }, { "epoch": 440.644776119403, "grad_norm": 26.435636520385742, "learning_rate": 9.986772486772488e-06, "loss": 29.8726, "step": 18507 }, { "epoch": 440.6686567164179, "grad_norm": 21.437650680541992, "learning_rate": 9.986243386243387e-06, "loss": 31.0078, "step": 18508 }, { "epoch": 440.6925373134328, "grad_norm": 23.44769287109375, "learning_rate": 9.985714285714286e-06, "loss": 30.2984, "step": 18509 }, { "epoch": 440.7164179104478, "grad_norm": 21.210704803466797, "learning_rate": 9.985185185185185e-06, "loss": 30.3408, "step": 18510 }, { "epoch": 440.7402985074627, "grad_norm": 24.448598861694336, "learning_rate": 9.984656084656085e-06, "loss": 29.5081, "step": 18511 }, { "epoch": 440.7641791044776, "grad_norm": 19.705307006835938, "learning_rate": 9.984126984126986e-06, "loss": 29.2404, "step": 18512 }, { "epoch": 440.78805970149256, "grad_norm": 21.462610244750977, "learning_rate": 9.983597883597885e-06, "loss": 30.1652, "step": 18513 }, { "epoch": 440.81194029850747, "grad_norm": 18.456214904785156, "learning_rate": 9.983068783068783e-06, "loss": 28.7153, "step": 18514 }, { "epoch": 440.8358208955224, "grad_norm": 20.732948303222656, "learning_rate": 9.982539682539684e-06, "loss": 29.555, "step": 18515 }, { "epoch": 440.85970149253734, "grad_norm": 18.703096389770508, "learning_rate": 9.982010582010583e-06, "loss": 29.4648, "step": 18516 }, { "epoch": 440.88358208955225, "grad_norm": 19.93602752685547, "learning_rate": 9.981481481481482e-06, "loss": 30.4644, "step": 18517 }, { "epoch": 440.90746268656716, "grad_norm": 18.751026153564453, "learning_rate": 9.980952380952382e-06, "loss": 30.0305, "step": 18518 }, { "epoch": 440.93134328358207, "grad_norm": 19.60203742980957, "learning_rate": 9.980423280423281e-06, "loss": 30.8265, "step": 18519 }, { "epoch": 440.95522388059703, "grad_norm": 16.871681213378906, "learning_rate": 9.979894179894181e-06, "loss": 29.7696, "step": 18520 }, { "epoch": 440.97910447761194, "grad_norm": 21.47115707397461, "learning_rate": 9.97936507936508e-06, "loss": 30.9418, "step": 18521 }, { "epoch": 441.0, "grad_norm": 19.628236770629883, "learning_rate": 9.97883597883598e-06, "loss": 26.7793, "step": 18522 }, { "epoch": 441.0238805970149, "grad_norm": 23.01894760131836, "learning_rate": 9.97830687830688e-06, "loss": 30.63, "step": 18523 }, { "epoch": 441.0477611940299, "grad_norm": 24.92695426940918, "learning_rate": 9.977777777777778e-06, "loss": 31.6279, "step": 18524 }, { "epoch": 441.0716417910448, "grad_norm": 22.709012985229492, "learning_rate": 9.977248677248677e-06, "loss": 30.1622, "step": 18525 }, { "epoch": 441.0955223880597, "grad_norm": 21.488391876220703, "learning_rate": 9.976719576719578e-06, "loss": 30.9013, "step": 18526 }, { "epoch": 441.1194029850746, "grad_norm": 24.677249908447266, "learning_rate": 9.976190476190477e-06, "loss": 29.3563, "step": 18527 }, { "epoch": 441.14328358208957, "grad_norm": 20.757888793945312, "learning_rate": 9.975661375661377e-06, "loss": 29.4941, "step": 18528 }, { "epoch": 441.1671641791045, "grad_norm": 28.257102966308594, "learning_rate": 9.975132275132276e-06, "loss": 28.1639, "step": 18529 }, { "epoch": 441.1910447761194, "grad_norm": 21.217844009399414, "learning_rate": 9.974603174603176e-06, "loss": 30.9851, "step": 18530 }, { "epoch": 441.21492537313435, "grad_norm": 20.33362579345703, "learning_rate": 9.974074074074075e-06, "loss": 28.5901, "step": 18531 }, { "epoch": 441.23880597014926, "grad_norm": 24.75749969482422, "learning_rate": 9.973544973544974e-06, "loss": 30.44, "step": 18532 }, { "epoch": 441.26268656716417, "grad_norm": 22.43568992614746, "learning_rate": 9.973015873015875e-06, "loss": 29.9177, "step": 18533 }, { "epoch": 441.28656716417913, "grad_norm": 21.92473030090332, "learning_rate": 9.972486772486773e-06, "loss": 29.5564, "step": 18534 }, { "epoch": 441.31044776119404, "grad_norm": 18.299039840698242, "learning_rate": 9.971957671957672e-06, "loss": 29.081, "step": 18535 }, { "epoch": 441.33432835820895, "grad_norm": 30.581947326660156, "learning_rate": 9.971428571428571e-06, "loss": 30.2632, "step": 18536 }, { "epoch": 441.35820895522386, "grad_norm": 18.41385841369629, "learning_rate": 9.970899470899472e-06, "loss": 30.6754, "step": 18537 }, { "epoch": 441.3820895522388, "grad_norm": 19.474079132080078, "learning_rate": 9.970370370370372e-06, "loss": 29.5535, "step": 18538 }, { "epoch": 441.40597014925373, "grad_norm": 21.05908966064453, "learning_rate": 9.969841269841271e-06, "loss": 29.9627, "step": 18539 }, { "epoch": 441.42985074626864, "grad_norm": 22.585433959960938, "learning_rate": 9.96931216931217e-06, "loss": 28.8729, "step": 18540 }, { "epoch": 441.4537313432836, "grad_norm": 20.29838752746582, "learning_rate": 9.96878306878307e-06, "loss": 28.6151, "step": 18541 }, { "epoch": 441.4776119402985, "grad_norm": 18.94068145751953, "learning_rate": 9.968253968253969e-06, "loss": 29.5402, "step": 18542 }, { "epoch": 441.5014925373134, "grad_norm": 19.419042587280273, "learning_rate": 9.967724867724868e-06, "loss": 30.2059, "step": 18543 }, { "epoch": 441.52537313432833, "grad_norm": 22.420263290405273, "learning_rate": 9.967195767195767e-06, "loss": 30.3212, "step": 18544 }, { "epoch": 441.5492537313433, "grad_norm": 24.03433609008789, "learning_rate": 9.966666666666667e-06, "loss": 30.5324, "step": 18545 }, { "epoch": 441.5731343283582, "grad_norm": 22.240964889526367, "learning_rate": 9.966137566137568e-06, "loss": 29.8519, "step": 18546 }, { "epoch": 441.5970149253731, "grad_norm": 17.776729583740234, "learning_rate": 9.965608465608467e-06, "loss": 30.1097, "step": 18547 }, { "epoch": 441.6208955223881, "grad_norm": 19.53279685974121, "learning_rate": 9.965079365079365e-06, "loss": 30.1563, "step": 18548 }, { "epoch": 441.644776119403, "grad_norm": 20.960952758789062, "learning_rate": 9.964550264550266e-06, "loss": 30.4249, "step": 18549 }, { "epoch": 441.6686567164179, "grad_norm": 25.247777938842773, "learning_rate": 9.964021164021165e-06, "loss": 31.019, "step": 18550 }, { "epoch": 441.6925373134328, "grad_norm": 19.04428482055664, "learning_rate": 9.963492063492064e-06, "loss": 29.7853, "step": 18551 }, { "epoch": 441.7164179104478, "grad_norm": 19.53366470336914, "learning_rate": 9.962962962962964e-06, "loss": 29.2121, "step": 18552 }, { "epoch": 441.7402985074627, "grad_norm": 17.115434646606445, "learning_rate": 9.962433862433863e-06, "loss": 29.8615, "step": 18553 }, { "epoch": 441.7641791044776, "grad_norm": 18.052148818969727, "learning_rate": 9.961904761904763e-06, "loss": 30.1049, "step": 18554 }, { "epoch": 441.78805970149256, "grad_norm": 23.43327522277832, "learning_rate": 9.961375661375662e-06, "loss": 28.4668, "step": 18555 }, { "epoch": 441.81194029850747, "grad_norm": 21.95025634765625, "learning_rate": 9.960846560846563e-06, "loss": 29.9168, "step": 18556 }, { "epoch": 441.8358208955224, "grad_norm": 19.90508270263672, "learning_rate": 9.960317460317462e-06, "loss": 30.6355, "step": 18557 }, { "epoch": 441.85970149253734, "grad_norm": 17.02307891845703, "learning_rate": 9.95978835978836e-06, "loss": 29.8329, "step": 18558 }, { "epoch": 441.88358208955225, "grad_norm": 19.480838775634766, "learning_rate": 9.95925925925926e-06, "loss": 29.438, "step": 18559 }, { "epoch": 441.90746268656716, "grad_norm": 19.61400604248047, "learning_rate": 9.95873015873016e-06, "loss": 29.7707, "step": 18560 }, { "epoch": 441.93134328358207, "grad_norm": 18.133447647094727, "learning_rate": 9.958201058201059e-06, "loss": 30.9157, "step": 18561 }, { "epoch": 441.95522388059703, "grad_norm": 19.02525520324707, "learning_rate": 9.957671957671959e-06, "loss": 29.9864, "step": 18562 }, { "epoch": 441.97910447761194, "grad_norm": 18.124753952026367, "learning_rate": 9.957142857142858e-06, "loss": 30.0027, "step": 18563 }, { "epoch": 442.0, "grad_norm": 19.648605346679688, "learning_rate": 9.956613756613758e-06, "loss": 26.2238, "step": 18564 }, { "epoch": 442.0238805970149, "grad_norm": 17.967876434326172, "learning_rate": 9.956084656084657e-06, "loss": 28.3698, "step": 18565 }, { "epoch": 442.0477611940299, "grad_norm": 20.19828987121582, "learning_rate": 9.955555555555556e-06, "loss": 29.6088, "step": 18566 }, { "epoch": 442.0716417910448, "grad_norm": 21.698062896728516, "learning_rate": 9.955026455026457e-06, "loss": 29.477, "step": 18567 }, { "epoch": 442.0955223880597, "grad_norm": 20.658674240112305, "learning_rate": 9.954497354497355e-06, "loss": 29.1767, "step": 18568 }, { "epoch": 442.1194029850746, "grad_norm": 21.921630859375, "learning_rate": 9.953968253968254e-06, "loss": 30.28, "step": 18569 }, { "epoch": 442.14328358208957, "grad_norm": 21.92098617553711, "learning_rate": 9.953439153439155e-06, "loss": 29.0111, "step": 18570 }, { "epoch": 442.1671641791045, "grad_norm": 17.449251174926758, "learning_rate": 9.952910052910054e-06, "loss": 31.0356, "step": 18571 }, { "epoch": 442.1910447761194, "grad_norm": 18.957931518554688, "learning_rate": 9.952380952380954e-06, "loss": 29.2397, "step": 18572 }, { "epoch": 442.21492537313435, "grad_norm": 17.29442024230957, "learning_rate": 9.951851851851853e-06, "loss": 30.5482, "step": 18573 }, { "epoch": 442.23880597014926, "grad_norm": 21.626771926879883, "learning_rate": 9.951322751322752e-06, "loss": 29.6465, "step": 18574 }, { "epoch": 442.26268656716417, "grad_norm": 18.364559173583984, "learning_rate": 9.950793650793652e-06, "loss": 30.3357, "step": 18575 }, { "epoch": 442.28656716417913, "grad_norm": 21.34333610534668, "learning_rate": 9.950264550264551e-06, "loss": 29.0995, "step": 18576 }, { "epoch": 442.31044776119404, "grad_norm": 17.538114547729492, "learning_rate": 9.94973544973545e-06, "loss": 29.9753, "step": 18577 }, { "epoch": 442.33432835820895, "grad_norm": 22.703763961791992, "learning_rate": 9.94920634920635e-06, "loss": 29.5471, "step": 18578 }, { "epoch": 442.35820895522386, "grad_norm": 18.60055923461914, "learning_rate": 9.94867724867725e-06, "loss": 30.6518, "step": 18579 }, { "epoch": 442.3820895522388, "grad_norm": 22.66931915283203, "learning_rate": 9.94814814814815e-06, "loss": 29.7501, "step": 18580 }, { "epoch": 442.40597014925373, "grad_norm": 18.586894989013672, "learning_rate": 9.947619047619049e-06, "loss": 29.0112, "step": 18581 }, { "epoch": 442.42985074626864, "grad_norm": 23.203092575073242, "learning_rate": 9.947089947089947e-06, "loss": 30.937, "step": 18582 }, { "epoch": 442.4537313432836, "grad_norm": 18.897573471069336, "learning_rate": 9.946560846560848e-06, "loss": 30.4058, "step": 18583 }, { "epoch": 442.4776119402985, "grad_norm": 22.032442092895508, "learning_rate": 9.946031746031747e-06, "loss": 29.1225, "step": 18584 }, { "epoch": 442.5014925373134, "grad_norm": 21.034584045410156, "learning_rate": 9.945502645502646e-06, "loss": 29.0111, "step": 18585 }, { "epoch": 442.52537313432833, "grad_norm": 17.994476318359375, "learning_rate": 9.944973544973546e-06, "loss": 30.4909, "step": 18586 }, { "epoch": 442.5492537313433, "grad_norm": 22.44892120361328, "learning_rate": 9.944444444444445e-06, "loss": 29.9081, "step": 18587 }, { "epoch": 442.5731343283582, "grad_norm": 27.11910057067871, "learning_rate": 9.943915343915345e-06, "loss": 30.1356, "step": 18588 }, { "epoch": 442.5970149253731, "grad_norm": 20.926912307739258, "learning_rate": 9.943386243386244e-06, "loss": 29.785, "step": 18589 }, { "epoch": 442.6208955223881, "grad_norm": 15.875774383544922, "learning_rate": 9.942857142857145e-06, "loss": 30.8185, "step": 18590 }, { "epoch": 442.644776119403, "grad_norm": 21.94791603088379, "learning_rate": 9.942328042328044e-06, "loss": 30.2967, "step": 18591 }, { "epoch": 442.6686567164179, "grad_norm": 22.416215896606445, "learning_rate": 9.941798941798942e-06, "loss": 30.0581, "step": 18592 }, { "epoch": 442.6925373134328, "grad_norm": 19.829410552978516, "learning_rate": 9.941269841269841e-06, "loss": 30.66, "step": 18593 }, { "epoch": 442.7164179104478, "grad_norm": 18.064342498779297, "learning_rate": 9.940740740740742e-06, "loss": 30.4255, "step": 18594 }, { "epoch": 442.7402985074627, "grad_norm": 19.131240844726562, "learning_rate": 9.94021164021164e-06, "loss": 29.9299, "step": 18595 }, { "epoch": 442.7641791044776, "grad_norm": 25.69076156616211, "learning_rate": 9.939682539682541e-06, "loss": 30.0875, "step": 18596 }, { "epoch": 442.78805970149256, "grad_norm": 24.921091079711914, "learning_rate": 9.93915343915344e-06, "loss": 30.9268, "step": 18597 }, { "epoch": 442.81194029850747, "grad_norm": NaN, "learning_rate": 9.93862433862434e-06, "loss": 37.3905, "step": 18598 }, { "epoch": 442.8358208955224, "grad_norm": 18.504121780395508, "learning_rate": 9.93862433862434e-06, "loss": 30.1702, "step": 18599 }, { "epoch": 442.85970149253734, "grad_norm": 23.754684448242188, "learning_rate": 9.93809523809524e-06, "loss": 30.1518, "step": 18600 }, { "epoch": 442.88358208955225, "grad_norm": 28.301525115966797, "learning_rate": 9.937566137566138e-06, "loss": 30.9355, "step": 18601 }, { "epoch": 442.90746268656716, "grad_norm": 17.825712203979492, "learning_rate": 9.937037037037039e-06, "loss": 29.4089, "step": 18602 }, { "epoch": 442.93134328358207, "grad_norm": 18.297264099121094, "learning_rate": 9.936507936507937e-06, "loss": 29.377, "step": 18603 }, { "epoch": 442.95522388059703, "grad_norm": 25.3094425201416, "learning_rate": 9.935978835978836e-06, "loss": 29.5095, "step": 18604 }, { "epoch": 442.97910447761194, "grad_norm": 24.7398681640625, "learning_rate": 9.935449735449737e-06, "loss": 29.4477, "step": 18605 }, { "epoch": 443.0, "grad_norm": 17.500024795532227, "learning_rate": 9.934920634920636e-06, "loss": 25.0404, "step": 18606 }, { "epoch": 443.0238805970149, "grad_norm": 17.63364601135254, "learning_rate": 9.934391534391536e-06, "loss": 30.2119, "step": 18607 }, { "epoch": 443.0477611940299, "grad_norm": 26.692359924316406, "learning_rate": 9.933862433862435e-06, "loss": 29.4333, "step": 18608 }, { "epoch": 443.0716417910448, "grad_norm": 20.488828659057617, "learning_rate": 9.933333333333334e-06, "loss": 28.8854, "step": 18609 }, { "epoch": 443.0955223880597, "grad_norm": 21.39193344116211, "learning_rate": 9.932804232804234e-06, "loss": 29.0897, "step": 18610 }, { "epoch": 443.1194029850746, "grad_norm": 19.156391143798828, "learning_rate": 9.932275132275133e-06, "loss": 29.3835, "step": 18611 }, { "epoch": 443.14328358208957, "grad_norm": 27.199663162231445, "learning_rate": 9.931746031746032e-06, "loss": 31.355, "step": 18612 }, { "epoch": 443.1671641791045, "grad_norm": 19.176109313964844, "learning_rate": 9.931216931216932e-06, "loss": 30.5999, "step": 18613 }, { "epoch": 443.1910447761194, "grad_norm": 21.19511604309082, "learning_rate": 9.930687830687831e-06, "loss": 29.8309, "step": 18614 }, { "epoch": 443.21492537313435, "grad_norm": 17.26340103149414, "learning_rate": 9.930158730158732e-06, "loss": 29.8293, "step": 18615 }, { "epoch": 443.23880597014926, "grad_norm": 25.40530014038086, "learning_rate": 9.92962962962963e-06, "loss": 29.635, "step": 18616 }, { "epoch": 443.26268656716417, "grad_norm": 23.863309860229492, "learning_rate": 9.929100529100531e-06, "loss": 30.0053, "step": 18617 }, { "epoch": 443.28656716417913, "grad_norm": 19.732179641723633, "learning_rate": 9.92857142857143e-06, "loss": 28.9756, "step": 18618 }, { "epoch": 443.31044776119404, "grad_norm": 19.102367401123047, "learning_rate": 9.928042328042329e-06, "loss": 30.768, "step": 18619 }, { "epoch": 443.33432835820895, "grad_norm": 16.483366012573242, "learning_rate": 9.927513227513227e-06, "loss": 27.8106, "step": 18620 }, { "epoch": 443.35820895522386, "grad_norm": 21.833375930786133, "learning_rate": 9.926984126984128e-06, "loss": 28.161, "step": 18621 }, { "epoch": 443.3820895522388, "grad_norm": 23.619691848754883, "learning_rate": 9.926455026455027e-06, "loss": 29.5948, "step": 18622 }, { "epoch": 443.40597014925373, "grad_norm": 22.981292724609375, "learning_rate": 9.925925925925927e-06, "loss": 30.1602, "step": 18623 }, { "epoch": 443.42985074626864, "grad_norm": 15.958575248718262, "learning_rate": 9.925396825396826e-06, "loss": 29.7908, "step": 18624 }, { "epoch": 443.4537313432836, "grad_norm": 21.9560604095459, "learning_rate": 9.924867724867727e-06, "loss": 30.6451, "step": 18625 }, { "epoch": 443.4776119402985, "grad_norm": 22.995847702026367, "learning_rate": 9.924338624338625e-06, "loss": 30.2189, "step": 18626 }, { "epoch": 443.5014925373134, "grad_norm": 19.66353416442871, "learning_rate": 9.923809523809524e-06, "loss": 29.547, "step": 18627 }, { "epoch": 443.52537313432833, "grad_norm": 18.92270851135254, "learning_rate": 9.923280423280423e-06, "loss": 29.694, "step": 18628 }, { "epoch": 443.5492537313433, "grad_norm": 19.271345138549805, "learning_rate": 9.922751322751324e-06, "loss": 29.9493, "step": 18629 }, { "epoch": 443.5731343283582, "grad_norm": 20.335243225097656, "learning_rate": 9.922222222222222e-06, "loss": 30.3863, "step": 18630 }, { "epoch": 443.5970149253731, "grad_norm": 24.05260467529297, "learning_rate": 9.921693121693123e-06, "loss": 30.5802, "step": 18631 }, { "epoch": 443.6208955223881, "grad_norm": 22.513591766357422, "learning_rate": 9.921164021164022e-06, "loss": 31.0789, "step": 18632 }, { "epoch": 443.644776119403, "grad_norm": 15.696191787719727, "learning_rate": 9.920634920634922e-06, "loss": 29.4079, "step": 18633 }, { "epoch": 443.6686567164179, "grad_norm": NaN, "learning_rate": 9.920105820105821e-06, "loss": 52.3631, "step": 18634 }, { "epoch": 443.6925373134328, "grad_norm": 23.175010681152344, "learning_rate": 9.920105820105821e-06, "loss": 29.5275, "step": 18635 }, { "epoch": 443.7164179104478, "grad_norm": 23.706695556640625, "learning_rate": 9.91957671957672e-06, "loss": 29.7385, "step": 18636 }, { "epoch": 443.7402985074627, "grad_norm": 18.948604583740234, "learning_rate": 9.91904761904762e-06, "loss": 30.7269, "step": 18637 }, { "epoch": 443.7641791044776, "grad_norm": 19.654752731323242, "learning_rate": 9.91851851851852e-06, "loss": 28.3922, "step": 18638 }, { "epoch": 443.78805970149256, "grad_norm": 17.5794677734375, "learning_rate": 9.917989417989418e-06, "loss": 29.2845, "step": 18639 }, { "epoch": 443.81194029850747, "grad_norm": 22.551067352294922, "learning_rate": 9.917460317460319e-06, "loss": 29.4956, "step": 18640 }, { "epoch": 443.8358208955224, "grad_norm": 21.116165161132812, "learning_rate": 9.916931216931217e-06, "loss": 29.8524, "step": 18641 }, { "epoch": 443.85970149253734, "grad_norm": 24.164901733398438, "learning_rate": 9.916402116402118e-06, "loss": 30.8316, "step": 18642 }, { "epoch": 443.88358208955225, "grad_norm": 18.75589370727539, "learning_rate": 9.915873015873017e-06, "loss": 30.6909, "step": 18643 }, { "epoch": 443.90746268656716, "grad_norm": 21.51673698425293, "learning_rate": 9.915343915343916e-06, "loss": 30.8981, "step": 18644 }, { "epoch": 443.93134328358207, "grad_norm": 20.762636184692383, "learning_rate": 9.914814814814816e-06, "loss": 29.25, "step": 18645 }, { "epoch": 443.95522388059703, "grad_norm": 23.939481735229492, "learning_rate": 9.914285714285715e-06, "loss": 30.0374, "step": 18646 }, { "epoch": 443.97910447761194, "grad_norm": 20.75077247619629, "learning_rate": 9.913756613756614e-06, "loss": 29.2723, "step": 18647 }, { "epoch": 444.0, "grad_norm": 19.254112243652344, "learning_rate": 9.913227513227514e-06, "loss": 26.2907, "step": 18648 }, { "epoch": 444.0238805970149, "grad_norm": 20.285133361816406, "learning_rate": 9.912698412698413e-06, "loss": 28.9627, "step": 18649 }, { "epoch": 444.0477611940299, "grad_norm": 20.10259437561035, "learning_rate": 9.912169312169314e-06, "loss": 29.3833, "step": 18650 }, { "epoch": 444.0716417910448, "grad_norm": 18.515504837036133, "learning_rate": 9.911640211640212e-06, "loss": 29.2917, "step": 18651 }, { "epoch": 444.0955223880597, "grad_norm": 20.508150100708008, "learning_rate": 9.911111111111113e-06, "loss": 29.2335, "step": 18652 }, { "epoch": 444.1194029850746, "grad_norm": 17.969497680664062, "learning_rate": 9.910582010582012e-06, "loss": 30.0243, "step": 18653 }, { "epoch": 444.14328358208957, "grad_norm": 25.349966049194336, "learning_rate": 9.91005291005291e-06, "loss": 30.3186, "step": 18654 }, { "epoch": 444.1671641791045, "grad_norm": 18.017724990844727, "learning_rate": 9.90952380952381e-06, "loss": 30.2973, "step": 18655 }, { "epoch": 444.1910447761194, "grad_norm": 26.20635414123535, "learning_rate": 9.90899470899471e-06, "loss": 29.9004, "step": 18656 }, { "epoch": 444.21492537313435, "grad_norm": 20.62889289855957, "learning_rate": 9.908465608465609e-06, "loss": 30.3634, "step": 18657 }, { "epoch": 444.23880597014926, "grad_norm": 21.628355026245117, "learning_rate": 9.90793650793651e-06, "loss": 30.4709, "step": 18658 }, { "epoch": 444.26268656716417, "grad_norm": 20.195955276489258, "learning_rate": 9.907407407407408e-06, "loss": 30.5781, "step": 18659 }, { "epoch": 444.28656716417913, "grad_norm": 24.391246795654297, "learning_rate": 9.906878306878309e-06, "loss": 29.4739, "step": 18660 }, { "epoch": 444.31044776119404, "grad_norm": 22.97021484375, "learning_rate": 9.906349206349207e-06, "loss": 29.2833, "step": 18661 }, { "epoch": 444.33432835820895, "grad_norm": 19.887691497802734, "learning_rate": 9.905820105820106e-06, "loss": 29.7117, "step": 18662 }, { "epoch": 444.35820895522386, "grad_norm": 23.528059005737305, "learning_rate": 9.905291005291005e-06, "loss": 30.1557, "step": 18663 }, { "epoch": 444.3820895522388, "grad_norm": 20.737913131713867, "learning_rate": 9.904761904761906e-06, "loss": 29.1906, "step": 18664 }, { "epoch": 444.40597014925373, "grad_norm": 21.257333755493164, "learning_rate": 9.904232804232804e-06, "loss": 30.1986, "step": 18665 }, { "epoch": 444.42985074626864, "grad_norm": 18.684720993041992, "learning_rate": 9.903703703703705e-06, "loss": 30.4563, "step": 18666 }, { "epoch": 444.4537313432836, "grad_norm": 19.677936553955078, "learning_rate": 9.903174603174604e-06, "loss": 28.8837, "step": 18667 }, { "epoch": 444.4776119402985, "grad_norm": 22.362728118896484, "learning_rate": 9.902645502645504e-06, "loss": 30.1345, "step": 18668 }, { "epoch": 444.5014925373134, "grad_norm": 23.642974853515625, "learning_rate": 9.902116402116403e-06, "loss": 30.1094, "step": 18669 }, { "epoch": 444.52537313432833, "grad_norm": 24.003171920776367, "learning_rate": 9.901587301587302e-06, "loss": 30.5538, "step": 18670 }, { "epoch": 444.5492537313433, "grad_norm": 20.49563217163086, "learning_rate": 9.901058201058202e-06, "loss": 30.2229, "step": 18671 }, { "epoch": 444.5731343283582, "grad_norm": 24.953784942626953, "learning_rate": 9.900529100529101e-06, "loss": 28.4147, "step": 18672 }, { "epoch": 444.5970149253731, "grad_norm": 21.08348274230957, "learning_rate": 9.9e-06, "loss": 30.7389, "step": 18673 }, { "epoch": 444.6208955223881, "grad_norm": 24.60961151123047, "learning_rate": 9.8994708994709e-06, "loss": 30.0161, "step": 18674 }, { "epoch": 444.644776119403, "grad_norm": 17.964492797851562, "learning_rate": 9.8989417989418e-06, "loss": 29.0658, "step": 18675 }, { "epoch": 444.6686567164179, "grad_norm": 23.04926300048828, "learning_rate": 9.8984126984127e-06, "loss": 28.9474, "step": 18676 }, { "epoch": 444.6925373134328, "grad_norm": 21.68794059753418, "learning_rate": 9.897883597883599e-06, "loss": 28.6149, "step": 18677 }, { "epoch": 444.7164179104478, "grad_norm": 25.25395965576172, "learning_rate": 9.897354497354498e-06, "loss": 29.3648, "step": 18678 }, { "epoch": 444.7402985074627, "grad_norm": 22.624588012695312, "learning_rate": 9.896825396825398e-06, "loss": 30.8506, "step": 18679 }, { "epoch": 444.7641791044776, "grad_norm": 19.853567123413086, "learning_rate": 9.896296296296297e-06, "loss": 30.412, "step": 18680 }, { "epoch": 444.78805970149256, "grad_norm": 28.002241134643555, "learning_rate": 9.895767195767196e-06, "loss": 31.018, "step": 18681 }, { "epoch": 444.81194029850747, "grad_norm": 28.90605926513672, "learning_rate": 9.895238095238096e-06, "loss": 29.1872, "step": 18682 }, { "epoch": 444.8358208955224, "grad_norm": 17.46945571899414, "learning_rate": 9.894708994708995e-06, "loss": 28.7829, "step": 18683 }, { "epoch": 444.85970149253734, "grad_norm": 24.100780487060547, "learning_rate": 9.894179894179896e-06, "loss": 28.8928, "step": 18684 }, { "epoch": 444.88358208955225, "grad_norm": 23.657773971557617, "learning_rate": 9.893650793650794e-06, "loss": 29.678, "step": 18685 }, { "epoch": 444.90746268656716, "grad_norm": 17.536001205444336, "learning_rate": 9.893121693121695e-06, "loss": 30.3878, "step": 18686 }, { "epoch": 444.93134328358207, "grad_norm": 20.298398971557617, "learning_rate": 9.892592592592594e-06, "loss": 29.8327, "step": 18687 }, { "epoch": 444.95522388059703, "grad_norm": 17.706310272216797, "learning_rate": 9.892063492063493e-06, "loss": 30.6398, "step": 18688 }, { "epoch": 444.97910447761194, "grad_norm": 27.356420516967773, "learning_rate": 9.891534391534391e-06, "loss": 31.0165, "step": 18689 }, { "epoch": 445.0, "grad_norm": 21.346406936645508, "learning_rate": 9.891005291005292e-06, "loss": 25.2093, "step": 18690 }, { "epoch": 445.0238805970149, "grad_norm": 20.276702880859375, "learning_rate": 9.89047619047619e-06, "loss": 30.3656, "step": 18691 }, { "epoch": 445.0477611940299, "grad_norm": 19.048446655273438, "learning_rate": 9.889947089947091e-06, "loss": 29.9698, "step": 18692 }, { "epoch": 445.0716417910448, "grad_norm": 20.90336036682129, "learning_rate": 9.88941798941799e-06, "loss": 30.4029, "step": 18693 }, { "epoch": 445.0955223880597, "grad_norm": 18.675891876220703, "learning_rate": 9.88888888888889e-06, "loss": 28.9267, "step": 18694 }, { "epoch": 445.1194029850746, "grad_norm": 20.610153198242188, "learning_rate": 9.88835978835979e-06, "loss": 29.4429, "step": 18695 }, { "epoch": 445.14328358208957, "grad_norm": 16.437572479248047, "learning_rate": 9.887830687830688e-06, "loss": 29.3179, "step": 18696 }, { "epoch": 445.1671641791045, "grad_norm": 23.6490421295166, "learning_rate": 9.887301587301587e-06, "loss": 29.5433, "step": 18697 }, { "epoch": 445.1910447761194, "grad_norm": 21.725297927856445, "learning_rate": 9.886772486772488e-06, "loss": 28.6549, "step": 18698 }, { "epoch": 445.21492537313435, "grad_norm": 24.445068359375, "learning_rate": 9.886243386243386e-06, "loss": 29.7307, "step": 18699 }, { "epoch": 445.23880597014926, "grad_norm": 16.260190963745117, "learning_rate": 9.885714285714287e-06, "loss": 29.4139, "step": 18700 }, { "epoch": 445.26268656716417, "grad_norm": 20.442712783813477, "learning_rate": 9.885185185185186e-06, "loss": 29.3785, "step": 18701 }, { "epoch": 445.28656716417913, "grad_norm": 24.0363826751709, "learning_rate": 9.884656084656086e-06, "loss": 30.5873, "step": 18702 }, { "epoch": 445.31044776119404, "grad_norm": 24.070919036865234, "learning_rate": 9.884126984126985e-06, "loss": 30.0358, "step": 18703 }, { "epoch": 445.33432835820895, "grad_norm": 20.672880172729492, "learning_rate": 9.883597883597884e-06, "loss": 29.025, "step": 18704 }, { "epoch": 445.35820895522386, "grad_norm": 20.473325729370117, "learning_rate": 9.883068783068784e-06, "loss": 29.3211, "step": 18705 }, { "epoch": 445.3820895522388, "grad_norm": 18.26165199279785, "learning_rate": 9.882539682539683e-06, "loss": 29.8058, "step": 18706 }, { "epoch": 445.40597014925373, "grad_norm": 23.052764892578125, "learning_rate": 9.882010582010582e-06, "loss": 30.271, "step": 18707 }, { "epoch": 445.42985074626864, "grad_norm": 19.159442901611328, "learning_rate": 9.881481481481483e-06, "loss": 30.7617, "step": 18708 }, { "epoch": 445.4537313432836, "grad_norm": 25.2326717376709, "learning_rate": 9.880952380952381e-06, "loss": 29.8031, "step": 18709 }, { "epoch": 445.4776119402985, "grad_norm": 20.35891342163086, "learning_rate": 9.880423280423282e-06, "loss": 29.6101, "step": 18710 }, { "epoch": 445.5014925373134, "grad_norm": 25.427818298339844, "learning_rate": 9.87989417989418e-06, "loss": 30.5315, "step": 18711 }, { "epoch": 445.52537313432833, "grad_norm": 19.373870849609375, "learning_rate": 9.87936507936508e-06, "loss": 29.9657, "step": 18712 }, { "epoch": 445.5492537313433, "grad_norm": 23.325149536132812, "learning_rate": 9.87883597883598e-06, "loss": 28.8324, "step": 18713 }, { "epoch": 445.5731343283582, "grad_norm": 19.05719566345215, "learning_rate": 9.878306878306879e-06, "loss": 29.459, "step": 18714 }, { "epoch": 445.5970149253731, "grad_norm": 23.693634033203125, "learning_rate": 9.877777777777778e-06, "loss": 30.169, "step": 18715 }, { "epoch": 445.6208955223881, "grad_norm": 18.649036407470703, "learning_rate": 9.877248677248678e-06, "loss": 29.9517, "step": 18716 }, { "epoch": 445.644776119403, "grad_norm": 22.940074920654297, "learning_rate": 9.876719576719577e-06, "loss": 29.5278, "step": 18717 }, { "epoch": 445.6686567164179, "grad_norm": 22.418012619018555, "learning_rate": 9.876190476190478e-06, "loss": 30.244, "step": 18718 }, { "epoch": 445.6925373134328, "grad_norm": 20.056880950927734, "learning_rate": 9.875661375661376e-06, "loss": 29.651, "step": 18719 }, { "epoch": 445.7164179104478, "grad_norm": 20.92812156677246, "learning_rate": 9.875132275132277e-06, "loss": 29.7148, "step": 18720 }, { "epoch": 445.7402985074627, "grad_norm": 19.421911239624023, "learning_rate": 9.874603174603176e-06, "loss": 29.6727, "step": 18721 }, { "epoch": 445.7641791044776, "grad_norm": 21.001712799072266, "learning_rate": 9.874074074074075e-06, "loss": 29.3088, "step": 18722 }, { "epoch": 445.78805970149256, "grad_norm": 20.59142303466797, "learning_rate": 9.873544973544973e-06, "loss": 29.4323, "step": 18723 }, { "epoch": 445.81194029850747, "grad_norm": 20.60802459716797, "learning_rate": 9.873015873015874e-06, "loss": 30.7279, "step": 18724 }, { "epoch": 445.8358208955224, "grad_norm": 17.077266693115234, "learning_rate": 9.872486772486773e-06, "loss": 29.0152, "step": 18725 }, { "epoch": 445.85970149253734, "grad_norm": 16.55583953857422, "learning_rate": 9.871957671957673e-06, "loss": 29.3891, "step": 18726 }, { "epoch": 445.88358208955225, "grad_norm": 17.879047393798828, "learning_rate": 9.871428571428572e-06, "loss": 30.1239, "step": 18727 }, { "epoch": 445.90746268656716, "grad_norm": 19.5820369720459, "learning_rate": 9.870899470899473e-06, "loss": 30.702, "step": 18728 }, { "epoch": 445.93134328358207, "grad_norm": 19.33560562133789, "learning_rate": 9.870370370370371e-06, "loss": 30.1221, "step": 18729 }, { "epoch": 445.95522388059703, "grad_norm": 18.632369995117188, "learning_rate": 9.86984126984127e-06, "loss": 29.8922, "step": 18730 }, { "epoch": 445.97910447761194, "grad_norm": 19.27196502685547, "learning_rate": 9.869312169312169e-06, "loss": 30.1569, "step": 18731 }, { "epoch": 446.0, "grad_norm": 25.46162986755371, "learning_rate": 9.86878306878307e-06, "loss": 25.4744, "step": 18732 }, { "epoch": 446.0238805970149, "grad_norm": 20.501720428466797, "learning_rate": 9.868253968253968e-06, "loss": 29.7296, "step": 18733 }, { "epoch": 446.0477611940299, "grad_norm": 15.892001152038574, "learning_rate": 9.867724867724869e-06, "loss": 29.951, "step": 18734 }, { "epoch": 446.0716417910448, "grad_norm": 23.434839248657227, "learning_rate": 9.86719576719577e-06, "loss": 29.5602, "step": 18735 }, { "epoch": 446.0955223880597, "grad_norm": 28.479963302612305, "learning_rate": 9.866666666666668e-06, "loss": 30.5887, "step": 18736 }, { "epoch": 446.1194029850746, "grad_norm": 18.02228355407715, "learning_rate": 9.866137566137567e-06, "loss": 29.1836, "step": 18737 }, { "epoch": 446.14328358208957, "grad_norm": 31.304271697998047, "learning_rate": 9.865608465608466e-06, "loss": 30.0157, "step": 18738 }, { "epoch": 446.1671641791045, "grad_norm": 23.778724670410156, "learning_rate": 9.865079365079366e-06, "loss": 29.6594, "step": 18739 }, { "epoch": 446.1910447761194, "grad_norm": 24.051563262939453, "learning_rate": 9.864550264550265e-06, "loss": 29.3966, "step": 18740 }, { "epoch": 446.21492537313435, "grad_norm": 28.467056274414062, "learning_rate": 9.864021164021164e-06, "loss": 29.6585, "step": 18741 }, { "epoch": 446.23880597014926, "grad_norm": 25.203536987304688, "learning_rate": 9.863492063492065e-06, "loss": 29.2565, "step": 18742 }, { "epoch": 446.26268656716417, "grad_norm": 18.118160247802734, "learning_rate": 9.862962962962963e-06, "loss": 30.0215, "step": 18743 }, { "epoch": 446.28656716417913, "grad_norm": 26.8660831451416, "learning_rate": 9.862433862433864e-06, "loss": 28.9416, "step": 18744 }, { "epoch": 446.31044776119404, "grad_norm": 26.6632080078125, "learning_rate": 9.861904761904763e-06, "loss": 29.9127, "step": 18745 }, { "epoch": 446.33432835820895, "grad_norm": 17.099775314331055, "learning_rate": 9.861375661375661e-06, "loss": 29.9441, "step": 18746 }, { "epoch": 446.35820895522386, "grad_norm": 35.367431640625, "learning_rate": 9.860846560846562e-06, "loss": 30.4273, "step": 18747 }, { "epoch": 446.3820895522388, "grad_norm": 21.456600189208984, "learning_rate": 9.86031746031746e-06, "loss": 29.8876, "step": 18748 }, { "epoch": 446.40597014925373, "grad_norm": 35.74831771850586, "learning_rate": 9.85978835978836e-06, "loss": 30.4487, "step": 18749 }, { "epoch": 446.42985074626864, "grad_norm": 25.279451370239258, "learning_rate": 9.85925925925926e-06, "loss": 30.0203, "step": 18750 }, { "epoch": 446.4537313432836, "grad_norm": 25.811397552490234, "learning_rate": 9.858730158730159e-06, "loss": 28.7727, "step": 18751 }, { "epoch": 446.4776119402985, "grad_norm": 34.82917022705078, "learning_rate": 9.85820105820106e-06, "loss": 30.5936, "step": 18752 }, { "epoch": 446.5014925373134, "grad_norm": 21.543052673339844, "learning_rate": 9.857671957671958e-06, "loss": 28.5933, "step": 18753 }, { "epoch": 446.52537313432833, "grad_norm": 41.380638122558594, "learning_rate": 9.857142857142859e-06, "loss": 30.0671, "step": 18754 }, { "epoch": 446.5492537313433, "grad_norm": 28.443233489990234, "learning_rate": 9.856613756613758e-06, "loss": 29.5265, "step": 18755 }, { "epoch": 446.5731343283582, "grad_norm": 51.77538299560547, "learning_rate": 9.856084656084656e-06, "loss": 29.667, "step": 18756 }, { "epoch": 446.5970149253731, "grad_norm": 43.79566192626953, "learning_rate": 9.855555555555555e-06, "loss": 29.8074, "step": 18757 }, { "epoch": 446.6208955223881, "grad_norm": 43.7327880859375, "learning_rate": 9.855026455026456e-06, "loss": 30.0798, "step": 18758 }, { "epoch": 446.644776119403, "grad_norm": 37.443233489990234, "learning_rate": 9.854497354497355e-06, "loss": 28.6069, "step": 18759 }, { "epoch": 446.6686567164179, "grad_norm": 35.89811706542969, "learning_rate": 9.853968253968255e-06, "loss": 29.1999, "step": 18760 }, { "epoch": 446.6925373134328, "grad_norm": 33.100379943847656, "learning_rate": 9.853439153439154e-06, "loss": 29.5536, "step": 18761 }, { "epoch": 446.7164179104478, "grad_norm": 39.89789962768555, "learning_rate": 9.852910052910054e-06, "loss": 29.5007, "step": 18762 }, { "epoch": 446.7402985074627, "grad_norm": 30.517335891723633, "learning_rate": 9.852380952380953e-06, "loss": 30.4981, "step": 18763 }, { "epoch": 446.7641791044776, "grad_norm": 44.66365051269531, "learning_rate": 9.851851851851852e-06, "loss": 29.4309, "step": 18764 }, { "epoch": 446.78805970149256, "grad_norm": 39.44496536254883, "learning_rate": 9.851322751322751e-06, "loss": 30.7236, "step": 18765 }, { "epoch": 446.81194029850747, "grad_norm": 35.49729537963867, "learning_rate": 9.850793650793651e-06, "loss": 29.855, "step": 18766 }, { "epoch": 446.8358208955224, "grad_norm": 35.016822814941406, "learning_rate": 9.85026455026455e-06, "loss": 29.8831, "step": 18767 }, { "epoch": 446.85970149253734, "grad_norm": 34.95130157470703, "learning_rate": 9.84973544973545e-06, "loss": 29.9338, "step": 18768 }, { "epoch": 446.88358208955225, "grad_norm": 31.749237060546875, "learning_rate": 9.849206349206351e-06, "loss": 29.0045, "step": 18769 }, { "epoch": 446.90746268656716, "grad_norm": 40.916542053222656, "learning_rate": 9.84867724867725e-06, "loss": 29.5833, "step": 18770 }, { "epoch": 446.93134328358207, "grad_norm": 36.60881423950195, "learning_rate": 9.848148148148149e-06, "loss": 29.3984, "step": 18771 }, { "epoch": 446.95522388059703, "grad_norm": 33.884708404541016, "learning_rate": 9.847619047619048e-06, "loss": 29.9819, "step": 18772 }, { "epoch": 446.97910447761194, "grad_norm": 31.100940704345703, "learning_rate": 9.847089947089948e-06, "loss": 31.2755, "step": 18773 }, { "epoch": 447.0, "grad_norm": 33.251338958740234, "learning_rate": 9.846560846560847e-06, "loss": 25.9599, "step": 18774 }, { "epoch": 447.0238805970149, "grad_norm": 31.726837158203125, "learning_rate": 9.846031746031746e-06, "loss": 30.208, "step": 18775 }, { "epoch": 447.0477611940299, "grad_norm": 41.428863525390625, "learning_rate": 9.845502645502646e-06, "loss": 28.9709, "step": 18776 }, { "epoch": 447.0716417910448, "grad_norm": 36.63111877441406, "learning_rate": 9.844973544973547e-06, "loss": 30.2441, "step": 18777 }, { "epoch": 447.0955223880597, "grad_norm": 36.10809326171875, "learning_rate": 9.844444444444446e-06, "loss": 29.8194, "step": 18778 }, { "epoch": 447.1194029850746, "grad_norm": 32.919063568115234, "learning_rate": 9.843915343915345e-06, "loss": 29.2762, "step": 18779 }, { "epoch": 447.14328358208957, "grad_norm": 35.652862548828125, "learning_rate": 9.843386243386243e-06, "loss": 30.7961, "step": 18780 }, { "epoch": 447.1671641791045, "grad_norm": 32.187049865722656, "learning_rate": 9.842857142857144e-06, "loss": 29.3027, "step": 18781 }, { "epoch": 447.1910447761194, "grad_norm": 36.274559020996094, "learning_rate": 9.842328042328043e-06, "loss": 30.6209, "step": 18782 }, { "epoch": 447.21492537313435, "grad_norm": 33.96268844604492, "learning_rate": 9.841798941798942e-06, "loss": 30.3775, "step": 18783 }, { "epoch": 447.23880597014926, "grad_norm": 34.651920318603516, "learning_rate": 9.841269841269842e-06, "loss": 29.7717, "step": 18784 }, { "epoch": 447.26268656716417, "grad_norm": 33.044281005859375, "learning_rate": 9.840740740740743e-06, "loss": 29.7228, "step": 18785 }, { "epoch": 447.28656716417913, "grad_norm": 34.92821502685547, "learning_rate": 9.840211640211641e-06, "loss": 28.877, "step": 18786 }, { "epoch": 447.31044776119404, "grad_norm": 29.17439079284668, "learning_rate": 9.83968253968254e-06, "loss": 30.2265, "step": 18787 }, { "epoch": 447.33432835820895, "grad_norm": 37.36045455932617, "learning_rate": 9.83915343915344e-06, "loss": 29.1902, "step": 18788 }, { "epoch": 447.35820895522386, "grad_norm": 31.162965774536133, "learning_rate": 9.83862433862434e-06, "loss": 30.106, "step": 18789 }, { "epoch": 447.3820895522388, "grad_norm": 37.819244384765625, "learning_rate": 9.838095238095238e-06, "loss": 29.6305, "step": 18790 }, { "epoch": 447.40597014925373, "grad_norm": 37.481231689453125, "learning_rate": 9.837566137566137e-06, "loss": 29.3044, "step": 18791 }, { "epoch": 447.42985074626864, "grad_norm": 33.168601989746094, "learning_rate": 9.837037037037038e-06, "loss": 29.4276, "step": 18792 }, { "epoch": 447.4537313432836, "grad_norm": 30.808195114135742, "learning_rate": 9.836507936507937e-06, "loss": 28.7775, "step": 18793 }, { "epoch": 447.4776119402985, "grad_norm": 36.16692352294922, "learning_rate": 9.835978835978837e-06, "loss": 29.6296, "step": 18794 }, { "epoch": 447.5014925373134, "grad_norm": 27.94761848449707, "learning_rate": 9.835449735449736e-06, "loss": 29.6343, "step": 18795 }, { "epoch": 447.52537313432833, "grad_norm": 40.37862777709961, "learning_rate": 9.834920634920636e-06, "loss": 31.2057, "step": 18796 }, { "epoch": 447.5492537313433, "grad_norm": 36.71797180175781, "learning_rate": 9.834391534391535e-06, "loss": 29.9827, "step": 18797 }, { "epoch": 447.5731343283582, "grad_norm": 33.4858512878418, "learning_rate": 9.833862433862434e-06, "loss": 29.3121, "step": 18798 }, { "epoch": 447.5970149253731, "grad_norm": 32.97297286987305, "learning_rate": 9.833333333333333e-06, "loss": 30.4292, "step": 18799 }, { "epoch": 447.6208955223881, "grad_norm": 36.36544418334961, "learning_rate": 9.832804232804233e-06, "loss": 29.3929, "step": 18800 }, { "epoch": 447.644776119403, "grad_norm": 31.649633407592773, "learning_rate": 9.832275132275132e-06, "loss": 28.9444, "step": 18801 }, { "epoch": 447.6686567164179, "grad_norm": 44.3813591003418, "learning_rate": 9.831746031746033e-06, "loss": 29.9432, "step": 18802 }, { "epoch": 447.6925373134328, "grad_norm": 37.95224380493164, "learning_rate": 9.831216931216933e-06, "loss": 28.9526, "step": 18803 }, { "epoch": 447.7164179104478, "grad_norm": 34.08156967163086, "learning_rate": 9.830687830687832e-06, "loss": 29.0852, "step": 18804 }, { "epoch": 447.7402985074627, "grad_norm": 33.56852722167969, "learning_rate": 9.830158730158731e-06, "loss": 28.6329, "step": 18805 }, { "epoch": 447.7641791044776, "grad_norm": 36.387908935546875, "learning_rate": 9.82962962962963e-06, "loss": 28.5445, "step": 18806 }, { "epoch": 447.78805970149256, "grad_norm": 30.244382858276367, "learning_rate": 9.82910052910053e-06, "loss": 29.7699, "step": 18807 }, { "epoch": 447.81194029850747, "grad_norm": 38.11452102661133, "learning_rate": 9.828571428571429e-06, "loss": 29.7457, "step": 18808 }, { "epoch": 447.8358208955224, "grad_norm": 35.806453704833984, "learning_rate": 9.828042328042328e-06, "loss": 29.4785, "step": 18809 }, { "epoch": 447.85970149253734, "grad_norm": 35.697021484375, "learning_rate": 9.827513227513228e-06, "loss": 30.0922, "step": 18810 }, { "epoch": 447.88358208955225, "grad_norm": 34.11040496826172, "learning_rate": 9.826984126984129e-06, "loss": 29.6874, "step": 18811 }, { "epoch": 447.90746268656716, "grad_norm": 33.44720458984375, "learning_rate": 9.826455026455028e-06, "loss": 30.0404, "step": 18812 }, { "epoch": 447.93134328358207, "grad_norm": 28.22995376586914, "learning_rate": 9.825925925925927e-06, "loss": 29.9623, "step": 18813 }, { "epoch": 447.95522388059703, "grad_norm": 33.87968826293945, "learning_rate": 9.825396825396825e-06, "loss": 29.5566, "step": 18814 }, { "epoch": 447.97910447761194, "grad_norm": 32.12936782836914, "learning_rate": 9.824867724867726e-06, "loss": 29.496, "step": 18815 }, { "epoch": 448.0, "grad_norm": 33.93634796142578, "learning_rate": 9.824338624338625e-06, "loss": 27.2855, "step": 18816 }, { "epoch": 448.0238805970149, "grad_norm": 30.463336944580078, "learning_rate": 9.823809523809524e-06, "loss": 29.377, "step": 18817 }, { "epoch": 448.0477611940299, "grad_norm": 33.1998291015625, "learning_rate": 9.823280423280424e-06, "loss": 30.1633, "step": 18818 }, { "epoch": 448.0716417910448, "grad_norm": 30.724191665649414, "learning_rate": 9.822751322751325e-06, "loss": 29.8424, "step": 18819 }, { "epoch": 448.0955223880597, "grad_norm": 38.30782699584961, "learning_rate": 9.822222222222223e-06, "loss": 29.4547, "step": 18820 }, { "epoch": 448.1194029850746, "grad_norm": 33.71158981323242, "learning_rate": 9.821693121693122e-06, "loss": 29.9913, "step": 18821 }, { "epoch": 448.14328358208957, "grad_norm": 37.22774887084961, "learning_rate": 9.821164021164023e-06, "loss": 30.966, "step": 18822 }, { "epoch": 448.1671641791045, "grad_norm": 33.146244049072266, "learning_rate": 9.820634920634922e-06, "loss": 29.3025, "step": 18823 }, { "epoch": 448.1910447761194, "grad_norm": 34.13437271118164, "learning_rate": 9.82010582010582e-06, "loss": 30.1923, "step": 18824 }, { "epoch": 448.21492537313435, "grad_norm": 32.6254997253418, "learning_rate": 9.81957671957672e-06, "loss": 30.1733, "step": 18825 }, { "epoch": 448.23880597014926, "grad_norm": 35.598819732666016, "learning_rate": 9.81904761904762e-06, "loss": 29.5885, "step": 18826 }, { "epoch": 448.26268656716417, "grad_norm": 33.22323989868164, "learning_rate": 9.81851851851852e-06, "loss": 30.7683, "step": 18827 }, { "epoch": 448.28656716417913, "grad_norm": 36.63930892944336, "learning_rate": 9.817989417989419e-06, "loss": 28.1788, "step": 18828 }, { "epoch": 448.31044776119404, "grad_norm": 32.096744537353516, "learning_rate": 9.817460317460318e-06, "loss": 30.0768, "step": 18829 }, { "epoch": 448.33432835820895, "grad_norm": 33.445011138916016, "learning_rate": 9.816931216931218e-06, "loss": 29.5985, "step": 18830 }, { "epoch": 448.35820895522386, "grad_norm": 28.628639221191406, "learning_rate": 9.816402116402117e-06, "loss": 29.2123, "step": 18831 }, { "epoch": 448.3820895522388, "grad_norm": 37.036949157714844, "learning_rate": 9.815873015873016e-06, "loss": 29.9231, "step": 18832 }, { "epoch": 448.40597014925373, "grad_norm": 32.678466796875, "learning_rate": 9.815343915343915e-06, "loss": 29.4799, "step": 18833 }, { "epoch": 448.42985074626864, "grad_norm": 36.6664924621582, "learning_rate": 9.814814814814815e-06, "loss": 29.0067, "step": 18834 }, { "epoch": 448.4537313432836, "grad_norm": 32.55145263671875, "learning_rate": 9.814285714285716e-06, "loss": 29.5954, "step": 18835 }, { "epoch": 448.4776119402985, "grad_norm": 32.24037170410156, "learning_rate": 9.813756613756615e-06, "loss": 29.2615, "step": 18836 }, { "epoch": 448.5014925373134, "grad_norm": 32.47368621826172, "learning_rate": 9.813227513227515e-06, "loss": 28.4796, "step": 18837 }, { "epoch": 448.52537313432833, "grad_norm": 34.174102783203125, "learning_rate": 9.812698412698414e-06, "loss": 29.8036, "step": 18838 }, { "epoch": 448.5492537313433, "grad_norm": 29.635099411010742, "learning_rate": 9.812169312169313e-06, "loss": 29.2207, "step": 18839 }, { "epoch": 448.5731343283582, "grad_norm": 36.1382942199707, "learning_rate": 9.811640211640212e-06, "loss": 30.018, "step": 18840 }, { "epoch": 448.5970149253731, "grad_norm": 34.936832427978516, "learning_rate": 9.811111111111112e-06, "loss": 29.5068, "step": 18841 }, { "epoch": 448.6208955223881, "grad_norm": 37.43104934692383, "learning_rate": 9.810582010582011e-06, "loss": 29.5357, "step": 18842 }, { "epoch": 448.644776119403, "grad_norm": 38.44711685180664, "learning_rate": 9.81005291005291e-06, "loss": 29.2115, "step": 18843 }, { "epoch": 448.6686567164179, "grad_norm": 28.588653564453125, "learning_rate": 9.80952380952381e-06, "loss": 29.349, "step": 18844 }, { "epoch": 448.6925373134328, "grad_norm": 29.671405792236328, "learning_rate": 9.808994708994711e-06, "loss": 28.7659, "step": 18845 }, { "epoch": 448.7164179104478, "grad_norm": 26.597749710083008, "learning_rate": 9.80846560846561e-06, "loss": 29.1947, "step": 18846 }, { "epoch": 448.7402985074627, "grad_norm": 25.768972396850586, "learning_rate": 9.807936507936509e-06, "loss": 30.2264, "step": 18847 }, { "epoch": 448.7641791044776, "grad_norm": 32.354278564453125, "learning_rate": 9.807407407407407e-06, "loss": 29.8559, "step": 18848 }, { "epoch": 448.78805970149256, "grad_norm": 25.641151428222656, "learning_rate": 9.806878306878308e-06, "loss": 30.4412, "step": 18849 }, { "epoch": 448.81194029850747, "grad_norm": 32.82002639770508, "learning_rate": 9.806349206349207e-06, "loss": 29.512, "step": 18850 }, { "epoch": 448.8358208955224, "grad_norm": 27.287641525268555, "learning_rate": 9.805820105820106e-06, "loss": 29.7458, "step": 18851 }, { "epoch": 448.85970149253734, "grad_norm": 33.86443328857422, "learning_rate": 9.805291005291006e-06, "loss": 29.8252, "step": 18852 }, { "epoch": 448.88358208955225, "grad_norm": 31.047433853149414, "learning_rate": 9.804761904761907e-06, "loss": 29.4836, "step": 18853 }, { "epoch": 448.90746268656716, "grad_norm": 33.35688018798828, "learning_rate": 9.804232804232805e-06, "loss": 29.5944, "step": 18854 }, { "epoch": 448.93134328358207, "grad_norm": 27.1873779296875, "learning_rate": 9.803703703703704e-06, "loss": 30.3422, "step": 18855 }, { "epoch": 448.95522388059703, "grad_norm": 32.179019927978516, "learning_rate": 9.803174603174605e-06, "loss": 29.5344, "step": 18856 }, { "epoch": 448.97910447761194, "grad_norm": 25.708383560180664, "learning_rate": 9.802645502645504e-06, "loss": 31.3246, "step": 18857 }, { "epoch": 449.0, "grad_norm": 26.604280471801758, "learning_rate": 9.802116402116402e-06, "loss": 25.5377, "step": 18858 }, { "epoch": 449.0238805970149, "grad_norm": 31.397233963012695, "learning_rate": 9.801587301587301e-06, "loss": 30.5642, "step": 18859 }, { "epoch": 449.0477611940299, "grad_norm": 25.787208557128906, "learning_rate": 9.801058201058202e-06, "loss": 28.8864, "step": 18860 }, { "epoch": 449.0716417910448, "grad_norm": 25.823400497436523, "learning_rate": 9.800529100529102e-06, "loss": 29.7379, "step": 18861 }, { "epoch": 449.0955223880597, "grad_norm": 23.510948181152344, "learning_rate": 9.800000000000001e-06, "loss": 29.8785, "step": 18862 }, { "epoch": 449.1194029850746, "grad_norm": 22.849016189575195, "learning_rate": 9.7994708994709e-06, "loss": 30.8151, "step": 18863 }, { "epoch": 449.14328358208957, "grad_norm": 21.94300651550293, "learning_rate": 9.7989417989418e-06, "loss": 28.4248, "step": 18864 }, { "epoch": 449.1671641791045, "grad_norm": 24.85818099975586, "learning_rate": 9.7984126984127e-06, "loss": 30.4554, "step": 18865 }, { "epoch": 449.1910447761194, "grad_norm": 22.531618118286133, "learning_rate": 9.797883597883598e-06, "loss": 29.7083, "step": 18866 }, { "epoch": 449.21492537313435, "grad_norm": 19.220949172973633, "learning_rate": 9.797354497354497e-06, "loss": 28.4596, "step": 18867 }, { "epoch": 449.23880597014926, "grad_norm": 23.896013259887695, "learning_rate": 9.796825396825397e-06, "loss": 29.422, "step": 18868 }, { "epoch": 449.26268656716417, "grad_norm": 18.753400802612305, "learning_rate": 9.796296296296298e-06, "loss": 29.6429, "step": 18869 }, { "epoch": 449.28656716417913, "grad_norm": 22.286386489868164, "learning_rate": 9.795767195767197e-06, "loss": 29.7818, "step": 18870 }, { "epoch": 449.31044776119404, "grad_norm": 20.750408172607422, "learning_rate": 9.795238095238097e-06, "loss": 30.5447, "step": 18871 }, { "epoch": 449.33432835820895, "grad_norm": 19.726438522338867, "learning_rate": 9.794708994708996e-06, "loss": 30.5707, "step": 18872 }, { "epoch": 449.35820895522386, "grad_norm": 18.497900009155273, "learning_rate": 9.794179894179895e-06, "loss": 30.4066, "step": 18873 }, { "epoch": 449.3820895522388, "grad_norm": 21.51070785522461, "learning_rate": 9.793650793650794e-06, "loss": 30.5392, "step": 18874 }, { "epoch": 449.40597014925373, "grad_norm": 17.816740036010742, "learning_rate": 9.793121693121694e-06, "loss": 29.9868, "step": 18875 }, { "epoch": 449.42985074626864, "grad_norm": 18.465328216552734, "learning_rate": 9.792592592592593e-06, "loss": 29.2578, "step": 18876 }, { "epoch": 449.4537313432836, "grad_norm": 22.90824317932129, "learning_rate": 9.792063492063494e-06, "loss": 29.6319, "step": 18877 }, { "epoch": 449.4776119402985, "grad_norm": 19.82144546508789, "learning_rate": 9.791534391534392e-06, "loss": 29.9684, "step": 18878 }, { "epoch": 449.5014925373134, "grad_norm": 20.08651351928711, "learning_rate": 9.791005291005293e-06, "loss": 29.4972, "step": 18879 }, { "epoch": 449.52537313432833, "grad_norm": 20.742626190185547, "learning_rate": 9.790476190476192e-06, "loss": 30.0482, "step": 18880 }, { "epoch": 449.5492537313433, "grad_norm": 21.239789962768555, "learning_rate": 9.78994708994709e-06, "loss": 29.0164, "step": 18881 }, { "epoch": 449.5731343283582, "grad_norm": 18.97139549255371, "learning_rate": 9.78941798941799e-06, "loss": 28.9804, "step": 18882 }, { "epoch": 449.5970149253731, "grad_norm": 18.684490203857422, "learning_rate": 9.78888888888889e-06, "loss": 27.6919, "step": 18883 }, { "epoch": 449.6208955223881, "grad_norm": 21.324514389038086, "learning_rate": 9.788359788359789e-06, "loss": 29.3598, "step": 18884 }, { "epoch": 449.644776119403, "grad_norm": 20.929763793945312, "learning_rate": 9.78783068783069e-06, "loss": 30.2065, "step": 18885 }, { "epoch": 449.6686567164179, "grad_norm": 18.934946060180664, "learning_rate": 9.787301587301588e-06, "loss": 29.4551, "step": 18886 }, { "epoch": 449.6925373134328, "grad_norm": 23.363676071166992, "learning_rate": 9.786772486772489e-06, "loss": 28.6651, "step": 18887 }, { "epoch": 449.7164179104478, "grad_norm": 20.75950813293457, "learning_rate": 9.786243386243387e-06, "loss": 30.1084, "step": 18888 }, { "epoch": 449.7402985074627, "grad_norm": 18.916799545288086, "learning_rate": 9.785714285714286e-06, "loss": 30.6394, "step": 18889 }, { "epoch": 449.7641791044776, "grad_norm": 24.755535125732422, "learning_rate": 9.785185185185187e-06, "loss": 29.147, "step": 18890 }, { "epoch": 449.78805970149256, "grad_norm": 20.55992317199707, "learning_rate": 9.784656084656086e-06, "loss": 29.1261, "step": 18891 }, { "epoch": 449.81194029850747, "grad_norm": 21.978919982910156, "learning_rate": 9.784126984126984e-06, "loss": 30.0534, "step": 18892 }, { "epoch": 449.8358208955224, "grad_norm": 20.38338279724121, "learning_rate": 9.783597883597883e-06, "loss": 29.9411, "step": 18893 }, { "epoch": 449.85970149253734, "grad_norm": 24.43837547302246, "learning_rate": 9.783068783068784e-06, "loss": 29.4558, "step": 18894 }, { "epoch": 449.88358208955225, "grad_norm": 21.979455947875977, "learning_rate": 9.782539682539684e-06, "loss": 29.3048, "step": 18895 }, { "epoch": 449.90746268656716, "grad_norm": 21.548137664794922, "learning_rate": 9.782010582010583e-06, "loss": 29.6003, "step": 18896 }, { "epoch": 449.93134328358207, "grad_norm": 19.433612823486328, "learning_rate": 9.781481481481482e-06, "loss": 30.1635, "step": 18897 }, { "epoch": 449.95522388059703, "grad_norm": 22.34962272644043, "learning_rate": 9.780952380952382e-06, "loss": 29.6221, "step": 18898 }, { "epoch": 449.97910447761194, "grad_norm": 19.86094093322754, "learning_rate": 9.780423280423281e-06, "loss": 30.0172, "step": 18899 }, { "epoch": 450.0, "grad_norm": 16.426191329956055, "learning_rate": 9.77989417989418e-06, "loss": 25.9236, "step": 18900 }, { "epoch": 450.0, "step": 18900, "total_flos": 9.29092219417275e+17, "train_loss": 0.6629277442245887, "train_runtime": 12803.0873, "train_samples_per_second": 188.111, "train_steps_per_second": 1.476 }, { "epoch": 450.0238805970149, "grad_norm": 19.30845832824707, "learning_rate": 1e-05, "loss": 29.5776, "step": 18901 }, { "epoch": 450.0477611940299, "grad_norm": Infinity, "learning_rate": 9.999493414387033e-06, "loss": 34.3983, "step": 18902 }, { "epoch": 450.0716417910448, "grad_norm": 301.84906005859375, "learning_rate": 9.999493414387033e-06, "loss": 34.9512, "step": 18903 }, { "epoch": 450.0955223880597, "grad_norm": 159.06385803222656, "learning_rate": 9.998986828774063e-06, "loss": 34.0581, "step": 18904 }, { "epoch": 450.1194029850746, "grad_norm": 79.39588928222656, "learning_rate": 9.998480243161095e-06, "loss": 31.2132, "step": 18905 }, { "epoch": 450.14328358208957, "grad_norm": 82.88153076171875, "learning_rate": 9.997973657548127e-06, "loss": 30.0432, "step": 18906 }, { "epoch": 450.1671641791045, "grad_norm": 52.992210388183594, "learning_rate": 9.997467071935157e-06, "loss": 30.9851, "step": 18907 }, { "epoch": 450.1910447761194, "grad_norm": 51.63279724121094, "learning_rate": 9.99696048632219e-06, "loss": 30.2706, "step": 18908 }, { "epoch": 450.21492537313435, "grad_norm": 45.98822021484375, "learning_rate": 9.996453900709222e-06, "loss": 30.5838, "step": 18909 }, { "epoch": 450.23880597014926, "grad_norm": NaN, "learning_rate": 9.995947315096252e-06, "loss": 43.1125, "step": 18910 }, { "epoch": 450.26268656716417, "grad_norm": 42.12148666381836, "learning_rate": 9.995947315096252e-06, "loss": 29.6904, "step": 18911 }, { "epoch": 450.28656716417913, "grad_norm": 30.80125617980957, "learning_rate": 9.995440729483284e-06, "loss": 30.6284, "step": 18912 }, { "epoch": 450.31044776119404, "grad_norm": 33.184295654296875, "learning_rate": 9.994934143870316e-06, "loss": 30.5464, "step": 18913 }, { "epoch": 450.33432835820895, "grad_norm": 26.37697410583496, "learning_rate": 9.994427558257346e-06, "loss": 30.1079, "step": 18914 }, { "epoch": 450.35820895522386, "grad_norm": 25.714906692504883, "learning_rate": 9.993920972644378e-06, "loss": 29.9216, "step": 18915 }, { "epoch": 450.3820895522388, "grad_norm": 24.25808334350586, "learning_rate": 9.993414387031409e-06, "loss": 29.1718, "step": 18916 }, { "epoch": 450.40597014925373, "grad_norm": 22.937480926513672, "learning_rate": 9.99290780141844e-06, "loss": 29.8218, "step": 18917 }, { "epoch": 450.42985074626864, "grad_norm": 20.91872215270996, "learning_rate": 9.992401215805473e-06, "loss": 29.123, "step": 18918 }, { "epoch": 450.4537313432836, "grad_norm": 24.89742088317871, "learning_rate": 9.991894630192503e-06, "loss": 30.2468, "step": 18919 }, { "epoch": 450.4776119402985, "grad_norm": 22.898672103881836, "learning_rate": 9.991388044579535e-06, "loss": 30.5613, "step": 18920 }, { "epoch": 450.5014925373134, "grad_norm": 27.06220054626465, "learning_rate": 9.990881458966565e-06, "loss": 29.3531, "step": 18921 }, { "epoch": 450.52537313432833, "grad_norm": 24.804279327392578, "learning_rate": 9.990374873353597e-06, "loss": 29.9173, "step": 18922 }, { "epoch": 450.5492537313433, "grad_norm": 22.740867614746094, "learning_rate": 9.98986828774063e-06, "loss": 29.519, "step": 18923 }, { "epoch": 450.5731343283582, "grad_norm": 22.420976638793945, "learning_rate": 9.98936170212766e-06, "loss": 29.249, "step": 18924 }, { "epoch": 450.5970149253731, "grad_norm": 24.420682907104492, "learning_rate": 9.988855116514692e-06, "loss": 30.4978, "step": 18925 }, { "epoch": 450.6208955223881, "grad_norm": 24.97078514099121, "learning_rate": 9.988348530901724e-06, "loss": 29.9352, "step": 18926 }, { "epoch": 450.644776119403, "grad_norm": 20.361722946166992, "learning_rate": 9.987841945288754e-06, "loss": 30.1875, "step": 18927 }, { "epoch": 450.6686567164179, "grad_norm": 17.911455154418945, "learning_rate": 9.987335359675786e-06, "loss": 29.623, "step": 18928 }, { "epoch": 450.6925373134328, "grad_norm": 19.583101272583008, "learning_rate": 9.986828774062818e-06, "loss": 29.7614, "step": 18929 }, { "epoch": 450.7164179104478, "grad_norm": 16.494487762451172, "learning_rate": 9.986322188449848e-06, "loss": 29.5737, "step": 18930 }, { "epoch": 450.7402985074627, "grad_norm": 24.277469635009766, "learning_rate": 9.98581560283688e-06, "loss": 30.0424, "step": 18931 }, { "epoch": 450.7641791044776, "grad_norm": 17.92649269104004, "learning_rate": 9.985309017223912e-06, "loss": 29.198, "step": 18932 }, { "epoch": 450.78805970149256, "grad_norm": 21.55950355529785, "learning_rate": 9.984802431610943e-06, "loss": 29.0494, "step": 18933 }, { "epoch": 450.81194029850747, "grad_norm": 22.094663619995117, "learning_rate": 9.984295845997975e-06, "loss": 30.593, "step": 18934 }, { "epoch": 450.8358208955224, "grad_norm": 22.226341247558594, "learning_rate": 9.983789260385007e-06, "loss": 30.1663, "step": 18935 }, { "epoch": 450.85970149253734, "grad_norm": 20.94849967956543, "learning_rate": 9.983282674772037e-06, "loss": 28.8151, "step": 18936 }, { "epoch": 450.88358208955225, "grad_norm": 22.0506591796875, "learning_rate": 9.982776089159069e-06, "loss": 30.753, "step": 18937 }, { "epoch": 450.90746268656716, "grad_norm": 20.112022399902344, "learning_rate": 9.982269503546101e-06, "loss": 29.2001, "step": 18938 }, { "epoch": 450.93134328358207, "grad_norm": 18.095592498779297, "learning_rate": 9.981762917933131e-06, "loss": 29.373, "step": 18939 }, { "epoch": 450.95522388059703, "grad_norm": 25.190563201904297, "learning_rate": 9.981256332320163e-06, "loss": 29.4462, "step": 18940 }, { "epoch": 450.97910447761194, "grad_norm": 23.752017974853516, "learning_rate": 9.980749746707195e-06, "loss": 30.4988, "step": 18941 }, { "epoch": 451.0, "grad_norm": 17.788291931152344, "learning_rate": 9.980243161094226e-06, "loss": 26.8989, "step": 18942 }, { "epoch": 451.0238805970149, "grad_norm": 23.511444091796875, "learning_rate": 9.979736575481258e-06, "loss": 28.793, "step": 18943 }, { "epoch": 451.0477611940299, "grad_norm": 26.580766677856445, "learning_rate": 9.979229989868288e-06, "loss": 28.7534, "step": 18944 }, { "epoch": 451.0716417910448, "grad_norm": 18.559480667114258, "learning_rate": 9.97872340425532e-06, "loss": 29.5967, "step": 18945 }, { "epoch": 451.0955223880597, "grad_norm": 23.843984603881836, "learning_rate": 9.978216818642352e-06, "loss": 30.0343, "step": 18946 }, { "epoch": 451.1194029850746, "grad_norm": 22.217266082763672, "learning_rate": 9.977710233029382e-06, "loss": 30.3578, "step": 18947 }, { "epoch": 451.14328358208957, "grad_norm": 23.060617446899414, "learning_rate": 9.977203647416414e-06, "loss": 29.792, "step": 18948 }, { "epoch": 451.1671641791045, "grad_norm": 21.334890365600586, "learning_rate": 9.976697061803445e-06, "loss": 28.7295, "step": 18949 }, { "epoch": 451.1910447761194, "grad_norm": 26.603607177734375, "learning_rate": 9.976190476190477e-06, "loss": 28.9861, "step": 18950 }, { "epoch": 451.21492537313435, "grad_norm": 24.815683364868164, "learning_rate": 9.975683890577509e-06, "loss": 30.2151, "step": 18951 }, { "epoch": 451.23880597014926, "grad_norm": 17.89673614501953, "learning_rate": 9.975177304964539e-06, "loss": 30.8364, "step": 18952 }, { "epoch": 451.26268656716417, "grad_norm": 17.92727279663086, "learning_rate": 9.974670719351571e-06, "loss": 29.6827, "step": 18953 }, { "epoch": 451.28656716417913, "grad_norm": 20.994428634643555, "learning_rate": 9.974164133738603e-06, "loss": 29.7407, "step": 18954 }, { "epoch": 451.31044776119404, "grad_norm": 20.474212646484375, "learning_rate": 9.973657548125633e-06, "loss": 29.9772, "step": 18955 }, { "epoch": 451.33432835820895, "grad_norm": 18.365907669067383, "learning_rate": 9.973150962512665e-06, "loss": 29.618, "step": 18956 }, { "epoch": 451.35820895522386, "grad_norm": NaN, "learning_rate": 9.972644376899697e-06, "loss": 33.7726, "step": 18957 }, { "epoch": 451.3820895522388, "grad_norm": 18.915334701538086, "learning_rate": 9.972644376899697e-06, "loss": 28.4901, "step": 18958 }, { "epoch": 451.40597014925373, "grad_norm": 18.583858489990234, "learning_rate": 9.972137791286728e-06, "loss": 29.815, "step": 18959 }, { "epoch": 451.42985074626864, "grad_norm": 17.04874610900879, "learning_rate": 9.97163120567376e-06, "loss": 30.2592, "step": 18960 }, { "epoch": 451.4537313432836, "grad_norm": 20.502761840820312, "learning_rate": 9.971124620060792e-06, "loss": 28.3765, "step": 18961 }, { "epoch": 451.4776119402985, "grad_norm": 22.77794075012207, "learning_rate": 9.970618034447822e-06, "loss": 30.2104, "step": 18962 }, { "epoch": 451.5014925373134, "grad_norm": 22.17766571044922, "learning_rate": 9.970111448834854e-06, "loss": 30.2448, "step": 18963 }, { "epoch": 451.52537313432833, "grad_norm": 17.22074317932129, "learning_rate": 9.969604863221886e-06, "loss": 29.4063, "step": 18964 }, { "epoch": 451.5492537313433, "grad_norm": 18.516681671142578, "learning_rate": 9.969098277608916e-06, "loss": 28.7269, "step": 18965 }, { "epoch": 451.5731343283582, "grad_norm": 22.7879638671875, "learning_rate": 9.968591691995948e-06, "loss": 30.4879, "step": 18966 }, { "epoch": 451.5970149253731, "grad_norm": 22.5107479095459, "learning_rate": 9.96808510638298e-06, "loss": 30.5349, "step": 18967 }, { "epoch": 451.6208955223881, "grad_norm": 21.335634231567383, "learning_rate": 9.96757852077001e-06, "loss": 28.7798, "step": 18968 }, { "epoch": 451.644776119403, "grad_norm": 17.770339965820312, "learning_rate": 9.967071935157043e-06, "loss": 29.9293, "step": 18969 }, { "epoch": 451.6686567164179, "grad_norm": 18.20197868347168, "learning_rate": 9.966565349544075e-06, "loss": 29.6942, "step": 18970 }, { "epoch": 451.6925373134328, "grad_norm": 20.81208610534668, "learning_rate": 9.966058763931105e-06, "loss": 28.8187, "step": 18971 }, { "epoch": 451.7164179104478, "grad_norm": 19.00330924987793, "learning_rate": 9.965552178318137e-06, "loss": 29.2971, "step": 18972 }, { "epoch": 451.7402985074627, "grad_norm": 19.042354583740234, "learning_rate": 9.965045592705167e-06, "loss": 28.7213, "step": 18973 }, { "epoch": 451.7641791044776, "grad_norm": 20.025510787963867, "learning_rate": 9.9645390070922e-06, "loss": 29.8927, "step": 18974 }, { "epoch": 451.78805970149256, "grad_norm": 17.361202239990234, "learning_rate": 9.964032421479231e-06, "loss": 30.2936, "step": 18975 }, { "epoch": 451.81194029850747, "grad_norm": 20.94649314880371, "learning_rate": 9.963525835866262e-06, "loss": 29.5945, "step": 18976 }, { "epoch": 451.8358208955224, "grad_norm": 19.1213436126709, "learning_rate": 9.963019250253294e-06, "loss": 30.0282, "step": 18977 }, { "epoch": 451.85970149253734, "grad_norm": 24.17024803161621, "learning_rate": 9.962512664640324e-06, "loss": 29.7211, "step": 18978 }, { "epoch": 451.88358208955225, "grad_norm": 19.829086303710938, "learning_rate": 9.962006079027356e-06, "loss": 28.8128, "step": 18979 }, { "epoch": 451.90746268656716, "grad_norm": 22.490489959716797, "learning_rate": 9.961499493414388e-06, "loss": 29.8406, "step": 18980 }, { "epoch": 451.93134328358207, "grad_norm": 18.907854080200195, "learning_rate": 9.960992907801418e-06, "loss": 30.2934, "step": 18981 }, { "epoch": 451.95522388059703, "grad_norm": 23.866716384887695, "learning_rate": 9.96048632218845e-06, "loss": 29.516, "step": 18982 }, { "epoch": 451.97910447761194, "grad_norm": 21.31512451171875, "learning_rate": 9.959979736575482e-06, "loss": 30.1028, "step": 18983 }, { "epoch": 452.0, "grad_norm": 20.712146759033203, "learning_rate": 9.959473150962513e-06, "loss": 26.29, "step": 18984 }, { "epoch": 452.0238805970149, "grad_norm": 21.922698974609375, "learning_rate": 9.958966565349545e-06, "loss": 29.6919, "step": 18985 }, { "epoch": 452.0477611940299, "grad_norm": 19.797822952270508, "learning_rate": 9.958459979736577e-06, "loss": 28.5553, "step": 18986 }, { "epoch": 452.0716417910448, "grad_norm": 28.08194351196289, "learning_rate": 9.957953394123607e-06, "loss": 29.0069, "step": 18987 }, { "epoch": 452.0955223880597, "grad_norm": 19.913061141967773, "learning_rate": 9.957446808510639e-06, "loss": 29.8299, "step": 18988 }, { "epoch": 452.1194029850746, "grad_norm": 20.26679229736328, "learning_rate": 9.956940222897671e-06, "loss": 29.5776, "step": 18989 }, { "epoch": 452.14328358208957, "grad_norm": 17.44236946105957, "learning_rate": 9.956433637284701e-06, "loss": 29.9729, "step": 18990 }, { "epoch": 452.1671641791045, "grad_norm": 22.253799438476562, "learning_rate": 9.955927051671733e-06, "loss": 30.6684, "step": 18991 }, { "epoch": 452.1910447761194, "grad_norm": NaN, "learning_rate": 9.955420466058765e-06, "loss": 25.7006, "step": 18992 }, { "epoch": 452.21492537313435, "grad_norm": 18.766408920288086, "learning_rate": 9.955420466058765e-06, "loss": 30.3421, "step": 18993 }, { "epoch": 452.23880597014926, "grad_norm": 21.71162986755371, "learning_rate": 9.954913880445796e-06, "loss": 29.8582, "step": 18994 }, { "epoch": 452.26268656716417, "grad_norm": 20.402259826660156, "learning_rate": 9.954407294832828e-06, "loss": 29.5923, "step": 18995 }, { "epoch": 452.28656716417913, "grad_norm": 20.867965698242188, "learning_rate": 9.95390070921986e-06, "loss": 29.7953, "step": 18996 }, { "epoch": 452.31044776119404, "grad_norm": 21.364362716674805, "learning_rate": 9.95339412360689e-06, "loss": 29.8313, "step": 18997 }, { "epoch": 452.33432835820895, "grad_norm": 20.873966217041016, "learning_rate": 9.952887537993922e-06, "loss": 29.0472, "step": 18998 }, { "epoch": 452.35820895522386, "grad_norm": 19.085573196411133, "learning_rate": 9.952380952380954e-06, "loss": 29.4426, "step": 18999 }, { "epoch": 452.3820895522388, "grad_norm": 18.38790512084961, "learning_rate": 9.951874366767984e-06, "loss": 30.6716, "step": 19000 }, { "epoch": 452.40597014925373, "grad_norm": 23.675302505493164, "learning_rate": 9.951367781155016e-06, "loss": 30.5216, "step": 19001 }, { "epoch": 452.42985074626864, "grad_norm": 22.37555503845215, "learning_rate": 9.950861195542047e-06, "loss": 29.2836, "step": 19002 }, { "epoch": 452.4537313432836, "grad_norm": 18.275020599365234, "learning_rate": 9.950354609929079e-06, "loss": 29.6182, "step": 19003 }, { "epoch": 452.4776119402985, "grad_norm": 24.41230583190918, "learning_rate": 9.94984802431611e-06, "loss": 28.8093, "step": 19004 }, { "epoch": 452.5014925373134, "grad_norm": 19.157386779785156, "learning_rate": 9.949341438703141e-06, "loss": 30.0343, "step": 19005 }, { "epoch": 452.52537313432833, "grad_norm": 23.22016143798828, "learning_rate": 9.948834853090173e-06, "loss": 29.574, "step": 19006 }, { "epoch": 452.5492537313433, "grad_norm": 20.810165405273438, "learning_rate": 9.948328267477203e-06, "loss": 29.1676, "step": 19007 }, { "epoch": 452.5731343283582, "grad_norm": 23.29741096496582, "learning_rate": 9.947821681864235e-06, "loss": 29.6805, "step": 19008 }, { "epoch": 452.5970149253731, "grad_norm": 27.233989715576172, "learning_rate": 9.947315096251267e-06, "loss": 30.5184, "step": 19009 }, { "epoch": 452.6208955223881, "grad_norm": 22.64206886291504, "learning_rate": 9.946808510638298e-06, "loss": 29.0761, "step": 19010 }, { "epoch": 452.644776119403, "grad_norm": 21.678905487060547, "learning_rate": 9.94630192502533e-06, "loss": 29.0221, "step": 19011 }, { "epoch": 452.6686567164179, "grad_norm": 18.353296279907227, "learning_rate": 9.945795339412362e-06, "loss": 28.9182, "step": 19012 }, { "epoch": 452.6925373134328, "grad_norm": 19.17940330505371, "learning_rate": 9.945288753799392e-06, "loss": 29.7233, "step": 19013 }, { "epoch": 452.7164179104478, "grad_norm": 19.929561614990234, "learning_rate": 9.944782168186424e-06, "loss": 28.9992, "step": 19014 }, { "epoch": 452.7402985074627, "grad_norm": 26.79445457458496, "learning_rate": 9.944275582573456e-06, "loss": 29.4933, "step": 19015 }, { "epoch": 452.7641791044776, "grad_norm": 25.97791290283203, "learning_rate": 9.943768996960486e-06, "loss": 29.6141, "step": 19016 }, { "epoch": 452.78805970149256, "grad_norm": 17.454017639160156, "learning_rate": 9.943262411347518e-06, "loss": 29.079, "step": 19017 }, { "epoch": 452.81194029850747, "grad_norm": 27.575307846069336, "learning_rate": 9.94275582573455e-06, "loss": 29.6457, "step": 19018 }, { "epoch": 452.8358208955224, "grad_norm": 22.000795364379883, "learning_rate": 9.94224924012158e-06, "loss": 28.2218, "step": 19019 }, { "epoch": 452.85970149253734, "grad_norm": 23.153474807739258, "learning_rate": 9.941742654508613e-06, "loss": 29.7166, "step": 19020 }, { "epoch": 452.88358208955225, "grad_norm": 19.123037338256836, "learning_rate": 9.941236068895645e-06, "loss": 29.7495, "step": 19021 }, { "epoch": 452.90746268656716, "grad_norm": 22.07781410217285, "learning_rate": 9.940729483282675e-06, "loss": 30.1874, "step": 19022 }, { "epoch": 452.93134328358207, "grad_norm": 16.852331161499023, "learning_rate": 9.940222897669707e-06, "loss": 29.2176, "step": 19023 }, { "epoch": 452.95522388059703, "grad_norm": 30.62080192565918, "learning_rate": 9.939716312056739e-06, "loss": 30.2328, "step": 19024 }, { "epoch": 452.97910447761194, "grad_norm": 25.09130096435547, "learning_rate": 9.939209726443771e-06, "loss": 29.7749, "step": 19025 }, { "epoch": 453.0, "grad_norm": 17.25800895690918, "learning_rate": 9.938703140830801e-06, "loss": 25.9245, "step": 19026 }, { "epoch": 453.0238805970149, "grad_norm": 21.87555694580078, "learning_rate": 9.938196555217833e-06, "loss": 29.7954, "step": 19027 }, { "epoch": 453.0477611940299, "grad_norm": 26.06454849243164, "learning_rate": 9.937689969604864e-06, "loss": 29.2764, "step": 19028 }, { "epoch": 453.0716417910448, "grad_norm": 18.936857223510742, "learning_rate": 9.937183383991896e-06, "loss": 28.4654, "step": 19029 }, { "epoch": 453.0955223880597, "grad_norm": 24.079837799072266, "learning_rate": 9.936676798378928e-06, "loss": 29.038, "step": 19030 }, { "epoch": 453.1194029850746, "grad_norm": 33.02068328857422, "learning_rate": 9.936170212765958e-06, "loss": 29.3694, "step": 19031 }, { "epoch": 453.14328358208957, "grad_norm": 21.16935920715332, "learning_rate": 9.93566362715299e-06, "loss": 30.7159, "step": 19032 }, { "epoch": 453.1671641791045, "grad_norm": 26.3829288482666, "learning_rate": 9.93515704154002e-06, "loss": 28.8901, "step": 19033 }, { "epoch": 453.1910447761194, "grad_norm": 26.91878890991211, "learning_rate": 9.934650455927052e-06, "loss": 29.2694, "step": 19034 }, { "epoch": 453.21492537313435, "grad_norm": 20.891857147216797, "learning_rate": 9.934143870314083e-06, "loss": 28.7589, "step": 19035 }, { "epoch": 453.23880597014926, "grad_norm": 18.569833755493164, "learning_rate": 9.933637284701115e-06, "loss": 28.8227, "step": 19036 }, { "epoch": 453.26268656716417, "grad_norm": 31.15035057067871, "learning_rate": 9.933130699088147e-06, "loss": 30.1908, "step": 19037 }, { "epoch": 453.28656716417913, "grad_norm": 23.253963470458984, "learning_rate": 9.932624113475177e-06, "loss": 29.9851, "step": 19038 }, { "epoch": 453.31044776119404, "grad_norm": 19.411197662353516, "learning_rate": 9.932117527862209e-06, "loss": 29.1209, "step": 19039 }, { "epoch": 453.33432835820895, "grad_norm": 27.161725997924805, "learning_rate": 9.931610942249241e-06, "loss": 31.0307, "step": 19040 }, { "epoch": 453.35820895522386, "grad_norm": 21.7067813873291, "learning_rate": 9.931104356636271e-06, "loss": 29.5604, "step": 19041 }, { "epoch": 453.3820895522388, "grad_norm": 18.418291091918945, "learning_rate": 9.930597771023303e-06, "loss": 30.4239, "step": 19042 }, { "epoch": 453.40597014925373, "grad_norm": 24.172698974609375, "learning_rate": 9.930091185410335e-06, "loss": 29.5182, "step": 19043 }, { "epoch": 453.42985074626864, "grad_norm": 26.470552444458008, "learning_rate": 9.929584599797366e-06, "loss": 30.539, "step": 19044 }, { "epoch": 453.4537313432836, "grad_norm": 20.006248474121094, "learning_rate": 9.929078014184398e-06, "loss": 30.3029, "step": 19045 }, { "epoch": 453.4776119402985, "grad_norm": 18.114675521850586, "learning_rate": 9.92857142857143e-06, "loss": 29.7339, "step": 19046 }, { "epoch": 453.5014925373134, "grad_norm": 21.677228927612305, "learning_rate": 9.92806484295846e-06, "loss": 28.5574, "step": 19047 }, { "epoch": 453.52537313432833, "grad_norm": 23.898073196411133, "learning_rate": 9.927558257345492e-06, "loss": 29.1927, "step": 19048 }, { "epoch": 453.5492537313433, "grad_norm": 19.3674373626709, "learning_rate": 9.927051671732524e-06, "loss": 29.3592, "step": 19049 }, { "epoch": 453.5731343283582, "grad_norm": NaN, "learning_rate": 9.926545086119554e-06, "loss": 51.8813, "step": 19050 }, { "epoch": 453.5970149253731, "grad_norm": 18.530160903930664, "learning_rate": 9.926545086119554e-06, "loss": 30.0106, "step": 19051 }, { "epoch": 453.6208955223881, "grad_norm": 18.773128509521484, "learning_rate": 9.926038500506587e-06, "loss": 30.5348, "step": 19052 }, { "epoch": 453.644776119403, "grad_norm": 23.465530395507812, "learning_rate": 9.925531914893619e-06, "loss": 29.0787, "step": 19053 }, { "epoch": 453.6686567164179, "grad_norm": 20.9652156829834, "learning_rate": 9.92502532928065e-06, "loss": 29.2345, "step": 19054 }, { "epoch": 453.6925373134328, "grad_norm": 24.755847930908203, "learning_rate": 9.92451874366768e-06, "loss": 29.2927, "step": 19055 }, { "epoch": 453.7164179104478, "grad_norm": 20.42489242553711, "learning_rate": 9.924012158054713e-06, "loss": 29.4847, "step": 19056 }, { "epoch": 453.7402985074627, "grad_norm": 22.348148345947266, "learning_rate": 9.923505572441743e-06, "loss": 29.9685, "step": 19057 }, { "epoch": 453.7641791044776, "grad_norm": 21.650615692138672, "learning_rate": 9.922998986828775e-06, "loss": 29.9357, "step": 19058 }, { "epoch": 453.78805970149256, "grad_norm": 23.012311935424805, "learning_rate": 9.922492401215807e-06, "loss": 29.1585, "step": 19059 }, { "epoch": 453.81194029850747, "grad_norm": 23.965974807739258, "learning_rate": 9.921985815602838e-06, "loss": 29.7905, "step": 19060 }, { "epoch": 453.8358208955224, "grad_norm": 17.922374725341797, "learning_rate": 9.92147922998987e-06, "loss": 29.7496, "step": 19061 }, { "epoch": 453.85970149253734, "grad_norm": 24.05933952331543, "learning_rate": 9.9209726443769e-06, "loss": 29.741, "step": 19062 }, { "epoch": 453.88358208955225, "grad_norm": 20.847492218017578, "learning_rate": 9.920466058763932e-06, "loss": 29.2709, "step": 19063 }, { "epoch": 453.90746268656716, "grad_norm": 23.328235626220703, "learning_rate": 9.919959473150962e-06, "loss": 29.1113, "step": 19064 }, { "epoch": 453.93134328358207, "grad_norm": 18.568466186523438, "learning_rate": 9.919452887537994e-06, "loss": 28.6684, "step": 19065 }, { "epoch": 453.95522388059703, "grad_norm": 20.001220703125, "learning_rate": 9.918946301925026e-06, "loss": 30.1047, "step": 19066 }, { "epoch": 453.97910447761194, "grad_norm": 30.461639404296875, "learning_rate": 9.918439716312057e-06, "loss": 29.3991, "step": 19067 }, { "epoch": 454.0, "grad_norm": 20.439512252807617, "learning_rate": 9.917933130699089e-06, "loss": 25.447, "step": 19068 }, { "epoch": 454.0238805970149, "grad_norm": 20.934720993041992, "learning_rate": 9.91742654508612e-06, "loss": 29.8999, "step": 19069 }, { "epoch": 454.0477611940299, "grad_norm": 22.450014114379883, "learning_rate": 9.916919959473151e-06, "loss": 29.4222, "step": 19070 }, { "epoch": 454.0716417910448, "grad_norm": 21.325674057006836, "learning_rate": 9.916413373860183e-06, "loss": 29.1857, "step": 19071 }, { "epoch": 454.0955223880597, "grad_norm": 23.04296875, "learning_rate": 9.915906788247215e-06, "loss": 30.9098, "step": 19072 }, { "epoch": 454.1194029850746, "grad_norm": 18.591461181640625, "learning_rate": 9.915400202634245e-06, "loss": 30.2492, "step": 19073 }, { "epoch": 454.14328358208957, "grad_norm": 18.84215545654297, "learning_rate": 9.914893617021277e-06, "loss": 30.1186, "step": 19074 }, { "epoch": 454.1671641791045, "grad_norm": 24.66998291015625, "learning_rate": 9.91438703140831e-06, "loss": 28.4616, "step": 19075 }, { "epoch": 454.1910447761194, "grad_norm": 18.380191802978516, "learning_rate": 9.91388044579534e-06, "loss": 29.1661, "step": 19076 }, { "epoch": 454.21492537313435, "grad_norm": 24.829097747802734, "learning_rate": 9.913373860182372e-06, "loss": 29.6372, "step": 19077 }, { "epoch": 454.23880597014926, "grad_norm": 17.87748146057129, "learning_rate": 9.912867274569404e-06, "loss": 29.6081, "step": 19078 }, { "epoch": 454.26268656716417, "grad_norm": 18.679304122924805, "learning_rate": 9.912360688956436e-06, "loss": 29.1529, "step": 19079 }, { "epoch": 454.28656716417913, "grad_norm": 19.627031326293945, "learning_rate": 9.911854103343466e-06, "loss": 28.9462, "step": 19080 }, { "epoch": 454.31044776119404, "grad_norm": 21.130306243896484, "learning_rate": 9.911347517730498e-06, "loss": 29.2786, "step": 19081 }, { "epoch": 454.33432835820895, "grad_norm": 19.948322296142578, "learning_rate": 9.91084093211753e-06, "loss": 29.566, "step": 19082 }, { "epoch": 454.35820895522386, "grad_norm": 23.76077651977539, "learning_rate": 9.91033434650456e-06, "loss": 30.2856, "step": 19083 }, { "epoch": 454.3820895522388, "grad_norm": 17.250768661499023, "learning_rate": 9.909827760891592e-06, "loss": 29.6766, "step": 19084 }, { "epoch": 454.40597014925373, "grad_norm": 19.772397994995117, "learning_rate": 9.909321175278623e-06, "loss": 29.4973, "step": 19085 }, { "epoch": 454.42985074626864, "grad_norm": 17.53466033935547, "learning_rate": 9.908814589665655e-06, "loss": 29.0204, "step": 19086 }, { "epoch": 454.4537313432836, "grad_norm": 25.139310836791992, "learning_rate": 9.908308004052687e-06, "loss": 30.3044, "step": 19087 }, { "epoch": 454.4776119402985, "grad_norm": 20.389799118041992, "learning_rate": 9.907801418439717e-06, "loss": 29.3645, "step": 19088 }, { "epoch": 454.5014925373134, "grad_norm": 21.05880355834961, "learning_rate": 9.907294832826749e-06, "loss": 30.2086, "step": 19089 }, { "epoch": 454.52537313432833, "grad_norm": 18.106170654296875, "learning_rate": 9.90678824721378e-06, "loss": 29.8774, "step": 19090 }, { "epoch": 454.5492537313433, "grad_norm": 28.106945037841797, "learning_rate": 9.906281661600811e-06, "loss": 27.9651, "step": 19091 }, { "epoch": 454.5731343283582, "grad_norm": 22.183032989501953, "learning_rate": 9.905775075987842e-06, "loss": 29.6666, "step": 19092 }, { "epoch": 454.5970149253731, "grad_norm": 18.060319900512695, "learning_rate": 9.905268490374874e-06, "loss": 28.5265, "step": 19093 }, { "epoch": 454.6208955223881, "grad_norm": 17.943225860595703, "learning_rate": 9.904761904761906e-06, "loss": 29.4108, "step": 19094 }, { "epoch": 454.644776119403, "grad_norm": 18.97201919555664, "learning_rate": 9.904255319148936e-06, "loss": 28.9959, "step": 19095 }, { "epoch": 454.6686567164179, "grad_norm": 19.64834976196289, "learning_rate": 9.903748733535968e-06, "loss": 28.5516, "step": 19096 }, { "epoch": 454.6925373134328, "grad_norm": 22.228757858276367, "learning_rate": 9.903242147923e-06, "loss": 29.23, "step": 19097 }, { "epoch": 454.7164179104478, "grad_norm": 19.77589225769043, "learning_rate": 9.90273556231003e-06, "loss": 30.0002, "step": 19098 }, { "epoch": 454.7402985074627, "grad_norm": 23.442407608032227, "learning_rate": 9.902228976697062e-06, "loss": 29.3287, "step": 19099 }, { "epoch": 454.7641791044776, "grad_norm": 23.087989807128906, "learning_rate": 9.901722391084094e-06, "loss": 30.6465, "step": 19100 }, { "epoch": 454.78805970149256, "grad_norm": 15.87074089050293, "learning_rate": 9.901215805471125e-06, "loss": 29.7361, "step": 19101 }, { "epoch": 454.81194029850747, "grad_norm": 20.68094253540039, "learning_rate": 9.900709219858157e-06, "loss": 29.7803, "step": 19102 }, { "epoch": 454.8358208955224, "grad_norm": 23.566436767578125, "learning_rate": 9.900202634245189e-06, "loss": 29.4971, "step": 19103 }, { "epoch": 454.85970149253734, "grad_norm": 24.97744369506836, "learning_rate": 9.899696048632219e-06, "loss": 30.2207, "step": 19104 }, { "epoch": 454.88358208955225, "grad_norm": 17.030092239379883, "learning_rate": 9.899189463019251e-06, "loss": 29.3498, "step": 19105 }, { "epoch": 454.90746268656716, "grad_norm": 18.9128360748291, "learning_rate": 9.898682877406283e-06, "loss": 29.3812, "step": 19106 }, { "epoch": 454.93134328358207, "grad_norm": 19.106258392333984, "learning_rate": 9.898176291793315e-06, "loss": 29.5947, "step": 19107 }, { "epoch": 454.95522388059703, "grad_norm": 23.14015769958496, "learning_rate": 9.897669706180345e-06, "loss": 28.2389, "step": 19108 }, { "epoch": 454.97910447761194, "grad_norm": 20.130321502685547, "learning_rate": 9.897163120567377e-06, "loss": 29.6467, "step": 19109 }, { "epoch": 455.0, "grad_norm": 19.723512649536133, "learning_rate": 9.89665653495441e-06, "loss": 26.8729, "step": 19110 }, { "epoch": 455.0238805970149, "grad_norm": 17.89289665222168, "learning_rate": 9.89614994934144e-06, "loss": 30.4565, "step": 19111 }, { "epoch": 455.0477611940299, "grad_norm": 23.229106903076172, "learning_rate": 9.895643363728472e-06, "loss": 29.5136, "step": 19112 }, { "epoch": 455.0716417910448, "grad_norm": 17.3760929107666, "learning_rate": 9.895136778115502e-06, "loss": 29.8619, "step": 19113 }, { "epoch": 455.0955223880597, "grad_norm": 24.57352638244629, "learning_rate": 9.894630192502534e-06, "loss": 29.83, "step": 19114 }, { "epoch": 455.1194029850746, "grad_norm": 21.133859634399414, "learning_rate": 9.894123606889566e-06, "loss": 29.5494, "step": 19115 }, { "epoch": 455.14328358208957, "grad_norm": 29.154788970947266, "learning_rate": 9.893617021276596e-06, "loss": 29.2929, "step": 19116 }, { "epoch": 455.1671641791045, "grad_norm": 21.36109161376953, "learning_rate": 9.893110435663628e-06, "loss": 28.8335, "step": 19117 }, { "epoch": 455.1910447761194, "grad_norm": 30.977230072021484, "learning_rate": 9.892603850050659e-06, "loss": 28.7356, "step": 19118 }, { "epoch": 455.21492537313435, "grad_norm": 27.112159729003906, "learning_rate": 9.89209726443769e-06, "loss": 29.1088, "step": 19119 }, { "epoch": 455.23880597014926, "grad_norm": 25.431562423706055, "learning_rate": 9.891590678824721e-06, "loss": 30.0781, "step": 19120 }, { "epoch": 455.26268656716417, "grad_norm": 25.777645111083984, "learning_rate": 9.891084093211753e-06, "loss": 29.7197, "step": 19121 }, { "epoch": 455.28656716417913, "grad_norm": 23.407804489135742, "learning_rate": 9.890577507598785e-06, "loss": 29.2347, "step": 19122 }, { "epoch": 455.31044776119404, "grad_norm": 21.81983757019043, "learning_rate": 9.890070921985815e-06, "loss": 29.9653, "step": 19123 }, { "epoch": 455.33432835820895, "grad_norm": 24.613548278808594, "learning_rate": 9.889564336372847e-06, "loss": 29.4998, "step": 19124 }, { "epoch": 455.35820895522386, "grad_norm": 22.13513946533203, "learning_rate": 9.88905775075988e-06, "loss": 29.2122, "step": 19125 }, { "epoch": 455.3820895522388, "grad_norm": 22.429059982299805, "learning_rate": 9.88855116514691e-06, "loss": 30.44, "step": 19126 }, { "epoch": 455.40597014925373, "grad_norm": 19.439857482910156, "learning_rate": 9.888044579533942e-06, "loss": 29.7238, "step": 19127 }, { "epoch": 455.42985074626864, "grad_norm": 25.000146865844727, "learning_rate": 9.887537993920974e-06, "loss": 28.7278, "step": 19128 }, { "epoch": 455.4537313432836, "grad_norm": 23.06098747253418, "learning_rate": 9.887031408308004e-06, "loss": 30.5801, "step": 19129 }, { "epoch": 455.4776119402985, "grad_norm": 24.45461082458496, "learning_rate": 9.886524822695036e-06, "loss": 29.044, "step": 19130 }, { "epoch": 455.5014925373134, "grad_norm": 21.666181564331055, "learning_rate": 9.886018237082068e-06, "loss": 29.3057, "step": 19131 }, { "epoch": 455.52537313432833, "grad_norm": 23.259044647216797, "learning_rate": 9.8855116514691e-06, "loss": 29.9987, "step": 19132 }, { "epoch": 455.5492537313433, "grad_norm": 21.323957443237305, "learning_rate": 9.88500506585613e-06, "loss": 29.4533, "step": 19133 }, { "epoch": 455.5731343283582, "grad_norm": 20.257339477539062, "learning_rate": 9.884498480243162e-06, "loss": 28.9865, "step": 19134 }, { "epoch": 455.5970149253731, "grad_norm": 25.392488479614258, "learning_rate": 9.883991894630194e-06, "loss": 29.3876, "step": 19135 }, { "epoch": 455.6208955223881, "grad_norm": 21.934669494628906, "learning_rate": 9.883485309017225e-06, "loss": 29.0184, "step": 19136 }, { "epoch": 455.644776119403, "grad_norm": 18.836292266845703, "learning_rate": 9.882978723404257e-06, "loss": 28.3154, "step": 19137 }, { "epoch": 455.6686567164179, "grad_norm": 22.241985321044922, "learning_rate": 9.882472137791289e-06, "loss": 30.2232, "step": 19138 }, { "epoch": 455.6925373134328, "grad_norm": 20.754749298095703, "learning_rate": 9.881965552178319e-06, "loss": 29.312, "step": 19139 }, { "epoch": 455.7164179104478, "grad_norm": 25.973600387573242, "learning_rate": 9.881458966565351e-06, "loss": 29.6986, "step": 19140 }, { "epoch": 455.7402985074627, "grad_norm": 21.763071060180664, "learning_rate": 9.880952380952381e-06, "loss": 29.5845, "step": 19141 }, { "epoch": 455.7641791044776, "grad_norm": 20.634607315063477, "learning_rate": 9.880445795339413e-06, "loss": 29.6203, "step": 19142 }, { "epoch": 455.78805970149256, "grad_norm": 18.496437072753906, "learning_rate": 9.879939209726445e-06, "loss": 29.9328, "step": 19143 }, { "epoch": 455.81194029850747, "grad_norm": 18.399723052978516, "learning_rate": 9.879432624113476e-06, "loss": 29.182, "step": 19144 }, { "epoch": 455.8358208955224, "grad_norm": 18.73917007446289, "learning_rate": 9.878926038500508e-06, "loss": 29.5763, "step": 19145 }, { "epoch": 455.85970149253734, "grad_norm": 23.76397132873535, "learning_rate": 9.878419452887538e-06, "loss": 28.0173, "step": 19146 }, { "epoch": 455.88358208955225, "grad_norm": 21.462810516357422, "learning_rate": 9.87791286727457e-06, "loss": 29.8801, "step": 19147 }, { "epoch": 455.90746268656716, "grad_norm": 21.139163970947266, "learning_rate": 9.8774062816616e-06, "loss": 29.7496, "step": 19148 }, { "epoch": 455.93134328358207, "grad_norm": 21.185636520385742, "learning_rate": 9.876899696048632e-06, "loss": 30.1738, "step": 19149 }, { "epoch": 455.95522388059703, "grad_norm": 21.83820343017578, "learning_rate": 9.876393110435664e-06, "loss": 29.488, "step": 19150 }, { "epoch": 455.97910447761194, "grad_norm": 16.595802307128906, "learning_rate": 9.875886524822695e-06, "loss": 29.3169, "step": 19151 }, { "epoch": 456.0, "grad_norm": 18.398517608642578, "learning_rate": 9.875379939209727e-06, "loss": 24.9141, "step": 19152 }, { "epoch": 456.0238805970149, "grad_norm": 18.595191955566406, "learning_rate": 9.874873353596759e-06, "loss": 29.9468, "step": 19153 }, { "epoch": 456.0477611940299, "grad_norm": 17.2691593170166, "learning_rate": 9.874366767983789e-06, "loss": 29.4216, "step": 19154 }, { "epoch": 456.0716417910448, "grad_norm": 20.43369483947754, "learning_rate": 9.873860182370821e-06, "loss": 28.5565, "step": 19155 }, { "epoch": 456.0955223880597, "grad_norm": 22.333574295043945, "learning_rate": 9.873353596757853e-06, "loss": 30.0623, "step": 19156 }, { "epoch": 456.1194029850746, "grad_norm": 22.241008758544922, "learning_rate": 9.872847011144883e-06, "loss": 29.9241, "step": 19157 }, { "epoch": 456.14328358208957, "grad_norm": 28.559328079223633, "learning_rate": 9.872340425531915e-06, "loss": 28.7025, "step": 19158 }, { "epoch": 456.1671641791045, "grad_norm": 21.240528106689453, "learning_rate": 9.871833839918947e-06, "loss": 30.2142, "step": 19159 }, { "epoch": 456.1910447761194, "grad_norm": 18.391925811767578, "learning_rate": 9.87132725430598e-06, "loss": 28.0082, "step": 19160 }, { "epoch": 456.21492537313435, "grad_norm": 24.54650115966797, "learning_rate": 9.87082066869301e-06, "loss": 29.8683, "step": 19161 }, { "epoch": 456.23880597014926, "grad_norm": 19.180864334106445, "learning_rate": 9.870314083080042e-06, "loss": 29.2989, "step": 19162 }, { "epoch": 456.26268656716417, "grad_norm": 22.951881408691406, "learning_rate": 9.869807497467074e-06, "loss": 29.3115, "step": 19163 }, { "epoch": 456.28656716417913, "grad_norm": NaN, "learning_rate": 9.869300911854104e-06, "loss": 37.2842, "step": 19164 }, { "epoch": 456.31044776119404, "grad_norm": 21.16175079345703, "learning_rate": 9.869300911854104e-06, "loss": 29.6535, "step": 19165 }, { "epoch": 456.33432835820895, "grad_norm": 20.784685134887695, "learning_rate": 9.868794326241136e-06, "loss": 29.2302, "step": 19166 }, { "epoch": 456.35820895522386, "grad_norm": 24.27679443359375, "learning_rate": 9.868287740628168e-06, "loss": 29.8548, "step": 19167 }, { "epoch": 456.3820895522388, "grad_norm": 18.145225524902344, "learning_rate": 9.867781155015198e-06, "loss": 30.2074, "step": 19168 }, { "epoch": 456.40597014925373, "grad_norm": 24.988798141479492, "learning_rate": 9.86727456940223e-06, "loss": 29.8436, "step": 19169 }, { "epoch": 456.42985074626864, "grad_norm": 20.108173370361328, "learning_rate": 9.86676798378926e-06, "loss": 28.379, "step": 19170 }, { "epoch": 456.4537313432836, "grad_norm": 23.041162490844727, "learning_rate": 9.866261398176293e-06, "loss": 29.1555, "step": 19171 }, { "epoch": 456.4776119402985, "grad_norm": 21.777790069580078, "learning_rate": 9.865754812563325e-06, "loss": 29.2353, "step": 19172 }, { "epoch": 456.5014925373134, "grad_norm": NaN, "learning_rate": 9.865248226950355e-06, "loss": 36.8098, "step": 19173 }, { "epoch": 456.52537313432833, "grad_norm": 17.247835159301758, "learning_rate": 9.865248226950355e-06, "loss": 27.7985, "step": 19174 }, { "epoch": 456.5492537313433, "grad_norm": 21.01220703125, "learning_rate": 9.864741641337387e-06, "loss": 30.0772, "step": 19175 }, { "epoch": 456.5731343283582, "grad_norm": 19.805622100830078, "learning_rate": 9.864235055724417e-06, "loss": 29.3845, "step": 19176 }, { "epoch": 456.5970149253731, "grad_norm": 20.144039154052734, "learning_rate": 9.86372847011145e-06, "loss": 30.251, "step": 19177 }, { "epoch": 456.6208955223881, "grad_norm": 20.17630386352539, "learning_rate": 9.86322188449848e-06, "loss": 31.0423, "step": 19178 }, { "epoch": 456.644776119403, "grad_norm": 20.71660804748535, "learning_rate": 9.862715298885512e-06, "loss": 28.7978, "step": 19179 }, { "epoch": 456.6686567164179, "grad_norm": 17.953632354736328, "learning_rate": 9.862208713272544e-06, "loss": 28.3022, "step": 19180 }, { "epoch": 456.6925373134328, "grad_norm": 23.19453239440918, "learning_rate": 9.861702127659574e-06, "loss": 30.1111, "step": 19181 }, { "epoch": 456.7164179104478, "grad_norm": 18.856718063354492, "learning_rate": 9.861195542046606e-06, "loss": 29.4, "step": 19182 }, { "epoch": 456.7402985074627, "grad_norm": 27.340335845947266, "learning_rate": 9.860688956433638e-06, "loss": 30.107, "step": 19183 }, { "epoch": 456.7641791044776, "grad_norm": 20.478364944458008, "learning_rate": 9.860182370820668e-06, "loss": 29.4685, "step": 19184 }, { "epoch": 456.78805970149256, "grad_norm": 24.226905822753906, "learning_rate": 9.8596757852077e-06, "loss": 29.0965, "step": 19185 }, { "epoch": 456.81194029850747, "grad_norm": 21.748374938964844, "learning_rate": 9.859169199594732e-06, "loss": 29.9059, "step": 19186 }, { "epoch": 456.8358208955224, "grad_norm": 27.36787986755371, "learning_rate": 9.858662613981765e-06, "loss": 29.9554, "step": 19187 }, { "epoch": 456.85970149253734, "grad_norm": 23.247079849243164, "learning_rate": 9.858156028368795e-06, "loss": 29.0368, "step": 19188 }, { "epoch": 456.88358208955225, "grad_norm": 22.493261337280273, "learning_rate": 9.857649442755827e-06, "loss": 29.3209, "step": 19189 }, { "epoch": 456.90746268656716, "grad_norm": 19.817188262939453, "learning_rate": 9.857142857142859e-06, "loss": 29.6946, "step": 19190 }, { "epoch": 456.93134328358207, "grad_norm": 20.686668395996094, "learning_rate": 9.85663627152989e-06, "loss": 29.9945, "step": 19191 }, { "epoch": 456.95522388059703, "grad_norm": 21.672449111938477, "learning_rate": 9.856129685916921e-06, "loss": 29.2487, "step": 19192 }, { "epoch": 456.97910447761194, "grad_norm": 21.50278091430664, "learning_rate": 9.855623100303953e-06, "loss": 28.9159, "step": 19193 }, { "epoch": 457.0, "grad_norm": 20.647798538208008, "learning_rate": 9.855116514690984e-06, "loss": 26.5313, "step": 19194 }, { "epoch": 457.0238805970149, "grad_norm": 20.921506881713867, "learning_rate": 9.854609929078016e-06, "loss": 29.4576, "step": 19195 }, { "epoch": 457.0477611940299, "grad_norm": 27.025419235229492, "learning_rate": 9.854103343465048e-06, "loss": 28.227, "step": 19196 }, { "epoch": 457.0716417910448, "grad_norm": 26.055225372314453, "learning_rate": 9.853596757852078e-06, "loss": 29.3895, "step": 19197 }, { "epoch": 457.0955223880597, "grad_norm": 16.929737091064453, "learning_rate": 9.85309017223911e-06, "loss": 30.1052, "step": 19198 }, { "epoch": 457.1194029850746, "grad_norm": 20.70711898803711, "learning_rate": 9.85258358662614e-06, "loss": 29.1084, "step": 19199 }, { "epoch": 457.14328358208957, "grad_norm": 24.27307891845703, "learning_rate": 9.852077001013172e-06, "loss": 28.5702, "step": 19200 }, { "epoch": 457.1671641791045, "grad_norm": 20.841079711914062, "learning_rate": 9.851570415400204e-06, "loss": 30.1483, "step": 19201 }, { "epoch": 457.1910447761194, "grad_norm": 21.060884475708008, "learning_rate": 9.851063829787235e-06, "loss": 29.1818, "step": 19202 }, { "epoch": 457.21492537313435, "grad_norm": 19.868520736694336, "learning_rate": 9.850557244174267e-06, "loss": 28.1411, "step": 19203 }, { "epoch": 457.23880597014926, "grad_norm": 29.871267318725586, "learning_rate": 9.850050658561297e-06, "loss": 29.4148, "step": 19204 }, { "epoch": 457.26268656716417, "grad_norm": 25.286216735839844, "learning_rate": 9.849544072948329e-06, "loss": 29.4848, "step": 19205 }, { "epoch": 457.28656716417913, "grad_norm": 17.283329010009766, "learning_rate": 9.84903748733536e-06, "loss": 28.3217, "step": 19206 }, { "epoch": 457.31044776119404, "grad_norm": 24.15245246887207, "learning_rate": 9.848530901722391e-06, "loss": 28.5427, "step": 19207 }, { "epoch": 457.33432835820895, "grad_norm": 32.59128189086914, "learning_rate": 9.848024316109423e-06, "loss": 29.8952, "step": 19208 }, { "epoch": 457.35820895522386, "grad_norm": 20.108243942260742, "learning_rate": 9.847517730496454e-06, "loss": 29.9038, "step": 19209 }, { "epoch": 457.3820895522388, "grad_norm": 22.84111785888672, "learning_rate": 9.847011144883486e-06, "loss": 29.3513, "step": 19210 }, { "epoch": 457.40597014925373, "grad_norm": 33.537071228027344, "learning_rate": 9.846504559270518e-06, "loss": 29.6163, "step": 19211 }, { "epoch": 457.42985074626864, "grad_norm": 18.52109718322754, "learning_rate": 9.845997973657548e-06, "loss": 29.0576, "step": 19212 }, { "epoch": 457.4537313432836, "grad_norm": 33.638206481933594, "learning_rate": 9.84549138804458e-06, "loss": 29.1435, "step": 19213 }, { "epoch": 457.4776119402985, "grad_norm": 26.255043029785156, "learning_rate": 9.844984802431612e-06, "loss": 29.3605, "step": 19214 }, { "epoch": 457.5014925373134, "grad_norm": 21.626371383666992, "learning_rate": 9.844478216818644e-06, "loss": 29.1576, "step": 19215 }, { "epoch": 457.52537313432833, "grad_norm": 35.100955963134766, "learning_rate": 9.843971631205674e-06, "loss": 28.9487, "step": 19216 }, { "epoch": 457.5492537313433, "grad_norm": 21.975605010986328, "learning_rate": 9.843465045592706e-06, "loss": 29.4886, "step": 19217 }, { "epoch": 457.5731343283582, "grad_norm": 36.209957122802734, "learning_rate": 9.842958459979738e-06, "loss": 30.3928, "step": 19218 }, { "epoch": 457.5970149253731, "grad_norm": 25.683401107788086, "learning_rate": 9.842451874366769e-06, "loss": 30.1302, "step": 19219 }, { "epoch": 457.6208955223881, "grad_norm": 30.197681427001953, "learning_rate": 9.8419452887538e-06, "loss": 29.8332, "step": 19220 }, { "epoch": 457.644776119403, "grad_norm": 27.59058380126953, "learning_rate": 9.841438703140833e-06, "loss": 29.4078, "step": 19221 }, { "epoch": 457.6686567164179, "grad_norm": 21.226558685302734, "learning_rate": 9.840932117527863e-06, "loss": 29.6398, "step": 19222 }, { "epoch": 457.6925373134328, "grad_norm": 27.211956024169922, "learning_rate": 9.840425531914895e-06, "loss": 29.5271, "step": 19223 }, { "epoch": 457.7164179104478, "grad_norm": 27.29814338684082, "learning_rate": 9.839918946301927e-06, "loss": 29.4784, "step": 19224 }, { "epoch": 457.7402985074627, "grad_norm": 17.2387752532959, "learning_rate": 9.839412360688957e-06, "loss": 28.8109, "step": 19225 }, { "epoch": 457.7641791044776, "grad_norm": 33.42396545410156, "learning_rate": 9.83890577507599e-06, "loss": 30.0833, "step": 19226 }, { "epoch": 457.78805970149256, "grad_norm": 26.031282424926758, "learning_rate": 9.83839918946302e-06, "loss": 29.9814, "step": 19227 }, { "epoch": 457.81194029850747, "grad_norm": 19.81572723388672, "learning_rate": 9.837892603850052e-06, "loss": 29.177, "step": 19228 }, { "epoch": 457.8358208955224, "grad_norm": 33.60468673706055, "learning_rate": 9.837386018237084e-06, "loss": 30.6398, "step": 19229 }, { "epoch": 457.85970149253734, "grad_norm": 20.807445526123047, "learning_rate": 9.836879432624114e-06, "loss": 29.6065, "step": 19230 }, { "epoch": 457.88358208955225, "grad_norm": 34.45721435546875, "learning_rate": 9.836372847011146e-06, "loss": 29.4505, "step": 19231 }, { "epoch": 457.90746268656716, "grad_norm": 23.473485946655273, "learning_rate": 9.835866261398176e-06, "loss": 30.3788, "step": 19232 }, { "epoch": 457.93134328358207, "grad_norm": 30.816837310791016, "learning_rate": 9.835359675785208e-06, "loss": 29.8634, "step": 19233 }, { "epoch": 457.95522388059703, "grad_norm": 26.7440242767334, "learning_rate": 9.83485309017224e-06, "loss": 29.7351, "step": 19234 }, { "epoch": 457.97910447761194, "grad_norm": 21.26458168029785, "learning_rate": 9.83434650455927e-06, "loss": 28.956, "step": 19235 }, { "epoch": 458.0, "grad_norm": 35.877960205078125, "learning_rate": 9.833839918946303e-06, "loss": 25.7181, "step": 19236 }, { "epoch": 458.0238805970149, "grad_norm": 26.053483963012695, "learning_rate": 9.833333333333333e-06, "loss": 27.5489, "step": 19237 }, { "epoch": 458.0477611940299, "grad_norm": 44.33601760864258, "learning_rate": 9.832826747720365e-06, "loss": 29.3817, "step": 19238 }, { "epoch": 458.0716417910448, "grad_norm": 29.47449493408203, "learning_rate": 9.832320162107397e-06, "loss": 28.4728, "step": 19239 }, { "epoch": 458.0955223880597, "grad_norm": 51.64469909667969, "learning_rate": 9.831813576494429e-06, "loss": 29.3539, "step": 19240 }, { "epoch": 458.1194029850746, "grad_norm": 44.06813430786133, "learning_rate": 9.83130699088146e-06, "loss": 28.7457, "step": 19241 }, { "epoch": 458.14328358208957, "grad_norm": 35.73231506347656, "learning_rate": 9.830800405268491e-06, "loss": 29.7434, "step": 19242 }, { "epoch": 458.1671641791045, "grad_norm": 35.66960906982422, "learning_rate": 9.830293819655523e-06, "loss": 29.3448, "step": 19243 }, { "epoch": 458.1910447761194, "grad_norm": 37.736209869384766, "learning_rate": 9.829787234042554e-06, "loss": 29.6803, "step": 19244 }, { "epoch": 458.21492537313435, "grad_norm": 31.245277404785156, "learning_rate": 9.829280648429586e-06, "loss": 29.7783, "step": 19245 }, { "epoch": 458.23880597014926, "grad_norm": 40.36437225341797, "learning_rate": 9.828774062816618e-06, "loss": 29.047, "step": 19246 }, { "epoch": 458.26268656716417, "grad_norm": 30.969738006591797, "learning_rate": 9.828267477203648e-06, "loss": 29.6852, "step": 19247 }, { "epoch": 458.28656716417913, "grad_norm": 40.856712341308594, "learning_rate": 9.82776089159068e-06, "loss": 28.8257, "step": 19248 }, { "epoch": 458.31044776119404, "grad_norm": 33.11186981201172, "learning_rate": 9.827254305977712e-06, "loss": 30.7169, "step": 19249 }, { "epoch": 458.33432835820895, "grad_norm": 41.166038513183594, "learning_rate": 9.826747720364742e-06, "loss": 29.9743, "step": 19250 }, { "epoch": 458.35820895522386, "grad_norm": 35.319862365722656, "learning_rate": 9.826241134751774e-06, "loss": 29.0824, "step": 19251 }, { "epoch": 458.3820895522388, "grad_norm": 41.14870834350586, "learning_rate": 9.825734549138806e-06, "loss": 29.0222, "step": 19252 }, { "epoch": 458.40597014925373, "grad_norm": 35.1260986328125, "learning_rate": 9.825227963525837e-06, "loss": 29.0844, "step": 19253 }, { "epoch": 458.42985074626864, "grad_norm": 36.84624481201172, "learning_rate": 9.824721377912869e-06, "loss": 28.7555, "step": 19254 }, { "epoch": 458.4537313432836, "grad_norm": 32.55091094970703, "learning_rate": 9.8242147922999e-06, "loss": 30.1445, "step": 19255 }, { "epoch": 458.4776119402985, "grad_norm": 39.441463470458984, "learning_rate": 9.823708206686931e-06, "loss": 29.8275, "step": 19256 }, { "epoch": 458.5014925373134, "grad_norm": 30.006322860717773, "learning_rate": 9.823201621073963e-06, "loss": 29.5119, "step": 19257 }, { "epoch": 458.52537313432833, "grad_norm": 40.125423431396484, "learning_rate": 9.822695035460993e-06, "loss": 30.3015, "step": 19258 }, { "epoch": 458.5492537313433, "grad_norm": 32.94392776489258, "learning_rate": 9.822188449848025e-06, "loss": 28.7093, "step": 19259 }, { "epoch": 458.5731343283582, "grad_norm": 36.204490661621094, "learning_rate": 9.821681864235056e-06, "loss": 29.4371, "step": 19260 }, { "epoch": 458.5970149253731, "grad_norm": 34.00215148925781, "learning_rate": 9.821175278622088e-06, "loss": 29.0574, "step": 19261 }, { "epoch": 458.6208955223881, "grad_norm": 36.801551818847656, "learning_rate": 9.82066869300912e-06, "loss": 29.195, "step": 19262 }, { "epoch": 458.644776119403, "grad_norm": 33.25895690917969, "learning_rate": 9.82016210739615e-06, "loss": 29.4306, "step": 19263 }, { "epoch": 458.6686567164179, "grad_norm": 37.70011520385742, "learning_rate": 9.819655521783182e-06, "loss": 29.8465, "step": 19264 }, { "epoch": 458.6925373134328, "grad_norm": 34.15808868408203, "learning_rate": 9.819148936170212e-06, "loss": 29.8913, "step": 19265 }, { "epoch": 458.7164179104478, "grad_norm": 36.8090934753418, "learning_rate": 9.818642350557244e-06, "loss": 28.998, "step": 19266 }, { "epoch": 458.7402985074627, "grad_norm": 30.713571548461914, "learning_rate": 9.818135764944276e-06, "loss": 29.5298, "step": 19267 }, { "epoch": 458.7641791044776, "grad_norm": 41.15922927856445, "learning_rate": 9.817629179331308e-06, "loss": 29.1809, "step": 19268 }, { "epoch": 458.78805970149256, "grad_norm": 36.987361907958984, "learning_rate": 9.817122593718339e-06, "loss": 29.1823, "step": 19269 }, { "epoch": 458.81194029850747, "grad_norm": 40.20362854003906, "learning_rate": 9.81661600810537e-06, "loss": 30.4912, "step": 19270 }, { "epoch": 458.8358208955224, "grad_norm": 38.58114242553711, "learning_rate": 9.816109422492403e-06, "loss": 30.6518, "step": 19271 }, { "epoch": 458.85970149253734, "grad_norm": 33.77410125732422, "learning_rate": 9.815602836879433e-06, "loss": 29.1496, "step": 19272 }, { "epoch": 458.88358208955225, "grad_norm": 30.761463165283203, "learning_rate": 9.815096251266465e-06, "loss": 28.6573, "step": 19273 }, { "epoch": 458.90746268656716, "grad_norm": 33.24839782714844, "learning_rate": 9.814589665653497e-06, "loss": 29.4314, "step": 19274 }, { "epoch": 458.93134328358207, "grad_norm": 32.07191467285156, "learning_rate": 9.814083080040527e-06, "loss": 29.1255, "step": 19275 }, { "epoch": 458.95522388059703, "grad_norm": 41.3092041015625, "learning_rate": 9.81357649442756e-06, "loss": 29.3262, "step": 19276 }, { "epoch": 458.97910447761194, "grad_norm": 36.18317413330078, "learning_rate": 9.813069908814591e-06, "loss": 29.3351, "step": 19277 }, { "epoch": 459.0, "grad_norm": 35.39738082885742, "learning_rate": 9.812563323201622e-06, "loss": 26.0214, "step": 19278 }, { "epoch": 459.0238805970149, "grad_norm": 37.295963287353516, "learning_rate": 9.812056737588654e-06, "loss": 28.6834, "step": 19279 }, { "epoch": 459.0477611940299, "grad_norm": 31.578271865844727, "learning_rate": 9.811550151975686e-06, "loss": 29.5739, "step": 19280 }, { "epoch": 459.0716417910448, "grad_norm": 27.35216522216797, "learning_rate": 9.811043566362716e-06, "loss": 28.6825, "step": 19281 }, { "epoch": 459.0955223880597, "grad_norm": 37.37176513671875, "learning_rate": 9.810536980749748e-06, "loss": 28.9888, "step": 19282 }, { "epoch": 459.1194029850746, "grad_norm": 30.310951232910156, "learning_rate": 9.81003039513678e-06, "loss": 28.5085, "step": 19283 }, { "epoch": 459.14328358208957, "grad_norm": 39.19706344604492, "learning_rate": 9.80952380952381e-06, "loss": 29.9645, "step": 19284 }, { "epoch": 459.1671641791045, "grad_norm": 35.94255065917969, "learning_rate": 9.809017223910842e-06, "loss": 29.7614, "step": 19285 }, { "epoch": 459.1910447761194, "grad_norm": 37.08477783203125, "learning_rate": 9.808510638297873e-06, "loss": 29.1952, "step": 19286 }, { "epoch": 459.21492537313435, "grad_norm": 33.27838897705078, "learning_rate": 9.808004052684905e-06, "loss": 28.5363, "step": 19287 }, { "epoch": 459.23880597014926, "grad_norm": 35.71473693847656, "learning_rate": 9.807497467071935e-06, "loss": 29.5674, "step": 19288 }, { "epoch": 459.26268656716417, "grad_norm": 30.5062255859375, "learning_rate": 9.806990881458967e-06, "loss": 29.0441, "step": 19289 }, { "epoch": 459.28656716417913, "grad_norm": 36.58542251586914, "learning_rate": 9.806484295845999e-06, "loss": 29.3961, "step": 19290 }, { "epoch": 459.31044776119404, "grad_norm": 30.195968627929688, "learning_rate": 9.80597771023303e-06, "loss": 29.7412, "step": 19291 }, { "epoch": 459.33432835820895, "grad_norm": 38.29735565185547, "learning_rate": 9.805471124620061e-06, "loss": 29.0478, "step": 19292 }, { "epoch": 459.35820895522386, "grad_norm": 34.79240417480469, "learning_rate": 9.804964539007093e-06, "loss": 29.5119, "step": 19293 }, { "epoch": 459.3820895522388, "grad_norm": 36.50742721557617, "learning_rate": 9.804457953394124e-06, "loss": 29.515, "step": 19294 }, { "epoch": 459.40597014925373, "grad_norm": 35.28668975830078, "learning_rate": 9.803951367781156e-06, "loss": 30.0612, "step": 19295 }, { "epoch": 459.42985074626864, "grad_norm": 32.858192443847656, "learning_rate": 9.803444782168188e-06, "loss": 29.1399, "step": 19296 }, { "epoch": 459.4537313432836, "grad_norm": 31.381935119628906, "learning_rate": 9.802938196555218e-06, "loss": 29.3441, "step": 19297 }, { "epoch": 459.4776119402985, "grad_norm": 34.095420837402344, "learning_rate": 9.80243161094225e-06, "loss": 29.0445, "step": 19298 }, { "epoch": 459.5014925373134, "grad_norm": 30.9876651763916, "learning_rate": 9.801925025329282e-06, "loss": 29.548, "step": 19299 }, { "epoch": 459.52537313432833, "grad_norm": 37.28546142578125, "learning_rate": 9.801418439716312e-06, "loss": 28.9987, "step": 19300 }, { "epoch": 459.5492537313433, "grad_norm": 35.0363655090332, "learning_rate": 9.800911854103344e-06, "loss": 29.2121, "step": 19301 }, { "epoch": 459.5731343283582, "grad_norm": 36.79004669189453, "learning_rate": 9.800405268490376e-06, "loss": 29.7165, "step": 19302 }, { "epoch": 459.5970149253731, "grad_norm": 36.82136535644531, "learning_rate": 9.799898682877407e-06, "loss": 29.5093, "step": 19303 }, { "epoch": 459.6208955223881, "grad_norm": 34.30350112915039, "learning_rate": 9.799392097264439e-06, "loss": 29.4731, "step": 19304 }, { "epoch": 459.644776119403, "grad_norm": 31.135427474975586, "learning_rate": 9.79888551165147e-06, "loss": 29.3314, "step": 19305 }, { "epoch": 459.6686567164179, "grad_norm": 35.00565719604492, "learning_rate": 9.798378926038501e-06, "loss": 30.3751, "step": 19306 }, { "epoch": 459.6925373134328, "grad_norm": 30.2750244140625, "learning_rate": 9.797872340425533e-06, "loss": 29.2164, "step": 19307 }, { "epoch": 459.7164179104478, "grad_norm": 36.491981506347656, "learning_rate": 9.797365754812565e-06, "loss": 29.4006, "step": 19308 }, { "epoch": 459.7402985074627, "grad_norm": 32.442195892333984, "learning_rate": 9.796859169199595e-06, "loss": 29.1407, "step": 19309 }, { "epoch": 459.7641791044776, "grad_norm": 33.38274002075195, "learning_rate": 9.796352583586627e-06, "loss": 29.8151, "step": 19310 }, { "epoch": 459.78805970149256, "grad_norm": 31.595043182373047, "learning_rate": 9.79584599797366e-06, "loss": 29.9502, "step": 19311 }, { "epoch": 459.81194029850747, "grad_norm": 36.629581451416016, "learning_rate": 9.79533941236069e-06, "loss": 28.7226, "step": 19312 }, { "epoch": 459.8358208955224, "grad_norm": 31.02758026123047, "learning_rate": 9.794832826747722e-06, "loss": 29.0231, "step": 19313 }, { "epoch": 459.85970149253734, "grad_norm": 36.117313385009766, "learning_rate": 9.794326241134752e-06, "loss": 29.4282, "step": 19314 }, { "epoch": 459.88358208955225, "grad_norm": 31.993675231933594, "learning_rate": 9.793819655521784e-06, "loss": 30.027, "step": 19315 }, { "epoch": 459.90746268656716, "grad_norm": 33.16458511352539, "learning_rate": 9.793313069908814e-06, "loss": 29.4404, "step": 19316 }, { "epoch": 459.93134328358207, "grad_norm": 31.433929443359375, "learning_rate": 9.792806484295846e-06, "loss": 29.2099, "step": 19317 }, { "epoch": 459.95522388059703, "grad_norm": 40.249210357666016, "learning_rate": 9.792299898682878e-06, "loss": 29.6706, "step": 19318 }, { "epoch": 459.97910447761194, "grad_norm": 33.58461380004883, "learning_rate": 9.791793313069909e-06, "loss": 28.7832, "step": 19319 }, { "epoch": 460.0, "grad_norm": 31.575824737548828, "learning_rate": 9.79128672745694e-06, "loss": 25.6894, "step": 19320 }, { "epoch": 460.0238805970149, "grad_norm": 30.184171676635742, "learning_rate": 9.790780141843973e-06, "loss": 28.3271, "step": 19321 }, { "epoch": 460.0477611940299, "grad_norm": 33.15692138671875, "learning_rate": 9.790273556231003e-06, "loss": 28.7778, "step": 19322 }, { "epoch": 460.0716417910448, "grad_norm": 26.79400634765625, "learning_rate": 9.789766970618035e-06, "loss": 28.7622, "step": 19323 }, { "epoch": 460.0955223880597, "grad_norm": 36.866878509521484, "learning_rate": 9.789260385005067e-06, "loss": 30.1443, "step": 19324 }, { "epoch": 460.1194029850746, "grad_norm": 30.687620162963867, "learning_rate": 9.788753799392097e-06, "loss": 30.81, "step": 19325 }, { "epoch": 460.14328358208957, "grad_norm": 37.53904342651367, "learning_rate": 9.78824721377913e-06, "loss": 29.9662, "step": 19326 }, { "epoch": 460.1671641791045, "grad_norm": 35.032127380371094, "learning_rate": 9.787740628166162e-06, "loss": 29.2176, "step": 19327 }, { "epoch": 460.1910447761194, "grad_norm": 34.697017669677734, "learning_rate": 9.787234042553192e-06, "loss": 29.2221, "step": 19328 }, { "epoch": 460.21492537313435, "grad_norm": 33.518943786621094, "learning_rate": 9.786727456940224e-06, "loss": 29.8943, "step": 19329 }, { "epoch": 460.23880597014926, "grad_norm": 37.450416564941406, "learning_rate": 9.786220871327256e-06, "loss": 28.7407, "step": 19330 }, { "epoch": 460.26268656716417, "grad_norm": 29.826570510864258, "learning_rate": 9.785714285714286e-06, "loss": 28.6267, "step": 19331 }, { "epoch": 460.28656716417913, "grad_norm": 39.43460464477539, "learning_rate": 9.785207700101318e-06, "loss": 28.909, "step": 19332 }, { "epoch": 460.31044776119404, "grad_norm": 33.69282531738281, "learning_rate": 9.78470111448835e-06, "loss": 29.2969, "step": 19333 }, { "epoch": 460.33432835820895, "grad_norm": 37.60845947265625, "learning_rate": 9.78419452887538e-06, "loss": 28.9752, "step": 19334 }, { "epoch": 460.35820895522386, "grad_norm": 31.867868423461914, "learning_rate": 9.783687943262413e-06, "loss": 28.0368, "step": 19335 }, { "epoch": 460.3820895522388, "grad_norm": 34.84505844116211, "learning_rate": 9.783181357649445e-06, "loss": 29.4816, "step": 19336 }, { "epoch": 460.40597014925373, "grad_norm": 30.14092254638672, "learning_rate": 9.782674772036475e-06, "loss": 29.1079, "step": 19337 }, { "epoch": 460.42985074626864, "grad_norm": 36.451419830322266, "learning_rate": 9.782168186423507e-06, "loss": 29.3205, "step": 19338 }, { "epoch": 460.4537313432836, "grad_norm": 32.26408386230469, "learning_rate": 9.781661600810539e-06, "loss": 30.4088, "step": 19339 }, { "epoch": 460.4776119402985, "grad_norm": 34.030948638916016, "learning_rate": 9.78115501519757e-06, "loss": 28.0962, "step": 19340 }, { "epoch": 460.5014925373134, "grad_norm": 35.145965576171875, "learning_rate": 9.780648429584601e-06, "loss": 30.4691, "step": 19341 }, { "epoch": 460.52537313432833, "grad_norm": 32.93907928466797, "learning_rate": 9.780141843971632e-06, "loss": 28.5116, "step": 19342 }, { "epoch": 460.5492537313433, "grad_norm": 29.416433334350586, "learning_rate": 9.779635258358664e-06, "loss": 30.0241, "step": 19343 }, { "epoch": 460.5731343283582, "grad_norm": 38.05205154418945, "learning_rate": 9.779128672745694e-06, "loss": 28.6561, "step": 19344 }, { "epoch": 460.5970149253731, "grad_norm": 34.09502029418945, "learning_rate": 9.778622087132726e-06, "loss": 29.3852, "step": 19345 }, { "epoch": 460.6208955223881, "grad_norm": 33.65375518798828, "learning_rate": 9.778115501519758e-06, "loss": 29.7214, "step": 19346 }, { "epoch": 460.644776119403, "grad_norm": 31.7790584564209, "learning_rate": 9.777608915906788e-06, "loss": 29.0519, "step": 19347 }, { "epoch": 460.6686567164179, "grad_norm": 30.615726470947266, "learning_rate": 9.77710233029382e-06, "loss": 28.2639, "step": 19348 }, { "epoch": 460.6925373134328, "grad_norm": 27.758419036865234, "learning_rate": 9.776595744680852e-06, "loss": 28.7136, "step": 19349 }, { "epoch": 460.7164179104478, "grad_norm": 34.94424819946289, "learning_rate": 9.776089159067883e-06, "loss": 30.874, "step": 19350 }, { "epoch": 460.7402985074627, "grad_norm": 30.509201049804688, "learning_rate": 9.775582573454915e-06, "loss": 30.1014, "step": 19351 }, { "epoch": 460.7641791044776, "grad_norm": 35.772064208984375, "learning_rate": 9.775075987841947e-06, "loss": 29.6099, "step": 19352 }, { "epoch": 460.78805970149256, "grad_norm": 32.06110382080078, "learning_rate": 9.774569402228977e-06, "loss": 30.0304, "step": 19353 }, { "epoch": 460.81194029850747, "grad_norm": 33.029048919677734, "learning_rate": 9.774062816616009e-06, "loss": 29.8923, "step": 19354 }, { "epoch": 460.8358208955224, "grad_norm": 32.55839538574219, "learning_rate": 9.773556231003041e-06, "loss": 29.0146, "step": 19355 }, { "epoch": 460.85970149253734, "grad_norm": 34.50947952270508, "learning_rate": 9.773049645390071e-06, "loss": 29.749, "step": 19356 }, { "epoch": 460.88358208955225, "grad_norm": 29.599512100219727, "learning_rate": 9.772543059777103e-06, "loss": 27.8364, "step": 19357 }, { "epoch": 460.90746268656716, "grad_norm": 37.348140716552734, "learning_rate": 9.772036474164135e-06, "loss": 30.0731, "step": 19358 }, { "epoch": 460.93134328358207, "grad_norm": 28.60744857788086, "learning_rate": 9.771529888551166e-06, "loss": 29.0893, "step": 19359 }, { "epoch": 460.95522388059703, "grad_norm": 33.496456146240234, "learning_rate": 9.771023302938198e-06, "loss": 29.0412, "step": 19360 }, { "epoch": 460.97910447761194, "grad_norm": 34.302249908447266, "learning_rate": 9.77051671732523e-06, "loss": 30.0188, "step": 19361 }, { "epoch": 461.0, "grad_norm": 32.09307098388672, "learning_rate": 9.77001013171226e-06, "loss": 25.984, "step": 19362 }, { "epoch": 461.0238805970149, "grad_norm": 31.677602767944336, "learning_rate": 9.769503546099292e-06, "loss": 30.3131, "step": 19363 }, { "epoch": 461.0477611940299, "grad_norm": 28.644956588745117, "learning_rate": 9.768996960486324e-06, "loss": 29.6966, "step": 19364 }, { "epoch": 461.0716417910448, "grad_norm": 28.267742156982422, "learning_rate": 9.768490374873354e-06, "loss": 29.4412, "step": 19365 }, { "epoch": 461.0955223880597, "grad_norm": 36.51250076293945, "learning_rate": 9.767983789260386e-06, "loss": 30.5337, "step": 19366 }, { "epoch": 461.1194029850746, "grad_norm": 26.163394927978516, "learning_rate": 9.767477203647418e-06, "loss": 29.1519, "step": 19367 }, { "epoch": 461.14328358208957, "grad_norm": 39.848453521728516, "learning_rate": 9.766970618034449e-06, "loss": 30.2117, "step": 19368 }, { "epoch": 461.1671641791045, "grad_norm": 31.233781814575195, "learning_rate": 9.76646403242148e-06, "loss": 27.8491, "step": 19369 }, { "epoch": 461.1910447761194, "grad_norm": 31.696020126342773, "learning_rate": 9.765957446808511e-06, "loss": 29.4443, "step": 19370 }, { "epoch": 461.21492537313435, "grad_norm": 31.025056838989258, "learning_rate": 9.765450861195543e-06, "loss": 29.1664, "step": 19371 }, { "epoch": 461.23880597014926, "grad_norm": 32.986541748046875, "learning_rate": 9.764944275582573e-06, "loss": 29.5053, "step": 19372 }, { "epoch": 461.26268656716417, "grad_norm": 28.400985717773438, "learning_rate": 9.764437689969605e-06, "loss": 28.0785, "step": 19373 }, { "epoch": 461.28656716417913, "grad_norm": 34.50825881958008, "learning_rate": 9.763931104356637e-06, "loss": 29.5711, "step": 19374 }, { "epoch": 461.31044776119404, "grad_norm": 27.274003982543945, "learning_rate": 9.763424518743668e-06, "loss": 28.6607, "step": 19375 }, { "epoch": 461.33432835820895, "grad_norm": 35.14054489135742, "learning_rate": 9.7629179331307e-06, "loss": 29.7253, "step": 19376 }, { "epoch": 461.35820895522386, "grad_norm": 28.780384063720703, "learning_rate": 9.762411347517732e-06, "loss": 29.4841, "step": 19377 }, { "epoch": 461.3820895522388, "grad_norm": NaN, "learning_rate": 9.761904761904762e-06, "loss": 50.2796, "step": 19378 }, { "epoch": 461.40597014925373, "grad_norm": 41.20328903198242, "learning_rate": 9.761904761904762e-06, "loss": 29.3711, "step": 19379 }, { "epoch": 461.42985074626864, "grad_norm": 35.50848388671875, "learning_rate": 9.761398176291794e-06, "loss": 30.178, "step": 19380 }, { "epoch": 461.4537313432836, "grad_norm": 36.41617965698242, "learning_rate": 9.760891590678826e-06, "loss": 30.4492, "step": 19381 }, { "epoch": 461.4776119402985, "grad_norm": 32.988197326660156, "learning_rate": 9.760385005065856e-06, "loss": 29.2368, "step": 19382 }, { "epoch": 461.5014925373134, "grad_norm": 33.786529541015625, "learning_rate": 9.759878419452888e-06, "loss": 29.0346, "step": 19383 }, { "epoch": 461.52537313432833, "grad_norm": 30.287538528442383, "learning_rate": 9.75937183383992e-06, "loss": 29.9288, "step": 19384 }, { "epoch": 461.5492537313433, "grad_norm": 38.34580612182617, "learning_rate": 9.75886524822695e-06, "loss": 29.451, "step": 19385 }, { "epoch": 461.5731343283582, "grad_norm": 33.06439208984375, "learning_rate": 9.758358662613983e-06, "loss": 28.9868, "step": 19386 }, { "epoch": 461.5970149253731, "grad_norm": 32.587669372558594, "learning_rate": 9.757852077001015e-06, "loss": 29.1071, "step": 19387 }, { "epoch": 461.6208955223881, "grad_norm": 29.959531784057617, "learning_rate": 9.757345491388045e-06, "loss": 29.1486, "step": 19388 }, { "epoch": 461.644776119403, "grad_norm": 35.33243942260742, "learning_rate": 9.756838905775077e-06, "loss": 29.237, "step": 19389 }, { "epoch": 461.6686567164179, "grad_norm": 28.44919776916504, "learning_rate": 9.756332320162109e-06, "loss": 29.1829, "step": 19390 }, { "epoch": 461.6925373134328, "grad_norm": 36.514041900634766, "learning_rate": 9.75582573454914e-06, "loss": 29.6295, "step": 19391 }, { "epoch": 461.7164179104478, "grad_norm": 32.30393981933594, "learning_rate": 9.755319148936171e-06, "loss": 29.6223, "step": 19392 }, { "epoch": 461.7402985074627, "grad_norm": 30.371519088745117, "learning_rate": 9.754812563323203e-06, "loss": 28.8952, "step": 19393 }, { "epoch": 461.7641791044776, "grad_norm": 28.29704475402832, "learning_rate": 9.754305977710234e-06, "loss": 27.955, "step": 19394 }, { "epoch": 461.78805970149256, "grad_norm": 32.753814697265625, "learning_rate": 9.753799392097266e-06, "loss": 28.841, "step": 19395 }, { "epoch": 461.81194029850747, "grad_norm": 28.19756317138672, "learning_rate": 9.753292806484298e-06, "loss": 28.5373, "step": 19396 }, { "epoch": 461.8358208955224, "grad_norm": 33.99843215942383, "learning_rate": 9.752786220871328e-06, "loss": 30.3151, "step": 19397 }, { "epoch": 461.85970149253734, "grad_norm": 27.546531677246094, "learning_rate": 9.75227963525836e-06, "loss": 29.3253, "step": 19398 }, { "epoch": 461.88358208955225, "grad_norm": 34.28022384643555, "learning_rate": 9.75177304964539e-06, "loss": 29.5096, "step": 19399 }, { "epoch": 461.90746268656716, "grad_norm": 30.510034561157227, "learning_rate": 9.751266464032422e-06, "loss": 29.6538, "step": 19400 }, { "epoch": 461.93134328358207, "grad_norm": 36.70356750488281, "learning_rate": 9.750759878419453e-06, "loss": 29.45, "step": 19401 }, { "epoch": 461.95522388059703, "grad_norm": 28.712860107421875, "learning_rate": 9.750253292806485e-06, "loss": 29.2148, "step": 19402 }, { "epoch": 461.97910447761194, "grad_norm": 29.592735290527344, "learning_rate": 9.749746707193517e-06, "loss": 28.6936, "step": 19403 }, { "epoch": 462.0, "grad_norm": 26.34160041809082, "learning_rate": 9.749240121580547e-06, "loss": 25.8545, "step": 19404 }, { "epoch": 462.0238805970149, "grad_norm": 35.77501678466797, "learning_rate": 9.748733535967579e-06, "loss": 29.5629, "step": 19405 }, { "epoch": 462.0477611940299, "grad_norm": 29.49849510192871, "learning_rate": 9.748226950354611e-06, "loss": 29.086, "step": 19406 }, { "epoch": 462.0716417910448, "grad_norm": 35.121341705322266, "learning_rate": 9.747720364741641e-06, "loss": 28.7888, "step": 19407 }, { "epoch": 462.0955223880597, "grad_norm": 29.353239059448242, "learning_rate": 9.747213779128673e-06, "loss": 29.4343, "step": 19408 }, { "epoch": 462.1194029850746, "grad_norm": 32.75098419189453, "learning_rate": 9.746707193515705e-06, "loss": 29.3764, "step": 19409 }, { "epoch": 462.14328358208957, "grad_norm": 28.075088500976562, "learning_rate": 9.746200607902736e-06, "loss": 28.7701, "step": 19410 }, { "epoch": 462.1671641791045, "grad_norm": 33.91477966308594, "learning_rate": 9.745694022289768e-06, "loss": 29.388, "step": 19411 }, { "epoch": 462.1910447761194, "grad_norm": 26.130840301513672, "learning_rate": 9.7451874366768e-06, "loss": 29.4576, "step": 19412 }, { "epoch": 462.21492537313435, "grad_norm": 33.45295715332031, "learning_rate": 9.74468085106383e-06, "loss": 29.7176, "step": 19413 }, { "epoch": 462.23880597014926, "grad_norm": 27.171737670898438, "learning_rate": 9.744174265450862e-06, "loss": 29.0653, "step": 19414 }, { "epoch": 462.26268656716417, "grad_norm": 34.54220199584961, "learning_rate": 9.743667679837894e-06, "loss": 28.7225, "step": 19415 }, { "epoch": 462.28656716417913, "grad_norm": 30.338260650634766, "learning_rate": 9.743161094224924e-06, "loss": 29.1378, "step": 19416 }, { "epoch": 462.31044776119404, "grad_norm": 37.82553482055664, "learning_rate": 9.742654508611956e-06, "loss": 30.1956, "step": 19417 }, { "epoch": 462.33432835820895, "grad_norm": NaN, "learning_rate": 9.742147922998988e-06, "loss": 26.3465, "step": 19418 }, { "epoch": 462.35820895522386, "grad_norm": 32.55746841430664, "learning_rate": 9.742147922998988e-06, "loss": 30.3732, "step": 19419 }, { "epoch": 462.3820895522388, "grad_norm": 34.20072937011719, "learning_rate": 9.741641337386019e-06, "loss": 29.666, "step": 19420 }, { "epoch": 462.40597014925373, "grad_norm": 28.92413902282715, "learning_rate": 9.74113475177305e-06, "loss": 29.4213, "step": 19421 }, { "epoch": 462.42985074626864, "grad_norm": 28.41001319885254, "learning_rate": 9.740628166160083e-06, "loss": 29.2171, "step": 19422 }, { "epoch": 462.4537313432836, "grad_norm": 24.280712127685547, "learning_rate": 9.740121580547113e-06, "loss": 29.2171, "step": 19423 }, { "epoch": 462.4776119402985, "grad_norm": 32.105892181396484, "learning_rate": 9.739614994934145e-06, "loss": 29.5183, "step": 19424 }, { "epoch": 462.5014925373134, "grad_norm": 25.65811538696289, "learning_rate": 9.739108409321177e-06, "loss": 29.696, "step": 19425 }, { "epoch": 462.52537313432833, "grad_norm": 33.12766647338867, "learning_rate": 9.738601823708207e-06, "loss": 29.4041, "step": 19426 }, { "epoch": 462.5492537313433, "grad_norm": 26.562183380126953, "learning_rate": 9.73809523809524e-06, "loss": 28.8242, "step": 19427 }, { "epoch": 462.5731343283582, "grad_norm": 32.0421142578125, "learning_rate": 9.73758865248227e-06, "loss": 29.5225, "step": 19428 }, { "epoch": 462.5970149253731, "grad_norm": 24.110591888427734, "learning_rate": 9.737082066869302e-06, "loss": 28.7345, "step": 19429 }, { "epoch": 462.6208955223881, "grad_norm": 31.64080810546875, "learning_rate": 9.736575481256332e-06, "loss": 28.6838, "step": 19430 }, { "epoch": 462.644776119403, "grad_norm": 29.083173751831055, "learning_rate": 9.736068895643364e-06, "loss": 29.762, "step": 19431 }, { "epoch": 462.6686567164179, "grad_norm": 34.15278625488281, "learning_rate": 9.735562310030396e-06, "loss": 30.3489, "step": 19432 }, { "epoch": 462.6925373134328, "grad_norm": 28.977872848510742, "learning_rate": 9.735055724417426e-06, "loss": 28.8644, "step": 19433 }, { "epoch": 462.7164179104478, "grad_norm": 29.12445640563965, "learning_rate": 9.734549138804458e-06, "loss": 29.4317, "step": 19434 }, { "epoch": 462.7402985074627, "grad_norm": 27.41645050048828, "learning_rate": 9.73404255319149e-06, "loss": 29.7058, "step": 19435 }, { "epoch": 462.7641791044776, "grad_norm": 23.647470474243164, "learning_rate": 9.73353596757852e-06, "loss": 28.5939, "step": 19436 }, { "epoch": 462.78805970149256, "grad_norm": 20.839101791381836, "learning_rate": 9.733029381965553e-06, "loss": 28.7688, "step": 19437 }, { "epoch": 462.81194029850747, "grad_norm": 26.324705123901367, "learning_rate": 9.732522796352585e-06, "loss": 28.4858, "step": 19438 }, { "epoch": 462.8358208955224, "grad_norm": 19.894611358642578, "learning_rate": 9.732016210739615e-06, "loss": 28.8421, "step": 19439 }, { "epoch": 462.85970149253734, "grad_norm": 32.89299392700195, "learning_rate": 9.731509625126647e-06, "loss": 29.0529, "step": 19440 }, { "epoch": 462.88358208955225, "grad_norm": 24.56739044189453, "learning_rate": 9.731003039513679e-06, "loss": 29.4563, "step": 19441 }, { "epoch": 462.90746268656716, "grad_norm": 25.414873123168945, "learning_rate": 9.73049645390071e-06, "loss": 29.3494, "step": 19442 }, { "epoch": 462.93134328358207, "grad_norm": 24.166391372680664, "learning_rate": 9.729989868287741e-06, "loss": 29.2338, "step": 19443 }, { "epoch": 462.95522388059703, "grad_norm": 25.127506256103516, "learning_rate": 9.729483282674773e-06, "loss": 29.11, "step": 19444 }, { "epoch": 462.97910447761194, "grad_norm": 23.257104873657227, "learning_rate": 9.728976697061804e-06, "loss": 29.7996, "step": 19445 }, { "epoch": 463.0, "grad_norm": 21.48944664001465, "learning_rate": 9.728470111448836e-06, "loss": 25.508, "step": 19446 }, { "epoch": 463.0238805970149, "grad_norm": 23.019773483276367, "learning_rate": 9.727963525835868e-06, "loss": 28.4003, "step": 19447 }, { "epoch": 463.0477611940299, "grad_norm": 24.27079963684082, "learning_rate": 9.727456940222898e-06, "loss": 29.4585, "step": 19448 }, { "epoch": 463.0716417910448, "grad_norm": 23.10042953491211, "learning_rate": 9.72695035460993e-06, "loss": 29.0502, "step": 19449 }, { "epoch": 463.0955223880597, "grad_norm": 21.899099349975586, "learning_rate": 9.726443768996962e-06, "loss": 28.3503, "step": 19450 }, { "epoch": 463.1194029850746, "grad_norm": 22.890586853027344, "learning_rate": 9.725937183383992e-06, "loss": 29.0034, "step": 19451 }, { "epoch": 463.14328358208957, "grad_norm": 23.165586471557617, "learning_rate": 9.725430597771024e-06, "loss": 29.4389, "step": 19452 }, { "epoch": 463.1671641791045, "grad_norm": 25.238155364990234, "learning_rate": 9.724924012158056e-06, "loss": 28.4063, "step": 19453 }, { "epoch": 463.1910447761194, "grad_norm": 21.789159774780273, "learning_rate": 9.724417426545087e-06, "loss": 29.3011, "step": 19454 }, { "epoch": 463.21492537313435, "grad_norm": 22.825904846191406, "learning_rate": 9.723910840932119e-06, "loss": 29.2173, "step": 19455 }, { "epoch": 463.23880597014926, "grad_norm": 19.504039764404297, "learning_rate": 9.723404255319149e-06, "loss": 29.7002, "step": 19456 }, { "epoch": 463.26268656716417, "grad_norm": 22.882749557495117, "learning_rate": 9.722897669706181e-06, "loss": 28.389, "step": 19457 }, { "epoch": 463.28656716417913, "grad_norm": 22.095870971679688, "learning_rate": 9.722391084093213e-06, "loss": 29.2173, "step": 19458 }, { "epoch": 463.31044776119404, "grad_norm": 20.926246643066406, "learning_rate": 9.721884498480243e-06, "loss": 28.6776, "step": 19459 }, { "epoch": 463.33432835820895, "grad_norm": 20.549245834350586, "learning_rate": 9.721377912867275e-06, "loss": 29.2315, "step": 19460 }, { "epoch": 463.35820895522386, "grad_norm": 17.710586547851562, "learning_rate": 9.720871327254306e-06, "loss": 30.3142, "step": 19461 }, { "epoch": 463.3820895522388, "grad_norm": 22.811368942260742, "learning_rate": 9.720364741641338e-06, "loss": 28.8098, "step": 19462 }, { "epoch": 463.40597014925373, "grad_norm": 17.41098976135254, "learning_rate": 9.71985815602837e-06, "loss": 30.1459, "step": 19463 }, { "epoch": 463.42985074626864, "grad_norm": 27.67725944519043, "learning_rate": 9.7193515704154e-06, "loss": 31.1802, "step": 19464 }, { "epoch": 463.4537313432836, "grad_norm": 22.67815399169922, "learning_rate": 9.718844984802432e-06, "loss": 28.6566, "step": 19465 }, { "epoch": 463.4776119402985, "grad_norm": 20.139501571655273, "learning_rate": 9.718338399189464e-06, "loss": 29.76, "step": 19466 }, { "epoch": 463.5014925373134, "grad_norm": 22.284563064575195, "learning_rate": 9.717831813576494e-06, "loss": 29.1833, "step": 19467 }, { "epoch": 463.52537313432833, "grad_norm": 22.907533645629883, "learning_rate": 9.717325227963526e-06, "loss": 29.7265, "step": 19468 }, { "epoch": 463.5492537313433, "grad_norm": 20.395618438720703, "learning_rate": 9.716818642350559e-06, "loss": 30.1453, "step": 19469 }, { "epoch": 463.5731343283582, "grad_norm": 20.52897071838379, "learning_rate": 9.716312056737589e-06, "loss": 28.7555, "step": 19470 }, { "epoch": 463.5970149253731, "grad_norm": 20.452720642089844, "learning_rate": 9.71580547112462e-06, "loss": 29.6042, "step": 19471 }, { "epoch": 463.6208955223881, "grad_norm": 20.722702026367188, "learning_rate": 9.715298885511653e-06, "loss": 28.3004, "step": 19472 }, { "epoch": 463.644776119403, "grad_norm": 17.714391708374023, "learning_rate": 9.714792299898683e-06, "loss": 29.5908, "step": 19473 }, { "epoch": 463.6686567164179, "grad_norm": 21.52263069152832, "learning_rate": 9.714285714285715e-06, "loss": 29.5977, "step": 19474 }, { "epoch": 463.6925373134328, "grad_norm": 20.687578201293945, "learning_rate": 9.713779128672747e-06, "loss": 29.5179, "step": 19475 }, { "epoch": 463.7164179104478, "grad_norm": 20.711994171142578, "learning_rate": 9.713272543059778e-06, "loss": 29.7797, "step": 19476 }, { "epoch": 463.7402985074627, "grad_norm": 22.17572593688965, "learning_rate": 9.71276595744681e-06, "loss": 28.8559, "step": 19477 }, { "epoch": 463.7641791044776, "grad_norm": 20.257568359375, "learning_rate": 9.712259371833842e-06, "loss": 28.7053, "step": 19478 }, { "epoch": 463.78805970149256, "grad_norm": 21.96816635131836, "learning_rate": 9.711752786220872e-06, "loss": 30.7084, "step": 19479 }, { "epoch": 463.81194029850747, "grad_norm": 18.239877700805664, "learning_rate": 9.711246200607904e-06, "loss": 29.1026, "step": 19480 }, { "epoch": 463.8358208955224, "grad_norm": 17.588346481323242, "learning_rate": 9.710739614994936e-06, "loss": 29.2268, "step": 19481 }, { "epoch": 463.85970149253734, "grad_norm": 18.30121612548828, "learning_rate": 9.710233029381966e-06, "loss": 28.9174, "step": 19482 }, { "epoch": 463.88358208955225, "grad_norm": 20.780614852905273, "learning_rate": 9.709726443768998e-06, "loss": 30.2715, "step": 19483 }, { "epoch": 463.90746268656716, "grad_norm": 21.29032325744629, "learning_rate": 9.709219858156029e-06, "loss": 29.2584, "step": 19484 }, { "epoch": 463.93134328358207, "grad_norm": 23.783409118652344, "learning_rate": 9.70871327254306e-06, "loss": 29.6888, "step": 19485 }, { "epoch": 463.95522388059703, "grad_norm": 23.26983070373535, "learning_rate": 9.708206686930093e-06, "loss": 28.9931, "step": 19486 }, { "epoch": 463.97910447761194, "grad_norm": 20.583187103271484, "learning_rate": 9.707700101317123e-06, "loss": 28.4984, "step": 19487 }, { "epoch": 464.0, "grad_norm": 16.919492721557617, "learning_rate": 9.707193515704155e-06, "loss": 25.5655, "step": 19488 }, { "epoch": 464.0238805970149, "grad_norm": 20.25433921813965, "learning_rate": 9.706686930091185e-06, "loss": 28.5134, "step": 19489 }, { "epoch": 464.0477611940299, "grad_norm": 21.577367782592773, "learning_rate": 9.706180344478217e-06, "loss": 29.8885, "step": 19490 }, { "epoch": 464.0716417910448, "grad_norm": 20.47617530822754, "learning_rate": 9.70567375886525e-06, "loss": 29.0789, "step": 19491 }, { "epoch": 464.0955223880597, "grad_norm": 18.172958374023438, "learning_rate": 9.70516717325228e-06, "loss": 28.8886, "step": 19492 }, { "epoch": 464.1194029850746, "grad_norm": 19.9902286529541, "learning_rate": 9.704660587639312e-06, "loss": 28.4007, "step": 19493 }, { "epoch": 464.14328358208957, "grad_norm": 20.412309646606445, "learning_rate": 9.704154002026344e-06, "loss": 29.306, "step": 19494 }, { "epoch": 464.1671641791045, "grad_norm": 21.970645904541016, "learning_rate": 9.703647416413374e-06, "loss": 29.5945, "step": 19495 }, { "epoch": 464.1910447761194, "grad_norm": 19.769027709960938, "learning_rate": 9.703140830800406e-06, "loss": 28.7612, "step": 19496 }, { "epoch": 464.21492537313435, "grad_norm": 20.189281463623047, "learning_rate": 9.702634245187438e-06, "loss": 29.7843, "step": 19497 }, { "epoch": 464.23880597014926, "grad_norm": 21.913818359375, "learning_rate": 9.702127659574468e-06, "loss": 28.3025, "step": 19498 }, { "epoch": 464.26268656716417, "grad_norm": 18.779232025146484, "learning_rate": 9.7016210739615e-06, "loss": 29.5857, "step": 19499 }, { "epoch": 464.28656716417913, "grad_norm": 20.621723175048828, "learning_rate": 9.701114488348532e-06, "loss": 29.2109, "step": 19500 }, { "epoch": 464.31044776119404, "grad_norm": 18.507545471191406, "learning_rate": 9.700607902735563e-06, "loss": 29.3919, "step": 19501 }, { "epoch": 464.33432835820895, "grad_norm": 20.707616806030273, "learning_rate": 9.700101317122595e-06, "loss": 29.6756, "step": 19502 }, { "epoch": 464.35820895522386, "grad_norm": 22.391029357910156, "learning_rate": 9.699594731509627e-06, "loss": 30.0252, "step": 19503 }, { "epoch": 464.3820895522388, "grad_norm": 20.694711685180664, "learning_rate": 9.699088145896657e-06, "loss": 29.2355, "step": 19504 }, { "epoch": 464.40597014925373, "grad_norm": 17.417152404785156, "learning_rate": 9.698581560283689e-06, "loss": 28.9382, "step": 19505 }, { "epoch": 464.42985074626864, "grad_norm": 27.688743591308594, "learning_rate": 9.698074974670721e-06, "loss": 29.5859, "step": 19506 }, { "epoch": 464.4537313432836, "grad_norm": 20.634483337402344, "learning_rate": 9.697568389057753e-06, "loss": 29.8447, "step": 19507 }, { "epoch": 464.4776119402985, "grad_norm": 18.68338394165039, "learning_rate": 9.697061803444783e-06, "loss": 29.4837, "step": 19508 }, { "epoch": 464.5014925373134, "grad_norm": 20.74565887451172, "learning_rate": 9.696555217831815e-06, "loss": 30.6587, "step": 19509 }, { "epoch": 464.52537313432833, "grad_norm": 19.485794067382812, "learning_rate": 9.696048632218846e-06, "loss": 28.5228, "step": 19510 }, { "epoch": 464.5492537313433, "grad_norm": 21.990123748779297, "learning_rate": 9.695542046605878e-06, "loss": 29.5229, "step": 19511 }, { "epoch": 464.5731343283582, "grad_norm": 19.95937728881836, "learning_rate": 9.695035460992908e-06, "loss": 29.4375, "step": 19512 }, { "epoch": 464.5970149253731, "grad_norm": 21.43914222717285, "learning_rate": 9.69452887537994e-06, "loss": 29.5426, "step": 19513 }, { "epoch": 464.6208955223881, "grad_norm": 19.22632598876953, "learning_rate": 9.694022289766972e-06, "loss": 29.4911, "step": 19514 }, { "epoch": 464.644776119403, "grad_norm": 20.98499870300293, "learning_rate": 9.693515704154002e-06, "loss": 29.4667, "step": 19515 }, { "epoch": 464.6686567164179, "grad_norm": 26.57865333557129, "learning_rate": 9.693009118541034e-06, "loss": 29.1769, "step": 19516 }, { "epoch": 464.6925373134328, "grad_norm": 22.837032318115234, "learning_rate": 9.692502532928065e-06, "loss": 29.3953, "step": 19517 }, { "epoch": 464.7164179104478, "grad_norm": 17.518875122070312, "learning_rate": 9.691995947315097e-06, "loss": 28.0768, "step": 19518 }, { "epoch": 464.7402985074627, "grad_norm": 21.7294921875, "learning_rate": 9.691489361702129e-06, "loss": 29.7332, "step": 19519 }, { "epoch": 464.7641791044776, "grad_norm": 20.465078353881836, "learning_rate": 9.690982776089159e-06, "loss": 29.516, "step": 19520 }, { "epoch": 464.78805970149256, "grad_norm": 21.908279418945312, "learning_rate": 9.690476190476191e-06, "loss": 29.3488, "step": 19521 }, { "epoch": 464.81194029850747, "grad_norm": 20.265151977539062, "learning_rate": 9.689969604863223e-06, "loss": 30.1959, "step": 19522 }, { "epoch": 464.8358208955224, "grad_norm": 19.762197494506836, "learning_rate": 9.689463019250253e-06, "loss": 28.0801, "step": 19523 }, { "epoch": 464.85970149253734, "grad_norm": 17.311105728149414, "learning_rate": 9.688956433637285e-06, "loss": 28.6032, "step": 19524 }, { "epoch": 464.88358208955225, "grad_norm": 21.487136840820312, "learning_rate": 9.688449848024317e-06, "loss": 29.0696, "step": 19525 }, { "epoch": 464.90746268656716, "grad_norm": 18.609182357788086, "learning_rate": 9.687943262411348e-06, "loss": 28.3605, "step": 19526 }, { "epoch": 464.93134328358207, "grad_norm": 22.429649353027344, "learning_rate": 9.68743667679838e-06, "loss": 29.6709, "step": 19527 }, { "epoch": 464.95522388059703, "grad_norm": NaN, "learning_rate": 9.686930091185412e-06, "loss": 31.8701, "step": 19528 }, { "epoch": 464.97910447761194, "grad_norm": 20.860506057739258, "learning_rate": 9.686930091185412e-06, "loss": 28.9315, "step": 19529 }, { "epoch": 465.0, "grad_norm": 18.168445587158203, "learning_rate": 9.686423505572442e-06, "loss": 26.245, "step": 19530 }, { "epoch": 465.0238805970149, "grad_norm": 19.087047576904297, "learning_rate": 9.685916919959474e-06, "loss": 28.8313, "step": 19531 }, { "epoch": 465.0477611940299, "grad_norm": 21.82554054260254, "learning_rate": 9.685410334346506e-06, "loss": 30.0893, "step": 19532 }, { "epoch": 465.0716417910448, "grad_norm": NaN, "learning_rate": 9.684903748733536e-06, "loss": 44.0871, "step": 19533 }, { "epoch": 465.0955223880597, "grad_norm": 20.99284553527832, "learning_rate": 9.684903748733536e-06, "loss": 29.0612, "step": 19534 }, { "epoch": 465.1194029850746, "grad_norm": 19.966144561767578, "learning_rate": 9.684397163120568e-06, "loss": 28.7009, "step": 19535 }, { "epoch": 465.14328358208957, "grad_norm": 21.824159622192383, "learning_rate": 9.6838905775076e-06, "loss": 30.2687, "step": 19536 }, { "epoch": 465.1671641791045, "grad_norm": 21.857791900634766, "learning_rate": 9.683383991894632e-06, "loss": 29.7353, "step": 19537 }, { "epoch": 465.1910447761194, "grad_norm": 20.229259490966797, "learning_rate": 9.682877406281663e-06, "loss": 28.7022, "step": 19538 }, { "epoch": 465.21492537313435, "grad_norm": 19.548921585083008, "learning_rate": 9.682370820668695e-06, "loss": 29.4159, "step": 19539 }, { "epoch": 465.23880597014926, "grad_norm": 18.370994567871094, "learning_rate": 9.681864235055725e-06, "loss": 28.1805, "step": 19540 }, { "epoch": 465.26268656716417, "grad_norm": 20.095266342163086, "learning_rate": 9.681357649442757e-06, "loss": 28.8881, "step": 19541 }, { "epoch": 465.28656716417913, "grad_norm": 18.214750289916992, "learning_rate": 9.680851063829787e-06, "loss": 27.6625, "step": 19542 }, { "epoch": 465.31044776119404, "grad_norm": 20.37249755859375, "learning_rate": 9.68034447821682e-06, "loss": 29.2554, "step": 19543 }, { "epoch": 465.33432835820895, "grad_norm": 17.515926361083984, "learning_rate": 9.679837892603851e-06, "loss": 29.2173, "step": 19544 }, { "epoch": 465.35820895522386, "grad_norm": 22.137720108032227, "learning_rate": 9.679331306990882e-06, "loss": 27.7231, "step": 19545 }, { "epoch": 465.3820895522388, "grad_norm": 22.34898567199707, "learning_rate": 9.678824721377914e-06, "loss": 29.5257, "step": 19546 }, { "epoch": 465.40597014925373, "grad_norm": 19.46327781677246, "learning_rate": 9.678318135764944e-06, "loss": 29.6255, "step": 19547 }, { "epoch": 465.42985074626864, "grad_norm": 20.316120147705078, "learning_rate": 9.677811550151976e-06, "loss": 28.9808, "step": 19548 }, { "epoch": 465.4537313432836, "grad_norm": 19.533613204956055, "learning_rate": 9.677304964539008e-06, "loss": 27.7122, "step": 19549 }, { "epoch": 465.4776119402985, "grad_norm": 21.041292190551758, "learning_rate": 9.676798378926038e-06, "loss": 29.8973, "step": 19550 }, { "epoch": 465.5014925373134, "grad_norm": 21.25673484802246, "learning_rate": 9.67629179331307e-06, "loss": 30.1948, "step": 19551 }, { "epoch": 465.52537313432833, "grad_norm": 19.606124877929688, "learning_rate": 9.675785207700102e-06, "loss": 29.132, "step": 19552 }, { "epoch": 465.5492537313433, "grad_norm": 21.429485321044922, "learning_rate": 9.675278622087133e-06, "loss": 29.5966, "step": 19553 }, { "epoch": 465.5731343283582, "grad_norm": 22.03343963623047, "learning_rate": 9.674772036474165e-06, "loss": 29.3635, "step": 19554 }, { "epoch": 465.5970149253731, "grad_norm": 26.789669036865234, "learning_rate": 9.674265450861197e-06, "loss": 30.0711, "step": 19555 }, { "epoch": 465.6208955223881, "grad_norm": 17.71918296813965, "learning_rate": 9.673758865248227e-06, "loss": 28.6093, "step": 19556 }, { "epoch": 465.644776119403, "grad_norm": 21.963144302368164, "learning_rate": 9.673252279635259e-06, "loss": 29.4653, "step": 19557 }, { "epoch": 465.6686567164179, "grad_norm": 19.368396759033203, "learning_rate": 9.672745694022291e-06, "loss": 29.5839, "step": 19558 }, { "epoch": 465.6925373134328, "grad_norm": 21.25804901123047, "learning_rate": 9.672239108409321e-06, "loss": 28.767, "step": 19559 }, { "epoch": 465.7164179104478, "grad_norm": 19.41716766357422, "learning_rate": 9.671732522796353e-06, "loss": 30.256, "step": 19560 }, { "epoch": 465.7402985074627, "grad_norm": 21.57291030883789, "learning_rate": 9.671225937183385e-06, "loss": 29.9719, "step": 19561 }, { "epoch": 465.7641791044776, "grad_norm": 19.73448944091797, "learning_rate": 9.670719351570416e-06, "loss": 29.929, "step": 19562 }, { "epoch": 465.78805970149256, "grad_norm": 22.57986831665039, "learning_rate": 9.670212765957448e-06, "loss": 28.7362, "step": 19563 }, { "epoch": 465.81194029850747, "grad_norm": 23.136096954345703, "learning_rate": 9.66970618034448e-06, "loss": 29.5778, "step": 19564 }, { "epoch": 465.8358208955224, "grad_norm": 25.73858642578125, "learning_rate": 9.669199594731512e-06, "loss": 29.3252, "step": 19565 }, { "epoch": 465.85970149253734, "grad_norm": 21.977853775024414, "learning_rate": 9.668693009118542e-06, "loss": 29.2759, "step": 19566 }, { "epoch": 465.88358208955225, "grad_norm": 25.043472290039062, "learning_rate": 9.668186423505574e-06, "loss": 28.4493, "step": 19567 }, { "epoch": 465.90746268656716, "grad_norm": 24.318626403808594, "learning_rate": 9.667679837892604e-06, "loss": 28.3086, "step": 19568 }, { "epoch": 465.93134328358207, "grad_norm": 19.293962478637695, "learning_rate": 9.667173252279636e-06, "loss": 30.0868, "step": 19569 }, { "epoch": 465.95522388059703, "grad_norm": 27.815082550048828, "learning_rate": 9.666666666666667e-06, "loss": 28.0355, "step": 19570 }, { "epoch": 465.97910447761194, "grad_norm": 25.050622940063477, "learning_rate": 9.666160081053699e-06, "loss": 30.2659, "step": 19571 }, { "epoch": 466.0, "grad_norm": 21.81355857849121, "learning_rate": 9.66565349544073e-06, "loss": 25.628, "step": 19572 }, { "epoch": 466.0238805970149, "grad_norm": 19.47244644165039, "learning_rate": 9.665146909827761e-06, "loss": 28.1647, "step": 19573 }, { "epoch": 466.0477611940299, "grad_norm": 23.920944213867188, "learning_rate": 9.664640324214793e-06, "loss": 28.7773, "step": 19574 }, { "epoch": 466.0716417910448, "grad_norm": 21.04451560974121, "learning_rate": 9.664133738601823e-06, "loss": 27.9617, "step": 19575 }, { "epoch": 466.0955223880597, "grad_norm": 22.89613151550293, "learning_rate": 9.663627152988855e-06, "loss": 28.5038, "step": 19576 }, { "epoch": 466.1194029850746, "grad_norm": 20.0987548828125, "learning_rate": 9.663120567375887e-06, "loss": 29.9947, "step": 19577 }, { "epoch": 466.14328358208957, "grad_norm": 22.32472801208496, "learning_rate": 9.662613981762918e-06, "loss": 29.1217, "step": 19578 }, { "epoch": 466.1671641791045, "grad_norm": 19.953065872192383, "learning_rate": 9.66210739614995e-06, "loss": 27.9213, "step": 19579 }, { "epoch": 466.1910447761194, "grad_norm": 19.123825073242188, "learning_rate": 9.661600810536982e-06, "loss": 28.7735, "step": 19580 }, { "epoch": 466.21492537313435, "grad_norm": 23.306509017944336, "learning_rate": 9.661094224924012e-06, "loss": 30.2237, "step": 19581 }, { "epoch": 466.23880597014926, "grad_norm": 24.01358985900879, "learning_rate": 9.660587639311044e-06, "loss": 30.3705, "step": 19582 }, { "epoch": 466.26268656716417, "grad_norm": 20.21047019958496, "learning_rate": 9.660081053698076e-06, "loss": 29.879, "step": 19583 }, { "epoch": 466.28656716417913, "grad_norm": 18.769472122192383, "learning_rate": 9.659574468085106e-06, "loss": 28.7467, "step": 19584 }, { "epoch": 466.31044776119404, "grad_norm": 22.1401424407959, "learning_rate": 9.659067882472138e-06, "loss": 29.4588, "step": 19585 }, { "epoch": 466.33432835820895, "grad_norm": 20.6121883392334, "learning_rate": 9.65856129685917e-06, "loss": 28.8613, "step": 19586 }, { "epoch": 466.35820895522386, "grad_norm": 18.872995376586914, "learning_rate": 9.6580547112462e-06, "loss": 29.3691, "step": 19587 }, { "epoch": 466.3820895522388, "grad_norm": 17.24449348449707, "learning_rate": 9.657548125633233e-06, "loss": 30.1796, "step": 19588 }, { "epoch": 466.40597014925373, "grad_norm": 19.13880729675293, "learning_rate": 9.657041540020265e-06, "loss": 30.0138, "step": 19589 }, { "epoch": 466.42985074626864, "grad_norm": 18.576152801513672, "learning_rate": 9.656534954407297e-06, "loss": 29.5088, "step": 19590 }, { "epoch": 466.4537313432836, "grad_norm": 17.294544219970703, "learning_rate": 9.656028368794327e-06, "loss": 28.6845, "step": 19591 }, { "epoch": 466.4776119402985, "grad_norm": 18.293901443481445, "learning_rate": 9.655521783181359e-06, "loss": 30.8795, "step": 19592 }, { "epoch": 466.5014925373134, "grad_norm": 20.203691482543945, "learning_rate": 9.655015197568391e-06, "loss": 30.1692, "step": 19593 }, { "epoch": 466.52537313432833, "grad_norm": 17.883424758911133, "learning_rate": 9.654508611955421e-06, "loss": 28.6168, "step": 19594 }, { "epoch": 466.5492537313433, "grad_norm": 19.2342529296875, "learning_rate": 9.654002026342453e-06, "loss": 30.446, "step": 19595 }, { "epoch": 466.5731343283582, "grad_norm": 17.74437141418457, "learning_rate": 9.653495440729484e-06, "loss": 29.4343, "step": 19596 }, { "epoch": 466.5970149253731, "grad_norm": 18.750404357910156, "learning_rate": 9.652988855116516e-06, "loss": 28.7384, "step": 19597 }, { "epoch": 466.6208955223881, "grad_norm": 18.557533264160156, "learning_rate": 9.652482269503546e-06, "loss": 28.8522, "step": 19598 }, { "epoch": 466.644776119403, "grad_norm": 19.501636505126953, "learning_rate": 9.651975683890578e-06, "loss": 29.2296, "step": 19599 }, { "epoch": 466.6686567164179, "grad_norm": 19.635868072509766, "learning_rate": 9.65146909827761e-06, "loss": 29.4321, "step": 19600 }, { "epoch": 466.6925373134328, "grad_norm": 20.67929458618164, "learning_rate": 9.65096251266464e-06, "loss": 29.5197, "step": 19601 }, { "epoch": 466.7164179104478, "grad_norm": 22.123544692993164, "learning_rate": 9.650455927051672e-06, "loss": 29.1789, "step": 19602 }, { "epoch": 466.7402985074627, "grad_norm": 19.92279052734375, "learning_rate": 9.649949341438703e-06, "loss": 28.9335, "step": 19603 }, { "epoch": 466.7641791044776, "grad_norm": 21.12049102783203, "learning_rate": 9.649442755825735e-06, "loss": 29.3815, "step": 19604 }, { "epoch": 466.78805970149256, "grad_norm": 26.54119300842285, "learning_rate": 9.648936170212767e-06, "loss": 28.0722, "step": 19605 }, { "epoch": 466.81194029850747, "grad_norm": 20.616308212280273, "learning_rate": 9.648429584599797e-06, "loss": 29.3766, "step": 19606 }, { "epoch": 466.8358208955224, "grad_norm": 20.852275848388672, "learning_rate": 9.647922998986829e-06, "loss": 28.6937, "step": 19607 }, { "epoch": 466.85970149253734, "grad_norm": 21.48158073425293, "learning_rate": 9.647416413373861e-06, "loss": 28.7924, "step": 19608 }, { "epoch": 466.88358208955225, "grad_norm": 21.7023868560791, "learning_rate": 9.646909827760891e-06, "loss": 28.7072, "step": 19609 }, { "epoch": 466.90746268656716, "grad_norm": 27.339448928833008, "learning_rate": 9.646403242147923e-06, "loss": 29.0183, "step": 19610 }, { "epoch": 466.93134328358207, "grad_norm": 20.71759605407715, "learning_rate": 9.645896656534956e-06, "loss": 28.1847, "step": 19611 }, { "epoch": 466.95522388059703, "grad_norm": 24.899444580078125, "learning_rate": 9.645390070921986e-06, "loss": 30.4574, "step": 19612 }, { "epoch": 466.97910447761194, "grad_norm": 22.556394577026367, "learning_rate": 9.644883485309018e-06, "loss": 28.4048, "step": 19613 }, { "epoch": 467.0, "grad_norm": 18.318809509277344, "learning_rate": 9.64437689969605e-06, "loss": 25.3363, "step": 19614 }, { "epoch": 467.0238805970149, "grad_norm": 22.30056381225586, "learning_rate": 9.64387031408308e-06, "loss": 29.2578, "step": 19615 }, { "epoch": 467.0477611940299, "grad_norm": 20.252290725708008, "learning_rate": 9.643363728470112e-06, "loss": 28.9834, "step": 19616 }, { "epoch": 467.0716417910448, "grad_norm": 20.37310791015625, "learning_rate": 9.642857142857144e-06, "loss": 28.9571, "step": 19617 }, { "epoch": 467.0955223880597, "grad_norm": 19.089561462402344, "learning_rate": 9.642350557244176e-06, "loss": 28.2773, "step": 19618 }, { "epoch": 467.1194029850746, "grad_norm": 21.87244987487793, "learning_rate": 9.641843971631207e-06, "loss": 30.3677, "step": 19619 }, { "epoch": 467.14328358208957, "grad_norm": 21.3272762298584, "learning_rate": 9.641337386018239e-06, "loss": 30.0263, "step": 19620 }, { "epoch": 467.1671641791045, "grad_norm": 24.562166213989258, "learning_rate": 9.64083080040527e-06, "loss": 30.042, "step": 19621 }, { "epoch": 467.1910447761194, "grad_norm": 19.34654998779297, "learning_rate": 9.640324214792301e-06, "loss": 29.0078, "step": 19622 }, { "epoch": 467.21492537313435, "grad_norm": 22.340896606445312, "learning_rate": 9.639817629179333e-06, "loss": 28.2769, "step": 19623 }, { "epoch": 467.23880597014926, "grad_norm": 21.19550132751465, "learning_rate": 9.639311043566363e-06, "loss": 28.6238, "step": 19624 }, { "epoch": 467.26268656716417, "grad_norm": 18.92556381225586, "learning_rate": 9.638804457953395e-06, "loss": 28.9114, "step": 19625 }, { "epoch": 467.28656716417913, "grad_norm": 18.69098663330078, "learning_rate": 9.638297872340426e-06, "loss": 29.221, "step": 19626 }, { "epoch": 467.31044776119404, "grad_norm": 19.42716407775879, "learning_rate": 9.637791286727458e-06, "loss": 28.5446, "step": 19627 }, { "epoch": 467.33432835820895, "grad_norm": 20.391361236572266, "learning_rate": 9.63728470111449e-06, "loss": 29.1295, "step": 19628 }, { "epoch": 467.35820895522386, "grad_norm": 25.626131057739258, "learning_rate": 9.63677811550152e-06, "loss": 28.6539, "step": 19629 }, { "epoch": 467.3820895522388, "grad_norm": 19.701906204223633, "learning_rate": 9.636271529888552e-06, "loss": 29.4698, "step": 19630 }, { "epoch": 467.40597014925373, "grad_norm": 17.121196746826172, "learning_rate": 9.635764944275582e-06, "loss": 28.6439, "step": 19631 }, { "epoch": 467.42985074626864, "grad_norm": 18.25693130493164, "learning_rate": 9.635258358662614e-06, "loss": 28.9014, "step": 19632 }, { "epoch": 467.4537313432836, "grad_norm": 19.422767639160156, "learning_rate": 9.634751773049646e-06, "loss": 28.7436, "step": 19633 }, { "epoch": 467.4776119402985, "grad_norm": 20.1483154296875, "learning_rate": 9.634245187436677e-06, "loss": 28.9002, "step": 19634 }, { "epoch": 467.5014925373134, "grad_norm": 22.752906799316406, "learning_rate": 9.633738601823709e-06, "loss": 29.2692, "step": 19635 }, { "epoch": 467.52537313432833, "grad_norm": 18.461193084716797, "learning_rate": 9.63323201621074e-06, "loss": 28.9754, "step": 19636 }, { "epoch": 467.5492537313433, "grad_norm": 16.01105308532715, "learning_rate": 9.632725430597771e-06, "loss": 29.3077, "step": 19637 }, { "epoch": 467.5731343283582, "grad_norm": 21.365711212158203, "learning_rate": 9.632218844984803e-06, "loss": 28.9677, "step": 19638 }, { "epoch": 467.5970149253731, "grad_norm": 24.480318069458008, "learning_rate": 9.631712259371835e-06, "loss": 29.8441, "step": 19639 }, { "epoch": 467.6208955223881, "grad_norm": 19.58094596862793, "learning_rate": 9.631205673758865e-06, "loss": 27.8587, "step": 19640 }, { "epoch": 467.644776119403, "grad_norm": 19.89678192138672, "learning_rate": 9.630699088145897e-06, "loss": 29.0374, "step": 19641 }, { "epoch": 467.6686567164179, "grad_norm": 22.888118743896484, "learning_rate": 9.63019250253293e-06, "loss": 29.8845, "step": 19642 }, { "epoch": 467.6925373134328, "grad_norm": 25.601680755615234, "learning_rate": 9.629685916919961e-06, "loss": 29.7014, "step": 19643 }, { "epoch": 467.7164179104478, "grad_norm": 21.57819366455078, "learning_rate": 9.629179331306992e-06, "loss": 28.3753, "step": 19644 }, { "epoch": 467.7402985074627, "grad_norm": 18.086196899414062, "learning_rate": 9.628672745694024e-06, "loss": 29.4379, "step": 19645 }, { "epoch": 467.7641791044776, "grad_norm": 23.92120361328125, "learning_rate": 9.628166160081056e-06, "loss": 28.895, "step": 19646 }, { "epoch": 467.78805970149256, "grad_norm": 26.08490753173828, "learning_rate": 9.627659574468086e-06, "loss": 29.5374, "step": 19647 }, { "epoch": 467.81194029850747, "grad_norm": 16.537267684936523, "learning_rate": 9.627152988855118e-06, "loss": 29.5027, "step": 19648 }, { "epoch": 467.8358208955224, "grad_norm": 22.8741455078125, "learning_rate": 9.62664640324215e-06, "loss": 30.0906, "step": 19649 }, { "epoch": 467.85970149253734, "grad_norm": 28.271041870117188, "learning_rate": 9.62613981762918e-06, "loss": 29.6912, "step": 19650 }, { "epoch": 467.88358208955225, "grad_norm": 20.11058807373047, "learning_rate": 9.625633232016212e-06, "loss": 28.2863, "step": 19651 }, { "epoch": 467.90746268656716, "grad_norm": 19.616628646850586, "learning_rate": 9.625126646403243e-06, "loss": 29.2593, "step": 19652 }, { "epoch": 467.93134328358207, "grad_norm": 21.026023864746094, "learning_rate": 9.624620060790275e-06, "loss": 29.6475, "step": 19653 }, { "epoch": 467.95522388059703, "grad_norm": 18.169038772583008, "learning_rate": 9.624113475177305e-06, "loss": 29.2866, "step": 19654 }, { "epoch": 467.97910447761194, "grad_norm": 23.313758850097656, "learning_rate": 9.623606889564337e-06, "loss": 29.2861, "step": 19655 }, { "epoch": 468.0, "grad_norm": 19.968507766723633, "learning_rate": 9.623100303951369e-06, "loss": 25.8639, "step": 19656 }, { "epoch": 468.0238805970149, "grad_norm": 18.471317291259766, "learning_rate": 9.6225937183384e-06, "loss": 28.5895, "step": 19657 }, { "epoch": 468.0477611940299, "grad_norm": 20.528568267822266, "learning_rate": 9.622087132725431e-06, "loss": 28.9515, "step": 19658 }, { "epoch": 468.0716417910448, "grad_norm": 24.879005432128906, "learning_rate": 9.621580547112462e-06, "loss": 29.9242, "step": 19659 }, { "epoch": 468.0955223880597, "grad_norm": 25.250202178955078, "learning_rate": 9.621073961499494e-06, "loss": 29.5826, "step": 19660 }, { "epoch": 468.1194029850746, "grad_norm": 20.922510147094727, "learning_rate": 9.620567375886526e-06, "loss": 29.5858, "step": 19661 }, { "epoch": 468.14328358208957, "grad_norm": 18.859712600708008, "learning_rate": 9.620060790273556e-06, "loss": 29.4395, "step": 19662 }, { "epoch": 468.1671641791045, "grad_norm": 18.36642837524414, "learning_rate": 9.619554204660588e-06, "loss": 29.0934, "step": 19663 }, { "epoch": 468.1910447761194, "grad_norm": 20.03298568725586, "learning_rate": 9.61904761904762e-06, "loss": 29.2848, "step": 19664 }, { "epoch": 468.21492537313435, "grad_norm": 17.36714744567871, "learning_rate": 9.61854103343465e-06, "loss": 29.0354, "step": 19665 }, { "epoch": 468.23880597014926, "grad_norm": 19.04121971130371, "learning_rate": 9.618034447821682e-06, "loss": 29.1022, "step": 19666 }, { "epoch": 468.26268656716417, "grad_norm": 21.522136688232422, "learning_rate": 9.617527862208714e-06, "loss": 28.335, "step": 19667 }, { "epoch": 468.28656716417913, "grad_norm": 18.737871170043945, "learning_rate": 9.617021276595745e-06, "loss": 28.5847, "step": 19668 }, { "epoch": 468.31044776119404, "grad_norm": 19.256973266601562, "learning_rate": 9.616514690982777e-06, "loss": 28.4947, "step": 19669 }, { "epoch": 468.33432835820895, "grad_norm": 22.03706169128418, "learning_rate": 9.616008105369809e-06, "loss": 29.8349, "step": 19670 }, { "epoch": 468.35820895522386, "grad_norm": 23.332576751708984, "learning_rate": 9.61550151975684e-06, "loss": 28.4757, "step": 19671 }, { "epoch": 468.3820895522388, "grad_norm": 18.33847427368164, "learning_rate": 9.614994934143871e-06, "loss": 28.3303, "step": 19672 }, { "epoch": 468.40597014925373, "grad_norm": 18.78719139099121, "learning_rate": 9.614488348530903e-06, "loss": 29.3937, "step": 19673 }, { "epoch": 468.42985074626864, "grad_norm": 25.91994857788086, "learning_rate": 9.613981762917935e-06, "loss": 28.1718, "step": 19674 }, { "epoch": 468.4537313432836, "grad_norm": 20.809009552001953, "learning_rate": 9.613475177304965e-06, "loss": 28.6016, "step": 19675 }, { "epoch": 468.4776119402985, "grad_norm": 19.664823532104492, "learning_rate": 9.612968591691997e-06, "loss": 27.9512, "step": 19676 }, { "epoch": 468.5014925373134, "grad_norm": 20.439250946044922, "learning_rate": 9.61246200607903e-06, "loss": 28.8703, "step": 19677 }, { "epoch": 468.52537313432833, "grad_norm": 25.52252769470215, "learning_rate": 9.61195542046606e-06, "loss": 29.93, "step": 19678 }, { "epoch": 468.5492537313433, "grad_norm": 18.69904136657715, "learning_rate": 9.611448834853092e-06, "loss": 29.0771, "step": 19679 }, { "epoch": 468.5731343283582, "grad_norm": 23.08135986328125, "learning_rate": 9.610942249240122e-06, "loss": 28.7046, "step": 19680 }, { "epoch": 468.5970149253731, "grad_norm": 20.103927612304688, "learning_rate": 9.610435663627154e-06, "loss": 30.4296, "step": 19681 }, { "epoch": 468.6208955223881, "grad_norm": 26.821758270263672, "learning_rate": 9.609929078014186e-06, "loss": 29.8685, "step": 19682 }, { "epoch": 468.644776119403, "grad_norm": 20.518943786621094, "learning_rate": 9.609422492401216e-06, "loss": 29.466, "step": 19683 }, { "epoch": 468.6686567164179, "grad_norm": 29.1739501953125, "learning_rate": 9.608915906788248e-06, "loss": 28.2493, "step": 19684 }, { "epoch": 468.6925373134328, "grad_norm": 25.186906814575195, "learning_rate": 9.608409321175279e-06, "loss": 29.5826, "step": 19685 }, { "epoch": 468.7164179104478, "grad_norm": 21.22698974609375, "learning_rate": 9.60790273556231e-06, "loss": 29.3345, "step": 19686 }, { "epoch": 468.7402985074627, "grad_norm": 23.495573043823242, "learning_rate": 9.607396149949341e-06, "loss": 29.5948, "step": 19687 }, { "epoch": 468.7641791044776, "grad_norm": 23.364694595336914, "learning_rate": 9.606889564336373e-06, "loss": 28.4133, "step": 19688 }, { "epoch": 468.78805970149256, "grad_norm": 18.091373443603516, "learning_rate": 9.606382978723405e-06, "loss": 28.8059, "step": 19689 }, { "epoch": 468.81194029850747, "grad_norm": 21.842491149902344, "learning_rate": 9.605876393110435e-06, "loss": 29.1128, "step": 19690 }, { "epoch": 468.8358208955224, "grad_norm": 17.07388687133789, "learning_rate": 9.605369807497467e-06, "loss": 29.5451, "step": 19691 }, { "epoch": 468.85970149253734, "grad_norm": 20.303184509277344, "learning_rate": 9.6048632218845e-06, "loss": 29.0557, "step": 19692 }, { "epoch": 468.88358208955225, "grad_norm": 21.604591369628906, "learning_rate": 9.60435663627153e-06, "loss": 29.9273, "step": 19693 }, { "epoch": 468.90746268656716, "grad_norm": 23.5158748626709, "learning_rate": 9.603850050658562e-06, "loss": 29.6563, "step": 19694 }, { "epoch": 468.93134328358207, "grad_norm": 21.707273483276367, "learning_rate": 9.603343465045594e-06, "loss": 28.6726, "step": 19695 }, { "epoch": 468.95522388059703, "grad_norm": 19.169403076171875, "learning_rate": 9.602836879432626e-06, "loss": 29.9767, "step": 19696 }, { "epoch": 468.97910447761194, "grad_norm": 18.876110076904297, "learning_rate": 9.602330293819656e-06, "loss": 29.0644, "step": 19697 }, { "epoch": 469.0, "grad_norm": 17.647687911987305, "learning_rate": 9.601823708206688e-06, "loss": 25.2757, "step": 19698 }, { "epoch": 469.0238805970149, "grad_norm": 20.746171951293945, "learning_rate": 9.60131712259372e-06, "loss": 30.1679, "step": 19699 }, { "epoch": 469.0477611940299, "grad_norm": 20.99315071105957, "learning_rate": 9.60081053698075e-06, "loss": 29.1228, "step": 19700 }, { "epoch": 469.0716417910448, "grad_norm": 21.16964340209961, "learning_rate": 9.600303951367782e-06, "loss": 29.0611, "step": 19701 }, { "epoch": 469.0955223880597, "grad_norm": 20.03379249572754, "learning_rate": 9.599797365754814e-06, "loss": 28.3937, "step": 19702 }, { "epoch": 469.1194029850746, "grad_norm": 17.687355041503906, "learning_rate": 9.599290780141845e-06, "loss": 28.904, "step": 19703 }, { "epoch": 469.14328358208957, "grad_norm": 19.185100555419922, "learning_rate": 9.598784194528877e-06, "loss": 29.6079, "step": 19704 }, { "epoch": 469.1671641791045, "grad_norm": 17.56740379333496, "learning_rate": 9.598277608915909e-06, "loss": 28.8182, "step": 19705 }, { "epoch": 469.1910447761194, "grad_norm": 19.709192276000977, "learning_rate": 9.597771023302939e-06, "loss": 29.3208, "step": 19706 }, { "epoch": 469.21492537313435, "grad_norm": 19.217390060424805, "learning_rate": 9.597264437689971e-06, "loss": 29.5979, "step": 19707 }, { "epoch": 469.23880597014926, "grad_norm": 22.66893196105957, "learning_rate": 9.596757852077001e-06, "loss": 29.1066, "step": 19708 }, { "epoch": 469.26268656716417, "grad_norm": 20.20285415649414, "learning_rate": 9.596251266464033e-06, "loss": 29.7549, "step": 19709 }, { "epoch": 469.28656716417913, "grad_norm": 16.239822387695312, "learning_rate": 9.595744680851065e-06, "loss": 29.5944, "step": 19710 }, { "epoch": 469.31044776119404, "grad_norm": 23.667879104614258, "learning_rate": 9.595238095238096e-06, "loss": 29.1633, "step": 19711 }, { "epoch": 469.33432835820895, "grad_norm": 22.749649047851562, "learning_rate": 9.594731509625128e-06, "loss": 29.223, "step": 19712 }, { "epoch": 469.35820895522386, "grad_norm": 21.708410263061523, "learning_rate": 9.594224924012158e-06, "loss": 28.9992, "step": 19713 }, { "epoch": 469.3820895522388, "grad_norm": 23.01854705810547, "learning_rate": 9.59371833839919e-06, "loss": 29.0999, "step": 19714 }, { "epoch": 469.40597014925373, "grad_norm": 20.65330696105957, "learning_rate": 9.59321175278622e-06, "loss": 28.203, "step": 19715 }, { "epoch": 469.42985074626864, "grad_norm": 18.535802841186523, "learning_rate": 9.592705167173252e-06, "loss": 28.0298, "step": 19716 }, { "epoch": 469.4537313432836, "grad_norm": NaN, "learning_rate": 9.592198581560284e-06, "loss": 51.7579, "step": 19717 }, { "epoch": 469.4776119402985, "grad_norm": 22.80849266052246, "learning_rate": 9.592198581560284e-06, "loss": 29.0688, "step": 19718 }, { "epoch": 469.5014925373134, "grad_norm": 16.994373321533203, "learning_rate": 9.591691995947315e-06, "loss": 28.6513, "step": 19719 }, { "epoch": 469.52537313432833, "grad_norm": 16.844884872436523, "learning_rate": 9.591185410334347e-06, "loss": 28.3201, "step": 19720 }, { "epoch": 469.5492537313433, "grad_norm": 22.792234420776367, "learning_rate": 9.590678824721379e-06, "loss": 29.593, "step": 19721 }, { "epoch": 469.5731343283582, "grad_norm": 19.392051696777344, "learning_rate": 9.590172239108409e-06, "loss": 29.1117, "step": 19722 }, { "epoch": 469.5970149253731, "grad_norm": 24.38888168334961, "learning_rate": 9.589665653495441e-06, "loss": 29.8008, "step": 19723 }, { "epoch": 469.6208955223881, "grad_norm": 20.49308967590332, "learning_rate": 9.589159067882473e-06, "loss": 29.1159, "step": 19724 }, { "epoch": 469.644776119403, "grad_norm": 19.321168899536133, "learning_rate": 9.588652482269505e-06, "loss": 29.5985, "step": 19725 }, { "epoch": 469.6686567164179, "grad_norm": 19.85795783996582, "learning_rate": 9.588145896656535e-06, "loss": 29.8911, "step": 19726 }, { "epoch": 469.6925373134328, "grad_norm": 18.103805541992188, "learning_rate": 9.587639311043567e-06, "loss": 27.8121, "step": 19727 }, { "epoch": 469.7164179104478, "grad_norm": 25.594194412231445, "learning_rate": 9.5871327254306e-06, "loss": 29.5047, "step": 19728 }, { "epoch": 469.7402985074627, "grad_norm": 23.330305099487305, "learning_rate": 9.58662613981763e-06, "loss": 27.9161, "step": 19729 }, { "epoch": 469.7641791044776, "grad_norm": 21.391653060913086, "learning_rate": 9.586119554204662e-06, "loss": 29.564, "step": 19730 }, { "epoch": 469.78805970149256, "grad_norm": 20.58977508544922, "learning_rate": 9.585612968591694e-06, "loss": 28.2074, "step": 19731 }, { "epoch": 469.81194029850747, "grad_norm": 19.340822219848633, "learning_rate": 9.585106382978724e-06, "loss": 28.7366, "step": 19732 }, { "epoch": 469.8358208955224, "grad_norm": 18.77394676208496, "learning_rate": 9.584599797365756e-06, "loss": 29.1437, "step": 19733 }, { "epoch": 469.85970149253734, "grad_norm": 23.745790481567383, "learning_rate": 9.584093211752788e-06, "loss": 28.1626, "step": 19734 }, { "epoch": 469.88358208955225, "grad_norm": 20.616003036499023, "learning_rate": 9.583586626139818e-06, "loss": 30.7816, "step": 19735 }, { "epoch": 469.90746268656716, "grad_norm": 18.47565269470215, "learning_rate": 9.58308004052685e-06, "loss": 28.8184, "step": 19736 }, { "epoch": 469.93134328358207, "grad_norm": 20.470050811767578, "learning_rate": 9.58257345491388e-06, "loss": 29.8601, "step": 19737 }, { "epoch": 469.95522388059703, "grad_norm": 29.08283805847168, "learning_rate": 9.582066869300913e-06, "loss": 28.6784, "step": 19738 }, { "epoch": 469.97910447761194, "grad_norm": 22.45155143737793, "learning_rate": 9.581560283687945e-06, "loss": 28.5674, "step": 19739 }, { "epoch": 470.0, "grad_norm": 15.902899742126465, "learning_rate": 9.581053698074975e-06, "loss": 25.8267, "step": 19740 }, { "epoch": 470.0, "step": 19740, "total_flos": 9.703918982411759e+17, "train_loss": 1.252900850301818, "train_runtime": 25604.1255, "train_samples_per_second": 98.244, "train_steps_per_second": 0.771 }, { "epoch": 470.0238805970149, "grad_norm": 27.276023864746094, "learning_rate": 1e-05, "loss": 29.1662, "step": 19741 }, { "epoch": 470.0477611940299, "grad_norm": Infinity, "learning_rate": 9.999503968253968e-06, "loss": 36.0562, "step": 19742 }, { "epoch": 470.0716417910448, "grad_norm": Infinity, "learning_rate": 9.999503968253968e-06, "loss": 37.2188, "step": 19743 }, { "epoch": 470.0955223880597, "grad_norm": 428.5850524902344, "learning_rate": 9.999503968253968e-06, "loss": 36.4822, "step": 19744 }, { "epoch": 470.1194029850746, "grad_norm": 215.894775390625, "learning_rate": 9.999007936507937e-06, "loss": 34.1757, "step": 19745 }, { "epoch": 470.14328358208957, "grad_norm": 105.78826141357422, "learning_rate": 9.998511904761904e-06, "loss": 31.9137, "step": 19746 }, { "epoch": 470.1671641791045, "grad_norm": 74.41807556152344, "learning_rate": 9.998015873015874e-06, "loss": 30.6267, "step": 19747 }, { "epoch": 470.1910447761194, "grad_norm": 65.91170501708984, "learning_rate": 9.99751984126984e-06, "loss": 31.3685, "step": 19748 }, { "epoch": 470.21492537313435, "grad_norm": 48.53287887573242, "learning_rate": 9.99702380952381e-06, "loss": 29.0579, "step": 19749 }, { "epoch": 470.23880597014926, "grad_norm": 62.90013885498047, "learning_rate": 9.996527777777779e-06, "loss": 29.5241, "step": 19750 }, { "epoch": 470.26268656716417, "grad_norm": 39.991241455078125, "learning_rate": 9.996031746031746e-06, "loss": 29.5951, "step": 19751 }, { "epoch": 470.28656716417913, "grad_norm": 43.86666488647461, "learning_rate": 9.995535714285715e-06, "loss": 30.1543, "step": 19752 }, { "epoch": 470.31044776119404, "grad_norm": 39.79184341430664, "learning_rate": 9.995039682539683e-06, "loss": 29.4396, "step": 19753 }, { "epoch": 470.33432835820895, "grad_norm": 29.022216796875, "learning_rate": 9.994543650793652e-06, "loss": 29.0068, "step": 19754 }, { "epoch": 470.35820895522386, "grad_norm": 47.34379577636719, "learning_rate": 9.99404761904762e-06, "loss": 29.9802, "step": 19755 }, { "epoch": 470.3820895522388, "grad_norm": 25.846607208251953, "learning_rate": 9.993551587301588e-06, "loss": 28.7942, "step": 19756 }, { "epoch": 470.40597014925373, "grad_norm": 35.39889907836914, "learning_rate": 9.993055555555557e-06, "loss": 29.9441, "step": 19757 }, { "epoch": 470.42985074626864, "grad_norm": 30.289539337158203, "learning_rate": 9.992559523809524e-06, "loss": 30.1426, "step": 19758 }, { "epoch": 470.4537313432836, "grad_norm": 23.93886375427246, "learning_rate": 9.992063492063493e-06, "loss": 28.7878, "step": 19759 }, { "epoch": 470.4776119402985, "grad_norm": 36.23288345336914, "learning_rate": 9.99156746031746e-06, "loss": 29.8665, "step": 19760 }, { "epoch": 470.5014925373134, "grad_norm": 29.456787109375, "learning_rate": 9.99107142857143e-06, "loss": 29.7986, "step": 19761 }, { "epoch": 470.52537313432833, "grad_norm": 22.537702560424805, "learning_rate": 9.990575396825397e-06, "loss": 28.7702, "step": 19762 }, { "epoch": 470.5492537313433, "grad_norm": 31.276037216186523, "learning_rate": 9.990079365079366e-06, "loss": 28.8982, "step": 19763 }, { "epoch": 470.5731343283582, "grad_norm": 22.79057502746582, "learning_rate": 9.989583333333333e-06, "loss": 29.647, "step": 19764 }, { "epoch": 470.5970149253731, "grad_norm": 30.46953582763672, "learning_rate": 9.989087301587302e-06, "loss": 28.5053, "step": 19765 }, { "epoch": 470.6208955223881, "grad_norm": 25.715972900390625, "learning_rate": 9.98859126984127e-06, "loss": 28.3684, "step": 19766 }, { "epoch": 470.644776119403, "grad_norm": 23.282564163208008, "learning_rate": 9.988095238095239e-06, "loss": 29.154, "step": 19767 }, { "epoch": 470.6686567164179, "grad_norm": 24.749910354614258, "learning_rate": 9.987599206349206e-06, "loss": 28.9358, "step": 19768 }, { "epoch": 470.6925373134328, "grad_norm": 27.42661476135254, "learning_rate": 9.987103174603175e-06, "loss": 29.6728, "step": 19769 }, { "epoch": 470.7164179104478, "grad_norm": 18.886133193969727, "learning_rate": 9.986607142857142e-06, "loss": 28.6897, "step": 19770 }, { "epoch": 470.7402985074627, "grad_norm": 24.163373947143555, "learning_rate": 9.986111111111111e-06, "loss": 29.2158, "step": 19771 }, { "epoch": 470.7641791044776, "grad_norm": 24.39092445373535, "learning_rate": 9.98561507936508e-06, "loss": 28.8451, "step": 19772 }, { "epoch": 470.78805970149256, "grad_norm": 18.183731079101562, "learning_rate": 9.985119047619048e-06, "loss": 29.1355, "step": 19773 }, { "epoch": 470.81194029850747, "grad_norm": 26.172393798828125, "learning_rate": 9.984623015873017e-06, "loss": 29.1687, "step": 19774 }, { "epoch": 470.8358208955224, "grad_norm": 25.32247543334961, "learning_rate": 9.984126984126986e-06, "loss": 29.6151, "step": 19775 }, { "epoch": 470.85970149253734, "grad_norm": 18.8282470703125, "learning_rate": 9.983630952380953e-06, "loss": 29.909, "step": 19776 }, { "epoch": 470.88358208955225, "grad_norm": 27.22785758972168, "learning_rate": 9.983134920634922e-06, "loss": 29.6233, "step": 19777 }, { "epoch": 470.90746268656716, "grad_norm": 23.622692108154297, "learning_rate": 9.98263888888889e-06, "loss": 29.4145, "step": 19778 }, { "epoch": 470.93134328358207, "grad_norm": 22.667997360229492, "learning_rate": 9.982142857142858e-06, "loss": 28.2564, "step": 19779 }, { "epoch": 470.95522388059703, "grad_norm": 28.490755081176758, "learning_rate": 9.981646825396826e-06, "loss": 30.2663, "step": 19780 }, { "epoch": 470.97910447761194, "grad_norm": 20.664491653442383, "learning_rate": 9.981150793650795e-06, "loss": 27.9093, "step": 19781 }, { "epoch": 471.0, "grad_norm": 20.902971267700195, "learning_rate": 9.980654761904762e-06, "loss": 25.9909, "step": 19782 }, { "epoch": 471.0238805970149, "grad_norm": 22.71077537536621, "learning_rate": 9.980158730158731e-06, "loss": 29.2289, "step": 19783 }, { "epoch": 471.0477611940299, "grad_norm": 22.69999885559082, "learning_rate": 9.979662698412699e-06, "loss": 28.5316, "step": 19784 }, { "epoch": 471.0716417910448, "grad_norm": 22.211442947387695, "learning_rate": 9.979166666666668e-06, "loss": 29.7362, "step": 19785 }, { "epoch": 471.0955223880597, "grad_norm": 19.082195281982422, "learning_rate": 9.978670634920635e-06, "loss": 30.0965, "step": 19786 }, { "epoch": 471.1194029850746, "grad_norm": 22.48455047607422, "learning_rate": 9.978174603174604e-06, "loss": 28.267, "step": 19787 }, { "epoch": 471.14328358208957, "grad_norm": 19.120037078857422, "learning_rate": 9.977678571428571e-06, "loss": 28.9615, "step": 19788 }, { "epoch": 471.1671641791045, "grad_norm": 17.319116592407227, "learning_rate": 9.97718253968254e-06, "loss": 28.9795, "step": 19789 }, { "epoch": 471.1910447761194, "grad_norm": 19.962446212768555, "learning_rate": 9.976686507936508e-06, "loss": 29.9832, "step": 19790 }, { "epoch": 471.21492537313435, "grad_norm": NaN, "learning_rate": 9.976190476190477e-06, "loss": 50.4832, "step": 19791 }, { "epoch": 471.23880597014926, "grad_norm": 20.378686904907227, "learning_rate": 9.976190476190477e-06, "loss": 29.6206, "step": 19792 }, { "epoch": 471.26268656716417, "grad_norm": 20.45332145690918, "learning_rate": 9.975694444444446e-06, "loss": 29.5804, "step": 19793 }, { "epoch": 471.28656716417913, "grad_norm": 19.47447967529297, "learning_rate": 9.975198412698413e-06, "loss": 29.631, "step": 19794 }, { "epoch": 471.31044776119404, "grad_norm": 18.83791160583496, "learning_rate": 9.974702380952382e-06, "loss": 28.4298, "step": 19795 }, { "epoch": 471.33432835820895, "grad_norm": 19.611337661743164, "learning_rate": 9.97420634920635e-06, "loss": 28.6466, "step": 19796 }, { "epoch": 471.35820895522386, "grad_norm": 23.94387435913086, "learning_rate": 9.973710317460318e-06, "loss": 28.9669, "step": 19797 }, { "epoch": 471.3820895522388, "grad_norm": 22.975929260253906, "learning_rate": 9.973214285714287e-06, "loss": 28.3556, "step": 19798 }, { "epoch": 471.40597014925373, "grad_norm": 18.550844192504883, "learning_rate": 9.972718253968255e-06, "loss": 28.7792, "step": 19799 }, { "epoch": 471.42985074626864, "grad_norm": 19.519533157348633, "learning_rate": 9.972222222222224e-06, "loss": 28.2437, "step": 19800 }, { "epoch": 471.4537313432836, "grad_norm": 20.234277725219727, "learning_rate": 9.971726190476191e-06, "loss": 27.7287, "step": 19801 }, { "epoch": 471.4776119402985, "grad_norm": 19.358108520507812, "learning_rate": 9.97123015873016e-06, "loss": 28.9303, "step": 19802 }, { "epoch": 471.5014925373134, "grad_norm": 22.9925594329834, "learning_rate": 9.970734126984127e-06, "loss": 28.653, "step": 19803 }, { "epoch": 471.52537313432833, "grad_norm": 20.76951789855957, "learning_rate": 9.970238095238096e-06, "loss": 28.6214, "step": 19804 }, { "epoch": 471.5492537313433, "grad_norm": 18.84917449951172, "learning_rate": 9.969742063492064e-06, "loss": 29.1933, "step": 19805 }, { "epoch": 471.5731343283582, "grad_norm": 17.90723419189453, "learning_rate": 9.969246031746033e-06, "loss": 28.9057, "step": 19806 }, { "epoch": 471.5970149253731, "grad_norm": 20.423791885375977, "learning_rate": 9.96875e-06, "loss": 30.0936, "step": 19807 }, { "epoch": 471.6208955223881, "grad_norm": 28.2806396484375, "learning_rate": 9.968253968253969e-06, "loss": 29.5068, "step": 19808 }, { "epoch": 471.644776119403, "grad_norm": 22.43063735961914, "learning_rate": 9.967757936507936e-06, "loss": 29.265, "step": 19809 }, { "epoch": 471.6686567164179, "grad_norm": 20.88560676574707, "learning_rate": 9.967261904761905e-06, "loss": 29.2876, "step": 19810 }, { "epoch": 471.6925373134328, "grad_norm": 16.918142318725586, "learning_rate": 9.966765873015873e-06, "loss": 27.9065, "step": 19811 }, { "epoch": 471.7164179104478, "grad_norm": 22.524934768676758, "learning_rate": 9.966269841269842e-06, "loss": 28.7911, "step": 19812 }, { "epoch": 471.7402985074627, "grad_norm": 21.51717758178711, "learning_rate": 9.965773809523809e-06, "loss": 29.306, "step": 19813 }, { "epoch": 471.7641791044776, "grad_norm": 17.81270980834961, "learning_rate": 9.965277777777778e-06, "loss": 29.0912, "step": 19814 }, { "epoch": 471.78805970149256, "grad_norm": 18.485572814941406, "learning_rate": 9.964781746031747e-06, "loss": 29.9338, "step": 19815 }, { "epoch": 471.81194029850747, "grad_norm": 21.913972854614258, "learning_rate": 9.964285714285714e-06, "loss": 30.1437, "step": 19816 }, { "epoch": 471.8358208955224, "grad_norm": 19.970117568969727, "learning_rate": 9.963789682539683e-06, "loss": 27.7666, "step": 19817 }, { "epoch": 471.85970149253734, "grad_norm": 19.666255950927734, "learning_rate": 9.963293650793653e-06, "loss": 29.1732, "step": 19818 }, { "epoch": 471.88358208955225, "grad_norm": 22.215911865234375, "learning_rate": 9.96279761904762e-06, "loss": 28.6622, "step": 19819 }, { "epoch": 471.90746268656716, "grad_norm": 19.01563262939453, "learning_rate": 9.962301587301589e-06, "loss": 28.7558, "step": 19820 }, { "epoch": 471.93134328358207, "grad_norm": 22.76268196105957, "learning_rate": 9.961805555555556e-06, "loss": 29.0703, "step": 19821 }, { "epoch": 471.95522388059703, "grad_norm": 26.040929794311523, "learning_rate": 9.961309523809525e-06, "loss": 29.3545, "step": 19822 }, { "epoch": 471.97910447761194, "grad_norm": 19.44101333618164, "learning_rate": 9.960813492063493e-06, "loss": 29.7285, "step": 19823 }, { "epoch": 472.0, "grad_norm": 21.457660675048828, "learning_rate": 9.960317460317462e-06, "loss": 25.3161, "step": 19824 }, { "epoch": 472.0238805970149, "grad_norm": 24.1329288482666, "learning_rate": 9.959821428571429e-06, "loss": 28.0485, "step": 19825 }, { "epoch": 472.0477611940299, "grad_norm": 24.55424690246582, "learning_rate": 9.959325396825398e-06, "loss": 30.329, "step": 19826 }, { "epoch": 472.0716417910448, "grad_norm": 19.239744186401367, "learning_rate": 9.958829365079365e-06, "loss": 29.9694, "step": 19827 }, { "epoch": 472.0955223880597, "grad_norm": 19.867977142333984, "learning_rate": 9.958333333333334e-06, "loss": 28.2362, "step": 19828 }, { "epoch": 472.1194029850746, "grad_norm": 17.120723724365234, "learning_rate": 9.957837301587302e-06, "loss": 29.279, "step": 19829 }, { "epoch": 472.14328358208957, "grad_norm": 21.33145523071289, "learning_rate": 9.95734126984127e-06, "loss": 28.61, "step": 19830 }, { "epoch": 472.1671641791045, "grad_norm": 21.46869659423828, "learning_rate": 9.956845238095238e-06, "loss": 28.5316, "step": 19831 }, { "epoch": 472.1910447761194, "grad_norm": 20.999595642089844, "learning_rate": 9.956349206349207e-06, "loss": 28.5006, "step": 19832 }, { "epoch": 472.21492537313435, "grad_norm": 24.218647003173828, "learning_rate": 9.955853174603174e-06, "loss": 29.2689, "step": 19833 }, { "epoch": 472.23880597014926, "grad_norm": 21.251211166381836, "learning_rate": 9.955357142857143e-06, "loss": 28.9998, "step": 19834 }, { "epoch": 472.26268656716417, "grad_norm": 20.517820358276367, "learning_rate": 9.954861111111112e-06, "loss": 30.2719, "step": 19835 }, { "epoch": 472.28656716417913, "grad_norm": 23.910871505737305, "learning_rate": 9.95436507936508e-06, "loss": 29.5407, "step": 19836 }, { "epoch": 472.31044776119404, "grad_norm": 18.889711380004883, "learning_rate": 9.953869047619049e-06, "loss": 28.8576, "step": 19837 }, { "epoch": 472.33432835820895, "grad_norm": 16.83867835998535, "learning_rate": 9.953373015873016e-06, "loss": 28.3222, "step": 19838 }, { "epoch": 472.35820895522386, "grad_norm": 17.324962615966797, "learning_rate": 9.952876984126985e-06, "loss": 27.6531, "step": 19839 }, { "epoch": 472.3820895522388, "grad_norm": 20.60772705078125, "learning_rate": 9.952380952380954e-06, "loss": 29.3557, "step": 19840 }, { "epoch": 472.40597014925373, "grad_norm": 24.499656677246094, "learning_rate": 9.951884920634921e-06, "loss": 28.2457, "step": 19841 }, { "epoch": 472.42985074626864, "grad_norm": 21.65102195739746, "learning_rate": 9.95138888888889e-06, "loss": 29.89, "step": 19842 }, { "epoch": 472.4537313432836, "grad_norm": 18.991304397583008, "learning_rate": 9.950892857142858e-06, "loss": 29.5867, "step": 19843 }, { "epoch": 472.4776119402985, "grad_norm": 19.86419105529785, "learning_rate": 9.950396825396827e-06, "loss": 27.9715, "step": 19844 }, { "epoch": 472.5014925373134, "grad_norm": 27.514719009399414, "learning_rate": 9.949900793650794e-06, "loss": 29.1708, "step": 19845 }, { "epoch": 472.52537313432833, "grad_norm": 18.95916175842285, "learning_rate": 9.949404761904763e-06, "loss": 28.9427, "step": 19846 }, { "epoch": 472.5492537313433, "grad_norm": 23.226207733154297, "learning_rate": 9.94890873015873e-06, "loss": 30.4151, "step": 19847 }, { "epoch": 472.5731343283582, "grad_norm": 23.497516632080078, "learning_rate": 9.9484126984127e-06, "loss": 29.2947, "step": 19848 }, { "epoch": 472.5970149253731, "grad_norm": 22.172489166259766, "learning_rate": 9.947916666666667e-06, "loss": 28.4934, "step": 19849 }, { "epoch": 472.6208955223881, "grad_norm": 19.863224029541016, "learning_rate": 9.947420634920636e-06, "loss": 28.3109, "step": 19850 }, { "epoch": 472.644776119403, "grad_norm": 18.293636322021484, "learning_rate": 9.946924603174603e-06, "loss": 28.9519, "step": 19851 }, { "epoch": 472.6686567164179, "grad_norm": 21.69944953918457, "learning_rate": 9.946428571428572e-06, "loss": 28.6514, "step": 19852 }, { "epoch": 472.6925373134328, "grad_norm": 24.533340454101562, "learning_rate": 9.94593253968254e-06, "loss": 28.7479, "step": 19853 }, { "epoch": 472.7164179104478, "grad_norm": 24.13123893737793, "learning_rate": 9.945436507936509e-06, "loss": 29.8382, "step": 19854 }, { "epoch": 472.7402985074627, "grad_norm": 19.28775978088379, "learning_rate": 9.944940476190476e-06, "loss": 28.5312, "step": 19855 }, { "epoch": 472.7641791044776, "grad_norm": 19.832151412963867, "learning_rate": 9.944444444444445e-06, "loss": 29.2102, "step": 19856 }, { "epoch": 472.78805970149256, "grad_norm": 16.026643753051758, "learning_rate": 9.943948412698414e-06, "loss": 28.8383, "step": 19857 }, { "epoch": 472.81194029850747, "grad_norm": 21.212610244750977, "learning_rate": 9.943452380952381e-06, "loss": 29.9367, "step": 19858 }, { "epoch": 472.8358208955224, "grad_norm": 25.468170166015625, "learning_rate": 9.94295634920635e-06, "loss": 29.3612, "step": 19859 }, { "epoch": 472.85970149253734, "grad_norm": 20.614086151123047, "learning_rate": 9.94246031746032e-06, "loss": 29.7618, "step": 19860 }, { "epoch": 472.88358208955225, "grad_norm": 17.519723892211914, "learning_rate": 9.941964285714287e-06, "loss": 28.7966, "step": 19861 }, { "epoch": 472.90746268656716, "grad_norm": 18.790908813476562, "learning_rate": 9.941468253968256e-06, "loss": 28.4451, "step": 19862 }, { "epoch": 472.93134328358207, "grad_norm": 20.16123390197754, "learning_rate": 9.940972222222223e-06, "loss": 27.9365, "step": 19863 }, { "epoch": 472.95522388059703, "grad_norm": 20.411623001098633, "learning_rate": 9.940476190476192e-06, "loss": 28.1928, "step": 19864 }, { "epoch": 472.97910447761194, "grad_norm": 22.377485275268555, "learning_rate": 9.93998015873016e-06, "loss": 30.2059, "step": 19865 }, { "epoch": 473.0, "grad_norm": 17.700414657592773, "learning_rate": 9.939484126984128e-06, "loss": 25.3126, "step": 19866 }, { "epoch": 473.0238805970149, "grad_norm": 18.218631744384766, "learning_rate": 9.938988095238096e-06, "loss": 28.2624, "step": 19867 }, { "epoch": 473.0477611940299, "grad_norm": 17.341609954833984, "learning_rate": 9.938492063492065e-06, "loss": 29.6349, "step": 19868 }, { "epoch": 473.0716417910448, "grad_norm": 23.280887603759766, "learning_rate": 9.937996031746032e-06, "loss": 29.0063, "step": 19869 }, { "epoch": 473.0955223880597, "grad_norm": 21.830177307128906, "learning_rate": 9.937500000000001e-06, "loss": 27.977, "step": 19870 }, { "epoch": 473.1194029850746, "grad_norm": 18.94339370727539, "learning_rate": 9.937003968253968e-06, "loss": 28.7315, "step": 19871 }, { "epoch": 473.14328358208957, "grad_norm": 20.223987579345703, "learning_rate": 9.936507936507937e-06, "loss": 29.0587, "step": 19872 }, { "epoch": 473.1671641791045, "grad_norm": 21.869497299194336, "learning_rate": 9.936011904761905e-06, "loss": 29.2118, "step": 19873 }, { "epoch": 473.1910447761194, "grad_norm": 20.000415802001953, "learning_rate": 9.935515873015874e-06, "loss": 29.1046, "step": 19874 }, { "epoch": 473.21492537313435, "grad_norm": 22.175703048706055, "learning_rate": 9.935019841269841e-06, "loss": 29.1038, "step": 19875 }, { "epoch": 473.23880597014926, "grad_norm": 21.373899459838867, "learning_rate": 9.93452380952381e-06, "loss": 29.3149, "step": 19876 }, { "epoch": 473.26268656716417, "grad_norm": 18.517169952392578, "learning_rate": 9.934027777777779e-06, "loss": 28.2975, "step": 19877 }, { "epoch": 473.28656716417913, "grad_norm": 22.586498260498047, "learning_rate": 9.933531746031746e-06, "loss": 29.3894, "step": 19878 }, { "epoch": 473.31044776119404, "grad_norm": 23.222373962402344, "learning_rate": 9.933035714285715e-06, "loss": 29.3994, "step": 19879 }, { "epoch": 473.33432835820895, "grad_norm": 19.342426300048828, "learning_rate": 9.932539682539684e-06, "loss": 29.0664, "step": 19880 }, { "epoch": 473.35820895522386, "grad_norm": 17.627635955810547, "learning_rate": 9.932043650793652e-06, "loss": 28.2929, "step": 19881 }, { "epoch": 473.3820895522388, "grad_norm": 17.554208755493164, "learning_rate": 9.93154761904762e-06, "loss": 29.0218, "step": 19882 }, { "epoch": 473.40597014925373, "grad_norm": 19.768505096435547, "learning_rate": 9.931051587301588e-06, "loss": 29.2426, "step": 19883 }, { "epoch": 473.42985074626864, "grad_norm": 22.89387321472168, "learning_rate": 9.930555555555557e-06, "loss": 29.3041, "step": 19884 }, { "epoch": 473.4537313432836, "grad_norm": 21.837268829345703, "learning_rate": 9.930059523809524e-06, "loss": 28.4128, "step": 19885 }, { "epoch": 473.4776119402985, "grad_norm": 18.35175132751465, "learning_rate": 9.929563492063493e-06, "loss": 28.4128, "step": 19886 }, { "epoch": 473.5014925373134, "grad_norm": 20.350643157958984, "learning_rate": 9.92906746031746e-06, "loss": 28.5945, "step": 19887 }, { "epoch": 473.52537313432833, "grad_norm": 25.532493591308594, "learning_rate": 9.92857142857143e-06, "loss": 29.3331, "step": 19888 }, { "epoch": 473.5492537313433, "grad_norm": 22.639394760131836, "learning_rate": 9.928075396825397e-06, "loss": 28.8922, "step": 19889 }, { "epoch": 473.5731343283582, "grad_norm": 18.845840454101562, "learning_rate": 9.927579365079366e-06, "loss": 28.9763, "step": 19890 }, { "epoch": 473.5970149253731, "grad_norm": 20.70524024963379, "learning_rate": 9.927083333333334e-06, "loss": 28.8228, "step": 19891 }, { "epoch": 473.6208955223881, "grad_norm": 20.09345054626465, "learning_rate": 9.926587301587303e-06, "loss": 30.0134, "step": 19892 }, { "epoch": 473.644776119403, "grad_norm": 18.419227600097656, "learning_rate": 9.92609126984127e-06, "loss": 28.8919, "step": 19893 }, { "epoch": 473.6686567164179, "grad_norm": 20.23253059387207, "learning_rate": 9.925595238095239e-06, "loss": 29.3936, "step": 19894 }, { "epoch": 473.6925373134328, "grad_norm": 21.38014030456543, "learning_rate": 9.925099206349206e-06, "loss": 29.2112, "step": 19895 }, { "epoch": 473.7164179104478, "grad_norm": 17.452327728271484, "learning_rate": 9.924603174603175e-06, "loss": 28.479, "step": 19896 }, { "epoch": 473.7402985074627, "grad_norm": 18.69632339477539, "learning_rate": 9.924107142857143e-06, "loss": 28.6107, "step": 19897 }, { "epoch": 473.7641791044776, "grad_norm": 23.400474548339844, "learning_rate": 9.923611111111112e-06, "loss": 29.3534, "step": 19898 }, { "epoch": 473.78805970149256, "grad_norm": 22.832151412963867, "learning_rate": 9.92311507936508e-06, "loss": 29.7381, "step": 19899 }, { "epoch": 473.81194029850747, "grad_norm": 21.019702911376953, "learning_rate": 9.922619047619048e-06, "loss": 30.0351, "step": 19900 }, { "epoch": 473.8358208955224, "grad_norm": 19.32085418701172, "learning_rate": 9.922123015873017e-06, "loss": 29.6092, "step": 19901 }, { "epoch": 473.85970149253734, "grad_norm": 23.206087112426758, "learning_rate": 9.921626984126986e-06, "loss": 28.4535, "step": 19902 }, { "epoch": 473.88358208955225, "grad_norm": 23.671852111816406, "learning_rate": 9.921130952380953e-06, "loss": 28.3459, "step": 19903 }, { "epoch": 473.90746268656716, "grad_norm": 23.82563018798828, "learning_rate": 9.920634920634922e-06, "loss": 28.7524, "step": 19904 }, { "epoch": 473.93134328358207, "grad_norm": 19.196388244628906, "learning_rate": 9.92013888888889e-06, "loss": 28.5036, "step": 19905 }, { "epoch": 473.95522388059703, "grad_norm": 26.328235626220703, "learning_rate": 9.919642857142859e-06, "loss": 29.4687, "step": 19906 }, { "epoch": 473.97910447761194, "grad_norm": 26.21993064880371, "learning_rate": 9.919146825396826e-06, "loss": 28.2377, "step": 19907 }, { "epoch": 474.0, "grad_norm": 16.734630584716797, "learning_rate": 9.918650793650795e-06, "loss": 25.2416, "step": 19908 }, { "epoch": 474.0238805970149, "grad_norm": 29.282976150512695, "learning_rate": 9.918154761904762e-06, "loss": 29.2753, "step": 19909 }, { "epoch": 474.0477611940299, "grad_norm": NaN, "learning_rate": 9.917658730158731e-06, "loss": 31.4288, "step": 19910 }, { "epoch": 474.0716417910448, "grad_norm": 24.734886169433594, "learning_rate": 9.917658730158731e-06, "loss": 28.5829, "step": 19911 }, { "epoch": 474.0955223880597, "grad_norm": 20.89947509765625, "learning_rate": 9.917162698412699e-06, "loss": 28.845, "step": 19912 }, { "epoch": 474.1194029850746, "grad_norm": 35.58835983276367, "learning_rate": 9.916666666666668e-06, "loss": 27.8796, "step": 19913 }, { "epoch": 474.14328358208957, "grad_norm": 23.77728271484375, "learning_rate": 9.916170634920635e-06, "loss": 28.512, "step": 19914 }, { "epoch": 474.1671641791045, "grad_norm": 30.654644012451172, "learning_rate": 9.915674603174604e-06, "loss": 29.0223, "step": 19915 }, { "epoch": 474.1910447761194, "grad_norm": 25.15546989440918, "learning_rate": 9.915178571428571e-06, "loss": 28.6673, "step": 19916 }, { "epoch": 474.21492537313435, "grad_norm": 23.76650619506836, "learning_rate": 9.91468253968254e-06, "loss": 28.6103, "step": 19917 }, { "epoch": 474.23880597014926, "grad_norm": 29.17532730102539, "learning_rate": 9.914186507936508e-06, "loss": 28.7368, "step": 19918 }, { "epoch": 474.26268656716417, "grad_norm": 24.020946502685547, "learning_rate": 9.913690476190477e-06, "loss": 29.6255, "step": 19919 }, { "epoch": 474.28656716417913, "grad_norm": 28.878032684326172, "learning_rate": 9.913194444444446e-06, "loss": 28.709, "step": 19920 }, { "epoch": 474.31044776119404, "grad_norm": 26.31392478942871, "learning_rate": 9.912698412698413e-06, "loss": 27.5826, "step": 19921 }, { "epoch": 474.33432835820895, "grad_norm": 20.45071792602539, "learning_rate": 9.912202380952382e-06, "loss": 29.0132, "step": 19922 }, { "epoch": 474.35820895522386, "grad_norm": 30.28704261779785, "learning_rate": 9.911706349206351e-06, "loss": 28.136, "step": 19923 }, { "epoch": 474.3820895522388, "grad_norm": 23.57769203186035, "learning_rate": 9.911210317460318e-06, "loss": 29.5262, "step": 19924 }, { "epoch": 474.40597014925373, "grad_norm": 23.563297271728516, "learning_rate": 9.910714285714288e-06, "loss": 29.6456, "step": 19925 }, { "epoch": 474.42985074626864, "grad_norm": 33.386070251464844, "learning_rate": 9.910218253968255e-06, "loss": 29.4737, "step": 19926 }, { "epoch": 474.4537313432836, "grad_norm": 21.406179428100586, "learning_rate": 9.909722222222224e-06, "loss": 28.2609, "step": 19927 }, { "epoch": 474.4776119402985, "grad_norm": 44.05112075805664, "learning_rate": 9.909226190476191e-06, "loss": 29.8127, "step": 19928 }, { "epoch": 474.5014925373134, "grad_norm": 32.098793029785156, "learning_rate": 9.90873015873016e-06, "loss": 29.9435, "step": 19929 }, { "epoch": 474.52537313432833, "grad_norm": 46.76020431518555, "learning_rate": 9.908234126984128e-06, "loss": 28.7744, "step": 19930 }, { "epoch": 474.5492537313433, "grad_norm": 35.252777099609375, "learning_rate": 9.907738095238097e-06, "loss": 28.6496, "step": 19931 }, { "epoch": 474.5731343283582, "grad_norm": 42.62713623046875, "learning_rate": 9.907242063492064e-06, "loss": 28.1506, "step": 19932 }, { "epoch": 474.5970149253731, "grad_norm": 39.93196487426758, "learning_rate": 9.906746031746033e-06, "loss": 29.6986, "step": 19933 }, { "epoch": 474.6208955223881, "grad_norm": 40.19843292236328, "learning_rate": 9.90625e-06, "loss": 29.1044, "step": 19934 }, { "epoch": 474.644776119403, "grad_norm": 36.8906135559082, "learning_rate": 9.90575396825397e-06, "loss": 29.5219, "step": 19935 }, { "epoch": 474.6686567164179, "grad_norm": 38.332191467285156, "learning_rate": 9.905257936507937e-06, "loss": 28.0681, "step": 19936 }, { "epoch": 474.6925373134328, "grad_norm": 32.24909210205078, "learning_rate": 9.904761904761906e-06, "loss": 28.9423, "step": 19937 }, { "epoch": 474.7164179104478, "grad_norm": 39.6190185546875, "learning_rate": 9.904265873015873e-06, "loss": 29.4578, "step": 19938 }, { "epoch": 474.7402985074627, "grad_norm": 33.68924331665039, "learning_rate": 9.903769841269842e-06, "loss": 28.9885, "step": 19939 }, { "epoch": 474.7641791044776, "grad_norm": 35.39975357055664, "learning_rate": 9.90327380952381e-06, "loss": 28.5214, "step": 19940 }, { "epoch": 474.78805970149256, "grad_norm": 32.87039566040039, "learning_rate": 9.902777777777778e-06, "loss": 29.1563, "step": 19941 }, { "epoch": 474.81194029850747, "grad_norm": 36.44541549682617, "learning_rate": 9.902281746031747e-06, "loss": 29.7335, "step": 19942 }, { "epoch": 474.8358208955224, "grad_norm": 31.115802764892578, "learning_rate": 9.901785714285715e-06, "loss": 29.9665, "step": 19943 }, { "epoch": 474.85970149253734, "grad_norm": 37.52363204956055, "learning_rate": 9.901289682539684e-06, "loss": 29.4497, "step": 19944 }, { "epoch": 474.88358208955225, "grad_norm": 32.975311279296875, "learning_rate": 9.900793650793653e-06, "loss": 28.9101, "step": 19945 }, { "epoch": 474.90746268656716, "grad_norm": 40.700645446777344, "learning_rate": 9.90029761904762e-06, "loss": 29.1758, "step": 19946 }, { "epoch": 474.93134328358207, "grad_norm": 37.00098419189453, "learning_rate": 9.899801587301589e-06, "loss": 28.6901, "step": 19947 }, { "epoch": 474.95522388059703, "grad_norm": 38.5326042175293, "learning_rate": 9.899305555555556e-06, "loss": 28.2591, "step": 19948 }, { "epoch": 474.97910447761194, "grad_norm": 32.32034683227539, "learning_rate": 9.898809523809525e-06, "loss": 28.9759, "step": 19949 }, { "epoch": 475.0, "grad_norm": 35.301578521728516, "learning_rate": 9.898313492063493e-06, "loss": 25.369, "step": 19950 }, { "epoch": 475.0238805970149, "grad_norm": 32.51556396484375, "learning_rate": 9.897817460317462e-06, "loss": 29.2318, "step": 19951 }, { "epoch": 475.0477611940299, "grad_norm": 37.05495071411133, "learning_rate": 9.897321428571429e-06, "loss": 28.9011, "step": 19952 }, { "epoch": 475.0716417910448, "grad_norm": 31.503154754638672, "learning_rate": 9.896825396825398e-06, "loss": 28.8056, "step": 19953 }, { "epoch": 475.0955223880597, "grad_norm": 35.54788589477539, "learning_rate": 9.896329365079365e-06, "loss": 28.4992, "step": 19954 }, { "epoch": 475.1194029850746, "grad_norm": 28.8798828125, "learning_rate": 9.895833333333334e-06, "loss": 28.3148, "step": 19955 }, { "epoch": 475.14328358208957, "grad_norm": 40.07311248779297, "learning_rate": 9.895337301587302e-06, "loss": 27.6682, "step": 19956 }, { "epoch": 475.1671641791045, "grad_norm": 34.4341926574707, "learning_rate": 9.89484126984127e-06, "loss": 29.1752, "step": 19957 }, { "epoch": 475.1910447761194, "grad_norm": 37.17250061035156, "learning_rate": 9.894345238095238e-06, "loss": 28.7507, "step": 19958 }, { "epoch": 475.21492537313435, "grad_norm": 33.938350677490234, "learning_rate": 9.893849206349207e-06, "loss": 30.2095, "step": 19959 }, { "epoch": 475.23880597014926, "grad_norm": NaN, "learning_rate": 9.893353174603174e-06, "loss": 33.8526, "step": 19960 }, { "epoch": 475.26268656716417, "grad_norm": 34.27640914916992, "learning_rate": 9.893353174603174e-06, "loss": 28.6695, "step": 19961 }, { "epoch": 475.28656716417913, "grad_norm": 31.252012252807617, "learning_rate": 9.892857142857143e-06, "loss": 28.197, "step": 19962 }, { "epoch": 475.31044776119404, "grad_norm": 31.371944427490234, "learning_rate": 9.892361111111113e-06, "loss": 28.1965, "step": 19963 }, { "epoch": 475.33432835820895, "grad_norm": 25.581090927124023, "learning_rate": 9.89186507936508e-06, "loss": 28.0676, "step": 19964 }, { "epoch": 475.35820895522386, "grad_norm": 29.677453994750977, "learning_rate": 9.891369047619049e-06, "loss": 28.3714, "step": 19965 }, { "epoch": 475.3820895522388, "grad_norm": 23.091285705566406, "learning_rate": 9.890873015873018e-06, "loss": 28.379, "step": 19966 }, { "epoch": 475.40597014925373, "grad_norm": 30.91316032409668, "learning_rate": 9.890376984126985e-06, "loss": 28.7257, "step": 19967 }, { "epoch": 475.42985074626864, "grad_norm": 22.67203712463379, "learning_rate": 9.889880952380954e-06, "loss": 28.7978, "step": 19968 }, { "epoch": 475.4537313432836, "grad_norm": 35.88056182861328, "learning_rate": 9.889384920634922e-06, "loss": 29.3278, "step": 19969 }, { "epoch": 475.4776119402985, "grad_norm": 26.078975677490234, "learning_rate": 9.88888888888889e-06, "loss": 29.1941, "step": 19970 }, { "epoch": 475.5014925373134, "grad_norm": 33.55527114868164, "learning_rate": 9.888392857142858e-06, "loss": 28.5448, "step": 19971 }, { "epoch": 475.52537313432833, "grad_norm": 28.407503128051758, "learning_rate": 9.887896825396827e-06, "loss": 29.4596, "step": 19972 }, { "epoch": 475.5492537313433, "grad_norm": 32.13499450683594, "learning_rate": 9.887400793650794e-06, "loss": 29.3326, "step": 19973 }, { "epoch": 475.5731343283582, "grad_norm": 31.87044906616211, "learning_rate": 9.886904761904763e-06, "loss": 29.7177, "step": 19974 }, { "epoch": 475.5970149253731, "grad_norm": 25.34290885925293, "learning_rate": 9.88640873015873e-06, "loss": 28.7635, "step": 19975 }, { "epoch": 475.6208955223881, "grad_norm": 23.515888214111328, "learning_rate": 9.8859126984127e-06, "loss": 28.8062, "step": 19976 }, { "epoch": 475.644776119403, "grad_norm": 27.76143455505371, "learning_rate": 9.885416666666667e-06, "loss": 29.7828, "step": 19977 }, { "epoch": 475.6686567164179, "grad_norm": 22.57871437072754, "learning_rate": 9.884920634920636e-06, "loss": 28.4372, "step": 19978 }, { "epoch": 475.6925373134328, "grad_norm": 26.393295288085938, "learning_rate": 9.884424603174603e-06, "loss": 27.7356, "step": 19979 }, { "epoch": 475.7164179104478, "grad_norm": 20.88528823852539, "learning_rate": 9.883928571428572e-06, "loss": 28.6534, "step": 19980 }, { "epoch": 475.7402985074627, "grad_norm": 22.21483039855957, "learning_rate": 9.88343253968254e-06, "loss": 28.7174, "step": 19981 }, { "epoch": 475.7641791044776, "grad_norm": 21.612873077392578, "learning_rate": 9.882936507936509e-06, "loss": 28.7254, "step": 19982 }, { "epoch": 475.78805970149256, "grad_norm": 22.080617904663086, "learning_rate": 9.882440476190478e-06, "loss": 27.7633, "step": 19983 }, { "epoch": 475.81194029850747, "grad_norm": 18.41670799255371, "learning_rate": 9.881944444444445e-06, "loss": 29.2582, "step": 19984 }, { "epoch": 475.8358208955224, "grad_norm": 22.295820236206055, "learning_rate": 9.881448412698414e-06, "loss": 30.2879, "step": 19985 }, { "epoch": 475.85970149253734, "grad_norm": 18.671546936035156, "learning_rate": 9.880952380952381e-06, "loss": 29.5194, "step": 19986 }, { "epoch": 475.88358208955225, "grad_norm": 19.915266036987305, "learning_rate": 9.88045634920635e-06, "loss": 29.694, "step": 19987 }, { "epoch": 475.90746268656716, "grad_norm": 22.668834686279297, "learning_rate": 9.87996031746032e-06, "loss": 28.4325, "step": 19988 }, { "epoch": 475.93134328358207, "grad_norm": 18.24365234375, "learning_rate": 9.879464285714287e-06, "loss": 28.606, "step": 19989 }, { "epoch": 475.95522388059703, "grad_norm": 23.580520629882812, "learning_rate": 9.878968253968256e-06, "loss": 28.9731, "step": 19990 }, { "epoch": 475.97910447761194, "grad_norm": 19.860042572021484, "learning_rate": 9.878472222222223e-06, "loss": 29.1227, "step": 19991 }, { "epoch": 476.0, "grad_norm": 19.978227615356445, "learning_rate": 9.877976190476192e-06, "loss": 25.9231, "step": 19992 }, { "epoch": 476.0238805970149, "grad_norm": 22.569692611694336, "learning_rate": 9.87748015873016e-06, "loss": 27.907, "step": 19993 }, { "epoch": 476.0477611940299, "grad_norm": 21.620622634887695, "learning_rate": 9.876984126984128e-06, "loss": 29.0792, "step": 19994 }, { "epoch": 476.0716417910448, "grad_norm": 21.59153175354004, "learning_rate": 9.876488095238096e-06, "loss": 28.8125, "step": 19995 }, { "epoch": 476.0955223880597, "grad_norm": 21.304000854492188, "learning_rate": 9.875992063492065e-06, "loss": 29.3301, "step": 19996 }, { "epoch": 476.1194029850746, "grad_norm": 24.158193588256836, "learning_rate": 9.875496031746032e-06, "loss": 28.0898, "step": 19997 }, { "epoch": 476.14328358208957, "grad_norm": 21.428970336914062, "learning_rate": 9.875000000000001e-06, "loss": 29.3261, "step": 19998 }, { "epoch": 476.1671641791045, "grad_norm": 18.84784698486328, "learning_rate": 9.874503968253968e-06, "loss": 29.0128, "step": 19999 }, { "epoch": 476.1910447761194, "grad_norm": 24.298227310180664, "learning_rate": 9.874007936507938e-06, "loss": 28.3589, "step": 20000 }, { "epoch": 476.21492537313435, "grad_norm": 19.766069412231445, "learning_rate": 9.873511904761905e-06, "loss": 29.2432, "step": 20001 }, { "epoch": 476.23880597014926, "grad_norm": 26.47551727294922, "learning_rate": 9.873015873015874e-06, "loss": 28.7378, "step": 20002 }, { "epoch": 476.26268656716417, "grad_norm": 20.200963973999023, "learning_rate": 9.872519841269841e-06, "loss": 29.2443, "step": 20003 }, { "epoch": 476.28656716417913, "grad_norm": 28.267419815063477, "learning_rate": 9.87202380952381e-06, "loss": 29.3882, "step": 20004 }, { "epoch": 476.31044776119404, "grad_norm": 22.640520095825195, "learning_rate": 9.87152777777778e-06, "loss": 28.9684, "step": 20005 }, { "epoch": 476.33432835820895, "grad_norm": 24.50528335571289, "learning_rate": 9.871031746031747e-06, "loss": 27.7658, "step": 20006 }, { "epoch": 476.35820895522386, "grad_norm": 21.654760360717773, "learning_rate": 9.870535714285716e-06, "loss": 28.2696, "step": 20007 }, { "epoch": 476.3820895522388, "grad_norm": 25.23786735534668, "learning_rate": 9.870039682539685e-06, "loss": 30.2183, "step": 20008 }, { "epoch": 476.40597014925373, "grad_norm": 22.403377532958984, "learning_rate": 9.869543650793652e-06, "loss": 29.3872, "step": 20009 }, { "epoch": 476.42985074626864, "grad_norm": 23.16541290283203, "learning_rate": 9.869047619047621e-06, "loss": 28.9051, "step": 20010 }, { "epoch": 476.4537313432836, "grad_norm": 20.560955047607422, "learning_rate": 9.868551587301588e-06, "loss": 29.3676, "step": 20011 }, { "epoch": 476.4776119402985, "grad_norm": 24.604143142700195, "learning_rate": 9.868055555555557e-06, "loss": 27.4335, "step": 20012 }, { "epoch": 476.5014925373134, "grad_norm": 24.74250602722168, "learning_rate": 9.867559523809525e-06, "loss": 28.7853, "step": 20013 }, { "epoch": 476.52537313432833, "grad_norm": 19.583236694335938, "learning_rate": 9.867063492063494e-06, "loss": 28.1116, "step": 20014 }, { "epoch": 476.5492537313433, "grad_norm": 21.19641876220703, "learning_rate": 9.866567460317461e-06, "loss": 29.1223, "step": 20015 }, { "epoch": 476.5731343283582, "grad_norm": 19.40864372253418, "learning_rate": 9.86607142857143e-06, "loss": 27.8285, "step": 20016 }, { "epoch": 476.5970149253731, "grad_norm": 21.315650939941406, "learning_rate": 9.865575396825397e-06, "loss": 28.6917, "step": 20017 }, { "epoch": 476.6208955223881, "grad_norm": 22.075759887695312, "learning_rate": 9.865079365079366e-06, "loss": 29.8995, "step": 20018 }, { "epoch": 476.644776119403, "grad_norm": 21.8599910736084, "learning_rate": 9.864583333333334e-06, "loss": 29.654, "step": 20019 }, { "epoch": 476.6686567164179, "grad_norm": 22.064714431762695, "learning_rate": 9.864087301587303e-06, "loss": 28.276, "step": 20020 }, { "epoch": 476.6925373134328, "grad_norm": 21.046098709106445, "learning_rate": 9.86359126984127e-06, "loss": 29.0298, "step": 20021 }, { "epoch": 476.7164179104478, "grad_norm": 21.310020446777344, "learning_rate": 9.863095238095239e-06, "loss": 28.8067, "step": 20022 }, { "epoch": 476.7402985074627, "grad_norm": 24.117107391357422, "learning_rate": 9.862599206349206e-06, "loss": 28.5039, "step": 20023 }, { "epoch": 476.7641791044776, "grad_norm": 25.02859115600586, "learning_rate": 9.862103174603175e-06, "loss": 28.9914, "step": 20024 }, { "epoch": 476.78805970149256, "grad_norm": 20.155445098876953, "learning_rate": 9.861607142857144e-06, "loss": 28.7562, "step": 20025 }, { "epoch": 476.81194029850747, "grad_norm": 19.630525588989258, "learning_rate": 9.861111111111112e-06, "loss": 28.8357, "step": 20026 }, { "epoch": 476.8358208955224, "grad_norm": 21.64138412475586, "learning_rate": 9.86061507936508e-06, "loss": 29.5638, "step": 20027 }, { "epoch": 476.85970149253734, "grad_norm": 23.30614471435547, "learning_rate": 9.860119047619048e-06, "loss": 28.9632, "step": 20028 }, { "epoch": 476.88358208955225, "grad_norm": 23.495332717895508, "learning_rate": 9.859623015873017e-06, "loss": 29.3263, "step": 20029 }, { "epoch": 476.90746268656716, "grad_norm": 20.316940307617188, "learning_rate": 9.859126984126986e-06, "loss": 29.6812, "step": 20030 }, { "epoch": 476.93134328358207, "grad_norm": 28.81653594970703, "learning_rate": 9.858630952380953e-06, "loss": 28.3243, "step": 20031 }, { "epoch": 476.95522388059703, "grad_norm": 24.493192672729492, "learning_rate": 9.858134920634922e-06, "loss": 29.9693, "step": 20032 }, { "epoch": 476.97910447761194, "grad_norm": 19.490127563476562, "learning_rate": 9.85763888888889e-06, "loss": 28.6086, "step": 20033 }, { "epoch": 477.0, "grad_norm": 23.5108585357666, "learning_rate": 9.857142857142859e-06, "loss": 25.3471, "step": 20034 }, { "epoch": 477.0238805970149, "grad_norm": 22.95199203491211, "learning_rate": 9.856646825396826e-06, "loss": 29.0635, "step": 20035 }, { "epoch": 477.0477611940299, "grad_norm": 19.895967483520508, "learning_rate": 9.856150793650795e-06, "loss": 27.7474, "step": 20036 }, { "epoch": 477.0716417910448, "grad_norm": 29.656370162963867, "learning_rate": 9.855654761904763e-06, "loss": 28.8574, "step": 20037 }, { "epoch": 477.0955223880597, "grad_norm": 22.4112606048584, "learning_rate": 9.855158730158732e-06, "loss": 27.7741, "step": 20038 }, { "epoch": 477.1194029850746, "grad_norm": 17.50308609008789, "learning_rate": 9.854662698412699e-06, "loss": 28.3282, "step": 20039 }, { "epoch": 477.14328358208957, "grad_norm": 27.75299835205078, "learning_rate": 9.854166666666668e-06, "loss": 28.9853, "step": 20040 }, { "epoch": 477.1671641791045, "grad_norm": 20.684833526611328, "learning_rate": 9.853670634920635e-06, "loss": 28.7685, "step": 20041 }, { "epoch": 477.1910447761194, "grad_norm": 20.125850677490234, "learning_rate": 9.853174603174604e-06, "loss": 29.2388, "step": 20042 }, { "epoch": 477.21492537313435, "grad_norm": 20.25307273864746, "learning_rate": 9.852678571428572e-06, "loss": 28.4089, "step": 20043 }, { "epoch": 477.23880597014926, "grad_norm": 20.54622459411621, "learning_rate": 9.85218253968254e-06, "loss": 28.733, "step": 20044 }, { "epoch": 477.26268656716417, "grad_norm": 22.199771881103516, "learning_rate": 9.851686507936508e-06, "loss": 27.6529, "step": 20045 }, { "epoch": 477.28656716417913, "grad_norm": 19.888200759887695, "learning_rate": 9.851190476190477e-06, "loss": 28.8039, "step": 20046 }, { "epoch": 477.31044776119404, "grad_norm": 21.05242347717285, "learning_rate": 9.850694444444446e-06, "loss": 28.9922, "step": 20047 }, { "epoch": 477.33432835820895, "grad_norm": 27.259843826293945, "learning_rate": 9.850198412698413e-06, "loss": 29.7257, "step": 20048 }, { "epoch": 477.35820895522386, "grad_norm": 20.52132225036621, "learning_rate": 9.849702380952382e-06, "loss": 28.9505, "step": 20049 }, { "epoch": 477.3820895522388, "grad_norm": 19.856258392333984, "learning_rate": 9.849206349206351e-06, "loss": 28.2457, "step": 20050 }, { "epoch": 477.40597014925373, "grad_norm": 18.868064880371094, "learning_rate": 9.848710317460319e-06, "loss": 28.5497, "step": 20051 }, { "epoch": 477.42985074626864, "grad_norm": 19.866289138793945, "learning_rate": 9.848214285714288e-06, "loss": 29.4852, "step": 20052 }, { "epoch": 477.4537313432836, "grad_norm": 28.517311096191406, "learning_rate": 9.847718253968255e-06, "loss": 29.2419, "step": 20053 }, { "epoch": 477.4776119402985, "grad_norm": 21.656312942504883, "learning_rate": 9.847222222222224e-06, "loss": 29.0787, "step": 20054 }, { "epoch": 477.5014925373134, "grad_norm": 18.255203247070312, "learning_rate": 9.846726190476191e-06, "loss": 28.1134, "step": 20055 }, { "epoch": 477.52537313432833, "grad_norm": 24.236461639404297, "learning_rate": 9.84623015873016e-06, "loss": 29.1855, "step": 20056 }, { "epoch": 477.5492537313433, "grad_norm": 25.48997688293457, "learning_rate": 9.845734126984128e-06, "loss": 29.3841, "step": 20057 }, { "epoch": 477.5731343283582, "grad_norm": 18.344154357910156, "learning_rate": 9.845238095238097e-06, "loss": 29.1679, "step": 20058 }, { "epoch": 477.5970149253731, "grad_norm": 18.593276977539062, "learning_rate": 9.844742063492064e-06, "loss": 28.5664, "step": 20059 }, { "epoch": 477.6208955223881, "grad_norm": 21.04168128967285, "learning_rate": 9.844246031746033e-06, "loss": 28.4966, "step": 20060 }, { "epoch": 477.644776119403, "grad_norm": 18.83087921142578, "learning_rate": 9.84375e-06, "loss": 29.0124, "step": 20061 }, { "epoch": 477.6686567164179, "grad_norm": 19.876909255981445, "learning_rate": 9.843253968253968e-06, "loss": 30.396, "step": 20062 }, { "epoch": 477.6925373134328, "grad_norm": 19.70854377746582, "learning_rate": 9.842757936507937e-06, "loss": 28.3864, "step": 20063 }, { "epoch": 477.7164179104478, "grad_norm": 20.15097427368164, "learning_rate": 9.842261904761906e-06, "loss": 29.7295, "step": 20064 }, { "epoch": 477.7402985074627, "grad_norm": 21.8720703125, "learning_rate": 9.841765873015873e-06, "loss": 28.3605, "step": 20065 }, { "epoch": 477.7641791044776, "grad_norm": 22.32832145690918, "learning_rate": 9.841269841269842e-06, "loss": 28.2719, "step": 20066 }, { "epoch": 477.78805970149256, "grad_norm": 19.60017204284668, "learning_rate": 9.840773809523811e-06, "loss": 29.868, "step": 20067 }, { "epoch": 477.81194029850747, "grad_norm": 20.21356964111328, "learning_rate": 9.840277777777778e-06, "loss": 29.5714, "step": 20068 }, { "epoch": 477.8358208955224, "grad_norm": 18.702484130859375, "learning_rate": 9.839781746031747e-06, "loss": 28.3179, "step": 20069 }, { "epoch": 477.85970149253734, "grad_norm": 20.100799560546875, "learning_rate": 9.839285714285715e-06, "loss": 28.2994, "step": 20070 }, { "epoch": 477.88358208955225, "grad_norm": 22.726518630981445, "learning_rate": 9.838789682539684e-06, "loss": 28.7268, "step": 20071 }, { "epoch": 477.90746268656716, "grad_norm": 21.930782318115234, "learning_rate": 9.838293650793651e-06, "loss": 28.8636, "step": 20072 }, { "epoch": 477.93134328358207, "grad_norm": 21.225326538085938, "learning_rate": 9.83779761904762e-06, "loss": 30.4687, "step": 20073 }, { "epoch": 477.95522388059703, "grad_norm": 19.335861206054688, "learning_rate": 9.837301587301588e-06, "loss": 28.9519, "step": 20074 }, { "epoch": 477.97910447761194, "grad_norm": 22.18132209777832, "learning_rate": 9.836805555555557e-06, "loss": 29.2359, "step": 20075 }, { "epoch": 478.0, "grad_norm": 19.979711532592773, "learning_rate": 9.836309523809524e-06, "loss": 24.009, "step": 20076 }, { "epoch": 478.0238805970149, "grad_norm": 22.19866180419922, "learning_rate": 9.835813492063493e-06, "loss": 29.613, "step": 20077 }, { "epoch": 478.0477611940299, "grad_norm": 19.449928283691406, "learning_rate": 9.83531746031746e-06, "loss": 28.087, "step": 20078 }, { "epoch": 478.0716417910448, "grad_norm": 22.50609588623047, "learning_rate": 9.83482142857143e-06, "loss": 29.3478, "step": 20079 }, { "epoch": 478.0955223880597, "grad_norm": 18.857681274414062, "learning_rate": 9.834325396825397e-06, "loss": 29.2307, "step": 20080 }, { "epoch": 478.1194029850746, "grad_norm": 22.512802124023438, "learning_rate": 9.833829365079366e-06, "loss": 28.9821, "step": 20081 }, { "epoch": 478.14328358208957, "grad_norm": 19.95056915283203, "learning_rate": 9.833333333333333e-06, "loss": 28.8229, "step": 20082 }, { "epoch": 478.1671641791045, "grad_norm": 20.649404525756836, "learning_rate": 9.832837301587302e-06, "loss": 28.5079, "step": 20083 }, { "epoch": 478.1910447761194, "grad_norm": 25.335416793823242, "learning_rate": 9.832341269841271e-06, "loss": 29.3005, "step": 20084 }, { "epoch": 478.21492537313435, "grad_norm": 21.936880111694336, "learning_rate": 9.831845238095238e-06, "loss": 28.1536, "step": 20085 }, { "epoch": 478.23880597014926, "grad_norm": 19.70534324645996, "learning_rate": 9.831349206349207e-06, "loss": 27.9557, "step": 20086 }, { "epoch": 478.26268656716417, "grad_norm": 18.259496688842773, "learning_rate": 9.830853174603175e-06, "loss": 29.722, "step": 20087 }, { "epoch": 478.28656716417913, "grad_norm": 19.366130828857422, "learning_rate": 9.830357142857144e-06, "loss": 28.5794, "step": 20088 }, { "epoch": 478.31044776119404, "grad_norm": 23.058292388916016, "learning_rate": 9.829861111111113e-06, "loss": 29.5271, "step": 20089 }, { "epoch": 478.33432835820895, "grad_norm": 22.784347534179688, "learning_rate": 9.82936507936508e-06, "loss": 29.2457, "step": 20090 }, { "epoch": 478.35820895522386, "grad_norm": 25.689149856567383, "learning_rate": 9.828869047619049e-06, "loss": 27.8423, "step": 20091 }, { "epoch": 478.3820895522388, "grad_norm": 19.807090759277344, "learning_rate": 9.828373015873016e-06, "loss": 29.0071, "step": 20092 }, { "epoch": 478.40597014925373, "grad_norm": 23.231769561767578, "learning_rate": 9.827876984126985e-06, "loss": 29.1063, "step": 20093 }, { "epoch": 478.42985074626864, "grad_norm": 21.45319366455078, "learning_rate": 9.827380952380953e-06, "loss": 29.4278, "step": 20094 }, { "epoch": 478.4537313432836, "grad_norm": 19.159465789794922, "learning_rate": 9.826884920634922e-06, "loss": 28.6459, "step": 20095 }, { "epoch": 478.4776119402985, "grad_norm": 26.168617248535156, "learning_rate": 9.826388888888889e-06, "loss": 29.1004, "step": 20096 }, { "epoch": 478.5014925373134, "grad_norm": 21.362560272216797, "learning_rate": 9.825892857142858e-06, "loss": 28.7545, "step": 20097 }, { "epoch": 478.52537313432833, "grad_norm": 23.432018280029297, "learning_rate": 9.825396825396825e-06, "loss": 27.9458, "step": 20098 }, { "epoch": 478.5492537313433, "grad_norm": 23.440340042114258, "learning_rate": 9.824900793650794e-06, "loss": 29.1027, "step": 20099 }, { "epoch": 478.5731343283582, "grad_norm": 21.281414031982422, "learning_rate": 9.824404761904762e-06, "loss": 30.8618, "step": 20100 }, { "epoch": 478.5970149253731, "grad_norm": 20.597326278686523, "learning_rate": 9.82390873015873e-06, "loss": 28.6904, "step": 20101 }, { "epoch": 478.6208955223881, "grad_norm": 19.968257904052734, "learning_rate": 9.823412698412698e-06, "loss": 28.4608, "step": 20102 }, { "epoch": 478.644776119403, "grad_norm": 22.74517059326172, "learning_rate": 9.822916666666667e-06, "loss": 28.3677, "step": 20103 }, { "epoch": 478.6686567164179, "grad_norm": 24.303682327270508, "learning_rate": 9.822420634920634e-06, "loss": 28.1006, "step": 20104 }, { "epoch": 478.6925373134328, "grad_norm": 18.249107360839844, "learning_rate": 9.821924603174603e-06, "loss": 28.6453, "step": 20105 }, { "epoch": 478.7164179104478, "grad_norm": 19.763927459716797, "learning_rate": 9.821428571428573e-06, "loss": 28.2593, "step": 20106 }, { "epoch": 478.7402985074627, "grad_norm": 18.220224380493164, "learning_rate": 9.82093253968254e-06, "loss": 28.911, "step": 20107 }, { "epoch": 478.7641791044776, "grad_norm": 27.74159049987793, "learning_rate": 9.820436507936509e-06, "loss": 29.954, "step": 20108 }, { "epoch": 478.78805970149256, "grad_norm": 21.56827163696289, "learning_rate": 9.819940476190478e-06, "loss": 29.8835, "step": 20109 }, { "epoch": 478.81194029850747, "grad_norm": 18.558122634887695, "learning_rate": 9.819444444444445e-06, "loss": 29.7678, "step": 20110 }, { "epoch": 478.8358208955224, "grad_norm": 21.06109046936035, "learning_rate": 9.818948412698414e-06, "loss": 28.6846, "step": 20111 }, { "epoch": 478.85970149253734, "grad_norm": 28.406217575073242, "learning_rate": 9.818452380952382e-06, "loss": 28.5815, "step": 20112 }, { "epoch": 478.88358208955225, "grad_norm": 22.025068283081055, "learning_rate": 9.81795634920635e-06, "loss": 28.0421, "step": 20113 }, { "epoch": 478.90746268656716, "grad_norm": 17.224693298339844, "learning_rate": 9.817460317460318e-06, "loss": 28.3245, "step": 20114 }, { "epoch": 478.93134328358207, "grad_norm": 19.429710388183594, "learning_rate": 9.816964285714287e-06, "loss": 27.8414, "step": 20115 }, { "epoch": 478.95522388059703, "grad_norm": 17.449539184570312, "learning_rate": 9.816468253968254e-06, "loss": 28.0465, "step": 20116 }, { "epoch": 478.97910447761194, "grad_norm": 27.281818389892578, "learning_rate": 9.815972222222223e-06, "loss": 28.7879, "step": 20117 }, { "epoch": 479.0, "grad_norm": 25.82404899597168, "learning_rate": 9.81547619047619e-06, "loss": 24.6422, "step": 20118 }, { "epoch": 479.0238805970149, "grad_norm": 21.03105926513672, "learning_rate": 9.81498015873016e-06, "loss": 29.8213, "step": 20119 }, { "epoch": 479.0477611940299, "grad_norm": 22.494203567504883, "learning_rate": 9.814484126984127e-06, "loss": 29.0493, "step": 20120 }, { "epoch": 479.0716417910448, "grad_norm": 20.890405654907227, "learning_rate": 9.813988095238096e-06, "loss": 29.0215, "step": 20121 }, { "epoch": 479.0955223880597, "grad_norm": 20.07906150817871, "learning_rate": 9.813492063492063e-06, "loss": 29.1978, "step": 20122 }, { "epoch": 479.1194029850746, "grad_norm": 22.30605697631836, "learning_rate": 9.812996031746032e-06, "loss": 29.0661, "step": 20123 }, { "epoch": 479.14328358208957, "grad_norm": 19.23296356201172, "learning_rate": 9.8125e-06, "loss": 28.1018, "step": 20124 }, { "epoch": 479.1671641791045, "grad_norm": 20.519678115844727, "learning_rate": 9.812003968253969e-06, "loss": 28.8164, "step": 20125 }, { "epoch": 479.1910447761194, "grad_norm": 19.62612533569336, "learning_rate": 9.811507936507938e-06, "loss": 27.9606, "step": 20126 }, { "epoch": 479.21492537313435, "grad_norm": 19.5782527923584, "learning_rate": 9.811011904761905e-06, "loss": 29.158, "step": 20127 }, { "epoch": 479.23880597014926, "grad_norm": 16.35541534423828, "learning_rate": 9.810515873015874e-06, "loss": 28.2425, "step": 20128 }, { "epoch": 479.26268656716417, "grad_norm": 22.22014617919922, "learning_rate": 9.810019841269841e-06, "loss": 28.7745, "step": 20129 }, { "epoch": 479.28656716417913, "grad_norm": 18.676942825317383, "learning_rate": 9.80952380952381e-06, "loss": 28.3816, "step": 20130 }, { "epoch": 479.31044776119404, "grad_norm": 30.270145416259766, "learning_rate": 9.80902777777778e-06, "loss": 29.5513, "step": 20131 }, { "epoch": 479.33432835820895, "grad_norm": 23.516271591186523, "learning_rate": 9.808531746031747e-06, "loss": 28.6869, "step": 20132 }, { "epoch": 479.35820895522386, "grad_norm": 20.43653106689453, "learning_rate": 9.808035714285716e-06, "loss": 28.7659, "step": 20133 }, { "epoch": 479.3820895522388, "grad_norm": 21.9481201171875, "learning_rate": 9.807539682539683e-06, "loss": 29.1735, "step": 20134 }, { "epoch": 479.40597014925373, "grad_norm": 24.646465301513672, "learning_rate": 9.807043650793652e-06, "loss": 28.0261, "step": 20135 }, { "epoch": 479.42985074626864, "grad_norm": 23.00975799560547, "learning_rate": 9.80654761904762e-06, "loss": 28.7564, "step": 20136 }, { "epoch": 479.4537313432836, "grad_norm": 18.543306350708008, "learning_rate": 9.806051587301588e-06, "loss": 29.3112, "step": 20137 }, { "epoch": 479.4776119402985, "grad_norm": 16.890037536621094, "learning_rate": 9.805555555555556e-06, "loss": 28.7261, "step": 20138 }, { "epoch": 479.5014925373134, "grad_norm": 19.709848403930664, "learning_rate": 9.805059523809525e-06, "loss": 29.159, "step": 20139 }, { "epoch": 479.52537313432833, "grad_norm": 16.527368545532227, "learning_rate": 9.804563492063492e-06, "loss": 28.9869, "step": 20140 }, { "epoch": 479.5492537313433, "grad_norm": 18.89594841003418, "learning_rate": 9.804067460317461e-06, "loss": 28.1829, "step": 20141 }, { "epoch": 479.5731343283582, "grad_norm": 18.616872787475586, "learning_rate": 9.803571428571428e-06, "loss": 29.0325, "step": 20142 }, { "epoch": 479.5970149253731, "grad_norm": 20.08157730102539, "learning_rate": 9.803075396825398e-06, "loss": 28.6679, "step": 20143 }, { "epoch": 479.6208955223881, "grad_norm": 27.74882698059082, "learning_rate": 9.802579365079365e-06, "loss": 29.4991, "step": 20144 }, { "epoch": 479.644776119403, "grad_norm": 19.18621253967285, "learning_rate": 9.802083333333334e-06, "loss": 29.9387, "step": 20145 }, { "epoch": 479.6686567164179, "grad_norm": 18.57462501525879, "learning_rate": 9.801587301587301e-06, "loss": 29.6671, "step": 20146 }, { "epoch": 479.6925373134328, "grad_norm": 29.752283096313477, "learning_rate": 9.80109126984127e-06, "loss": 29.3933, "step": 20147 }, { "epoch": 479.7164179104478, "grad_norm": 21.03801155090332, "learning_rate": 9.80059523809524e-06, "loss": 28.3396, "step": 20148 }, { "epoch": 479.7402985074627, "grad_norm": 23.719074249267578, "learning_rate": 9.800099206349207e-06, "loss": 29.3868, "step": 20149 }, { "epoch": 479.7641791044776, "grad_norm": 28.864248275756836, "learning_rate": 9.799603174603176e-06, "loss": 28.5527, "step": 20150 }, { "epoch": 479.78805970149256, "grad_norm": 20.859743118286133, "learning_rate": 9.799107142857145e-06, "loss": 28.4296, "step": 20151 }, { "epoch": 479.81194029850747, "grad_norm": 29.062795639038086, "learning_rate": 9.798611111111112e-06, "loss": 28.5259, "step": 20152 }, { "epoch": 479.8358208955224, "grad_norm": 30.0617733001709, "learning_rate": 9.798115079365081e-06, "loss": 28.4228, "step": 20153 }, { "epoch": 479.85970149253734, "grad_norm": 19.43954086303711, "learning_rate": 9.797619047619048e-06, "loss": 29.3653, "step": 20154 }, { "epoch": 479.88358208955225, "grad_norm": 35.291465759277344, "learning_rate": 9.797123015873017e-06, "loss": 28.4925, "step": 20155 }, { "epoch": 479.90746268656716, "grad_norm": 24.626693725585938, "learning_rate": 9.796626984126985e-06, "loss": 28.4904, "step": 20156 }, { "epoch": 479.93134328358207, "grad_norm": 21.620014190673828, "learning_rate": 9.796130952380954e-06, "loss": 28.1416, "step": 20157 }, { "epoch": 479.95522388059703, "grad_norm": 32.271175384521484, "learning_rate": 9.795634920634921e-06, "loss": 29.3895, "step": 20158 }, { "epoch": 479.97910447761194, "grad_norm": 22.050737380981445, "learning_rate": 9.79513888888889e-06, "loss": 27.0958, "step": 20159 }, { "epoch": 480.0, "grad_norm": 26.27474021911621, "learning_rate": 9.794642857142857e-06, "loss": 24.0007, "step": 20160 }, { "epoch": 480.0, "step": 20160, "total_flos": 9.910422294428713e+17, "train_loss": 0.6043554852879237, "train_runtime": 12834.8117, "train_samples_per_second": 200.156, "train_steps_per_second": 1.571 }, { "epoch": 480.0238805970149, "grad_norm": 25.723268508911133, "learning_rate": 1e-05, "loss": 28.8431, "step": 20161 }, { "epoch": 480.0477611940299, "grad_norm": Infinity, "learning_rate": 9.99952380952381e-06, "loss": 35.1322, "step": 20162 }, { "epoch": 480.0716417910448, "grad_norm": 354.3346862792969, "learning_rate": 9.99952380952381e-06, "loss": 34.9322, "step": 20163 }, { "epoch": 480.0955223880597, "grad_norm": 165.04383850097656, "learning_rate": 9.99904761904762e-06, "loss": 34.7653, "step": 20164 }, { "epoch": 480.1194029850746, "grad_norm": 109.95718383789062, "learning_rate": 9.99857142857143e-06, "loss": 31.5661, "step": 20165 }, { "epoch": 480.14328358208957, "grad_norm": 63.13902282714844, "learning_rate": 9.998095238095239e-06, "loss": 31.0271, "step": 20166 }, { "epoch": 480.1671641791045, "grad_norm": 64.72493743896484, "learning_rate": 9.997619047619048e-06, "loss": 29.8589, "step": 20167 }, { "epoch": 480.1910447761194, "grad_norm": 59.331504821777344, "learning_rate": 9.997142857142858e-06, "loss": 30.1191, "step": 20168 }, { "epoch": 480.21492537313435, "grad_norm": 47.057796478271484, "learning_rate": 9.996666666666669e-06, "loss": 28.2344, "step": 20169 }, { "epoch": 480.23880597014926, "grad_norm": 54.76762390136719, "learning_rate": 9.996190476190476e-06, "loss": 29.8349, "step": 20170 }, { "epoch": 480.26268656716417, "grad_norm": 39.76959228515625, "learning_rate": 9.995714285714286e-06, "loss": 29.6354, "step": 20171 }, { "epoch": 480.28656716417913, "grad_norm": 33.12776565551758, "learning_rate": 9.995238095238095e-06, "loss": 29.2695, "step": 20172 }, { "epoch": 480.31044776119404, "grad_norm": 38.09038543701172, "learning_rate": 9.994761904761906e-06, "loss": 29.9943, "step": 20173 }, { "epoch": 480.33432835820895, "grad_norm": 36.06444549560547, "learning_rate": 9.994285714285716e-06, "loss": 28.1836, "step": 20174 }, { "epoch": 480.35820895522386, "grad_norm": 25.416536331176758, "learning_rate": 9.993809523809525e-06, "loss": 29.6217, "step": 20175 }, { "epoch": 480.3820895522388, "grad_norm": 24.371889114379883, "learning_rate": 9.993333333333333e-06, "loss": 28.5817, "step": 20176 }, { "epoch": 480.40597014925373, "grad_norm": 25.52541160583496, "learning_rate": 9.992857142857144e-06, "loss": 28.6827, "step": 20177 }, { "epoch": 480.42985074626864, "grad_norm": 26.93755531311035, "learning_rate": 9.992380952380954e-06, "loss": 28.4247, "step": 20178 }, { "epoch": 480.4537313432836, "grad_norm": 20.399927139282227, "learning_rate": 9.991904761904763e-06, "loss": 27.8332, "step": 20179 }, { "epoch": 480.4776119402985, "grad_norm": 24.121658325195312, "learning_rate": 9.991428571428573e-06, "loss": 29.8494, "step": 20180 }, { "epoch": 480.5014925373134, "grad_norm": 26.94188117980957, "learning_rate": 9.990952380952382e-06, "loss": 27.6666, "step": 20181 }, { "epoch": 480.52537313432833, "grad_norm": 28.049680709838867, "learning_rate": 9.990476190476191e-06, "loss": 29.3276, "step": 20182 }, { "epoch": 480.5492537313433, "grad_norm": 22.38313102722168, "learning_rate": 9.990000000000001e-06, "loss": 28.5654, "step": 20183 }, { "epoch": 480.5731343283582, "grad_norm": 20.977689743041992, "learning_rate": 9.98952380952381e-06, "loss": 29.1452, "step": 20184 }, { "epoch": 480.5970149253731, "grad_norm": 33.072044372558594, "learning_rate": 9.98904761904762e-06, "loss": 29.0129, "step": 20185 }, { "epoch": 480.6208955223881, "grad_norm": 21.850934982299805, "learning_rate": 9.98857142857143e-06, "loss": 29.0877, "step": 20186 }, { "epoch": 480.644776119403, "grad_norm": 23.96761131286621, "learning_rate": 9.988095238095239e-06, "loss": 29.4476, "step": 20187 }, { "epoch": 480.6686567164179, "grad_norm": 27.09062385559082, "learning_rate": 9.987619047619048e-06, "loss": 28.699, "step": 20188 }, { "epoch": 480.6925373134328, "grad_norm": 21.538965225219727, "learning_rate": 9.987142857142858e-06, "loss": 28.6378, "step": 20189 }, { "epoch": 480.7164179104478, "grad_norm": 20.351478576660156, "learning_rate": 9.986666666666667e-06, "loss": 28.7142, "step": 20190 }, { "epoch": 480.7402985074627, "grad_norm": 20.446372985839844, "learning_rate": 9.986190476190476e-06, "loss": 28.7609, "step": 20191 }, { "epoch": 480.7641791044776, "grad_norm": 21.336627960205078, "learning_rate": 9.985714285714286e-06, "loss": 29.5122, "step": 20192 }, { "epoch": 480.78805970149256, "grad_norm": 20.075637817382812, "learning_rate": 9.985238095238095e-06, "loss": 30.1867, "step": 20193 }, { "epoch": 480.81194029850747, "grad_norm": 19.935693740844727, "learning_rate": 9.984761904761907e-06, "loss": 28.3563, "step": 20194 }, { "epoch": 480.8358208955224, "grad_norm": 19.169353485107422, "learning_rate": 9.984285714285716e-06, "loss": 29.1849, "step": 20195 }, { "epoch": 480.85970149253734, "grad_norm": 18.909347534179688, "learning_rate": 9.983809523809524e-06, "loss": 28.4189, "step": 20196 }, { "epoch": 480.88358208955225, "grad_norm": 20.45849609375, "learning_rate": 9.983333333333333e-06, "loss": 27.9645, "step": 20197 }, { "epoch": 480.90746268656716, "grad_norm": 22.947221755981445, "learning_rate": 9.982857142857144e-06, "loss": 29.8432, "step": 20198 }, { "epoch": 480.93134328358207, "grad_norm": 26.371912002563477, "learning_rate": 9.982380952380954e-06, "loss": 28.4873, "step": 20199 }, { "epoch": 480.95522388059703, "grad_norm": 18.589427947998047, "learning_rate": 9.981904761904763e-06, "loss": 28.0994, "step": 20200 }, { "epoch": 480.97910447761194, "grad_norm": 24.336427688598633, "learning_rate": 9.981428571428573e-06, "loss": 28.7166, "step": 20201 }, { "epoch": 481.0, "grad_norm": 24.16462516784668, "learning_rate": 9.980952380952382e-06, "loss": 25.196, "step": 20202 }, { "epoch": 481.0238805970149, "grad_norm": 23.438302993774414, "learning_rate": 9.980476190476192e-06, "loss": 29.0592, "step": 20203 }, { "epoch": 481.0477611940299, "grad_norm": 18.65182113647461, "learning_rate": 9.980000000000001e-06, "loss": 28.9313, "step": 20204 }, { "epoch": 481.0716417910448, "grad_norm": 18.319074630737305, "learning_rate": 9.97952380952381e-06, "loss": 29.251, "step": 20205 }, { "epoch": 481.0955223880597, "grad_norm": 24.235713958740234, "learning_rate": 9.97904761904762e-06, "loss": 27.6206, "step": 20206 }, { "epoch": 481.1194029850746, "grad_norm": 27.445629119873047, "learning_rate": 9.97857142857143e-06, "loss": 28.9451, "step": 20207 }, { "epoch": 481.14328358208957, "grad_norm": 20.418249130249023, "learning_rate": 9.978095238095239e-06, "loss": 29.3195, "step": 20208 }, { "epoch": 481.1671641791045, "grad_norm": 23.20700454711914, "learning_rate": 9.977619047619048e-06, "loss": 29.2675, "step": 20209 }, { "epoch": 481.1910447761194, "grad_norm": 23.95560073852539, "learning_rate": 9.977142857142858e-06, "loss": 28.6586, "step": 20210 }, { "epoch": 481.21492537313435, "grad_norm": 24.961809158325195, "learning_rate": 9.976666666666667e-06, "loss": 28.7666, "step": 20211 }, { "epoch": 481.23880597014926, "grad_norm": 23.189964294433594, "learning_rate": 9.976190476190477e-06, "loss": 30.0668, "step": 20212 }, { "epoch": 481.26268656716417, "grad_norm": 21.233089447021484, "learning_rate": 9.975714285714286e-06, "loss": 28.9257, "step": 20213 }, { "epoch": 481.28656716417913, "grad_norm": 23.495697021484375, "learning_rate": 9.975238095238095e-06, "loss": 29.7954, "step": 20214 }, { "epoch": 481.31044776119404, "grad_norm": 25.266977310180664, "learning_rate": 9.974761904761907e-06, "loss": 28.7672, "step": 20215 }, { "epoch": 481.33432835820895, "grad_norm": 20.896425247192383, "learning_rate": 9.974285714285716e-06, "loss": 28.2724, "step": 20216 }, { "epoch": 481.35820895522386, "grad_norm": 25.76570701599121, "learning_rate": 9.973809523809524e-06, "loss": 29.8421, "step": 20217 }, { "epoch": 481.3820895522388, "grad_norm": 23.29555892944336, "learning_rate": 9.973333333333333e-06, "loss": 28.3823, "step": 20218 }, { "epoch": 481.40597014925373, "grad_norm": NaN, "learning_rate": 9.972857142857144e-06, "loss": 32.3143, "step": 20219 }, { "epoch": 481.42985074626864, "grad_norm": 21.067678451538086, "learning_rate": 9.972857142857144e-06, "loss": 28.7548, "step": 20220 }, { "epoch": 481.4537313432836, "grad_norm": 18.583986282348633, "learning_rate": 9.972380952380954e-06, "loss": 27.9842, "step": 20221 }, { "epoch": 481.4776119402985, "grad_norm": 20.916494369506836, "learning_rate": 9.971904761904763e-06, "loss": 29.4441, "step": 20222 }, { "epoch": 481.5014925373134, "grad_norm": 21.48413848876953, "learning_rate": 9.971428571428571e-06, "loss": 28.9537, "step": 20223 }, { "epoch": 481.52537313432833, "grad_norm": 20.850095748901367, "learning_rate": 9.970952380952382e-06, "loss": 28.9254, "step": 20224 }, { "epoch": 481.5492537313433, "grad_norm": 24.294078826904297, "learning_rate": 9.970476190476192e-06, "loss": 28.6475, "step": 20225 }, { "epoch": 481.5731343283582, "grad_norm": 17.041154861450195, "learning_rate": 9.970000000000001e-06, "loss": 30.0491, "step": 20226 }, { "epoch": 481.5970149253731, "grad_norm": 26.550004959106445, "learning_rate": 9.96952380952381e-06, "loss": 27.7713, "step": 20227 }, { "epoch": 481.6208955223881, "grad_norm": 19.902069091796875, "learning_rate": 9.96904761904762e-06, "loss": 28.627, "step": 20228 }, { "epoch": 481.644776119403, "grad_norm": 23.518800735473633, "learning_rate": 9.96857142857143e-06, "loss": 27.6447, "step": 20229 }, { "epoch": 481.6686567164179, "grad_norm": 21.663528442382812, "learning_rate": 9.968095238095239e-06, "loss": 28.5816, "step": 20230 }, { "epoch": 481.6925373134328, "grad_norm": 21.660457611083984, "learning_rate": 9.967619047619048e-06, "loss": 28.4233, "step": 20231 }, { "epoch": 481.7164179104478, "grad_norm": 22.633602142333984, "learning_rate": 9.967142857142858e-06, "loss": 28.6538, "step": 20232 }, { "epoch": 481.7402985074627, "grad_norm": 23.07989501953125, "learning_rate": 9.966666666666667e-06, "loss": 28.9498, "step": 20233 }, { "epoch": 481.7641791044776, "grad_norm": 21.25726318359375, "learning_rate": 9.966190476190477e-06, "loss": 28.4038, "step": 20234 }, { "epoch": 481.78805970149256, "grad_norm": 22.63221549987793, "learning_rate": 9.965714285714286e-06, "loss": 29.569, "step": 20235 }, { "epoch": 481.81194029850747, "grad_norm": 25.31451988220215, "learning_rate": 9.965238095238096e-06, "loss": 27.8984, "step": 20236 }, { "epoch": 481.8358208955224, "grad_norm": 22.274919509887695, "learning_rate": 9.964761904761907e-06, "loss": 27.8957, "step": 20237 }, { "epoch": 481.85970149253734, "grad_norm": 21.37106704711914, "learning_rate": 9.964285714285714e-06, "loss": 29.1181, "step": 20238 }, { "epoch": 481.88358208955225, "grad_norm": 19.64527130126953, "learning_rate": 9.963809523809524e-06, "loss": 29.1078, "step": 20239 }, { "epoch": 481.90746268656716, "grad_norm": 27.223262786865234, "learning_rate": 9.963333333333333e-06, "loss": 29.5314, "step": 20240 }, { "epoch": 481.93134328358207, "grad_norm": 24.425275802612305, "learning_rate": 9.962857142857145e-06, "loss": 28.494, "step": 20241 }, { "epoch": 481.95522388059703, "grad_norm": 17.718719482421875, "learning_rate": 9.962380952380954e-06, "loss": 27.4268, "step": 20242 }, { "epoch": 481.97910447761194, "grad_norm": 20.2794132232666, "learning_rate": 9.961904761904763e-06, "loss": 27.382, "step": 20243 }, { "epoch": 482.0, "grad_norm": 19.360593795776367, "learning_rate": 9.961428571428571e-06, "loss": 25.089, "step": 20244 }, { "epoch": 482.0238805970149, "grad_norm": 28.829143524169922, "learning_rate": 9.960952380952382e-06, "loss": 27.3767, "step": 20245 }, { "epoch": 482.0477611940299, "grad_norm": 19.257179260253906, "learning_rate": 9.960476190476192e-06, "loss": 28.5036, "step": 20246 }, { "epoch": 482.0716417910448, "grad_norm": 22.647342681884766, "learning_rate": 9.960000000000001e-06, "loss": 29.3721, "step": 20247 }, { "epoch": 482.0955223880597, "grad_norm": 31.89955711364746, "learning_rate": 9.95952380952381e-06, "loss": 28.0881, "step": 20248 }, { "epoch": 482.1194029850746, "grad_norm": 21.83026695251465, "learning_rate": 9.95904761904762e-06, "loss": 29.3653, "step": 20249 }, { "epoch": 482.14328358208957, "grad_norm": 23.611486434936523, "learning_rate": 9.95857142857143e-06, "loss": 29.412, "step": 20250 }, { "epoch": 482.1671641791045, "grad_norm": 23.222190856933594, "learning_rate": 9.958095238095239e-06, "loss": 28.5036, "step": 20251 }, { "epoch": 482.1910447761194, "grad_norm": 25.836458206176758, "learning_rate": 9.957619047619048e-06, "loss": 27.9756, "step": 20252 }, { "epoch": 482.21492537313435, "grad_norm": 18.66162872314453, "learning_rate": 9.957142857142858e-06, "loss": 28.3837, "step": 20253 }, { "epoch": 482.23880597014926, "grad_norm": 22.503887176513672, "learning_rate": 9.956666666666667e-06, "loss": 28.5732, "step": 20254 }, { "epoch": 482.26268656716417, "grad_norm": 26.385324478149414, "learning_rate": 9.956190476190477e-06, "loss": 28.4468, "step": 20255 }, { "epoch": 482.28656716417913, "grad_norm": 21.18877410888672, "learning_rate": 9.955714285714286e-06, "loss": 28.5893, "step": 20256 }, { "epoch": 482.31044776119404, "grad_norm": 18.8259334564209, "learning_rate": 9.955238095238096e-06, "loss": 27.6395, "step": 20257 }, { "epoch": 482.33432835820895, "grad_norm": 20.971973419189453, "learning_rate": 9.954761904761905e-06, "loss": 28.9612, "step": 20258 }, { "epoch": 482.35820895522386, "grad_norm": 18.0950870513916, "learning_rate": 9.954285714285715e-06, "loss": 27.7092, "step": 20259 }, { "epoch": 482.3820895522388, "grad_norm": 19.162185668945312, "learning_rate": 9.953809523809524e-06, "loss": 29.8261, "step": 20260 }, { "epoch": 482.40597014925373, "grad_norm": 23.60042381286621, "learning_rate": 9.953333333333333e-06, "loss": 28.6059, "step": 20261 }, { "epoch": 482.42985074626864, "grad_norm": 20.095979690551758, "learning_rate": 9.952857142857145e-06, "loss": 29.9035, "step": 20262 }, { "epoch": 482.4537313432836, "grad_norm": 22.086477279663086, "learning_rate": 9.952380952380954e-06, "loss": 28.5024, "step": 20263 }, { "epoch": 482.4776119402985, "grad_norm": 24.75166893005371, "learning_rate": 9.951904761904762e-06, "loss": 29.2224, "step": 20264 }, { "epoch": 482.5014925373134, "grad_norm": 20.64609718322754, "learning_rate": 9.951428571428571e-06, "loss": 28.696, "step": 20265 }, { "epoch": 482.52537313432833, "grad_norm": 26.566701889038086, "learning_rate": 9.950952380952382e-06, "loss": 28.1785, "step": 20266 }, { "epoch": 482.5492537313433, "grad_norm": 25.09548568725586, "learning_rate": 9.950476190476192e-06, "loss": 27.9218, "step": 20267 }, { "epoch": 482.5731343283582, "grad_norm": 25.41645050048828, "learning_rate": 9.950000000000001e-06, "loss": 28.5791, "step": 20268 }, { "epoch": 482.5970149253731, "grad_norm": 19.318960189819336, "learning_rate": 9.94952380952381e-06, "loss": 28.3998, "step": 20269 }, { "epoch": 482.6208955223881, "grad_norm": 26.60614013671875, "learning_rate": 9.94904761904762e-06, "loss": 28.9331, "step": 20270 }, { "epoch": 482.644776119403, "grad_norm": 23.979753494262695, "learning_rate": 9.94857142857143e-06, "loss": 29.792, "step": 20271 }, { "epoch": 482.6686567164179, "grad_norm": 21.708065032958984, "learning_rate": 9.948095238095239e-06, "loss": 29.832, "step": 20272 }, { "epoch": 482.6925373134328, "grad_norm": 23.372556686401367, "learning_rate": 9.947619047619049e-06, "loss": 29.2101, "step": 20273 }, { "epoch": 482.7164179104478, "grad_norm": 19.557018280029297, "learning_rate": 9.947142857142858e-06, "loss": 27.8984, "step": 20274 }, { "epoch": 482.7402985074627, "grad_norm": 22.12640953063965, "learning_rate": 9.946666666666667e-06, "loss": 27.7433, "step": 20275 }, { "epoch": 482.7641791044776, "grad_norm": 21.11604881286621, "learning_rate": 9.946190476190477e-06, "loss": 28.19, "step": 20276 }, { "epoch": 482.78805970149256, "grad_norm": 18.87818717956543, "learning_rate": 9.945714285714286e-06, "loss": 28.2406, "step": 20277 }, { "epoch": 482.81194029850747, "grad_norm": 21.497812271118164, "learning_rate": 9.945238095238096e-06, "loss": 29.4871, "step": 20278 }, { "epoch": 482.8358208955224, "grad_norm": 23.409082412719727, "learning_rate": 9.944761904761905e-06, "loss": 29.5244, "step": 20279 }, { "epoch": 482.85970149253734, "grad_norm": 20.595369338989258, "learning_rate": 9.944285714285715e-06, "loss": 28.8418, "step": 20280 }, { "epoch": 482.88358208955225, "grad_norm": 20.518239974975586, "learning_rate": 9.943809523809524e-06, "loss": 29.9747, "step": 20281 }, { "epoch": 482.90746268656716, "grad_norm": 17.038326263427734, "learning_rate": 9.943333333333334e-06, "loss": 28.9432, "step": 20282 }, { "epoch": 482.93134328358207, "grad_norm": 22.083585739135742, "learning_rate": 9.942857142857145e-06, "loss": 28.8852, "step": 20283 }, { "epoch": 482.95522388059703, "grad_norm": 23.976177215576172, "learning_rate": 9.942380952380954e-06, "loss": 27.557, "step": 20284 }, { "epoch": 482.97910447761194, "grad_norm": 22.95453643798828, "learning_rate": 9.941904761904762e-06, "loss": 29.5857, "step": 20285 }, { "epoch": 483.0, "grad_norm": 23.480430603027344, "learning_rate": 9.941428571428571e-06, "loss": 25.0089, "step": 20286 }, { "epoch": 483.0238805970149, "grad_norm": 21.59208869934082, "learning_rate": 9.940952380952382e-06, "loss": 29.644, "step": 20287 }, { "epoch": 483.0477611940299, "grad_norm": 18.064453125, "learning_rate": 9.940476190476192e-06, "loss": 28.7579, "step": 20288 }, { "epoch": 483.0716417910448, "grad_norm": 21.085912704467773, "learning_rate": 9.940000000000001e-06, "loss": 29.2113, "step": 20289 }, { "epoch": 483.0955223880597, "grad_norm": 21.223039627075195, "learning_rate": 9.93952380952381e-06, "loss": 29.7188, "step": 20290 }, { "epoch": 483.1194029850746, "grad_norm": 20.278305053710938, "learning_rate": 9.93904761904762e-06, "loss": 27.8903, "step": 20291 }, { "epoch": 483.14328358208957, "grad_norm": 22.56578254699707, "learning_rate": 9.93857142857143e-06, "loss": 28.6667, "step": 20292 }, { "epoch": 483.1671641791045, "grad_norm": 17.27849769592285, "learning_rate": 9.93809523809524e-06, "loss": 29.0122, "step": 20293 }, { "epoch": 483.1910447761194, "grad_norm": 19.0905818939209, "learning_rate": 9.937619047619049e-06, "loss": 27.6678, "step": 20294 }, { "epoch": 483.21492537313435, "grad_norm": 20.384960174560547, "learning_rate": 9.937142857142858e-06, "loss": 29.6324, "step": 20295 }, { "epoch": 483.23880597014926, "grad_norm": 20.477943420410156, "learning_rate": 9.936666666666668e-06, "loss": 28.0528, "step": 20296 }, { "epoch": 483.26268656716417, "grad_norm": 23.83664894104004, "learning_rate": 9.936190476190477e-06, "loss": 29.0095, "step": 20297 }, { "epoch": 483.28656716417913, "grad_norm": 23.71938133239746, "learning_rate": 9.935714285714286e-06, "loss": 29.2503, "step": 20298 }, { "epoch": 483.31044776119404, "grad_norm": 22.315481185913086, "learning_rate": 9.935238095238096e-06, "loss": 27.6741, "step": 20299 }, { "epoch": 483.33432835820895, "grad_norm": 20.363277435302734, "learning_rate": 9.934761904761905e-06, "loss": 28.5877, "step": 20300 }, { "epoch": 483.35820895522386, "grad_norm": 19.49530792236328, "learning_rate": 9.934285714285715e-06, "loss": 28.5663, "step": 20301 }, { "epoch": 483.3820895522388, "grad_norm": 17.85773468017578, "learning_rate": 9.933809523809524e-06, "loss": 27.9447, "step": 20302 }, { "epoch": 483.40597014925373, "grad_norm": 22.018287658691406, "learning_rate": 9.933333333333334e-06, "loss": 29.4559, "step": 20303 }, { "epoch": 483.42985074626864, "grad_norm": 18.655183792114258, "learning_rate": 9.932857142857145e-06, "loss": 29.3967, "step": 20304 }, { "epoch": 483.4537313432836, "grad_norm": 21.200735092163086, "learning_rate": 9.932380952380953e-06, "loss": 28.3836, "step": 20305 }, { "epoch": 483.4776119402985, "grad_norm": 19.749969482421875, "learning_rate": 9.931904761904762e-06, "loss": 28.6249, "step": 20306 }, { "epoch": 483.5014925373134, "grad_norm": 25.089603424072266, "learning_rate": 9.931428571428571e-06, "loss": 28.801, "step": 20307 }, { "epoch": 483.52537313432833, "grad_norm": 18.289600372314453, "learning_rate": 9.930952380952383e-06, "loss": 29.2823, "step": 20308 }, { "epoch": 483.5492537313433, "grad_norm": 25.358701705932617, "learning_rate": 9.930476190476192e-06, "loss": 28.2523, "step": 20309 }, { "epoch": 483.5731343283582, "grad_norm": 22.941797256469727, "learning_rate": 9.930000000000001e-06, "loss": 28.3615, "step": 20310 }, { "epoch": 483.5970149253731, "grad_norm": 22.99311065673828, "learning_rate": 9.92952380952381e-06, "loss": 27.9251, "step": 20311 }, { "epoch": 483.6208955223881, "grad_norm": 21.468599319458008, "learning_rate": 9.92904761904762e-06, "loss": 28.2599, "step": 20312 }, { "epoch": 483.644776119403, "grad_norm": 19.710407257080078, "learning_rate": 9.92857142857143e-06, "loss": 28.5435, "step": 20313 }, { "epoch": 483.6686567164179, "grad_norm": 23.596385955810547, "learning_rate": 9.92809523809524e-06, "loss": 27.7124, "step": 20314 }, { "epoch": 483.6925373134328, "grad_norm": 31.100221633911133, "learning_rate": 9.927619047619049e-06, "loss": 28.8879, "step": 20315 }, { "epoch": 483.7164179104478, "grad_norm": 22.1812801361084, "learning_rate": 9.927142857142858e-06, "loss": 28.5651, "step": 20316 }, { "epoch": 483.7402985074627, "grad_norm": 17.228715896606445, "learning_rate": 9.926666666666668e-06, "loss": 28.5459, "step": 20317 }, { "epoch": 483.7641791044776, "grad_norm": 21.209434509277344, "learning_rate": 9.926190476190477e-06, "loss": 29.8293, "step": 20318 }, { "epoch": 483.78805970149256, "grad_norm": 23.542659759521484, "learning_rate": 9.925714285714287e-06, "loss": 28.6741, "step": 20319 }, { "epoch": 483.81194029850747, "grad_norm": 22.615947723388672, "learning_rate": 9.925238095238096e-06, "loss": 28.8616, "step": 20320 }, { "epoch": 483.8358208955224, "grad_norm": 24.083242416381836, "learning_rate": 9.924761904761905e-06, "loss": 28.2751, "step": 20321 }, { "epoch": 483.85970149253734, "grad_norm": 20.294309616088867, "learning_rate": 9.924285714285715e-06, "loss": 28.2588, "step": 20322 }, { "epoch": 483.88358208955225, "grad_norm": 18.410850524902344, "learning_rate": 9.923809523809524e-06, "loss": 29.1694, "step": 20323 }, { "epoch": 483.90746268656716, "grad_norm": 21.735889434814453, "learning_rate": 9.923333333333334e-06, "loss": 28.1465, "step": 20324 }, { "epoch": 483.93134328358207, "grad_norm": 26.238693237304688, "learning_rate": 9.922857142857145e-06, "loss": 28.5394, "step": 20325 }, { "epoch": 483.95522388059703, "grad_norm": 26.103525161743164, "learning_rate": 9.922380952380953e-06, "loss": 29.2001, "step": 20326 }, { "epoch": 483.97910447761194, "grad_norm": NaN, "learning_rate": 9.921904761904762e-06, "loss": 35.9329, "step": 20327 }, { "epoch": 484.0, "grad_norm": 15.225421905517578, "learning_rate": 9.921904761904762e-06, "loss": 24.8913, "step": 20328 }, { "epoch": 484.0238805970149, "grad_norm": 28.01540184020996, "learning_rate": 9.921428571428572e-06, "loss": 27.9046, "step": 20329 }, { "epoch": 484.0477611940299, "grad_norm": 28.59665870666504, "learning_rate": 9.920952380952383e-06, "loss": 28.599, "step": 20330 }, { "epoch": 484.0716417910448, "grad_norm": 23.521760940551758, "learning_rate": 9.920476190476192e-06, "loss": 28.9263, "step": 20331 }, { "epoch": 484.0955223880597, "grad_norm": 21.983776092529297, "learning_rate": 9.920000000000002e-06, "loss": 28.2692, "step": 20332 }, { "epoch": 484.1194029850746, "grad_norm": 31.300758361816406, "learning_rate": 9.91952380952381e-06, "loss": 29.3243, "step": 20333 }, { "epoch": 484.14328358208957, "grad_norm": 20.892271041870117, "learning_rate": 9.91904761904762e-06, "loss": 29.2818, "step": 20334 }, { "epoch": 484.1671641791045, "grad_norm": 23.302135467529297, "learning_rate": 9.91857142857143e-06, "loss": 28.3588, "step": 20335 }, { "epoch": 484.1910447761194, "grad_norm": 29.748947143554688, "learning_rate": 9.91809523809524e-06, "loss": 29.5011, "step": 20336 }, { "epoch": 484.21492537313435, "grad_norm": 18.145200729370117, "learning_rate": 9.917619047619049e-06, "loss": 28.4362, "step": 20337 }, { "epoch": 484.23880597014926, "grad_norm": 26.522932052612305, "learning_rate": 9.917142857142857e-06, "loss": 29.4296, "step": 20338 }, { "epoch": 484.26268656716417, "grad_norm": 26.99478530883789, "learning_rate": 9.916666666666668e-06, "loss": 27.5296, "step": 20339 }, { "epoch": 484.28656716417913, "grad_norm": 19.766443252563477, "learning_rate": 9.916190476190477e-06, "loss": 28.8853, "step": 20340 }, { "epoch": 484.31044776119404, "grad_norm": 19.78734016418457, "learning_rate": 9.915714285714287e-06, "loss": 28.6424, "step": 20341 }, { "epoch": 484.33432835820895, "grad_norm": 19.728219985961914, "learning_rate": 9.915238095238096e-06, "loss": 28.497, "step": 20342 }, { "epoch": 484.35820895522386, "grad_norm": 27.4100399017334, "learning_rate": 9.914761904761906e-06, "loss": 28.9369, "step": 20343 }, { "epoch": 484.3820895522388, "grad_norm": 23.836267471313477, "learning_rate": 9.914285714285715e-06, "loss": 28.3919, "step": 20344 }, { "epoch": 484.40597014925373, "grad_norm": 18.341026306152344, "learning_rate": 9.913809523809524e-06, "loss": 29.1768, "step": 20345 }, { "epoch": 484.42985074626864, "grad_norm": 20.07976531982422, "learning_rate": 9.913333333333334e-06, "loss": 28.0723, "step": 20346 }, { "epoch": 484.4537313432836, "grad_norm": 23.461803436279297, "learning_rate": 9.912857142857143e-06, "loss": 29.8764, "step": 20347 }, { "epoch": 484.4776119402985, "grad_norm": 29.059659957885742, "learning_rate": 9.912380952380953e-06, "loss": 28.338, "step": 20348 }, { "epoch": 484.5014925373134, "grad_norm": 19.423175811767578, "learning_rate": 9.911904761904762e-06, "loss": 28.5624, "step": 20349 }, { "epoch": 484.52537313432833, "grad_norm": 19.371070861816406, "learning_rate": 9.911428571428572e-06, "loss": 28.6114, "step": 20350 }, { "epoch": 484.5492537313433, "grad_norm": 26.106718063354492, "learning_rate": 9.910952380952383e-06, "loss": 29.1343, "step": 20351 }, { "epoch": 484.5731343283582, "grad_norm": 28.637704849243164, "learning_rate": 9.910476190476192e-06, "loss": 28.3051, "step": 20352 }, { "epoch": 484.5970149253731, "grad_norm": 18.507583618164062, "learning_rate": 9.91e-06, "loss": 28.5221, "step": 20353 }, { "epoch": 484.6208955223881, "grad_norm": 20.094993591308594, "learning_rate": 9.90952380952381e-06, "loss": 28.7028, "step": 20354 }, { "epoch": 484.644776119403, "grad_norm": 21.177268981933594, "learning_rate": 9.90904761904762e-06, "loss": 26.557, "step": 20355 }, { "epoch": 484.6686567164179, "grad_norm": 24.490766525268555, "learning_rate": 9.90857142857143e-06, "loss": 28.9953, "step": 20356 }, { "epoch": 484.6925373134328, "grad_norm": 15.803695678710938, "learning_rate": 9.90809523809524e-06, "loss": 28.6512, "step": 20357 }, { "epoch": 484.7164179104478, "grad_norm": 21.35489273071289, "learning_rate": 9.907619047619049e-06, "loss": 27.7016, "step": 20358 }, { "epoch": 484.7402985074627, "grad_norm": 30.8648681640625, "learning_rate": 9.907142857142858e-06, "loss": 27.564, "step": 20359 }, { "epoch": 484.7641791044776, "grad_norm": 21.425973892211914, "learning_rate": 9.906666666666668e-06, "loss": 28.3937, "step": 20360 }, { "epoch": 484.78805970149256, "grad_norm": 21.337797164916992, "learning_rate": 9.906190476190477e-06, "loss": 28.6335, "step": 20361 }, { "epoch": 484.81194029850747, "grad_norm": 23.279327392578125, "learning_rate": 9.905714285714287e-06, "loss": 28.7329, "step": 20362 }, { "epoch": 484.8358208955224, "grad_norm": 22.478771209716797, "learning_rate": 9.905238095238096e-06, "loss": 28.8378, "step": 20363 }, { "epoch": 484.85970149253734, "grad_norm": 20.696571350097656, "learning_rate": 9.904761904761906e-06, "loss": 28.3524, "step": 20364 }, { "epoch": 484.88358208955225, "grad_norm": 21.428403854370117, "learning_rate": 9.904285714285715e-06, "loss": 29.6248, "step": 20365 }, { "epoch": 484.90746268656716, "grad_norm": 22.643564224243164, "learning_rate": 9.903809523809524e-06, "loss": 29.5143, "step": 20366 }, { "epoch": 484.93134328358207, "grad_norm": 27.500896453857422, "learning_rate": 9.903333333333334e-06, "loss": 29.6034, "step": 20367 }, { "epoch": 484.95522388059703, "grad_norm": 18.018728256225586, "learning_rate": 9.902857142857143e-06, "loss": 27.6092, "step": 20368 }, { "epoch": 484.97910447761194, "grad_norm": 17.75319480895996, "learning_rate": 9.902380952380953e-06, "loss": 29.5504, "step": 20369 }, { "epoch": 485.0, "grad_norm": 21.668750762939453, "learning_rate": 9.901904761904762e-06, "loss": 24.5648, "step": 20370 }, { "epoch": 485.0238805970149, "grad_norm": 22.175403594970703, "learning_rate": 9.901428571428572e-06, "loss": 28.5532, "step": 20371 }, { "epoch": 485.0477611940299, "grad_norm": 19.6533203125, "learning_rate": 9.900952380952383e-06, "loss": 28.4196, "step": 20372 }, { "epoch": 485.0716417910448, "grad_norm": 29.342243194580078, "learning_rate": 9.90047619047619e-06, "loss": 29.3159, "step": 20373 }, { "epoch": 485.0955223880597, "grad_norm": 28.117246627807617, "learning_rate": 9.9e-06, "loss": 27.3075, "step": 20374 }, { "epoch": 485.1194029850746, "grad_norm": 17.812196731567383, "learning_rate": 9.89952380952381e-06, "loss": 27.3631, "step": 20375 }, { "epoch": 485.14328358208957, "grad_norm": 19.44521713256836, "learning_rate": 9.89904761904762e-06, "loss": 28.0409, "step": 20376 }, { "epoch": 485.1671641791045, "grad_norm": 27.97115707397461, "learning_rate": 9.89857142857143e-06, "loss": 29.0385, "step": 20377 }, { "epoch": 485.1910447761194, "grad_norm": 22.871797561645508, "learning_rate": 9.89809523809524e-06, "loss": 29.0847, "step": 20378 }, { "epoch": 485.21492537313435, "grad_norm": 23.071109771728516, "learning_rate": 9.897619047619047e-06, "loss": 28.8786, "step": 20379 }, { "epoch": 485.23880597014926, "grad_norm": 27.90340232849121, "learning_rate": 9.897142857142858e-06, "loss": 28.6802, "step": 20380 }, { "epoch": 485.26268656716417, "grad_norm": 21.32827377319336, "learning_rate": 9.896666666666668e-06, "loss": 29.2266, "step": 20381 }, { "epoch": 485.28656716417913, "grad_norm": 22.314165115356445, "learning_rate": 9.896190476190477e-06, "loss": 28.2851, "step": 20382 }, { "epoch": 485.31044776119404, "grad_norm": 31.734224319458008, "learning_rate": 9.895714285714287e-06, "loss": 29.4496, "step": 20383 }, { "epoch": 485.33432835820895, "grad_norm": 19.218154907226562, "learning_rate": 9.895238095238096e-06, "loss": 28.764, "step": 20384 }, { "epoch": 485.35820895522386, "grad_norm": 24.229536056518555, "learning_rate": 9.894761904761906e-06, "loss": 29.1907, "step": 20385 }, { "epoch": 485.3820895522388, "grad_norm": 27.350658416748047, "learning_rate": 9.894285714285715e-06, "loss": 29.4367, "step": 20386 }, { "epoch": 485.40597014925373, "grad_norm": 20.400604248046875, "learning_rate": 9.893809523809525e-06, "loss": 27.9874, "step": 20387 }, { "epoch": 485.42985074626864, "grad_norm": 18.86604881286621, "learning_rate": 9.893333333333334e-06, "loss": 28.0772, "step": 20388 }, { "epoch": 485.4537313432836, "grad_norm": 34.45559310913086, "learning_rate": 9.892857142857143e-06, "loss": 29.7494, "step": 20389 }, { "epoch": 485.4776119402985, "grad_norm": 19.828678131103516, "learning_rate": 9.892380952380953e-06, "loss": 29.1618, "step": 20390 }, { "epoch": 485.5014925373134, "grad_norm": 34.477447509765625, "learning_rate": 9.891904761904762e-06, "loss": 29.6478, "step": 20391 }, { "epoch": 485.52537313432833, "grad_norm": 26.693809509277344, "learning_rate": 9.891428571428572e-06, "loss": 29.4778, "step": 20392 }, { "epoch": 485.5492537313433, "grad_norm": 24.650724411010742, "learning_rate": 9.890952380952383e-06, "loss": 28.8518, "step": 20393 }, { "epoch": 485.5731343283582, "grad_norm": 28.382831573486328, "learning_rate": 9.89047619047619e-06, "loss": 28.1649, "step": 20394 }, { "epoch": 485.5970149253731, "grad_norm": 24.758092880249023, "learning_rate": 9.89e-06, "loss": 27.892, "step": 20395 }, { "epoch": 485.6208955223881, "grad_norm": 17.941537857055664, "learning_rate": 9.88952380952381e-06, "loss": 28.904, "step": 20396 }, { "epoch": 485.644776119403, "grad_norm": 25.778043746948242, "learning_rate": 9.88904761904762e-06, "loss": 28.7505, "step": 20397 }, { "epoch": 485.6686567164179, "grad_norm": 25.357357025146484, "learning_rate": 9.88857142857143e-06, "loss": 28.4136, "step": 20398 }, { "epoch": 485.6925373134328, "grad_norm": 18.186588287353516, "learning_rate": 9.88809523809524e-06, "loss": 28.4273, "step": 20399 }, { "epoch": 485.7164179104478, "grad_norm": 29.723102569580078, "learning_rate": 9.887619047619047e-06, "loss": 28.1359, "step": 20400 }, { "epoch": 485.7402985074627, "grad_norm": 24.085601806640625, "learning_rate": 9.887142857142859e-06, "loss": 28.2209, "step": 20401 }, { "epoch": 485.7641791044776, "grad_norm": 19.23664093017578, "learning_rate": 9.886666666666668e-06, "loss": 28.4211, "step": 20402 }, { "epoch": 485.78805970149256, "grad_norm": 35.173126220703125, "learning_rate": 9.886190476190477e-06, "loss": 28.0201, "step": 20403 }, { "epoch": 485.81194029850747, "grad_norm": 20.88926887512207, "learning_rate": 9.885714285714287e-06, "loss": 29.3281, "step": 20404 }, { "epoch": 485.8358208955224, "grad_norm": 30.900062561035156, "learning_rate": 9.885238095238096e-06, "loss": 27.6048, "step": 20405 }, { "epoch": 485.85970149253734, "grad_norm": 23.367061614990234, "learning_rate": 9.884761904761906e-06, "loss": 29.0744, "step": 20406 }, { "epoch": 485.88358208955225, "grad_norm": 26.387022018432617, "learning_rate": 9.884285714285715e-06, "loss": 27.8337, "step": 20407 }, { "epoch": 485.90746268656716, "grad_norm": 31.204984664916992, "learning_rate": 9.883809523809525e-06, "loss": 28.5459, "step": 20408 }, { "epoch": 485.93134328358207, "grad_norm": 20.5678653717041, "learning_rate": 9.883333333333334e-06, "loss": 28.1671, "step": 20409 }, { "epoch": 485.95522388059703, "grad_norm": 33.08265686035156, "learning_rate": 9.882857142857144e-06, "loss": 28.7177, "step": 20410 }, { "epoch": 485.97910447761194, "grad_norm": 22.839336395263672, "learning_rate": 9.882380952380953e-06, "loss": 28.5893, "step": 20411 }, { "epoch": 486.0, "grad_norm": 27.050588607788086, "learning_rate": 9.881904761904762e-06, "loss": 24.4728, "step": 20412 }, { "epoch": 486.0238805970149, "grad_norm": 26.603694915771484, "learning_rate": 9.881428571428572e-06, "loss": 27.6473, "step": 20413 }, { "epoch": 486.0477611940299, "grad_norm": 29.471691131591797, "learning_rate": 9.880952380952381e-06, "loss": 28.8915, "step": 20414 }, { "epoch": 486.0716417910448, "grad_norm": 25.668256759643555, "learning_rate": 9.88047619047619e-06, "loss": 29.1298, "step": 20415 }, { "epoch": 486.0955223880597, "grad_norm": 35.592018127441406, "learning_rate": 9.88e-06, "loss": 28.0791, "step": 20416 }, { "epoch": 486.1194029850746, "grad_norm": 24.15786361694336, "learning_rate": 9.87952380952381e-06, "loss": 28.5635, "step": 20417 }, { "epoch": 486.14328358208957, "grad_norm": 45.56925964355469, "learning_rate": 9.879047619047621e-06, "loss": 28.3313, "step": 20418 }, { "epoch": 486.1671641791045, "grad_norm": 36.845458984375, "learning_rate": 9.87857142857143e-06, "loss": 28.7826, "step": 20419 }, { "epoch": 486.1910447761194, "grad_norm": 36.99827575683594, "learning_rate": 9.878095238095238e-06, "loss": 28.7439, "step": 20420 }, { "epoch": 486.21492537313435, "grad_norm": 33.900146484375, "learning_rate": 9.877619047619048e-06, "loss": 28.7555, "step": 20421 }, { "epoch": 486.23880597014926, "grad_norm": 33.78064727783203, "learning_rate": 9.877142857142859e-06, "loss": 29.9723, "step": 20422 }, { "epoch": 486.26268656716417, "grad_norm": 29.53579330444336, "learning_rate": 9.876666666666668e-06, "loss": 29.0976, "step": 20423 }, { "epoch": 486.28656716417913, "grad_norm": 38.46416473388672, "learning_rate": 9.876190476190478e-06, "loss": 28.6881, "step": 20424 }, { "epoch": 486.31044776119404, "grad_norm": 28.6951904296875, "learning_rate": 9.875714285714287e-06, "loss": 28.5752, "step": 20425 }, { "epoch": 486.33432835820895, "grad_norm": 39.854888916015625, "learning_rate": 9.875238095238095e-06, "loss": 28.9223, "step": 20426 }, { "epoch": 486.35820895522386, "grad_norm": 27.746002197265625, "learning_rate": 9.874761904761906e-06, "loss": 27.2871, "step": 20427 }, { "epoch": 486.3820895522388, "grad_norm": 36.45392990112305, "learning_rate": 9.874285714285715e-06, "loss": 28.4126, "step": 20428 }, { "epoch": 486.40597014925373, "grad_norm": 29.433177947998047, "learning_rate": 9.873809523809525e-06, "loss": 27.7437, "step": 20429 }, { "epoch": 486.42985074626864, "grad_norm": 31.952598571777344, "learning_rate": 9.873333333333334e-06, "loss": 27.4496, "step": 20430 }, { "epoch": 486.4537313432836, "grad_norm": 27.731264114379883, "learning_rate": 9.872857142857144e-06, "loss": 28.7254, "step": 20431 }, { "epoch": 486.4776119402985, "grad_norm": 28.319324493408203, "learning_rate": 9.872380952380953e-06, "loss": 27.9868, "step": 20432 }, { "epoch": 486.5014925373134, "grad_norm": 29.183609008789062, "learning_rate": 9.871904761904763e-06, "loss": 28.2041, "step": 20433 }, { "epoch": 486.52537313432833, "grad_norm": 23.685731887817383, "learning_rate": 9.871428571428572e-06, "loss": 27.3307, "step": 20434 }, { "epoch": 486.5492537313433, "grad_norm": 25.113554000854492, "learning_rate": 9.870952380952381e-06, "loss": 29.0208, "step": 20435 }, { "epoch": 486.5731343283582, "grad_norm": 31.438831329345703, "learning_rate": 9.870476190476191e-06, "loss": 28.2866, "step": 20436 }, { "epoch": 486.5970149253731, "grad_norm": 20.719141006469727, "learning_rate": 9.87e-06, "loss": 29.1306, "step": 20437 }, { "epoch": 486.6208955223881, "grad_norm": 37.38289260864258, "learning_rate": 9.86952380952381e-06, "loss": 27.6139, "step": 20438 }, { "epoch": 486.644776119403, "grad_norm": 30.64270782470703, "learning_rate": 9.869047619047621e-06, "loss": 30.0489, "step": 20439 }, { "epoch": 486.6686567164179, "grad_norm": 28.15789794921875, "learning_rate": 9.86857142857143e-06, "loss": 27.9486, "step": 20440 }, { "epoch": 486.6925373134328, "grad_norm": 28.478513717651367, "learning_rate": 9.868095238095238e-06, "loss": 28.1818, "step": 20441 }, { "epoch": 486.7164179104478, "grad_norm": 26.086503982543945, "learning_rate": 9.867619047619048e-06, "loss": 29.2265, "step": 20442 }, { "epoch": 486.7402985074627, "grad_norm": 25.757184982299805, "learning_rate": 9.867142857142859e-06, "loss": 28.8504, "step": 20443 }, { "epoch": 486.7641791044776, "grad_norm": NaN, "learning_rate": 9.866666666666668e-06, "loss": 50.2759, "step": 20444 }, { "epoch": 486.78805970149256, "grad_norm": 27.32605743408203, "learning_rate": 9.866666666666668e-06, "loss": 28.4548, "step": 20445 }, { "epoch": 486.81194029850747, "grad_norm": 18.77472496032715, "learning_rate": 9.866190476190478e-06, "loss": 28.1087, "step": 20446 }, { "epoch": 486.8358208955224, "grad_norm": 31.335275650024414, "learning_rate": 9.865714285714285e-06, "loss": 28.6461, "step": 20447 }, { "epoch": 486.85970149253734, "grad_norm": 24.890287399291992, "learning_rate": 9.865238095238095e-06, "loss": 30.0102, "step": 20448 }, { "epoch": 486.88358208955225, "grad_norm": 26.888362884521484, "learning_rate": 9.864761904761906e-06, "loss": 28.9521, "step": 20449 }, { "epoch": 486.90746268656716, "grad_norm": 24.578062057495117, "learning_rate": 9.864285714285715e-06, "loss": 28.5248, "step": 20450 }, { "epoch": 486.93134328358207, "grad_norm": 26.276634216308594, "learning_rate": 9.863809523809525e-06, "loss": 27.8269, "step": 20451 }, { "epoch": 486.95522388059703, "grad_norm": 23.091188430786133, "learning_rate": 9.863333333333334e-06, "loss": 28.477, "step": 20452 }, { "epoch": 486.97910447761194, "grad_norm": 29.017009735107422, "learning_rate": 9.862857142857144e-06, "loss": 30.1143, "step": 20453 }, { "epoch": 487.0, "grad_norm": NaN, "learning_rate": 9.862380952380953e-06, "loss": 21.857, "step": 20454 }, { "epoch": 487.0238805970149, "grad_norm": 24.752187728881836, "learning_rate": 9.862380952380953e-06, "loss": 29.344, "step": 20455 }, { "epoch": 487.0477611940299, "grad_norm": 21.786725997924805, "learning_rate": 9.861904761904763e-06, "loss": 28.772, "step": 20456 }, { "epoch": 487.0716417910448, "grad_norm": 20.694992065429688, "learning_rate": 9.861428571428572e-06, "loss": 28.9108, "step": 20457 }, { "epoch": 487.0955223880597, "grad_norm": 23.595273971557617, "learning_rate": 9.860952380952382e-06, "loss": 27.9987, "step": 20458 }, { "epoch": 487.1194029850746, "grad_norm": 21.147714614868164, "learning_rate": 9.860476190476191e-06, "loss": 28.4929, "step": 20459 }, { "epoch": 487.14328358208957, "grad_norm": 19.665544509887695, "learning_rate": 9.86e-06, "loss": 28.7373, "step": 20460 }, { "epoch": 487.1671641791045, "grad_norm": 26.432222366333008, "learning_rate": 9.85952380952381e-06, "loss": 28.4443, "step": 20461 }, { "epoch": 487.1910447761194, "grad_norm": 20.7607479095459, "learning_rate": 9.859047619047621e-06, "loss": 29.3951, "step": 20462 }, { "epoch": 487.21492537313435, "grad_norm": 25.161148071289062, "learning_rate": 9.858571428571429e-06, "loss": 28.2722, "step": 20463 }, { "epoch": 487.23880597014926, "grad_norm": 19.327377319335938, "learning_rate": 9.858095238095238e-06, "loss": 28.3339, "step": 20464 }, { "epoch": 487.26268656716417, "grad_norm": 23.760868072509766, "learning_rate": 9.857619047619048e-06, "loss": 28.2444, "step": 20465 }, { "epoch": 487.28656716417913, "grad_norm": 22.485496520996094, "learning_rate": 9.857142857142859e-06, "loss": 27.382, "step": 20466 }, { "epoch": 487.31044776119404, "grad_norm": 20.107269287109375, "learning_rate": 9.856666666666668e-06, "loss": 27.6145, "step": 20467 }, { "epoch": 487.33432835820895, "grad_norm": 23.206260681152344, "learning_rate": 9.856190476190478e-06, "loss": 28.4137, "step": 20468 }, { "epoch": 487.35820895522386, "grad_norm": NaN, "learning_rate": 9.855714285714285e-06, "loss": 41.3093, "step": 20469 }, { "epoch": 487.3820895522388, "grad_norm": 22.463144302368164, "learning_rate": 9.855714285714285e-06, "loss": 28.9473, "step": 20470 }, { "epoch": 487.40597014925373, "grad_norm": 21.36734962463379, "learning_rate": 9.855238095238095e-06, "loss": 28.9185, "step": 20471 }, { "epoch": 487.42985074626864, "grad_norm": 23.00170135498047, "learning_rate": 9.854761904761906e-06, "loss": 29.9494, "step": 20472 }, { "epoch": 487.4537313432836, "grad_norm": 19.47336769104004, "learning_rate": 9.854285714285716e-06, "loss": 29.0055, "step": 20473 }, { "epoch": 487.4776119402985, "grad_norm": 23.60487937927246, "learning_rate": 9.853809523809525e-06, "loss": 29.1422, "step": 20474 }, { "epoch": 487.5014925373134, "grad_norm": 19.489347457885742, "learning_rate": 9.853333333333334e-06, "loss": 28.8813, "step": 20475 }, { "epoch": 487.52537313432833, "grad_norm": 24.1863956451416, "learning_rate": 9.852857142857144e-06, "loss": 28.2533, "step": 20476 }, { "epoch": 487.5492537313433, "grad_norm": 20.77450180053711, "learning_rate": 9.852380952380953e-06, "loss": 29.6745, "step": 20477 }, { "epoch": 487.5731343283582, "grad_norm": 21.465560913085938, "learning_rate": 9.851904761904763e-06, "loss": 27.3342, "step": 20478 }, { "epoch": 487.5970149253731, "grad_norm": 20.797157287597656, "learning_rate": 9.851428571428572e-06, "loss": 27.5897, "step": 20479 }, { "epoch": 487.6208955223881, "grad_norm": 20.005062103271484, "learning_rate": 9.850952380952382e-06, "loss": 28.4831, "step": 20480 }, { "epoch": 487.644776119403, "grad_norm": 23.588376998901367, "learning_rate": 9.850476190476191e-06, "loss": 28.7339, "step": 20481 }, { "epoch": 487.6686567164179, "grad_norm": 19.465957641601562, "learning_rate": 9.85e-06, "loss": 27.5017, "step": 20482 }, { "epoch": 487.6925373134328, "grad_norm": 22.7562255859375, "learning_rate": 9.84952380952381e-06, "loss": 28.7548, "step": 20483 }, { "epoch": 487.7164179104478, "grad_norm": 19.4647216796875, "learning_rate": 9.84904761904762e-06, "loss": 28.9421, "step": 20484 }, { "epoch": 487.7402985074627, "grad_norm": 20.88848114013672, "learning_rate": 9.848571428571429e-06, "loss": 28.945, "step": 20485 }, { "epoch": 487.7641791044776, "grad_norm": 19.675554275512695, "learning_rate": 9.848095238095238e-06, "loss": 27.3426, "step": 20486 }, { "epoch": 487.78805970149256, "grad_norm": 22.92022705078125, "learning_rate": 9.847619047619048e-06, "loss": 28.5039, "step": 20487 }, { "epoch": 487.81194029850747, "grad_norm": 22.068403244018555, "learning_rate": 9.847142857142859e-06, "loss": 28.7027, "step": 20488 }, { "epoch": 487.8358208955224, "grad_norm": 20.618146896362305, "learning_rate": 9.846666666666668e-06, "loss": 29.2972, "step": 20489 }, { "epoch": 487.85970149253734, "grad_norm": 20.165502548217773, "learning_rate": 9.846190476190476e-06, "loss": 28.5441, "step": 20490 }, { "epoch": 487.88358208955225, "grad_norm": 21.83547019958496, "learning_rate": 9.845714285714286e-06, "loss": 28.7002, "step": 20491 }, { "epoch": 487.90746268656716, "grad_norm": 19.256826400756836, "learning_rate": 9.845238095238097e-06, "loss": 28.6319, "step": 20492 }, { "epoch": 487.93134328358207, "grad_norm": 22.157352447509766, "learning_rate": 9.844761904761906e-06, "loss": 27.3618, "step": 20493 }, { "epoch": 487.95522388059703, "grad_norm": 18.738842010498047, "learning_rate": 9.844285714285716e-06, "loss": 28.714, "step": 20494 }, { "epoch": 487.97910447761194, "grad_norm": 24.166479110717773, "learning_rate": 9.843809523809525e-06, "loss": 29.2349, "step": 20495 }, { "epoch": 488.0, "grad_norm": 20.486263275146484, "learning_rate": 9.843333333333333e-06, "loss": 25.4607, "step": 20496 }, { "epoch": 488.0238805970149, "grad_norm": 17.909887313842773, "learning_rate": 9.842857142857144e-06, "loss": 27.7245, "step": 20497 }, { "epoch": 488.0477611940299, "grad_norm": 19.829872131347656, "learning_rate": 9.842380952380953e-06, "loss": 27.8958, "step": 20498 }, { "epoch": 488.0716417910448, "grad_norm": 19.82977867126465, "learning_rate": 9.841904761904763e-06, "loss": 29.2744, "step": 20499 }, { "epoch": 488.0955223880597, "grad_norm": 25.787635803222656, "learning_rate": 9.841428571428572e-06, "loss": 28.9812, "step": 20500 }, { "epoch": 488.1194029850746, "grad_norm": 23.399370193481445, "learning_rate": 9.840952380952382e-06, "loss": 27.4137, "step": 20501 }, { "epoch": 488.14328358208957, "grad_norm": 20.757726669311523, "learning_rate": 9.840476190476191e-06, "loss": 27.7763, "step": 20502 }, { "epoch": 488.1671641791045, "grad_norm": 20.368305206298828, "learning_rate": 9.84e-06, "loss": 27.6568, "step": 20503 }, { "epoch": 488.1910447761194, "grad_norm": 19.854663848876953, "learning_rate": 9.83952380952381e-06, "loss": 29.4262, "step": 20504 }, { "epoch": 488.21492537313435, "grad_norm": 23.090105056762695, "learning_rate": 9.83904761904762e-06, "loss": 27.9766, "step": 20505 }, { "epoch": 488.23880597014926, "grad_norm": 19.876672744750977, "learning_rate": 9.838571428571429e-06, "loss": 29.0273, "step": 20506 }, { "epoch": 488.26268656716417, "grad_norm": 23.163911819458008, "learning_rate": 9.838095238095238e-06, "loss": 28.9141, "step": 20507 }, { "epoch": 488.28656716417913, "grad_norm": 19.896848678588867, "learning_rate": 9.837619047619048e-06, "loss": 29.5914, "step": 20508 }, { "epoch": 488.31044776119404, "grad_norm": 23.92636489868164, "learning_rate": 9.837142857142859e-06, "loss": 29.1336, "step": 20509 }, { "epoch": 488.33432835820895, "grad_norm": 19.84395408630371, "learning_rate": 9.836666666666668e-06, "loss": 28.875, "step": 20510 }, { "epoch": 488.35820895522386, "grad_norm": 23.899215698242188, "learning_rate": 9.836190476190476e-06, "loss": 28.1753, "step": 20511 }, { "epoch": 488.3820895522388, "grad_norm": 22.91050910949707, "learning_rate": 9.835714285714286e-06, "loss": 28.5482, "step": 20512 }, { "epoch": 488.40597014925373, "grad_norm": 18.681930541992188, "learning_rate": 9.835238095238097e-06, "loss": 27.9003, "step": 20513 }, { "epoch": 488.42985074626864, "grad_norm": NaN, "learning_rate": 9.834761904761906e-06, "loss": 36.8107, "step": 20514 }, { "epoch": 488.4537313432836, "grad_norm": 22.024059295654297, "learning_rate": 9.834761904761906e-06, "loss": 29.3889, "step": 20515 }, { "epoch": 488.4776119402985, "grad_norm": 21.196786880493164, "learning_rate": 9.834285714285716e-06, "loss": 28.3016, "step": 20516 }, { "epoch": 488.5014925373134, "grad_norm": 28.17771339416504, "learning_rate": 9.833809523809525e-06, "loss": 28.63, "step": 20517 }, { "epoch": 488.52537313432833, "grad_norm": 19.240524291992188, "learning_rate": 9.833333333333333e-06, "loss": 28.4665, "step": 20518 }, { "epoch": 488.5492537313433, "grad_norm": 23.034313201904297, "learning_rate": 9.832857142857144e-06, "loss": 27.5673, "step": 20519 }, { "epoch": 488.5731343283582, "grad_norm": 26.61051368713379, "learning_rate": 9.832380952380954e-06, "loss": 28.4375, "step": 20520 }, { "epoch": 488.5970149253731, "grad_norm": 22.363718032836914, "learning_rate": 9.831904761904763e-06, "loss": 28.787, "step": 20521 }, { "epoch": 488.6208955223881, "grad_norm": 22.263689041137695, "learning_rate": 9.831428571428572e-06, "loss": 28.063, "step": 20522 }, { "epoch": 488.644776119403, "grad_norm": 25.727582931518555, "learning_rate": 9.830952380952382e-06, "loss": 27.7289, "step": 20523 }, { "epoch": 488.6686567164179, "grad_norm": 27.757431030273438, "learning_rate": 9.830476190476191e-06, "loss": 28.2327, "step": 20524 }, { "epoch": 488.6925373134328, "grad_norm": 20.179288864135742, "learning_rate": 9.83e-06, "loss": 28.6139, "step": 20525 }, { "epoch": 488.7164179104478, "grad_norm": 29.278560638427734, "learning_rate": 9.82952380952381e-06, "loss": 28.9556, "step": 20526 }, { "epoch": 488.7402985074627, "grad_norm": 29.46417236328125, "learning_rate": 9.82904761904762e-06, "loss": 28.5574, "step": 20527 }, { "epoch": 488.7641791044776, "grad_norm": 18.598163604736328, "learning_rate": 9.828571428571429e-06, "loss": 27.8283, "step": 20528 }, { "epoch": 488.78805970149256, "grad_norm": 29.941791534423828, "learning_rate": 9.828095238095239e-06, "loss": 29.1709, "step": 20529 }, { "epoch": 488.81194029850747, "grad_norm": 29.771364212036133, "learning_rate": 9.827619047619048e-06, "loss": 28.6597, "step": 20530 }, { "epoch": 488.8358208955224, "grad_norm": 21.104839324951172, "learning_rate": 9.827142857142859e-06, "loss": 28.4117, "step": 20531 }, { "epoch": 488.85970149253734, "grad_norm": 21.830951690673828, "learning_rate": 9.826666666666667e-06, "loss": 28.0235, "step": 20532 }, { "epoch": 488.88358208955225, "grad_norm": 26.7958984375, "learning_rate": 9.826190476190476e-06, "loss": 28.2631, "step": 20533 }, { "epoch": 488.90746268656716, "grad_norm": 19.71217918395996, "learning_rate": 9.825714285714286e-06, "loss": 27.9734, "step": 20534 }, { "epoch": 488.93134328358207, "grad_norm": 20.5208683013916, "learning_rate": 9.825238095238097e-06, "loss": 30.0463, "step": 20535 }, { "epoch": 488.95522388059703, "grad_norm": 24.053295135498047, "learning_rate": 9.824761904761906e-06, "loss": 29.2291, "step": 20536 }, { "epoch": 488.97910447761194, "grad_norm": 27.024316787719727, "learning_rate": 9.824285714285716e-06, "loss": 28.4378, "step": 20537 }, { "epoch": 489.0, "grad_norm": 16.129531860351562, "learning_rate": 9.823809523809524e-06, "loss": 26.0157, "step": 20538 }, { "epoch": 489.0238805970149, "grad_norm": 21.41499900817871, "learning_rate": 9.823333333333333e-06, "loss": 28.5449, "step": 20539 }, { "epoch": 489.0477611940299, "grad_norm": 19.848459243774414, "learning_rate": 9.822857142857144e-06, "loss": 28.6479, "step": 20540 }, { "epoch": 489.0716417910448, "grad_norm": 21.117923736572266, "learning_rate": 9.822380952380954e-06, "loss": 28.0015, "step": 20541 }, { "epoch": 489.0955223880597, "grad_norm": 23.345962524414062, "learning_rate": 9.821904761904763e-06, "loss": 29.4664, "step": 20542 }, { "epoch": 489.1194029850746, "grad_norm": 22.622806549072266, "learning_rate": 9.821428571428573e-06, "loss": 28.2795, "step": 20543 }, { "epoch": 489.14328358208957, "grad_norm": 20.679941177368164, "learning_rate": 9.820952380952382e-06, "loss": 27.8575, "step": 20544 }, { "epoch": 489.1671641791045, "grad_norm": 19.444679260253906, "learning_rate": 9.820476190476191e-06, "loss": 28.9399, "step": 20545 }, { "epoch": 489.1910447761194, "grad_norm": 24.61482048034668, "learning_rate": 9.820000000000001e-06, "loss": 29.4614, "step": 20546 }, { "epoch": 489.21492537313435, "grad_norm": 22.519628524780273, "learning_rate": 9.81952380952381e-06, "loss": 28.2059, "step": 20547 }, { "epoch": 489.23880597014926, "grad_norm": 24.81570053100586, "learning_rate": 9.81904761904762e-06, "loss": 28.1707, "step": 20548 }, { "epoch": 489.26268656716417, "grad_norm": 21.557804107666016, "learning_rate": 9.81857142857143e-06, "loss": 28.3905, "step": 20549 }, { "epoch": 489.28656716417913, "grad_norm": 18.237991333007812, "learning_rate": 9.818095238095239e-06, "loss": 27.9108, "step": 20550 }, { "epoch": 489.31044776119404, "grad_norm": 26.093198776245117, "learning_rate": 9.817619047619048e-06, "loss": 28.314, "step": 20551 }, { "epoch": 489.33432835820895, "grad_norm": 28.007614135742188, "learning_rate": 9.81714285714286e-06, "loss": 29.3687, "step": 20552 }, { "epoch": 489.35820895522386, "grad_norm": 20.229955673217773, "learning_rate": 9.816666666666667e-06, "loss": 28.474, "step": 20553 }, { "epoch": 489.3820895522388, "grad_norm": 22.9205322265625, "learning_rate": 9.816190476190476e-06, "loss": 28.2055, "step": 20554 }, { "epoch": 489.40597014925373, "grad_norm": 23.881938934326172, "learning_rate": 9.815714285714286e-06, "loss": 28.1712, "step": 20555 }, { "epoch": 489.42985074626864, "grad_norm": 25.942384719848633, "learning_rate": 9.815238095238097e-06, "loss": 28.6335, "step": 20556 }, { "epoch": 489.4537313432836, "grad_norm": 18.147974014282227, "learning_rate": 9.814761904761906e-06, "loss": 28.4984, "step": 20557 }, { "epoch": 489.4776119402985, "grad_norm": 20.59368896484375, "learning_rate": 9.814285714285716e-06, "loss": 28.8074, "step": 20558 }, { "epoch": 489.5014925373134, "grad_norm": 23.09203338623047, "learning_rate": 9.813809523809524e-06, "loss": 28.6791, "step": 20559 }, { "epoch": 489.52537313432833, "grad_norm": 19.724905014038086, "learning_rate": 9.813333333333333e-06, "loss": 29.4053, "step": 20560 }, { "epoch": 489.5492537313433, "grad_norm": 23.015987396240234, "learning_rate": 9.812857142857144e-06, "loss": 27.8035, "step": 20561 }, { "epoch": 489.5731343283582, "grad_norm": 23.38850212097168, "learning_rate": 9.812380952380954e-06, "loss": 28.0382, "step": 20562 }, { "epoch": 489.5970149253731, "grad_norm": 21.746702194213867, "learning_rate": 9.811904761904763e-06, "loss": 27.3168, "step": 20563 }, { "epoch": 489.6208955223881, "grad_norm": 20.839599609375, "learning_rate": 9.811428571428571e-06, "loss": 29.1533, "step": 20564 }, { "epoch": 489.644776119403, "grad_norm": 20.15467071533203, "learning_rate": 9.810952380952382e-06, "loss": 29.1555, "step": 20565 }, { "epoch": 489.6686567164179, "grad_norm": 17.326826095581055, "learning_rate": 9.810476190476191e-06, "loss": 29.3627, "step": 20566 }, { "epoch": 489.6925373134328, "grad_norm": 24.11992645263672, "learning_rate": 9.810000000000001e-06, "loss": 28.7936, "step": 20567 }, { "epoch": 489.7164179104478, "grad_norm": 20.472864151000977, "learning_rate": 9.80952380952381e-06, "loss": 28.9092, "step": 20568 }, { "epoch": 489.7402985074627, "grad_norm": 18.863283157348633, "learning_rate": 9.80904761904762e-06, "loss": 27.479, "step": 20569 }, { "epoch": 489.7641791044776, "grad_norm": 23.724214553833008, "learning_rate": 9.80857142857143e-06, "loss": 27.9523, "step": 20570 }, { "epoch": 489.78805970149256, "grad_norm": 18.749788284301758, "learning_rate": 9.808095238095239e-06, "loss": 28.2665, "step": 20571 }, { "epoch": 489.81194029850747, "grad_norm": 20.606130599975586, "learning_rate": 9.807619047619048e-06, "loss": 28.6258, "step": 20572 }, { "epoch": 489.8358208955224, "grad_norm": 21.461069107055664, "learning_rate": 9.807142857142858e-06, "loss": 28.5052, "step": 20573 }, { "epoch": 489.85970149253734, "grad_norm": 22.927797317504883, "learning_rate": 9.806666666666667e-06, "loss": 27.8446, "step": 20574 }, { "epoch": 489.88358208955225, "grad_norm": 21.20220184326172, "learning_rate": 9.806190476190477e-06, "loss": 28.0913, "step": 20575 }, { "epoch": 489.90746268656716, "grad_norm": 19.64929962158203, "learning_rate": 9.805714285714286e-06, "loss": 27.9339, "step": 20576 }, { "epoch": 489.93134328358207, "grad_norm": 21.036863327026367, "learning_rate": 9.805238095238097e-06, "loss": 28.4291, "step": 20577 }, { "epoch": 489.95522388059703, "grad_norm": 22.513463973999023, "learning_rate": 9.804761904761907e-06, "loss": 29.1155, "step": 20578 }, { "epoch": 489.97910447761194, "grad_norm": 25.38256072998047, "learning_rate": 9.804285714285714e-06, "loss": 28.8781, "step": 20579 }, { "epoch": 490.0, "grad_norm": 15.829379081726074, "learning_rate": 9.803809523809524e-06, "loss": 24.9238, "step": 20580 }, { "epoch": 490.0238805970149, "grad_norm": 24.550865173339844, "learning_rate": 9.803333333333333e-06, "loss": 29.5466, "step": 20581 }, { "epoch": 490.0477611940299, "grad_norm": 26.200740814208984, "learning_rate": 9.802857142857144e-06, "loss": 28.5415, "step": 20582 }, { "epoch": 490.0716417910448, "grad_norm": 24.397523880004883, "learning_rate": 9.802380952380954e-06, "loss": 27.8155, "step": 20583 }, { "epoch": 490.0955223880597, "grad_norm": 17.95159912109375, "learning_rate": 9.801904761904763e-06, "loss": 29.557, "step": 20584 }, { "epoch": 490.1194029850746, "grad_norm": 21.073650360107422, "learning_rate": 9.801428571428571e-06, "loss": 29.3913, "step": 20585 }, { "epoch": 490.14328358208957, "grad_norm": 18.33375358581543, "learning_rate": 9.800952380952382e-06, "loss": 27.4108, "step": 20586 }, { "epoch": 490.1671641791045, "grad_norm": 24.136680603027344, "learning_rate": 9.800476190476192e-06, "loss": 27.8751, "step": 20587 }, { "epoch": 490.1910447761194, "grad_norm": 25.90764045715332, "learning_rate": 9.800000000000001e-06, "loss": 27.9236, "step": 20588 }, { "epoch": 490.21492537313435, "grad_norm": 25.85698699951172, "learning_rate": 9.79952380952381e-06, "loss": 28.3007, "step": 20589 }, { "epoch": 490.23880597014926, "grad_norm": 18.927501678466797, "learning_rate": 9.79904761904762e-06, "loss": 28.3877, "step": 20590 }, { "epoch": 490.26268656716417, "grad_norm": 26.590017318725586, "learning_rate": 9.79857142857143e-06, "loss": 28.2776, "step": 20591 }, { "epoch": 490.28656716417913, "grad_norm": 21.516368865966797, "learning_rate": 9.798095238095239e-06, "loss": 28.4279, "step": 20592 }, { "epoch": 490.31044776119404, "grad_norm": 20.542613983154297, "learning_rate": 9.797619047619048e-06, "loss": 28.7866, "step": 20593 }, { "epoch": 490.33432835820895, "grad_norm": 17.622596740722656, "learning_rate": 9.797142857142858e-06, "loss": 28.3506, "step": 20594 }, { "epoch": 490.35820895522386, "grad_norm": 22.0216007232666, "learning_rate": 9.796666666666667e-06, "loss": 28.3317, "step": 20595 }, { "epoch": 490.3820895522388, "grad_norm": 19.57315444946289, "learning_rate": 9.796190476190477e-06, "loss": 27.5295, "step": 20596 }, { "epoch": 490.40597014925373, "grad_norm": 22.790512084960938, "learning_rate": 9.795714285714286e-06, "loss": 28.7739, "step": 20597 }, { "epoch": 490.42985074626864, "grad_norm": 26.92939567565918, "learning_rate": 9.795238095238097e-06, "loss": 27.6369, "step": 20598 }, { "epoch": 490.4537313432836, "grad_norm": 23.391000747680664, "learning_rate": 9.794761904761905e-06, "loss": 28.4678, "step": 20599 }, { "epoch": 490.4776119402985, "grad_norm": 17.914857864379883, "learning_rate": 9.794285714285714e-06, "loss": 28.5787, "step": 20600 }, { "epoch": 490.5014925373134, "grad_norm": 30.64154815673828, "learning_rate": 9.793809523809524e-06, "loss": 28.5934, "step": 20601 }, { "epoch": 490.52537313432833, "grad_norm": 24.924211502075195, "learning_rate": 9.793333333333333e-06, "loss": 28.0414, "step": 20602 }, { "epoch": 490.5492537313433, "grad_norm": 20.4571533203125, "learning_rate": 9.792857142857144e-06, "loss": 27.6581, "step": 20603 }, { "epoch": 490.5731343283582, "grad_norm": 29.659591674804688, "learning_rate": 9.792380952380954e-06, "loss": 28.3653, "step": 20604 }, { "epoch": 490.5970149253731, "grad_norm": 27.30054473876953, "learning_rate": 9.791904761904762e-06, "loss": 28.6001, "step": 20605 }, { "epoch": 490.6208955223881, "grad_norm": 20.443620681762695, "learning_rate": 9.791428571428571e-06, "loss": 29.7753, "step": 20606 }, { "epoch": 490.644776119403, "grad_norm": 22.74970245361328, "learning_rate": 9.790952380952382e-06, "loss": 29.5318, "step": 20607 }, { "epoch": 490.6686567164179, "grad_norm": 27.318384170532227, "learning_rate": 9.790476190476192e-06, "loss": 28.5929, "step": 20608 }, { "epoch": 490.6925373134328, "grad_norm": 22.951702117919922, "learning_rate": 9.790000000000001e-06, "loss": 29.2245, "step": 20609 }, { "epoch": 490.7164179104478, "grad_norm": 19.618398666381836, "learning_rate": 9.78952380952381e-06, "loss": 28.2458, "step": 20610 }, { "epoch": 490.7402985074627, "grad_norm": 23.40667724609375, "learning_rate": 9.78904761904762e-06, "loss": 29.2817, "step": 20611 }, { "epoch": 490.7641791044776, "grad_norm": 25.584617614746094, "learning_rate": 9.78857142857143e-06, "loss": 28.6231, "step": 20612 }, { "epoch": 490.78805970149256, "grad_norm": 18.078096389770508, "learning_rate": 9.788095238095239e-06, "loss": 28.3387, "step": 20613 }, { "epoch": 490.81194029850747, "grad_norm": 20.8928279876709, "learning_rate": 9.787619047619048e-06, "loss": 29.1491, "step": 20614 }, { "epoch": 490.8358208955224, "grad_norm": 20.053152084350586, "learning_rate": 9.787142857142858e-06, "loss": 28.4399, "step": 20615 }, { "epoch": 490.85970149253734, "grad_norm": 21.45441436767578, "learning_rate": 9.786666666666667e-06, "loss": 29.8171, "step": 20616 }, { "epoch": 490.88358208955225, "grad_norm": 22.75667381286621, "learning_rate": 9.786190476190477e-06, "loss": 28.2599, "step": 20617 }, { "epoch": 490.90746268656716, "grad_norm": 21.57240867614746, "learning_rate": 9.785714285714286e-06, "loss": 28.7879, "step": 20618 }, { "epoch": 490.93134328358207, "grad_norm": 19.456201553344727, "learning_rate": 9.785238095238097e-06, "loss": 26.8378, "step": 20619 }, { "epoch": 490.95522388059703, "grad_norm": NaN, "learning_rate": 9.784761904761905e-06, "loss": 36.7687, "step": 20620 }, { "epoch": 490.97910447761194, "grad_norm": 20.891599655151367, "learning_rate": 9.784761904761905e-06, "loss": 28.4224, "step": 20621 }, { "epoch": 491.0, "grad_norm": 18.681299209594727, "learning_rate": 9.784285714285715e-06, "loss": 24.346, "step": 20622 }, { "epoch": 491.0238805970149, "grad_norm": 17.457279205322266, "learning_rate": 9.783809523809524e-06, "loss": 28.4973, "step": 20623 }, { "epoch": 491.0477611940299, "grad_norm": 20.887794494628906, "learning_rate": 9.783333333333335e-06, "loss": 28.5767, "step": 20624 }, { "epoch": 491.0716417910448, "grad_norm": 23.88166046142578, "learning_rate": 9.782857142857145e-06, "loss": 29.0098, "step": 20625 }, { "epoch": 491.0955223880597, "grad_norm": 24.450098037719727, "learning_rate": 9.782380952380954e-06, "loss": 28.2996, "step": 20626 }, { "epoch": 491.1194029850746, "grad_norm": 21.02593994140625, "learning_rate": 9.781904761904762e-06, "loss": 27.6746, "step": 20627 }, { "epoch": 491.14328358208957, "grad_norm": 18.15875244140625, "learning_rate": 9.781428571428571e-06, "loss": 27.5657, "step": 20628 }, { "epoch": 491.1671641791045, "grad_norm": 21.549335479736328, "learning_rate": 9.780952380952382e-06, "loss": 29.2901, "step": 20629 }, { "epoch": 491.1910447761194, "grad_norm": 21.913280487060547, "learning_rate": 9.780476190476192e-06, "loss": 28.1726, "step": 20630 }, { "epoch": 491.21492537313435, "grad_norm": 19.391664505004883, "learning_rate": 9.780000000000001e-06, "loss": 28.8408, "step": 20631 }, { "epoch": 491.23880597014926, "grad_norm": 22.901397705078125, "learning_rate": 9.77952380952381e-06, "loss": 28.6942, "step": 20632 }, { "epoch": 491.26268656716417, "grad_norm": 21.61741065979004, "learning_rate": 9.77904761904762e-06, "loss": 28.81, "step": 20633 }, { "epoch": 491.28656716417913, "grad_norm": 18.582881927490234, "learning_rate": 9.77857142857143e-06, "loss": 28.9137, "step": 20634 }, { "epoch": 491.31044776119404, "grad_norm": 22.780109405517578, "learning_rate": 9.778095238095239e-06, "loss": 27.9429, "step": 20635 }, { "epoch": 491.33432835820895, "grad_norm": 18.461631774902344, "learning_rate": 9.777619047619048e-06, "loss": 27.4235, "step": 20636 }, { "epoch": 491.35820895522386, "grad_norm": 20.57916831970215, "learning_rate": 9.777142857142858e-06, "loss": 28.3781, "step": 20637 }, { "epoch": 491.3820895522388, "grad_norm": 22.73825454711914, "learning_rate": 9.776666666666667e-06, "loss": 28.005, "step": 20638 }, { "epoch": 491.40597014925373, "grad_norm": 22.66205406188965, "learning_rate": 9.776190476190477e-06, "loss": 28.9232, "step": 20639 }, { "epoch": 491.42985074626864, "grad_norm": 21.115219116210938, "learning_rate": 9.775714285714286e-06, "loss": 28.9405, "step": 20640 }, { "epoch": 491.4537313432836, "grad_norm": 20.472864151000977, "learning_rate": 9.775238095238096e-06, "loss": 27.7504, "step": 20641 }, { "epoch": 491.4776119402985, "grad_norm": 19.246265411376953, "learning_rate": 9.774761904761905e-06, "loss": 28.4893, "step": 20642 }, { "epoch": 491.5014925373134, "grad_norm": 24.54819107055664, "learning_rate": 9.774285714285715e-06, "loss": 28.4976, "step": 20643 }, { "epoch": 491.52537313432833, "grad_norm": 22.02290153503418, "learning_rate": 9.773809523809524e-06, "loss": 26.8735, "step": 20644 }, { "epoch": 491.5492537313433, "grad_norm": 26.296279907226562, "learning_rate": 9.773333333333335e-06, "loss": 28.2008, "step": 20645 }, { "epoch": 491.5731343283582, "grad_norm": 17.50170135498047, "learning_rate": 9.772857142857145e-06, "loss": 27.6978, "step": 20646 }, { "epoch": 491.5970149253731, "grad_norm": 26.12645149230957, "learning_rate": 9.772380952380952e-06, "loss": 29.1888, "step": 20647 }, { "epoch": 491.6208955223881, "grad_norm": 20.375770568847656, "learning_rate": 9.771904761904762e-06, "loss": 29.6394, "step": 20648 }, { "epoch": 491.644776119403, "grad_norm": 22.93171501159668, "learning_rate": 9.771428571428571e-06, "loss": 28.6734, "step": 20649 }, { "epoch": 491.6686567164179, "grad_norm": 20.172964096069336, "learning_rate": 9.770952380952382e-06, "loss": 29.379, "step": 20650 }, { "epoch": 491.6925373134328, "grad_norm": 21.29521369934082, "learning_rate": 9.770476190476192e-06, "loss": 27.9935, "step": 20651 }, { "epoch": 491.7164179104478, "grad_norm": 22.010820388793945, "learning_rate": 9.770000000000001e-06, "loss": 28.3992, "step": 20652 }, { "epoch": 491.7402985074627, "grad_norm": 24.478124618530273, "learning_rate": 9.769523809523809e-06, "loss": 28.2913, "step": 20653 }, { "epoch": 491.7641791044776, "grad_norm": 24.808719635009766, "learning_rate": 9.76904761904762e-06, "loss": 28.9887, "step": 20654 }, { "epoch": 491.78805970149256, "grad_norm": 19.781068801879883, "learning_rate": 9.76857142857143e-06, "loss": 28.4979, "step": 20655 }, { "epoch": 491.81194029850747, "grad_norm": 26.73129653930664, "learning_rate": 9.768095238095239e-06, "loss": 28.2099, "step": 20656 }, { "epoch": 491.8358208955224, "grad_norm": 24.078277587890625, "learning_rate": 9.767619047619049e-06, "loss": 28.0062, "step": 20657 }, { "epoch": 491.85970149253734, "grad_norm": 20.16584587097168, "learning_rate": 9.767142857142858e-06, "loss": 28.2081, "step": 20658 }, { "epoch": 491.88358208955225, "grad_norm": 19.427953720092773, "learning_rate": 9.766666666666667e-06, "loss": 28.8106, "step": 20659 }, { "epoch": 491.90746268656716, "grad_norm": 25.712688446044922, "learning_rate": 9.766190476190477e-06, "loss": 27.6759, "step": 20660 }, { "epoch": 491.93134328358207, "grad_norm": 31.555383682250977, "learning_rate": 9.765714285714286e-06, "loss": 28.4752, "step": 20661 }, { "epoch": 491.95522388059703, "grad_norm": 19.92830467224121, "learning_rate": 9.765238095238096e-06, "loss": 27.751, "step": 20662 }, { "epoch": 491.97910447761194, "grad_norm": 37.74251174926758, "learning_rate": 9.764761904761905e-06, "loss": 29.5221, "step": 20663 }, { "epoch": 492.0, "grad_norm": 23.967205047607422, "learning_rate": 9.764285714285715e-06, "loss": 25.7123, "step": 20664 }, { "epoch": 492.0238805970149, "grad_norm": 32.29738998413086, "learning_rate": 9.763809523809524e-06, "loss": 27.8498, "step": 20665 }, { "epoch": 492.0477611940299, "grad_norm": 28.319860458374023, "learning_rate": 9.763333333333335e-06, "loss": 28.3659, "step": 20666 }, { "epoch": 492.0716417910448, "grad_norm": 27.074281692504883, "learning_rate": 9.762857142857145e-06, "loss": 28.6927, "step": 20667 }, { "epoch": 492.0955223880597, "grad_norm": 22.259380340576172, "learning_rate": 9.762380952380952e-06, "loss": 28.6833, "step": 20668 }, { "epoch": 492.1194029850746, "grad_norm": 36.11917495727539, "learning_rate": 9.761904761904762e-06, "loss": 26.906, "step": 20669 }, { "epoch": 492.14328358208957, "grad_norm": 27.730810165405273, "learning_rate": 9.761428571428571e-06, "loss": 27.4526, "step": 20670 }, { "epoch": 492.1671641791045, "grad_norm": 38.335716247558594, "learning_rate": 9.760952380952383e-06, "loss": 28.4822, "step": 20671 }, { "epoch": 492.1910447761194, "grad_norm": 30.382904052734375, "learning_rate": 9.760476190476192e-06, "loss": 27.8971, "step": 20672 }, { "epoch": 492.21492537313435, "grad_norm": 37.75140380859375, "learning_rate": 9.760000000000001e-06, "loss": 28.7792, "step": 20673 }, { "epoch": 492.23880597014926, "grad_norm": 33.536014556884766, "learning_rate": 9.75952380952381e-06, "loss": 28.5703, "step": 20674 }, { "epoch": 492.26268656716417, "grad_norm": 27.25049591064453, "learning_rate": 9.75904761904762e-06, "loss": 27.561, "step": 20675 }, { "epoch": 492.28656716417913, "grad_norm": 37.93716812133789, "learning_rate": 9.75857142857143e-06, "loss": 28.389, "step": 20676 }, { "epoch": 492.31044776119404, "grad_norm": 22.673091888427734, "learning_rate": 9.75809523809524e-06, "loss": 28.4781, "step": 20677 }, { "epoch": 492.33432835820895, "grad_norm": 45.00788116455078, "learning_rate": 9.757619047619049e-06, "loss": 27.4936, "step": 20678 }, { "epoch": 492.35820895522386, "grad_norm": 34.466392517089844, "learning_rate": 9.757142857142858e-06, "loss": 28.5451, "step": 20679 }, { "epoch": 492.3820895522388, "grad_norm": 47.35041809082031, "learning_rate": 9.756666666666668e-06, "loss": 28.4675, "step": 20680 }, { "epoch": 492.40597014925373, "grad_norm": 40.00800323486328, "learning_rate": 9.756190476190477e-06, "loss": 29.6053, "step": 20681 }, { "epoch": 492.42985074626864, "grad_norm": 42.6414909362793, "learning_rate": 9.755714285714286e-06, "loss": 28.1585, "step": 20682 }, { "epoch": 492.4537313432836, "grad_norm": 33.7565803527832, "learning_rate": 9.755238095238096e-06, "loss": 28.4284, "step": 20683 }, { "epoch": 492.4776119402985, "grad_norm": 46.34212112426758, "learning_rate": 9.754761904761905e-06, "loss": 28.8695, "step": 20684 }, { "epoch": 492.5014925373134, "grad_norm": 37.16715621948242, "learning_rate": 9.754285714285715e-06, "loss": 28.0263, "step": 20685 }, { "epoch": 492.52537313432833, "grad_norm": 43.722320556640625, "learning_rate": 9.753809523809524e-06, "loss": 28.6178, "step": 20686 }, { "epoch": 492.5492537313433, "grad_norm": 41.61458969116211, "learning_rate": 9.753333333333335e-06, "loss": 29.4911, "step": 20687 }, { "epoch": 492.5731343283582, "grad_norm": 36.70028305053711, "learning_rate": 9.752857142857143e-06, "loss": 28.5247, "step": 20688 }, { "epoch": 492.5970149253731, "grad_norm": 31.55572509765625, "learning_rate": 9.752380952380953e-06, "loss": 27.876, "step": 20689 }, { "epoch": 492.6208955223881, "grad_norm": 44.605464935302734, "learning_rate": 9.751904761904762e-06, "loss": 28.996, "step": 20690 }, { "epoch": 492.644776119403, "grad_norm": 32.62446212768555, "learning_rate": 9.751428571428571e-06, "loss": 29.1097, "step": 20691 }, { "epoch": 492.6686567164179, "grad_norm": 44.07311248779297, "learning_rate": 9.750952380952383e-06, "loss": 28.472, "step": 20692 }, { "epoch": 492.6925373134328, "grad_norm": 37.4632453918457, "learning_rate": 9.750476190476192e-06, "loss": 27.7104, "step": 20693 }, { "epoch": 492.7164179104478, "grad_norm": 37.803680419921875, "learning_rate": 9.75e-06, "loss": 28.4512, "step": 20694 }, { "epoch": 492.7402985074627, "grad_norm": 36.55305862426758, "learning_rate": 9.74952380952381e-06, "loss": 28.4662, "step": 20695 }, { "epoch": 492.7641791044776, "grad_norm": 39.61387634277344, "learning_rate": 9.74904761904762e-06, "loss": 29.04, "step": 20696 }, { "epoch": 492.78805970149256, "grad_norm": 33.36046600341797, "learning_rate": 9.74857142857143e-06, "loss": 28.1473, "step": 20697 }, { "epoch": 492.81194029850747, "grad_norm": 42.039085388183594, "learning_rate": 9.74809523809524e-06, "loss": 29.2967, "step": 20698 }, { "epoch": 492.8358208955224, "grad_norm": 35.935523986816406, "learning_rate": 9.747619047619049e-06, "loss": 28.9062, "step": 20699 }, { "epoch": 492.85970149253734, "grad_norm": 40.904727935791016, "learning_rate": 9.747142857142858e-06, "loss": 27.0336, "step": 20700 }, { "epoch": 492.88358208955225, "grad_norm": 38.23332595825195, "learning_rate": 9.746666666666668e-06, "loss": 28.3555, "step": 20701 }, { "epoch": 492.90746268656716, "grad_norm": 36.79536437988281, "learning_rate": 9.746190476190477e-06, "loss": 28.3954, "step": 20702 }, { "epoch": 492.93134328358207, "grad_norm": 34.364891052246094, "learning_rate": 9.745714285714287e-06, "loss": 28.6514, "step": 20703 }, { "epoch": 492.95522388059703, "grad_norm": 40.375328063964844, "learning_rate": 9.745238095238096e-06, "loss": 28.8156, "step": 20704 }, { "epoch": 492.97910447761194, "grad_norm": 33.1904411315918, "learning_rate": 9.744761904761905e-06, "loss": 27.9513, "step": 20705 }, { "epoch": 493.0, "grad_norm": 37.56892776489258, "learning_rate": 9.744285714285715e-06, "loss": 24.7048, "step": 20706 }, { "epoch": 493.0238805970149, "grad_norm": 40.0173454284668, "learning_rate": 9.743809523809524e-06, "loss": 28.0617, "step": 20707 }, { "epoch": 493.0477611940299, "grad_norm": 37.81289291381836, "learning_rate": 9.743333333333335e-06, "loss": 28.1582, "step": 20708 }, { "epoch": 493.0716417910448, "grad_norm": 36.91128921508789, "learning_rate": 9.742857142857143e-06, "loss": 28.8376, "step": 20709 }, { "epoch": 493.0955223880597, "grad_norm": 38.849449157714844, "learning_rate": 9.742380952380953e-06, "loss": 28.3411, "step": 20710 }, { "epoch": 493.1194029850746, "grad_norm": 32.94038772583008, "learning_rate": 9.741904761904762e-06, "loss": 29.4291, "step": 20711 }, { "epoch": 493.14328358208957, "grad_norm": 43.56431198120117, "learning_rate": 9.741428571428572e-06, "loss": 28.7687, "step": 20712 }, { "epoch": 493.1671641791045, "grad_norm": 37.03376770019531, "learning_rate": 9.740952380952383e-06, "loss": 27.7614, "step": 20713 }, { "epoch": 493.1910447761194, "grad_norm": 38.53476333618164, "learning_rate": 9.74047619047619e-06, "loss": 28.7037, "step": 20714 }, { "epoch": 493.21492537313435, "grad_norm": 34.72297668457031, "learning_rate": 9.74e-06, "loss": 28.1507, "step": 20715 }, { "epoch": 493.23880597014926, "grad_norm": 38.77022933959961, "learning_rate": 9.73952380952381e-06, "loss": 28.2794, "step": 20716 }, { "epoch": 493.26268656716417, "grad_norm": 35.69537353515625, "learning_rate": 9.73904761904762e-06, "loss": 28.179, "step": 20717 }, { "epoch": 493.28656716417913, "grad_norm": 37.19712829589844, "learning_rate": 9.73857142857143e-06, "loss": 28.9231, "step": 20718 }, { "epoch": 493.31044776119404, "grad_norm": 36.00667953491211, "learning_rate": 9.73809523809524e-06, "loss": 28.351, "step": 20719 }, { "epoch": 493.33432835820895, "grad_norm": 40.51714324951172, "learning_rate": 9.737619047619047e-06, "loss": 28.2965, "step": 20720 }, { "epoch": 493.35820895522386, "grad_norm": 34.30084991455078, "learning_rate": 9.737142857142858e-06, "loss": 28.6552, "step": 20721 }, { "epoch": 493.3820895522388, "grad_norm": 39.74504470825195, "learning_rate": 9.736666666666668e-06, "loss": 27.5894, "step": 20722 }, { "epoch": 493.40597014925373, "grad_norm": 33.48587417602539, "learning_rate": 9.736190476190477e-06, "loss": 28.6247, "step": 20723 }, { "epoch": 493.42985074626864, "grad_norm": 38.34832000732422, "learning_rate": 9.735714285714287e-06, "loss": 27.4877, "step": 20724 }, { "epoch": 493.4537313432836, "grad_norm": 34.52985763549805, "learning_rate": 9.735238095238096e-06, "loss": 27.321, "step": 20725 }, { "epoch": 493.4776119402985, "grad_norm": 40.60979080200195, "learning_rate": 9.734761904761906e-06, "loss": 28.1651, "step": 20726 }, { "epoch": 493.5014925373134, "grad_norm": 35.58258819580078, "learning_rate": 9.734285714285715e-06, "loss": 29.0992, "step": 20727 }, { "epoch": 493.52537313432833, "grad_norm": 36.798336029052734, "learning_rate": 9.733809523809524e-06, "loss": 28.0711, "step": 20728 }, { "epoch": 493.5492537313433, "grad_norm": 33.452545166015625, "learning_rate": 9.733333333333334e-06, "loss": 27.9841, "step": 20729 }, { "epoch": 493.5731343283582, "grad_norm": 34.856407165527344, "learning_rate": 9.732857142857143e-06, "loss": 28.452, "step": 20730 }, { "epoch": 493.5970149253731, "grad_norm": 28.579875946044922, "learning_rate": 9.732380952380953e-06, "loss": 28.4542, "step": 20731 }, { "epoch": 493.6208955223881, "grad_norm": 41.04036331176758, "learning_rate": 9.731904761904762e-06, "loss": 27.1222, "step": 20732 }, { "epoch": 493.644776119403, "grad_norm": 37.15867233276367, "learning_rate": 9.731428571428573e-06, "loss": 28.4153, "step": 20733 }, { "epoch": 493.6686567164179, "grad_norm": 36.4201545715332, "learning_rate": 9.730952380952383e-06, "loss": 28.1228, "step": 20734 }, { "epoch": 493.6925373134328, "grad_norm": 37.86699676513672, "learning_rate": 9.73047619047619e-06, "loss": 28.3348, "step": 20735 }, { "epoch": 493.7164179104478, "grad_norm": 33.85234069824219, "learning_rate": 9.73e-06, "loss": 28.4184, "step": 20736 }, { "epoch": 493.7402985074627, "grad_norm": 28.41228675842285, "learning_rate": 9.72952380952381e-06, "loss": 28.439, "step": 20737 }, { "epoch": 493.7641791044776, "grad_norm": 39.730228424072266, "learning_rate": 9.72904761904762e-06, "loss": 27.888, "step": 20738 }, { "epoch": 493.78805970149256, "grad_norm": 32.52084732055664, "learning_rate": 9.72857142857143e-06, "loss": 28.8421, "step": 20739 }, { "epoch": 493.81194029850747, "grad_norm": 42.68559265136719, "learning_rate": 9.72809523809524e-06, "loss": 28.4025, "step": 20740 }, { "epoch": 493.8358208955224, "grad_norm": 37.72262954711914, "learning_rate": 9.727619047619047e-06, "loss": 28.3117, "step": 20741 }, { "epoch": 493.85970149253734, "grad_norm": 32.145347595214844, "learning_rate": 9.727142857142858e-06, "loss": 28.1222, "step": 20742 }, { "epoch": 493.88358208955225, "grad_norm": 31.229267120361328, "learning_rate": 9.726666666666668e-06, "loss": 28.8152, "step": 20743 }, { "epoch": 493.90746268656716, "grad_norm": 37.106746673583984, "learning_rate": 9.726190476190477e-06, "loss": 28.6515, "step": 20744 }, { "epoch": 493.93134328358207, "grad_norm": 30.613983154296875, "learning_rate": 9.725714285714287e-06, "loss": 28.4688, "step": 20745 }, { "epoch": 493.95522388059703, "grad_norm": 42.809478759765625, "learning_rate": 9.725238095238096e-06, "loss": 28.5701, "step": 20746 }, { "epoch": 493.97910447761194, "grad_norm": 41.640323638916016, "learning_rate": 9.724761904761906e-06, "loss": 28.7361, "step": 20747 }, { "epoch": 494.0, "grad_norm": 29.621244430541992, "learning_rate": 9.724285714285715e-06, "loss": 25.1453, "step": 20748 }, { "epoch": 494.0238805970149, "grad_norm": 33.29628372192383, "learning_rate": 9.723809523809525e-06, "loss": 26.414, "step": 20749 }, { "epoch": 494.0477611940299, "grad_norm": 35.371337890625, "learning_rate": 9.723333333333334e-06, "loss": 29.0779, "step": 20750 }, { "epoch": 494.0716417910448, "grad_norm": 28.785614013671875, "learning_rate": 9.722857142857143e-06, "loss": 27.5618, "step": 20751 }, { "epoch": 494.0955223880597, "grad_norm": 42.724124908447266, "learning_rate": 9.722380952380953e-06, "loss": 29.1393, "step": 20752 }, { "epoch": 494.1194029850746, "grad_norm": 37.532466888427734, "learning_rate": 9.721904761904762e-06, "loss": 28.0168, "step": 20753 }, { "epoch": 494.14328358208957, "grad_norm": 34.835758209228516, "learning_rate": 9.721428571428573e-06, "loss": 28.8982, "step": 20754 }, { "epoch": 494.1671641791045, "grad_norm": 32.66145324707031, "learning_rate": 9.720952380952381e-06, "loss": 28.4657, "step": 20755 }, { "epoch": 494.1910447761194, "grad_norm": 36.15680694580078, "learning_rate": 9.72047619047619e-06, "loss": 28.8591, "step": 20756 }, { "epoch": 494.21492537313435, "grad_norm": 31.200634002685547, "learning_rate": 9.72e-06, "loss": 28.7846, "step": 20757 }, { "epoch": 494.23880597014926, "grad_norm": 38.072357177734375, "learning_rate": 9.71952380952381e-06, "loss": 28.5182, "step": 20758 }, { "epoch": 494.26268656716417, "grad_norm": 34.48644256591797, "learning_rate": 9.71904761904762e-06, "loss": 28.1121, "step": 20759 }, { "epoch": 494.28656716417913, "grad_norm": 35.038108825683594, "learning_rate": 9.71857142857143e-06, "loss": 26.9679, "step": 20760 }, { "epoch": 494.31044776119404, "grad_norm": 31.996654510498047, "learning_rate": 9.718095238095238e-06, "loss": 27.355, "step": 20761 }, { "epoch": 494.33432835820895, "grad_norm": 37.83359146118164, "learning_rate": 9.717619047619047e-06, "loss": 28.4442, "step": 20762 }, { "epoch": 494.35820895522386, "grad_norm": 32.747623443603516, "learning_rate": 9.717142857142858e-06, "loss": 29.8979, "step": 20763 }, { "epoch": 494.3820895522388, "grad_norm": 38.4122314453125, "learning_rate": 9.716666666666668e-06, "loss": 28.6277, "step": 20764 }, { "epoch": 494.40597014925373, "grad_norm": 33.479129791259766, "learning_rate": 9.716190476190477e-06, "loss": 28.9361, "step": 20765 }, { "epoch": 494.42985074626864, "grad_norm": 35.67233657836914, "learning_rate": 9.715714285714287e-06, "loss": 29.6986, "step": 20766 }, { "epoch": 494.4537313432836, "grad_norm": 34.51249313354492, "learning_rate": 9.715238095238096e-06, "loss": 29.3574, "step": 20767 }, { "epoch": 494.4776119402985, "grad_norm": 36.210018157958984, "learning_rate": 9.714761904761906e-06, "loss": 27.5173, "step": 20768 }, { "epoch": 494.5014925373134, "grad_norm": 30.73526382446289, "learning_rate": 9.714285714285715e-06, "loss": 27.5233, "step": 20769 }, { "epoch": 494.52537313432833, "grad_norm": 38.386051177978516, "learning_rate": 9.713809523809525e-06, "loss": 27.4539, "step": 20770 }, { "epoch": 494.5492537313433, "grad_norm": 34.76150894165039, "learning_rate": 9.713333333333334e-06, "loss": 27.7409, "step": 20771 }, { "epoch": 494.5731343283582, "grad_norm": 38.53193664550781, "learning_rate": 9.712857142857144e-06, "loss": 27.9445, "step": 20772 }, { "epoch": 494.5970149253731, "grad_norm": 32.93208312988281, "learning_rate": 9.712380952380953e-06, "loss": 28.5703, "step": 20773 }, { "epoch": 494.6208955223881, "grad_norm": 34.39596939086914, "learning_rate": 9.711904761904762e-06, "loss": 29.0774, "step": 20774 }, { "epoch": 494.644776119403, "grad_norm": 31.07556915283203, "learning_rate": 9.711428571428574e-06, "loss": 29.1861, "step": 20775 }, { "epoch": 494.6686567164179, "grad_norm": 35.258174896240234, "learning_rate": 9.710952380952381e-06, "loss": 28.9587, "step": 20776 }, { "epoch": 494.6925373134328, "grad_norm": 32.24302673339844, "learning_rate": 9.71047619047619e-06, "loss": 27.7512, "step": 20777 }, { "epoch": 494.7164179104478, "grad_norm": 40.00369644165039, "learning_rate": 9.71e-06, "loss": 28.351, "step": 20778 }, { "epoch": 494.7402985074627, "grad_norm": 35.587223052978516, "learning_rate": 9.70952380952381e-06, "loss": 27.8588, "step": 20779 }, { "epoch": 494.7641791044776, "grad_norm": 37.51652908325195, "learning_rate": 9.70904761904762e-06, "loss": 28.6537, "step": 20780 }, { "epoch": 494.78805970149256, "grad_norm": 35.903865814208984, "learning_rate": 9.70857142857143e-06, "loss": 27.9905, "step": 20781 }, { "epoch": 494.81194029850747, "grad_norm": 33.109432220458984, "learning_rate": 9.708095238095238e-06, "loss": 28.0375, "step": 20782 }, { "epoch": 494.8358208955224, "grad_norm": 33.18064880371094, "learning_rate": 9.707619047619047e-06, "loss": 27.7617, "step": 20783 }, { "epoch": 494.85970149253734, "grad_norm": 35.20663833618164, "learning_rate": 9.707142857142859e-06, "loss": 29.5696, "step": 20784 }, { "epoch": 494.88358208955225, "grad_norm": 29.432218551635742, "learning_rate": 9.706666666666668e-06, "loss": 27.1347, "step": 20785 }, { "epoch": 494.90746268656716, "grad_norm": 38.63973617553711, "learning_rate": 9.706190476190477e-06, "loss": 29.2168, "step": 20786 }, { "epoch": 494.93134328358207, "grad_norm": 34.23154830932617, "learning_rate": 9.705714285714287e-06, "loss": 28.3625, "step": 20787 }, { "epoch": 494.95522388059703, "grad_norm": 34.66427993774414, "learning_rate": 9.705238095238096e-06, "loss": 27.856, "step": 20788 }, { "epoch": 494.97910447761194, "grad_norm": 32.1384391784668, "learning_rate": 9.704761904761906e-06, "loss": 27.9655, "step": 20789 }, { "epoch": 495.0, "grad_norm": 30.091075897216797, "learning_rate": 9.704285714285715e-06, "loss": 24.5011, "step": 20790 }, { "epoch": 495.0238805970149, "grad_norm": 33.29294967651367, "learning_rate": 9.703809523809525e-06, "loss": 28.5263, "step": 20791 }, { "epoch": 495.0477611940299, "grad_norm": 33.34843826293945, "learning_rate": 9.703333333333334e-06, "loss": 27.6245, "step": 20792 }, { "epoch": 495.0716417910448, "grad_norm": 28.877355575561523, "learning_rate": 9.702857142857144e-06, "loss": 27.8937, "step": 20793 }, { "epoch": 495.0955223880597, "grad_norm": 33.017906188964844, "learning_rate": 9.702380952380953e-06, "loss": 28.7918, "step": 20794 }, { "epoch": 495.1194029850746, "grad_norm": 26.1226806640625, "learning_rate": 9.701904761904763e-06, "loss": 29.0832, "step": 20795 }, { "epoch": 495.14328358208957, "grad_norm": 33.49978256225586, "learning_rate": 9.701428571428572e-06, "loss": 28.4444, "step": 20796 }, { "epoch": 495.1671641791045, "grad_norm": 25.88732147216797, "learning_rate": 9.700952380952381e-06, "loss": 27.4717, "step": 20797 }, { "epoch": 495.1910447761194, "grad_norm": 36.241397857666016, "learning_rate": 9.700476190476191e-06, "loss": 28.4154, "step": 20798 }, { "epoch": 495.21492537313435, "grad_norm": 27.889394760131836, "learning_rate": 9.7e-06, "loss": 27.7944, "step": 20799 }, { "epoch": 495.23880597014926, "grad_norm": 33.18143844604492, "learning_rate": 9.69952380952381e-06, "loss": 27.9291, "step": 20800 }, { "epoch": 495.26268656716417, "grad_norm": 29.475618362426758, "learning_rate": 9.699047619047621e-06, "loss": 29.0945, "step": 20801 }, { "epoch": 495.28656716417913, "grad_norm": 31.63974952697754, "learning_rate": 9.698571428571429e-06, "loss": 28.1906, "step": 20802 }, { "epoch": 495.31044776119404, "grad_norm": 26.535804748535156, "learning_rate": 9.698095238095238e-06, "loss": 28.2301, "step": 20803 }, { "epoch": 495.33432835820895, "grad_norm": 29.104087829589844, "learning_rate": 9.697619047619048e-06, "loss": 28.5987, "step": 20804 }, { "epoch": 495.35820895522386, "grad_norm": 25.404827117919922, "learning_rate": 9.697142857142859e-06, "loss": 28.3068, "step": 20805 }, { "epoch": 495.3820895522388, "grad_norm": 31.84894371032715, "learning_rate": 9.696666666666668e-06, "loss": 28.1829, "step": 20806 }, { "epoch": 495.40597014925373, "grad_norm": 24.639556884765625, "learning_rate": 9.696190476190478e-06, "loss": 27.1795, "step": 20807 }, { "epoch": 495.42985074626864, "grad_norm": 27.750885009765625, "learning_rate": 9.695714285714285e-06, "loss": 28.8613, "step": 20808 }, { "epoch": 495.4537313432836, "grad_norm": 24.12563705444336, "learning_rate": 9.695238095238096e-06, "loss": 29.4943, "step": 20809 }, { "epoch": 495.4776119402985, "grad_norm": 24.00174903869629, "learning_rate": 9.694761904761906e-06, "loss": 27.4107, "step": 20810 }, { "epoch": 495.5014925373134, "grad_norm": 22.775577545166016, "learning_rate": 9.694285714285715e-06, "loss": 28.7685, "step": 20811 }, { "epoch": 495.52537313432833, "grad_norm": 26.96075439453125, "learning_rate": 9.693809523809525e-06, "loss": 27.3087, "step": 20812 }, { "epoch": 495.5492537313433, "grad_norm": 24.903715133666992, "learning_rate": 9.693333333333334e-06, "loss": 27.9682, "step": 20813 }, { "epoch": 495.5731343283582, "grad_norm": 23.05387306213379, "learning_rate": 9.692857142857144e-06, "loss": 29.8665, "step": 20814 }, { "epoch": 495.5970149253731, "grad_norm": 25.141271591186523, "learning_rate": 9.692380952380953e-06, "loss": 28.7374, "step": 20815 }, { "epoch": 495.6208955223881, "grad_norm": 22.172853469848633, "learning_rate": 9.691904761904763e-06, "loss": 27.5904, "step": 20816 }, { "epoch": 495.644776119403, "grad_norm": 23.813955307006836, "learning_rate": 9.691428571428572e-06, "loss": 28.0566, "step": 20817 }, { "epoch": 495.6686567164179, "grad_norm": 20.350666046142578, "learning_rate": 9.690952380952382e-06, "loss": 27.8285, "step": 20818 }, { "epoch": 495.6925373134328, "grad_norm": 24.900541305541992, "learning_rate": 9.690476190476191e-06, "loss": 28.5592, "step": 20819 }, { "epoch": 495.7164179104478, "grad_norm": 26.733436584472656, "learning_rate": 9.69e-06, "loss": 28.8422, "step": 20820 }, { "epoch": 495.7402985074627, "grad_norm": 23.212038040161133, "learning_rate": 9.68952380952381e-06, "loss": 28.1579, "step": 20821 }, { "epoch": 495.7641791044776, "grad_norm": 20.795501708984375, "learning_rate": 9.689047619047621e-06, "loss": 28.8828, "step": 20822 }, { "epoch": 495.78805970149256, "grad_norm": 26.071020126342773, "learning_rate": 9.688571428571429e-06, "loss": 29.0935, "step": 20823 }, { "epoch": 495.81194029850747, "grad_norm": 22.473451614379883, "learning_rate": 9.688095238095238e-06, "loss": 29.0665, "step": 20824 }, { "epoch": 495.8358208955224, "grad_norm": 19.913673400878906, "learning_rate": 9.687619047619048e-06, "loss": 28.2591, "step": 20825 }, { "epoch": 495.85970149253734, "grad_norm": 21.749326705932617, "learning_rate": 9.687142857142859e-06, "loss": 27.8754, "step": 20826 }, { "epoch": 495.88358208955225, "grad_norm": 24.374298095703125, "learning_rate": 9.686666666666668e-06, "loss": 28.917, "step": 20827 }, { "epoch": 495.90746268656716, "grad_norm": 21.94034767150879, "learning_rate": 9.686190476190476e-06, "loss": 27.9317, "step": 20828 }, { "epoch": 495.93134328358207, "grad_norm": 20.84528160095215, "learning_rate": 9.685714285714285e-06, "loss": 27.2631, "step": 20829 }, { "epoch": 495.95522388059703, "grad_norm": 26.622499465942383, "learning_rate": 9.685238095238097e-06, "loss": 28.6444, "step": 20830 }, { "epoch": 495.97910447761194, "grad_norm": 22.875476837158203, "learning_rate": 9.684761904761906e-06, "loss": 28.4684, "step": 20831 }, { "epoch": 496.0, "grad_norm": 18.989648818969727, "learning_rate": 9.684285714285715e-06, "loss": 24.8908, "step": 20832 }, { "epoch": 496.0238805970149, "grad_norm": 19.712080001831055, "learning_rate": 9.683809523809525e-06, "loss": 28.5795, "step": 20833 }, { "epoch": 496.0477611940299, "grad_norm": 22.42276382446289, "learning_rate": 9.683333333333334e-06, "loss": 28.7692, "step": 20834 }, { "epoch": 496.0716417910448, "grad_norm": NaN, "learning_rate": 9.682857142857144e-06, "loss": 49.3076, "step": 20835 }, { "epoch": 496.0955223880597, "grad_norm": 20.44896697998047, "learning_rate": 9.682857142857144e-06, "loss": 26.3887, "step": 20836 }, { "epoch": 496.1194029850746, "grad_norm": 20.0717830657959, "learning_rate": 9.682380952380953e-06, "loss": 28.3452, "step": 20837 }, { "epoch": 496.14328358208957, "grad_norm": 19.319658279418945, "learning_rate": 9.681904761904763e-06, "loss": 28.1769, "step": 20838 }, { "epoch": 496.1671641791045, "grad_norm": 21.62850570678711, "learning_rate": 9.681428571428572e-06, "loss": 27.3586, "step": 20839 }, { "epoch": 496.1910447761194, "grad_norm": 26.521385192871094, "learning_rate": 9.680952380952382e-06, "loss": 28.433, "step": 20840 }, { "epoch": 496.21492537313435, "grad_norm": 20.801700592041016, "learning_rate": 9.680476190476191e-06, "loss": 27.3759, "step": 20841 }, { "epoch": 496.23880597014926, "grad_norm": 20.51534652709961, "learning_rate": 9.68e-06, "loss": 28.405, "step": 20842 }, { "epoch": 496.26268656716417, "grad_norm": 21.475597381591797, "learning_rate": 9.67952380952381e-06, "loss": 28.4365, "step": 20843 }, { "epoch": 496.28656716417913, "grad_norm": 21.273569107055664, "learning_rate": 9.67904761904762e-06, "loss": 27.7037, "step": 20844 }, { "epoch": 496.31044776119404, "grad_norm": 20.819162368774414, "learning_rate": 9.678571428571429e-06, "loss": 29.0734, "step": 20845 }, { "epoch": 496.33432835820895, "grad_norm": 18.446334838867188, "learning_rate": 9.678095238095238e-06, "loss": 27.6641, "step": 20846 }, { "epoch": 496.35820895522386, "grad_norm": 21.709346771240234, "learning_rate": 9.677619047619048e-06, "loss": 28.5816, "step": 20847 }, { "epoch": 496.3820895522388, "grad_norm": 23.450361251831055, "learning_rate": 9.677142857142859e-06, "loss": 28.6713, "step": 20848 }, { "epoch": 496.40597014925373, "grad_norm": 22.419282913208008, "learning_rate": 9.676666666666668e-06, "loss": 28.9608, "step": 20849 }, { "epoch": 496.42985074626864, "grad_norm": 21.673707962036133, "learning_rate": 9.676190476190476e-06, "loss": 28.4302, "step": 20850 }, { "epoch": 496.4537313432836, "grad_norm": 18.906776428222656, "learning_rate": 9.675714285714286e-06, "loss": 28.8387, "step": 20851 }, { "epoch": 496.4776119402985, "grad_norm": 21.41265106201172, "learning_rate": 9.675238095238097e-06, "loss": 28.5997, "step": 20852 }, { "epoch": 496.5014925373134, "grad_norm": 23.634733200073242, "learning_rate": 9.674761904761906e-06, "loss": 28.582, "step": 20853 }, { "epoch": 496.52537313432833, "grad_norm": 20.524614334106445, "learning_rate": 9.674285714285716e-06, "loss": 27.9139, "step": 20854 }, { "epoch": 496.5492537313433, "grad_norm": 17.701732635498047, "learning_rate": 9.673809523809525e-06, "loss": 27.4572, "step": 20855 }, { "epoch": 496.5731343283582, "grad_norm": 23.18527603149414, "learning_rate": 9.673333333333334e-06, "loss": 28.901, "step": 20856 }, { "epoch": 496.5970149253731, "grad_norm": 22.233386993408203, "learning_rate": 9.672857142857144e-06, "loss": 27.4171, "step": 20857 }, { "epoch": 496.6208955223881, "grad_norm": 19.928682327270508, "learning_rate": 9.672380952380953e-06, "loss": 28.4359, "step": 20858 }, { "epoch": 496.644776119403, "grad_norm": 22.446205139160156, "learning_rate": 9.671904761904763e-06, "loss": 28.2501, "step": 20859 }, { "epoch": 496.6686567164179, "grad_norm": 21.4228458404541, "learning_rate": 9.671428571428572e-06, "loss": 29.2856, "step": 20860 }, { "epoch": 496.6925373134328, "grad_norm": 20.906360626220703, "learning_rate": 9.670952380952382e-06, "loss": 29.9083, "step": 20861 }, { "epoch": 496.7164179104478, "grad_norm": 19.005783081054688, "learning_rate": 9.670476190476191e-06, "loss": 27.6119, "step": 20862 }, { "epoch": 496.7402985074627, "grad_norm": 25.381637573242188, "learning_rate": 9.67e-06, "loss": 29.3015, "step": 20863 }, { "epoch": 496.7641791044776, "grad_norm": 18.532569885253906, "learning_rate": 9.66952380952381e-06, "loss": 28.3728, "step": 20864 }, { "epoch": 496.78805970149256, "grad_norm": 22.914749145507812, "learning_rate": 9.66904761904762e-06, "loss": 28.9053, "step": 20865 }, { "epoch": 496.81194029850747, "grad_norm": 25.641643524169922, "learning_rate": 9.668571428571429e-06, "loss": 28.7614, "step": 20866 }, { "epoch": 496.8358208955224, "grad_norm": 20.744050979614258, "learning_rate": 9.668095238095238e-06, "loss": 27.1446, "step": 20867 }, { "epoch": 496.85970149253734, "grad_norm": 20.091144561767578, "learning_rate": 9.667619047619048e-06, "loss": 27.76, "step": 20868 }, { "epoch": 496.88358208955225, "grad_norm": 32.3016471862793, "learning_rate": 9.667142857142859e-06, "loss": 28.5978, "step": 20869 }, { "epoch": 496.90746268656716, "grad_norm": 20.259117126464844, "learning_rate": 9.666666666666667e-06, "loss": 27.2739, "step": 20870 }, { "epoch": 496.93134328358207, "grad_norm": 29.763648986816406, "learning_rate": 9.666190476190476e-06, "loss": 28.4107, "step": 20871 }, { "epoch": 496.95522388059703, "grad_norm": 27.468978881835938, "learning_rate": 9.665714285714286e-06, "loss": 29.6403, "step": 20872 }, { "epoch": 496.97910447761194, "grad_norm": 20.50508689880371, "learning_rate": 9.665238095238097e-06, "loss": 27.9621, "step": 20873 }, { "epoch": 497.0, "grad_norm": 26.80634117126465, "learning_rate": 9.664761904761906e-06, "loss": 25.1217, "step": 20874 }, { "epoch": 497.0238805970149, "grad_norm": 22.598939895629883, "learning_rate": 9.664285714285716e-06, "loss": 27.5719, "step": 20875 }, { "epoch": 497.0477611940299, "grad_norm": 31.199256896972656, "learning_rate": 9.663809523809523e-06, "loss": 28.0971, "step": 20876 }, { "epoch": 497.0716417910448, "grad_norm": 27.88519859313965, "learning_rate": 9.663333333333335e-06, "loss": 28.5187, "step": 20877 }, { "epoch": 497.0955223880597, "grad_norm": 26.418851852416992, "learning_rate": 9.662857142857144e-06, "loss": 27.6372, "step": 20878 }, { "epoch": 497.1194029850746, "grad_norm": 23.95099449157715, "learning_rate": 9.662380952380953e-06, "loss": 28.7116, "step": 20879 }, { "epoch": 497.14328358208957, "grad_norm": 22.00933074951172, "learning_rate": 9.661904761904763e-06, "loss": 28.3663, "step": 20880 }, { "epoch": 497.1671641791045, "grad_norm": 25.44548225402832, "learning_rate": 9.661428571428572e-06, "loss": 28.1134, "step": 20881 }, { "epoch": 497.1910447761194, "grad_norm": 19.820497512817383, "learning_rate": 9.660952380952382e-06, "loss": 28.8326, "step": 20882 }, { "epoch": 497.21492537313435, "grad_norm": 27.168376922607422, "learning_rate": 9.660476190476191e-06, "loss": 27.3608, "step": 20883 }, { "epoch": 497.23880597014926, "grad_norm": 20.899885177612305, "learning_rate": 9.66e-06, "loss": 27.7827, "step": 20884 }, { "epoch": 497.26268656716417, "grad_norm": 25.16288948059082, "learning_rate": 9.65952380952381e-06, "loss": 28.3073, "step": 20885 }, { "epoch": 497.28656716417913, "grad_norm": NaN, "learning_rate": 9.65904761904762e-06, "loss": 37.5522, "step": 20886 }, { "epoch": 497.31044776119404, "grad_norm": 21.814376831054688, "learning_rate": 9.65904761904762e-06, "loss": 28.3318, "step": 20887 }, { "epoch": 497.33432835820895, "grad_norm": 22.674816131591797, "learning_rate": 9.658571428571429e-06, "loss": 28.6351, "step": 20888 }, { "epoch": 497.35820895522386, "grad_norm": 26.646257400512695, "learning_rate": 9.658095238095238e-06, "loss": 27.6956, "step": 20889 }, { "epoch": 497.3820895522388, "grad_norm": 21.798696517944336, "learning_rate": 9.657619047619048e-06, "loss": 28.0928, "step": 20890 }, { "epoch": 497.40597014925373, "grad_norm": 26.445837020874023, "learning_rate": 9.657142857142859e-06, "loss": 27.9465, "step": 20891 }, { "epoch": 497.42985074626864, "grad_norm": 21.53672218322754, "learning_rate": 9.656666666666667e-06, "loss": 28.5057, "step": 20892 }, { "epoch": 497.4537313432836, "grad_norm": 23.361955642700195, "learning_rate": 9.656190476190476e-06, "loss": 28.071, "step": 20893 }, { "epoch": 497.4776119402985, "grad_norm": 21.675477981567383, "learning_rate": 9.655714285714286e-06, "loss": 28.3341, "step": 20894 }, { "epoch": 497.5014925373134, "grad_norm": 20.16310691833496, "learning_rate": 9.655238095238097e-06, "loss": 27.9817, "step": 20895 }, { "epoch": 497.52537313432833, "grad_norm": 26.336355209350586, "learning_rate": 9.654761904761906e-06, "loss": 28.2316, "step": 20896 }, { "epoch": 497.5492537313433, "grad_norm": 23.926544189453125, "learning_rate": 9.654285714285716e-06, "loss": 28.53, "step": 20897 }, { "epoch": 497.5731343283582, "grad_norm": 23.746376037597656, "learning_rate": 9.653809523809524e-06, "loss": 29.4673, "step": 20898 }, { "epoch": 497.5970149253731, "grad_norm": 20.361459732055664, "learning_rate": 9.653333333333335e-06, "loss": 27.7836, "step": 20899 }, { "epoch": 497.6208955223881, "grad_norm": 31.99506950378418, "learning_rate": 9.652857142857144e-06, "loss": 28.954, "step": 20900 }, { "epoch": 497.644776119403, "grad_norm": 24.48999786376953, "learning_rate": 9.652380952380954e-06, "loss": 28.9221, "step": 20901 }, { "epoch": 497.6686567164179, "grad_norm": 21.43216896057129, "learning_rate": 9.651904761904763e-06, "loss": 29.9923, "step": 20902 }, { "epoch": 497.6925373134328, "grad_norm": 26.421764373779297, "learning_rate": 9.651428571428572e-06, "loss": 29.4437, "step": 20903 }, { "epoch": 497.7164179104478, "grad_norm": 20.634803771972656, "learning_rate": 9.650952380952382e-06, "loss": 27.7848, "step": 20904 }, { "epoch": 497.7402985074627, "grad_norm": 20.215938568115234, "learning_rate": 9.650476190476191e-06, "loss": 28.4341, "step": 20905 }, { "epoch": 497.7641791044776, "grad_norm": 23.04256248474121, "learning_rate": 9.65e-06, "loss": 28.0126, "step": 20906 }, { "epoch": 497.78805970149256, "grad_norm": 21.338489532470703, "learning_rate": 9.64952380952381e-06, "loss": 27.8711, "step": 20907 }, { "epoch": 497.81194029850747, "grad_norm": 21.005821228027344, "learning_rate": 9.64904761904762e-06, "loss": 28.4734, "step": 20908 }, { "epoch": 497.8358208955224, "grad_norm": 20.176958084106445, "learning_rate": 9.648571428571429e-06, "loss": 28.0377, "step": 20909 }, { "epoch": 497.85970149253734, "grad_norm": 19.563657760620117, "learning_rate": 9.648095238095239e-06, "loss": 28.5272, "step": 20910 }, { "epoch": 497.88358208955225, "grad_norm": 23.009241104125977, "learning_rate": 9.647619047619048e-06, "loss": 28.4381, "step": 20911 }, { "epoch": 497.90746268656716, "grad_norm": 23.48168182373047, "learning_rate": 9.647142857142857e-06, "loss": 27.9372, "step": 20912 }, { "epoch": 497.93134328358207, "grad_norm": 25.975940704345703, "learning_rate": 9.646666666666667e-06, "loss": 28.993, "step": 20913 }, { "epoch": 497.95522388059703, "grad_norm": NaN, "learning_rate": 9.646190476190476e-06, "loss": 45.9307, "step": 20914 }, { "epoch": 497.97910447761194, "grad_norm": 18.228906631469727, "learning_rate": 9.646190476190476e-06, "loss": 26.8278, "step": 20915 }, { "epoch": 498.0, "grad_norm": 19.650875091552734, "learning_rate": 9.645714285714286e-06, "loss": 24.6172, "step": 20916 }, { "epoch": 498.0238805970149, "grad_norm": 20.210227966308594, "learning_rate": 9.645238095238097e-06, "loss": 28.3063, "step": 20917 }, { "epoch": 498.0477611940299, "grad_norm": 20.374176025390625, "learning_rate": 9.644761904761906e-06, "loss": 28.124, "step": 20918 }, { "epoch": 498.0716417910448, "grad_norm": 20.180469512939453, "learning_rate": 9.644285714285714e-06, "loss": 28.4063, "step": 20919 }, { "epoch": 498.0955223880597, "grad_norm": 19.163251876831055, "learning_rate": 9.643809523809524e-06, "loss": 28.2465, "step": 20920 }, { "epoch": 498.1194029850746, "grad_norm": 21.796045303344727, "learning_rate": 9.643333333333335e-06, "loss": 28.1718, "step": 20921 }, { "epoch": 498.14328358208957, "grad_norm": 18.8210391998291, "learning_rate": 9.642857142857144e-06, "loss": 28.1578, "step": 20922 }, { "epoch": 498.1671641791045, "grad_norm": 20.032793045043945, "learning_rate": 9.642380952380954e-06, "loss": 27.8901, "step": 20923 }, { "epoch": 498.1910447761194, "grad_norm": 25.070690155029297, "learning_rate": 9.641904761904763e-06, "loss": 28.9218, "step": 20924 }, { "epoch": 498.21492537313435, "grad_norm": NaN, "learning_rate": 9.641428571428573e-06, "loss": 34.1407, "step": 20925 }, { "epoch": 498.23880597014926, "grad_norm": 23.311649322509766, "learning_rate": 9.641428571428573e-06, "loss": 28.9263, "step": 20926 }, { "epoch": 498.26268656716417, "grad_norm": 22.004257202148438, "learning_rate": 9.640952380952382e-06, "loss": 29.3008, "step": 20927 }, { "epoch": 498.28656716417913, "grad_norm": 19.35313606262207, "learning_rate": 9.640476190476191e-06, "loss": 28.3832, "step": 20928 }, { "epoch": 498.31044776119404, "grad_norm": 31.018909454345703, "learning_rate": 9.640000000000001e-06, "loss": 28.1096, "step": 20929 }, { "epoch": 498.33432835820895, "grad_norm": 21.249258041381836, "learning_rate": 9.63952380952381e-06, "loss": 27.7592, "step": 20930 }, { "epoch": 498.35820895522386, "grad_norm": 28.07059097290039, "learning_rate": 9.63904761904762e-06, "loss": 28.0406, "step": 20931 }, { "epoch": 498.3820895522388, "grad_norm": 28.26436424255371, "learning_rate": 9.63857142857143e-06, "loss": 28.3472, "step": 20932 }, { "epoch": 498.40597014925373, "grad_norm": 21.066938400268555, "learning_rate": 9.638095238095239e-06, "loss": 29.3893, "step": 20933 }, { "epoch": 498.42985074626864, "grad_norm": 23.22842025756836, "learning_rate": 9.637619047619048e-06, "loss": 27.9522, "step": 20934 }, { "epoch": 498.4537313432836, "grad_norm": 23.832916259765625, "learning_rate": 9.637142857142858e-06, "loss": 29.168, "step": 20935 }, { "epoch": 498.4776119402985, "grad_norm": NaN, "learning_rate": 9.636666666666667e-06, "loss": 34.7481, "step": 20936 }, { "epoch": 498.5014925373134, "grad_norm": 21.810894012451172, "learning_rate": 9.636666666666667e-06, "loss": 28.1853, "step": 20937 }, { "epoch": 498.52537313432833, "grad_norm": 18.79465675354004, "learning_rate": 9.636190476190476e-06, "loss": 28.6151, "step": 20938 }, { "epoch": 498.5492537313433, "grad_norm": 24.164047241210938, "learning_rate": 9.635714285714286e-06, "loss": 28.0886, "step": 20939 }, { "epoch": 498.5731343283582, "grad_norm": 20.531005859375, "learning_rate": 9.635238095238097e-06, "loss": 27.9894, "step": 20940 }, { "epoch": 498.5970149253731, "grad_norm": 26.974939346313477, "learning_rate": 9.634761904761906e-06, "loss": 27.7349, "step": 20941 }, { "epoch": 498.6208955223881, "grad_norm": 17.99193572998047, "learning_rate": 9.634285714285714e-06, "loss": 27.3779, "step": 20942 }, { "epoch": 498.644776119403, "grad_norm": 25.251880645751953, "learning_rate": 9.633809523809524e-06, "loss": 27.9092, "step": 20943 }, { "epoch": 498.6686567164179, "grad_norm": 24.771209716796875, "learning_rate": 9.633333333333335e-06, "loss": 27.301, "step": 20944 }, { "epoch": 498.6925373134328, "grad_norm": 22.970561981201172, "learning_rate": 9.632857142857144e-06, "loss": 28.2114, "step": 20945 }, { "epoch": 498.7164179104478, "grad_norm": 23.41929054260254, "learning_rate": 9.632380952380954e-06, "loss": 27.8668, "step": 20946 }, { "epoch": 498.7402985074627, "grad_norm": 22.920629501342773, "learning_rate": 9.631904761904761e-06, "loss": 27.3743, "step": 20947 }, { "epoch": 498.7641791044776, "grad_norm": 23.36177635192871, "learning_rate": 9.631428571428573e-06, "loss": 28.7485, "step": 20948 }, { "epoch": 498.78805970149256, "grad_norm": 20.613203048706055, "learning_rate": 9.630952380952382e-06, "loss": 28.3371, "step": 20949 }, { "epoch": 498.81194029850747, "grad_norm": 19.850215911865234, "learning_rate": 9.630476190476192e-06, "loss": 27.7769, "step": 20950 }, { "epoch": 498.8358208955224, "grad_norm": 26.530431747436523, "learning_rate": 9.630000000000001e-06, "loss": 28.935, "step": 20951 }, { "epoch": 498.85970149253734, "grad_norm": 22.724477767944336, "learning_rate": 9.62952380952381e-06, "loss": 29.1089, "step": 20952 }, { "epoch": 498.88358208955225, "grad_norm": 19.445432662963867, "learning_rate": 9.62904761904762e-06, "loss": 28.0375, "step": 20953 }, { "epoch": 498.90746268656716, "grad_norm": 23.97826385498047, "learning_rate": 9.62857142857143e-06, "loss": 29.9349, "step": 20954 }, { "epoch": 498.93134328358207, "grad_norm": 23.859237670898438, "learning_rate": 9.628095238095239e-06, "loss": 28.0662, "step": 20955 }, { "epoch": 498.95522388059703, "grad_norm": 22.987014770507812, "learning_rate": 9.627619047619048e-06, "loss": 28.8858, "step": 20956 }, { "epoch": 498.97910447761194, "grad_norm": 20.803356170654297, "learning_rate": 9.627142857142858e-06, "loss": 28.6232, "step": 20957 }, { "epoch": 499.0, "grad_norm": 18.23337745666504, "learning_rate": 9.626666666666667e-06, "loss": 24.2695, "step": 20958 }, { "epoch": 499.0238805970149, "grad_norm": 27.508928298950195, "learning_rate": 9.626190476190477e-06, "loss": 28.3257, "step": 20959 }, { "epoch": 499.0477611940299, "grad_norm": 17.526397705078125, "learning_rate": 9.625714285714286e-06, "loss": 27.3096, "step": 20960 }, { "epoch": 499.0716417910448, "grad_norm": 25.70009994506836, "learning_rate": 9.625238095238097e-06, "loss": 27.7073, "step": 20961 }, { "epoch": 499.0955223880597, "grad_norm": 22.4039363861084, "learning_rate": 9.624761904761905e-06, "loss": 28.0874, "step": 20962 }, { "epoch": 499.1194029850746, "grad_norm": 24.198532104492188, "learning_rate": 9.624285714285714e-06, "loss": 28.6423, "step": 20963 }, { "epoch": 499.14328358208957, "grad_norm": 18.16721534729004, "learning_rate": 9.623809523809524e-06, "loss": 28.9667, "step": 20964 }, { "epoch": 499.1671641791045, "grad_norm": 28.273929595947266, "learning_rate": 9.623333333333335e-06, "loss": 30.1087, "step": 20965 }, { "epoch": 499.1910447761194, "grad_norm": 20.1627254486084, "learning_rate": 9.622857142857144e-06, "loss": 28.3263, "step": 20966 }, { "epoch": 499.21492537313435, "grad_norm": 25.170108795166016, "learning_rate": 9.622380952380954e-06, "loss": 28.4635, "step": 20967 }, { "epoch": 499.23880597014926, "grad_norm": 23.069887161254883, "learning_rate": 9.621904761904762e-06, "loss": 28.6754, "step": 20968 }, { "epoch": 499.26268656716417, "grad_norm": 23.311676025390625, "learning_rate": 9.621428571428573e-06, "loss": 27.5467, "step": 20969 }, { "epoch": 499.28656716417913, "grad_norm": 20.688447952270508, "learning_rate": 9.620952380952382e-06, "loss": 27.0024, "step": 20970 }, { "epoch": 499.31044776119404, "grad_norm": 20.822641372680664, "learning_rate": 9.620476190476192e-06, "loss": 27.8511, "step": 20971 }, { "epoch": 499.33432835820895, "grad_norm": 20.742076873779297, "learning_rate": 9.620000000000001e-06, "loss": 28.1952, "step": 20972 }, { "epoch": 499.35820895522386, "grad_norm": 19.333650588989258, "learning_rate": 9.61952380952381e-06, "loss": 27.6435, "step": 20973 }, { "epoch": 499.3820895522388, "grad_norm": 20.332651138305664, "learning_rate": 9.61904761904762e-06, "loss": 29.4264, "step": 20974 }, { "epoch": 499.40597014925373, "grad_norm": 24.245758056640625, "learning_rate": 9.61857142857143e-06, "loss": 28.5288, "step": 20975 }, { "epoch": 499.42985074626864, "grad_norm": 20.788667678833008, "learning_rate": 9.618095238095239e-06, "loss": 27.1222, "step": 20976 }, { "epoch": 499.4537313432836, "grad_norm": 24.14287567138672, "learning_rate": 9.617619047619048e-06, "loss": 28.7891, "step": 20977 }, { "epoch": 499.4776119402985, "grad_norm": 20.23819351196289, "learning_rate": 9.617142857142858e-06, "loss": 28.6117, "step": 20978 }, { "epoch": 499.5014925373134, "grad_norm": 18.791006088256836, "learning_rate": 9.616666666666667e-06, "loss": 28.5994, "step": 20979 }, { "epoch": 499.52537313432833, "grad_norm": 21.903398513793945, "learning_rate": 9.616190476190477e-06, "loss": 27.8117, "step": 20980 }, { "epoch": 499.5492537313433, "grad_norm": 17.492721557617188, "learning_rate": 9.615714285714286e-06, "loss": 28.993, "step": 20981 }, { "epoch": 499.5731343283582, "grad_norm": 19.99384880065918, "learning_rate": 9.615238095238096e-06, "loss": 29.0819, "step": 20982 }, { "epoch": 499.5970149253731, "grad_norm": 18.36116600036621, "learning_rate": 9.614761904761905e-06, "loss": 28.3213, "step": 20983 }, { "epoch": 499.6208955223881, "grad_norm": 21.404644012451172, "learning_rate": 9.614285714285714e-06, "loss": 28.2573, "step": 20984 }, { "epoch": 499.644776119403, "grad_norm": 24.585725784301758, "learning_rate": 9.613809523809524e-06, "loss": 28.1159, "step": 20985 }, { "epoch": 499.6686567164179, "grad_norm": 21.555641174316406, "learning_rate": 9.613333333333335e-06, "loss": 28.6635, "step": 20986 }, { "epoch": 499.6925373134328, "grad_norm": 18.17411231994629, "learning_rate": 9.612857142857144e-06, "loss": 28.0971, "step": 20987 }, { "epoch": 499.7164179104478, "grad_norm": 19.011857986450195, "learning_rate": 9.612380952380952e-06, "loss": 28.881, "step": 20988 }, { "epoch": 499.7402985074627, "grad_norm": 24.65228271484375, "learning_rate": 9.611904761904762e-06, "loss": 27.8626, "step": 20989 }, { "epoch": 499.7641791044776, "grad_norm": 25.38567352294922, "learning_rate": 9.611428571428573e-06, "loss": 27.4399, "step": 20990 }, { "epoch": 499.78805970149256, "grad_norm": 20.75394058227539, "learning_rate": 9.610952380952382e-06, "loss": 29.356, "step": 20991 }, { "epoch": 499.81194029850747, "grad_norm": 18.423067092895508, "learning_rate": 9.610476190476192e-06, "loss": 27.3741, "step": 20992 }, { "epoch": 499.8358208955224, "grad_norm": 20.281923294067383, "learning_rate": 9.610000000000001e-06, "loss": 29.0476, "step": 20993 }, { "epoch": 499.85970149253734, "grad_norm": 22.693010330200195, "learning_rate": 9.60952380952381e-06, "loss": 27.6797, "step": 20994 }, { "epoch": 499.88358208955225, "grad_norm": 22.751773834228516, "learning_rate": 9.60904761904762e-06, "loss": 28.4697, "step": 20995 }, { "epoch": 499.90746268656716, "grad_norm": 20.860456466674805, "learning_rate": 9.60857142857143e-06, "loss": 28.422, "step": 20996 }, { "epoch": 499.93134328358207, "grad_norm": 20.23386573791504, "learning_rate": 9.608095238095239e-06, "loss": 29.1688, "step": 20997 }, { "epoch": 499.95522388059703, "grad_norm": 20.9345645904541, "learning_rate": 9.607619047619048e-06, "loss": 27.6649, "step": 20998 }, { "epoch": 499.97910447761194, "grad_norm": 21.00820541381836, "learning_rate": 9.607142857142858e-06, "loss": 28.7277, "step": 20999 }, { "epoch": 500.0, "grad_norm": 22.95836067199707, "learning_rate": 9.606666666666667e-06, "loss": 24.9002, "step": 21000 }, { "epoch": 500.0, "step": 21000, "total_flos": 1.0323163883664182e+18, "train_loss": 1.1435158755892798, "train_runtime": 25612.2231, "train_samples_per_second": 104.481, "train_steps_per_second": 0.82 }, { "epoch": 500.0238805970149, "grad_norm": 20.994747161865234, "learning_rate": 1e-05, "loss": 28.9044, "step": 21001 }, { "epoch": 500.0477611940299, "grad_norm": Infinity, "learning_rate": 9.99953314659197e-06, "loss": 33.4525, "step": 21002 }, { "epoch": 500.0716417910448, "grad_norm": 259.4839172363281, "learning_rate": 9.99953314659197e-06, "loss": 33.5784, "step": 21003 }, { "epoch": 500.0955223880597, "grad_norm": 136.52828979492188, "learning_rate": 9.999066293183942e-06, "loss": 32.353, "step": 21004 }, { "epoch": 500.1194029850746, "grad_norm": 80.33211517333984, "learning_rate": 9.998599439775911e-06, "loss": 30.0876, "step": 21005 }, { "epoch": 500.14328358208957, "grad_norm": 66.49418640136719, "learning_rate": 9.998132586367881e-06, "loss": 28.7751, "step": 21006 }, { "epoch": 500.1671641791045, "grad_norm": 68.52326965332031, "learning_rate": 9.99766573295985e-06, "loss": 28.7366, "step": 21007 }, { "epoch": 500.1910447761194, "grad_norm": 51.16172409057617, "learning_rate": 9.997198879551822e-06, "loss": 29.2893, "step": 21008 }, { "epoch": 500.21492537313435, "grad_norm": 41.58357238769531, "learning_rate": 9.996732026143792e-06, "loss": 28.8596, "step": 21009 }, { "epoch": 500.23880597014926, "grad_norm": 39.96255111694336, "learning_rate": 9.996265172735761e-06, "loss": 29.4617, "step": 21010 }, { "epoch": 500.26268656716417, "grad_norm": 41.00281524658203, "learning_rate": 9.995798319327733e-06, "loss": 28.443, "step": 21011 }, { "epoch": 500.28656716417913, "grad_norm": 28.769771575927734, "learning_rate": 9.995331465919702e-06, "loss": 29.4015, "step": 21012 }, { "epoch": 500.31044776119404, "grad_norm": 35.28112030029297, "learning_rate": 9.994864612511672e-06, "loss": 29.2957, "step": 21013 }, { "epoch": 500.33432835820895, "grad_norm": 43.132057189941406, "learning_rate": 9.994397759103642e-06, "loss": 28.0807, "step": 21014 }, { "epoch": 500.35820895522386, "grad_norm": 27.295621871948242, "learning_rate": 9.993930905695613e-06, "loss": 27.9422, "step": 21015 }, { "epoch": 500.3820895522388, "grad_norm": 33.87545394897461, "learning_rate": 9.993464052287583e-06, "loss": 27.8584, "step": 21016 }, { "epoch": 500.40597014925373, "grad_norm": 33.8487434387207, "learning_rate": 9.992997198879552e-06, "loss": 28.0341, "step": 21017 }, { "epoch": 500.42985074626864, "grad_norm": 24.433391571044922, "learning_rate": 9.992530345471522e-06, "loss": 28.7581, "step": 21018 }, { "epoch": 500.4537313432836, "grad_norm": 24.387128829956055, "learning_rate": 9.992063492063493e-06, "loss": 28.0095, "step": 21019 }, { "epoch": 500.4776119402985, "grad_norm": 25.16744613647461, "learning_rate": 9.991596638655463e-06, "loss": 27.0831, "step": 21020 }, { "epoch": 500.5014925373134, "grad_norm": 26.85177230834961, "learning_rate": 9.991129785247433e-06, "loss": 28.0962, "step": 21021 }, { "epoch": 500.52537313432833, "grad_norm": 22.53862190246582, "learning_rate": 9.990662931839404e-06, "loss": 27.7265, "step": 21022 }, { "epoch": 500.5492537313433, "grad_norm": 26.969818115234375, "learning_rate": 9.990196078431374e-06, "loss": 29.0725, "step": 21023 }, { "epoch": 500.5731343283582, "grad_norm": 25.750192642211914, "learning_rate": 9.989729225023343e-06, "loss": 27.8878, "step": 21024 }, { "epoch": 500.5970149253731, "grad_norm": 21.379230499267578, "learning_rate": 9.989262371615313e-06, "loss": 28.3741, "step": 21025 }, { "epoch": 500.6208955223881, "grad_norm": 21.2894344329834, "learning_rate": 9.988795518207284e-06, "loss": 28.1301, "step": 21026 }, { "epoch": 500.644776119403, "grad_norm": 28.190319061279297, "learning_rate": 9.988328664799254e-06, "loss": 30.0065, "step": 21027 }, { "epoch": 500.6686567164179, "grad_norm": 26.58777618408203, "learning_rate": 9.987861811391224e-06, "loss": 28.0906, "step": 21028 }, { "epoch": 500.6925373134328, "grad_norm": 19.254587173461914, "learning_rate": 9.987394957983195e-06, "loss": 27.9201, "step": 21029 }, { "epoch": 500.7164179104478, "grad_norm": 21.59400749206543, "learning_rate": 9.986928104575165e-06, "loss": 27.6258, "step": 21030 }, { "epoch": 500.7402985074627, "grad_norm": 29.18903350830078, "learning_rate": 9.986461251167134e-06, "loss": 27.4796, "step": 21031 }, { "epoch": 500.7641791044776, "grad_norm": 24.491056442260742, "learning_rate": 9.985994397759104e-06, "loss": 28.1733, "step": 21032 }, { "epoch": 500.78805970149256, "grad_norm": 18.549049377441406, "learning_rate": 9.985527544351075e-06, "loss": 28.0828, "step": 21033 }, { "epoch": 500.81194029850747, "grad_norm": 22.043691635131836, "learning_rate": 9.985060690943045e-06, "loss": 27.6302, "step": 21034 }, { "epoch": 500.8358208955224, "grad_norm": 26.272783279418945, "learning_rate": 9.984593837535014e-06, "loss": 29.1998, "step": 21035 }, { "epoch": 500.85970149253734, "grad_norm": 23.311805725097656, "learning_rate": 9.984126984126986e-06, "loss": 29.2872, "step": 21036 }, { "epoch": 500.88358208955225, "grad_norm": 18.446914672851562, "learning_rate": 9.983660130718955e-06, "loss": 27.7758, "step": 21037 }, { "epoch": 500.90746268656716, "grad_norm": 19.157445907592773, "learning_rate": 9.983193277310925e-06, "loss": 28.6067, "step": 21038 }, { "epoch": 500.93134328358207, "grad_norm": 23.780330657958984, "learning_rate": 9.982726423902895e-06, "loss": 28.399, "step": 21039 }, { "epoch": 500.95522388059703, "grad_norm": 22.872148513793945, "learning_rate": 9.982259570494866e-06, "loss": 28.0652, "step": 21040 }, { "epoch": 500.97910447761194, "grad_norm": 21.67836570739746, "learning_rate": 9.981792717086836e-06, "loss": 28.8695, "step": 21041 }, { "epoch": 501.0, "grad_norm": 18.095664978027344, "learning_rate": 9.981325863678805e-06, "loss": 23.957, "step": 21042 }, { "epoch": 501.0238805970149, "grad_norm": 20.82918930053711, "learning_rate": 9.980859010270775e-06, "loss": 28.6598, "step": 21043 }, { "epoch": 501.0477611940299, "grad_norm": 24.06619644165039, "learning_rate": 9.980392156862746e-06, "loss": 28.6936, "step": 21044 }, { "epoch": 501.0716417910448, "grad_norm": 19.138500213623047, "learning_rate": 9.979925303454716e-06, "loss": 28.5998, "step": 21045 }, { "epoch": 501.0955223880597, "grad_norm": 20.92177391052246, "learning_rate": 9.979458450046686e-06, "loss": 27.347, "step": 21046 }, { "epoch": 501.1194029850746, "grad_norm": 19.65283966064453, "learning_rate": 9.978991596638657e-06, "loss": 27.7948, "step": 21047 }, { "epoch": 501.14328358208957, "grad_norm": 19.137514114379883, "learning_rate": 9.978524743230627e-06, "loss": 28.9994, "step": 21048 }, { "epoch": 501.1671641791045, "grad_norm": 18.84257698059082, "learning_rate": 9.978057889822596e-06, "loss": 28.2045, "step": 21049 }, { "epoch": 501.1910447761194, "grad_norm": 23.02973175048828, "learning_rate": 9.977591036414566e-06, "loss": 28.5486, "step": 21050 }, { "epoch": 501.21492537313435, "grad_norm": 22.940765380859375, "learning_rate": 9.977124183006537e-06, "loss": 27.802, "step": 21051 }, { "epoch": 501.23880597014926, "grad_norm": 24.046510696411133, "learning_rate": 9.976657329598507e-06, "loss": 27.8676, "step": 21052 }, { "epoch": 501.26268656716417, "grad_norm": 17.54936981201172, "learning_rate": 9.976190476190477e-06, "loss": 27.93, "step": 21053 }, { "epoch": 501.28656716417913, "grad_norm": 19.68419647216797, "learning_rate": 9.975723622782448e-06, "loss": 27.0372, "step": 21054 }, { "epoch": 501.31044776119404, "grad_norm": 23.17284393310547, "learning_rate": 9.975256769374418e-06, "loss": 28.2864, "step": 21055 }, { "epoch": 501.33432835820895, "grad_norm": 21.44428253173828, "learning_rate": 9.974789915966387e-06, "loss": 28.3394, "step": 21056 }, { "epoch": 501.35820895522386, "grad_norm": 25.15224266052246, "learning_rate": 9.974323062558357e-06, "loss": 28.5778, "step": 21057 }, { "epoch": 501.3820895522388, "grad_norm": 23.511661529541016, "learning_rate": 9.973856209150328e-06, "loss": 29.0214, "step": 21058 }, { "epoch": 501.40597014925373, "grad_norm": 21.58730697631836, "learning_rate": 9.973389355742298e-06, "loss": 28.3276, "step": 21059 }, { "epoch": 501.42985074626864, "grad_norm": 19.011512756347656, "learning_rate": 9.972922502334268e-06, "loss": 27.4509, "step": 21060 }, { "epoch": 501.4537313432836, "grad_norm": 21.003311157226562, "learning_rate": 9.972455648926239e-06, "loss": 29.2537, "step": 21061 }, { "epoch": 501.4776119402985, "grad_norm": 27.311450958251953, "learning_rate": 9.971988795518209e-06, "loss": 28.5043, "step": 21062 }, { "epoch": 501.5014925373134, "grad_norm": 21.218217849731445, "learning_rate": 9.971521942110178e-06, "loss": 28.3365, "step": 21063 }, { "epoch": 501.52537313432833, "grad_norm": 19.697734832763672, "learning_rate": 9.971055088702148e-06, "loss": 27.8858, "step": 21064 }, { "epoch": 501.5492537313433, "grad_norm": 26.000232696533203, "learning_rate": 9.970588235294119e-06, "loss": 28.6478, "step": 21065 }, { "epoch": 501.5731343283582, "grad_norm": 19.37291145324707, "learning_rate": 9.970121381886089e-06, "loss": 26.9195, "step": 21066 }, { "epoch": 501.5970149253731, "grad_norm": 23.318817138671875, "learning_rate": 9.969654528478058e-06, "loss": 28.0551, "step": 21067 }, { "epoch": 501.6208955223881, "grad_norm": 36.04966735839844, "learning_rate": 9.969187675070028e-06, "loss": 28.929, "step": 21068 }, { "epoch": 501.644776119403, "grad_norm": 20.03295135498047, "learning_rate": 9.968720821662e-06, "loss": 28.4298, "step": 21069 }, { "epoch": 501.6686567164179, "grad_norm": 31.571426391601562, "learning_rate": 9.968253968253969e-06, "loss": 28.3925, "step": 21070 }, { "epoch": 501.6925373134328, "grad_norm": 25.071041107177734, "learning_rate": 9.967787114845939e-06, "loss": 28.0494, "step": 21071 }, { "epoch": 501.7164179104478, "grad_norm": 22.68183135986328, "learning_rate": 9.96732026143791e-06, "loss": 28.0221, "step": 21072 }, { "epoch": 501.7402985074627, "grad_norm": 29.148317337036133, "learning_rate": 9.96685340802988e-06, "loss": 28.357, "step": 21073 }, { "epoch": 501.7641791044776, "grad_norm": 25.448768615722656, "learning_rate": 9.96638655462185e-06, "loss": 28.0369, "step": 21074 }, { "epoch": 501.78805970149256, "grad_norm": 19.354949951171875, "learning_rate": 9.965919701213819e-06, "loss": 27.7135, "step": 21075 }, { "epoch": 501.81194029850747, "grad_norm": 25.243743896484375, "learning_rate": 9.96545284780579e-06, "loss": 27.6352, "step": 21076 }, { "epoch": 501.8358208955224, "grad_norm": 23.4504337310791, "learning_rate": 9.96498599439776e-06, "loss": 28.1375, "step": 21077 }, { "epoch": 501.85970149253734, "grad_norm": 24.473995208740234, "learning_rate": 9.96451914098973e-06, "loss": 28.8695, "step": 21078 }, { "epoch": 501.88358208955225, "grad_norm": 17.615610122680664, "learning_rate": 9.964052287581701e-06, "loss": 27.7124, "step": 21079 }, { "epoch": 501.90746268656716, "grad_norm": 36.28605651855469, "learning_rate": 9.96358543417367e-06, "loss": 27.0614, "step": 21080 }, { "epoch": 501.93134328358207, "grad_norm": 20.401193618774414, "learning_rate": 9.96311858076564e-06, "loss": 27.9467, "step": 21081 }, { "epoch": 501.95522388059703, "grad_norm": 28.89822006225586, "learning_rate": 9.96265172735761e-06, "loss": 28.0849, "step": 21082 }, { "epoch": 501.97910447761194, "grad_norm": 23.70438575744629, "learning_rate": 9.962184873949581e-06, "loss": 29.1637, "step": 21083 }, { "epoch": 502.0, "grad_norm": 20.932605743408203, "learning_rate": 9.961718020541551e-06, "loss": 24.2315, "step": 21084 }, { "epoch": 502.0238805970149, "grad_norm": 22.112051010131836, "learning_rate": 9.96125116713352e-06, "loss": 27.0224, "step": 21085 }, { "epoch": 502.0477611940299, "grad_norm": 28.497404098510742, "learning_rate": 9.960784313725492e-06, "loss": 28.908, "step": 21086 }, { "epoch": 502.0716417910448, "grad_norm": 22.549741744995117, "learning_rate": 9.960317460317462e-06, "loss": 27.0134, "step": 21087 }, { "epoch": 502.0955223880597, "grad_norm": 24.240373611450195, "learning_rate": 9.959850606909431e-06, "loss": 28.5302, "step": 21088 }, { "epoch": 502.1194029850746, "grad_norm": 21.68141746520996, "learning_rate": 9.959383753501401e-06, "loss": 28.5783, "step": 21089 }, { "epoch": 502.14328358208957, "grad_norm": 26.30082893371582, "learning_rate": 9.958916900093372e-06, "loss": 27.3656, "step": 21090 }, { "epoch": 502.1671641791045, "grad_norm": 21.949626922607422, "learning_rate": 9.958450046685342e-06, "loss": 27.8642, "step": 21091 }, { "epoch": 502.1910447761194, "grad_norm": 28.15694236755371, "learning_rate": 9.957983193277312e-06, "loss": 28.3253, "step": 21092 }, { "epoch": 502.21492537313435, "grad_norm": 28.96364402770996, "learning_rate": 9.957516339869283e-06, "loss": 28.1651, "step": 21093 }, { "epoch": 502.23880597014926, "grad_norm": 20.066509246826172, "learning_rate": 9.957049486461252e-06, "loss": 29.0096, "step": 21094 }, { "epoch": 502.26268656716417, "grad_norm": 25.06045913696289, "learning_rate": 9.956582633053222e-06, "loss": 27.6944, "step": 21095 }, { "epoch": 502.28656716417913, "grad_norm": 25.86124038696289, "learning_rate": 9.956115779645192e-06, "loss": 28.1946, "step": 21096 }, { "epoch": 502.31044776119404, "grad_norm": 21.78862953186035, "learning_rate": 9.955648926237163e-06, "loss": 27.5499, "step": 21097 }, { "epoch": 502.33432835820895, "grad_norm": 21.851198196411133, "learning_rate": 9.955182072829133e-06, "loss": 28.3415, "step": 21098 }, { "epoch": 502.35820895522386, "grad_norm": 22.2988224029541, "learning_rate": 9.954715219421102e-06, "loss": 26.9728, "step": 21099 }, { "epoch": 502.3820895522388, "grad_norm": 26.04551124572754, "learning_rate": 9.954248366013072e-06, "loss": 27.9351, "step": 21100 }, { "epoch": 502.40597014925373, "grad_norm": 21.442798614501953, "learning_rate": 9.953781512605043e-06, "loss": 28.412, "step": 21101 }, { "epoch": 502.42985074626864, "grad_norm": 20.65431022644043, "learning_rate": 9.953314659197013e-06, "loss": 27.3245, "step": 21102 }, { "epoch": 502.4537313432836, "grad_norm": 25.879169464111328, "learning_rate": 9.952847805788983e-06, "loss": 28.0147, "step": 21103 }, { "epoch": 502.4776119402985, "grad_norm": 21.48764991760254, "learning_rate": 9.952380952380954e-06, "loss": 28.0975, "step": 21104 }, { "epoch": 502.5014925373134, "grad_norm": 21.691556930541992, "learning_rate": 9.951914098972924e-06, "loss": 27.5669, "step": 21105 }, { "epoch": 502.52537313432833, "grad_norm": 18.60375213623047, "learning_rate": 9.951447245564893e-06, "loss": 27.0437, "step": 21106 }, { "epoch": 502.5492537313433, "grad_norm": 22.894426345825195, "learning_rate": 9.950980392156863e-06, "loss": 29.519, "step": 21107 }, { "epoch": 502.5731343283582, "grad_norm": 26.589784622192383, "learning_rate": 9.950513538748834e-06, "loss": 28.4307, "step": 21108 }, { "epoch": 502.5970149253731, "grad_norm": 18.449207305908203, "learning_rate": 9.950046685340804e-06, "loss": 27.4139, "step": 21109 }, { "epoch": 502.6208955223881, "grad_norm": 25.139741897583008, "learning_rate": 9.949579831932774e-06, "loss": 28.9418, "step": 21110 }, { "epoch": 502.644776119403, "grad_norm": 22.64548683166504, "learning_rate": 9.949112978524745e-06, "loss": 28.5826, "step": 21111 }, { "epoch": 502.6686567164179, "grad_norm": 26.385900497436523, "learning_rate": 9.948646125116715e-06, "loss": 28.9727, "step": 21112 }, { "epoch": 502.6925373134328, "grad_norm": 19.45063018798828, "learning_rate": 9.948179271708684e-06, "loss": 28.0435, "step": 21113 }, { "epoch": 502.7164179104478, "grad_norm": 21.010211944580078, "learning_rate": 9.947712418300654e-06, "loss": 28.116, "step": 21114 }, { "epoch": 502.7402985074627, "grad_norm": 19.28559684753418, "learning_rate": 9.947245564892625e-06, "loss": 28.4744, "step": 21115 }, { "epoch": 502.7641791044776, "grad_norm": 22.60014533996582, "learning_rate": 9.946778711484595e-06, "loss": 28.8962, "step": 21116 }, { "epoch": 502.78805970149256, "grad_norm": 30.412460327148438, "learning_rate": 9.946311858076565e-06, "loss": 28.1681, "step": 21117 }, { "epoch": 502.81194029850747, "grad_norm": 22.158214569091797, "learning_rate": 9.945845004668536e-06, "loss": 28.5227, "step": 21118 }, { "epoch": 502.8358208955224, "grad_norm": 22.690452575683594, "learning_rate": 9.945378151260506e-06, "loss": 29.0967, "step": 21119 }, { "epoch": 502.85970149253734, "grad_norm": 31.366369247436523, "learning_rate": 9.944911297852475e-06, "loss": 28.5448, "step": 21120 }, { "epoch": 502.88358208955225, "grad_norm": 20.471948623657227, "learning_rate": 9.944444444444445e-06, "loss": 28.1113, "step": 21121 }, { "epoch": 502.90746268656716, "grad_norm": 20.324731826782227, "learning_rate": 9.943977591036416e-06, "loss": 27.9393, "step": 21122 }, { "epoch": 502.93134328358207, "grad_norm": 23.242095947265625, "learning_rate": 9.943510737628386e-06, "loss": 28.9603, "step": 21123 }, { "epoch": 502.95522388059703, "grad_norm": 31.62868309020996, "learning_rate": 9.943043884220355e-06, "loss": 26.9154, "step": 21124 }, { "epoch": 502.97910447761194, "grad_norm": 19.55793571472168, "learning_rate": 9.942577030812325e-06, "loss": 27.1933, "step": 21125 }, { "epoch": 503.0, "grad_norm": 24.540315628051758, "learning_rate": 9.942110177404296e-06, "loss": 25.4648, "step": 21126 }, { "epoch": 503.0238805970149, "grad_norm": 28.986347198486328, "learning_rate": 9.941643323996266e-06, "loss": 29.024, "step": 21127 }, { "epoch": 503.0477611940299, "grad_norm": 23.006702423095703, "learning_rate": 9.941176470588236e-06, "loss": 28.1397, "step": 21128 }, { "epoch": 503.0716417910448, "grad_norm": 19.35124397277832, "learning_rate": 9.940709617180207e-06, "loss": 27.5588, "step": 21129 }, { "epoch": 503.0955223880597, "grad_norm": 23.022239685058594, "learning_rate": 9.940242763772177e-06, "loss": 27.6897, "step": 21130 }, { "epoch": 503.1194029850746, "grad_norm": 22.941160202026367, "learning_rate": 9.939775910364146e-06, "loss": 29.2029, "step": 21131 }, { "epoch": 503.14328358208957, "grad_norm": 19.594255447387695, "learning_rate": 9.939309056956116e-06, "loss": 28.3599, "step": 21132 }, { "epoch": 503.1671641791045, "grad_norm": 19.7531795501709, "learning_rate": 9.938842203548087e-06, "loss": 28.1695, "step": 21133 }, { "epoch": 503.1910447761194, "grad_norm": 20.19512939453125, "learning_rate": 9.938375350140057e-06, "loss": 26.8819, "step": 21134 }, { "epoch": 503.21492537313435, "grad_norm": 24.001623153686523, "learning_rate": 9.937908496732027e-06, "loss": 28.4577, "step": 21135 }, { "epoch": 503.23880597014926, "grad_norm": 17.587099075317383, "learning_rate": 9.937441643323998e-06, "loss": 28.126, "step": 21136 }, { "epoch": 503.26268656716417, "grad_norm": 25.678569793701172, "learning_rate": 9.936974789915968e-06, "loss": 26.7843, "step": 21137 }, { "epoch": 503.28656716417913, "grad_norm": 25.210262298583984, "learning_rate": 9.936507936507937e-06, "loss": 28.5021, "step": 21138 }, { "epoch": 503.31044776119404, "grad_norm": 18.71291160583496, "learning_rate": 9.936041083099907e-06, "loss": 28.9529, "step": 21139 }, { "epoch": 503.33432835820895, "grad_norm": 23.529813766479492, "learning_rate": 9.935574229691878e-06, "loss": 27.2222, "step": 21140 }, { "epoch": 503.35820895522386, "grad_norm": 26.273536682128906, "learning_rate": 9.935107376283848e-06, "loss": 29.3134, "step": 21141 }, { "epoch": 503.3820895522388, "grad_norm": 24.92511558532715, "learning_rate": 9.934640522875818e-06, "loss": 27.7335, "step": 21142 }, { "epoch": 503.40597014925373, "grad_norm": 22.067546844482422, "learning_rate": 9.934173669467789e-06, "loss": 28.975, "step": 21143 }, { "epoch": 503.42985074626864, "grad_norm": 30.499740600585938, "learning_rate": 9.933706816059759e-06, "loss": 27.1154, "step": 21144 }, { "epoch": 503.4537313432836, "grad_norm": 24.56410026550293, "learning_rate": 9.933239962651728e-06, "loss": 27.9925, "step": 21145 }, { "epoch": 503.4776119402985, "grad_norm": 19.01593589782715, "learning_rate": 9.932773109243698e-06, "loss": 27.8154, "step": 21146 }, { "epoch": 503.5014925373134, "grad_norm": 30.58491325378418, "learning_rate": 9.93230625583567e-06, "loss": 26.7964, "step": 21147 }, { "epoch": 503.52537313432833, "grad_norm": 25.642250061035156, "learning_rate": 9.931839402427639e-06, "loss": 27.0988, "step": 21148 }, { "epoch": 503.5492537313433, "grad_norm": 20.22638511657715, "learning_rate": 9.931372549019609e-06, "loss": 27.1321, "step": 21149 }, { "epoch": 503.5731343283582, "grad_norm": 35.37609100341797, "learning_rate": 9.930905695611578e-06, "loss": 28.3421, "step": 21150 }, { "epoch": 503.5970149253731, "grad_norm": 23.705442428588867, "learning_rate": 9.93043884220355e-06, "loss": 27.7855, "step": 21151 }, { "epoch": 503.6208955223881, "grad_norm": 25.034828186035156, "learning_rate": 9.92997198879552e-06, "loss": 27.9829, "step": 21152 }, { "epoch": 503.644776119403, "grad_norm": 25.596284866333008, "learning_rate": 9.929505135387489e-06, "loss": 28.734, "step": 21153 }, { "epoch": 503.6686567164179, "grad_norm": 22.99311065673828, "learning_rate": 9.92903828197946e-06, "loss": 28.4827, "step": 21154 }, { "epoch": 503.6925373134328, "grad_norm": 23.289445877075195, "learning_rate": 9.92857142857143e-06, "loss": 27.8652, "step": 21155 }, { "epoch": 503.7164179104478, "grad_norm": 20.31854248046875, "learning_rate": 9.9281045751634e-06, "loss": 28.2086, "step": 21156 }, { "epoch": 503.7402985074627, "grad_norm": 20.84747314453125, "learning_rate": 9.927637721755369e-06, "loss": 28.1406, "step": 21157 }, { "epoch": 503.7641791044776, "grad_norm": 22.43474578857422, "learning_rate": 9.92717086834734e-06, "loss": 27.9258, "step": 21158 }, { "epoch": 503.78805970149256, "grad_norm": 19.196847915649414, "learning_rate": 9.92670401493931e-06, "loss": 28.2474, "step": 21159 }, { "epoch": 503.81194029850747, "grad_norm": 21.983352661132812, "learning_rate": 9.92623716153128e-06, "loss": 28.8086, "step": 21160 }, { "epoch": 503.8358208955224, "grad_norm": 26.887073516845703, "learning_rate": 9.925770308123251e-06, "loss": 28.0466, "step": 21161 }, { "epoch": 503.85970149253734, "grad_norm": 22.579288482666016, "learning_rate": 9.92530345471522e-06, "loss": 27.4533, "step": 21162 }, { "epoch": 503.88358208955225, "grad_norm": 23.448007583618164, "learning_rate": 9.92483660130719e-06, "loss": 28.9192, "step": 21163 }, { "epoch": 503.90746268656716, "grad_norm": 19.832740783691406, "learning_rate": 9.92436974789916e-06, "loss": 28.0685, "step": 21164 }, { "epoch": 503.93134328358207, "grad_norm": 26.27367401123047, "learning_rate": 9.923902894491131e-06, "loss": 28.6838, "step": 21165 }, { "epoch": 503.95522388059703, "grad_norm": 34.06510543823242, "learning_rate": 9.923436041083101e-06, "loss": 28.6422, "step": 21166 }, { "epoch": 503.97910447761194, "grad_norm": 19.338708877563477, "learning_rate": 9.92296918767507e-06, "loss": 28.339, "step": 21167 }, { "epoch": 504.0, "grad_norm": 30.34807586669922, "learning_rate": 9.922502334267042e-06, "loss": 24.9793, "step": 21168 }, { "epoch": 504.0238805970149, "grad_norm": 26.74033546447754, "learning_rate": 9.922035480859012e-06, "loss": 27.9234, "step": 21169 }, { "epoch": 504.0477611940299, "grad_norm": 24.528417587280273, "learning_rate": 9.921568627450981e-06, "loss": 28.2701, "step": 21170 }, { "epoch": 504.0716417910448, "grad_norm": 34.65286636352539, "learning_rate": 9.921101774042951e-06, "loss": 27.6263, "step": 21171 }, { "epoch": 504.0955223880597, "grad_norm": 24.95806312561035, "learning_rate": 9.920634920634922e-06, "loss": 28.5004, "step": 21172 }, { "epoch": 504.1194029850746, "grad_norm": 36.069732666015625, "learning_rate": 9.920168067226892e-06, "loss": 28.7157, "step": 21173 }, { "epoch": 504.14328358208957, "grad_norm": 29.118051528930664, "learning_rate": 9.919701213818862e-06, "loss": 28.079, "step": 21174 }, { "epoch": 504.1671641791045, "grad_norm": 23.490345001220703, "learning_rate": 9.919234360410831e-06, "loss": 28.6303, "step": 21175 }, { "epoch": 504.1910447761194, "grad_norm": 41.46742630004883, "learning_rate": 9.918767507002803e-06, "loss": 28.6664, "step": 21176 }, { "epoch": 504.21492537313435, "grad_norm": 27.912216186523438, "learning_rate": 9.918300653594772e-06, "loss": 28.6894, "step": 21177 }, { "epoch": 504.23880597014926, "grad_norm": 41.55944061279297, "learning_rate": 9.917833800186742e-06, "loss": 28.7637, "step": 21178 }, { "epoch": 504.26268656716417, "grad_norm": 22.24034309387207, "learning_rate": 9.917366946778713e-06, "loss": 27.6768, "step": 21179 }, { "epoch": 504.28656716417913, "grad_norm": 44.1727409362793, "learning_rate": 9.916900093370683e-06, "loss": 28.9202, "step": 21180 }, { "epoch": 504.31044776119404, "grad_norm": 26.673538208007812, "learning_rate": 9.916433239962653e-06, "loss": 27.9474, "step": 21181 }, { "epoch": 504.33432835820895, "grad_norm": 45.89542007446289, "learning_rate": 9.915966386554622e-06, "loss": 27.7979, "step": 21182 }, { "epoch": 504.35820895522386, "grad_norm": 34.50465774536133, "learning_rate": 9.915499533146594e-06, "loss": 28.0889, "step": 21183 }, { "epoch": 504.3820895522388, "grad_norm": 48.3437385559082, "learning_rate": 9.915032679738563e-06, "loss": 28.6671, "step": 21184 }, { "epoch": 504.40597014925373, "grad_norm": 39.412445068359375, "learning_rate": 9.914565826330533e-06, "loss": 27.2383, "step": 21185 }, { "epoch": 504.42985074626864, "grad_norm": 47.468955993652344, "learning_rate": 9.914098972922504e-06, "loss": 28.4514, "step": 21186 }, { "epoch": 504.4537313432836, "grad_norm": 43.152801513671875, "learning_rate": 9.913632119514474e-06, "loss": 27.9501, "step": 21187 }, { "epoch": 504.4776119402985, "grad_norm": 35.3669548034668, "learning_rate": 9.913165266106443e-06, "loss": 27.3486, "step": 21188 }, { "epoch": 504.5014925373134, "grad_norm": 35.4449577331543, "learning_rate": 9.912698412698413e-06, "loss": 27.8657, "step": 21189 }, { "epoch": 504.52537313432833, "grad_norm": 43.907684326171875, "learning_rate": 9.912231559290384e-06, "loss": 28.0574, "step": 21190 }, { "epoch": 504.5492537313433, "grad_norm": 36.61515808105469, "learning_rate": 9.911764705882354e-06, "loss": 27.8055, "step": 21191 }, { "epoch": 504.5731343283582, "grad_norm": 43.89160919189453, "learning_rate": 9.911297852474324e-06, "loss": 27.6316, "step": 21192 }, { "epoch": 504.5970149253731, "grad_norm": 39.70940399169922, "learning_rate": 9.910830999066295e-06, "loss": 28.2522, "step": 21193 }, { "epoch": 504.6208955223881, "grad_norm": 36.74070358276367, "learning_rate": 9.910364145658265e-06, "loss": 27.1695, "step": 21194 }, { "epoch": 504.644776119403, "grad_norm": 33.70294952392578, "learning_rate": 9.909897292250234e-06, "loss": 27.6793, "step": 21195 }, { "epoch": 504.6686567164179, "grad_norm": 35.90504837036133, "learning_rate": 9.909430438842204e-06, "loss": 26.9859, "step": 21196 }, { "epoch": 504.6925373134328, "grad_norm": 32.692649841308594, "learning_rate": 9.908963585434175e-06, "loss": 27.9611, "step": 21197 }, { "epoch": 504.7164179104478, "grad_norm": 40.6618537902832, "learning_rate": 9.908496732026145e-06, "loss": 28.5113, "step": 21198 }, { "epoch": 504.7402985074627, "grad_norm": 29.54161834716797, "learning_rate": 9.908029878618115e-06, "loss": 27.8308, "step": 21199 }, { "epoch": 504.7641791044776, "grad_norm": 50.1970329284668, "learning_rate": 9.907563025210084e-06, "loss": 27.4828, "step": 21200 }, { "epoch": 504.78805970149256, "grad_norm": 42.49684524536133, "learning_rate": 9.907096171802056e-06, "loss": 27.3752, "step": 21201 }, { "epoch": 504.81194029850747, "grad_norm": 34.46174621582031, "learning_rate": 9.906629318394025e-06, "loss": 27.9061, "step": 21202 }, { "epoch": 504.8358208955224, "grad_norm": 33.278072357177734, "learning_rate": 9.906162464985995e-06, "loss": 28.4152, "step": 21203 }, { "epoch": 504.85970149253734, "grad_norm": 39.52162170410156, "learning_rate": 9.905695611577966e-06, "loss": 28.724, "step": 21204 }, { "epoch": 504.88358208955225, "grad_norm": 30.84549331665039, "learning_rate": 9.905228758169936e-06, "loss": 28.6687, "step": 21205 }, { "epoch": 504.90746268656716, "grad_norm": 48.49695587158203, "learning_rate": 9.904761904761906e-06, "loss": 28.0305, "step": 21206 }, { "epoch": 504.93134328358207, "grad_norm": 43.94157409667969, "learning_rate": 9.904295051353875e-06, "loss": 27.2805, "step": 21207 }, { "epoch": 504.95522388059703, "grad_norm": 34.45707702636719, "learning_rate": 9.903828197945847e-06, "loss": 27.6081, "step": 21208 }, { "epoch": 504.97910447761194, "grad_norm": 30.84587860107422, "learning_rate": 9.903361344537816e-06, "loss": 28.5764, "step": 21209 }, { "epoch": 505.0, "grad_norm": 34.0885124206543, "learning_rate": 9.902894491129786e-06, "loss": 25.3896, "step": 21210 }, { "epoch": 505.0238805970149, "grad_norm": 30.44505500793457, "learning_rate": 9.902427637721757e-06, "loss": 27.6935, "step": 21211 }, { "epoch": 505.0477611940299, "grad_norm": 46.40093994140625, "learning_rate": 9.901960784313727e-06, "loss": 28.4051, "step": 21212 }, { "epoch": 505.0716417910448, "grad_norm": 41.12664794921875, "learning_rate": 9.901493930905697e-06, "loss": 28.2046, "step": 21213 }, { "epoch": 505.0955223880597, "grad_norm": 36.79216003417969, "learning_rate": 9.901027077497666e-06, "loss": 27.3934, "step": 21214 }, { "epoch": 505.1194029850746, "grad_norm": 34.292030334472656, "learning_rate": 9.900560224089638e-06, "loss": 28.743, "step": 21215 }, { "epoch": 505.14328358208957, "grad_norm": 41.40416717529297, "learning_rate": 9.900093370681607e-06, "loss": 27.2014, "step": 21216 }, { "epoch": 505.1671641791045, "grad_norm": 32.969505310058594, "learning_rate": 9.899626517273577e-06, "loss": 27.4256, "step": 21217 }, { "epoch": 505.1910447761194, "grad_norm": 42.884307861328125, "learning_rate": 9.899159663865548e-06, "loss": 27.887, "step": 21218 }, { "epoch": 505.21492537313435, "grad_norm": 37.778236389160156, "learning_rate": 9.898692810457518e-06, "loss": 26.6744, "step": 21219 }, { "epoch": 505.23880597014926, "grad_norm": 34.072879791259766, "learning_rate": 9.898225957049487e-06, "loss": 28.6051, "step": 21220 }, { "epoch": 505.26268656716417, "grad_norm": 33.67082595825195, "learning_rate": 9.897759103641457e-06, "loss": 29.2256, "step": 21221 }, { "epoch": 505.28656716417913, "grad_norm": 36.765838623046875, "learning_rate": 9.897292250233428e-06, "loss": 28.7984, "step": 21222 }, { "epoch": 505.31044776119404, "grad_norm": 29.590885162353516, "learning_rate": 9.896825396825398e-06, "loss": 27.0532, "step": 21223 }, { "epoch": 505.33432835820895, "grad_norm": 44.07057189941406, "learning_rate": 9.896358543417368e-06, "loss": 28.7154, "step": 21224 }, { "epoch": 505.35820895522386, "grad_norm": 36.137699127197266, "learning_rate": 9.895891690009339e-06, "loss": 28.4168, "step": 21225 }, { "epoch": 505.3820895522388, "grad_norm": 40.134857177734375, "learning_rate": 9.895424836601309e-06, "loss": 26.9011, "step": 21226 }, { "epoch": 505.40597014925373, "grad_norm": 34.73773193359375, "learning_rate": 9.894957983193278e-06, "loss": 28.0271, "step": 21227 }, { "epoch": 505.42985074626864, "grad_norm": 36.73931884765625, "learning_rate": 9.894491129785248e-06, "loss": 28.9074, "step": 21228 }, { "epoch": 505.4537313432836, "grad_norm": 31.253671646118164, "learning_rate": 9.89402427637722e-06, "loss": 29.2251, "step": 21229 }, { "epoch": 505.4776119402985, "grad_norm": 35.754634857177734, "learning_rate": 9.893557422969189e-06, "loss": 27.8501, "step": 21230 }, { "epoch": 505.5014925373134, "grad_norm": 31.550336837768555, "learning_rate": 9.893090569561159e-06, "loss": 27.7375, "step": 21231 }, { "epoch": 505.52537313432833, "grad_norm": 40.92430114746094, "learning_rate": 9.892623716153128e-06, "loss": 29.0526, "step": 21232 }, { "epoch": 505.5492537313433, "grad_norm": 36.674739837646484, "learning_rate": 9.8921568627451e-06, "loss": 28.019, "step": 21233 }, { "epoch": 505.5731343283582, "grad_norm": 38.905765533447266, "learning_rate": 9.89169000933707e-06, "loss": 27.5913, "step": 21234 }, { "epoch": 505.5970149253731, "grad_norm": 36.868167877197266, "learning_rate": 9.891223155929039e-06, "loss": 27.9188, "step": 21235 }, { "epoch": 505.6208955223881, "grad_norm": 35.07797622680664, "learning_rate": 9.89075630252101e-06, "loss": 27.6094, "step": 21236 }, { "epoch": 505.644776119403, "grad_norm": 29.965456008911133, "learning_rate": 9.89028944911298e-06, "loss": 27.5501, "step": 21237 }, { "epoch": 505.6686567164179, "grad_norm": 37.09678649902344, "learning_rate": 9.88982259570495e-06, "loss": 28.2282, "step": 21238 }, { "epoch": 505.6925373134328, "grad_norm": 33.17462921142578, "learning_rate": 9.88935574229692e-06, "loss": 27.8771, "step": 21239 }, { "epoch": 505.7164179104478, "grad_norm": 36.86302947998047, "learning_rate": 9.88888888888889e-06, "loss": 28.8529, "step": 21240 }, { "epoch": 505.7402985074627, "grad_norm": 34.65397262573242, "learning_rate": 9.88842203548086e-06, "loss": 27.2425, "step": 21241 }, { "epoch": 505.7641791044776, "grad_norm": 38.80485534667969, "learning_rate": 9.88795518207283e-06, "loss": 27.8157, "step": 21242 }, { "epoch": 505.78805970149256, "grad_norm": 30.847076416015625, "learning_rate": 9.887488328664801e-06, "loss": 26.8907, "step": 21243 }, { "epoch": 505.81194029850747, "grad_norm": 40.40427780151367, "learning_rate": 9.887021475256771e-06, "loss": 27.7673, "step": 21244 }, { "epoch": 505.8358208955224, "grad_norm": 32.44212341308594, "learning_rate": 9.88655462184874e-06, "loss": 28.3682, "step": 21245 }, { "epoch": 505.85970149253734, "grad_norm": 38.917423248291016, "learning_rate": 9.88608776844071e-06, "loss": 28.1077, "step": 21246 }, { "epoch": 505.88358208955225, "grad_norm": 34.065101623535156, "learning_rate": 9.885620915032682e-06, "loss": 27.915, "step": 21247 }, { "epoch": 505.90746268656716, "grad_norm": 34.07404327392578, "learning_rate": 9.885154061624651e-06, "loss": 27.4914, "step": 21248 }, { "epoch": 505.93134328358207, "grad_norm": 30.135225296020508, "learning_rate": 9.88468720821662e-06, "loss": 28.884, "step": 21249 }, { "epoch": 505.95522388059703, "grad_norm": 39.35141372680664, "learning_rate": 9.884220354808592e-06, "loss": 28.174, "step": 21250 }, { "epoch": 505.97910447761194, "grad_norm": 30.05372428894043, "learning_rate": 9.883753501400562e-06, "loss": 28.0419, "step": 21251 }, { "epoch": 506.0, "grad_norm": 37.511871337890625, "learning_rate": 9.883286647992531e-06, "loss": 24.6568, "step": 21252 }, { "epoch": 506.0238805970149, "grad_norm": 39.010520935058594, "learning_rate": 9.882819794584501e-06, "loss": 26.9154, "step": 21253 }, { "epoch": 506.0477611940299, "grad_norm": 38.16585922241211, "learning_rate": 9.882352941176472e-06, "loss": 28.6003, "step": 21254 }, { "epoch": 506.0716417910448, "grad_norm": 32.99650573730469, "learning_rate": 9.881886087768442e-06, "loss": 25.8021, "step": 21255 }, { "epoch": 506.0955223880597, "grad_norm": 34.75502014160156, "learning_rate": 9.881419234360412e-06, "loss": 27.0242, "step": 21256 }, { "epoch": 506.1194029850746, "grad_norm": 30.391948699951172, "learning_rate": 9.880952380952381e-06, "loss": 27.6668, "step": 21257 }, { "epoch": 506.14328358208957, "grad_norm": 37.704345703125, "learning_rate": 9.880485527544353e-06, "loss": 27.7905, "step": 21258 }, { "epoch": 506.1671641791045, "grad_norm": 33.522727966308594, "learning_rate": 9.880018674136322e-06, "loss": 27.2848, "step": 21259 }, { "epoch": 506.1910447761194, "grad_norm": 38.34471130371094, "learning_rate": 9.879551820728292e-06, "loss": 28.7412, "step": 21260 }, { "epoch": 506.21492537313435, "grad_norm": 34.701297760009766, "learning_rate": 9.879084967320263e-06, "loss": 28.3909, "step": 21261 }, { "epoch": 506.23880597014926, "grad_norm": 35.83405685424805, "learning_rate": 9.878618113912233e-06, "loss": 28.0841, "step": 21262 }, { "epoch": 506.26268656716417, "grad_norm": 30.083688735961914, "learning_rate": 9.878151260504203e-06, "loss": 27.7864, "step": 21263 }, { "epoch": 506.28656716417913, "grad_norm": 36.37599563598633, "learning_rate": 9.877684407096172e-06, "loss": 27.0791, "step": 21264 }, { "epoch": 506.31044776119404, "grad_norm": 32.7655143737793, "learning_rate": 9.877217553688144e-06, "loss": 28.2691, "step": 21265 }, { "epoch": 506.33432835820895, "grad_norm": 42.66666030883789, "learning_rate": 9.876750700280113e-06, "loss": 27.9825, "step": 21266 }, { "epoch": 506.35820895522386, "grad_norm": 35.34077835083008, "learning_rate": 9.876283846872083e-06, "loss": 27.3065, "step": 21267 }, { "epoch": 506.3820895522388, "grad_norm": 35.264251708984375, "learning_rate": 9.875816993464054e-06, "loss": 27.9741, "step": 21268 }, { "epoch": 506.40597014925373, "grad_norm": 32.250431060791016, "learning_rate": 9.875350140056024e-06, "loss": 27.7649, "step": 21269 }, { "epoch": 506.42985074626864, "grad_norm": 31.748281478881836, "learning_rate": 9.874883286647994e-06, "loss": 29.3175, "step": 21270 }, { "epoch": 506.4537313432836, "grad_norm": 27.325435638427734, "learning_rate": 9.874416433239963e-06, "loss": 27.5718, "step": 21271 }, { "epoch": 506.4776119402985, "grad_norm": 41.22060012817383, "learning_rate": 9.873949579831935e-06, "loss": 27.9462, "step": 21272 }, { "epoch": 506.5014925373134, "grad_norm": 31.07601547241211, "learning_rate": 9.873482726423904e-06, "loss": 27.9135, "step": 21273 }, { "epoch": 506.52537313432833, "grad_norm": 38.015724182128906, "learning_rate": 9.873015873015874e-06, "loss": 28.5744, "step": 21274 }, { "epoch": 506.5492537313433, "grad_norm": 33.83741760253906, "learning_rate": 9.872549019607845e-06, "loss": 27.826, "step": 21275 }, { "epoch": 506.5731343283582, "grad_norm": 34.283206939697266, "learning_rate": 9.872082166199815e-06, "loss": 28.1836, "step": 21276 }, { "epoch": 506.5970149253731, "grad_norm": 30.662593841552734, "learning_rate": 9.871615312791785e-06, "loss": 29.2503, "step": 21277 }, { "epoch": 506.6208955223881, "grad_norm": 31.94662094116211, "learning_rate": 9.871148459383754e-06, "loss": 28.2742, "step": 21278 }, { "epoch": 506.644776119403, "grad_norm": 25.840579986572266, "learning_rate": 9.870681605975725e-06, "loss": 29.1178, "step": 21279 }, { "epoch": 506.6686567164179, "grad_norm": 34.97330856323242, "learning_rate": 9.870214752567695e-06, "loss": 28.6061, "step": 21280 }, { "epoch": 506.6925373134328, "grad_norm": 26.768444061279297, "learning_rate": 9.869747899159665e-06, "loss": 27.8573, "step": 21281 }, { "epoch": 506.7164179104478, "grad_norm": 39.44446563720703, "learning_rate": 9.869281045751634e-06, "loss": 28.0546, "step": 21282 }, { "epoch": 506.7402985074627, "grad_norm": 31.65951919555664, "learning_rate": 9.868814192343606e-06, "loss": 28.6752, "step": 21283 }, { "epoch": 506.7641791044776, "grad_norm": 36.30107498168945, "learning_rate": 9.868347338935575e-06, "loss": 28.1886, "step": 21284 }, { "epoch": 506.78805970149256, "grad_norm": 32.08913803100586, "learning_rate": 9.867880485527545e-06, "loss": 28.1245, "step": 21285 }, { "epoch": 506.81194029850747, "grad_norm": 30.302745819091797, "learning_rate": 9.867413632119516e-06, "loss": 27.4927, "step": 21286 }, { "epoch": 506.8358208955224, "grad_norm": 28.511917114257812, "learning_rate": 9.866946778711486e-06, "loss": 27.7537, "step": 21287 }, { "epoch": 506.85970149253734, "grad_norm": 28.391435623168945, "learning_rate": 9.866479925303456e-06, "loss": 28.4897, "step": 21288 }, { "epoch": 506.88358208955225, "grad_norm": 24.644542694091797, "learning_rate": 9.866013071895425e-06, "loss": 27.6647, "step": 21289 }, { "epoch": 506.90746268656716, "grad_norm": 30.86452293395996, "learning_rate": 9.865546218487397e-06, "loss": 27.888, "step": 21290 }, { "epoch": 506.93134328358207, "grad_norm": 25.56391716003418, "learning_rate": 9.865079365079366e-06, "loss": 27.89, "step": 21291 }, { "epoch": 506.95522388059703, "grad_norm": 32.308807373046875, "learning_rate": 9.864612511671336e-06, "loss": 28.3882, "step": 21292 }, { "epoch": 506.97910447761194, "grad_norm": 29.104408264160156, "learning_rate": 9.864145658263307e-06, "loss": 29.103, "step": 21293 }, { "epoch": 507.0, "grad_norm": 26.439367294311523, "learning_rate": 9.863678804855277e-06, "loss": 24.045, "step": 21294 }, { "epoch": 507.0238805970149, "grad_norm": 29.155086517333984, "learning_rate": 9.863211951447247e-06, "loss": 28.0521, "step": 21295 }, { "epoch": 507.0477611940299, "grad_norm": 25.229236602783203, "learning_rate": 9.862745098039216e-06, "loss": 26.881, "step": 21296 }, { "epoch": 507.0716417910448, "grad_norm": 28.120941162109375, "learning_rate": 9.862278244631188e-06, "loss": 27.7964, "step": 21297 }, { "epoch": 507.0955223880597, "grad_norm": 21.03717613220215, "learning_rate": 9.861811391223157e-06, "loss": 27.9562, "step": 21298 }, { "epoch": 507.1194029850746, "grad_norm": 26.21407127380371, "learning_rate": 9.861344537815127e-06, "loss": 28.6069, "step": 21299 }, { "epoch": 507.14328358208957, "grad_norm": 20.998117446899414, "learning_rate": 9.860877684407098e-06, "loss": 28.885, "step": 21300 }, { "epoch": 507.1671641791045, "grad_norm": 23.592296600341797, "learning_rate": 9.860410830999068e-06, "loss": 28.0091, "step": 21301 }, { "epoch": 507.1910447761194, "grad_norm": NaN, "learning_rate": 9.859943977591038e-06, "loss": 33.7971, "step": 21302 }, { "epoch": 507.21492537313435, "grad_norm": 23.730674743652344, "learning_rate": 9.859943977591038e-06, "loss": 28.2305, "step": 21303 }, { "epoch": 507.23880597014926, "grad_norm": 25.508012771606445, "learning_rate": 9.859477124183007e-06, "loss": 28.2606, "step": 21304 }, { "epoch": 507.26268656716417, "grad_norm": 23.643741607666016, "learning_rate": 9.859010270774979e-06, "loss": 27.4153, "step": 21305 }, { "epoch": 507.28656716417913, "grad_norm": 22.89531707763672, "learning_rate": 9.858543417366948e-06, "loss": 28.3869, "step": 21306 }, { "epoch": 507.31044776119404, "grad_norm": 20.851030349731445, "learning_rate": 9.858076563958918e-06, "loss": 28.449, "step": 21307 }, { "epoch": 507.33432835820895, "grad_norm": 22.659093856811523, "learning_rate": 9.857609710550888e-06, "loss": 27.5342, "step": 21308 }, { "epoch": 507.35820895522386, "grad_norm": 23.955463409423828, "learning_rate": 9.857142857142859e-06, "loss": 27.788, "step": 21309 }, { "epoch": 507.3820895522388, "grad_norm": 20.41070556640625, "learning_rate": 9.856676003734828e-06, "loss": 29.3651, "step": 21310 }, { "epoch": 507.40597014925373, "grad_norm": 22.165922164916992, "learning_rate": 9.856209150326798e-06, "loss": 28.1723, "step": 21311 }, { "epoch": 507.42985074626864, "grad_norm": 22.777143478393555, "learning_rate": 9.85574229691877e-06, "loss": 28.198, "step": 21312 }, { "epoch": 507.4537313432836, "grad_norm": 22.59945297241211, "learning_rate": 9.855275443510739e-06, "loss": 27.332, "step": 21313 }, { "epoch": 507.4776119402985, "grad_norm": 19.576068878173828, "learning_rate": 9.854808590102709e-06, "loss": 27.6079, "step": 21314 }, { "epoch": 507.5014925373134, "grad_norm": 25.745716094970703, "learning_rate": 9.854341736694678e-06, "loss": 28.1403, "step": 21315 }, { "epoch": 507.52537313432833, "grad_norm": 25.160043716430664, "learning_rate": 9.85387488328665e-06, "loss": 27.9878, "step": 21316 }, { "epoch": 507.5492537313433, "grad_norm": 20.43986701965332, "learning_rate": 9.85340802987862e-06, "loss": 27.9095, "step": 21317 }, { "epoch": 507.5731343283582, "grad_norm": 22.10592269897461, "learning_rate": 9.852941176470589e-06, "loss": 27.8429, "step": 21318 }, { "epoch": 507.5970149253731, "grad_norm": 22.458219528198242, "learning_rate": 9.85247432306256e-06, "loss": 28.16, "step": 21319 }, { "epoch": 507.6208955223881, "grad_norm": 19.77086067199707, "learning_rate": 9.85200746965453e-06, "loss": 28.1538, "step": 21320 }, { "epoch": 507.644776119403, "grad_norm": 22.223417282104492, "learning_rate": 9.8515406162465e-06, "loss": 28.3054, "step": 21321 }, { "epoch": 507.6686567164179, "grad_norm": 23.48130989074707, "learning_rate": 9.85107376283847e-06, "loss": 27.9155, "step": 21322 }, { "epoch": 507.6925373134328, "grad_norm": 28.075151443481445, "learning_rate": 9.85060690943044e-06, "loss": 28.301, "step": 21323 }, { "epoch": 507.7164179104478, "grad_norm": 23.72812843322754, "learning_rate": 9.85014005602241e-06, "loss": 28.4572, "step": 21324 }, { "epoch": 507.7402985074627, "grad_norm": 24.523365020751953, "learning_rate": 9.84967320261438e-06, "loss": 27.4524, "step": 21325 }, { "epoch": 507.7641791044776, "grad_norm": 26.36513900756836, "learning_rate": 9.849206349206351e-06, "loss": 27.4242, "step": 21326 }, { "epoch": 507.78805970149256, "grad_norm": 20.853225708007812, "learning_rate": 9.848739495798321e-06, "loss": 28.7073, "step": 21327 }, { "epoch": 507.81194029850747, "grad_norm": 32.79405212402344, "learning_rate": 9.84827264239029e-06, "loss": 27.9903, "step": 21328 }, { "epoch": 507.8358208955224, "grad_norm": 22.327993392944336, "learning_rate": 9.84780578898226e-06, "loss": 27.699, "step": 21329 }, { "epoch": 507.85970149253734, "grad_norm": 28.957857131958008, "learning_rate": 9.847338935574232e-06, "loss": 28.6736, "step": 21330 }, { "epoch": 507.88358208955225, "grad_norm": 26.346660614013672, "learning_rate": 9.846872082166201e-06, "loss": 27.7286, "step": 21331 }, { "epoch": 507.90746268656716, "grad_norm": 21.8032283782959, "learning_rate": 9.846405228758171e-06, "loss": 27.561, "step": 21332 }, { "epoch": 507.93134328358207, "grad_norm": 25.423463821411133, "learning_rate": 9.84593837535014e-06, "loss": 28.1149, "step": 21333 }, { "epoch": 507.95522388059703, "grad_norm": 23.55913734436035, "learning_rate": 9.845471521942112e-06, "loss": 28.0801, "step": 21334 }, { "epoch": 507.97910447761194, "grad_norm": 23.461881637573242, "learning_rate": 9.845004668534082e-06, "loss": 28.3506, "step": 21335 }, { "epoch": 508.0, "grad_norm": 18.697492599487305, "learning_rate": 9.844537815126051e-06, "loss": 23.9141, "step": 21336 }, { "epoch": 508.0238805970149, "grad_norm": 24.34683609008789, "learning_rate": 9.844070961718023e-06, "loss": 27.589, "step": 21337 }, { "epoch": 508.0477611940299, "grad_norm": 20.33064842224121, "learning_rate": 9.84360410830999e-06, "loss": 27.6094, "step": 21338 }, { "epoch": 508.0716417910448, "grad_norm": 26.28217887878418, "learning_rate": 9.843137254901962e-06, "loss": 27.4263, "step": 21339 }, { "epoch": 508.0955223880597, "grad_norm": 19.506500244140625, "learning_rate": 9.842670401493931e-06, "loss": 27.4561, "step": 21340 }, { "epoch": 508.1194029850746, "grad_norm": 21.44455909729004, "learning_rate": 9.842203548085901e-06, "loss": 27.8377, "step": 21341 }, { "epoch": 508.14328358208957, "grad_norm": 24.80335235595703, "learning_rate": 9.84173669467787e-06, "loss": 27.7989, "step": 21342 }, { "epoch": 508.1671641791045, "grad_norm": 21.9842529296875, "learning_rate": 9.841269841269842e-06, "loss": 28.7167, "step": 21343 }, { "epoch": 508.1910447761194, "grad_norm": 21.242937088012695, "learning_rate": 9.840802987861812e-06, "loss": 27.3926, "step": 21344 }, { "epoch": 508.21492537313435, "grad_norm": 20.488792419433594, "learning_rate": 9.840336134453781e-06, "loss": 27.1951, "step": 21345 }, { "epoch": 508.23880597014926, "grad_norm": 19.69783592224121, "learning_rate": 9.839869281045751e-06, "loss": 28.1669, "step": 21346 }, { "epoch": 508.26268656716417, "grad_norm": 24.16853141784668, "learning_rate": 9.839402427637722e-06, "loss": 27.7841, "step": 21347 }, { "epoch": 508.28656716417913, "grad_norm": 21.939790725708008, "learning_rate": 9.838935574229692e-06, "loss": 27.9634, "step": 21348 }, { "epoch": 508.31044776119404, "grad_norm": 25.50762176513672, "learning_rate": 9.838468720821662e-06, "loss": 28.7789, "step": 21349 }, { "epoch": 508.33432835820895, "grad_norm": 21.085182189941406, "learning_rate": 9.838001867413633e-06, "loss": 28.8194, "step": 21350 }, { "epoch": 508.35820895522386, "grad_norm": 19.441993713378906, "learning_rate": 9.837535014005603e-06, "loss": 27.5244, "step": 21351 }, { "epoch": 508.3820895522388, "grad_norm": 23.50131607055664, "learning_rate": 9.837068160597572e-06, "loss": 28.4858, "step": 21352 }, { "epoch": 508.40597014925373, "grad_norm": 23.738468170166016, "learning_rate": 9.836601307189542e-06, "loss": 28.0974, "step": 21353 }, { "epoch": 508.42985074626864, "grad_norm": 21.322433471679688, "learning_rate": 9.836134453781513e-06, "loss": 26.8956, "step": 21354 }, { "epoch": 508.4537313432836, "grad_norm": 21.4801025390625, "learning_rate": 9.835667600373483e-06, "loss": 28.4926, "step": 21355 }, { "epoch": 508.4776119402985, "grad_norm": 23.877161026000977, "learning_rate": 9.835200746965453e-06, "loss": 28.8609, "step": 21356 }, { "epoch": 508.5014925373134, "grad_norm": 24.0955867767334, "learning_rate": 9.834733893557424e-06, "loss": 27.803, "step": 21357 }, { "epoch": 508.52537313432833, "grad_norm": 22.115087509155273, "learning_rate": 9.834267040149394e-06, "loss": 27.8758, "step": 21358 }, { "epoch": 508.5492537313433, "grad_norm": 20.592193603515625, "learning_rate": 9.833800186741363e-06, "loss": 27.9796, "step": 21359 }, { "epoch": 508.5731343283582, "grad_norm": 20.88570213317871, "learning_rate": 9.833333333333333e-06, "loss": 28.2075, "step": 21360 }, { "epoch": 508.5970149253731, "grad_norm": 21.85407257080078, "learning_rate": 9.832866479925304e-06, "loss": 29.2214, "step": 21361 }, { "epoch": 508.6208955223881, "grad_norm": 19.322256088256836, "learning_rate": 9.832399626517274e-06, "loss": 28.0389, "step": 21362 }, { "epoch": 508.644776119403, "grad_norm": 27.230100631713867, "learning_rate": 9.831932773109244e-06, "loss": 28.1424, "step": 21363 }, { "epoch": 508.6686567164179, "grad_norm": 20.59050178527832, "learning_rate": 9.831465919701215e-06, "loss": 28.0162, "step": 21364 }, { "epoch": 508.6925373134328, "grad_norm": 24.440053939819336, "learning_rate": 9.830999066293185e-06, "loss": 27.9469, "step": 21365 }, { "epoch": 508.7164179104478, "grad_norm": 25.04475975036621, "learning_rate": 9.830532212885154e-06, "loss": 28.2094, "step": 21366 }, { "epoch": 508.7402985074627, "grad_norm": 23.71200942993164, "learning_rate": 9.830065359477124e-06, "loss": 27.9906, "step": 21367 }, { "epoch": 508.7641791044776, "grad_norm": 18.10508918762207, "learning_rate": 9.829598506069095e-06, "loss": 27.1249, "step": 21368 }, { "epoch": 508.78805970149256, "grad_norm": 23.997806549072266, "learning_rate": 9.829131652661065e-06, "loss": 27.7144, "step": 21369 }, { "epoch": 508.81194029850747, "grad_norm": 24.10673713684082, "learning_rate": 9.828664799253034e-06, "loss": 28.1056, "step": 21370 }, { "epoch": 508.8358208955224, "grad_norm": 23.25332260131836, "learning_rate": 9.828197945845004e-06, "loss": 29.2244, "step": 21371 }, { "epoch": 508.85970149253734, "grad_norm": 19.615428924560547, "learning_rate": 9.827731092436975e-06, "loss": 27.8166, "step": 21372 }, { "epoch": 508.88358208955225, "grad_norm": 23.88954734802246, "learning_rate": 9.827264239028945e-06, "loss": 28.4119, "step": 21373 }, { "epoch": 508.90746268656716, "grad_norm": 21.850177764892578, "learning_rate": 9.826797385620915e-06, "loss": 27.5983, "step": 21374 }, { "epoch": 508.93134328358207, "grad_norm": 24.489336013793945, "learning_rate": 9.826330532212886e-06, "loss": 28.1896, "step": 21375 }, { "epoch": 508.95522388059703, "grad_norm": 20.659374237060547, "learning_rate": 9.825863678804856e-06, "loss": 26.5402, "step": 21376 }, { "epoch": 508.97910447761194, "grad_norm": 25.96872329711914, "learning_rate": 9.825396825396825e-06, "loss": 27.7307, "step": 21377 }, { "epoch": 509.0, "grad_norm": 19.351993560791016, "learning_rate": 9.824929971988795e-06, "loss": 24.3871, "step": 21378 }, { "epoch": 509.0238805970149, "grad_norm": 24.105003356933594, "learning_rate": 9.824463118580766e-06, "loss": 27.6137, "step": 21379 }, { "epoch": 509.0477611940299, "grad_norm": 29.952077865600586, "learning_rate": 9.823996265172736e-06, "loss": 27.7045, "step": 21380 }, { "epoch": 509.0716417910448, "grad_norm": 22.358003616333008, "learning_rate": 9.823529411764706e-06, "loss": 27.986, "step": 21381 }, { "epoch": 509.0955223880597, "grad_norm": 30.596952438354492, "learning_rate": 9.823062558356677e-06, "loss": 27.9709, "step": 21382 }, { "epoch": 509.1194029850746, "grad_norm": 23.459857940673828, "learning_rate": 9.822595704948647e-06, "loss": 27.6645, "step": 21383 }, { "epoch": 509.14328358208957, "grad_norm": 31.294933319091797, "learning_rate": 9.822128851540616e-06, "loss": 27.2999, "step": 21384 }, { "epoch": 509.1671641791045, "grad_norm": 25.938318252563477, "learning_rate": 9.821661998132586e-06, "loss": 28.9197, "step": 21385 }, { "epoch": 509.1910447761194, "grad_norm": 25.501726150512695, "learning_rate": 9.821195144724557e-06, "loss": 27.6493, "step": 21386 }, { "epoch": 509.21492537313435, "grad_norm": 28.900867462158203, "learning_rate": 9.820728291316527e-06, "loss": 27.8225, "step": 21387 }, { "epoch": 509.23880597014926, "grad_norm": 23.728988647460938, "learning_rate": 9.820261437908497e-06, "loss": 27.7885, "step": 21388 }, { "epoch": 509.26268656716417, "grad_norm": 26.782629013061523, "learning_rate": 9.819794584500468e-06, "loss": 29.4113, "step": 21389 }, { "epoch": 509.28656716417913, "grad_norm": 26.739818572998047, "learning_rate": 9.819327731092438e-06, "loss": 27.8762, "step": 21390 }, { "epoch": 509.31044776119404, "grad_norm": 21.551393508911133, "learning_rate": 9.818860877684407e-06, "loss": 27.7501, "step": 21391 }, { "epoch": 509.33432835820895, "grad_norm": 29.073942184448242, "learning_rate": 9.818394024276377e-06, "loss": 28.9911, "step": 21392 }, { "epoch": 509.35820895522386, "grad_norm": 22.602781295776367, "learning_rate": 9.817927170868348e-06, "loss": 28.3302, "step": 21393 }, { "epoch": 509.3820895522388, "grad_norm": 25.241395950317383, "learning_rate": 9.817460317460318e-06, "loss": 28.2194, "step": 21394 }, { "epoch": 509.40597014925373, "grad_norm": 23.504905700683594, "learning_rate": 9.816993464052288e-06, "loss": 27.145, "step": 21395 }, { "epoch": 509.42985074626864, "grad_norm": 22.798948287963867, "learning_rate": 9.816526610644259e-06, "loss": 28.3587, "step": 21396 }, { "epoch": 509.4537313432836, "grad_norm": 24.901124954223633, "learning_rate": 9.816059757236229e-06, "loss": 26.1461, "step": 21397 }, { "epoch": 509.4776119402985, "grad_norm": 30.47629737854004, "learning_rate": 9.815592903828198e-06, "loss": 28.3165, "step": 21398 }, { "epoch": 509.5014925373134, "grad_norm": 23.883150100708008, "learning_rate": 9.815126050420168e-06, "loss": 27.4739, "step": 21399 }, { "epoch": 509.52537313432833, "grad_norm": 24.094249725341797, "learning_rate": 9.81465919701214e-06, "loss": 27.6118, "step": 21400 }, { "epoch": 509.5492537313433, "grad_norm": 19.696121215820312, "learning_rate": 9.814192343604109e-06, "loss": 27.5423, "step": 21401 }, { "epoch": 509.5731343283582, "grad_norm": 28.735164642333984, "learning_rate": 9.813725490196078e-06, "loss": 27.4227, "step": 21402 }, { "epoch": 509.5970149253731, "grad_norm": 22.280521392822266, "learning_rate": 9.813258636788048e-06, "loss": 27.1796, "step": 21403 }, { "epoch": 509.6208955223881, "grad_norm": 24.285503387451172, "learning_rate": 9.81279178338002e-06, "loss": 27.6399, "step": 21404 }, { "epoch": 509.644776119403, "grad_norm": 25.543659210205078, "learning_rate": 9.812324929971989e-06, "loss": 27.8722, "step": 21405 }, { "epoch": 509.6686567164179, "grad_norm": 31.681840896606445, "learning_rate": 9.811858076563959e-06, "loss": 28.4708, "step": 21406 }, { "epoch": 509.6925373134328, "grad_norm": 22.420093536376953, "learning_rate": 9.81139122315593e-06, "loss": 28.5557, "step": 21407 }, { "epoch": 509.7164179104478, "grad_norm": 39.86437225341797, "learning_rate": 9.8109243697479e-06, "loss": 28.0586, "step": 21408 }, { "epoch": 509.7402985074627, "grad_norm": 24.385238647460938, "learning_rate": 9.81045751633987e-06, "loss": 27.2287, "step": 21409 }, { "epoch": 509.7641791044776, "grad_norm": 37.51817321777344, "learning_rate": 9.809990662931839e-06, "loss": 28.4225, "step": 21410 }, { "epoch": 509.78805970149256, "grad_norm": 28.358720779418945, "learning_rate": 9.80952380952381e-06, "loss": 28.3494, "step": 21411 }, { "epoch": 509.81194029850747, "grad_norm": 29.724750518798828, "learning_rate": 9.80905695611578e-06, "loss": 27.8502, "step": 21412 }, { "epoch": 509.8358208955224, "grad_norm": 30.700946807861328, "learning_rate": 9.80859010270775e-06, "loss": 27.8706, "step": 21413 }, { "epoch": 509.85970149253734, "grad_norm": 21.830127716064453, "learning_rate": 9.808123249299721e-06, "loss": 27.6413, "step": 21414 }, { "epoch": 509.88358208955225, "grad_norm": 35.75211715698242, "learning_rate": 9.80765639589169e-06, "loss": 27.776, "step": 21415 }, { "epoch": 509.90746268656716, "grad_norm": 22.80150032043457, "learning_rate": 9.80718954248366e-06, "loss": 28.0285, "step": 21416 }, { "epoch": 509.93134328358207, "grad_norm": 34.470645904541016, "learning_rate": 9.80672268907563e-06, "loss": 28.8277, "step": 21417 }, { "epoch": 509.95522388059703, "grad_norm": 27.827823638916016, "learning_rate": 9.806255835667601e-06, "loss": 28.3454, "step": 21418 }, { "epoch": 509.97910447761194, "grad_norm": 30.001171112060547, "learning_rate": 9.805788982259571e-06, "loss": 27.3294, "step": 21419 }, { "epoch": 510.0, "grad_norm": 24.663623809814453, "learning_rate": 9.80532212885154e-06, "loss": 25.6083, "step": 21420 }, { "epoch": 510.0, "step": 21420, "total_flos": 1.0529483771397358e+18, "train_loss": 0.5500211917830894, "train_runtime": 12810.2672, "train_samples_per_second": 213.073, "train_steps_per_second": 1.672 }, { "epoch": 510.0238805970149, "grad_norm": 26.442594528198242, "learning_rate": 1e-05, "loss": 27.8498, "step": 21421 }, { "epoch": 510.0477611940299, "grad_norm": Infinity, "learning_rate": 9.999550763701707e-06, "loss": 34.7123, "step": 21422 }, { "epoch": 510.0716417910448, "grad_norm": Infinity, "learning_rate": 9.999550763701707e-06, "loss": 35.1303, "step": 21423 }, { "epoch": 510.0955223880597, "grad_norm": 369.8858642578125, "learning_rate": 9.999550763701707e-06, "loss": 34.4121, "step": 21424 }, { "epoch": 510.1194029850746, "grad_norm": 189.5615692138672, "learning_rate": 9.999101527403415e-06, "loss": 33.2041, "step": 21425 }, { "epoch": 510.14328358208957, "grad_norm": 103.87483978271484, "learning_rate": 9.998652291105122e-06, "loss": 30.6859, "step": 21426 }, { "epoch": 510.1671641791045, "grad_norm": 75.19010162353516, "learning_rate": 9.998203054806828e-06, "loss": 29.7048, "step": 21427 }, { "epoch": 510.1910447761194, "grad_norm": 60.41788864135742, "learning_rate": 9.997753818508536e-06, "loss": 29.7021, "step": 21428 }, { "epoch": 510.21492537313435, "grad_norm": 53.26457977294922, "learning_rate": 9.997304582210244e-06, "loss": 29.332, "step": 21429 }, { "epoch": 510.23880597014926, "grad_norm": 53.90962600708008, "learning_rate": 9.99685534591195e-06, "loss": 29.3723, "step": 21430 }, { "epoch": 510.26268656716417, "grad_norm": 38.81138610839844, "learning_rate": 9.996406109613657e-06, "loss": 28.1144, "step": 21431 }, { "epoch": 510.28656716417913, "grad_norm": 38.978790283203125, "learning_rate": 9.995956873315365e-06, "loss": 28.5133, "step": 21432 }, { "epoch": 510.31044776119404, "grad_norm": 35.58748245239258, "learning_rate": 9.995507637017073e-06, "loss": 28.9188, "step": 21433 }, { "epoch": 510.33432835820895, "grad_norm": 44.637046813964844, "learning_rate": 9.995058400718779e-06, "loss": 28.1991, "step": 21434 }, { "epoch": 510.35820895522386, "grad_norm": 36.643741607666016, "learning_rate": 9.994609164420486e-06, "loss": 28.2076, "step": 21435 }, { "epoch": 510.3820895522388, "grad_norm": 27.821117401123047, "learning_rate": 9.994159928122194e-06, "loss": 27.051, "step": 21436 }, { "epoch": 510.40597014925373, "grad_norm": 34.01607894897461, "learning_rate": 9.9937106918239e-06, "loss": 27.7307, "step": 21437 }, { "epoch": 510.42985074626864, "grad_norm": 31.115936279296875, "learning_rate": 9.993261455525606e-06, "loss": 28.0671, "step": 21438 }, { "epoch": 510.4537313432836, "grad_norm": 24.99266242980957, "learning_rate": 9.992812219227316e-06, "loss": 28.6115, "step": 21439 }, { "epoch": 510.4776119402985, "grad_norm": 24.026716232299805, "learning_rate": 9.992362982929022e-06, "loss": 27.9299, "step": 21440 }, { "epoch": 510.5014925373134, "grad_norm": 32.019981384277344, "learning_rate": 9.991913746630728e-06, "loss": 27.6948, "step": 21441 }, { "epoch": 510.52537313432833, "grad_norm": 26.014591217041016, "learning_rate": 9.991464510332435e-06, "loss": 27.8818, "step": 21442 }, { "epoch": 510.5492537313433, "grad_norm": 22.7497501373291, "learning_rate": 9.991015274034143e-06, "loss": 28.46, "step": 21443 }, { "epoch": 510.5731343283582, "grad_norm": 31.263031005859375, "learning_rate": 9.990566037735849e-06, "loss": 28.2059, "step": 21444 }, { "epoch": 510.5970149253731, "grad_norm": 32.6760368347168, "learning_rate": 9.990116801437557e-06, "loss": 27.3843, "step": 21445 }, { "epoch": 510.6208955223881, "grad_norm": 21.58977699279785, "learning_rate": 9.989667565139264e-06, "loss": 27.7683, "step": 21446 }, { "epoch": 510.644776119403, "grad_norm": 22.051664352416992, "learning_rate": 9.989218328840972e-06, "loss": 26.8584, "step": 21447 }, { "epoch": 510.6686567164179, "grad_norm": 29.64303970336914, "learning_rate": 9.988769092542678e-06, "loss": 28.3014, "step": 21448 }, { "epoch": 510.6925373134328, "grad_norm": 18.141202926635742, "learning_rate": 9.988319856244386e-06, "loss": 27.2324, "step": 21449 }, { "epoch": 510.7164179104478, "grad_norm": 23.543994903564453, "learning_rate": 9.987870619946093e-06, "loss": 28.4333, "step": 21450 }, { "epoch": 510.7402985074627, "grad_norm": 32.53776550292969, "learning_rate": 9.9874213836478e-06, "loss": 28.8211, "step": 21451 }, { "epoch": 510.7641791044776, "grad_norm": 19.184324264526367, "learning_rate": 9.986972147349507e-06, "loss": 27.4283, "step": 21452 }, { "epoch": 510.78805970149256, "grad_norm": 28.916866302490234, "learning_rate": 9.986522911051215e-06, "loss": 27.3814, "step": 21453 }, { "epoch": 510.81194029850747, "grad_norm": 27.22681999206543, "learning_rate": 9.98607367475292e-06, "loss": 28.0787, "step": 21454 }, { "epoch": 510.8358208955224, "grad_norm": 20.02556037902832, "learning_rate": 9.985624438454627e-06, "loss": 28.6093, "step": 21455 }, { "epoch": 510.85970149253734, "grad_norm": 25.502721786499023, "learning_rate": 9.985175202156335e-06, "loss": 28.4224, "step": 21456 }, { "epoch": 510.88358208955225, "grad_norm": 26.91769027709961, "learning_rate": 9.984725965858042e-06, "loss": 28.6368, "step": 21457 }, { "epoch": 510.90746268656716, "grad_norm": 19.702804565429688, "learning_rate": 9.984276729559748e-06, "loss": 28.6071, "step": 21458 }, { "epoch": 510.93134328358207, "grad_norm": NaN, "learning_rate": 9.983827493261456e-06, "loss": 28.8768, "step": 21459 }, { "epoch": 510.95522388059703, "grad_norm": 19.210491180419922, "learning_rate": 9.983827493261456e-06, "loss": 28.4423, "step": 21460 }, { "epoch": 510.97910447761194, "grad_norm": 25.382944107055664, "learning_rate": 9.983378256963164e-06, "loss": 28.5388, "step": 21461 }, { "epoch": 511.0, "grad_norm": 23.345169067382812, "learning_rate": 9.982929020664871e-06, "loss": 23.1061, "step": 21462 }, { "epoch": 511.0238805970149, "grad_norm": 18.84296989440918, "learning_rate": 9.982479784366577e-06, "loss": 26.3893, "step": 21463 }, { "epoch": 511.0477611940299, "grad_norm": 23.460371017456055, "learning_rate": 9.982030548068285e-06, "loss": 28.4339, "step": 21464 }, { "epoch": 511.0716417910448, "grad_norm": 28.15729331970215, "learning_rate": 9.981581311769993e-06, "loss": 27.5295, "step": 21465 }, { "epoch": 511.0955223880597, "grad_norm": 22.73273277282715, "learning_rate": 9.981132075471699e-06, "loss": 27.7858, "step": 21466 }, { "epoch": 511.1194029850746, "grad_norm": 21.55552864074707, "learning_rate": 9.980682839173406e-06, "loss": 28.8695, "step": 21467 }, { "epoch": 511.14328358208957, "grad_norm": 30.53504753112793, "learning_rate": 9.980233602875114e-06, "loss": 28.3621, "step": 21468 }, { "epoch": 511.1671641791045, "grad_norm": 22.25741195678711, "learning_rate": 9.97978436657682e-06, "loss": 27.3942, "step": 21469 }, { "epoch": 511.1910447761194, "grad_norm": 22.777700424194336, "learning_rate": 9.979335130278526e-06, "loss": 27.9212, "step": 21470 }, { "epoch": 511.21492537313435, "grad_norm": 30.056678771972656, "learning_rate": 9.978885893980235e-06, "loss": 26.9227, "step": 21471 }, { "epoch": 511.23880597014926, "grad_norm": 21.618358612060547, "learning_rate": 9.978436657681941e-06, "loss": 27.4352, "step": 21472 }, { "epoch": 511.26268656716417, "grad_norm": 30.54107093811035, "learning_rate": 9.977987421383647e-06, "loss": 28.0403, "step": 21473 }, { "epoch": 511.28656716417913, "grad_norm": 28.673583984375, "learning_rate": 9.977538185085355e-06, "loss": 27.8076, "step": 21474 }, { "epoch": 511.31044776119404, "grad_norm": 21.406396865844727, "learning_rate": 9.977088948787063e-06, "loss": 28.2065, "step": 21475 }, { "epoch": 511.33432835820895, "grad_norm": 30.372713088989258, "learning_rate": 9.97663971248877e-06, "loss": 27.3541, "step": 21476 }, { "epoch": 511.35820895522386, "grad_norm": NaN, "learning_rate": 9.976190476190477e-06, "loss": 23.4502, "step": 21477 }, { "epoch": 511.3820895522388, "grad_norm": 23.392169952392578, "learning_rate": 9.976190476190477e-06, "loss": 26.8061, "step": 21478 }, { "epoch": 511.40597014925373, "grad_norm": 21.294248580932617, "learning_rate": 9.975741239892184e-06, "loss": 29.0305, "step": 21479 }, { "epoch": 511.42985074626864, "grad_norm": 24.153898239135742, "learning_rate": 9.975292003593892e-06, "loss": 27.9476, "step": 21480 }, { "epoch": 511.4537313432836, "grad_norm": 19.395076751708984, "learning_rate": 9.974842767295598e-06, "loss": 27.4622, "step": 21481 }, { "epoch": 511.4776119402985, "grad_norm": 19.85757827758789, "learning_rate": 9.974393530997306e-06, "loss": 28.054, "step": 21482 }, { "epoch": 511.5014925373134, "grad_norm": 21.295183181762695, "learning_rate": 9.973944294699013e-06, "loss": 28.1775, "step": 21483 }, { "epoch": 511.52537313432833, "grad_norm": 19.273386001586914, "learning_rate": 9.97349505840072e-06, "loss": 28.2382, "step": 21484 }, { "epoch": 511.5492537313433, "grad_norm": 20.26430320739746, "learning_rate": 9.973045822102425e-06, "loss": 26.8822, "step": 21485 }, { "epoch": 511.5731343283582, "grad_norm": 23.50433921813965, "learning_rate": 9.972596585804135e-06, "loss": 27.0757, "step": 21486 }, { "epoch": 511.5970149253731, "grad_norm": 20.83809471130371, "learning_rate": 9.97214734950584e-06, "loss": 28.1458, "step": 21487 }, { "epoch": 511.6208955223881, "grad_norm": 30.78812026977539, "learning_rate": 9.971698113207547e-06, "loss": 28.3347, "step": 21488 }, { "epoch": 511.644776119403, "grad_norm": 22.814861297607422, "learning_rate": 9.971248876909254e-06, "loss": 28.0592, "step": 21489 }, { "epoch": 511.6686567164179, "grad_norm": 20.527183532714844, "learning_rate": 9.970799640610962e-06, "loss": 27.8778, "step": 21490 }, { "epoch": 511.6925373134328, "grad_norm": 20.288068771362305, "learning_rate": 9.97035040431267e-06, "loss": 29.2234, "step": 21491 }, { "epoch": 511.7164179104478, "grad_norm": 20.13239288330078, "learning_rate": 9.969901168014376e-06, "loss": 28.4085, "step": 21492 }, { "epoch": 511.7402985074627, "grad_norm": 20.0487060546875, "learning_rate": 9.969451931716084e-06, "loss": 28.6841, "step": 21493 }, { "epoch": 511.7641791044776, "grad_norm": NaN, "learning_rate": 9.969002695417791e-06, "loss": 44.7783, "step": 21494 }, { "epoch": 511.78805970149256, "grad_norm": 21.97346305847168, "learning_rate": 9.969002695417791e-06, "loss": 28.1874, "step": 21495 }, { "epoch": 511.81194029850747, "grad_norm": 24.994972229003906, "learning_rate": 9.968553459119497e-06, "loss": 28.4067, "step": 21496 }, { "epoch": 511.8358208955224, "grad_norm": 22.71796226501465, "learning_rate": 9.968104222821205e-06, "loss": 27.4744, "step": 21497 }, { "epoch": 511.85970149253734, "grad_norm": 21.4873104095459, "learning_rate": 9.967654986522913e-06, "loss": 27.573, "step": 21498 }, { "epoch": 511.88358208955225, "grad_norm": 20.09888458251953, "learning_rate": 9.967205750224619e-06, "loss": 28.0669, "step": 21499 }, { "epoch": 511.90746268656716, "grad_norm": 24.194473266601562, "learning_rate": 9.966756513926326e-06, "loss": 27.6661, "step": 21500 }, { "epoch": 511.93134328358207, "grad_norm": 24.05251121520996, "learning_rate": 9.966307277628034e-06, "loss": 28.5737, "step": 21501 }, { "epoch": 511.95522388059703, "grad_norm": 20.505271911621094, "learning_rate": 9.96585804132974e-06, "loss": 28.1069, "step": 21502 }, { "epoch": 511.97910447761194, "grad_norm": 19.58890151977539, "learning_rate": 9.965408805031446e-06, "loss": 28.0829, "step": 21503 }, { "epoch": 512.0, "grad_norm": 18.54958152770996, "learning_rate": 9.964959568733154e-06, "loss": 24.7103, "step": 21504 }, { "epoch": 512.0238805970149, "grad_norm": 23.973127365112305, "learning_rate": 9.964510332434861e-06, "loss": 28.4862, "step": 21505 }, { "epoch": 512.0477611940298, "grad_norm": 21.36614418029785, "learning_rate": 9.96406109613657e-06, "loss": 27.4788, "step": 21506 }, { "epoch": 512.0716417910447, "grad_norm": 23.38019561767578, "learning_rate": 9.963611859838275e-06, "loss": 27.8888, "step": 21507 }, { "epoch": 512.0955223880597, "grad_norm": 22.763999938964844, "learning_rate": 9.963162623539983e-06, "loss": 28.1837, "step": 21508 }, { "epoch": 512.1194029850747, "grad_norm": 19.910966873168945, "learning_rate": 9.96271338724169e-06, "loss": 27.7858, "step": 21509 }, { "epoch": 512.1432835820896, "grad_norm": 20.851341247558594, "learning_rate": 9.962264150943397e-06, "loss": 27.3277, "step": 21510 }, { "epoch": 512.1671641791045, "grad_norm": 26.34923553466797, "learning_rate": 9.961814914645104e-06, "loss": 28.7662, "step": 21511 }, { "epoch": 512.1910447761194, "grad_norm": 21.57990074157715, "learning_rate": 9.961365678346812e-06, "loss": 27.8787, "step": 21512 }, { "epoch": 512.2149253731343, "grad_norm": 22.088834762573242, "learning_rate": 9.960916442048518e-06, "loss": 26.4951, "step": 21513 }, { "epoch": 512.2388059701492, "grad_norm": 18.3950252532959, "learning_rate": 9.960467205750226e-06, "loss": 26.5776, "step": 21514 }, { "epoch": 512.2626865671642, "grad_norm": 20.20016860961914, "learning_rate": 9.960017969451933e-06, "loss": 29.0021, "step": 21515 }, { "epoch": 512.2865671641791, "grad_norm": 19.848520278930664, "learning_rate": 9.95956873315364e-06, "loss": 27.7748, "step": 21516 }, { "epoch": 512.310447761194, "grad_norm": 18.84009552001953, "learning_rate": 9.959119496855345e-06, "loss": 27.3511, "step": 21517 }, { "epoch": 512.334328358209, "grad_norm": NaN, "learning_rate": 9.958670260557055e-06, "loss": 48.916, "step": 21518 }, { "epoch": 512.3582089552239, "grad_norm": 23.695302963256836, "learning_rate": 9.958670260557055e-06, "loss": 28.1263, "step": 21519 }, { "epoch": 512.3820895522388, "grad_norm": 25.749338150024414, "learning_rate": 9.95822102425876e-06, "loss": 27.2236, "step": 21520 }, { "epoch": 512.4059701492537, "grad_norm": 22.23241424560547, "learning_rate": 9.957771787960468e-06, "loss": 28.3447, "step": 21521 }, { "epoch": 512.4298507462687, "grad_norm": 21.025794982910156, "learning_rate": 9.957322551662174e-06, "loss": 27.8307, "step": 21522 }, { "epoch": 512.4537313432836, "grad_norm": 30.593481063842773, "learning_rate": 9.956873315363882e-06, "loss": 27.0458, "step": 21523 }, { "epoch": 512.4776119402985, "grad_norm": 22.22791862487793, "learning_rate": 9.95642407906559e-06, "loss": 27.6409, "step": 21524 }, { "epoch": 512.5014925373134, "grad_norm": 22.053985595703125, "learning_rate": 9.955974842767296e-06, "loss": 27.2213, "step": 21525 }, { "epoch": 512.5253731343283, "grad_norm": 29.660554885864258, "learning_rate": 9.955525606469004e-06, "loss": 28.361, "step": 21526 }, { "epoch": 512.5492537313432, "grad_norm": 26.038280487060547, "learning_rate": 9.955076370170711e-06, "loss": 28.5896, "step": 21527 }, { "epoch": 512.5731343283583, "grad_norm": 18.840133666992188, "learning_rate": 9.954627133872417e-06, "loss": 28.9948, "step": 21528 }, { "epoch": 512.5970149253732, "grad_norm": 24.624767303466797, "learning_rate": 9.954177897574125e-06, "loss": 28.224, "step": 21529 }, { "epoch": 512.6208955223881, "grad_norm": 22.618478775024414, "learning_rate": 9.953728661275833e-06, "loss": 26.918, "step": 21530 }, { "epoch": 512.644776119403, "grad_norm": 24.16161346435547, "learning_rate": 9.953279424977539e-06, "loss": 28.7024, "step": 21531 }, { "epoch": 512.6686567164179, "grad_norm": 19.563488006591797, "learning_rate": 9.952830188679246e-06, "loss": 28.1034, "step": 21532 }, { "epoch": 512.6925373134328, "grad_norm": 19.506616592407227, "learning_rate": 9.952380952380954e-06, "loss": 27.0905, "step": 21533 }, { "epoch": 512.7164179104477, "grad_norm": 20.41429328918457, "learning_rate": 9.95193171608266e-06, "loss": 28.0607, "step": 21534 }, { "epoch": 512.7402985074627, "grad_norm": 23.266233444213867, "learning_rate": 9.951482479784368e-06, "loss": 27.9301, "step": 21535 }, { "epoch": 512.7641791044776, "grad_norm": 25.997812271118164, "learning_rate": 9.951033243486074e-06, "loss": 28.4379, "step": 21536 }, { "epoch": 512.7880597014926, "grad_norm": 17.36048126220703, "learning_rate": 9.950584007187781e-06, "loss": 27.5112, "step": 21537 }, { "epoch": 512.8119402985075, "grad_norm": 16.981061935424805, "learning_rate": 9.95013477088949e-06, "loss": 28.8636, "step": 21538 }, { "epoch": 512.8358208955224, "grad_norm": 20.58241844177246, "learning_rate": 9.949685534591195e-06, "loss": 28.1392, "step": 21539 }, { "epoch": 512.8597014925373, "grad_norm": 28.074987411499023, "learning_rate": 9.949236298292903e-06, "loss": 28.8487, "step": 21540 }, { "epoch": 512.8835820895522, "grad_norm": 22.40904998779297, "learning_rate": 9.94878706199461e-06, "loss": 27.2329, "step": 21541 }, { "epoch": 512.9074626865672, "grad_norm": 17.647769927978516, "learning_rate": 9.948337825696317e-06, "loss": 26.255, "step": 21542 }, { "epoch": 512.9313432835821, "grad_norm": 16.548133850097656, "learning_rate": 9.947888589398024e-06, "loss": 27.8467, "step": 21543 }, { "epoch": 512.955223880597, "grad_norm": 18.2730712890625, "learning_rate": 9.947439353099732e-06, "loss": 28.8395, "step": 21544 }, { "epoch": 512.9791044776119, "grad_norm": 18.18562889099121, "learning_rate": 9.946990116801438e-06, "loss": 26.7839, "step": 21545 }, { "epoch": 513.0, "grad_norm": 18.630231857299805, "learning_rate": 9.946540880503146e-06, "loss": 23.9774, "step": 21546 }, { "epoch": 513.0238805970149, "grad_norm": NaN, "learning_rate": 9.946091644204853e-06, "loss": 31.3795, "step": 21547 }, { "epoch": 513.0477611940298, "grad_norm": 20.329532623291016, "learning_rate": 9.946091644204853e-06, "loss": 27.7949, "step": 21548 }, { "epoch": 513.0716417910447, "grad_norm": 24.32185173034668, "learning_rate": 9.94564240790656e-06, "loss": 29.1748, "step": 21549 }, { "epoch": 513.0955223880597, "grad_norm": 17.739744186401367, "learning_rate": 9.945193171608267e-06, "loss": 28.6531, "step": 21550 }, { "epoch": 513.1194029850747, "grad_norm": 18.64484977722168, "learning_rate": 9.944743935309975e-06, "loss": 26.1246, "step": 21551 }, { "epoch": 513.1432835820896, "grad_norm": 23.789958953857422, "learning_rate": 9.94429469901168e-06, "loss": 28.7915, "step": 21552 }, { "epoch": 513.1671641791045, "grad_norm": 24.964664459228516, "learning_rate": 9.943845462713388e-06, "loss": 27.6079, "step": 21553 }, { "epoch": 513.1910447761194, "grad_norm": 22.63495445251465, "learning_rate": 9.943396226415094e-06, "loss": 27.8474, "step": 21554 }, { "epoch": 513.2149253731343, "grad_norm": 20.37474822998047, "learning_rate": 9.942946990116802e-06, "loss": 28.0368, "step": 21555 }, { "epoch": 513.2388059701492, "grad_norm": 22.8182430267334, "learning_rate": 9.94249775381851e-06, "loss": 27.4334, "step": 21556 }, { "epoch": 513.2626865671642, "grad_norm": 18.373273849487305, "learning_rate": 9.942048517520216e-06, "loss": 28.5182, "step": 21557 }, { "epoch": 513.2865671641791, "grad_norm": 23.723846435546875, "learning_rate": 9.941599281221924e-06, "loss": 28.0378, "step": 21558 }, { "epoch": 513.310447761194, "grad_norm": 22.8382568359375, "learning_rate": 9.941150044923631e-06, "loss": 28.3188, "step": 21559 }, { "epoch": 513.334328358209, "grad_norm": 20.13503074645996, "learning_rate": 9.940700808625337e-06, "loss": 26.9951, "step": 21560 }, { "epoch": 513.3582089552239, "grad_norm": 22.450618743896484, "learning_rate": 9.940251572327045e-06, "loss": 27.7008, "step": 21561 }, { "epoch": 513.3820895522388, "grad_norm": 24.15396499633789, "learning_rate": 9.939802336028753e-06, "loss": 27.5259, "step": 21562 }, { "epoch": 513.4059701492537, "grad_norm": 22.020544052124023, "learning_rate": 9.939353099730459e-06, "loss": 27.7177, "step": 21563 }, { "epoch": 513.4298507462687, "grad_norm": 24.043819427490234, "learning_rate": 9.938903863432166e-06, "loss": 27.0455, "step": 21564 }, { "epoch": 513.4537313432836, "grad_norm": 21.093305587768555, "learning_rate": 9.938454627133874e-06, "loss": 28.2764, "step": 21565 }, { "epoch": 513.4776119402985, "grad_norm": 20.694528579711914, "learning_rate": 9.93800539083558e-06, "loss": 27.2599, "step": 21566 }, { "epoch": 513.5014925373134, "grad_norm": 22.053272247314453, "learning_rate": 9.937556154537288e-06, "loss": 28.739, "step": 21567 }, { "epoch": 513.5253731343283, "grad_norm": 17.595293045043945, "learning_rate": 9.937106918238994e-06, "loss": 26.8809, "step": 21568 }, { "epoch": 513.5492537313432, "grad_norm": 24.12435531616211, "learning_rate": 9.936657681940701e-06, "loss": 28.4575, "step": 21569 }, { "epoch": 513.5731343283583, "grad_norm": 19.6004638671875, "learning_rate": 9.936208445642409e-06, "loss": 27.8448, "step": 21570 }, { "epoch": 513.5970149253732, "grad_norm": 30.04730224609375, "learning_rate": 9.935759209344115e-06, "loss": 27.7334, "step": 21571 }, { "epoch": 513.6208955223881, "grad_norm": 20.0417423248291, "learning_rate": 9.935309973045823e-06, "loss": 27.7722, "step": 21572 }, { "epoch": 513.644776119403, "grad_norm": 23.099567413330078, "learning_rate": 9.93486073674753e-06, "loss": 27.2105, "step": 21573 }, { "epoch": 513.6686567164179, "grad_norm": 19.221696853637695, "learning_rate": 9.934411500449237e-06, "loss": 27.7626, "step": 21574 }, { "epoch": 513.6925373134328, "grad_norm": 24.71274757385254, "learning_rate": 9.933962264150944e-06, "loss": 27.6057, "step": 21575 }, { "epoch": 513.7164179104477, "grad_norm": 19.95201301574707, "learning_rate": 9.933513027852652e-06, "loss": 28.8007, "step": 21576 }, { "epoch": 513.7402985074627, "grad_norm": 27.601764678955078, "learning_rate": 9.933063791554358e-06, "loss": 28.3522, "step": 21577 }, { "epoch": 513.7641791044776, "grad_norm": 20.677928924560547, "learning_rate": 9.932614555256066e-06, "loss": 27.8876, "step": 21578 }, { "epoch": 513.7880597014926, "grad_norm": 28.12769889831543, "learning_rate": 9.932165318957773e-06, "loss": 27.8655, "step": 21579 }, { "epoch": 513.8119402985075, "grad_norm": 26.687488555908203, "learning_rate": 9.93171608265948e-06, "loss": 28.8418, "step": 21580 }, { "epoch": 513.8358208955224, "grad_norm": 22.083097457885742, "learning_rate": 9.931266846361187e-06, "loss": 27.4719, "step": 21581 }, { "epoch": 513.8597014925373, "grad_norm": 26.769729614257812, "learning_rate": 9.930817610062895e-06, "loss": 27.2091, "step": 21582 }, { "epoch": 513.8835820895522, "grad_norm": 20.189041137695312, "learning_rate": 9.9303683737646e-06, "loss": 27.1143, "step": 21583 }, { "epoch": 513.9074626865672, "grad_norm": 23.525306701660156, "learning_rate": 9.929919137466308e-06, "loss": 27.7169, "step": 21584 }, { "epoch": 513.9313432835821, "grad_norm": 22.568157196044922, "learning_rate": 9.929469901168014e-06, "loss": 26.4957, "step": 21585 }, { "epoch": 513.955223880597, "grad_norm": 23.69367218017578, "learning_rate": 9.929020664869722e-06, "loss": 27.4599, "step": 21586 }, { "epoch": 513.9791044776119, "grad_norm": 30.850175857543945, "learning_rate": 9.92857142857143e-06, "loss": 27.9317, "step": 21587 }, { "epoch": 514.0, "grad_norm": 17.379840850830078, "learning_rate": 9.928122192273136e-06, "loss": 25.4845, "step": 21588 }, { "epoch": 514.0238805970149, "grad_norm": 28.295024871826172, "learning_rate": 9.927672955974844e-06, "loss": 28.2649, "step": 21589 }, { "epoch": 514.0477611940298, "grad_norm": 27.77696418762207, "learning_rate": 9.927223719676551e-06, "loss": 27.5083, "step": 21590 }, { "epoch": 514.0716417910447, "grad_norm": 21.837291717529297, "learning_rate": 9.926774483378257e-06, "loss": 27.9196, "step": 21591 }, { "epoch": 514.0955223880597, "grad_norm": 26.355857849121094, "learning_rate": 9.926325247079965e-06, "loss": 26.6739, "step": 21592 }, { "epoch": 514.1194029850747, "grad_norm": 30.20018768310547, "learning_rate": 9.925876010781673e-06, "loss": 26.7223, "step": 21593 }, { "epoch": 514.1432835820896, "grad_norm": 22.564838409423828, "learning_rate": 9.925426774483379e-06, "loss": 28.0543, "step": 21594 }, { "epoch": 514.1671641791045, "grad_norm": 19.718629837036133, "learning_rate": 9.924977538185086e-06, "loss": 27.8927, "step": 21595 }, { "epoch": 514.1910447761194, "grad_norm": 31.671934127807617, "learning_rate": 9.924528301886794e-06, "loss": 28.537, "step": 21596 }, { "epoch": 514.2149253731343, "grad_norm": 22.018573760986328, "learning_rate": 9.9240790655885e-06, "loss": 27.6739, "step": 21597 }, { "epoch": 514.2388059701492, "grad_norm": 19.200719833374023, "learning_rate": 9.923629829290208e-06, "loss": 27.5437, "step": 21598 }, { "epoch": 514.2626865671642, "grad_norm": 30.32841682434082, "learning_rate": 9.923180592991914e-06, "loss": 27.799, "step": 21599 }, { "epoch": 514.2865671641791, "grad_norm": 26.030811309814453, "learning_rate": 9.922731356693621e-06, "loss": 28.1499, "step": 21600 }, { "epoch": 514.310447761194, "grad_norm": 18.102516174316406, "learning_rate": 9.922282120395329e-06, "loss": 27.516, "step": 21601 }, { "epoch": 514.334328358209, "grad_norm": NaN, "learning_rate": 9.921832884097035e-06, "loss": 32.4994, "step": 21602 }, { "epoch": 514.3582089552239, "grad_norm": 26.100170135498047, "learning_rate": 9.921832884097035e-06, "loss": 26.666, "step": 21603 }, { "epoch": 514.3820895522388, "grad_norm": 29.493741989135742, "learning_rate": 9.921383647798743e-06, "loss": 27.283, "step": 21604 }, { "epoch": 514.4059701492537, "grad_norm": 23.393545150756836, "learning_rate": 9.92093441150045e-06, "loss": 27.6182, "step": 21605 }, { "epoch": 514.4298507462687, "grad_norm": 17.65104103088379, "learning_rate": 9.920485175202157e-06, "loss": 27.1788, "step": 21606 }, { "epoch": 514.4537313432836, "grad_norm": 31.139450073242188, "learning_rate": 9.920035938903864e-06, "loss": 27.755, "step": 21607 }, { "epoch": 514.4776119402985, "grad_norm": 22.203067779541016, "learning_rate": 9.919586702605572e-06, "loss": 28.0266, "step": 21608 }, { "epoch": 514.5014925373134, "grad_norm": 23.504892349243164, "learning_rate": 9.919137466307278e-06, "loss": 28.1124, "step": 21609 }, { "epoch": 514.5253731343283, "grad_norm": 28.821104049682617, "learning_rate": 9.918688230008986e-06, "loss": 28.2345, "step": 21610 }, { "epoch": 514.5492537313432, "grad_norm": 28.62894058227539, "learning_rate": 9.918238993710693e-06, "loss": 27.5187, "step": 21611 }, { "epoch": 514.5731343283583, "grad_norm": 19.827516555786133, "learning_rate": 9.9177897574124e-06, "loss": 28.428, "step": 21612 }, { "epoch": 514.5970149253732, "grad_norm": 22.487863540649414, "learning_rate": 9.917340521114107e-06, "loss": 28.6042, "step": 21613 }, { "epoch": 514.6208955223881, "grad_norm": 31.087533950805664, "learning_rate": 9.916891284815813e-06, "loss": 28.7582, "step": 21614 }, { "epoch": 514.644776119403, "grad_norm": 21.041988372802734, "learning_rate": 9.91644204851752e-06, "loss": 26.9013, "step": 21615 }, { "epoch": 514.6686567164179, "grad_norm": 25.78226089477539, "learning_rate": 9.915992812219228e-06, "loss": 27.4522, "step": 21616 }, { "epoch": 514.6925373134328, "grad_norm": 28.856327056884766, "learning_rate": 9.915543575920934e-06, "loss": 28.3021, "step": 21617 }, { "epoch": 514.7164179104477, "grad_norm": 21.69150161743164, "learning_rate": 9.915094339622642e-06, "loss": 27.3098, "step": 21618 }, { "epoch": 514.7402985074627, "grad_norm": 22.89239501953125, "learning_rate": 9.91464510332435e-06, "loss": 27.4647, "step": 21619 }, { "epoch": 514.7641791044776, "grad_norm": 29.973249435424805, "learning_rate": 9.914195867026056e-06, "loss": 27.4625, "step": 21620 }, { "epoch": 514.7880597014926, "grad_norm": 21.247161865234375, "learning_rate": 9.913746630727764e-06, "loss": 28.1523, "step": 21621 }, { "epoch": 514.8119402985075, "grad_norm": 21.955209732055664, "learning_rate": 9.913297394429471e-06, "loss": 28.3169, "step": 21622 }, { "epoch": 514.8358208955224, "grad_norm": 30.859249114990234, "learning_rate": 9.912848158131177e-06, "loss": 27.6755, "step": 21623 }, { "epoch": 514.8597014925373, "grad_norm": 23.869352340698242, "learning_rate": 9.912398921832885e-06, "loss": 27.5127, "step": 21624 }, { "epoch": 514.8835820895522, "grad_norm": 22.109182357788086, "learning_rate": 9.911949685534593e-06, "loss": 28.2989, "step": 21625 }, { "epoch": 514.9074626865672, "grad_norm": 25.04633331298828, "learning_rate": 9.911500449236299e-06, "loss": 27.8582, "step": 21626 }, { "epoch": 514.9313432835821, "grad_norm": 22.078144073486328, "learning_rate": 9.911051212938006e-06, "loss": 28.7317, "step": 21627 }, { "epoch": 514.955223880597, "grad_norm": 20.70109748840332, "learning_rate": 9.910601976639714e-06, "loss": 27.3195, "step": 21628 }, { "epoch": 514.9791044776119, "grad_norm": 28.343643188476562, "learning_rate": 9.91015274034142e-06, "loss": 28.7183, "step": 21629 }, { "epoch": 515.0, "grad_norm": 26.17251205444336, "learning_rate": 9.909703504043128e-06, "loss": 24.7953, "step": 21630 }, { "epoch": 515.0238805970149, "grad_norm": NaN, "learning_rate": 9.909254267744834e-06, "loss": 42.5596, "step": 21631 }, { "epoch": 515.0477611940298, "grad_norm": 20.282733917236328, "learning_rate": 9.909254267744834e-06, "loss": 27.2335, "step": 21632 }, { "epoch": 515.0716417910447, "grad_norm": 24.667251586914062, "learning_rate": 9.908805031446541e-06, "loss": 27.9227, "step": 21633 }, { "epoch": 515.0955223880597, "grad_norm": 28.468549728393555, "learning_rate": 9.908355795148249e-06, "loss": 27.6596, "step": 21634 }, { "epoch": 515.1194029850747, "grad_norm": 20.9326114654541, "learning_rate": 9.907906558849955e-06, "loss": 27.4309, "step": 21635 }, { "epoch": 515.1432835820896, "grad_norm": 25.70855140686035, "learning_rate": 9.907457322551663e-06, "loss": 28.5297, "step": 21636 }, { "epoch": 515.1671641791045, "grad_norm": 28.05694007873535, "learning_rate": 9.90700808625337e-06, "loss": 26.9682, "step": 21637 }, { "epoch": 515.1910447761194, "grad_norm": 22.081872940063477, "learning_rate": 9.906558849955077e-06, "loss": 27.0309, "step": 21638 }, { "epoch": 515.2149253731343, "grad_norm": 26.45865821838379, "learning_rate": 9.906109613656784e-06, "loss": 28.9902, "step": 21639 }, { "epoch": 515.2388059701492, "grad_norm": 23.216976165771484, "learning_rate": 9.905660377358492e-06, "loss": 28.7427, "step": 21640 }, { "epoch": 515.2626865671642, "grad_norm": 27.242130279541016, "learning_rate": 9.905211141060198e-06, "loss": 27.5055, "step": 21641 }, { "epoch": 515.2865671641791, "grad_norm": 21.991708755493164, "learning_rate": 9.904761904761906e-06, "loss": 27.5207, "step": 21642 }, { "epoch": 515.310447761194, "grad_norm": 22.885717391967773, "learning_rate": 9.904312668463613e-06, "loss": 27.8432, "step": 21643 }, { "epoch": 515.334328358209, "grad_norm": 33.125545501708984, "learning_rate": 9.90386343216532e-06, "loss": 27.3549, "step": 21644 }, { "epoch": 515.3582089552239, "grad_norm": 22.39228630065918, "learning_rate": 9.903414195867027e-06, "loss": 27.0835, "step": 21645 }, { "epoch": 515.3820895522388, "grad_norm": 37.384681701660156, "learning_rate": 9.902964959568733e-06, "loss": 28.1108, "step": 21646 }, { "epoch": 515.4059701492537, "grad_norm": 31.38356590270996, "learning_rate": 9.90251572327044e-06, "loss": 28.2852, "step": 21647 }, { "epoch": 515.4298507462687, "grad_norm": 26.012630462646484, "learning_rate": 9.902066486972148e-06, "loss": 27.1584, "step": 21648 }, { "epoch": 515.4537313432836, "grad_norm": 29.484516143798828, "learning_rate": 9.901617250673854e-06, "loss": 28.4994, "step": 21649 }, { "epoch": 515.4776119402985, "grad_norm": 25.3248291015625, "learning_rate": 9.901168014375562e-06, "loss": 27.7003, "step": 21650 }, { "epoch": 515.5014925373134, "grad_norm": 20.70560073852539, "learning_rate": 9.90071877807727e-06, "loss": 26.9506, "step": 21651 }, { "epoch": 515.5253731343283, "grad_norm": 31.545900344848633, "learning_rate": 9.900269541778976e-06, "loss": 27.4308, "step": 21652 }, { "epoch": 515.5492537313432, "grad_norm": 22.4016056060791, "learning_rate": 9.899820305480683e-06, "loss": 27.8674, "step": 21653 }, { "epoch": 515.5731343283583, "grad_norm": 29.844066619873047, "learning_rate": 9.899371069182391e-06, "loss": 25.8312, "step": 21654 }, { "epoch": 515.5970149253732, "grad_norm": 29.098264694213867, "learning_rate": 9.898921832884097e-06, "loss": 27.8961, "step": 21655 }, { "epoch": 515.6208955223881, "grad_norm": 20.760007858276367, "learning_rate": 9.898472596585805e-06, "loss": 27.2201, "step": 21656 }, { "epoch": 515.644776119403, "grad_norm": 33.20096206665039, "learning_rate": 9.898023360287513e-06, "loss": 27.7175, "step": 21657 }, { "epoch": 515.6686567164179, "grad_norm": 22.978384017944336, "learning_rate": 9.897574123989219e-06, "loss": 27.6497, "step": 21658 }, { "epoch": 515.6925373134328, "grad_norm": 33.304534912109375, "learning_rate": 9.897124887690926e-06, "loss": 27.7168, "step": 21659 }, { "epoch": 515.7164179104477, "grad_norm": 25.812091827392578, "learning_rate": 9.896675651392634e-06, "loss": 28.5152, "step": 21660 }, { "epoch": 515.7402985074627, "grad_norm": 23.090736389160156, "learning_rate": 9.89622641509434e-06, "loss": 26.5377, "step": 21661 }, { "epoch": 515.7641791044776, "grad_norm": 30.844602584838867, "learning_rate": 9.895777178796048e-06, "loss": 27.1885, "step": 21662 }, { "epoch": 515.7880597014926, "grad_norm": 24.319456100463867, "learning_rate": 9.895327942497754e-06, "loss": 28.1109, "step": 21663 }, { "epoch": 515.8119402985075, "grad_norm": 23.7265625, "learning_rate": 9.894878706199461e-06, "loss": 29.4844, "step": 21664 }, { "epoch": 515.8358208955224, "grad_norm": 28.06551742553711, "learning_rate": 9.894429469901169e-06, "loss": 28.1404, "step": 21665 }, { "epoch": 515.8597014925373, "grad_norm": 22.02609634399414, "learning_rate": 9.893980233602875e-06, "loss": 28.4072, "step": 21666 }, { "epoch": 515.8835820895522, "grad_norm": 19.22784423828125, "learning_rate": 9.893530997304583e-06, "loss": 27.3256, "step": 21667 }, { "epoch": 515.9074626865672, "grad_norm": 26.322189331054688, "learning_rate": 9.89308176100629e-06, "loss": 27.2069, "step": 21668 }, { "epoch": 515.9313432835821, "grad_norm": 21.456789016723633, "learning_rate": 9.892632524707996e-06, "loss": 28.1412, "step": 21669 }, { "epoch": 515.955223880597, "grad_norm": 18.581209182739258, "learning_rate": 9.892183288409704e-06, "loss": 28.3791, "step": 21670 }, { "epoch": 515.9791044776119, "grad_norm": 26.211307525634766, "learning_rate": 9.891734052111412e-06, "loss": 27.6167, "step": 21671 }, { "epoch": 516.0, "grad_norm": 21.054927825927734, "learning_rate": 9.891284815813118e-06, "loss": 24.857, "step": 21672 }, { "epoch": 516.0238805970149, "grad_norm": 20.497390747070312, "learning_rate": 9.890835579514826e-06, "loss": 28.3948, "step": 21673 }, { "epoch": 516.0477611940298, "grad_norm": 24.384416580200195, "learning_rate": 9.890386343216533e-06, "loss": 25.9488, "step": 21674 }, { "epoch": 516.0716417910447, "grad_norm": 27.981292724609375, "learning_rate": 9.88993710691824e-06, "loss": 28.1365, "step": 21675 }, { "epoch": 516.0955223880597, "grad_norm": 18.89853858947754, "learning_rate": 9.889487870619947e-06, "loss": 28.543, "step": 21676 }, { "epoch": 516.1194029850747, "grad_norm": 31.437824249267578, "learning_rate": 9.889038634321653e-06, "loss": 28.4984, "step": 21677 }, { "epoch": 516.1432835820896, "grad_norm": 28.440996170043945, "learning_rate": 9.888589398023362e-06, "loss": 28.2415, "step": 21678 }, { "epoch": 516.1671641791045, "grad_norm": 19.629634857177734, "learning_rate": 9.888140161725068e-06, "loss": 27.7257, "step": 21679 }, { "epoch": 516.1910447761194, "grad_norm": 32.61162185668945, "learning_rate": 9.887690925426774e-06, "loss": 27.0916, "step": 21680 }, { "epoch": 516.2149253731343, "grad_norm": 24.415966033935547, "learning_rate": 9.887241689128482e-06, "loss": 27.419, "step": 21681 }, { "epoch": 516.2388059701492, "grad_norm": 20.411731719970703, "learning_rate": 9.88679245283019e-06, "loss": 28.0113, "step": 21682 }, { "epoch": 516.2626865671642, "grad_norm": 32.67917251586914, "learning_rate": 9.886343216531896e-06, "loss": 28.8901, "step": 21683 }, { "epoch": 516.2865671641791, "grad_norm": 24.923175811767578, "learning_rate": 9.885893980233603e-06, "loss": 27.6355, "step": 21684 }, { "epoch": 516.310447761194, "grad_norm": 22.664600372314453, "learning_rate": 9.885444743935311e-06, "loss": 28.37, "step": 21685 }, { "epoch": 516.334328358209, "grad_norm": 32.668548583984375, "learning_rate": 9.884995507637017e-06, "loss": 27.6649, "step": 21686 }, { "epoch": 516.3582089552239, "grad_norm": 21.361717224121094, "learning_rate": 9.884546271338725e-06, "loss": 27.6061, "step": 21687 }, { "epoch": 516.3820895522388, "grad_norm": 27.900144577026367, "learning_rate": 9.884097035040433e-06, "loss": 27.7867, "step": 21688 }, { "epoch": 516.4059701492537, "grad_norm": 28.56827163696289, "learning_rate": 9.883647798742139e-06, "loss": 26.7692, "step": 21689 }, { "epoch": 516.4298507462687, "grad_norm": 18.844985961914062, "learning_rate": 9.883198562443846e-06, "loss": 27.6026, "step": 21690 }, { "epoch": 516.4537313432836, "grad_norm": 31.13259506225586, "learning_rate": 9.882749326145554e-06, "loss": 26.4714, "step": 21691 }, { "epoch": 516.4776119402985, "grad_norm": 27.423490524291992, "learning_rate": 9.882300089847262e-06, "loss": 27.2318, "step": 21692 }, { "epoch": 516.5014925373134, "grad_norm": 20.842519760131836, "learning_rate": 9.881850853548968e-06, "loss": 27.1159, "step": 21693 }, { "epoch": 516.5253731343283, "grad_norm": 29.76698875427246, "learning_rate": 9.881401617250674e-06, "loss": 27.4943, "step": 21694 }, { "epoch": 516.5492537313432, "grad_norm": 23.085983276367188, "learning_rate": 9.880952380952381e-06, "loss": 27.6944, "step": 21695 }, { "epoch": 516.5731343283583, "grad_norm": 29.604248046875, "learning_rate": 9.880503144654089e-06, "loss": 28.3449, "step": 21696 }, { "epoch": 516.5970149253732, "grad_norm": 30.173906326293945, "learning_rate": 9.880053908355795e-06, "loss": 27.5993, "step": 21697 }, { "epoch": 516.6208955223881, "grad_norm": 24.564701080322266, "learning_rate": 9.879604672057503e-06, "loss": 27.6989, "step": 21698 }, { "epoch": 516.644776119403, "grad_norm": 23.60506820678711, "learning_rate": 9.87915543575921e-06, "loss": 27.4608, "step": 21699 }, { "epoch": 516.6686567164179, "grad_norm": 31.178543090820312, "learning_rate": 9.878706199460916e-06, "loss": 27.0965, "step": 21700 }, { "epoch": 516.6925373134328, "grad_norm": NaN, "learning_rate": 9.878256963162624e-06, "loss": 42.6145, "step": 21701 }, { "epoch": 516.7164179104477, "grad_norm": 24.332061767578125, "learning_rate": 9.878256963162624e-06, "loss": 28.3085, "step": 21702 }, { "epoch": 516.7402985074627, "grad_norm": 27.74613380432129, "learning_rate": 9.877807726864332e-06, "loss": 28.3491, "step": 21703 }, { "epoch": 516.7641791044776, "grad_norm": 30.60664176940918, "learning_rate": 9.877358490566038e-06, "loss": 28.5062, "step": 21704 }, { "epoch": 516.7880597014926, "grad_norm": 21.864852905273438, "learning_rate": 9.876909254267746e-06, "loss": 26.9536, "step": 21705 }, { "epoch": 516.8119402985075, "grad_norm": 25.763607025146484, "learning_rate": 9.876460017969453e-06, "loss": 27.0184, "step": 21706 }, { "epoch": 516.8358208955224, "grad_norm": 26.926694869995117, "learning_rate": 9.876010781671161e-06, "loss": 27.3287, "step": 21707 }, { "epoch": 516.8597014925373, "grad_norm": 21.655099868774414, "learning_rate": 9.875561545372867e-06, "loss": 27.9798, "step": 21708 }, { "epoch": 516.8835820895522, "grad_norm": 27.325164794921875, "learning_rate": 9.875112309074573e-06, "loss": 27.3776, "step": 21709 }, { "epoch": 516.9074626865672, "grad_norm": 23.568622589111328, "learning_rate": 9.874663072776282e-06, "loss": 27.8209, "step": 21710 }, { "epoch": 516.9313432835821, "grad_norm": 24.443635940551758, "learning_rate": 9.874213836477988e-06, "loss": 27.6849, "step": 21711 }, { "epoch": 516.955223880597, "grad_norm": 27.272857666015625, "learning_rate": 9.873764600179694e-06, "loss": 27.9861, "step": 21712 }, { "epoch": 516.9791044776119, "grad_norm": 26.47084617614746, "learning_rate": 9.873315363881402e-06, "loss": 28.8347, "step": 21713 }, { "epoch": 517.0, "grad_norm": 21.795198440551758, "learning_rate": 9.87286612758311e-06, "loss": 24.311, "step": 21714 }, { "epoch": 517.0238805970149, "grad_norm": 28.518024444580078, "learning_rate": 9.872416891284816e-06, "loss": 27.8603, "step": 21715 }, { "epoch": 517.0477611940298, "grad_norm": 23.930618286132812, "learning_rate": 9.871967654986523e-06, "loss": 28.9695, "step": 21716 }, { "epoch": 517.0716417910447, "grad_norm": 22.76991081237793, "learning_rate": 9.871518418688231e-06, "loss": 27.607, "step": 21717 }, { "epoch": 517.0955223880597, "grad_norm": 25.707523345947266, "learning_rate": 9.871069182389937e-06, "loss": 27.0297, "step": 21718 }, { "epoch": 517.1194029850747, "grad_norm": 32.72481155395508, "learning_rate": 9.870619946091645e-06, "loss": 27.4762, "step": 21719 }, { "epoch": 517.1432835820896, "grad_norm": 21.89505386352539, "learning_rate": 9.870170709793353e-06, "loss": 28.0655, "step": 21720 }, { "epoch": 517.1671641791045, "grad_norm": 31.385147094726562, "learning_rate": 9.86972147349506e-06, "loss": 27.5956, "step": 21721 }, { "epoch": 517.1910447761194, "grad_norm": 25.03203010559082, "learning_rate": 9.869272237196766e-06, "loss": 26.548, "step": 21722 }, { "epoch": 517.2149253731343, "grad_norm": 23.157451629638672, "learning_rate": 9.868823000898474e-06, "loss": 28.3766, "step": 21723 }, { "epoch": 517.2388059701492, "grad_norm": 23.48427963256836, "learning_rate": 9.868373764600182e-06, "loss": 27.1991, "step": 21724 }, { "epoch": 517.2626865671642, "grad_norm": 28.257354736328125, "learning_rate": 9.867924528301888e-06, "loss": 27.5017, "step": 21725 }, { "epoch": 517.2865671641791, "grad_norm": 20.039213180541992, "learning_rate": 9.867475292003594e-06, "loss": 27.6491, "step": 21726 }, { "epoch": 517.310447761194, "grad_norm": 27.618616104125977, "learning_rate": 9.867026055705301e-06, "loss": 28.4576, "step": 21727 }, { "epoch": 517.334328358209, "grad_norm": 24.134733200073242, "learning_rate": 9.866576819407009e-06, "loss": 27.3578, "step": 21728 }, { "epoch": 517.3582089552239, "grad_norm": 27.075977325439453, "learning_rate": 9.866127583108715e-06, "loss": 28.1224, "step": 21729 }, { "epoch": 517.3820895522388, "grad_norm": 22.33618927001953, "learning_rate": 9.865678346810423e-06, "loss": 28.7257, "step": 21730 }, { "epoch": 517.4059701492537, "grad_norm": 25.295019149780273, "learning_rate": 9.86522911051213e-06, "loss": 28.0239, "step": 21731 }, { "epoch": 517.4298507462687, "grad_norm": 23.56560707092285, "learning_rate": 9.864779874213836e-06, "loss": 27.6616, "step": 21732 }, { "epoch": 517.4537313432836, "grad_norm": 22.875019073486328, "learning_rate": 9.864330637915544e-06, "loss": 26.8025, "step": 21733 }, { "epoch": 517.4776119402985, "grad_norm": 24.476356506347656, "learning_rate": 9.863881401617252e-06, "loss": 28.5032, "step": 21734 }, { "epoch": 517.5014925373134, "grad_norm": 20.47708511352539, "learning_rate": 9.86343216531896e-06, "loss": 27.7381, "step": 21735 }, { "epoch": 517.5253731343283, "grad_norm": 19.082660675048828, "learning_rate": 9.862982929020666e-06, "loss": 26.3483, "step": 21736 }, { "epoch": 517.5492537313432, "grad_norm": 21.372356414794922, "learning_rate": 9.862533692722373e-06, "loss": 27.5711, "step": 21737 }, { "epoch": 517.5731343283583, "grad_norm": 20.410852432250977, "learning_rate": 9.862084456424081e-06, "loss": 28.252, "step": 21738 }, { "epoch": 517.5970149253732, "grad_norm": 19.973249435424805, "learning_rate": 9.861635220125787e-06, "loss": 27.2461, "step": 21739 }, { "epoch": 517.6208955223881, "grad_norm": 27.540544509887695, "learning_rate": 9.861185983827493e-06, "loss": 28.6237, "step": 21740 }, { "epoch": 517.644776119403, "grad_norm": 23.465295791625977, "learning_rate": 9.860736747529202e-06, "loss": 27.7936, "step": 21741 }, { "epoch": 517.6686567164179, "grad_norm": 23.674266815185547, "learning_rate": 9.860287511230908e-06, "loss": 27.6564, "step": 21742 }, { "epoch": 517.6925373134328, "grad_norm": 22.25040626525879, "learning_rate": 9.859838274932614e-06, "loss": 28.4454, "step": 21743 }, { "epoch": 517.7164179104477, "grad_norm": 20.566673278808594, "learning_rate": 9.859389038634322e-06, "loss": 28.1205, "step": 21744 }, { "epoch": 517.7402985074627, "grad_norm": 20.69699478149414, "learning_rate": 9.85893980233603e-06, "loss": 27.9536, "step": 21745 }, { "epoch": 517.7641791044776, "grad_norm": 19.827260971069336, "learning_rate": 9.858490566037736e-06, "loss": 26.9161, "step": 21746 }, { "epoch": 517.7880597014926, "grad_norm": 19.385042190551758, "learning_rate": 9.858041329739443e-06, "loss": 27.8164, "step": 21747 }, { "epoch": 517.8119402985075, "grad_norm": 22.353933334350586, "learning_rate": 9.857592093441151e-06, "loss": 27.3796, "step": 21748 }, { "epoch": 517.8358208955224, "grad_norm": 26.337583541870117, "learning_rate": 9.857142857142859e-06, "loss": 27.2904, "step": 21749 }, { "epoch": 517.8597014925373, "grad_norm": 21.569988250732422, "learning_rate": 9.856693620844565e-06, "loss": 28.011, "step": 21750 }, { "epoch": 517.8835820895522, "grad_norm": 19.059200286865234, "learning_rate": 9.856244384546273e-06, "loss": 28.0632, "step": 21751 }, { "epoch": 517.9074626865672, "grad_norm": 22.718130111694336, "learning_rate": 9.85579514824798e-06, "loss": 27.4308, "step": 21752 }, { "epoch": 517.9313432835821, "grad_norm": 22.16653060913086, "learning_rate": 9.855345911949686e-06, "loss": 27.1883, "step": 21753 }, { "epoch": 517.955223880597, "grad_norm": 23.898616790771484, "learning_rate": 9.854896675651392e-06, "loss": 27.7217, "step": 21754 }, { "epoch": 517.9791044776119, "grad_norm": 17.923721313476562, "learning_rate": 9.854447439353102e-06, "loss": 27.61, "step": 21755 }, { "epoch": 518.0, "grad_norm": 20.62942886352539, "learning_rate": 9.853998203054808e-06, "loss": 23.3875, "step": 21756 }, { "epoch": 518.0238805970149, "grad_norm": 29.906103134155273, "learning_rate": 9.853548966756514e-06, "loss": 27.6396, "step": 21757 }, { "epoch": 518.0477611940298, "grad_norm": 22.50103759765625, "learning_rate": 9.853099730458221e-06, "loss": 27.0246, "step": 21758 }, { "epoch": 518.0716417910447, "grad_norm": 19.861974716186523, "learning_rate": 9.852650494159929e-06, "loss": 28.6889, "step": 21759 }, { "epoch": 518.0955223880597, "grad_norm": 27.074562072753906, "learning_rate": 9.852201257861635e-06, "loss": 27.9506, "step": 21760 }, { "epoch": 518.1194029850747, "grad_norm": 22.602731704711914, "learning_rate": 9.851752021563343e-06, "loss": 28.2499, "step": 21761 }, { "epoch": 518.1432835820896, "grad_norm": 24.860353469848633, "learning_rate": 9.85130278526505e-06, "loss": 28.0795, "step": 21762 }, { "epoch": 518.1671641791045, "grad_norm": 23.33847427368164, "learning_rate": 9.850853548966758e-06, "loss": 26.7023, "step": 21763 }, { "epoch": 518.1910447761194, "grad_norm": 22.403915405273438, "learning_rate": 9.850404312668464e-06, "loss": 26.9584, "step": 21764 }, { "epoch": 518.2149253731343, "grad_norm": 28.278718948364258, "learning_rate": 9.849955076370172e-06, "loss": 28.0283, "step": 21765 }, { "epoch": 518.2388059701492, "grad_norm": 19.93996810913086, "learning_rate": 9.84950584007188e-06, "loss": 27.0013, "step": 21766 }, { "epoch": 518.2626865671642, "grad_norm": 33.21797180175781, "learning_rate": 9.849056603773586e-06, "loss": 27.4757, "step": 21767 }, { "epoch": 518.2865671641791, "grad_norm": 26.072654724121094, "learning_rate": 9.848607367475293e-06, "loss": 27.717, "step": 21768 }, { "epoch": 518.310447761194, "grad_norm": 25.370380401611328, "learning_rate": 9.848158131177001e-06, "loss": 29.1331, "step": 21769 }, { "epoch": 518.334328358209, "grad_norm": 27.19320297241211, "learning_rate": 9.847708894878707e-06, "loss": 27.3321, "step": 21770 }, { "epoch": 518.3582089552239, "grad_norm": 26.28431510925293, "learning_rate": 9.847259658580413e-06, "loss": 27.4208, "step": 21771 }, { "epoch": 518.3820895522388, "grad_norm": 25.471406936645508, "learning_rate": 9.84681042228212e-06, "loss": 28.7764, "step": 21772 }, { "epoch": 518.4059701492537, "grad_norm": 18.700939178466797, "learning_rate": 9.846361185983828e-06, "loss": 26.67, "step": 21773 }, { "epoch": 518.4298507462687, "grad_norm": 26.55215835571289, "learning_rate": 9.845911949685534e-06, "loss": 27.0883, "step": 21774 }, { "epoch": 518.4537313432836, "grad_norm": 24.064233779907227, "learning_rate": 9.845462713387242e-06, "loss": 27.3547, "step": 21775 }, { "epoch": 518.4776119402985, "grad_norm": 24.784257888793945, "learning_rate": 9.84501347708895e-06, "loss": 28.1824, "step": 21776 }, { "epoch": 518.5014925373134, "grad_norm": 20.096349716186523, "learning_rate": 9.844564240790657e-06, "loss": 27.637, "step": 21777 }, { "epoch": 518.5253731343283, "grad_norm": 24.399246215820312, "learning_rate": 9.844115004492363e-06, "loss": 28.3638, "step": 21778 }, { "epoch": 518.5492537313432, "grad_norm": 29.41051483154297, "learning_rate": 9.843665768194071e-06, "loss": 27.6907, "step": 21779 }, { "epoch": 518.5731343283583, "grad_norm": 26.938379287719727, "learning_rate": 9.843216531895779e-06, "loss": 27.6042, "step": 21780 }, { "epoch": 518.5970149253732, "grad_norm": 20.671375274658203, "learning_rate": 9.842767295597485e-06, "loss": 26.9389, "step": 21781 }, { "epoch": 518.6208955223881, "grad_norm": 27.31082534790039, "learning_rate": 9.842318059299193e-06, "loss": 28.6122, "step": 21782 }, { "epoch": 518.644776119403, "grad_norm": 32.02412414550781, "learning_rate": 9.8418688230009e-06, "loss": 27.8036, "step": 21783 }, { "epoch": 518.6686567164179, "grad_norm": 19.73435401916504, "learning_rate": 9.841419586702606e-06, "loss": 27.6352, "step": 21784 }, { "epoch": 518.6925373134328, "grad_norm": 28.182241439819336, "learning_rate": 9.840970350404312e-06, "loss": 27.86, "step": 21785 }, { "epoch": 518.7164179104477, "grad_norm": 29.98647117614746, "learning_rate": 9.840521114106022e-06, "loss": 27.8816, "step": 21786 }, { "epoch": 518.7402985074627, "grad_norm": 18.701396942138672, "learning_rate": 9.840071877807728e-06, "loss": 29.0445, "step": 21787 }, { "epoch": 518.7641791044776, "grad_norm": 26.623016357421875, "learning_rate": 9.839622641509434e-06, "loss": 28.0583, "step": 21788 }, { "epoch": 518.7880597014926, "grad_norm": 27.627317428588867, "learning_rate": 9.839173405211141e-06, "loss": 27.0538, "step": 21789 }, { "epoch": 518.8119402985075, "grad_norm": 18.918073654174805, "learning_rate": 9.838724168912849e-06, "loss": 26.773, "step": 21790 }, { "epoch": 518.8358208955224, "grad_norm": 30.508647918701172, "learning_rate": 9.838274932614557e-06, "loss": 28.3289, "step": 21791 }, { "epoch": 518.8597014925373, "grad_norm": 24.855390548706055, "learning_rate": 9.837825696316263e-06, "loss": 27.9811, "step": 21792 }, { "epoch": 518.8835820895522, "grad_norm": 19.293182373046875, "learning_rate": 9.83737646001797e-06, "loss": 27.0579, "step": 21793 }, { "epoch": 518.9074626865672, "grad_norm": 25.082075119018555, "learning_rate": 9.836927223719678e-06, "loss": 27.8153, "step": 21794 }, { "epoch": 518.9313432835821, "grad_norm": 24.83421516418457, "learning_rate": 9.836477987421384e-06, "loss": 26.2309, "step": 21795 }, { "epoch": 518.955223880597, "grad_norm": 21.51737403869629, "learning_rate": 9.836028751123092e-06, "loss": 27.5154, "step": 21796 }, { "epoch": 518.9791044776119, "grad_norm": 23.787391662597656, "learning_rate": 9.8355795148248e-06, "loss": 27.2786, "step": 21797 }, { "epoch": 519.0, "grad_norm": 20.682092666625977, "learning_rate": 9.835130278526506e-06, "loss": 24.1076, "step": 21798 }, { "epoch": 519.0238805970149, "grad_norm": 20.79419708251953, "learning_rate": 9.834681042228213e-06, "loss": 26.7232, "step": 21799 }, { "epoch": 519.0477611940298, "grad_norm": 25.15159797668457, "learning_rate": 9.834231805929921e-06, "loss": 27.6666, "step": 21800 }, { "epoch": 519.0716417910447, "grad_norm": 23.570877075195312, "learning_rate": 9.833782569631627e-06, "loss": 27.3374, "step": 21801 }, { "epoch": 519.0955223880597, "grad_norm": 24.737117767333984, "learning_rate": 9.833333333333333e-06, "loss": 28.3914, "step": 21802 }, { "epoch": 519.1194029850747, "grad_norm": 18.356201171875, "learning_rate": 9.83288409703504e-06, "loss": 27.1612, "step": 21803 }, { "epoch": 519.1432835820896, "grad_norm": 22.09642219543457, "learning_rate": 9.832434860736748e-06, "loss": 26.5864, "step": 21804 }, { "epoch": 519.1671641791045, "grad_norm": 28.149484634399414, "learning_rate": 9.831985624438456e-06, "loss": 27.8215, "step": 21805 }, { "epoch": 519.1910447761194, "grad_norm": 26.350130081176758, "learning_rate": 9.831536388140162e-06, "loss": 27.8425, "step": 21806 }, { "epoch": 519.2149253731343, "grad_norm": 20.825870513916016, "learning_rate": 9.83108715184187e-06, "loss": 27.5822, "step": 21807 }, { "epoch": 519.2388059701492, "grad_norm": 19.48748779296875, "learning_rate": 9.830637915543577e-06, "loss": 27.4396, "step": 21808 }, { "epoch": 519.2626865671642, "grad_norm": 29.32198143005371, "learning_rate": 9.830188679245283e-06, "loss": 27.8819, "step": 21809 }, { "epoch": 519.2865671641791, "grad_norm": 21.654258728027344, "learning_rate": 9.829739442946991e-06, "loss": 28.4951, "step": 21810 }, { "epoch": 519.310447761194, "grad_norm": 18.36747932434082, "learning_rate": 9.829290206648699e-06, "loss": 27.8675, "step": 21811 }, { "epoch": 519.334328358209, "grad_norm": 21.358001708984375, "learning_rate": 9.828840970350405e-06, "loss": 27.4426, "step": 21812 }, { "epoch": 519.3582089552239, "grad_norm": 18.967296600341797, "learning_rate": 9.828391734052113e-06, "loss": 27.118, "step": 21813 }, { "epoch": 519.3820895522388, "grad_norm": 28.558290481567383, "learning_rate": 9.82794249775382e-06, "loss": 27.1918, "step": 21814 }, { "epoch": 519.4059701492537, "grad_norm": 24.092529296875, "learning_rate": 9.827493261455526e-06, "loss": 27.8899, "step": 21815 }, { "epoch": 519.4298507462687, "grad_norm": 19.331336975097656, "learning_rate": 9.827044025157232e-06, "loss": 28.2928, "step": 21816 }, { "epoch": 519.4537313432836, "grad_norm": 23.73537254333496, "learning_rate": 9.826594788858942e-06, "loss": 27.9544, "step": 21817 }, { "epoch": 519.4776119402985, "grad_norm": 30.279638290405273, "learning_rate": 9.826145552560648e-06, "loss": 27.3553, "step": 21818 }, { "epoch": 519.5014925373134, "grad_norm": 20.56175422668457, "learning_rate": 9.825696316262355e-06, "loss": 27.7656, "step": 21819 }, { "epoch": 519.5253731343283, "grad_norm": 19.15595054626465, "learning_rate": 9.825247079964061e-06, "loss": 27.1821, "step": 21820 }, { "epoch": 519.5492537313432, "grad_norm": 20.788070678710938, "learning_rate": 9.824797843665769e-06, "loss": 27.7529, "step": 21821 }, { "epoch": 519.5731343283583, "grad_norm": 21.661048889160156, "learning_rate": 9.824348607367477e-06, "loss": 27.5209, "step": 21822 }, { "epoch": 519.5970149253732, "grad_norm": 20.179365158081055, "learning_rate": 9.823899371069183e-06, "loss": 27.3203, "step": 21823 }, { "epoch": 519.6208955223881, "grad_norm": 27.38581657409668, "learning_rate": 9.82345013477089e-06, "loss": 28.3245, "step": 21824 }, { "epoch": 519.644776119403, "grad_norm": 22.046419143676758, "learning_rate": 9.823000898472598e-06, "loss": 26.8216, "step": 21825 }, { "epoch": 519.6686567164179, "grad_norm": 24.190542221069336, "learning_rate": 9.822551662174304e-06, "loss": 28.533, "step": 21826 }, { "epoch": 519.6925373134328, "grad_norm": 18.79587173461914, "learning_rate": 9.822102425876012e-06, "loss": 27.4371, "step": 21827 }, { "epoch": 519.7164179104477, "grad_norm": 22.078031539916992, "learning_rate": 9.82165318957772e-06, "loss": 27.0266, "step": 21828 }, { "epoch": 519.7402985074627, "grad_norm": 31.198925018310547, "learning_rate": 9.821203953279426e-06, "loss": 26.884, "step": 21829 }, { "epoch": 519.7641791044776, "grad_norm": 20.493186950683594, "learning_rate": 9.820754716981133e-06, "loss": 28.1416, "step": 21830 }, { "epoch": 519.7880597014926, "grad_norm": 25.465431213378906, "learning_rate": 9.820305480682841e-06, "loss": 26.6509, "step": 21831 }, { "epoch": 519.8119402985075, "grad_norm": 23.525535583496094, "learning_rate": 9.819856244384547e-06, "loss": 28.3868, "step": 21832 }, { "epoch": 519.8358208955224, "grad_norm": 26.357696533203125, "learning_rate": 9.819407008086255e-06, "loss": 27.9333, "step": 21833 }, { "epoch": 519.8597014925373, "grad_norm": 18.746950149536133, "learning_rate": 9.81895777178796e-06, "loss": 28.0843, "step": 21834 }, { "epoch": 519.8835820895522, "grad_norm": 29.76654624938965, "learning_rate": 9.818508535489668e-06, "loss": 28.4486, "step": 21835 }, { "epoch": 519.9074626865672, "grad_norm": 23.920286178588867, "learning_rate": 9.818059299191376e-06, "loss": 28.1144, "step": 21836 }, { "epoch": 519.9313432835821, "grad_norm": 26.739810943603516, "learning_rate": 9.817610062893082e-06, "loss": 27.5082, "step": 21837 }, { "epoch": 519.955223880597, "grad_norm": 23.01997947692871, "learning_rate": 9.81716082659479e-06, "loss": 28.07, "step": 21838 }, { "epoch": 519.9791044776119, "grad_norm": 26.788480758666992, "learning_rate": 9.816711590296497e-06, "loss": 27.1742, "step": 21839 }, { "epoch": 520.0, "grad_norm": 19.99673080444336, "learning_rate": 9.816262353998203e-06, "loss": 24.263, "step": 21840 }, { "epoch": 520.0238805970149, "grad_norm": 26.185970306396484, "learning_rate": 9.815813117699911e-06, "loss": 28.505, "step": 21841 }, { "epoch": 520.0477611940298, "grad_norm": 22.54168128967285, "learning_rate": 9.815363881401619e-06, "loss": 26.8569, "step": 21842 }, { "epoch": 520.0716417910447, "grad_norm": 25.119726181030273, "learning_rate": 9.814914645103325e-06, "loss": 27.2507, "step": 21843 }, { "epoch": 520.0955223880597, "grad_norm": 22.890592575073242, "learning_rate": 9.814465408805032e-06, "loss": 27.064, "step": 21844 }, { "epoch": 520.1194029850747, "grad_norm": 21.528884887695312, "learning_rate": 9.81401617250674e-06, "loss": 27.0784, "step": 21845 }, { "epoch": 520.1432835820896, "grad_norm": 24.57941436767578, "learning_rate": 9.813566936208446e-06, "loss": 27.9967, "step": 21846 }, { "epoch": 520.1671641791045, "grad_norm": 20.49301528930664, "learning_rate": 9.813117699910154e-06, "loss": 27.9727, "step": 21847 }, { "epoch": 520.1910447761194, "grad_norm": 25.044458389282227, "learning_rate": 9.812668463611862e-06, "loss": 26.9774, "step": 21848 }, { "epoch": 520.2149253731343, "grad_norm": 20.48120880126953, "learning_rate": 9.812219227313568e-06, "loss": 27.3512, "step": 21849 }, { "epoch": 520.2388059701492, "grad_norm": 23.395328521728516, "learning_rate": 9.811769991015275e-06, "loss": 27.7079, "step": 21850 }, { "epoch": 520.2626865671642, "grad_norm": 20.06919288635254, "learning_rate": 9.811320754716981e-06, "loss": 27.7774, "step": 21851 }, { "epoch": 520.2865671641791, "grad_norm": 23.317312240600586, "learning_rate": 9.810871518418689e-06, "loss": 27.4268, "step": 21852 }, { "epoch": 520.310447761194, "grad_norm": NaN, "learning_rate": 9.810422282120397e-06, "loss": 37.7559, "step": 21853 }, { "epoch": 520.334328358209, "grad_norm": 23.730548858642578, "learning_rate": 9.810422282120397e-06, "loss": 27.7594, "step": 21854 }, { "epoch": 520.3582089552239, "grad_norm": 24.764188766479492, "learning_rate": 9.809973045822103e-06, "loss": 26.4074, "step": 21855 }, { "epoch": 520.3820895522388, "grad_norm": 21.798688888549805, "learning_rate": 9.80952380952381e-06, "loss": 28.2252, "step": 21856 }, { "epoch": 520.4059701492537, "grad_norm": 21.538745880126953, "learning_rate": 9.809074573225518e-06, "loss": 28.915, "step": 21857 }, { "epoch": 520.4298507462687, "grad_norm": 23.203367233276367, "learning_rate": 9.808625336927224e-06, "loss": 27.0386, "step": 21858 }, { "epoch": 520.4537313432836, "grad_norm": 21.737985610961914, "learning_rate": 9.808176100628932e-06, "loss": 27.7507, "step": 21859 }, { "epoch": 520.4776119402985, "grad_norm": 20.488481521606445, "learning_rate": 9.80772686433064e-06, "loss": 27.0843, "step": 21860 }, { "epoch": 520.5014925373134, "grad_norm": 21.45555305480957, "learning_rate": 9.807277628032345e-06, "loss": 26.4768, "step": 21861 }, { "epoch": 520.5253731343283, "grad_norm": 19.95534896850586, "learning_rate": 9.806828391734053e-06, "loss": 27.4384, "step": 21862 }, { "epoch": 520.5492537313432, "grad_norm": 22.64781951904297, "learning_rate": 9.806379155435761e-06, "loss": 27.7817, "step": 21863 }, { "epoch": 520.5731343283583, "grad_norm": 27.715412139892578, "learning_rate": 9.805929919137467e-06, "loss": 27.0465, "step": 21864 }, { "epoch": 520.5970149253732, "grad_norm": 22.01350212097168, "learning_rate": 9.805480682839175e-06, "loss": 27.4622, "step": 21865 }, { "epoch": 520.6208955223881, "grad_norm": 18.924888610839844, "learning_rate": 9.80503144654088e-06, "loss": 27.909, "step": 21866 }, { "epoch": 520.644776119403, "grad_norm": 26.336620330810547, "learning_rate": 9.804582210242588e-06, "loss": 27.5305, "step": 21867 }, { "epoch": 520.6686567164179, "grad_norm": 31.01498031616211, "learning_rate": 9.804132973944296e-06, "loss": 28.4937, "step": 21868 }, { "epoch": 520.6925373134328, "grad_norm": 20.668901443481445, "learning_rate": 9.803683737646002e-06, "loss": 26.9767, "step": 21869 }, { "epoch": 520.7164179104477, "grad_norm": 26.870407104492188, "learning_rate": 9.80323450134771e-06, "loss": 28.2402, "step": 21870 }, { "epoch": 520.7402985074627, "grad_norm": 33.693199157714844, "learning_rate": 9.802785265049417e-06, "loss": 28.3435, "step": 21871 }, { "epoch": 520.7641791044776, "grad_norm": 19.6314640045166, "learning_rate": 9.802336028751123e-06, "loss": 27.3012, "step": 21872 }, { "epoch": 520.7880597014926, "grad_norm": 33.801422119140625, "learning_rate": 9.801886792452831e-06, "loss": 26.8335, "step": 21873 }, { "epoch": 520.8119402985075, "grad_norm": 25.769350051879883, "learning_rate": 9.801437556154539e-06, "loss": 28.4008, "step": 21874 }, { "epoch": 520.8358208955224, "grad_norm": 25.01428985595703, "learning_rate": 9.800988319856245e-06, "loss": 28.1589, "step": 21875 }, { "epoch": 520.8597014925373, "grad_norm": 28.905651092529297, "learning_rate": 9.800539083557952e-06, "loss": 28.4749, "step": 21876 }, { "epoch": 520.8835820895522, "grad_norm": 20.17479133605957, "learning_rate": 9.80008984725966e-06, "loss": 29.0177, "step": 21877 }, { "epoch": 520.9074626865672, "grad_norm": 28.19568634033203, "learning_rate": 9.799640610961366e-06, "loss": 27.346, "step": 21878 }, { "epoch": 520.9313432835821, "grad_norm": 23.473121643066406, "learning_rate": 9.799191374663074e-06, "loss": 27.2038, "step": 21879 }, { "epoch": 520.955223880597, "grad_norm": 20.66986083984375, "learning_rate": 9.79874213836478e-06, "loss": 26.7685, "step": 21880 }, { "epoch": 520.9791044776119, "grad_norm": 26.414480209350586, "learning_rate": 9.798292902066488e-06, "loss": 27.465, "step": 21881 }, { "epoch": 521.0, "grad_norm": 23.942264556884766, "learning_rate": 9.797843665768195e-06, "loss": 25.1907, "step": 21882 }, { "epoch": 521.0238805970149, "grad_norm": 24.674116134643555, "learning_rate": 9.797394429469901e-06, "loss": 27.313, "step": 21883 }, { "epoch": 521.0477611940298, "grad_norm": 20.43309783935547, "learning_rate": 9.796945193171609e-06, "loss": 27.1801, "step": 21884 }, { "epoch": 521.0716417910447, "grad_norm": 22.88199806213379, "learning_rate": 9.796495956873317e-06, "loss": 27.4074, "step": 21885 }, { "epoch": 521.0955223880597, "grad_norm": 20.035079956054688, "learning_rate": 9.796046720575023e-06, "loss": 27.4805, "step": 21886 }, { "epoch": 521.1194029850747, "grad_norm": 21.800487518310547, "learning_rate": 9.79559748427673e-06, "loss": 28.1569, "step": 21887 }, { "epoch": 521.1432835820896, "grad_norm": 22.18018341064453, "learning_rate": 9.795148247978438e-06, "loss": 27.6382, "step": 21888 }, { "epoch": 521.1671641791045, "grad_norm": 17.701454162597656, "learning_rate": 9.794699011680144e-06, "loss": 28.2573, "step": 21889 }, { "epoch": 521.1910447761194, "grad_norm": 22.78199005126953, "learning_rate": 9.794249775381852e-06, "loss": 27.8088, "step": 21890 }, { "epoch": 521.2149253731343, "grad_norm": 20.791852951049805, "learning_rate": 9.79380053908356e-06, "loss": 27.088, "step": 21891 }, { "epoch": 521.2388059701492, "grad_norm": 23.88992691040039, "learning_rate": 9.793351302785265e-06, "loss": 26.3135, "step": 21892 }, { "epoch": 521.2626865671642, "grad_norm": 20.690683364868164, "learning_rate": 9.792902066486973e-06, "loss": 27.4946, "step": 21893 }, { "epoch": 521.2865671641791, "grad_norm": 19.934492111206055, "learning_rate": 9.792452830188681e-06, "loss": 27.5752, "step": 21894 }, { "epoch": 521.310447761194, "grad_norm": 19.471118927001953, "learning_rate": 9.792003593890387e-06, "loss": 26.7415, "step": 21895 }, { "epoch": 521.334328358209, "grad_norm": 21.993759155273438, "learning_rate": 9.791554357592095e-06, "loss": 27.584, "step": 21896 }, { "epoch": 521.3582089552239, "grad_norm": 28.450679779052734, "learning_rate": 9.7911051212938e-06, "loss": 26.9495, "step": 21897 }, { "epoch": 521.3820895522388, "grad_norm": 23.315725326538086, "learning_rate": 9.790655884995508e-06, "loss": 27.5719, "step": 21898 }, { "epoch": 521.4059701492537, "grad_norm": 18.491777420043945, "learning_rate": 9.790206648697216e-06, "loss": 27.6177, "step": 21899 }, { "epoch": 521.4298507462687, "grad_norm": 27.5102481842041, "learning_rate": 9.789757412398922e-06, "loss": 27.9731, "step": 21900 }, { "epoch": 521.4537313432836, "grad_norm": 19.653606414794922, "learning_rate": 9.78930817610063e-06, "loss": 28.1392, "step": 21901 }, { "epoch": 521.4776119402985, "grad_norm": 23.841938018798828, "learning_rate": 9.788858939802337e-06, "loss": 26.5091, "step": 21902 }, { "epoch": 521.5014925373134, "grad_norm": 24.58788299560547, "learning_rate": 9.788409703504043e-06, "loss": 27.0881, "step": 21903 }, { "epoch": 521.5253731343283, "grad_norm": 21.909109115600586, "learning_rate": 9.787960467205751e-06, "loss": 28.3311, "step": 21904 }, { "epoch": 521.5492537313432, "grad_norm": 24.341161727905273, "learning_rate": 9.787511230907459e-06, "loss": 28.4754, "step": 21905 }, { "epoch": 521.5731343283583, "grad_norm": 20.298810958862305, "learning_rate": 9.787061994609165e-06, "loss": 27.2671, "step": 21906 }, { "epoch": 521.5970149253732, "grad_norm": 19.572080612182617, "learning_rate": 9.786612758310872e-06, "loss": 27.2251, "step": 21907 }, { "epoch": 521.6208955223881, "grad_norm": 18.02648162841797, "learning_rate": 9.78616352201258e-06, "loss": 27.3343, "step": 21908 }, { "epoch": 521.644776119403, "grad_norm": 21.776975631713867, "learning_rate": 9.785714285714286e-06, "loss": 27.9712, "step": 21909 }, { "epoch": 521.6686567164179, "grad_norm": 22.791404724121094, "learning_rate": 9.785265049415994e-06, "loss": 28.1948, "step": 21910 }, { "epoch": 521.6925373134328, "grad_norm": 24.978303909301758, "learning_rate": 9.7848158131177e-06, "loss": 28.1634, "step": 21911 }, { "epoch": 521.7164179104477, "grad_norm": 24.030155181884766, "learning_rate": 9.784366576819408e-06, "loss": 28.1112, "step": 21912 }, { "epoch": 521.7402985074627, "grad_norm": 21.234760284423828, "learning_rate": 9.783917340521115e-06, "loss": 26.7715, "step": 21913 }, { "epoch": 521.7641791044776, "grad_norm": 22.00531578063965, "learning_rate": 9.783468104222821e-06, "loss": 27.3535, "step": 21914 }, { "epoch": 521.7880597014926, "grad_norm": 25.123613357543945, "learning_rate": 9.783018867924529e-06, "loss": 27.4897, "step": 21915 }, { "epoch": 521.8119402985075, "grad_norm": 22.837560653686523, "learning_rate": 9.782569631626237e-06, "loss": 28.5306, "step": 21916 }, { "epoch": 521.8358208955224, "grad_norm": 21.256044387817383, "learning_rate": 9.782120395327943e-06, "loss": 27.7472, "step": 21917 }, { "epoch": 521.8597014925373, "grad_norm": 22.4714412689209, "learning_rate": 9.78167115902965e-06, "loss": 28.09, "step": 21918 }, { "epoch": 521.8835820895522, "grad_norm": 24.508073806762695, "learning_rate": 9.781221922731358e-06, "loss": 27.5353, "step": 21919 }, { "epoch": 521.9074626865672, "grad_norm": 26.64885711669922, "learning_rate": 9.780772686433064e-06, "loss": 27.8271, "step": 21920 }, { "epoch": 521.9313432835821, "grad_norm": 18.148815155029297, "learning_rate": 9.780323450134772e-06, "loss": 27.462, "step": 21921 }, { "epoch": 521.955223880597, "grad_norm": NaN, "learning_rate": 9.77987421383648e-06, "loss": 24.2447, "step": 21922 }, { "epoch": 521.9791044776119, "grad_norm": 21.145898818969727, "learning_rate": 9.77987421383648e-06, "loss": 26.9715, "step": 21923 }, { "epoch": 522.0, "grad_norm": 23.862957000732422, "learning_rate": 9.779424977538185e-06, "loss": 24.6019, "step": 21924 }, { "epoch": 522.0238805970149, "grad_norm": 29.071855545043945, "learning_rate": 9.778975741239893e-06, "loss": 27.6894, "step": 21925 }, { "epoch": 522.0477611940298, "grad_norm": 20.588993072509766, "learning_rate": 9.778526504941601e-06, "loss": 27.3381, "step": 21926 }, { "epoch": 522.0716417910447, "grad_norm": 24.542457580566406, "learning_rate": 9.778077268643307e-06, "loss": 27.2666, "step": 21927 }, { "epoch": 522.0955223880597, "grad_norm": 30.889163970947266, "learning_rate": 9.777628032345015e-06, "loss": 27.8556, "step": 21928 }, { "epoch": 522.1194029850747, "grad_norm": 24.724925994873047, "learning_rate": 9.77717879604672e-06, "loss": 27.2032, "step": 21929 }, { "epoch": 522.1432835820896, "grad_norm": 19.982868194580078, "learning_rate": 9.776729559748428e-06, "loss": 27.4161, "step": 21930 }, { "epoch": 522.1671641791045, "grad_norm": 21.751720428466797, "learning_rate": 9.776280323450136e-06, "loss": 27.3743, "step": 21931 }, { "epoch": 522.1910447761194, "grad_norm": 26.331787109375, "learning_rate": 9.775831087151842e-06, "loss": 28.5496, "step": 21932 }, { "epoch": 522.2149253731343, "grad_norm": 19.935657501220703, "learning_rate": 9.77538185085355e-06, "loss": 26.9678, "step": 21933 }, { "epoch": 522.2388059701492, "grad_norm": 21.59682273864746, "learning_rate": 9.774932614555257e-06, "loss": 27.4099, "step": 21934 }, { "epoch": 522.2626865671642, "grad_norm": 20.099193572998047, "learning_rate": 9.774483378256963e-06, "loss": 27.8028, "step": 21935 }, { "epoch": 522.2865671641791, "grad_norm": 22.032798767089844, "learning_rate": 9.774034141958671e-06, "loss": 27.6096, "step": 21936 }, { "epoch": 522.310447761194, "grad_norm": 26.52093505859375, "learning_rate": 9.773584905660379e-06, "loss": 27.9405, "step": 21937 }, { "epoch": 522.334328358209, "grad_norm": 24.871915817260742, "learning_rate": 9.773135669362085e-06, "loss": 28.1541, "step": 21938 }, { "epoch": 522.3582089552239, "grad_norm": 24.239261627197266, "learning_rate": 9.772686433063792e-06, "loss": 27.5245, "step": 21939 }, { "epoch": 522.3820895522388, "grad_norm": 19.114582061767578, "learning_rate": 9.7722371967655e-06, "loss": 27.9783, "step": 21940 }, { "epoch": 522.4059701492537, "grad_norm": 24.1092529296875, "learning_rate": 9.771787960467206e-06, "loss": 27.692, "step": 21941 }, { "epoch": 522.4298507462687, "grad_norm": 20.612781524658203, "learning_rate": 9.771338724168914e-06, "loss": 27.0501, "step": 21942 }, { "epoch": 522.4537313432836, "grad_norm": 29.333890914916992, "learning_rate": 9.77088948787062e-06, "loss": 26.9214, "step": 21943 }, { "epoch": 522.4776119402985, "grad_norm": 24.59489631652832, "learning_rate": 9.770440251572328e-06, "loss": 28.0283, "step": 21944 }, { "epoch": 522.5014925373134, "grad_norm": 19.06707000732422, "learning_rate": 9.769991015274035e-06, "loss": 27.2803, "step": 21945 }, { "epoch": 522.5253731343283, "grad_norm": 30.813156127929688, "learning_rate": 9.769541778975741e-06, "loss": 28.1398, "step": 21946 }, { "epoch": 522.5492537313432, "grad_norm": 23.200908660888672, "learning_rate": 9.769092542677449e-06, "loss": 27.6681, "step": 21947 }, { "epoch": 522.5731343283583, "grad_norm": 34.47504806518555, "learning_rate": 9.768643306379157e-06, "loss": 27.4813, "step": 21948 }, { "epoch": 522.5970149253732, "grad_norm": 22.975505828857422, "learning_rate": 9.768194070080863e-06, "loss": 27.7021, "step": 21949 }, { "epoch": 522.6208955223881, "grad_norm": 43.781185150146484, "learning_rate": 9.76774483378257e-06, "loss": 28.583, "step": 21950 }, { "epoch": 522.644776119403, "grad_norm": 31.205949783325195, "learning_rate": 9.767295597484278e-06, "loss": 27.9282, "step": 21951 }, { "epoch": 522.6686567164179, "grad_norm": 38.94379425048828, "learning_rate": 9.766846361185984e-06, "loss": 27.2006, "step": 21952 }, { "epoch": 522.6925373134328, "grad_norm": 35.188899993896484, "learning_rate": 9.766397124887692e-06, "loss": 27.6692, "step": 21953 }, { "epoch": 522.7164179104477, "grad_norm": 35.44150161743164, "learning_rate": 9.7659478885894e-06, "loss": 27.8026, "step": 21954 }, { "epoch": 522.7402985074627, "grad_norm": 31.10190200805664, "learning_rate": 9.765498652291105e-06, "loss": 26.3922, "step": 21955 }, { "epoch": 522.7641791044776, "grad_norm": 31.388643264770508, "learning_rate": 9.765049415992813e-06, "loss": 27.5955, "step": 21956 }, { "epoch": 522.7880597014926, "grad_norm": 30.88457679748535, "learning_rate": 9.76460017969452e-06, "loss": 27.9658, "step": 21957 }, { "epoch": 522.8119402985075, "grad_norm": 23.38211441040039, "learning_rate": 9.764150943396227e-06, "loss": 27.2008, "step": 21958 }, { "epoch": 522.8358208955224, "grad_norm": 31.676528930664062, "learning_rate": 9.763701707097935e-06, "loss": 27.8098, "step": 21959 }, { "epoch": 522.8597014925373, "grad_norm": 28.370651245117188, "learning_rate": 9.76325247079964e-06, "loss": 27.5273, "step": 21960 }, { "epoch": 522.8835820895522, "grad_norm": 19.236652374267578, "learning_rate": 9.762803234501348e-06, "loss": 27.6772, "step": 21961 }, { "epoch": 522.9074626865672, "grad_norm": 29.735157012939453, "learning_rate": 9.762353998203056e-06, "loss": 27.6071, "step": 21962 }, { "epoch": 522.9313432835821, "grad_norm": 23.041860580444336, "learning_rate": 9.761904761904762e-06, "loss": 27.9309, "step": 21963 }, { "epoch": 522.955223880597, "grad_norm": 29.138315200805664, "learning_rate": 9.76145552560647e-06, "loss": 26.3577, "step": 21964 }, { "epoch": 522.9791044776119, "grad_norm": 21.35258674621582, "learning_rate": 9.761006289308177e-06, "loss": 26.8157, "step": 21965 }, { "epoch": 523.0, "grad_norm": 25.327035903930664, "learning_rate": 9.760557053009883e-06, "loss": 24.6021, "step": 21966 }, { "epoch": 523.0238805970149, "grad_norm": 23.554819107055664, "learning_rate": 9.760107816711591e-06, "loss": 27.4528, "step": 21967 }, { "epoch": 523.0477611940298, "grad_norm": 28.281919479370117, "learning_rate": 9.759658580413299e-06, "loss": 26.8908, "step": 21968 }, { "epoch": 523.0716417910447, "grad_norm": 24.569664001464844, "learning_rate": 9.759209344115005e-06, "loss": 26.8138, "step": 21969 }, { "epoch": 523.0955223880597, "grad_norm": NaN, "learning_rate": 9.758760107816712e-06, "loss": 35.7604, "step": 21970 }, { "epoch": 523.1194029850747, "grad_norm": 25.85531997680664, "learning_rate": 9.758760107816712e-06, "loss": 27.329, "step": 21971 }, { "epoch": 523.1432835820896, "grad_norm": 26.79378318786621, "learning_rate": 9.75831087151842e-06, "loss": 27.7795, "step": 21972 }, { "epoch": 523.1671641791045, "grad_norm": 22.9012508392334, "learning_rate": 9.757861635220126e-06, "loss": 26.3394, "step": 21973 }, { "epoch": 523.1910447761194, "grad_norm": 20.149099349975586, "learning_rate": 9.757412398921834e-06, "loss": 28.1909, "step": 21974 }, { "epoch": 523.2149253731343, "grad_norm": 23.305301666259766, "learning_rate": 9.75696316262354e-06, "loss": 26.6027, "step": 21975 }, { "epoch": 523.2388059701492, "grad_norm": 24.736085891723633, "learning_rate": 9.75651392632525e-06, "loss": 27.1052, "step": 21976 }, { "epoch": 523.2626865671642, "grad_norm": 21.752609252929688, "learning_rate": 9.756064690026955e-06, "loss": 26.5154, "step": 21977 }, { "epoch": 523.2865671641791, "grad_norm": 20.591161727905273, "learning_rate": 9.755615453728661e-06, "loss": 27.0228, "step": 21978 }, { "epoch": 523.310447761194, "grad_norm": 18.632421493530273, "learning_rate": 9.755166217430369e-06, "loss": 27.345, "step": 21979 }, { "epoch": 523.334328358209, "grad_norm": 21.17516326904297, "learning_rate": 9.754716981132077e-06, "loss": 28.315, "step": 21980 }, { "epoch": 523.3582089552239, "grad_norm": 26.14686393737793, "learning_rate": 9.754267744833783e-06, "loss": 28.9255, "step": 21981 }, { "epoch": 523.3820895522388, "grad_norm": 26.047889709472656, "learning_rate": 9.75381850853549e-06, "loss": 27.8988, "step": 21982 }, { "epoch": 523.4059701492537, "grad_norm": 22.308977127075195, "learning_rate": 9.753369272237198e-06, "loss": 28.075, "step": 21983 }, { "epoch": 523.4298507462687, "grad_norm": 19.43041229248047, "learning_rate": 9.752920035938904e-06, "loss": 27.1009, "step": 21984 }, { "epoch": 523.4537313432836, "grad_norm": 19.227313995361328, "learning_rate": 9.752470799640612e-06, "loss": 27.9661, "step": 21985 }, { "epoch": 523.4776119402985, "grad_norm": 21.761789321899414, "learning_rate": 9.75202156334232e-06, "loss": 28.0734, "step": 21986 }, { "epoch": 523.5014925373134, "grad_norm": 20.176984786987305, "learning_rate": 9.751572327044025e-06, "loss": 28.7811, "step": 21987 }, { "epoch": 523.5253731343283, "grad_norm": 26.33782196044922, "learning_rate": 9.751123090745733e-06, "loss": 27.8945, "step": 21988 }, { "epoch": 523.5492537313432, "grad_norm": 26.224023818969727, "learning_rate": 9.750673854447439e-06, "loss": 28.0205, "step": 21989 }, { "epoch": 523.5731343283583, "grad_norm": 20.78628158569336, "learning_rate": 9.750224618149149e-06, "loss": 27.5455, "step": 21990 }, { "epoch": 523.5970149253732, "grad_norm": 20.637025833129883, "learning_rate": 9.749775381850855e-06, "loss": 27.7249, "step": 21991 }, { "epoch": 523.6208955223881, "grad_norm": 29.557363510131836, "learning_rate": 9.74932614555256e-06, "loss": 28.0976, "step": 21992 }, { "epoch": 523.644776119403, "grad_norm": 23.633155822753906, "learning_rate": 9.748876909254268e-06, "loss": 28.1371, "step": 21993 }, { "epoch": 523.6686567164179, "grad_norm": 18.11345672607422, "learning_rate": 9.748427672955976e-06, "loss": 26.2494, "step": 21994 }, { "epoch": 523.6925373134328, "grad_norm": 23.827730178833008, "learning_rate": 9.747978436657682e-06, "loss": 27.8384, "step": 21995 }, { "epoch": 523.7164179104477, "grad_norm": 27.027769088745117, "learning_rate": 9.74752920035939e-06, "loss": 26.7665, "step": 21996 }, { "epoch": 523.7402985074627, "grad_norm": 26.45939826965332, "learning_rate": 9.747079964061097e-06, "loss": 27.5458, "step": 21997 }, { "epoch": 523.7641791044776, "grad_norm": 18.6143798828125, "learning_rate": 9.746630727762803e-06, "loss": 27.4178, "step": 21998 }, { "epoch": 523.7880597014926, "grad_norm": 28.844249725341797, "learning_rate": 9.746181491464511e-06, "loss": 27.4368, "step": 21999 }, { "epoch": 523.8119402985075, "grad_norm": 26.21137046813965, "learning_rate": 9.745732255166219e-06, "loss": 26.4404, "step": 22000 }, { "epoch": 523.8358208955224, "grad_norm": 19.797163009643555, "learning_rate": 9.745283018867925e-06, "loss": 28.0181, "step": 22001 }, { "epoch": 523.8597014925373, "grad_norm": 21.56658935546875, "learning_rate": 9.744833782569632e-06, "loss": 27.0844, "step": 22002 }, { "epoch": 523.8835820895522, "grad_norm": 19.412853240966797, "learning_rate": 9.74438454627134e-06, "loss": 27.3817, "step": 22003 }, { "epoch": 523.9074626865672, "grad_norm": 32.84844970703125, "learning_rate": 9.743935309973048e-06, "loss": 28.5913, "step": 22004 }, { "epoch": 523.9313432835821, "grad_norm": 22.33148765563965, "learning_rate": 9.743486073674754e-06, "loss": 29.1605, "step": 22005 }, { "epoch": 523.955223880597, "grad_norm": 27.021780014038086, "learning_rate": 9.74303683737646e-06, "loss": 27.3206, "step": 22006 }, { "epoch": 523.9791044776119, "grad_norm": 34.95838165283203, "learning_rate": 9.74258760107817e-06, "loss": 28.6381, "step": 22007 }, { "epoch": 524.0, "grad_norm": 18.16769790649414, "learning_rate": 9.742138364779875e-06, "loss": 23.2786, "step": 22008 }, { "epoch": 524.0238805970149, "grad_norm": 26.83973503112793, "learning_rate": 9.741689128481581e-06, "loss": 28.4729, "step": 22009 }, { "epoch": 524.0477611940298, "grad_norm": 26.809703826904297, "learning_rate": 9.741239892183289e-06, "loss": 27.8234, "step": 22010 }, { "epoch": 524.0716417910447, "grad_norm": 19.780582427978516, "learning_rate": 9.740790655884997e-06, "loss": 26.7686, "step": 22011 }, { "epoch": 524.0955223880597, "grad_norm": 29.10514259338379, "learning_rate": 9.740341419586703e-06, "loss": 27.8667, "step": 22012 }, { "epoch": 524.1194029850747, "grad_norm": 30.203929901123047, "learning_rate": 9.73989218328841e-06, "loss": 27.1571, "step": 22013 }, { "epoch": 524.1432835820896, "grad_norm": 21.44710922241211, "learning_rate": 9.739442946990118e-06, "loss": 27.3454, "step": 22014 }, { "epoch": 524.1671641791045, "grad_norm": 38.319129943847656, "learning_rate": 9.738993710691824e-06, "loss": 27.4132, "step": 22015 }, { "epoch": 524.1910447761194, "grad_norm": 24.488142013549805, "learning_rate": 9.738544474393532e-06, "loss": 27.3489, "step": 22016 }, { "epoch": 524.2149253731343, "grad_norm": 28.435531616210938, "learning_rate": 9.73809523809524e-06, "loss": 27.9821, "step": 22017 }, { "epoch": 524.2388059701492, "grad_norm": 29.320144653320312, "learning_rate": 9.737646001796947e-06, "loss": 27.3856, "step": 22018 }, { "epoch": 524.2626865671642, "grad_norm": 22.949113845825195, "learning_rate": 9.737196765498653e-06, "loss": 27.7287, "step": 22019 }, { "epoch": 524.2865671641791, "grad_norm": 35.278018951416016, "learning_rate": 9.736747529200359e-06, "loss": 27.7047, "step": 22020 }, { "epoch": 524.310447761194, "grad_norm": 25.07569122314453, "learning_rate": 9.736298292902068e-06, "loss": 28.3155, "step": 22021 }, { "epoch": 524.334328358209, "grad_norm": 29.907686233520508, "learning_rate": 9.735849056603775e-06, "loss": 28.7265, "step": 22022 }, { "epoch": 524.3582089552239, "grad_norm": 27.843420028686523, "learning_rate": 9.73539982030548e-06, "loss": 27.2455, "step": 22023 }, { "epoch": 524.3820895522388, "grad_norm": 23.379886627197266, "learning_rate": 9.734950584007188e-06, "loss": 27.6648, "step": 22024 }, { "epoch": 524.4059701492537, "grad_norm": 38.64365005493164, "learning_rate": 9.734501347708896e-06, "loss": 26.649, "step": 22025 }, { "epoch": 524.4298507462687, "grad_norm": 23.74056625366211, "learning_rate": 9.734052111410602e-06, "loss": 28.1807, "step": 22026 }, { "epoch": 524.4537313432836, "grad_norm": 38.831748962402344, "learning_rate": 9.73360287511231e-06, "loss": 28.0801, "step": 22027 }, { "epoch": 524.4776119402985, "grad_norm": 24.381237030029297, "learning_rate": 9.733153638814017e-06, "loss": 27.2276, "step": 22028 }, { "epoch": 524.5014925373134, "grad_norm": 28.353883743286133, "learning_rate": 9.732704402515723e-06, "loss": 27.205, "step": 22029 }, { "epoch": 524.5253731343283, "grad_norm": 29.69536018371582, "learning_rate": 9.732255166217431e-06, "loss": 27.5355, "step": 22030 }, { "epoch": 524.5492537313432, "grad_norm": 21.487764358520508, "learning_rate": 9.731805929919139e-06, "loss": 27.8784, "step": 22031 }, { "epoch": 524.5731343283583, "grad_norm": 27.180248260498047, "learning_rate": 9.731356693620846e-06, "loss": 27.4751, "step": 22032 }, { "epoch": 524.5970149253732, "grad_norm": 25.970874786376953, "learning_rate": 9.730907457322552e-06, "loss": 27.8691, "step": 22033 }, { "epoch": 524.6208955223881, "grad_norm": 20.626001358032227, "learning_rate": 9.73045822102426e-06, "loss": 26.7285, "step": 22034 }, { "epoch": 524.644776119403, "grad_norm": 37.68687057495117, "learning_rate": 9.730008984725968e-06, "loss": 26.5339, "step": 22035 }, { "epoch": 524.6686567164179, "grad_norm": 27.091371536254883, "learning_rate": 9.729559748427674e-06, "loss": 27.2097, "step": 22036 }, { "epoch": 524.6925373134328, "grad_norm": 30.075578689575195, "learning_rate": 9.72911051212938e-06, "loss": 28.7706, "step": 22037 }, { "epoch": 524.7164179104477, "grad_norm": 28.021453857421875, "learning_rate": 9.728661275831087e-06, "loss": 27.7001, "step": 22038 }, { "epoch": 524.7402985074627, "grad_norm": 24.61546516418457, "learning_rate": 9.728212039532795e-06, "loss": 26.3, "step": 22039 }, { "epoch": 524.7641791044776, "grad_norm": 26.187524795532227, "learning_rate": 9.727762803234501e-06, "loss": 26.4828, "step": 22040 }, { "epoch": 524.7880597014926, "grad_norm": 31.5671443939209, "learning_rate": 9.727313566936209e-06, "loss": 29.4198, "step": 22041 }, { "epoch": 524.8119402985075, "grad_norm": 23.362335205078125, "learning_rate": 9.726864330637917e-06, "loss": 26.6016, "step": 22042 }, { "epoch": 524.8358208955224, "grad_norm": 41.15494155883789, "learning_rate": 9.726415094339623e-06, "loss": 28.4164, "step": 22043 }, { "epoch": 524.8597014925373, "grad_norm": 30.4233341217041, "learning_rate": 9.72596585804133e-06, "loss": 27.7521, "step": 22044 }, { "epoch": 524.8835820895522, "grad_norm": 36.54107666015625, "learning_rate": 9.725516621743038e-06, "loss": 27.0663, "step": 22045 }, { "epoch": 524.9074626865672, "grad_norm": 31.715665817260742, "learning_rate": 9.725067385444746e-06, "loss": 27.8973, "step": 22046 }, { "epoch": 524.9313432835821, "grad_norm": NaN, "learning_rate": 9.724618149146452e-06, "loss": 26.833, "step": 22047 }, { "epoch": 524.955223880597, "grad_norm": NaN, "learning_rate": 9.724618149146452e-06, "loss": 24.6176, "step": 22048 }, { "epoch": 524.9791044776119, "grad_norm": 27.9968318939209, "learning_rate": 9.724618149146452e-06, "loss": 27.7566, "step": 22049 }, { "epoch": 525.0, "grad_norm": 29.12142562866211, "learning_rate": 9.72416891284816e-06, "loss": 25.3048, "step": 22050 }, { "epoch": 525.0238805970149, "grad_norm": 21.799718856811523, "learning_rate": 9.723719676549867e-06, "loss": 27.5608, "step": 22051 }, { "epoch": 525.0477611940298, "grad_norm": 32.79853057861328, "learning_rate": 9.723270440251573e-06, "loss": 27.6178, "step": 22052 }, { "epoch": 525.0716417910447, "grad_norm": 25.366121292114258, "learning_rate": 9.722821203953279e-06, "loss": 28.7055, "step": 22053 }, { "epoch": 525.0955223880597, "grad_norm": 25.761531829833984, "learning_rate": 9.722371967654988e-06, "loss": 28.1594, "step": 22054 }, { "epoch": 525.1194029850747, "grad_norm": 35.063262939453125, "learning_rate": 9.721922731356694e-06, "loss": 28.3933, "step": 22055 }, { "epoch": 525.1432835820896, "grad_norm": 24.698339462280273, "learning_rate": 9.7214734950584e-06, "loss": 26.9906, "step": 22056 }, { "epoch": 525.1671641791045, "grad_norm": 21.228670120239258, "learning_rate": 9.721024258760108e-06, "loss": 27.0005, "step": 22057 }, { "epoch": 525.1910447761194, "grad_norm": 21.76850128173828, "learning_rate": 9.720575022461816e-06, "loss": 28.0133, "step": 22058 }, { "epoch": 525.2149253731343, "grad_norm": 21.235618591308594, "learning_rate": 9.720125786163522e-06, "loss": 29.1421, "step": 22059 }, { "epoch": 525.2388059701492, "grad_norm": 22.96685028076172, "learning_rate": 9.71967654986523e-06, "loss": 27.8852, "step": 22060 }, { "epoch": 525.2626865671642, "grad_norm": 23.98881721496582, "learning_rate": 9.719227313566937e-06, "loss": 27.9888, "step": 22061 }, { "epoch": 525.2865671641791, "grad_norm": 24.72205924987793, "learning_rate": 9.718778077268645e-06, "loss": 28.4538, "step": 22062 }, { "epoch": 525.310447761194, "grad_norm": 23.8490047454834, "learning_rate": 9.718328840970351e-06, "loss": 29.1482, "step": 22063 }, { "epoch": 525.334328358209, "grad_norm": 23.248031616210938, "learning_rate": 9.717879604672059e-06, "loss": 28.123, "step": 22064 }, { "epoch": 525.3582089552239, "grad_norm": 25.4141788482666, "learning_rate": 9.717430368373766e-06, "loss": 27.376, "step": 22065 }, { "epoch": 525.3820895522388, "grad_norm": 20.0844783782959, "learning_rate": 9.716981132075472e-06, "loss": 28.5341, "step": 22066 }, { "epoch": 525.4059701492537, "grad_norm": 21.39870834350586, "learning_rate": 9.71653189577718e-06, "loss": 28.2775, "step": 22067 }, { "epoch": 525.4298507462687, "grad_norm": 19.625951766967773, "learning_rate": 9.716082659478888e-06, "loss": 27.6578, "step": 22068 }, { "epoch": 525.4537313432836, "grad_norm": NaN, "learning_rate": 9.715633423180594e-06, "loss": 28.4692, "step": 22069 }, { "epoch": 525.4776119402985, "grad_norm": 37.723724365234375, "learning_rate": 9.715633423180594e-06, "loss": 28.2765, "step": 22070 }, { "epoch": 525.5014925373134, "grad_norm": 24.554397583007812, "learning_rate": 9.7151841868823e-06, "loss": 27.7673, "step": 22071 }, { "epoch": 525.5253731343283, "grad_norm": 24.67937469482422, "learning_rate": 9.714734950584007e-06, "loss": 28.4303, "step": 22072 }, { "epoch": 525.5492537313432, "grad_norm": 33.64335250854492, "learning_rate": 9.714285714285715e-06, "loss": 29.5191, "step": 22073 }, { "epoch": 525.5731343283583, "grad_norm": 21.163360595703125, "learning_rate": 9.713836477987421e-06, "loss": 28.3088, "step": 22074 }, { "epoch": 525.5970149253732, "grad_norm": 35.30911636352539, "learning_rate": 9.713387241689129e-06, "loss": 28.4676, "step": 22075 }, { "epoch": 525.6208955223881, "grad_norm": 25.277820587158203, "learning_rate": 9.712938005390837e-06, "loss": 28.4814, "step": 22076 }, { "epoch": 525.644776119403, "grad_norm": 28.32034683227539, "learning_rate": 9.712488769092544e-06, "loss": 28.5705, "step": 22077 }, { "epoch": 525.6686567164179, "grad_norm": 32.4446907043457, "learning_rate": 9.71203953279425e-06, "loss": 28.6474, "step": 22078 }, { "epoch": 525.6925373134328, "grad_norm": 21.268281936645508, "learning_rate": 9.711590296495958e-06, "loss": 28.423, "step": 22079 }, { "epoch": 525.7164179104477, "grad_norm": 20.388687133789062, "learning_rate": 9.711141060197666e-06, "loss": 28.4695, "step": 22080 }, { "epoch": 525.7402985074627, "grad_norm": 31.581005096435547, "learning_rate": 9.710691823899372e-06, "loss": 27.7228, "step": 22081 }, { "epoch": 525.7641791044776, "grad_norm": 21.799171447753906, "learning_rate": 9.71024258760108e-06, "loss": 28.3541, "step": 22082 }, { "epoch": 525.7880597014926, "grad_norm": 21.58499526977539, "learning_rate": 9.709793351302787e-06, "loss": 29.2608, "step": 22083 }, { "epoch": 525.8119402985075, "grad_norm": 34.036460876464844, "learning_rate": 9.709344115004493e-06, "loss": 30.0249, "step": 22084 }, { "epoch": 525.8358208955224, "grad_norm": 21.299144744873047, "learning_rate": 9.708894878706199e-06, "loss": 28.8915, "step": 22085 }, { "epoch": 525.8597014925373, "grad_norm": 21.87760353088379, "learning_rate": 9.708445642407908e-06, "loss": 29.0367, "step": 22086 }, { "epoch": 525.8835820895522, "grad_norm": 20.818422317504883, "learning_rate": 9.707996406109614e-06, "loss": 28.8182, "step": 22087 }, { "epoch": 525.9074626865672, "grad_norm": 27.253870010375977, "learning_rate": 9.70754716981132e-06, "loss": 28.6677, "step": 22088 }, { "epoch": 525.9313432835821, "grad_norm": 23.793920516967773, "learning_rate": 9.707097933513028e-06, "loss": 29.431, "step": 22089 }, { "epoch": 525.955223880597, "grad_norm": 18.011985778808594, "learning_rate": 9.706648697214736e-06, "loss": 27.6111, "step": 22090 }, { "epoch": 525.9791044776119, "grad_norm": 21.6423282623291, "learning_rate": 9.706199460916444e-06, "loss": 28.3514, "step": 22091 }, { "epoch": 526.0, "grad_norm": 19.503658294677734, "learning_rate": 9.70575022461815e-06, "loss": 25.9219, "step": 22092 }, { "epoch": 526.0238805970149, "grad_norm": 21.550613403320312, "learning_rate": 9.705300988319857e-06, "loss": 27.2737, "step": 22093 }, { "epoch": 526.0477611940298, "grad_norm": 17.036521911621094, "learning_rate": 9.704851752021565e-06, "loss": 28.3173, "step": 22094 }, { "epoch": 526.0716417910447, "grad_norm": 19.320476531982422, "learning_rate": 9.704402515723271e-06, "loss": 28.8837, "step": 22095 }, { "epoch": 526.0955223880597, "grad_norm": NaN, "learning_rate": 9.703953279424979e-06, "loss": 47.7963, "step": 22096 }, { "epoch": 526.1194029850747, "grad_norm": 26.653745651245117, "learning_rate": 9.703953279424979e-06, "loss": 28.8593, "step": 22097 }, { "epoch": 526.1432835820896, "grad_norm": 25.15312957763672, "learning_rate": 9.703504043126686e-06, "loss": 29.1955, "step": 22098 }, { "epoch": 526.1671641791045, "grad_norm": 34.60872268676758, "learning_rate": 9.703054806828392e-06, "loss": 29.2201, "step": 22099 }, { "epoch": 526.1910447761194, "grad_norm": 23.0037841796875, "learning_rate": 9.7026055705301e-06, "loss": 30.7373, "step": 22100 }, { "epoch": 526.2149253731343, "grad_norm": 21.453479766845703, "learning_rate": 9.702156334231808e-06, "loss": 30.053, "step": 22101 }, { "epoch": 526.2388059701492, "grad_norm": 41.64917755126953, "learning_rate": 9.701707097933514e-06, "loss": 28.8394, "step": 22102 }, { "epoch": 526.2626865671642, "grad_norm": 26.15958023071289, "learning_rate": 9.70125786163522e-06, "loss": 30.1088, "step": 22103 }, { "epoch": 526.2865671641791, "grad_norm": 37.80615234375, "learning_rate": 9.700808625336927e-06, "loss": 29.8066, "step": 22104 }, { "epoch": 526.310447761194, "grad_norm": 28.387367248535156, "learning_rate": 9.700359389038635e-06, "loss": 29.7998, "step": 22105 }, { "epoch": 526.334328358209, "grad_norm": 24.410898208618164, "learning_rate": 9.699910152740343e-06, "loss": 29.0127, "step": 22106 }, { "epoch": 526.3582089552239, "grad_norm": 44.07614517211914, "learning_rate": 9.699460916442049e-06, "loss": 29.7754, "step": 22107 }, { "epoch": 526.3820895522388, "grad_norm": 23.433792114257812, "learning_rate": 9.699011680143757e-06, "loss": 30.1731, "step": 22108 }, { "epoch": 526.4059701492537, "grad_norm": 41.31439971923828, "learning_rate": 9.698562443845464e-06, "loss": 29.1865, "step": 22109 }, { "epoch": 526.4298507462687, "grad_norm": 25.9730224609375, "learning_rate": 9.69811320754717e-06, "loss": 30.3225, "step": 22110 }, { "epoch": 526.4537313432836, "grad_norm": 30.047687530517578, "learning_rate": 9.697663971248878e-06, "loss": 30.3878, "step": 22111 }, { "epoch": 526.4776119402985, "grad_norm": 35.606163024902344, "learning_rate": 9.697214734950586e-06, "loss": 29.4662, "step": 22112 }, { "epoch": 526.5014925373134, "grad_norm": 19.349075317382812, "learning_rate": 9.696765498652292e-06, "loss": 29.4752, "step": 22113 }, { "epoch": 526.5253731343283, "grad_norm": 40.02689743041992, "learning_rate": 9.696316262354e-06, "loss": 30.372, "step": 22114 }, { "epoch": 526.5492537313432, "grad_norm": 23.727418899536133, "learning_rate": 9.695867026055707e-06, "loss": 29.6443, "step": 22115 }, { "epoch": 526.5731343283583, "grad_norm": 37.272377014160156, "learning_rate": 9.695417789757413e-06, "loss": 29.1354, "step": 22116 }, { "epoch": 526.5970149253732, "grad_norm": 34.67998123168945, "learning_rate": 9.694968553459119e-06, "loss": 30.5329, "step": 22117 }, { "epoch": 526.6208955223881, "grad_norm": 25.44391441345215, "learning_rate": 9.694519317160828e-06, "loss": 29.8631, "step": 22118 }, { "epoch": 526.644776119403, "grad_norm": 47.87326431274414, "learning_rate": 9.694070080862534e-06, "loss": 29.7947, "step": 22119 }, { "epoch": 526.6686567164179, "grad_norm": 34.44795608520508, "learning_rate": 9.693620844564242e-06, "loss": 30.048, "step": 22120 }, { "epoch": 526.6925373134328, "grad_norm": 51.506317138671875, "learning_rate": 9.693171608265948e-06, "loss": 28.7891, "step": 22121 }, { "epoch": 526.7164179104477, "grad_norm": 40.5924186706543, "learning_rate": 9.692722371967656e-06, "loss": 30.6912, "step": 22122 }, { "epoch": 526.7402985074627, "grad_norm": 56.8275032043457, "learning_rate": 9.692273135669364e-06, "loss": 30.0767, "step": 22123 }, { "epoch": 526.7641791044776, "grad_norm": 52.18195724487305, "learning_rate": 9.69182389937107e-06, "loss": 29.1029, "step": 22124 }, { "epoch": 526.7880597014926, "grad_norm": 43.24692153930664, "learning_rate": 9.691374663072777e-06, "loss": 29.8456, "step": 22125 }, { "epoch": 526.8119402985075, "grad_norm": 45.255123138427734, "learning_rate": 9.690925426774485e-06, "loss": 29.1691, "step": 22126 }, { "epoch": 526.8358208955224, "grad_norm": 45.69075012207031, "learning_rate": 9.690476190476191e-06, "loss": 31.2448, "step": 22127 }, { "epoch": 526.8597014925373, "grad_norm": 37.37767791748047, "learning_rate": 9.690026954177899e-06, "loss": 28.9641, "step": 22128 }, { "epoch": 526.8835820895522, "grad_norm": 47.79941177368164, "learning_rate": 9.689577717879606e-06, "loss": 29.5262, "step": 22129 }, { "epoch": 526.9074626865672, "grad_norm": 42.16619873046875, "learning_rate": 9.689128481581312e-06, "loss": 28.7842, "step": 22130 }, { "epoch": 526.9313432835821, "grad_norm": 47.35902404785156, "learning_rate": 9.688679245283018e-06, "loss": 30.0091, "step": 22131 }, { "epoch": 526.955223880597, "grad_norm": 47.48914337158203, "learning_rate": 9.688230008984728e-06, "loss": 29.1025, "step": 22132 }, { "epoch": 526.9791044776119, "grad_norm": 47.98970031738281, "learning_rate": 9.687780772686434e-06, "loss": 30.7726, "step": 22133 }, { "epoch": 527.0, "grad_norm": 39.86455535888672, "learning_rate": 9.687331536388141e-06, "loss": 25.6167, "step": 22134 }, { "epoch": 527.0238805970149, "grad_norm": 41.169822692871094, "learning_rate": 9.686882300089847e-06, "loss": 29.683, "step": 22135 }, { "epoch": 527.0477611940298, "grad_norm": 37.56767654418945, "learning_rate": 9.686433063791555e-06, "loss": 29.5595, "step": 22136 }, { "epoch": 527.0716417910447, "grad_norm": 47.807552337646484, "learning_rate": 9.685983827493263e-06, "loss": 29.9184, "step": 22137 }, { "epoch": 527.0955223880597, "grad_norm": 41.12236404418945, "learning_rate": 9.685534591194969e-06, "loss": 28.6099, "step": 22138 }, { "epoch": 527.1194029850747, "grad_norm": 48.45296096801758, "learning_rate": 9.685085354896677e-06, "loss": 29.7936, "step": 22139 }, { "epoch": 527.1432835820896, "grad_norm": 42.31224060058594, "learning_rate": 9.684636118598384e-06, "loss": 29.8157, "step": 22140 }, { "epoch": 527.1671641791045, "grad_norm": 44.66638946533203, "learning_rate": 9.68418688230009e-06, "loss": 29.7532, "step": 22141 }, { "epoch": 527.1910447761194, "grad_norm": 43.3022575378418, "learning_rate": 9.683737646001798e-06, "loss": 29.4145, "step": 22142 }, { "epoch": 527.2149253731343, "grad_norm": 45.49367904663086, "learning_rate": 9.683288409703506e-06, "loss": 31.2758, "step": 22143 }, { "epoch": 527.2388059701492, "grad_norm": 42.596946716308594, "learning_rate": 9.682839173405212e-06, "loss": 29.1679, "step": 22144 }, { "epoch": 527.2626865671642, "grad_norm": NaN, "learning_rate": 9.68238993710692e-06, "loss": 39.3312, "step": 22145 }, { "epoch": 527.2865671641791, "grad_norm": 25.218868255615234, "learning_rate": 9.68238993710692e-06, "loss": 28.8042, "step": 22146 }, { "epoch": 527.310447761194, "grad_norm": 37.25248336791992, "learning_rate": 9.681940700808627e-06, "loss": 30.2397, "step": 22147 }, { "epoch": 527.334328358209, "grad_norm": 30.136014938354492, "learning_rate": 9.681491464510333e-06, "loss": 30.012, "step": 22148 }, { "epoch": 527.3582089552239, "grad_norm": 19.42925262451172, "learning_rate": 9.68104222821204e-06, "loss": 31.4146, "step": 22149 }, { "epoch": 527.3820895522388, "grad_norm": 38.092323303222656, "learning_rate": 9.680592991913747e-06, "loss": 30.4639, "step": 22150 }, { "epoch": 527.4059701492537, "grad_norm": 24.493724822998047, "learning_rate": 9.680143755615454e-06, "loss": 32.3149, "step": 22151 }, { "epoch": 527.4298507462687, "grad_norm": 42.249290466308594, "learning_rate": 9.679694519317162e-06, "loss": 30.4227, "step": 22152 }, { "epoch": 527.4537313432836, "grad_norm": 30.774702072143555, "learning_rate": 9.679245283018868e-06, "loss": 30.4863, "step": 22153 }, { "epoch": 527.4776119402985, "grad_norm": 31.267133712768555, "learning_rate": 9.678796046720576e-06, "loss": 29.9388, "step": 22154 }, { "epoch": 527.5014925373134, "grad_norm": 43.16439437866211, "learning_rate": 9.678346810422284e-06, "loss": 31.2763, "step": 22155 }, { "epoch": 527.5253731343283, "grad_norm": 25.15555763244629, "learning_rate": 9.67789757412399e-06, "loss": 30.7215, "step": 22156 }, { "epoch": 527.5492537313432, "grad_norm": 66.44638061523438, "learning_rate": 9.677448337825697e-06, "loss": 30.9, "step": 22157 }, { "epoch": 527.5731343283583, "grad_norm": 53.03017044067383, "learning_rate": 9.676999101527405e-06, "loss": 30.7407, "step": 22158 }, { "epoch": 527.5970149253732, "grad_norm": 47.17311477661133, "learning_rate": 9.676549865229111e-06, "loss": 31.4128, "step": 22159 }, { "epoch": 527.6208955223881, "grad_norm": 40.371185302734375, "learning_rate": 9.676100628930819e-06, "loss": 31.6378, "step": 22160 }, { "epoch": 527.644776119403, "grad_norm": 49.54840850830078, "learning_rate": 9.675651392632526e-06, "loss": 31.2749, "step": 22161 }, { "epoch": 527.6686567164179, "grad_norm": 34.82026290893555, "learning_rate": 9.675202156334232e-06, "loss": 31.2187, "step": 22162 }, { "epoch": 527.6925373134328, "grad_norm": 53.49551773071289, "learning_rate": 9.67475292003594e-06, "loss": 30.5303, "step": 22163 }, { "epoch": 527.7164179104477, "grad_norm": 43.28769302368164, "learning_rate": 9.674303683737648e-06, "loss": 29.7201, "step": 22164 }, { "epoch": 527.7402985074627, "grad_norm": 49.75438690185547, "learning_rate": 9.673854447439354e-06, "loss": 31.8018, "step": 22165 }, { "epoch": 527.7641791044776, "grad_norm": 46.46643829345703, "learning_rate": 9.673405211141061e-06, "loss": 31.417, "step": 22166 }, { "epoch": 527.7880597014926, "grad_norm": 48.31850814819336, "learning_rate": 9.672955974842767e-06, "loss": 30.9475, "step": 22167 }, { "epoch": 527.8119402985075, "grad_norm": 43.49681091308594, "learning_rate": 9.672506738544475e-06, "loss": 31.2654, "step": 22168 }, { "epoch": 527.8358208955224, "grad_norm": 50.4387092590332, "learning_rate": 9.672057502246183e-06, "loss": 32.3977, "step": 22169 }, { "epoch": 527.8597014925373, "grad_norm": 42.25696563720703, "learning_rate": 9.671608265947889e-06, "loss": 30.7672, "step": 22170 }, { "epoch": 527.8835820895522, "grad_norm": 46.48463821411133, "learning_rate": 9.671159029649597e-06, "loss": 30.5259, "step": 22171 }, { "epoch": 527.9074626865672, "grad_norm": 38.592899322509766, "learning_rate": 9.670709793351304e-06, "loss": 29.5515, "step": 22172 }, { "epoch": 527.9313432835821, "grad_norm": 54.23112106323242, "learning_rate": 9.67026055705301e-06, "loss": 31.397, "step": 22173 }, { "epoch": 527.955223880597, "grad_norm": 47.772377014160156, "learning_rate": 9.669811320754718e-06, "loss": 31.2692, "step": 22174 }, { "epoch": 527.9791044776119, "grad_norm": 50.216487884521484, "learning_rate": 9.669362084456426e-06, "loss": 31.0915, "step": 22175 }, { "epoch": 528.0, "grad_norm": 38.074195861816406, "learning_rate": 9.668912848158132e-06, "loss": 26.1614, "step": 22176 }, { "epoch": 528.0238805970149, "grad_norm": 44.01345443725586, "learning_rate": 9.66846361185984e-06, "loss": 30.8702, "step": 22177 }, { "epoch": 528.0477611940298, "grad_norm": 39.29638671875, "learning_rate": 9.668014375561547e-06, "loss": 32.4846, "step": 22178 }, { "epoch": 528.0716417910447, "grad_norm": 48.860137939453125, "learning_rate": 9.667565139263253e-06, "loss": 30.5981, "step": 22179 }, { "epoch": 528.0955223880597, "grad_norm": 41.380252838134766, "learning_rate": 9.66711590296496e-06, "loss": 30.9628, "step": 22180 }, { "epoch": 528.1194029850747, "grad_norm": 48.854400634765625, "learning_rate": 9.666666666666667e-06, "loss": 32.0694, "step": 22181 }, { "epoch": 528.1432835820896, "grad_norm": 50.13348388671875, "learning_rate": 9.666217430368374e-06, "loss": 30.5434, "step": 22182 }, { "epoch": 528.1671641791045, "grad_norm": 44.145729064941406, "learning_rate": 9.665768194070082e-06, "loss": 31.3294, "step": 22183 }, { "epoch": 528.1910447761194, "grad_norm": 39.551414489746094, "learning_rate": 9.665318957771788e-06, "loss": 30.3827, "step": 22184 }, { "epoch": 528.2149253731343, "grad_norm": 49.65778732299805, "learning_rate": 9.664869721473496e-06, "loss": 32.1092, "step": 22185 }, { "epoch": 528.2388059701492, "grad_norm": 39.769676208496094, "learning_rate": 9.664420485175204e-06, "loss": 30.6498, "step": 22186 }, { "epoch": 528.2626865671642, "grad_norm": 57.85197067260742, "learning_rate": 9.66397124887691e-06, "loss": 30.8761, "step": 22187 }, { "epoch": 528.2865671641791, "grad_norm": 50.192054748535156, "learning_rate": 9.663522012578617e-06, "loss": 30.7335, "step": 22188 }, { "epoch": 528.310447761194, "grad_norm": 41.9471321105957, "learning_rate": 9.663072776280325e-06, "loss": 29.3703, "step": 22189 }, { "epoch": 528.334328358209, "grad_norm": 37.38637924194336, "learning_rate": 9.662623539982031e-06, "loss": 29.8278, "step": 22190 }, { "epoch": 528.3582089552239, "grad_norm": 48.94220733642578, "learning_rate": 9.662174303683739e-06, "loss": 31.171, "step": 22191 }, { "epoch": 528.3820895522388, "grad_norm": 46.35200500488281, "learning_rate": 9.661725067385446e-06, "loss": 31.8408, "step": 22192 }, { "epoch": 528.4059701492537, "grad_norm": 50.275150299072266, "learning_rate": 9.661275831087152e-06, "loss": 30.9588, "step": 22193 }, { "epoch": 528.4298507462687, "grad_norm": 52.18654251098633, "learning_rate": 9.66082659478886e-06, "loss": 31.5643, "step": 22194 }, { "epoch": 528.4537313432836, "grad_norm": 42.77907180786133, "learning_rate": 9.660377358490568e-06, "loss": 31.7276, "step": 22195 }, { "epoch": 528.4776119402985, "grad_norm": 37.33218765258789, "learning_rate": 9.659928122192274e-06, "loss": 31.4074, "step": 22196 }, { "epoch": 528.5014925373134, "grad_norm": 53.75764083862305, "learning_rate": 9.659478885893981e-06, "loss": 30.0901, "step": 22197 }, { "epoch": 528.5253731343283, "grad_norm": 43.489742279052734, "learning_rate": 9.659029649595687e-06, "loss": 30.8834, "step": 22198 }, { "epoch": 528.5492537313432, "grad_norm": 46.624420166015625, "learning_rate": 9.658580413297395e-06, "loss": 31.886, "step": 22199 }, { "epoch": 528.5731343283583, "grad_norm": 45.321773529052734, "learning_rate": 9.658131176999103e-06, "loss": 30.1266, "step": 22200 }, { "epoch": 528.5970149253732, "grad_norm": 51.145870208740234, "learning_rate": 9.657681940700809e-06, "loss": 31.4917, "step": 22201 }, { "epoch": 528.6208955223881, "grad_norm": 38.13311767578125, "learning_rate": 9.657232704402517e-06, "loss": 31.8917, "step": 22202 }, { "epoch": 528.644776119403, "grad_norm": 55.31405258178711, "learning_rate": 9.656783468104224e-06, "loss": 30.6504, "step": 22203 }, { "epoch": 528.6686567164179, "grad_norm": 46.59998321533203, "learning_rate": 9.65633423180593e-06, "loss": 31.2234, "step": 22204 }, { "epoch": 528.6925373134328, "grad_norm": 40.71480941772461, "learning_rate": 9.655884995507638e-06, "loss": 30.6073, "step": 22205 }, { "epoch": 528.7164179104477, "grad_norm": 37.6241569519043, "learning_rate": 9.655435759209346e-06, "loss": 30.5166, "step": 22206 }, { "epoch": 528.7402985074627, "grad_norm": 50.80268478393555, "learning_rate": 9.654986522911052e-06, "loss": 30.1307, "step": 22207 }, { "epoch": 528.7641791044776, "grad_norm": 42.48664855957031, "learning_rate": 9.65453728661276e-06, "loss": 30.4538, "step": 22208 }, { "epoch": 528.7880597014926, "grad_norm": 52.624080657958984, "learning_rate": 9.654088050314467e-06, "loss": 31.2897, "step": 22209 }, { "epoch": 528.8119402985075, "grad_norm": 47.081031799316406, "learning_rate": 9.653638814016173e-06, "loss": 31.4564, "step": 22210 }, { "epoch": 528.8358208955224, "grad_norm": 42.2213249206543, "learning_rate": 9.65318957771788e-06, "loss": 30.2154, "step": 22211 }, { "epoch": 528.8597014925373, "grad_norm": 38.6678581237793, "learning_rate": 9.652740341419587e-06, "loss": 30.053, "step": 22212 }, { "epoch": 528.8835820895522, "grad_norm": 51.701683044433594, "learning_rate": 9.652291105121294e-06, "loss": 31.8994, "step": 22213 }, { "epoch": 528.9074626865672, "grad_norm": 42.75740051269531, "learning_rate": 9.651841868823002e-06, "loss": 29.3973, "step": 22214 }, { "epoch": 528.9313432835821, "grad_norm": 44.30796813964844, "learning_rate": 9.651392632524708e-06, "loss": 30.8206, "step": 22215 }, { "epoch": 528.955223880597, "grad_norm": 47.242149353027344, "learning_rate": 9.650943396226416e-06, "loss": 31.5675, "step": 22216 }, { "epoch": 528.9791044776119, "grad_norm": 42.27659225463867, "learning_rate": 9.650494159928123e-06, "loss": 32.2186, "step": 22217 }, { "epoch": 529.0, "grad_norm": 38.32880783081055, "learning_rate": 9.65004492362983e-06, "loss": 26.9871, "step": 22218 }, { "epoch": 529.0238805970149, "grad_norm": 45.243377685546875, "learning_rate": 9.649595687331537e-06, "loss": 30.8294, "step": 22219 }, { "epoch": 529.0477611940298, "grad_norm": 46.014278411865234, "learning_rate": 9.649146451033245e-06, "loss": 32.5627, "step": 22220 }, { "epoch": 529.0716417910447, "grad_norm": 48.891971588134766, "learning_rate": 9.648697214734951e-06, "loss": 32.1761, "step": 22221 }, { "epoch": 529.0955223880597, "grad_norm": 43.077125549316406, "learning_rate": 9.648247978436659e-06, "loss": 31.1395, "step": 22222 }, { "epoch": 529.1194029850747, "grad_norm": 45.85806655883789, "learning_rate": 9.647798742138366e-06, "loss": 30.934, "step": 22223 }, { "epoch": 529.1432835820896, "grad_norm": 40.86077117919922, "learning_rate": 9.647349505840072e-06, "loss": 31.3214, "step": 22224 }, { "epoch": 529.1671641791045, "grad_norm": 55.784637451171875, "learning_rate": 9.64690026954178e-06, "loss": 31.046, "step": 22225 }, { "epoch": 529.1910447761194, "grad_norm": 49.36458969116211, "learning_rate": 9.646451033243488e-06, "loss": 31.4717, "step": 22226 }, { "epoch": 529.2149253731343, "grad_norm": 44.36309814453125, "learning_rate": 9.646001796945194e-06, "loss": 31.5238, "step": 22227 }, { "epoch": 529.2388059701492, "grad_norm": 44.84843063354492, "learning_rate": 9.645552560646901e-06, "loss": 31.3072, "step": 22228 }, { "epoch": 529.2626865671642, "grad_norm": 46.173851013183594, "learning_rate": 9.645103324348607e-06, "loss": 31.9383, "step": 22229 }, { "epoch": 529.2865671641791, "grad_norm": 38.43056106567383, "learning_rate": 9.644654088050315e-06, "loss": 31.1839, "step": 22230 }, { "epoch": 529.310447761194, "grad_norm": 52.045352935791016, "learning_rate": 9.644204851752023e-06, "loss": 30.0301, "step": 22231 }, { "epoch": 529.334328358209, "grad_norm": 47.27676773071289, "learning_rate": 9.643755615453729e-06, "loss": 30.3871, "step": 22232 }, { "epoch": 529.3582089552239, "grad_norm": 38.307003021240234, "learning_rate": 9.643306379155436e-06, "loss": 32.5529, "step": 22233 }, { "epoch": 529.3820895522388, "grad_norm": 38.82638168334961, "learning_rate": 9.642857142857144e-06, "loss": 31.1304, "step": 22234 }, { "epoch": 529.4059701492537, "grad_norm": 48.117332458496094, "learning_rate": 9.64240790655885e-06, "loss": 31.0856, "step": 22235 }, { "epoch": 529.4298507462687, "grad_norm": 42.845394134521484, "learning_rate": 9.641958670260558e-06, "loss": 30.1313, "step": 22236 }, { "epoch": 529.4537313432836, "grad_norm": 50.49531936645508, "learning_rate": 9.641509433962266e-06, "loss": 32.5647, "step": 22237 }, { "epoch": 529.4776119402985, "grad_norm": 46.69673538208008, "learning_rate": 9.641060197663972e-06, "loss": 30.995, "step": 22238 }, { "epoch": 529.5014925373134, "grad_norm": 45.6102409362793, "learning_rate": 9.64061096136568e-06, "loss": 31.4181, "step": 22239 }, { "epoch": 529.5253731343283, "grad_norm": 42.4984245300293, "learning_rate": 9.640161725067387e-06, "loss": 31.7666, "step": 22240 }, { "epoch": 529.5492537313432, "grad_norm": 54.48153305053711, "learning_rate": 9.639712488769093e-06, "loss": 31.9872, "step": 22241 }, { "epoch": 529.5731343283583, "grad_norm": 44.90237045288086, "learning_rate": 9.6392632524708e-06, "loss": 31.2717, "step": 22242 }, { "epoch": 529.5970149253732, "grad_norm": 42.30410385131836, "learning_rate": 9.638814016172507e-06, "loss": 29.9128, "step": 22243 }, { "epoch": 529.6208955223881, "grad_norm": NaN, "learning_rate": 9.638364779874214e-06, "loss": 53.4284, "step": 22244 }, { "epoch": 529.644776119403, "grad_norm": 71.01400756835938, "learning_rate": 9.638364779874214e-06, "loss": 31.6246, "step": 22245 }, { "epoch": 529.6686567164179, "grad_norm": 24.63185691833496, "learning_rate": 9.637915543575922e-06, "loss": 31.1906, "step": 22246 }, { "epoch": 529.6925373134328, "grad_norm": 40.2476692199707, "learning_rate": 9.637466307277628e-06, "loss": 30.8335, "step": 22247 }, { "epoch": 529.7164179104477, "grad_norm": 32.79083251953125, "learning_rate": 9.637017070979336e-06, "loss": 32.0013, "step": 22248 }, { "epoch": 529.7402985074627, "grad_norm": 20.42189598083496, "learning_rate": 9.636567834681043e-06, "loss": 32.2867, "step": 22249 }, { "epoch": 529.7641791044776, "grad_norm": 24.69414520263672, "learning_rate": 9.63611859838275e-06, "loss": 31.6292, "step": 22250 }, { "epoch": 529.7880597014926, "grad_norm": 24.63829231262207, "learning_rate": 9.635669362084457e-06, "loss": 32.5983, "step": 22251 }, { "epoch": 529.8119402985075, "grad_norm": 25.561689376831055, "learning_rate": 9.635220125786165e-06, "loss": 33.8498, "step": 22252 }, { "epoch": 529.8358208955224, "grad_norm": 22.448833465576172, "learning_rate": 9.634770889487871e-06, "loss": 31.816, "step": 22253 }, { "epoch": 529.8597014925373, "grad_norm": 24.949813842773438, "learning_rate": 9.634321653189579e-06, "loss": 32.1997, "step": 22254 }, { "epoch": 529.8835820895522, "grad_norm": 22.870609283447266, "learning_rate": 9.633872416891286e-06, "loss": 30.5517, "step": 22255 }, { "epoch": 529.9074626865672, "grad_norm": 32.31019592285156, "learning_rate": 9.633423180592992e-06, "loss": 32.5695, "step": 22256 }, { "epoch": 529.9313432835821, "grad_norm": 26.170310974121094, "learning_rate": 9.6329739442947e-06, "loss": 30.2398, "step": 22257 }, { "epoch": 529.955223880597, "grad_norm": 19.678421020507812, "learning_rate": 9.632524707996406e-06, "loss": 30.577, "step": 22258 }, { "epoch": 529.9791044776119, "grad_norm": 19.96293830871582, "learning_rate": 9.632075471698114e-06, "loss": 33.2031, "step": 22259 }, { "epoch": 530.0, "grad_norm": 22.14073944091797, "learning_rate": 9.631626235399821e-06, "loss": 26.877, "step": 22260 }, { "epoch": 530.0, "step": 22260, "total_flos": 1.0942362264264942e+18, "train_loss": 1.0738943237071623, "train_runtime": 25670.2136, "train_samples_per_second": 110.5, "train_steps_per_second": 0.867 }, { "epoch": 530.0238805970149, "grad_norm": Infinity, "learning_rate": 1e-05, "loss": 33.5905, "step": 22261 }, { "epoch": 530.0477611940298, "grad_norm": 184.3211212158203, "learning_rate": 1e-05, "loss": 32.239, "step": 22262 }, { "epoch": 530.0716417910447, "grad_norm": Infinity, "learning_rate": 9.999559082892417e-06, "loss": 40.9942, "step": 22263 }, { "epoch": 530.0955223880597, "grad_norm": 591.2413330078125, "learning_rate": 9.999559082892417e-06, "loss": 41.7166, "step": 22264 }, { "epoch": 530.1194029850747, "grad_norm": 326.15362548828125, "learning_rate": 9.999118165784834e-06, "loss": 36.8172, "step": 22265 }, { "epoch": 530.1432835820896, "grad_norm": 107.65775299072266, "learning_rate": 9.99867724867725e-06, "loss": 33.732, "step": 22266 }, { "epoch": 530.1671641791045, "grad_norm": 138.0474395751953, "learning_rate": 9.998236331569667e-06, "loss": 32.4478, "step": 22267 }, { "epoch": 530.1910447761194, "grad_norm": 110.90072631835938, "learning_rate": 9.997795414462082e-06, "loss": 31.0441, "step": 22268 }, { "epoch": 530.2149253731343, "grad_norm": 66.60848236083984, "learning_rate": 9.997354497354498e-06, "loss": 31.0896, "step": 22269 }, { "epoch": 530.2388059701492, "grad_norm": 58.90656661987305, "learning_rate": 9.996913580246915e-06, "loss": 30.043, "step": 22270 }, { "epoch": 530.2626865671642, "grad_norm": 57.01136779785156, "learning_rate": 9.99647266313933e-06, "loss": 29.8235, "step": 22271 }, { "epoch": 530.2865671641791, "grad_norm": 39.77330017089844, "learning_rate": 9.996031746031746e-06, "loss": 29.0427, "step": 22272 }, { "epoch": 530.310447761194, "grad_norm": 36.44517135620117, "learning_rate": 9.995590828924163e-06, "loss": 28.6685, "step": 22273 }, { "epoch": 530.334328358209, "grad_norm": 37.68239974975586, "learning_rate": 9.99514991181658e-06, "loss": 30.7747, "step": 22274 }, { "epoch": 530.3582089552239, "grad_norm": 35.89151382446289, "learning_rate": 9.994708994708996e-06, "loss": 28.5278, "step": 22275 }, { "epoch": 530.3820895522388, "grad_norm": NaN, "learning_rate": 9.994268077601412e-06, "loss": 33.3337, "step": 22276 }, { "epoch": 530.4059701492537, "grad_norm": 27.973182678222656, "learning_rate": 9.994268077601412e-06, "loss": 28.2186, "step": 22277 }, { "epoch": 530.4298507462687, "grad_norm": 32.61405944824219, "learning_rate": 9.993827160493827e-06, "loss": 29.2315, "step": 22278 }, { "epoch": 530.4537313432836, "grad_norm": 33.839271545410156, "learning_rate": 9.993386243386244e-06, "loss": 29.5127, "step": 22279 }, { "epoch": 530.4776119402985, "grad_norm": 28.42644500732422, "learning_rate": 9.99294532627866e-06, "loss": 29.271, "step": 22280 }, { "epoch": 530.5014925373134, "grad_norm": 28.08054542541504, "learning_rate": 9.992504409171077e-06, "loss": 28.5656, "step": 22281 }, { "epoch": 530.5253731343283, "grad_norm": 34.24485397338867, "learning_rate": 9.992063492063493e-06, "loss": 28.5095, "step": 22282 }, { "epoch": 530.5492537313432, "grad_norm": 23.834680557250977, "learning_rate": 9.99162257495591e-06, "loss": 27.6259, "step": 22283 }, { "epoch": 530.5731343283583, "grad_norm": NaN, "learning_rate": 9.991181657848326e-06, "loss": 44.2497, "step": 22284 }, { "epoch": 530.5970149253732, "grad_norm": 27.871755599975586, "learning_rate": 9.991181657848326e-06, "loss": 27.7783, "step": 22285 }, { "epoch": 530.6208955223881, "grad_norm": 27.34736442565918, "learning_rate": 9.990740740740741e-06, "loss": 29.4647, "step": 22286 }, { "epoch": 530.644776119403, "grad_norm": 23.31960105895996, "learning_rate": 9.990299823633158e-06, "loss": 29.0302, "step": 22287 }, { "epoch": 530.6686567164179, "grad_norm": 29.143417358398438, "learning_rate": 9.989858906525574e-06, "loss": 29.0941, "step": 22288 }, { "epoch": 530.6925373134328, "grad_norm": 26.37790870666504, "learning_rate": 9.989417989417989e-06, "loss": 28.834, "step": 22289 }, { "epoch": 530.7164179104477, "grad_norm": 21.976282119750977, "learning_rate": 9.988977072310406e-06, "loss": 28.0217, "step": 22290 }, { "epoch": 530.7402985074627, "grad_norm": 18.361448287963867, "learning_rate": 9.988536155202822e-06, "loss": 28.0243, "step": 22291 }, { "epoch": 530.7641791044776, "grad_norm": 20.222015380859375, "learning_rate": 9.988095238095239e-06, "loss": 27.8809, "step": 22292 }, { "epoch": 530.7880597014926, "grad_norm": 20.160781860351562, "learning_rate": 9.987654320987655e-06, "loss": 27.8178, "step": 22293 }, { "epoch": 530.8119402985075, "grad_norm": 25.561498641967773, "learning_rate": 9.987213403880072e-06, "loss": 27.7246, "step": 22294 }, { "epoch": 530.8358208955224, "grad_norm": 19.488428115844727, "learning_rate": 9.986772486772488e-06, "loss": 27.7085, "step": 22295 }, { "epoch": 530.8597014925373, "grad_norm": 20.887144088745117, "learning_rate": 9.986331569664905e-06, "loss": 28.0525, "step": 22296 }, { "epoch": 530.8835820895522, "grad_norm": 22.56454086303711, "learning_rate": 9.98589065255732e-06, "loss": 27.126, "step": 22297 }, { "epoch": 530.9074626865672, "grad_norm": 23.173744201660156, "learning_rate": 9.985449735449736e-06, "loss": 27.6267, "step": 22298 }, { "epoch": 530.9313432835821, "grad_norm": 20.76484489440918, "learning_rate": 9.985008818342153e-06, "loss": 28.0359, "step": 22299 }, { "epoch": 530.955223880597, "grad_norm": 20.45835304260254, "learning_rate": 9.98456790123457e-06, "loss": 26.9319, "step": 22300 }, { "epoch": 530.9791044776119, "grad_norm": 21.790769577026367, "learning_rate": 9.984126984126986e-06, "loss": 27.7802, "step": 22301 }, { "epoch": 531.0, "grad_norm": 19.529150009155273, "learning_rate": 9.9836860670194e-06, "loss": 24.4549, "step": 22302 }, { "epoch": 531.0238805970149, "grad_norm": 27.342422485351562, "learning_rate": 9.983245149911817e-06, "loss": 27.6288, "step": 22303 }, { "epoch": 531.0477611940298, "grad_norm": 20.394575119018555, "learning_rate": 9.982804232804234e-06, "loss": 27.3626, "step": 22304 }, { "epoch": 531.0716417910447, "grad_norm": 22.567365646362305, "learning_rate": 9.982363315696649e-06, "loss": 27.4944, "step": 22305 }, { "epoch": 531.0955223880597, "grad_norm": 23.05344009399414, "learning_rate": 9.981922398589065e-06, "loss": 27.8187, "step": 22306 }, { "epoch": 531.1194029850747, "grad_norm": 19.801074981689453, "learning_rate": 9.981481481481482e-06, "loss": 26.7521, "step": 22307 }, { "epoch": 531.1432835820896, "grad_norm": 25.236167907714844, "learning_rate": 9.981040564373898e-06, "loss": 28.5996, "step": 22308 }, { "epoch": 531.1671641791045, "grad_norm": 19.784086227416992, "learning_rate": 9.980599647266315e-06, "loss": 28.6786, "step": 22309 }, { "epoch": 531.1910447761194, "grad_norm": 21.481016159057617, "learning_rate": 9.980158730158731e-06, "loss": 27.3929, "step": 22310 }, { "epoch": 531.2149253731343, "grad_norm": 25.44837188720703, "learning_rate": 9.979717813051148e-06, "loss": 27.2924, "step": 22311 }, { "epoch": 531.2388059701492, "grad_norm": 22.924367904663086, "learning_rate": 9.979276895943564e-06, "loss": 28.244, "step": 22312 }, { "epoch": 531.2626865671642, "grad_norm": 24.945667266845703, "learning_rate": 9.97883597883598e-06, "loss": 28.6607, "step": 22313 }, { "epoch": 531.2865671641791, "grad_norm": 28.873857498168945, "learning_rate": 9.978395061728396e-06, "loss": 28.0732, "step": 22314 }, { "epoch": 531.310447761194, "grad_norm": 21.4174747467041, "learning_rate": 9.977954144620812e-06, "loss": 27.0403, "step": 22315 }, { "epoch": 531.334328358209, "grad_norm": 29.917020797729492, "learning_rate": 9.977513227513229e-06, "loss": 28.5551, "step": 22316 }, { "epoch": 531.3582089552239, "grad_norm": 22.963184356689453, "learning_rate": 9.977072310405645e-06, "loss": 27.3219, "step": 22317 }, { "epoch": 531.3820895522388, "grad_norm": 28.867406845092773, "learning_rate": 9.97663139329806e-06, "loss": 26.935, "step": 22318 }, { "epoch": 531.4059701492537, "grad_norm": 28.450048446655273, "learning_rate": 9.976190476190477e-06, "loss": 27.0548, "step": 22319 }, { "epoch": 531.4298507462687, "grad_norm": 32.02577209472656, "learning_rate": 9.975749559082893e-06, "loss": 28.3402, "step": 22320 }, { "epoch": 531.4537313432836, "grad_norm": 24.886417388916016, "learning_rate": 9.97530864197531e-06, "loss": 27.9598, "step": 22321 }, { "epoch": 531.4776119402985, "grad_norm": 31.078466415405273, "learning_rate": 9.974867724867726e-06, "loss": 27.3667, "step": 22322 }, { "epoch": 531.5014925373134, "grad_norm": 23.742759704589844, "learning_rate": 9.974426807760141e-06, "loss": 26.3877, "step": 22323 }, { "epoch": 531.5253731343283, "grad_norm": 28.557519912719727, "learning_rate": 9.973985890652558e-06, "loss": 27.091, "step": 22324 }, { "epoch": 531.5492537313432, "grad_norm": 26.89826202392578, "learning_rate": 9.973544973544974e-06, "loss": 27.7719, "step": 22325 }, { "epoch": 531.5731343283583, "grad_norm": 24.996946334838867, "learning_rate": 9.97310405643739e-06, "loss": 27.3498, "step": 22326 }, { "epoch": 531.5970149253732, "grad_norm": 29.25736427307129, "learning_rate": 9.972663139329807e-06, "loss": 28.1547, "step": 22327 }, { "epoch": 531.6208955223881, "grad_norm": 23.268402099609375, "learning_rate": 9.972222222222224e-06, "loss": 27.6315, "step": 22328 }, { "epoch": 531.644776119403, "grad_norm": 23.572797775268555, "learning_rate": 9.97178130511464e-06, "loss": 27.3225, "step": 22329 }, { "epoch": 531.6686567164179, "grad_norm": 27.478666305541992, "learning_rate": 9.971340388007055e-06, "loss": 28.5599, "step": 22330 }, { "epoch": 531.6925373134328, "grad_norm": 21.660585403442383, "learning_rate": 9.970899470899472e-06, "loss": 26.9582, "step": 22331 }, { "epoch": 531.7164179104477, "grad_norm": 28.24553108215332, "learning_rate": 9.970458553791888e-06, "loss": 26.9278, "step": 22332 }, { "epoch": 531.7402985074627, "grad_norm": 24.888479232788086, "learning_rate": 9.970017636684303e-06, "loss": 28.1093, "step": 22333 }, { "epoch": 531.7641791044776, "grad_norm": 28.02437973022461, "learning_rate": 9.96957671957672e-06, "loss": 27.9717, "step": 22334 }, { "epoch": 531.7880597014926, "grad_norm": 25.380538940429688, "learning_rate": 9.969135802469136e-06, "loss": 28.8052, "step": 22335 }, { "epoch": 531.8119402985075, "grad_norm": 30.885801315307617, "learning_rate": 9.968694885361553e-06, "loss": 26.7256, "step": 22336 }, { "epoch": 531.8358208955224, "grad_norm": 24.29593276977539, "learning_rate": 9.968253968253969e-06, "loss": 27.2806, "step": 22337 }, { "epoch": 531.8597014925373, "grad_norm": 31.429122924804688, "learning_rate": 9.967813051146386e-06, "loss": 28.6356, "step": 22338 }, { "epoch": 531.8835820895522, "grad_norm": 27.329513549804688, "learning_rate": 9.967372134038802e-06, "loss": 27.5439, "step": 22339 }, { "epoch": 531.9074626865672, "grad_norm": 22.88738441467285, "learning_rate": 9.966931216931219e-06, "loss": 26.8142, "step": 22340 }, { "epoch": 531.9313432835821, "grad_norm": 25.245140075683594, "learning_rate": 9.966490299823634e-06, "loss": 27.7964, "step": 22341 }, { "epoch": 531.955223880597, "grad_norm": 21.450862884521484, "learning_rate": 9.96604938271605e-06, "loss": 27.5991, "step": 22342 }, { "epoch": 531.9791044776119, "grad_norm": 26.08378791809082, "learning_rate": 9.965608465608467e-06, "loss": 28.7279, "step": 22343 }, { "epoch": 532.0, "grad_norm": 17.811325073242188, "learning_rate": 9.965167548500883e-06, "loss": 23.277, "step": 22344 }, { "epoch": 532.0238805970149, "grad_norm": 23.823408126831055, "learning_rate": 9.9647266313933e-06, "loss": 27.4675, "step": 22345 }, { "epoch": 532.0477611940298, "grad_norm": 20.064332962036133, "learning_rate": 9.964285714285714e-06, "loss": 27.0998, "step": 22346 }, { "epoch": 532.0716417910447, "grad_norm": 23.926429748535156, "learning_rate": 9.963844797178131e-06, "loss": 27.6172, "step": 22347 }, { "epoch": 532.0955223880597, "grad_norm": 25.20891761779785, "learning_rate": 9.963403880070548e-06, "loss": 26.3295, "step": 22348 }, { "epoch": 532.1194029850747, "grad_norm": 22.44538116455078, "learning_rate": 9.962962962962964e-06, "loss": 26.5907, "step": 22349 }, { "epoch": 532.1432835820896, "grad_norm": 20.83099937438965, "learning_rate": 9.962522045855379e-06, "loss": 27.5323, "step": 22350 }, { "epoch": 532.1671641791045, "grad_norm": 19.19281578063965, "learning_rate": 9.962081128747795e-06, "loss": 26.8956, "step": 22351 }, { "epoch": 532.1910447761194, "grad_norm": 23.706636428833008, "learning_rate": 9.961640211640212e-06, "loss": 28.3272, "step": 22352 }, { "epoch": 532.2149253731343, "grad_norm": 20.41766929626465, "learning_rate": 9.961199294532629e-06, "loss": 27.0761, "step": 22353 }, { "epoch": 532.2388059701492, "grad_norm": 22.53948974609375, "learning_rate": 9.960758377425045e-06, "loss": 27.1104, "step": 22354 }, { "epoch": 532.2626865671642, "grad_norm": 20.2275333404541, "learning_rate": 9.960317460317462e-06, "loss": 27.2542, "step": 22355 }, { "epoch": 532.2865671641791, "grad_norm": 22.583974838256836, "learning_rate": 9.959876543209878e-06, "loss": 26.9263, "step": 22356 }, { "epoch": 532.310447761194, "grad_norm": 22.52589988708496, "learning_rate": 9.959435626102295e-06, "loss": 28.0735, "step": 22357 }, { "epoch": 532.334328358209, "grad_norm": 21.491973876953125, "learning_rate": 9.958994708994711e-06, "loss": 28.2148, "step": 22358 }, { "epoch": 532.3582089552239, "grad_norm": 23.158111572265625, "learning_rate": 9.958553791887126e-06, "loss": 27.7611, "step": 22359 }, { "epoch": 532.3820895522388, "grad_norm": 19.572826385498047, "learning_rate": 9.958112874779543e-06, "loss": 27.5085, "step": 22360 }, { "epoch": 532.4059701492537, "grad_norm": 22.097436904907227, "learning_rate": 9.957671957671959e-06, "loss": 27.9156, "step": 22361 }, { "epoch": 532.4298507462687, "grad_norm": 21.830718994140625, "learning_rate": 9.957231040564374e-06, "loss": 27.5823, "step": 22362 }, { "epoch": 532.4537313432836, "grad_norm": 18.855859756469727, "learning_rate": 9.95679012345679e-06, "loss": 27.4439, "step": 22363 }, { "epoch": 532.4776119402985, "grad_norm": 20.550552368164062, "learning_rate": 9.956349206349207e-06, "loss": 27.2208, "step": 22364 }, { "epoch": 532.5014925373134, "grad_norm": 25.17828941345215, "learning_rate": 9.955908289241623e-06, "loss": 27.4606, "step": 22365 }, { "epoch": 532.5253731343283, "grad_norm": 23.91171646118164, "learning_rate": 9.95546737213404e-06, "loss": 28.252, "step": 22366 }, { "epoch": 532.5492537313432, "grad_norm": 22.16992950439453, "learning_rate": 9.955026455026457e-06, "loss": 26.1614, "step": 22367 }, { "epoch": 532.5731343283583, "grad_norm": 22.47079849243164, "learning_rate": 9.954585537918871e-06, "loss": 27.7128, "step": 22368 }, { "epoch": 532.5970149253732, "grad_norm": 18.119491577148438, "learning_rate": 9.954144620811288e-06, "loss": 27.6894, "step": 22369 }, { "epoch": 532.6208955223881, "grad_norm": 22.22065544128418, "learning_rate": 9.953703703703704e-06, "loss": 26.2969, "step": 22370 }, { "epoch": 532.644776119403, "grad_norm": 22.984132766723633, "learning_rate": 9.953262786596121e-06, "loss": 28.1412, "step": 22371 }, { "epoch": 532.6686567164179, "grad_norm": 24.673757553100586, "learning_rate": 9.952821869488538e-06, "loss": 27.1466, "step": 22372 }, { "epoch": 532.6925373134328, "grad_norm": 20.742095947265625, "learning_rate": 9.952380952380954e-06, "loss": 26.8909, "step": 22373 }, { "epoch": 532.7164179104477, "grad_norm": 23.06160545349121, "learning_rate": 9.951940035273369e-06, "loss": 29.0387, "step": 22374 }, { "epoch": 532.7402985074627, "grad_norm": 19.36674690246582, "learning_rate": 9.951499118165785e-06, "loss": 27.734, "step": 22375 }, { "epoch": 532.7641791044776, "grad_norm": 20.280113220214844, "learning_rate": 9.951058201058202e-06, "loss": 27.5243, "step": 22376 }, { "epoch": 532.7880597014926, "grad_norm": 23.2104434967041, "learning_rate": 9.950617283950618e-06, "loss": 27.0535, "step": 22377 }, { "epoch": 532.8119402985075, "grad_norm": 21.621553421020508, "learning_rate": 9.950176366843033e-06, "loss": 28.1369, "step": 22378 }, { "epoch": 532.8358208955224, "grad_norm": 24.774995803833008, "learning_rate": 9.94973544973545e-06, "loss": 27.4184, "step": 22379 }, { "epoch": 532.8597014925373, "grad_norm": 22.573516845703125, "learning_rate": 9.949294532627866e-06, "loss": 27.6192, "step": 22380 }, { "epoch": 532.8835820895522, "grad_norm": 21.297657012939453, "learning_rate": 9.948853615520283e-06, "loss": 27.0132, "step": 22381 }, { "epoch": 532.9074626865672, "grad_norm": 20.015064239501953, "learning_rate": 9.9484126984127e-06, "loss": 27.5346, "step": 22382 }, { "epoch": 532.9313432835821, "grad_norm": 26.82492446899414, "learning_rate": 9.947971781305116e-06, "loss": 27.6072, "step": 22383 }, { "epoch": 532.955223880597, "grad_norm": 31.301273345947266, "learning_rate": 9.947530864197533e-06, "loss": 28.1114, "step": 22384 }, { "epoch": 532.9791044776119, "grad_norm": 19.813770294189453, "learning_rate": 9.947089947089947e-06, "loss": 26.4053, "step": 22385 }, { "epoch": 533.0, "grad_norm": 19.770280838012695, "learning_rate": 9.946649029982364e-06, "loss": 25.0305, "step": 22386 }, { "epoch": 533.0238805970149, "grad_norm": 21.65032386779785, "learning_rate": 9.94620811287478e-06, "loss": 26.4864, "step": 22387 }, { "epoch": 533.0477611940298, "grad_norm": 23.210893630981445, "learning_rate": 9.945767195767197e-06, "loss": 27.3599, "step": 22388 }, { "epoch": 533.0716417910447, "grad_norm": 24.48051643371582, "learning_rate": 9.945326278659613e-06, "loss": 26.6673, "step": 22389 }, { "epoch": 533.0955223880597, "grad_norm": 23.336427688598633, "learning_rate": 9.944885361552028e-06, "loss": 26.5886, "step": 22390 }, { "epoch": 533.1194029850747, "grad_norm": 20.50021743774414, "learning_rate": 9.944444444444445e-06, "loss": 26.8536, "step": 22391 }, { "epoch": 533.1432835820896, "grad_norm": 23.88351058959961, "learning_rate": 9.944003527336861e-06, "loss": 28.4703, "step": 22392 }, { "epoch": 533.1671641791045, "grad_norm": 26.253616333007812, "learning_rate": 9.943562610229278e-06, "loss": 27.3253, "step": 22393 }, { "epoch": 533.1910447761194, "grad_norm": 29.149927139282227, "learning_rate": 9.943121693121693e-06, "loss": 27.1826, "step": 22394 }, { "epoch": 533.2149253731343, "grad_norm": 19.215831756591797, "learning_rate": 9.94268077601411e-06, "loss": 27.366, "step": 22395 }, { "epoch": 533.2388059701492, "grad_norm": 24.60365104675293, "learning_rate": 9.942239858906526e-06, "loss": 27.5952, "step": 22396 }, { "epoch": 533.2626865671642, "grad_norm": 24.624414443969727, "learning_rate": 9.941798941798942e-06, "loss": 28.5944, "step": 22397 }, { "epoch": 533.2865671641791, "grad_norm": 21.789840698242188, "learning_rate": 9.941358024691359e-06, "loss": 26.0995, "step": 22398 }, { "epoch": 533.310447761194, "grad_norm": 21.467388153076172, "learning_rate": 9.940917107583775e-06, "loss": 27.2203, "step": 22399 }, { "epoch": 533.334328358209, "grad_norm": 18.84593963623047, "learning_rate": 9.940476190476192e-06, "loss": 26.8144, "step": 22400 }, { "epoch": 533.3582089552239, "grad_norm": 24.010242462158203, "learning_rate": 9.940035273368608e-06, "loss": 28.3794, "step": 22401 }, { "epoch": 533.3820895522388, "grad_norm": 24.850126266479492, "learning_rate": 9.939594356261025e-06, "loss": 27.1451, "step": 22402 }, { "epoch": 533.4059701492537, "grad_norm": 22.48638916015625, "learning_rate": 9.93915343915344e-06, "loss": 27.2285, "step": 22403 }, { "epoch": 533.4298507462687, "grad_norm": 19.690593719482422, "learning_rate": 9.938712522045856e-06, "loss": 26.9743, "step": 22404 }, { "epoch": 533.4537313432836, "grad_norm": 17.965593338012695, "learning_rate": 9.938271604938273e-06, "loss": 28.0756, "step": 22405 }, { "epoch": 533.4776119402985, "grad_norm": 22.357107162475586, "learning_rate": 9.937830687830688e-06, "loss": 27.7762, "step": 22406 }, { "epoch": 533.5014925373134, "grad_norm": 24.871623992919922, "learning_rate": 9.937389770723104e-06, "loss": 28.7413, "step": 22407 }, { "epoch": 533.5253731343283, "grad_norm": 22.940221786499023, "learning_rate": 9.93694885361552e-06, "loss": 27.1734, "step": 22408 }, { "epoch": 533.5492537313432, "grad_norm": 22.236665725708008, "learning_rate": 9.936507936507937e-06, "loss": 28.3284, "step": 22409 }, { "epoch": 533.5731343283583, "grad_norm": 17.537620544433594, "learning_rate": 9.936067019400354e-06, "loss": 25.9755, "step": 22410 }, { "epoch": 533.5970149253732, "grad_norm": NaN, "learning_rate": 9.93562610229277e-06, "loss": 49.0078, "step": 22411 }, { "epoch": 533.6208955223881, "grad_norm": 23.432174682617188, "learning_rate": 9.93562610229277e-06, "loss": 27.4726, "step": 22412 }, { "epoch": 533.644776119403, "grad_norm": 26.451030731201172, "learning_rate": 9.935185185185185e-06, "loss": 28.0838, "step": 22413 }, { "epoch": 533.6686567164179, "grad_norm": 22.523677825927734, "learning_rate": 9.934744268077602e-06, "loss": 27.6937, "step": 22414 }, { "epoch": 533.6925373134328, "grad_norm": 21.035146713256836, "learning_rate": 9.934303350970018e-06, "loss": 27.2167, "step": 22415 }, { "epoch": 533.7164179104477, "grad_norm": 22.463809967041016, "learning_rate": 9.933862433862435e-06, "loss": 28.2938, "step": 22416 }, { "epoch": 533.7402985074627, "grad_norm": 27.674476623535156, "learning_rate": 9.933421516754851e-06, "loss": 27.2195, "step": 22417 }, { "epoch": 533.7641791044776, "grad_norm": 25.9950008392334, "learning_rate": 9.932980599647268e-06, "loss": 27.4153, "step": 22418 }, { "epoch": 533.7880597014926, "grad_norm": 20.0242862701416, "learning_rate": 9.932539682539684e-06, "loss": 27.1033, "step": 22419 }, { "epoch": 533.8119402985075, "grad_norm": 25.041311264038086, "learning_rate": 9.9320987654321e-06, "loss": 27.4993, "step": 22420 }, { "epoch": 533.8358208955224, "grad_norm": 21.91097068786621, "learning_rate": 9.931657848324516e-06, "loss": 26.8343, "step": 22421 }, { "epoch": 533.8597014925373, "grad_norm": 21.6551456451416, "learning_rate": 9.931216931216932e-06, "loss": 27.4127, "step": 22422 }, { "epoch": 533.8835820895522, "grad_norm": 20.54346466064453, "learning_rate": 9.930776014109347e-06, "loss": 27.0101, "step": 22423 }, { "epoch": 533.9074626865672, "grad_norm": 21.954782485961914, "learning_rate": 9.930335097001764e-06, "loss": 27.726, "step": 22424 }, { "epoch": 533.9313432835821, "grad_norm": 24.218088150024414, "learning_rate": 9.92989417989418e-06, "loss": 27.6896, "step": 22425 }, { "epoch": 533.955223880597, "grad_norm": 23.766786575317383, "learning_rate": 9.929453262786597e-06, "loss": 26.7488, "step": 22426 }, { "epoch": 533.9791044776119, "grad_norm": 24.014829635620117, "learning_rate": 9.929012345679013e-06, "loss": 27.4641, "step": 22427 }, { "epoch": 534.0, "grad_norm": 18.472013473510742, "learning_rate": 9.92857142857143e-06, "loss": 22.6395, "step": 22428 }, { "epoch": 534.0238805970149, "grad_norm": 19.18737030029297, "learning_rate": 9.928130511463846e-06, "loss": 27.3587, "step": 22429 }, { "epoch": 534.0477611940298, "grad_norm": 17.891708374023438, "learning_rate": 9.927689594356263e-06, "loss": 27.6367, "step": 22430 }, { "epoch": 534.0716417910447, "grad_norm": 20.547056198120117, "learning_rate": 9.927248677248678e-06, "loss": 27.512, "step": 22431 }, { "epoch": 534.0955223880597, "grad_norm": 20.672502517700195, "learning_rate": 9.926807760141094e-06, "loss": 28.8589, "step": 22432 }, { "epoch": 534.1194029850747, "grad_norm": 19.1113224029541, "learning_rate": 9.92636684303351e-06, "loss": 27.8864, "step": 22433 }, { "epoch": 534.1432835820896, "grad_norm": 20.721036911010742, "learning_rate": 9.925925925925927e-06, "loss": 28.7532, "step": 22434 }, { "epoch": 534.1671641791045, "grad_norm": 20.795230865478516, "learning_rate": 9.925485008818342e-06, "loss": 27.4886, "step": 22435 }, { "epoch": 534.1910447761194, "grad_norm": 19.18734359741211, "learning_rate": 9.925044091710759e-06, "loss": 26.37, "step": 22436 }, { "epoch": 534.2149253731343, "grad_norm": 21.10427474975586, "learning_rate": 9.924603174603175e-06, "loss": 27.7404, "step": 22437 }, { "epoch": 534.2388059701492, "grad_norm": 25.075178146362305, "learning_rate": 9.924162257495592e-06, "loss": 27.3313, "step": 22438 }, { "epoch": 534.2626865671642, "grad_norm": 24.023197174072266, "learning_rate": 9.923721340388008e-06, "loss": 27.4498, "step": 22439 }, { "epoch": 534.2865671641791, "grad_norm": 20.461999893188477, "learning_rate": 9.923280423280423e-06, "loss": 26.8257, "step": 22440 }, { "epoch": 534.310447761194, "grad_norm": 19.135459899902344, "learning_rate": 9.92283950617284e-06, "loss": 26.0714, "step": 22441 }, { "epoch": 534.334328358209, "grad_norm": 18.590688705444336, "learning_rate": 9.922398589065256e-06, "loss": 26.9507, "step": 22442 }, { "epoch": 534.3582089552239, "grad_norm": 22.010831832885742, "learning_rate": 9.921957671957673e-06, "loss": 27.0479, "step": 22443 }, { "epoch": 534.3820895522388, "grad_norm": 24.427047729492188, "learning_rate": 9.92151675485009e-06, "loss": 28.1341, "step": 22444 }, { "epoch": 534.4059701492537, "grad_norm": 20.808456420898438, "learning_rate": 9.921075837742506e-06, "loss": 27.4496, "step": 22445 }, { "epoch": 534.4298507462687, "grad_norm": 18.40505027770996, "learning_rate": 9.920634920634922e-06, "loss": 25.7877, "step": 22446 }, { "epoch": 534.4537313432836, "grad_norm": 23.70652198791504, "learning_rate": 9.920194003527339e-06, "loss": 28.188, "step": 22447 }, { "epoch": 534.4776119402985, "grad_norm": 17.734853744506836, "learning_rate": 9.919753086419754e-06, "loss": 26.7262, "step": 22448 }, { "epoch": 534.5014925373134, "grad_norm": 22.237585067749023, "learning_rate": 9.91931216931217e-06, "loss": 28.1128, "step": 22449 }, { "epoch": 534.5253731343283, "grad_norm": 20.855926513671875, "learning_rate": 9.918871252204587e-06, "loss": 26.8808, "step": 22450 }, { "epoch": 534.5492537313432, "grad_norm": 24.835647583007812, "learning_rate": 9.918430335097002e-06, "loss": 27.2563, "step": 22451 }, { "epoch": 534.5731343283583, "grad_norm": 24.418048858642578, "learning_rate": 9.917989417989418e-06, "loss": 28.2072, "step": 22452 }, { "epoch": 534.5970149253732, "grad_norm": 20.07954216003418, "learning_rate": 9.917548500881835e-06, "loss": 26.9799, "step": 22453 }, { "epoch": 534.6208955223881, "grad_norm": 21.232831954956055, "learning_rate": 9.917107583774251e-06, "loss": 26.905, "step": 22454 }, { "epoch": 534.644776119403, "grad_norm": 23.318424224853516, "learning_rate": 9.916666666666668e-06, "loss": 26.7242, "step": 22455 }, { "epoch": 534.6686567164179, "grad_norm": 25.782976150512695, "learning_rate": 9.916225749559084e-06, "loss": 27.712, "step": 22456 }, { "epoch": 534.6925373134328, "grad_norm": 20.375844955444336, "learning_rate": 9.9157848324515e-06, "loss": 26.5105, "step": 22457 }, { "epoch": 534.7164179104477, "grad_norm": 20.663360595703125, "learning_rate": 9.915343915343916e-06, "loss": 26.2866, "step": 22458 }, { "epoch": 534.7402985074627, "grad_norm": 21.816205978393555, "learning_rate": 9.914902998236332e-06, "loss": 27.484, "step": 22459 }, { "epoch": 534.7641791044776, "grad_norm": 27.956167221069336, "learning_rate": 9.914462081128749e-06, "loss": 28.1141, "step": 22460 }, { "epoch": 534.7880597014926, "grad_norm": 23.123931884765625, "learning_rate": 9.914021164021165e-06, "loss": 27.7049, "step": 22461 }, { "epoch": 534.8119402985075, "grad_norm": 18.388334274291992, "learning_rate": 9.913580246913582e-06, "loss": 27.9219, "step": 22462 }, { "epoch": 534.8358208955224, "grad_norm": 27.602989196777344, "learning_rate": 9.913139329805998e-06, "loss": 27.1642, "step": 22463 }, { "epoch": 534.8597014925373, "grad_norm": 27.81449317932129, "learning_rate": 9.912698412698413e-06, "loss": 27.3329, "step": 22464 }, { "epoch": 534.8835820895522, "grad_norm": 19.349363327026367, "learning_rate": 9.91225749559083e-06, "loss": 26.91, "step": 22465 }, { "epoch": 534.9074626865672, "grad_norm": 17.39202308654785, "learning_rate": 9.911816578483246e-06, "loss": 27.0442, "step": 22466 }, { "epoch": 534.9313432835821, "grad_norm": 19.985509872436523, "learning_rate": 9.911375661375661e-06, "loss": 27.8528, "step": 22467 }, { "epoch": 534.955223880597, "grad_norm": 22.345287322998047, "learning_rate": 9.910934744268078e-06, "loss": 27.1795, "step": 22468 }, { "epoch": 534.9791044776119, "grad_norm": 17.518003463745117, "learning_rate": 9.910493827160494e-06, "loss": 26.9017, "step": 22469 }, { "epoch": 535.0, "grad_norm": 17.263235092163086, "learning_rate": 9.91005291005291e-06, "loss": 23.2689, "step": 22470 }, { "epoch": 535.0238805970149, "grad_norm": NaN, "learning_rate": 9.909611992945327e-06, "loss": 34.9669, "step": 22471 }, { "epoch": 535.0477611940298, "grad_norm": 19.686817169189453, "learning_rate": 9.909611992945327e-06, "loss": 27.2178, "step": 22472 }, { "epoch": 535.0716417910447, "grad_norm": 25.950895309448242, "learning_rate": 9.909171075837744e-06, "loss": 26.8046, "step": 22473 }, { "epoch": 535.0955223880597, "grad_norm": 20.2141170501709, "learning_rate": 9.90873015873016e-06, "loss": 26.1263, "step": 22474 }, { "epoch": 535.1194029850747, "grad_norm": 19.95455551147461, "learning_rate": 9.908289241622577e-06, "loss": 27.732, "step": 22475 }, { "epoch": 535.1432835820896, "grad_norm": 21.11615753173828, "learning_rate": 9.907848324514992e-06, "loss": 26.2472, "step": 22476 }, { "epoch": 535.1671641791045, "grad_norm": 24.45387077331543, "learning_rate": 9.907407407407408e-06, "loss": 27.2901, "step": 22477 }, { "epoch": 535.1910447761194, "grad_norm": 20.91878318786621, "learning_rate": 9.906966490299825e-06, "loss": 27.725, "step": 22478 }, { "epoch": 535.2149253731343, "grad_norm": 21.3599853515625, "learning_rate": 9.906525573192241e-06, "loss": 27.2856, "step": 22479 }, { "epoch": 535.2388059701492, "grad_norm": 23.707181930541992, "learning_rate": 9.906084656084658e-06, "loss": 27.8417, "step": 22480 }, { "epoch": 535.2626865671642, "grad_norm": 23.15378761291504, "learning_rate": 9.905643738977073e-06, "loss": 26.3975, "step": 22481 }, { "epoch": 535.2865671641791, "grad_norm": 19.638864517211914, "learning_rate": 9.905202821869489e-06, "loss": 27.1936, "step": 22482 }, { "epoch": 535.310447761194, "grad_norm": 18.376785278320312, "learning_rate": 9.904761904761906e-06, "loss": 27.3191, "step": 22483 }, { "epoch": 535.334328358209, "grad_norm": 20.514171600341797, "learning_rate": 9.904320987654322e-06, "loss": 27.4638, "step": 22484 }, { "epoch": 535.3582089552239, "grad_norm": 22.139846801757812, "learning_rate": 9.903880070546737e-06, "loss": 27.5101, "step": 22485 }, { "epoch": 535.3820895522388, "grad_norm": 19.437814712524414, "learning_rate": 9.903439153439154e-06, "loss": 28.1024, "step": 22486 }, { "epoch": 535.4059701492537, "grad_norm": 24.240327835083008, "learning_rate": 9.90299823633157e-06, "loss": 27.6567, "step": 22487 }, { "epoch": 535.4298507462687, "grad_norm": 29.873802185058594, "learning_rate": 9.902557319223987e-06, "loss": 26.9294, "step": 22488 }, { "epoch": 535.4537313432836, "grad_norm": 24.539289474487305, "learning_rate": 9.902116402116403e-06, "loss": 26.5491, "step": 22489 }, { "epoch": 535.4776119402985, "grad_norm": 22.95474624633789, "learning_rate": 9.90167548500882e-06, "loss": 27.9536, "step": 22490 }, { "epoch": 535.5014925373134, "grad_norm": 20.193130493164062, "learning_rate": 9.901234567901236e-06, "loss": 27.8749, "step": 22491 }, { "epoch": 535.5253731343283, "grad_norm": 20.517446517944336, "learning_rate": 9.900793650793653e-06, "loss": 26.8714, "step": 22492 }, { "epoch": 535.5492537313432, "grad_norm": 29.257619857788086, "learning_rate": 9.900352733686068e-06, "loss": 27.4426, "step": 22493 }, { "epoch": 535.5731343283583, "grad_norm": 24.16229248046875, "learning_rate": 9.899911816578484e-06, "loss": 27.6313, "step": 22494 }, { "epoch": 535.5970149253732, "grad_norm": 18.582435607910156, "learning_rate": 9.8994708994709e-06, "loss": 27.6146, "step": 22495 }, { "epoch": 535.6208955223881, "grad_norm": 19.299070358276367, "learning_rate": 9.899029982363315e-06, "loss": 27.0961, "step": 22496 }, { "epoch": 535.644776119403, "grad_norm": 22.744964599609375, "learning_rate": 9.898589065255732e-06, "loss": 26.0137, "step": 22497 }, { "epoch": 535.6686567164179, "grad_norm": 22.7616024017334, "learning_rate": 9.898148148148148e-06, "loss": 28.1439, "step": 22498 }, { "epoch": 535.6925373134328, "grad_norm": 21.42750358581543, "learning_rate": 9.897707231040565e-06, "loss": 27.9652, "step": 22499 }, { "epoch": 535.7164179104477, "grad_norm": 18.92816925048828, "learning_rate": 9.897266313932982e-06, "loss": 27.0832, "step": 22500 }, { "epoch": 535.7402985074627, "grad_norm": 20.294326782226562, "learning_rate": 9.896825396825398e-06, "loss": 26.9709, "step": 22501 }, { "epoch": 535.7641791044776, "grad_norm": 20.10538673400879, "learning_rate": 9.896384479717815e-06, "loss": 27.8188, "step": 22502 }, { "epoch": 535.7880597014926, "grad_norm": 24.34432601928711, "learning_rate": 9.89594356261023e-06, "loss": 28.5382, "step": 22503 }, { "epoch": 535.8119402985075, "grad_norm": 19.795089721679688, "learning_rate": 9.895502645502646e-06, "loss": 26.7827, "step": 22504 }, { "epoch": 535.8358208955224, "grad_norm": 22.791250228881836, "learning_rate": 9.895061728395063e-06, "loss": 26.581, "step": 22505 }, { "epoch": 535.8597014925373, "grad_norm": 23.637184143066406, "learning_rate": 9.894620811287479e-06, "loss": 26.7627, "step": 22506 }, { "epoch": 535.8835820895522, "grad_norm": 22.06707191467285, "learning_rate": 9.894179894179896e-06, "loss": 27.3515, "step": 22507 }, { "epoch": 535.9074626865672, "grad_norm": 26.189876556396484, "learning_rate": 9.893738977072312e-06, "loss": 27.1241, "step": 22508 }, { "epoch": 535.9313432835821, "grad_norm": 21.331993103027344, "learning_rate": 9.893298059964727e-06, "loss": 27.7149, "step": 22509 }, { "epoch": 535.955223880597, "grad_norm": 23.3685359954834, "learning_rate": 9.892857142857143e-06, "loss": 27.297, "step": 22510 }, { "epoch": 535.9791044776119, "grad_norm": 24.92168617248535, "learning_rate": 9.89241622574956e-06, "loss": 26.7381, "step": 22511 }, { "epoch": 536.0, "grad_norm": 24.624170303344727, "learning_rate": 9.891975308641975e-06, "loss": 24.0795, "step": 22512 }, { "epoch": 536.0238805970149, "grad_norm": 25.83100700378418, "learning_rate": 9.891534391534391e-06, "loss": 27.7777, "step": 22513 }, { "epoch": 536.0477611940298, "grad_norm": 28.5374813079834, "learning_rate": 9.891093474426808e-06, "loss": 27.2852, "step": 22514 }, { "epoch": 536.0716417910447, "grad_norm": 28.344635009765625, "learning_rate": 9.890652557319224e-06, "loss": 26.7924, "step": 22515 }, { "epoch": 536.0955223880597, "grad_norm": 24.572572708129883, "learning_rate": 9.890211640211641e-06, "loss": 27.4864, "step": 22516 }, { "epoch": 536.1194029850747, "grad_norm": 19.798866271972656, "learning_rate": 9.889770723104058e-06, "loss": 26.9623, "step": 22517 }, { "epoch": 536.1432835820896, "grad_norm": 29.332923889160156, "learning_rate": 9.889329805996474e-06, "loss": 27.6356, "step": 22518 }, { "epoch": 536.1671641791045, "grad_norm": 24.290958404541016, "learning_rate": 9.88888888888889e-06, "loss": 27.1686, "step": 22519 }, { "epoch": 536.1910447761194, "grad_norm": 22.704120635986328, "learning_rate": 9.888447971781307e-06, "loss": 26.8635, "step": 22520 }, { "epoch": 536.2149253731343, "grad_norm": 23.569108963012695, "learning_rate": 9.888007054673722e-06, "loss": 27.1461, "step": 22521 }, { "epoch": 536.2388059701492, "grad_norm": 22.49357795715332, "learning_rate": 9.887566137566138e-06, "loss": 26.1593, "step": 22522 }, { "epoch": 536.2626865671642, "grad_norm": 27.392438888549805, "learning_rate": 9.887125220458555e-06, "loss": 28.0829, "step": 22523 }, { "epoch": 536.2865671641791, "grad_norm": 22.855676651000977, "learning_rate": 9.886684303350972e-06, "loss": 26.6635, "step": 22524 }, { "epoch": 536.310447761194, "grad_norm": 23.412033081054688, "learning_rate": 9.886243386243386e-06, "loss": 27.8738, "step": 22525 }, { "epoch": 536.334328358209, "grad_norm": 29.12438201904297, "learning_rate": 9.885802469135803e-06, "loss": 27.0748, "step": 22526 }, { "epoch": 536.3582089552239, "grad_norm": 27.19154167175293, "learning_rate": 9.88536155202822e-06, "loss": 27.2091, "step": 22527 }, { "epoch": 536.3820895522388, "grad_norm": 23.562419891357422, "learning_rate": 9.884920634920636e-06, "loss": 26.6624, "step": 22528 }, { "epoch": 536.4059701492537, "grad_norm": 20.101280212402344, "learning_rate": 9.884479717813053e-06, "loss": 27.959, "step": 22529 }, { "epoch": 536.4298507462687, "grad_norm": 30.638893127441406, "learning_rate": 9.884038800705467e-06, "loss": 26.4038, "step": 22530 }, { "epoch": 536.4537313432836, "grad_norm": 21.61842918395996, "learning_rate": 9.883597883597884e-06, "loss": 26.5682, "step": 22531 }, { "epoch": 536.4776119402985, "grad_norm": 21.55774688720703, "learning_rate": 9.8831569664903e-06, "loss": 27.4277, "step": 22532 }, { "epoch": 536.5014925373134, "grad_norm": 26.923988342285156, "learning_rate": 9.882716049382717e-06, "loss": 27.6642, "step": 22533 }, { "epoch": 536.5253731343283, "grad_norm": 22.995519638061523, "learning_rate": 9.882275132275133e-06, "loss": 26.9567, "step": 22534 }, { "epoch": 536.5492537313432, "grad_norm": 21.69466781616211, "learning_rate": 9.88183421516755e-06, "loss": 27.9706, "step": 22535 }, { "epoch": 536.5731343283583, "grad_norm": 23.619272232055664, "learning_rate": 9.881393298059967e-06, "loss": 26.9408, "step": 22536 }, { "epoch": 536.5970149253732, "grad_norm": 32.57325744628906, "learning_rate": 9.880952380952381e-06, "loss": 26.247, "step": 22537 }, { "epoch": 536.6208955223881, "grad_norm": 20.327392578125, "learning_rate": 9.880511463844798e-06, "loss": 27.3861, "step": 22538 }, { "epoch": 536.644776119403, "grad_norm": 22.358285903930664, "learning_rate": 9.880070546737214e-06, "loss": 27.9745, "step": 22539 }, { "epoch": 536.6686567164179, "grad_norm": 29.140474319458008, "learning_rate": 9.87962962962963e-06, "loss": 28.0283, "step": 22540 }, { "epoch": 536.6925373134328, "grad_norm": 24.79902458190918, "learning_rate": 9.879188712522046e-06, "loss": 27.9355, "step": 22541 }, { "epoch": 536.7164179104477, "grad_norm": 25.05819320678711, "learning_rate": 9.878747795414462e-06, "loss": 27.1325, "step": 22542 }, { "epoch": 536.7402985074627, "grad_norm": NaN, "learning_rate": 9.878306878306879e-06, "loss": 27.3236, "step": 22543 }, { "epoch": 536.7641791044776, "grad_norm": NaN, "learning_rate": 9.878306878306879e-06, "loss": 44.1133, "step": 22544 }, { "epoch": 536.7880597014926, "grad_norm": 28.758211135864258, "learning_rate": 9.878306878306879e-06, "loss": 26.0602, "step": 22545 }, { "epoch": 536.8119402985075, "grad_norm": 23.846317291259766, "learning_rate": 9.877865961199295e-06, "loss": 26.1909, "step": 22546 }, { "epoch": 536.8358208955224, "grad_norm": 18.002384185791016, "learning_rate": 9.877425044091712e-06, "loss": 27.7252, "step": 22547 }, { "epoch": 536.8597014925373, "grad_norm": 23.8514461517334, "learning_rate": 9.876984126984128e-06, "loss": 27.0527, "step": 22548 }, { "epoch": 536.8835820895522, "grad_norm": 24.39070701599121, "learning_rate": 9.876543209876543e-06, "loss": 26.8267, "step": 22549 }, { "epoch": 536.9074626865672, "grad_norm": 25.25308609008789, "learning_rate": 9.87610229276896e-06, "loss": 26.8755, "step": 22550 }, { "epoch": 536.9313432835821, "grad_norm": 19.13920021057129, "learning_rate": 9.875661375661376e-06, "loss": 27.5965, "step": 22551 }, { "epoch": 536.955223880597, "grad_norm": 19.668241500854492, "learning_rate": 9.875220458553793e-06, "loss": 27.7655, "step": 22552 }, { "epoch": 536.9791044776119, "grad_norm": 28.026195526123047, "learning_rate": 9.87477954144621e-06, "loss": 27.4479, "step": 22553 }, { "epoch": 537.0, "grad_norm": 24.75084114074707, "learning_rate": 9.874338624338626e-06, "loss": 24.0805, "step": 22554 }, { "epoch": 537.0238805970149, "grad_norm": 19.46573257446289, "learning_rate": 9.87389770723104e-06, "loss": 26.4781, "step": 22555 }, { "epoch": 537.0477611940298, "grad_norm": 23.085187911987305, "learning_rate": 9.873456790123457e-06, "loss": 26.7306, "step": 22556 }, { "epoch": 537.0716417910447, "grad_norm": 33.464324951171875, "learning_rate": 9.873015873015874e-06, "loss": 27.3026, "step": 22557 }, { "epoch": 537.0955223880597, "grad_norm": 19.861328125, "learning_rate": 9.872574955908289e-06, "loss": 27.5215, "step": 22558 }, { "epoch": 537.1194029850747, "grad_norm": 19.998010635375977, "learning_rate": 9.872134038800705e-06, "loss": 27.7903, "step": 22559 }, { "epoch": 537.1432835820896, "grad_norm": 23.5821590423584, "learning_rate": 9.871693121693122e-06, "loss": 26.9226, "step": 22560 }, { "epoch": 537.1671641791045, "grad_norm": 20.692121505737305, "learning_rate": 9.871252204585538e-06, "loss": 27.0251, "step": 22561 }, { "epoch": 537.1910447761194, "grad_norm": 22.407434463500977, "learning_rate": 9.870811287477955e-06, "loss": 27.7793, "step": 22562 }, { "epoch": 537.2149253731343, "grad_norm": 21.668136596679688, "learning_rate": 9.870370370370371e-06, "loss": 27.7303, "step": 22563 }, { "epoch": 537.2388059701492, "grad_norm": 20.594364166259766, "learning_rate": 9.869929453262788e-06, "loss": 27.3363, "step": 22564 }, { "epoch": 537.2626865671642, "grad_norm": 21.937841415405273, "learning_rate": 9.869488536155204e-06, "loss": 26.4728, "step": 22565 }, { "epoch": 537.2865671641791, "grad_norm": 19.151905059814453, "learning_rate": 9.869047619047621e-06, "loss": 26.6888, "step": 22566 }, { "epoch": 537.310447761194, "grad_norm": 19.975196838378906, "learning_rate": 9.868606701940036e-06, "loss": 27.6065, "step": 22567 }, { "epoch": 537.334328358209, "grad_norm": 20.095966339111328, "learning_rate": 9.868165784832452e-06, "loss": 26.1899, "step": 22568 }, { "epoch": 537.3582089552239, "grad_norm": 25.833251953125, "learning_rate": 9.867724867724869e-06, "loss": 26.4702, "step": 22569 }, { "epoch": 537.3820895522388, "grad_norm": 21.473718643188477, "learning_rate": 9.867283950617285e-06, "loss": 26.7871, "step": 22570 }, { "epoch": 537.4059701492537, "grad_norm": 19.835546493530273, "learning_rate": 9.8668430335097e-06, "loss": 27.6302, "step": 22571 }, { "epoch": 537.4298507462687, "grad_norm": 20.312244415283203, "learning_rate": 9.866402116402117e-06, "loss": 27.2927, "step": 22572 }, { "epoch": 537.4537313432836, "grad_norm": 28.913959503173828, "learning_rate": 9.865961199294533e-06, "loss": 27.6971, "step": 22573 }, { "epoch": 537.4776119402985, "grad_norm": 27.033300399780273, "learning_rate": 9.86552028218695e-06, "loss": 27.2134, "step": 22574 }, { "epoch": 537.5014925373134, "grad_norm": 19.170652389526367, "learning_rate": 9.865079365079366e-06, "loss": 26.809, "step": 22575 }, { "epoch": 537.5253731343283, "grad_norm": 21.609230041503906, "learning_rate": 9.864638447971781e-06, "loss": 27.5003, "step": 22576 }, { "epoch": 537.5492537313432, "grad_norm": 29.28260612487793, "learning_rate": 9.864197530864198e-06, "loss": 27.4236, "step": 22577 }, { "epoch": 537.5731343283583, "grad_norm": 21.108583450317383, "learning_rate": 9.863756613756614e-06, "loss": 27.77, "step": 22578 }, { "epoch": 537.5970149253732, "grad_norm": 18.43410301208496, "learning_rate": 9.86331569664903e-06, "loss": 27.2547, "step": 22579 }, { "epoch": 537.6208955223881, "grad_norm": 30.946550369262695, "learning_rate": 9.862874779541447e-06, "loss": 26.9828, "step": 22580 }, { "epoch": 537.644776119403, "grad_norm": 25.353782653808594, "learning_rate": 9.862433862433864e-06, "loss": 27.1928, "step": 22581 }, { "epoch": 537.6686567164179, "grad_norm": NaN, "learning_rate": 9.86199294532628e-06, "loss": 44.15, "step": 22582 }, { "epoch": 537.6925373134328, "grad_norm": 19.844594955444336, "learning_rate": 9.86199294532628e-06, "loss": 26.6143, "step": 22583 }, { "epoch": 537.7164179104477, "grad_norm": 25.748411178588867, "learning_rate": 9.861552028218695e-06, "loss": 26.7221, "step": 22584 }, { "epoch": 537.7402985074627, "grad_norm": 23.141979217529297, "learning_rate": 9.861111111111112e-06, "loss": 26.7576, "step": 22585 }, { "epoch": 537.7641791044776, "grad_norm": 23.614429473876953, "learning_rate": 9.860670194003528e-06, "loss": 28.3312, "step": 22586 }, { "epoch": 537.7880597014926, "grad_norm": 18.73713493347168, "learning_rate": 9.860229276895945e-06, "loss": 27.2176, "step": 22587 }, { "epoch": 537.8119402985075, "grad_norm": 23.825496673583984, "learning_rate": 9.85978835978836e-06, "loss": 26.8146, "step": 22588 }, { "epoch": 537.8358208955224, "grad_norm": 27.13507080078125, "learning_rate": 9.859347442680776e-06, "loss": 27.7104, "step": 22589 }, { "epoch": 537.8597014925373, "grad_norm": 21.664226531982422, "learning_rate": 9.858906525573193e-06, "loss": 28.0976, "step": 22590 }, { "epoch": 537.8835820895522, "grad_norm": 21.490623474121094, "learning_rate": 9.85846560846561e-06, "loss": 27.4746, "step": 22591 }, { "epoch": 537.9074626865672, "grad_norm": 27.1832275390625, "learning_rate": 9.858024691358026e-06, "loss": 27.1981, "step": 22592 }, { "epoch": 537.9313432835821, "grad_norm": 20.21778106689453, "learning_rate": 9.857583774250442e-06, "loss": 27.5261, "step": 22593 }, { "epoch": 537.955223880597, "grad_norm": 28.26300048828125, "learning_rate": 9.857142857142859e-06, "loss": 27.1909, "step": 22594 }, { "epoch": 537.9791044776119, "grad_norm": 23.691030502319336, "learning_rate": 9.856701940035274e-06, "loss": 28.051, "step": 22595 }, { "epoch": 538.0, "grad_norm": 22.364282608032227, "learning_rate": 9.85626102292769e-06, "loss": 23.4502, "step": 22596 }, { "epoch": 538.0238805970149, "grad_norm": 25.23514747619629, "learning_rate": 9.855820105820107e-06, "loss": 27.1065, "step": 22597 }, { "epoch": 538.0477611940298, "grad_norm": NaN, "learning_rate": 9.855379188712523e-06, "loss": 30.1881, "step": 22598 }, { "epoch": 538.0716417910447, "grad_norm": 23.368839263916016, "learning_rate": 9.855379188712523e-06, "loss": 27.474, "step": 22599 }, { "epoch": 538.0955223880597, "grad_norm": 24.78042221069336, "learning_rate": 9.85493827160494e-06, "loss": 27.775, "step": 22600 }, { "epoch": 538.1194029850747, "grad_norm": 23.71150779724121, "learning_rate": 9.854497354497355e-06, "loss": 27.1383, "step": 22601 }, { "epoch": 538.1432835820896, "grad_norm": 20.556501388549805, "learning_rate": 9.854056437389771e-06, "loss": 26.6698, "step": 22602 }, { "epoch": 538.1671641791045, "grad_norm": 29.000913619995117, "learning_rate": 9.853615520282188e-06, "loss": 25.9034, "step": 22603 }, { "epoch": 538.1910447761194, "grad_norm": 21.553499221801758, "learning_rate": 9.853174603174604e-06, "loss": 27.7262, "step": 22604 }, { "epoch": 538.2149253731343, "grad_norm": 28.604848861694336, "learning_rate": 9.852733686067019e-06, "loss": 27.0199, "step": 22605 }, { "epoch": 538.2388059701492, "grad_norm": 22.579057693481445, "learning_rate": 9.852292768959436e-06, "loss": 26.5847, "step": 22606 }, { "epoch": 538.2626865671642, "grad_norm": 30.063045501708984, "learning_rate": 9.851851851851852e-06, "loss": 28.0595, "step": 22607 }, { "epoch": 538.2865671641791, "grad_norm": 20.772003173828125, "learning_rate": 9.851410934744269e-06, "loss": 27.4501, "step": 22608 }, { "epoch": 538.310447761194, "grad_norm": 27.116085052490234, "learning_rate": 9.850970017636685e-06, "loss": 26.9168, "step": 22609 }, { "epoch": 538.334328358209, "grad_norm": 21.46515655517578, "learning_rate": 9.850529100529102e-06, "loss": 27.4109, "step": 22610 }, { "epoch": 538.3582089552239, "grad_norm": 23.722970962524414, "learning_rate": 9.850088183421518e-06, "loss": 26.5997, "step": 22611 }, { "epoch": 538.3820895522388, "grad_norm": 24.532934188842773, "learning_rate": 9.849647266313935e-06, "loss": 26.4444, "step": 22612 }, { "epoch": 538.4059701492537, "grad_norm": 24.09141731262207, "learning_rate": 9.849206349206351e-06, "loss": 27.7507, "step": 22613 }, { "epoch": 538.4298507462687, "grad_norm": 27.301523208618164, "learning_rate": 9.848765432098766e-06, "loss": 26.5668, "step": 22614 }, { "epoch": 538.4537313432836, "grad_norm": 20.786502838134766, "learning_rate": 9.848324514991183e-06, "loss": 27.4298, "step": 22615 }, { "epoch": 538.4776119402985, "grad_norm": 21.352670669555664, "learning_rate": 9.8478835978836e-06, "loss": 26.4628, "step": 22616 }, { "epoch": 538.5014925373134, "grad_norm": 25.247947692871094, "learning_rate": 9.847442680776014e-06, "loss": 27.2451, "step": 22617 }, { "epoch": 538.5253731343283, "grad_norm": 26.056245803833008, "learning_rate": 9.84700176366843e-06, "loss": 26.554, "step": 22618 }, { "epoch": 538.5492537313432, "grad_norm": 26.41104507446289, "learning_rate": 9.846560846560847e-06, "loss": 27.2468, "step": 22619 }, { "epoch": 538.5731343283583, "grad_norm": 18.445724487304688, "learning_rate": 9.846119929453264e-06, "loss": 26.5459, "step": 22620 }, { "epoch": 538.5970149253732, "grad_norm": 23.833271026611328, "learning_rate": 9.84567901234568e-06, "loss": 26.5959, "step": 22621 }, { "epoch": 538.6208955223881, "grad_norm": 28.511812210083008, "learning_rate": 9.845238095238097e-06, "loss": 26.1688, "step": 22622 }, { "epoch": 538.644776119403, "grad_norm": 26.755817413330078, "learning_rate": 9.844797178130512e-06, "loss": 27.6528, "step": 22623 }, { "epoch": 538.6686567164179, "grad_norm": 19.098854064941406, "learning_rate": 9.844356261022928e-06, "loss": 27.1026, "step": 22624 }, { "epoch": 538.6925373134328, "grad_norm": 21.007841110229492, "learning_rate": 9.843915343915345e-06, "loss": 27.965, "step": 22625 }, { "epoch": 538.7164179104477, "grad_norm": 22.09528923034668, "learning_rate": 9.843474426807761e-06, "loss": 28.9433, "step": 22626 }, { "epoch": 538.7402985074627, "grad_norm": 22.58936309814453, "learning_rate": 9.843033509700178e-06, "loss": 28.2245, "step": 22627 }, { "epoch": 538.7641791044776, "grad_norm": 21.274749755859375, "learning_rate": 9.842592592592594e-06, "loss": 27.7946, "step": 22628 }, { "epoch": 538.7880597014926, "grad_norm": 22.99148178100586, "learning_rate": 9.84215167548501e-06, "loss": 27.6026, "step": 22629 }, { "epoch": 538.8119402985075, "grad_norm": 22.10028839111328, "learning_rate": 9.841710758377426e-06, "loss": 26.9463, "step": 22630 }, { "epoch": 538.8358208955224, "grad_norm": 26.343629837036133, "learning_rate": 9.841269841269842e-06, "loss": 27.6137, "step": 22631 }, { "epoch": 538.8597014925373, "grad_norm": NaN, "learning_rate": 9.840828924162259e-06, "loss": 30.6006, "step": 22632 }, { "epoch": 538.8835820895522, "grad_norm": 27.75356674194336, "learning_rate": 9.840828924162259e-06, "loss": 27.7752, "step": 22633 }, { "epoch": 538.9074626865672, "grad_norm": 19.054424285888672, "learning_rate": 9.840388007054673e-06, "loss": 26.9925, "step": 22634 }, { "epoch": 538.9313432835821, "grad_norm": 19.00421142578125, "learning_rate": 9.83994708994709e-06, "loss": 27.169, "step": 22635 }, { "epoch": 538.955223880597, "grad_norm": 23.218971252441406, "learning_rate": 9.839506172839507e-06, "loss": 27.3496, "step": 22636 }, { "epoch": 538.9791044776119, "grad_norm": 22.85158348083496, "learning_rate": 9.839065255731923e-06, "loss": 26.8531, "step": 22637 }, { "epoch": 539.0, "grad_norm": 18.36467170715332, "learning_rate": 9.83862433862434e-06, "loss": 23.2005, "step": 22638 }, { "epoch": 539.0238805970149, "grad_norm": 23.457977294921875, "learning_rate": 9.838183421516756e-06, "loss": 27.4211, "step": 22639 }, { "epoch": 539.0477611940298, "grad_norm": 24.168725967407227, "learning_rate": 9.837742504409173e-06, "loss": 27.9751, "step": 22640 }, { "epoch": 539.0716417910447, "grad_norm": 24.36417007446289, "learning_rate": 9.837301587301588e-06, "loss": 26.6068, "step": 22641 }, { "epoch": 539.0955223880597, "grad_norm": 22.020328521728516, "learning_rate": 9.836860670194004e-06, "loss": 25.9093, "step": 22642 }, { "epoch": 539.1194029850747, "grad_norm": 21.25326156616211, "learning_rate": 9.83641975308642e-06, "loss": 28.1929, "step": 22643 }, { "epoch": 539.1432835820896, "grad_norm": 24.16004180908203, "learning_rate": 9.835978835978837e-06, "loss": 27.9925, "step": 22644 }, { "epoch": 539.1671641791045, "grad_norm": 22.182390213012695, "learning_rate": 9.835537918871254e-06, "loss": 27.211, "step": 22645 }, { "epoch": 539.1910447761194, "grad_norm": 22.366058349609375, "learning_rate": 9.835097001763668e-06, "loss": 27.1342, "step": 22646 }, { "epoch": 539.2149253731343, "grad_norm": 26.9032039642334, "learning_rate": 9.834656084656085e-06, "loss": 27.2971, "step": 22647 }, { "epoch": 539.2388059701492, "grad_norm": 28.938505172729492, "learning_rate": 9.834215167548502e-06, "loss": 27.2721, "step": 22648 }, { "epoch": 539.2626865671642, "grad_norm": 21.147132873535156, "learning_rate": 9.833774250440918e-06, "loss": 26.9241, "step": 22649 }, { "epoch": 539.2865671641791, "grad_norm": 20.836528778076172, "learning_rate": 9.833333333333333e-06, "loss": 27.6346, "step": 22650 }, { "epoch": 539.310447761194, "grad_norm": 21.985231399536133, "learning_rate": 9.83289241622575e-06, "loss": 26.5951, "step": 22651 }, { "epoch": 539.334328358209, "grad_norm": 25.770822525024414, "learning_rate": 9.832451499118166e-06, "loss": 28.0414, "step": 22652 }, { "epoch": 539.3582089552239, "grad_norm": 22.784103393554688, "learning_rate": 9.832010582010583e-06, "loss": 27.0461, "step": 22653 }, { "epoch": 539.3820895522388, "grad_norm": 23.382471084594727, "learning_rate": 9.831569664902999e-06, "loss": 27.8623, "step": 22654 }, { "epoch": 539.4059701492537, "grad_norm": 20.910614013671875, "learning_rate": 9.831128747795416e-06, "loss": 28.4954, "step": 22655 }, { "epoch": 539.4298507462687, "grad_norm": 20.65690040588379, "learning_rate": 9.830687830687832e-06, "loss": 26.8344, "step": 22656 }, { "epoch": 539.4537313432836, "grad_norm": 23.212080001831055, "learning_rate": 9.830246913580249e-06, "loss": 27.6062, "step": 22657 }, { "epoch": 539.4776119402985, "grad_norm": 28.016387939453125, "learning_rate": 9.829805996472665e-06, "loss": 26.7502, "step": 22658 }, { "epoch": 539.5014925373134, "grad_norm": 23.290367126464844, "learning_rate": 9.82936507936508e-06, "loss": 26.6864, "step": 22659 }, { "epoch": 539.5253731343283, "grad_norm": 19.748188018798828, "learning_rate": 9.828924162257497e-06, "loss": 27.7651, "step": 22660 }, { "epoch": 539.5492537313432, "grad_norm": 21.157638549804688, "learning_rate": 9.828483245149913e-06, "loss": 27.7473, "step": 22661 }, { "epoch": 539.5731343283583, "grad_norm": 20.899051666259766, "learning_rate": 9.828042328042328e-06, "loss": 27.0741, "step": 22662 }, { "epoch": 539.5970149253732, "grad_norm": 22.863367080688477, "learning_rate": 9.827601410934744e-06, "loss": 26.7518, "step": 22663 }, { "epoch": 539.6208955223881, "grad_norm": 23.841033935546875, "learning_rate": 9.827160493827161e-06, "loss": 27.385, "step": 22664 }, { "epoch": 539.644776119403, "grad_norm": 24.495861053466797, "learning_rate": 9.826719576719578e-06, "loss": 26.9412, "step": 22665 }, { "epoch": 539.6686567164179, "grad_norm": 22.65199089050293, "learning_rate": 9.826278659611994e-06, "loss": 27.1902, "step": 22666 }, { "epoch": 539.6925373134328, "grad_norm": 20.867231369018555, "learning_rate": 9.82583774250441e-06, "loss": 26.7763, "step": 22667 }, { "epoch": 539.7164179104477, "grad_norm": 19.7003116607666, "learning_rate": 9.825396825396825e-06, "loss": 26.3565, "step": 22668 }, { "epoch": 539.7402985074627, "grad_norm": 25.662933349609375, "learning_rate": 9.824955908289242e-06, "loss": 27.0557, "step": 22669 }, { "epoch": 539.7641791044776, "grad_norm": 25.48198127746582, "learning_rate": 9.824514991181658e-06, "loss": 26.8353, "step": 22670 }, { "epoch": 539.7880597014926, "grad_norm": 20.452903747558594, "learning_rate": 9.824074074074075e-06, "loss": 26.0362, "step": 22671 }, { "epoch": 539.8119402985075, "grad_norm": 21.493316650390625, "learning_rate": 9.823633156966492e-06, "loss": 27.3522, "step": 22672 }, { "epoch": 539.8358208955224, "grad_norm": 23.253000259399414, "learning_rate": 9.823192239858908e-06, "loss": 27.2333, "step": 22673 }, { "epoch": 539.8597014925373, "grad_norm": 30.43463134765625, "learning_rate": 9.822751322751325e-06, "loss": 26.9823, "step": 22674 }, { "epoch": 539.8835820895522, "grad_norm": 18.613784790039062, "learning_rate": 9.82231040564374e-06, "loss": 26.596, "step": 22675 }, { "epoch": 539.9074626865672, "grad_norm": 25.021608352661133, "learning_rate": 9.821869488536156e-06, "loss": 27.0864, "step": 22676 }, { "epoch": 539.9313432835821, "grad_norm": 33.280208587646484, "learning_rate": 9.821428571428573e-06, "loss": 27.1451, "step": 22677 }, { "epoch": 539.955223880597, "grad_norm": 25.168264389038086, "learning_rate": 9.820987654320987e-06, "loss": 26.7969, "step": 22678 }, { "epoch": 539.9791044776119, "grad_norm": 24.958337783813477, "learning_rate": 9.820546737213404e-06, "loss": 26.4429, "step": 22679 }, { "epoch": 540.0, "grad_norm": NaN, "learning_rate": 9.82010582010582e-06, "loss": 34.4538, "step": 22680 }, { "epoch": 540.0, "step": 22680, "total_flos": 1.1148979352504324e+18, "train_loss": 0.5134432251070753, "train_runtime": 12847.4939, "train_samples_per_second": 224.953, "train_steps_per_second": 1.765 }, { "epoch": 540.0238805970149, "grad_norm": 26.11031723022461, "learning_rate": 1e-05, "loss": 27.4052, "step": 22681 }, { "epoch": 540.0477611940298, "grad_norm": Infinity, "learning_rate": 9.999574829931974e-06, "loss": 35.077, "step": 22682 }, { "epoch": 540.0716417910447, "grad_norm": Infinity, "learning_rate": 9.999574829931974e-06, "loss": 35.2205, "step": 22683 }, { "epoch": 540.0955223880597, "grad_norm": 456.9329833984375, "learning_rate": 9.999574829931974e-06, "loss": 35.0205, "step": 22684 }, { "epoch": 540.1194029850747, "grad_norm": 210.22067260742188, "learning_rate": 9.999149659863946e-06, "loss": 32.504, "step": 22685 }, { "epoch": 540.1432835820896, "grad_norm": 130.05076599121094, "learning_rate": 9.99872448979592e-06, "loss": 28.9319, "step": 22686 }, { "epoch": 540.1671641791045, "grad_norm": 91.99980163574219, "learning_rate": 9.998299319727893e-06, "loss": 28.9176, "step": 22687 }, { "epoch": 540.1910447761194, "grad_norm": 85.5173110961914, "learning_rate": 9.997874149659865e-06, "loss": 28.8632, "step": 22688 }, { "epoch": 540.2149253731343, "grad_norm": 60.63201904296875, "learning_rate": 9.997448979591836e-06, "loss": 28.9781, "step": 22689 }, { "epoch": 540.2388059701492, "grad_norm": 70.86917877197266, "learning_rate": 9.99702380952381e-06, "loss": 28.0813, "step": 22690 }, { "epoch": 540.2626865671642, "grad_norm": 49.03374099731445, "learning_rate": 9.996598639455783e-06, "loss": 27.0161, "step": 22691 }, { "epoch": 540.2865671641791, "grad_norm": 59.66175079345703, "learning_rate": 9.996173469387755e-06, "loss": 27.7044, "step": 22692 }, { "epoch": 540.310447761194, "grad_norm": 43.32854461669922, "learning_rate": 9.995748299319729e-06, "loss": 28.1692, "step": 22693 }, { "epoch": 540.334328358209, "grad_norm": 46.14690399169922, "learning_rate": 9.995323129251702e-06, "loss": 26.9753, "step": 22694 }, { "epoch": 540.3582089552239, "grad_norm": 38.455020904541016, "learning_rate": 9.994897959183675e-06, "loss": 27.3718, "step": 22695 }, { "epoch": 540.3820895522388, "grad_norm": 32.901763916015625, "learning_rate": 9.994472789115647e-06, "loss": 27.9995, "step": 22696 }, { "epoch": 540.4059701492537, "grad_norm": 36.26639175415039, "learning_rate": 9.99404761904762e-06, "loss": 26.9758, "step": 22697 }, { "epoch": 540.4298507462687, "grad_norm": 26.183879852294922, "learning_rate": 9.993622448979592e-06, "loss": 28.4125, "step": 22698 }, { "epoch": 540.4537313432836, "grad_norm": 37.08821105957031, "learning_rate": 9.993197278911566e-06, "loss": 27.4725, "step": 22699 }, { "epoch": 540.4776119402985, "grad_norm": 27.99712371826172, "learning_rate": 9.992772108843538e-06, "loss": 28.7566, "step": 22700 }, { "epoch": 540.5014925373134, "grad_norm": 27.783218383789062, "learning_rate": 9.992346938775511e-06, "loss": 28.5829, "step": 22701 }, { "epoch": 540.5253731343283, "grad_norm": 27.987735748291016, "learning_rate": 9.991921768707484e-06, "loss": 27.3247, "step": 22702 }, { "epoch": 540.5492537313432, "grad_norm": 25.560556411743164, "learning_rate": 9.991496598639456e-06, "loss": 27.6239, "step": 22703 }, { "epoch": 540.5731343283583, "grad_norm": 28.11143684387207, "learning_rate": 9.99107142857143e-06, "loss": 26.9488, "step": 22704 }, { "epoch": 540.5970149253732, "grad_norm": 23.124826431274414, "learning_rate": 9.990646258503403e-06, "loss": 27.2645, "step": 22705 }, { "epoch": 540.6208955223881, "grad_norm": 25.638011932373047, "learning_rate": 9.990221088435375e-06, "loss": 27.2043, "step": 22706 }, { "epoch": 540.644776119403, "grad_norm": 23.690834045410156, "learning_rate": 9.989795918367348e-06, "loss": 27.2182, "step": 22707 }, { "epoch": 540.6686567164179, "grad_norm": 22.456310272216797, "learning_rate": 9.98937074829932e-06, "loss": 27.4126, "step": 22708 }, { "epoch": 540.6925373134328, "grad_norm": 25.212392807006836, "learning_rate": 9.988945578231294e-06, "loss": 26.8198, "step": 22709 }, { "epoch": 540.7164179104477, "grad_norm": 24.31630516052246, "learning_rate": 9.988520408163265e-06, "loss": 26.5145, "step": 22710 }, { "epoch": 540.7402985074627, "grad_norm": 21.923768997192383, "learning_rate": 9.988095238095239e-06, "loss": 26.0583, "step": 22711 }, { "epoch": 540.7641791044776, "grad_norm": 26.101318359375, "learning_rate": 9.987670068027212e-06, "loss": 26.5161, "step": 22712 }, { "epoch": 540.7880597014926, "grad_norm": 24.043033599853516, "learning_rate": 9.987244897959184e-06, "loss": 27.4482, "step": 22713 }, { "epoch": 540.8119402985075, "grad_norm": 19.502344131469727, "learning_rate": 9.986819727891157e-06, "loss": 25.8609, "step": 22714 }, { "epoch": 540.8358208955224, "grad_norm": 24.339263916015625, "learning_rate": 9.98639455782313e-06, "loss": 28.2449, "step": 22715 }, { "epoch": 540.8597014925373, "grad_norm": 24.117612838745117, "learning_rate": 9.985969387755103e-06, "loss": 26.2548, "step": 22716 }, { "epoch": 540.8835820895522, "grad_norm": 26.459529876708984, "learning_rate": 9.985544217687076e-06, "loss": 27.4043, "step": 22717 }, { "epoch": 540.9074626865672, "grad_norm": 19.846118927001953, "learning_rate": 9.985119047619048e-06, "loss": 26.0427, "step": 22718 }, { "epoch": 540.9313432835821, "grad_norm": 22.514734268188477, "learning_rate": 9.984693877551021e-06, "loss": 27.3712, "step": 22719 }, { "epoch": 540.955223880597, "grad_norm": 22.52625274658203, "learning_rate": 9.984268707482993e-06, "loss": 27.9772, "step": 22720 }, { "epoch": 540.9791044776119, "grad_norm": NaN, "learning_rate": 9.983843537414966e-06, "loss": 48.5662, "step": 22721 }, { "epoch": 541.0, "grad_norm": 25.125782012939453, "learning_rate": 9.983843537414966e-06, "loss": 24.5208, "step": 22722 }, { "epoch": 541.0238805970149, "grad_norm": 24.450674057006836, "learning_rate": 9.98341836734694e-06, "loss": 28.1583, "step": 22723 }, { "epoch": 541.0477611940298, "grad_norm": 19.32257652282715, "learning_rate": 9.982993197278913e-06, "loss": 26.5865, "step": 22724 }, { "epoch": 541.0716417910447, "grad_norm": 24.770217895507812, "learning_rate": 9.982568027210885e-06, "loss": 26.561, "step": 22725 }, { "epoch": 541.0955223880597, "grad_norm": 26.881439208984375, "learning_rate": 9.982142857142858e-06, "loss": 27.1665, "step": 22726 }, { "epoch": 541.1194029850747, "grad_norm": 25.0325927734375, "learning_rate": 9.981717687074832e-06, "loss": 27.353, "step": 22727 }, { "epoch": 541.1432835820896, "grad_norm": 18.990942001342773, "learning_rate": 9.981292517006804e-06, "loss": 27.4299, "step": 22728 }, { "epoch": 541.1671641791045, "grad_norm": 23.905757904052734, "learning_rate": 9.980867346938775e-06, "loss": 27.0124, "step": 22729 }, { "epoch": 541.1910447761194, "grad_norm": 20.040929794311523, "learning_rate": 9.980442176870749e-06, "loss": 27.3936, "step": 22730 }, { "epoch": 541.2149253731343, "grad_norm": 23.186555862426758, "learning_rate": 9.980017006802722e-06, "loss": 27.6259, "step": 22731 }, { "epoch": 541.2388059701492, "grad_norm": 24.685590744018555, "learning_rate": 9.979591836734694e-06, "loss": 26.6181, "step": 22732 }, { "epoch": 541.2626865671642, "grad_norm": 23.01544761657715, "learning_rate": 9.979166666666668e-06, "loss": 27.0863, "step": 22733 }, { "epoch": 541.2865671641791, "grad_norm": 26.564455032348633, "learning_rate": 9.978741496598641e-06, "loss": 26.6202, "step": 22734 }, { "epoch": 541.310447761194, "grad_norm": 21.0655460357666, "learning_rate": 9.978316326530613e-06, "loss": 27.4406, "step": 22735 }, { "epoch": 541.334328358209, "grad_norm": 21.959962844848633, "learning_rate": 9.977891156462586e-06, "loss": 26.7175, "step": 22736 }, { "epoch": 541.3582089552239, "grad_norm": 25.380859375, "learning_rate": 9.97746598639456e-06, "loss": 26.0173, "step": 22737 }, { "epoch": 541.3820895522388, "grad_norm": 25.796411514282227, "learning_rate": 9.977040816326531e-06, "loss": 26.6719, "step": 22738 }, { "epoch": 541.4059701492537, "grad_norm": 20.734708786010742, "learning_rate": 9.976615646258503e-06, "loss": 27.7755, "step": 22739 }, { "epoch": 541.4298507462687, "grad_norm": 19.578428268432617, "learning_rate": 9.976190476190477e-06, "loss": 27.3375, "step": 22740 }, { "epoch": 541.4537313432836, "grad_norm": 21.736919403076172, "learning_rate": 9.97576530612245e-06, "loss": 26.2273, "step": 22741 }, { "epoch": 541.4776119402985, "grad_norm": 22.52037239074707, "learning_rate": 9.975340136054422e-06, "loss": 26.6833, "step": 22742 }, { "epoch": 541.5014925373134, "grad_norm": 23.83514976501465, "learning_rate": 9.974914965986395e-06, "loss": 26.2824, "step": 22743 }, { "epoch": 541.5253731343283, "grad_norm": 23.22620964050293, "learning_rate": 9.974489795918369e-06, "loss": 26.3339, "step": 22744 }, { "epoch": 541.5492537313432, "grad_norm": 21.371021270751953, "learning_rate": 9.97406462585034e-06, "loss": 27.6411, "step": 22745 }, { "epoch": 541.5731343283583, "grad_norm": 21.01276969909668, "learning_rate": 9.973639455782314e-06, "loss": 27.5918, "step": 22746 }, { "epoch": 541.5970149253732, "grad_norm": 21.13165855407715, "learning_rate": 9.973214285714287e-06, "loss": 26.4888, "step": 22747 }, { "epoch": 541.6208955223881, "grad_norm": 19.736989974975586, "learning_rate": 9.972789115646259e-06, "loss": 27.8526, "step": 22748 }, { "epoch": 541.644776119403, "grad_norm": 25.349960327148438, "learning_rate": 9.972363945578233e-06, "loss": 26.6681, "step": 22749 }, { "epoch": 541.6686567164179, "grad_norm": 24.233264923095703, "learning_rate": 9.971938775510204e-06, "loss": 27.1328, "step": 22750 }, { "epoch": 541.6925373134328, "grad_norm": 21.29230499267578, "learning_rate": 9.971513605442178e-06, "loss": 27.0152, "step": 22751 }, { "epoch": 541.7164179104477, "grad_norm": 23.79230499267578, "learning_rate": 9.97108843537415e-06, "loss": 26.8073, "step": 22752 }, { "epoch": 541.7402985074627, "grad_norm": 21.793010711669922, "learning_rate": 9.970663265306123e-06, "loss": 27.7934, "step": 22753 }, { "epoch": 541.7641791044776, "grad_norm": 18.090404510498047, "learning_rate": 9.970238095238096e-06, "loss": 26.9957, "step": 22754 }, { "epoch": 541.7880597014926, "grad_norm": 19.67119789123535, "learning_rate": 9.96981292517007e-06, "loss": 27.5326, "step": 22755 }, { "epoch": 541.8119402985075, "grad_norm": 19.79151153564453, "learning_rate": 9.969387755102042e-06, "loss": 26.8789, "step": 22756 }, { "epoch": 541.8358208955224, "grad_norm": 22.107955932617188, "learning_rate": 9.968962585034015e-06, "loss": 26.9784, "step": 22757 }, { "epoch": 541.8597014925373, "grad_norm": 19.081995010375977, "learning_rate": 9.968537414965987e-06, "loss": 27.5892, "step": 22758 }, { "epoch": 541.8835820895522, "grad_norm": 19.65016746520996, "learning_rate": 9.96811224489796e-06, "loss": 27.5598, "step": 22759 }, { "epoch": 541.9074626865672, "grad_norm": 20.423789978027344, "learning_rate": 9.967687074829932e-06, "loss": 27.1083, "step": 22760 }, { "epoch": 541.9313432835821, "grad_norm": 19.925212860107422, "learning_rate": 9.967261904761905e-06, "loss": 27.6419, "step": 22761 }, { "epoch": 541.955223880597, "grad_norm": 23.93865966796875, "learning_rate": 9.966836734693879e-06, "loss": 27.9276, "step": 22762 }, { "epoch": 541.9791044776119, "grad_norm": 19.79216194152832, "learning_rate": 9.96641156462585e-06, "loss": 27.5706, "step": 22763 }, { "epoch": 542.0, "grad_norm": 25.17494010925293, "learning_rate": 9.965986394557824e-06, "loss": 24.0682, "step": 22764 }, { "epoch": 542.0238805970149, "grad_norm": 27.356380462646484, "learning_rate": 9.965561224489798e-06, "loss": 27.2517, "step": 22765 }, { "epoch": 542.0477611940298, "grad_norm": 21.392099380493164, "learning_rate": 9.96513605442177e-06, "loss": 27.0518, "step": 22766 }, { "epoch": 542.0716417910447, "grad_norm": 26.933889389038086, "learning_rate": 9.964710884353743e-06, "loss": 26.0325, "step": 22767 }, { "epoch": 542.0955223880597, "grad_norm": 28.394826889038086, "learning_rate": 9.964285714285714e-06, "loss": 27.0857, "step": 22768 }, { "epoch": 542.1194029850747, "grad_norm": 23.515317916870117, "learning_rate": 9.963860544217688e-06, "loss": 26.5305, "step": 22769 }, { "epoch": 542.1432835820896, "grad_norm": 32.52069854736328, "learning_rate": 9.96343537414966e-06, "loss": 27.2113, "step": 22770 }, { "epoch": 542.1671641791045, "grad_norm": 30.387094497680664, "learning_rate": 9.963010204081633e-06, "loss": 27.8561, "step": 22771 }, { "epoch": 542.1910447761194, "grad_norm": 25.44467544555664, "learning_rate": 9.962585034013607e-06, "loss": 26.5901, "step": 22772 }, { "epoch": 542.2149253731343, "grad_norm": 37.75099182128906, "learning_rate": 9.962159863945578e-06, "loss": 26.3673, "step": 22773 }, { "epoch": 542.2388059701492, "grad_norm": 22.91534423828125, "learning_rate": 9.961734693877552e-06, "loss": 27.7594, "step": 22774 }, { "epoch": 542.2626865671642, "grad_norm": 33.31121063232422, "learning_rate": 9.961309523809525e-06, "loss": 27.4553, "step": 22775 }, { "epoch": 542.2865671641791, "grad_norm": 27.672893524169922, "learning_rate": 9.960884353741499e-06, "loss": 26.5818, "step": 22776 }, { "epoch": 542.310447761194, "grad_norm": 21.980356216430664, "learning_rate": 9.96045918367347e-06, "loss": 27.0741, "step": 22777 }, { "epoch": 542.334328358209, "grad_norm": 38.786895751953125, "learning_rate": 9.960034013605442e-06, "loss": 27.4406, "step": 22778 }, { "epoch": 542.3582089552239, "grad_norm": 22.32477569580078, "learning_rate": 9.959608843537416e-06, "loss": 27.1527, "step": 22779 }, { "epoch": 542.3820895522388, "grad_norm": 32.35005569458008, "learning_rate": 9.959183673469387e-06, "loss": 27.2271, "step": 22780 }, { "epoch": 542.4059701492537, "grad_norm": 30.014068603515625, "learning_rate": 9.95875850340136e-06, "loss": 27.0938, "step": 22781 }, { "epoch": 542.4298507462687, "grad_norm": 20.474782943725586, "learning_rate": 9.958333333333334e-06, "loss": 26.269, "step": 22782 }, { "epoch": 542.4537313432836, "grad_norm": 34.0982551574707, "learning_rate": 9.957908163265308e-06, "loss": 27.5543, "step": 22783 }, { "epoch": 542.4776119402985, "grad_norm": 24.589208602905273, "learning_rate": 9.95748299319728e-06, "loss": 27.2922, "step": 22784 }, { "epoch": 542.5014925373134, "grad_norm": 32.52626419067383, "learning_rate": 9.957057823129253e-06, "loss": 28.0229, "step": 22785 }, { "epoch": 542.5253731343283, "grad_norm": 27.70754051208496, "learning_rate": 9.956632653061226e-06, "loss": 27.29, "step": 22786 }, { "epoch": 542.5492537313432, "grad_norm": NaN, "learning_rate": 9.956207482993198e-06, "loss": 49.4222, "step": 22787 }, { "epoch": 542.5731343283583, "grad_norm": 24.435718536376953, "learning_rate": 9.956207482993198e-06, "loss": 26.875, "step": 22788 }, { "epoch": 542.5970149253732, "grad_norm": 27.114608764648438, "learning_rate": 9.955782312925172e-06, "loss": 27.0622, "step": 22789 }, { "epoch": 542.6208955223881, "grad_norm": 28.450515747070312, "learning_rate": 9.955357142857143e-06, "loss": 26.7376, "step": 22790 }, { "epoch": 542.644776119403, "grad_norm": 22.1857967376709, "learning_rate": 9.954931972789117e-06, "loss": 27.0837, "step": 22791 }, { "epoch": 542.6686567164179, "grad_norm": 31.692537307739258, "learning_rate": 9.954506802721089e-06, "loss": 26.8949, "step": 22792 }, { "epoch": 542.6925373134328, "grad_norm": 26.306516647338867, "learning_rate": 9.954081632653062e-06, "loss": 26.6994, "step": 22793 }, { "epoch": 542.7164179104477, "grad_norm": 24.12993621826172, "learning_rate": 9.953656462585035e-06, "loss": 26.4758, "step": 22794 }, { "epoch": 542.7402985074627, "grad_norm": 29.833219528198242, "learning_rate": 9.953231292517007e-06, "loss": 28.2991, "step": 22795 }, { "epoch": 542.7641791044776, "grad_norm": 22.763425827026367, "learning_rate": 9.95280612244898e-06, "loss": 26.6989, "step": 22796 }, { "epoch": 542.7880597014926, "grad_norm": 25.00774574279785, "learning_rate": 9.952380952380954e-06, "loss": 26.4783, "step": 22797 }, { "epoch": 542.8119402985075, "grad_norm": 29.201438903808594, "learning_rate": 9.951955782312926e-06, "loss": 26.9168, "step": 22798 }, { "epoch": 542.8358208955224, "grad_norm": 23.200525283813477, "learning_rate": 9.9515306122449e-06, "loss": 27.3159, "step": 22799 }, { "epoch": 542.8597014925373, "grad_norm": 20.0549373626709, "learning_rate": 9.951105442176871e-06, "loss": 26.5675, "step": 22800 }, { "epoch": 542.8835820895522, "grad_norm": 28.697433471679688, "learning_rate": 9.950680272108844e-06, "loss": 26.675, "step": 22801 }, { "epoch": 542.9074626865672, "grad_norm": 23.093639373779297, "learning_rate": 9.950255102040816e-06, "loss": 27.4019, "step": 22802 }, { "epoch": 542.9313432835821, "grad_norm": 20.948200225830078, "learning_rate": 9.94982993197279e-06, "loss": 27.0637, "step": 22803 }, { "epoch": 542.955223880597, "grad_norm": 31.880043029785156, "learning_rate": 9.949404761904763e-06, "loss": 27.2959, "step": 22804 }, { "epoch": 542.9791044776119, "grad_norm": 23.49485206604004, "learning_rate": 9.948979591836737e-06, "loss": 26.7427, "step": 22805 }, { "epoch": 543.0, "grad_norm": 19.74584197998047, "learning_rate": 9.948554421768708e-06, "loss": 24.0891, "step": 22806 }, { "epoch": 543.0238805970149, "grad_norm": 29.813852310180664, "learning_rate": 9.948129251700682e-06, "loss": 27.7747, "step": 22807 }, { "epoch": 543.0477611940298, "grad_norm": 20.328022003173828, "learning_rate": 9.947704081632654e-06, "loss": 26.08, "step": 22808 }, { "epoch": 543.0716417910447, "grad_norm": 24.439958572387695, "learning_rate": 9.947278911564627e-06, "loss": 28.1726, "step": 22809 }, { "epoch": 543.0955223880597, "grad_norm": 31.247055053710938, "learning_rate": 9.946853741496599e-06, "loss": 25.6225, "step": 22810 }, { "epoch": 543.1194029850747, "grad_norm": 19.17137336730957, "learning_rate": 9.946428571428572e-06, "loss": 27.2878, "step": 22811 }, { "epoch": 543.1432835820896, "grad_norm": 24.40829086303711, "learning_rate": 9.946003401360546e-06, "loss": 26.0402, "step": 22812 }, { "epoch": 543.1671641791045, "grad_norm": 32.29423141479492, "learning_rate": 9.945578231292517e-06, "loss": 27.5756, "step": 22813 }, { "epoch": 543.1910447761194, "grad_norm": 20.371301651000977, "learning_rate": 9.94515306122449e-06, "loss": 26.8344, "step": 22814 }, { "epoch": 543.2149253731343, "grad_norm": 25.279232025146484, "learning_rate": 9.944727891156464e-06, "loss": 27.4465, "step": 22815 }, { "epoch": 543.2388059701492, "grad_norm": 26.93621253967285, "learning_rate": 9.944302721088436e-06, "loss": 27.9931, "step": 22816 }, { "epoch": 543.2626865671642, "grad_norm": 21.020410537719727, "learning_rate": 9.94387755102041e-06, "loss": 27.1776, "step": 22817 }, { "epoch": 543.2865671641791, "grad_norm": 20.8376407623291, "learning_rate": 9.943452380952381e-06, "loss": 28.066, "step": 22818 }, { "epoch": 543.310447761194, "grad_norm": 26.094697952270508, "learning_rate": 9.943027210884355e-06, "loss": 26.9795, "step": 22819 }, { "epoch": 543.334328358209, "grad_norm": 27.72844696044922, "learning_rate": 9.942602040816326e-06, "loss": 26.7826, "step": 22820 }, { "epoch": 543.3582089552239, "grad_norm": 21.371004104614258, "learning_rate": 9.9421768707483e-06, "loss": 26.4618, "step": 22821 }, { "epoch": 543.3820895522388, "grad_norm": 20.45743751525879, "learning_rate": 9.941751700680273e-06, "loss": 26.2355, "step": 22822 }, { "epoch": 543.4059701492537, "grad_norm": 23.288448333740234, "learning_rate": 9.941326530612245e-06, "loss": 27.8943, "step": 22823 }, { "epoch": 543.4298507462687, "grad_norm": 19.347368240356445, "learning_rate": 9.940901360544218e-06, "loss": 27.2505, "step": 22824 }, { "epoch": 543.4537313432836, "grad_norm": 25.948772430419922, "learning_rate": 9.940476190476192e-06, "loss": 27.4245, "step": 22825 }, { "epoch": 543.4776119402985, "grad_norm": 30.79142189025879, "learning_rate": 9.940051020408165e-06, "loss": 27.1637, "step": 22826 }, { "epoch": 543.5014925373134, "grad_norm": 20.749937057495117, "learning_rate": 9.939625850340137e-06, "loss": 26.2973, "step": 22827 }, { "epoch": 543.5253731343283, "grad_norm": 21.160030364990234, "learning_rate": 9.939200680272109e-06, "loss": 26.6825, "step": 22828 }, { "epoch": 543.5492537313432, "grad_norm": 25.807395935058594, "learning_rate": 9.938775510204082e-06, "loss": 26.8681, "step": 22829 }, { "epoch": 543.5731343283583, "grad_norm": 24.197248458862305, "learning_rate": 9.938350340136054e-06, "loss": 26.8464, "step": 22830 }, { "epoch": 543.5970149253732, "grad_norm": 20.574190139770508, "learning_rate": 9.937925170068028e-06, "loss": 27.4497, "step": 22831 }, { "epoch": 543.6208955223881, "grad_norm": 21.036195755004883, "learning_rate": 9.937500000000001e-06, "loss": 25.6156, "step": 22832 }, { "epoch": 543.644776119403, "grad_norm": 32.04380416870117, "learning_rate": 9.937074829931974e-06, "loss": 28.2363, "step": 22833 }, { "epoch": 543.6686567164179, "grad_norm": 24.90824317932129, "learning_rate": 9.936649659863946e-06, "loss": 27.6668, "step": 22834 }, { "epoch": 543.6925373134328, "grad_norm": 21.2226619720459, "learning_rate": 9.93622448979592e-06, "loss": 26.3171, "step": 22835 }, { "epoch": 543.7164179104477, "grad_norm": 27.054590225219727, "learning_rate": 9.935799319727893e-06, "loss": 27.178, "step": 22836 }, { "epoch": 543.7402985074627, "grad_norm": 27.703702926635742, "learning_rate": 9.935374149659865e-06, "loss": 27.3607, "step": 22837 }, { "epoch": 543.7641791044776, "grad_norm": 19.671226501464844, "learning_rate": 9.934948979591838e-06, "loss": 26.8281, "step": 22838 }, { "epoch": 543.7880597014926, "grad_norm": 24.26233673095703, "learning_rate": 9.93452380952381e-06, "loss": 26.0703, "step": 22839 }, { "epoch": 543.8119402985075, "grad_norm": 21.844532012939453, "learning_rate": 9.934098639455783e-06, "loss": 28.0753, "step": 22840 }, { "epoch": 543.8358208955224, "grad_norm": 23.960371017456055, "learning_rate": 9.933673469387755e-06, "loss": 27.1468, "step": 22841 }, { "epoch": 543.8597014925373, "grad_norm": 26.376514434814453, "learning_rate": 9.933248299319729e-06, "loss": 27.7569, "step": 22842 }, { "epoch": 543.8835820895522, "grad_norm": 22.535493850708008, "learning_rate": 9.932823129251702e-06, "loss": 27.5041, "step": 22843 }, { "epoch": 543.9074626865672, "grad_norm": 21.89960479736328, "learning_rate": 9.932397959183674e-06, "loss": 27.0763, "step": 22844 }, { "epoch": 543.9313432835821, "grad_norm": 22.617650985717773, "learning_rate": 9.931972789115647e-06, "loss": 26.2527, "step": 22845 }, { "epoch": 543.955223880597, "grad_norm": 21.930749893188477, "learning_rate": 9.93154761904762e-06, "loss": 26.9686, "step": 22846 }, { "epoch": 543.9791044776119, "grad_norm": 25.937076568603516, "learning_rate": 9.931122448979593e-06, "loss": 26.6181, "step": 22847 }, { "epoch": 544.0, "grad_norm": 20.92900276184082, "learning_rate": 9.930697278911566e-06, "loss": 23.1947, "step": 22848 }, { "epoch": 544.0238805970149, "grad_norm": 23.782766342163086, "learning_rate": 9.930272108843538e-06, "loss": 26.7543, "step": 22849 }, { "epoch": 544.0477611940298, "grad_norm": 23.792335510253906, "learning_rate": 9.929846938775511e-06, "loss": 26.9741, "step": 22850 }, { "epoch": 544.0716417910447, "grad_norm": 25.923891067504883, "learning_rate": 9.929421768707483e-06, "loss": 27.4857, "step": 22851 }, { "epoch": 544.0955223880597, "grad_norm": 25.234193801879883, "learning_rate": 9.928996598639456e-06, "loss": 26.6338, "step": 22852 }, { "epoch": 544.1194029850747, "grad_norm": 24.298236846923828, "learning_rate": 9.92857142857143e-06, "loss": 26.6468, "step": 22853 }, { "epoch": 544.1432835820896, "grad_norm": 19.29990577697754, "learning_rate": 9.928146258503402e-06, "loss": 27.7053, "step": 22854 }, { "epoch": 544.1671641791045, "grad_norm": 18.186203002929688, "learning_rate": 9.927721088435375e-06, "loss": 26.9752, "step": 22855 }, { "epoch": 544.1910447761194, "grad_norm": 24.411212921142578, "learning_rate": 9.927295918367348e-06, "loss": 27.0923, "step": 22856 }, { "epoch": 544.2149253731343, "grad_norm": 22.239341735839844, "learning_rate": 9.92687074829932e-06, "loss": 26.7683, "step": 22857 }, { "epoch": 544.2388059701492, "grad_norm": 22.09562110900879, "learning_rate": 9.926445578231294e-06, "loss": 26.7298, "step": 22858 }, { "epoch": 544.2626865671642, "grad_norm": 24.148544311523438, "learning_rate": 9.926020408163265e-06, "loss": 26.1523, "step": 22859 }, { "epoch": 544.2865671641791, "grad_norm": 22.188982009887695, "learning_rate": 9.925595238095239e-06, "loss": 27.6528, "step": 22860 }, { "epoch": 544.310447761194, "grad_norm": 21.599462509155273, "learning_rate": 9.92517006802721e-06, "loss": 27.4218, "step": 22861 }, { "epoch": 544.334328358209, "grad_norm": 26.31645393371582, "learning_rate": 9.924744897959184e-06, "loss": 26.3834, "step": 22862 }, { "epoch": 544.3582089552239, "grad_norm": 22.917715072631836, "learning_rate": 9.924319727891158e-06, "loss": 27.3521, "step": 22863 }, { "epoch": 544.3820895522388, "grad_norm": 34.408843994140625, "learning_rate": 9.923894557823131e-06, "loss": 25.7894, "step": 22864 }, { "epoch": 544.4059701492537, "grad_norm": 24.03213119506836, "learning_rate": 9.923469387755103e-06, "loss": 28.1165, "step": 22865 }, { "epoch": 544.4298507462687, "grad_norm": 31.83980369567871, "learning_rate": 9.923044217687076e-06, "loss": 26.7954, "step": 22866 }, { "epoch": 544.4537313432836, "grad_norm": 28.52781105041504, "learning_rate": 9.922619047619048e-06, "loss": 26.7336, "step": 22867 }, { "epoch": 544.4776119402985, "grad_norm": 23.83939552307129, "learning_rate": 9.922193877551021e-06, "loss": 27.3861, "step": 22868 }, { "epoch": 544.5014925373134, "grad_norm": 34.466705322265625, "learning_rate": 9.921768707482993e-06, "loss": 27.4932, "step": 22869 }, { "epoch": 544.5253731343283, "grad_norm": 26.89195442199707, "learning_rate": 9.921343537414967e-06, "loss": 26.6149, "step": 22870 }, { "epoch": 544.5492537313432, "grad_norm": 29.820484161376953, "learning_rate": 9.92091836734694e-06, "loss": 27.1781, "step": 22871 }, { "epoch": 544.5731343283583, "grad_norm": 25.28359031677246, "learning_rate": 9.920493197278912e-06, "loss": 26.2896, "step": 22872 }, { "epoch": 544.5970149253732, "grad_norm": 31.34544563293457, "learning_rate": 9.920068027210885e-06, "loss": 27.2025, "step": 22873 }, { "epoch": 544.6208955223881, "grad_norm": 24.093542098999023, "learning_rate": 9.919642857142859e-06, "loss": 27.0092, "step": 22874 }, { "epoch": 544.644776119403, "grad_norm": 40.34746551513672, "learning_rate": 9.91921768707483e-06, "loss": 27.546, "step": 22875 }, { "epoch": 544.6686567164179, "grad_norm": 26.101947784423828, "learning_rate": 9.918792517006804e-06, "loss": 26.9976, "step": 22876 }, { "epoch": 544.6925373134328, "grad_norm": 44.020179748535156, "learning_rate": 9.918367346938776e-06, "loss": 27.8036, "step": 22877 }, { "epoch": 544.7164179104477, "grad_norm": 33.598472595214844, "learning_rate": 9.917942176870749e-06, "loss": 27.7683, "step": 22878 }, { "epoch": 544.7402985074627, "grad_norm": 32.95793533325195, "learning_rate": 9.91751700680272e-06, "loss": 27.5901, "step": 22879 }, { "epoch": 544.7641791044776, "grad_norm": 29.60683250427246, "learning_rate": 9.917091836734694e-06, "loss": 28.1837, "step": 22880 }, { "epoch": 544.7880597014926, "grad_norm": 28.104406356811523, "learning_rate": 9.916666666666668e-06, "loss": 26.1862, "step": 22881 }, { "epoch": 544.8119402985075, "grad_norm": 25.75876808166504, "learning_rate": 9.91624149659864e-06, "loss": 25.6949, "step": 22882 }, { "epoch": 544.8358208955224, "grad_norm": 30.336139678955078, "learning_rate": 9.915816326530613e-06, "loss": 27.6387, "step": 22883 }, { "epoch": 544.8597014925373, "grad_norm": 21.78815460205078, "learning_rate": 9.915391156462586e-06, "loss": 27.0617, "step": 22884 }, { "epoch": 544.8835820895522, "grad_norm": 32.5551643371582, "learning_rate": 9.91496598639456e-06, "loss": 27.2003, "step": 22885 }, { "epoch": 544.9074626865672, "grad_norm": 22.994651794433594, "learning_rate": 9.914540816326532e-06, "loss": 26.2537, "step": 22886 }, { "epoch": 544.9313432835821, "grad_norm": 35.47404861450195, "learning_rate": 9.914115646258505e-06, "loss": 26.5937, "step": 22887 }, { "epoch": 544.955223880597, "grad_norm": 28.5518741607666, "learning_rate": 9.913690476190477e-06, "loss": 26.7997, "step": 22888 }, { "epoch": 544.9791044776119, "grad_norm": 32.366355895996094, "learning_rate": 9.913265306122449e-06, "loss": 26.9707, "step": 22889 }, { "epoch": 545.0, "grad_norm": 25.925806045532227, "learning_rate": 9.912840136054422e-06, "loss": 23.8925, "step": 22890 }, { "epoch": 545.0238805970149, "grad_norm": 30.878803253173828, "learning_rate": 9.912414965986395e-06, "loss": 27.3068, "step": 22891 }, { "epoch": 545.0477611940298, "grad_norm": 27.74859619140625, "learning_rate": 9.911989795918369e-06, "loss": 26.9728, "step": 22892 }, { "epoch": 545.0716417910447, "grad_norm": 32.78179931640625, "learning_rate": 9.91156462585034e-06, "loss": 26.4226, "step": 22893 }, { "epoch": 545.0955223880597, "grad_norm": 30.287303924560547, "learning_rate": 9.911139455782314e-06, "loss": 26.7221, "step": 22894 }, { "epoch": 545.1194029850747, "grad_norm": 27.594554901123047, "learning_rate": 9.910714285714288e-06, "loss": 27.3353, "step": 22895 }, { "epoch": 545.1432835820896, "grad_norm": 23.96770668029785, "learning_rate": 9.91028911564626e-06, "loss": 27.6987, "step": 22896 }, { "epoch": 545.1671641791045, "grad_norm": 29.76472282409668, "learning_rate": 9.909863945578233e-06, "loss": 27.0108, "step": 22897 }, { "epoch": 545.1910447761194, "grad_norm": 25.704824447631836, "learning_rate": 9.909438775510204e-06, "loss": 28.3261, "step": 22898 }, { "epoch": 545.2149253731343, "grad_norm": 26.29448127746582, "learning_rate": 9.909013605442178e-06, "loss": 27.8064, "step": 22899 }, { "epoch": 545.2388059701492, "grad_norm": 26.080604553222656, "learning_rate": 9.90858843537415e-06, "loss": 25.1543, "step": 22900 }, { "epoch": 545.2626865671642, "grad_norm": 28.90460777282715, "learning_rate": 9.908163265306123e-06, "loss": 26.8993, "step": 22901 }, { "epoch": 545.2865671641791, "grad_norm": 21.533506393432617, "learning_rate": 9.907738095238097e-06, "loss": 26.3536, "step": 22902 }, { "epoch": 545.310447761194, "grad_norm": 24.566164016723633, "learning_rate": 9.907312925170068e-06, "loss": 27.6103, "step": 22903 }, { "epoch": 545.334328358209, "grad_norm": 26.952571868896484, "learning_rate": 9.906887755102042e-06, "loss": 26.8068, "step": 22904 }, { "epoch": 545.3582089552239, "grad_norm": 23.175312042236328, "learning_rate": 9.906462585034015e-06, "loss": 26.5865, "step": 22905 }, { "epoch": 545.3820895522388, "grad_norm": 20.01838493347168, "learning_rate": 9.906037414965987e-06, "loss": 26.8108, "step": 22906 }, { "epoch": 545.4059701492537, "grad_norm": 25.91183853149414, "learning_rate": 9.90561224489796e-06, "loss": 26.4145, "step": 22907 }, { "epoch": 545.4298507462687, "grad_norm": 25.95762062072754, "learning_rate": 9.905187074829932e-06, "loss": 26.4995, "step": 22908 }, { "epoch": 545.4537313432836, "grad_norm": 25.254344940185547, "learning_rate": 9.904761904761906e-06, "loss": 27.4917, "step": 22909 }, { "epoch": 545.4776119402985, "grad_norm": 21.832822799682617, "learning_rate": 9.904336734693877e-06, "loss": 28.2885, "step": 22910 }, { "epoch": 545.5014925373134, "grad_norm": 27.315887451171875, "learning_rate": 9.90391156462585e-06, "loss": 26.5748, "step": 22911 }, { "epoch": 545.5253731343283, "grad_norm": 28.406246185302734, "learning_rate": 9.903486394557824e-06, "loss": 26.6118, "step": 22912 }, { "epoch": 545.5492537313432, "grad_norm": 22.083415985107422, "learning_rate": 9.903061224489798e-06, "loss": 26.9666, "step": 22913 }, { "epoch": 545.5731343283583, "grad_norm": 22.240201950073242, "learning_rate": 9.90263605442177e-06, "loss": 26.1196, "step": 22914 }, { "epoch": 545.5970149253732, "grad_norm": 26.09334373474121, "learning_rate": 9.902210884353743e-06, "loss": 26.9747, "step": 22915 }, { "epoch": 545.6208955223881, "grad_norm": 30.436017990112305, "learning_rate": 9.901785714285715e-06, "loss": 26.4404, "step": 22916 }, { "epoch": 545.644776119403, "grad_norm": 21.417753219604492, "learning_rate": 9.901360544217688e-06, "loss": 26.9746, "step": 22917 }, { "epoch": 545.6686567164179, "grad_norm": 26.30109214782715, "learning_rate": 9.90093537414966e-06, "loss": 26.9216, "step": 22918 }, { "epoch": 545.6925373134328, "grad_norm": 28.73200035095215, "learning_rate": 9.900510204081633e-06, "loss": 27.8554, "step": 22919 }, { "epoch": 545.7164179104477, "grad_norm": 23.190383911132812, "learning_rate": 9.900085034013607e-06, "loss": 26.9586, "step": 22920 }, { "epoch": 545.7402985074627, "grad_norm": 21.123790740966797, "learning_rate": 9.899659863945579e-06, "loss": 27.1928, "step": 22921 }, { "epoch": 545.7641791044776, "grad_norm": 36.54177474975586, "learning_rate": 9.899234693877552e-06, "loss": 26.6876, "step": 22922 }, { "epoch": 545.7880597014926, "grad_norm": 24.1467227935791, "learning_rate": 9.898809523809525e-06, "loss": 27.3869, "step": 22923 }, { "epoch": 545.8119402985075, "grad_norm": 25.17458152770996, "learning_rate": 9.898384353741497e-06, "loss": 25.8412, "step": 22924 }, { "epoch": 545.8358208955224, "grad_norm": 32.45463180541992, "learning_rate": 9.89795918367347e-06, "loss": 26.8202, "step": 22925 }, { "epoch": 545.8597014925373, "grad_norm": 21.83772087097168, "learning_rate": 9.897534013605442e-06, "loss": 26.5969, "step": 22926 }, { "epoch": 545.8835820895522, "grad_norm": 23.75897979736328, "learning_rate": 9.897108843537416e-06, "loss": 27.4846, "step": 22927 }, { "epoch": 545.9074626865672, "grad_norm": 34.18503189086914, "learning_rate": 9.896683673469388e-06, "loss": 27.739, "step": 22928 }, { "epoch": 545.9313432835821, "grad_norm": 22.01070785522461, "learning_rate": 9.896258503401361e-06, "loss": 27.5625, "step": 22929 }, { "epoch": 545.955223880597, "grad_norm": 30.15606689453125, "learning_rate": 9.895833333333334e-06, "loss": 26.8057, "step": 22930 }, { "epoch": 545.9791044776119, "grad_norm": 29.840742111206055, "learning_rate": 9.895408163265306e-06, "loss": 26.2582, "step": 22931 }, { "epoch": 546.0, "grad_norm": 19.571582794189453, "learning_rate": 9.89498299319728e-06, "loss": 24.1288, "step": 22932 }, { "epoch": 546.0238805970149, "grad_norm": 35.25588607788086, "learning_rate": 9.894557823129253e-06, "loss": 28.5994, "step": 22933 }, { "epoch": 546.0477611940298, "grad_norm": 27.55889892578125, "learning_rate": 9.894132653061227e-06, "loss": 27.1107, "step": 22934 }, { "epoch": 546.0716417910447, "grad_norm": NaN, "learning_rate": 9.893707482993198e-06, "loss": 33.5302, "step": 22935 }, { "epoch": 546.0955223880597, "grad_norm": 21.316408157348633, "learning_rate": 9.893707482993198e-06, "loss": 26.4359, "step": 22936 }, { "epoch": 546.1194029850747, "grad_norm": 39.2155876159668, "learning_rate": 9.893282312925172e-06, "loss": 27.5005, "step": 22937 }, { "epoch": 546.1432835820896, "grad_norm": 24.481470108032227, "learning_rate": 9.892857142857143e-06, "loss": 25.9999, "step": 22938 }, { "epoch": 546.1671641791045, "grad_norm": 32.56854248046875, "learning_rate": 9.892431972789115e-06, "loss": 27.8324, "step": 22939 }, { "epoch": 546.1910447761194, "grad_norm": 27.094512939453125, "learning_rate": 9.892006802721089e-06, "loss": 26.95, "step": 22940 }, { "epoch": 546.2149253731343, "grad_norm": 21.205799102783203, "learning_rate": 9.891581632653062e-06, "loss": 27.2797, "step": 22941 }, { "epoch": 546.2388059701492, "grad_norm": 28.589385986328125, "learning_rate": 9.891156462585036e-06, "loss": 27.3804, "step": 22942 }, { "epoch": 546.2626865671642, "grad_norm": 30.168182373046875, "learning_rate": 9.890731292517007e-06, "loss": 27.6092, "step": 22943 }, { "epoch": 546.2865671641791, "grad_norm": 22.11419677734375, "learning_rate": 9.89030612244898e-06, "loss": 27.2992, "step": 22944 }, { "epoch": 546.310447761194, "grad_norm": 33.05706024169922, "learning_rate": 9.889880952380954e-06, "loss": 27.4671, "step": 22945 }, { "epoch": 546.334328358209, "grad_norm": 24.594585418701172, "learning_rate": 9.889455782312926e-06, "loss": 26.8509, "step": 22946 }, { "epoch": 546.3582089552239, "grad_norm": 24.41642189025879, "learning_rate": 9.8890306122449e-06, "loss": 27.5157, "step": 22947 }, { "epoch": 546.3820895522388, "grad_norm": 31.40077018737793, "learning_rate": 9.888605442176871e-06, "loss": 26.829, "step": 22948 }, { "epoch": 546.4059701492537, "grad_norm": 49.25104904174805, "learning_rate": 9.888180272108845e-06, "loss": 26.4477, "step": 22949 }, { "epoch": 546.4298507462687, "grad_norm": 31.808303833007812, "learning_rate": 9.887755102040816e-06, "loss": 26.7749, "step": 22950 }, { "epoch": 546.4537313432836, "grad_norm": 25.96924591064453, "learning_rate": 9.88732993197279e-06, "loss": 26.4634, "step": 22951 }, { "epoch": 546.4776119402985, "grad_norm": 19.45685386657715, "learning_rate": 9.886904761904763e-06, "loss": 26.3484, "step": 22952 }, { "epoch": 546.5014925373134, "grad_norm": 25.633058547973633, "learning_rate": 9.886479591836735e-06, "loss": 26.3388, "step": 22953 }, { "epoch": 546.5253731343283, "grad_norm": 29.455699920654297, "learning_rate": 9.886054421768708e-06, "loss": 26.4783, "step": 22954 }, { "epoch": 546.5492537313432, "grad_norm": 20.580322265625, "learning_rate": 9.885629251700682e-06, "loss": 27.0392, "step": 22955 }, { "epoch": 546.5731343283583, "grad_norm": 32.9859733581543, "learning_rate": 9.885204081632654e-06, "loss": 27.6835, "step": 22956 }, { "epoch": 546.5970149253732, "grad_norm": 27.880577087402344, "learning_rate": 9.884778911564627e-06, "loss": 26.3718, "step": 22957 }, { "epoch": 546.6208955223881, "grad_norm": 22.280309677124023, "learning_rate": 9.884353741496599e-06, "loss": 26.6637, "step": 22958 }, { "epoch": 546.644776119403, "grad_norm": 24.177730560302734, "learning_rate": 9.883928571428572e-06, "loss": 26.885, "step": 22959 }, { "epoch": 546.6686567164179, "grad_norm": 28.973617553710938, "learning_rate": 9.883503401360544e-06, "loss": 26.7135, "step": 22960 }, { "epoch": 546.6925373134328, "grad_norm": 19.81191635131836, "learning_rate": 9.883078231292518e-06, "loss": 26.5165, "step": 22961 }, { "epoch": 546.7164179104477, "grad_norm": 25.507354736328125, "learning_rate": 9.882653061224491e-06, "loss": 27.8573, "step": 22962 }, { "epoch": 546.7402985074627, "grad_norm": 27.796030044555664, "learning_rate": 9.882227891156463e-06, "loss": 26.885, "step": 22963 }, { "epoch": 546.7641791044776, "grad_norm": 21.52277374267578, "learning_rate": 9.881802721088436e-06, "loss": 27.0566, "step": 22964 }, { "epoch": 546.7880597014926, "grad_norm": 23.451866149902344, "learning_rate": 9.88137755102041e-06, "loss": 26.836, "step": 22965 }, { "epoch": 546.8119402985075, "grad_norm": NaN, "learning_rate": 9.880952380952381e-06, "loss": 26.2176, "step": 22966 }, { "epoch": 546.8358208955224, "grad_norm": 22.969980239868164, "learning_rate": 9.880952380952381e-06, "loss": 26.5486, "step": 22967 }, { "epoch": 546.8597014925373, "grad_norm": 21.908382415771484, "learning_rate": 9.880527210884355e-06, "loss": 26.6911, "step": 22968 }, { "epoch": 546.8835820895522, "grad_norm": 21.892475128173828, "learning_rate": 9.880102040816327e-06, "loss": 26.7509, "step": 22969 }, { "epoch": 546.9074626865672, "grad_norm": 22.169540405273438, "learning_rate": 9.8796768707483e-06, "loss": 26.0875, "step": 22970 }, { "epoch": 546.9313432835821, "grad_norm": 25.34871482849121, "learning_rate": 9.879251700680272e-06, "loss": 26.8499, "step": 22971 }, { "epoch": 546.955223880597, "grad_norm": 21.703510284423828, "learning_rate": 9.878826530612245e-06, "loss": 27.4347, "step": 22972 }, { "epoch": 546.9791044776119, "grad_norm": 28.596725463867188, "learning_rate": 9.878401360544219e-06, "loss": 26.8683, "step": 22973 }, { "epoch": 547.0, "grad_norm": 22.343963623046875, "learning_rate": 9.877976190476192e-06, "loss": 23.5175, "step": 22974 }, { "epoch": 547.0238805970149, "grad_norm": 25.880977630615234, "learning_rate": 9.877551020408164e-06, "loss": 26.893, "step": 22975 }, { "epoch": 547.0477611940298, "grad_norm": 21.374263763427734, "learning_rate": 9.877125850340137e-06, "loss": 26.9489, "step": 22976 }, { "epoch": 547.0716417910447, "grad_norm": 23.32733154296875, "learning_rate": 9.87670068027211e-06, "loss": 27.3502, "step": 22977 }, { "epoch": 547.0955223880597, "grad_norm": 21.9045467376709, "learning_rate": 9.876275510204083e-06, "loss": 27.7478, "step": 22978 }, { "epoch": 547.1194029850747, "grad_norm": 23.278564453125, "learning_rate": 9.875850340136054e-06, "loss": 26.1408, "step": 22979 }, { "epoch": 547.1432835820896, "grad_norm": 25.30042266845703, "learning_rate": 9.875425170068028e-06, "loss": 26.8451, "step": 22980 }, { "epoch": 547.1671641791045, "grad_norm": 24.40042495727539, "learning_rate": 9.875000000000001e-06, "loss": 27.0016, "step": 22981 }, { "epoch": 547.1910447761194, "grad_norm": 22.102861404418945, "learning_rate": 9.874574829931973e-06, "loss": 26.3905, "step": 22982 }, { "epoch": 547.2149253731343, "grad_norm": 21.65555763244629, "learning_rate": 9.874149659863946e-06, "loss": 27.4021, "step": 22983 }, { "epoch": 547.2388059701492, "grad_norm": 20.80206871032715, "learning_rate": 9.87372448979592e-06, "loss": 26.1273, "step": 22984 }, { "epoch": 547.2626865671642, "grad_norm": NaN, "learning_rate": 9.873299319727892e-06, "loss": 42.3968, "step": 22985 }, { "epoch": 547.2865671641791, "grad_norm": 21.585899353027344, "learning_rate": 9.873299319727892e-06, "loss": 27.2362, "step": 22986 }, { "epoch": 547.310447761194, "grad_norm": 21.757436752319336, "learning_rate": 9.872874149659865e-06, "loss": 27.2051, "step": 22987 }, { "epoch": 547.334328358209, "grad_norm": 22.367286682128906, "learning_rate": 9.872448979591838e-06, "loss": 27.3535, "step": 22988 }, { "epoch": 547.3582089552239, "grad_norm": 23.50747299194336, "learning_rate": 9.87202380952381e-06, "loss": 27.7105, "step": 22989 }, { "epoch": 547.3820895522388, "grad_norm": 24.469736099243164, "learning_rate": 9.871598639455782e-06, "loss": 27.0735, "step": 22990 }, { "epoch": 547.4059701492537, "grad_norm": 35.363136291503906, "learning_rate": 9.871173469387755e-06, "loss": 26.367, "step": 22991 }, { "epoch": 547.4298507462687, "grad_norm": 20.59636688232422, "learning_rate": 9.870748299319729e-06, "loss": 26.3005, "step": 22992 }, { "epoch": 547.4537313432836, "grad_norm": 36.1402702331543, "learning_rate": 9.8703231292517e-06, "loss": 25.3125, "step": 22993 }, { "epoch": 547.4776119402985, "grad_norm": 29.700183868408203, "learning_rate": 9.869897959183674e-06, "loss": 27.566, "step": 22994 }, { "epoch": 547.5014925373134, "grad_norm": 27.151620864868164, "learning_rate": 9.869472789115648e-06, "loss": 26.5478, "step": 22995 }, { "epoch": 547.5253731343283, "grad_norm": 33.926597595214844, "learning_rate": 9.869047619047621e-06, "loss": 26.3476, "step": 22996 }, { "epoch": 547.5492537313432, "grad_norm": 22.255908966064453, "learning_rate": 9.868622448979593e-06, "loss": 27.7888, "step": 22997 }, { "epoch": 547.5731343283583, "grad_norm": 38.616355895996094, "learning_rate": 9.868197278911566e-06, "loss": 26.3304, "step": 22998 }, { "epoch": 547.5970149253732, "grad_norm": 24.49024200439453, "learning_rate": 9.867772108843538e-06, "loss": 26.0832, "step": 22999 }, { "epoch": 547.6208955223881, "grad_norm": 40.87156677246094, "learning_rate": 9.867346938775511e-06, "loss": 27.0987, "step": 23000 }, { "epoch": 547.644776119403, "grad_norm": 25.664426803588867, "learning_rate": 9.866921768707483e-06, "loss": 27.0746, "step": 23001 }, { "epoch": 547.6686567164179, "grad_norm": 38.425724029541016, "learning_rate": 9.866496598639457e-06, "loss": 26.9046, "step": 23002 }, { "epoch": 547.6925373134328, "grad_norm": 28.68681526184082, "learning_rate": 9.86607142857143e-06, "loss": 27.3349, "step": 23003 }, { "epoch": 547.7164179104477, "grad_norm": 25.113889694213867, "learning_rate": 9.865646258503402e-06, "loss": 26.1208, "step": 23004 }, { "epoch": 547.7402985074627, "grad_norm": 40.17877197265625, "learning_rate": 9.865221088435375e-06, "loss": 27.3483, "step": 23005 }, { "epoch": 547.7641791044776, "grad_norm": 24.559547424316406, "learning_rate": 9.864795918367349e-06, "loss": 25.9991, "step": 23006 }, { "epoch": 547.7880597014926, "grad_norm": 42.76939392089844, "learning_rate": 9.86437074829932e-06, "loss": 27.1299, "step": 23007 }, { "epoch": 547.8119402985075, "grad_norm": 29.24649429321289, "learning_rate": 9.863945578231294e-06, "loss": 26.9399, "step": 23008 }, { "epoch": 547.8358208955224, "grad_norm": 43.59734344482422, "learning_rate": 9.863520408163266e-06, "loss": 27.3808, "step": 23009 }, { "epoch": 547.8597014925373, "grad_norm": 29.752126693725586, "learning_rate": 9.863095238095239e-06, "loss": 27.3197, "step": 23010 }, { "epoch": 547.8835820895522, "grad_norm": 39.88623046875, "learning_rate": 9.86267006802721e-06, "loss": 26.865, "step": 23011 }, { "epoch": 547.9074626865672, "grad_norm": 29.577978134155273, "learning_rate": 9.862244897959184e-06, "loss": 27.3095, "step": 23012 }, { "epoch": 547.9313432835821, "grad_norm": 36.32057571411133, "learning_rate": 9.861819727891158e-06, "loss": 26.5676, "step": 23013 }, { "epoch": 547.955223880597, "grad_norm": 30.19183349609375, "learning_rate": 9.86139455782313e-06, "loss": 28.5436, "step": 23014 }, { "epoch": 547.9791044776119, "grad_norm": 29.320655822753906, "learning_rate": 9.860969387755103e-06, "loss": 26.9673, "step": 23015 }, { "epoch": 548.0, "grad_norm": 30.955862045288086, "learning_rate": 9.860544217687076e-06, "loss": 22.794, "step": 23016 }, { "epoch": 548.0238805970149, "grad_norm": 22.712387084960938, "learning_rate": 9.860119047619048e-06, "loss": 27.0453, "step": 23017 }, { "epoch": 548.0477611940298, "grad_norm": 35.32093048095703, "learning_rate": 9.859693877551022e-06, "loss": 26.6155, "step": 23018 }, { "epoch": 548.0716417910447, "grad_norm": 25.39784812927246, "learning_rate": 9.859268707482993e-06, "loss": 26.7977, "step": 23019 }, { "epoch": 548.0955223880597, "grad_norm": 30.481311798095703, "learning_rate": 9.858843537414967e-06, "loss": 27.2128, "step": 23020 }, { "epoch": 548.1194029850747, "grad_norm": 29.56197166442871, "learning_rate": 9.858418367346939e-06, "loss": 26.7045, "step": 23021 }, { "epoch": 548.1432835820896, "grad_norm": 24.794937133789062, "learning_rate": 9.857993197278912e-06, "loss": 27.5555, "step": 23022 }, { "epoch": 548.1671641791045, "grad_norm": 29.837493896484375, "learning_rate": 9.857568027210885e-06, "loss": 27.0338, "step": 23023 }, { "epoch": 548.1910447761194, "grad_norm": 26.014440536499023, "learning_rate": 9.857142857142859e-06, "loss": 26.3483, "step": 23024 }, { "epoch": 548.2149253731343, "grad_norm": 24.971160888671875, "learning_rate": 9.85671768707483e-06, "loss": 27.0978, "step": 23025 }, { "epoch": 548.2388059701492, "grad_norm": 31.558292388916016, "learning_rate": 9.856292517006804e-06, "loss": 26.9209, "step": 23026 }, { "epoch": 548.2626865671642, "grad_norm": 22.212209701538086, "learning_rate": 9.855867346938777e-06, "loss": 26.5672, "step": 23027 }, { "epoch": 548.2865671641791, "grad_norm": 23.878887176513672, "learning_rate": 9.85544217687075e-06, "loss": 26.7905, "step": 23028 }, { "epoch": 548.310447761194, "grad_norm": 33.174949645996094, "learning_rate": 9.855017006802721e-06, "loss": 26.3602, "step": 23029 }, { "epoch": 548.334328358209, "grad_norm": 23.49136734008789, "learning_rate": 9.854591836734694e-06, "loss": 27.4864, "step": 23030 }, { "epoch": 548.3582089552239, "grad_norm": 30.31797218322754, "learning_rate": 9.854166666666668e-06, "loss": 26.4852, "step": 23031 }, { "epoch": 548.3820895522388, "grad_norm": 32.399147033691406, "learning_rate": 9.85374149659864e-06, "loss": 25.3418, "step": 23032 }, { "epoch": 548.4059701492537, "grad_norm": 22.394649505615234, "learning_rate": 9.853316326530613e-06, "loss": 27.0016, "step": 23033 }, { "epoch": 548.4298507462687, "grad_norm": 32.698524475097656, "learning_rate": 9.852891156462587e-06, "loss": 26.8915, "step": 23034 }, { "epoch": 548.4537313432836, "grad_norm": 25.463558197021484, "learning_rate": 9.852465986394558e-06, "loss": 25.9988, "step": 23035 }, { "epoch": 548.4776119402985, "grad_norm": 28.31138038635254, "learning_rate": 9.852040816326532e-06, "loss": 27.4725, "step": 23036 }, { "epoch": 548.5014925373134, "grad_norm": 34.36621856689453, "learning_rate": 9.851615646258505e-06, "loss": 25.4873, "step": 23037 }, { "epoch": 548.5253731343283, "grad_norm": 22.033445358276367, "learning_rate": 9.851190476190477e-06, "loss": 26.8974, "step": 23038 }, { "epoch": 548.5492537313432, "grad_norm": 42.20850372314453, "learning_rate": 9.850765306122449e-06, "loss": 26.4087, "step": 23039 }, { "epoch": 548.5731343283583, "grad_norm": 26.84571647644043, "learning_rate": 9.850340136054422e-06, "loss": 27.3602, "step": 23040 }, { "epoch": 548.5970149253732, "grad_norm": NaN, "learning_rate": 9.849914965986396e-06, "loss": 22.3938, "step": 23041 }, { "epoch": 548.6208955223881, "grad_norm": 30.544620513916016, "learning_rate": 9.849914965986396e-06, "loss": 27.5512, "step": 23042 }, { "epoch": 548.644776119403, "grad_norm": 26.58920669555664, "learning_rate": 9.849489795918367e-06, "loss": 28.6798, "step": 23043 }, { "epoch": 548.6686567164179, "grad_norm": 22.889862060546875, "learning_rate": 9.84906462585034e-06, "loss": 26.9864, "step": 23044 }, { "epoch": 548.6925373134328, "grad_norm": 22.786264419555664, "learning_rate": 9.848639455782314e-06, "loss": 26.4403, "step": 23045 }, { "epoch": 548.7164179104477, "grad_norm": 25.342235565185547, "learning_rate": 9.848214285714288e-06, "loss": 27.3787, "step": 23046 }, { "epoch": 548.7402985074627, "grad_norm": 19.8823184967041, "learning_rate": 9.84778911564626e-06, "loss": 27.0394, "step": 23047 }, { "epoch": 548.7641791044776, "grad_norm": 25.339954376220703, "learning_rate": 9.847363945578233e-06, "loss": 26.6933, "step": 23048 }, { "epoch": 548.7880597014926, "grad_norm": 24.939437866210938, "learning_rate": 9.846938775510205e-06, "loss": 27.4429, "step": 23049 }, { "epoch": 548.8119402985075, "grad_norm": 25.012550354003906, "learning_rate": 9.846513605442178e-06, "loss": 26.7723, "step": 23050 }, { "epoch": 548.8358208955224, "grad_norm": 23.035640716552734, "learning_rate": 9.84608843537415e-06, "loss": 27.4802, "step": 23051 }, { "epoch": 548.8597014925373, "grad_norm": 20.69957733154297, "learning_rate": 9.845663265306123e-06, "loss": 26.1331, "step": 23052 }, { "epoch": 548.8835820895522, "grad_norm": 23.65705680847168, "learning_rate": 9.845238095238097e-06, "loss": 27.5703, "step": 23053 }, { "epoch": 548.9074626865672, "grad_norm": 22.209716796875, "learning_rate": 9.844812925170068e-06, "loss": 26.6222, "step": 23054 }, { "epoch": 548.9313432835821, "grad_norm": 23.58316993713379, "learning_rate": 9.844387755102042e-06, "loss": 26.3599, "step": 23055 }, { "epoch": 548.955223880597, "grad_norm": 20.732494354248047, "learning_rate": 9.843962585034015e-06, "loss": 27.7003, "step": 23056 }, { "epoch": 548.9791044776119, "grad_norm": 22.33191680908203, "learning_rate": 9.843537414965987e-06, "loss": 27.0842, "step": 23057 }, { "epoch": 549.0, "grad_norm": 18.780569076538086, "learning_rate": 9.84311224489796e-06, "loss": 23.8918, "step": 23058 }, { "epoch": 549.0238805970149, "grad_norm": 23.25261878967285, "learning_rate": 9.842687074829932e-06, "loss": 27.0745, "step": 23059 }, { "epoch": 549.0477611940298, "grad_norm": 21.587928771972656, "learning_rate": 9.842261904761906e-06, "loss": 26.3591, "step": 23060 }, { "epoch": 549.0716417910447, "grad_norm": 21.746570587158203, "learning_rate": 9.841836734693878e-06, "loss": 26.5524, "step": 23061 }, { "epoch": 549.0955223880597, "grad_norm": 23.970552444458008, "learning_rate": 9.841411564625851e-06, "loss": 27.3968, "step": 23062 }, { "epoch": 549.1194029850747, "grad_norm": 23.262718200683594, "learning_rate": 9.840986394557824e-06, "loss": 26.2595, "step": 23063 }, { "epoch": 549.1432835820896, "grad_norm": 23.717182159423828, "learning_rate": 9.840561224489796e-06, "loss": 26.5826, "step": 23064 }, { "epoch": 549.1671641791045, "grad_norm": 19.857885360717773, "learning_rate": 9.84013605442177e-06, "loss": 26.7406, "step": 23065 }, { "epoch": 549.1910447761194, "grad_norm": 28.654621124267578, "learning_rate": 9.839710884353743e-06, "loss": 27.079, "step": 23066 }, { "epoch": 549.2149253731343, "grad_norm": 21.05666160583496, "learning_rate": 9.839285714285715e-06, "loss": 27.5045, "step": 23067 }, { "epoch": 549.2388059701492, "grad_norm": 20.218181610107422, "learning_rate": 9.838860544217688e-06, "loss": 27.7459, "step": 23068 }, { "epoch": 549.2626865671642, "grad_norm": 23.153308868408203, "learning_rate": 9.83843537414966e-06, "loss": 27.0609, "step": 23069 }, { "epoch": 549.2865671641791, "grad_norm": 23.806121826171875, "learning_rate": 9.838010204081633e-06, "loss": 27.6594, "step": 23070 }, { "epoch": 549.310447761194, "grad_norm": 27.011611938476562, "learning_rate": 9.837585034013605e-06, "loss": 27.5727, "step": 23071 }, { "epoch": 549.334328358209, "grad_norm": 18.620420455932617, "learning_rate": 9.837159863945579e-06, "loss": 26.2419, "step": 23072 }, { "epoch": 549.3582089552239, "grad_norm": 30.387453079223633, "learning_rate": 9.836734693877552e-06, "loss": 27.6743, "step": 23073 }, { "epoch": 549.3820895522388, "grad_norm": 29.8458194732666, "learning_rate": 9.836309523809524e-06, "loss": 26.5699, "step": 23074 }, { "epoch": 549.4059701492537, "grad_norm": 22.739704132080078, "learning_rate": 9.835884353741497e-06, "loss": 27.1293, "step": 23075 }, { "epoch": 549.4298507462687, "grad_norm": 26.987668991088867, "learning_rate": 9.83545918367347e-06, "loss": 26.9405, "step": 23076 }, { "epoch": 549.4537313432836, "grad_norm": 26.463573455810547, "learning_rate": 9.835034013605444e-06, "loss": 25.942, "step": 23077 }, { "epoch": 549.4776119402985, "grad_norm": 21.697717666625977, "learning_rate": 9.834608843537416e-06, "loss": 27.041, "step": 23078 }, { "epoch": 549.5014925373134, "grad_norm": 22.666189193725586, "learning_rate": 9.834183673469388e-06, "loss": 26.8047, "step": 23079 }, { "epoch": 549.5253731343283, "grad_norm": 26.256366729736328, "learning_rate": 9.833758503401361e-06, "loss": 26.1077, "step": 23080 }, { "epoch": 549.5492537313432, "grad_norm": 25.795434951782227, "learning_rate": 9.833333333333333e-06, "loss": 26.7982, "step": 23081 }, { "epoch": 549.5731343283583, "grad_norm": 20.52659797668457, "learning_rate": 9.832908163265306e-06, "loss": 26.812, "step": 23082 }, { "epoch": 549.5970149253732, "grad_norm": 19.22266387939453, "learning_rate": 9.83248299319728e-06, "loss": 26.245, "step": 23083 }, { "epoch": 549.6208955223881, "grad_norm": 24.83538055419922, "learning_rate": 9.832057823129253e-06, "loss": 26.5028, "step": 23084 }, { "epoch": 549.644776119403, "grad_norm": 23.78462791442871, "learning_rate": 9.831632653061225e-06, "loss": 26.0317, "step": 23085 }, { "epoch": 549.6686567164179, "grad_norm": 21.808422088623047, "learning_rate": 9.831207482993198e-06, "loss": 26.3387, "step": 23086 }, { "epoch": 549.6925373134328, "grad_norm": 19.214622497558594, "learning_rate": 9.830782312925172e-06, "loss": 26.5155, "step": 23087 }, { "epoch": 549.7164179104477, "grad_norm": 23.114221572875977, "learning_rate": 9.830357142857144e-06, "loss": 26.5005, "step": 23088 }, { "epoch": 549.7402985074627, "grad_norm": 25.497344970703125, "learning_rate": 9.829931972789115e-06, "loss": 27.3694, "step": 23089 }, { "epoch": 549.7641791044776, "grad_norm": 22.119598388671875, "learning_rate": 9.829506802721089e-06, "loss": 26.9038, "step": 23090 }, { "epoch": 549.7880597014926, "grad_norm": 19.676259994506836, "learning_rate": 9.829081632653062e-06, "loss": 27.4572, "step": 23091 }, { "epoch": 549.8119402985075, "grad_norm": 21.97138023376465, "learning_rate": 9.828656462585034e-06, "loss": 26.8107, "step": 23092 }, { "epoch": 549.8358208955224, "grad_norm": 21.13986587524414, "learning_rate": 9.828231292517008e-06, "loss": 26.2227, "step": 23093 }, { "epoch": 549.8597014925373, "grad_norm": 25.693586349487305, "learning_rate": 9.827806122448981e-06, "loss": 26.2675, "step": 23094 }, { "epoch": 549.8835820895522, "grad_norm": 21.41407012939453, "learning_rate": 9.827380952380953e-06, "loss": 27.3354, "step": 23095 }, { "epoch": 549.9074626865672, "grad_norm": 22.705402374267578, "learning_rate": 9.826955782312926e-06, "loss": 27.1743, "step": 23096 }, { "epoch": 549.9313432835821, "grad_norm": 22.59469223022461, "learning_rate": 9.8265306122449e-06, "loss": 27.1451, "step": 23097 }, { "epoch": 549.955223880597, "grad_norm": 30.12335968017578, "learning_rate": 9.826105442176871e-06, "loss": 27.5314, "step": 23098 }, { "epoch": 549.9791044776119, "grad_norm": 23.527379989624023, "learning_rate": 9.825680272108845e-06, "loss": 27.3064, "step": 23099 }, { "epoch": 550.0, "grad_norm": 19.314720153808594, "learning_rate": 9.825255102040817e-06, "loss": 24.0002, "step": 23100 }, { "epoch": 550.0238805970149, "grad_norm": 20.565654754638672, "learning_rate": 9.82482993197279e-06, "loss": 26.8418, "step": 23101 }, { "epoch": 550.0477611940298, "grad_norm": 28.252260208129883, "learning_rate": 9.824404761904762e-06, "loss": 26.6467, "step": 23102 }, { "epoch": 550.0716417910447, "grad_norm": 23.813154220581055, "learning_rate": 9.823979591836735e-06, "loss": 27.3245, "step": 23103 }, { "epoch": 550.0955223880597, "grad_norm": 26.232004165649414, "learning_rate": 9.823554421768709e-06, "loss": 27.9007, "step": 23104 }, { "epoch": 550.1194029850747, "grad_norm": 28.611555099487305, "learning_rate": 9.823129251700682e-06, "loss": 25.8132, "step": 23105 }, { "epoch": 550.1432835820896, "grad_norm": 26.25252342224121, "learning_rate": 9.822704081632654e-06, "loss": 27.842, "step": 23106 }, { "epoch": 550.1671641791045, "grad_norm": 22.779029846191406, "learning_rate": 9.822278911564627e-06, "loss": 27.4826, "step": 23107 }, { "epoch": 550.1910447761194, "grad_norm": 22.23136329650879, "learning_rate": 9.821853741496599e-06, "loss": 26.8047, "step": 23108 }, { "epoch": 550.2149253731343, "grad_norm": 23.97928810119629, "learning_rate": 9.821428571428573e-06, "loss": 27.0638, "step": 23109 }, { "epoch": 550.2388059701492, "grad_norm": 23.40543556213379, "learning_rate": 9.821003401360544e-06, "loss": 26.6057, "step": 23110 }, { "epoch": 550.2626865671642, "grad_norm": 21.659671783447266, "learning_rate": 9.820578231292518e-06, "loss": 26.6436, "step": 23111 }, { "epoch": 550.2865671641791, "grad_norm": 22.681320190429688, "learning_rate": 9.820153061224491e-06, "loss": 26.8461, "step": 23112 }, { "epoch": 550.310447761194, "grad_norm": 23.800640106201172, "learning_rate": 9.819727891156463e-06, "loss": 25.4585, "step": 23113 }, { "epoch": 550.334328358209, "grad_norm": 22.108478546142578, "learning_rate": 9.819302721088436e-06, "loss": 26.8653, "step": 23114 }, { "epoch": 550.3582089552239, "grad_norm": 22.911361694335938, "learning_rate": 9.81887755102041e-06, "loss": 27.2434, "step": 23115 }, { "epoch": 550.3820895522388, "grad_norm": 29.838186264038086, "learning_rate": 9.818452380952382e-06, "loss": 26.5697, "step": 23116 }, { "epoch": 550.4059701492537, "grad_norm": 21.949581146240234, "learning_rate": 9.818027210884355e-06, "loss": 26.2497, "step": 23117 }, { "epoch": 550.4298507462687, "grad_norm": 25.016523361206055, "learning_rate": 9.817602040816327e-06, "loss": 27.5685, "step": 23118 }, { "epoch": 550.4537313432836, "grad_norm": 25.066362380981445, "learning_rate": 9.8171768707483e-06, "loss": 26.3074, "step": 23119 }, { "epoch": 550.4776119402985, "grad_norm": 22.587282180786133, "learning_rate": 9.816751700680272e-06, "loss": 26.3973, "step": 23120 }, { "epoch": 550.5014925373134, "grad_norm": 22.968456268310547, "learning_rate": 9.816326530612245e-06, "loss": 26.4845, "step": 23121 }, { "epoch": 550.5253731343283, "grad_norm": 23.084138870239258, "learning_rate": 9.815901360544219e-06, "loss": 26.6648, "step": 23122 }, { "epoch": 550.5492537313432, "grad_norm": 22.290802001953125, "learning_rate": 9.81547619047619e-06, "loss": 26.9216, "step": 23123 }, { "epoch": 550.5731343283583, "grad_norm": 23.432058334350586, "learning_rate": 9.815051020408164e-06, "loss": 26.7392, "step": 23124 }, { "epoch": 550.5970149253732, "grad_norm": 24.511510848999023, "learning_rate": 9.814625850340137e-06, "loss": 26.7817, "step": 23125 }, { "epoch": 550.6208955223881, "grad_norm": 23.013153076171875, "learning_rate": 9.814200680272111e-06, "loss": 27.1831, "step": 23126 }, { "epoch": 550.644776119403, "grad_norm": 22.16325569152832, "learning_rate": 9.813775510204083e-06, "loss": 25.9318, "step": 23127 }, { "epoch": 550.6686567164179, "grad_norm": 24.9527645111084, "learning_rate": 9.813350340136054e-06, "loss": 25.9348, "step": 23128 }, { "epoch": 550.6925373134328, "grad_norm": 19.98915672302246, "learning_rate": 9.812925170068028e-06, "loss": 27.2876, "step": 23129 }, { "epoch": 550.7164179104477, "grad_norm": 26.12885284423828, "learning_rate": 9.8125e-06, "loss": 26.8072, "step": 23130 }, { "epoch": 550.7402985074627, "grad_norm": 22.822261810302734, "learning_rate": 9.812074829931973e-06, "loss": 27.2909, "step": 23131 }, { "epoch": 550.7641791044776, "grad_norm": 23.481815338134766, "learning_rate": 9.811649659863947e-06, "loss": 27.8775, "step": 23132 }, { "epoch": 550.7880597014926, "grad_norm": 22.126087188720703, "learning_rate": 9.81122448979592e-06, "loss": 26.6118, "step": 23133 }, { "epoch": 550.8119402985075, "grad_norm": 20.709285736083984, "learning_rate": 9.810799319727892e-06, "loss": 26.5806, "step": 23134 }, { "epoch": 550.8358208955224, "grad_norm": 21.8825740814209, "learning_rate": 9.810374149659865e-06, "loss": 27.0455, "step": 23135 }, { "epoch": 550.8597014925373, "grad_norm": 24.603843688964844, "learning_rate": 9.809948979591839e-06, "loss": 27.1514, "step": 23136 }, { "epoch": 550.8835820895522, "grad_norm": 23.52168083190918, "learning_rate": 9.80952380952381e-06, "loss": 27.1626, "step": 23137 }, { "epoch": 550.9074626865672, "grad_norm": 21.74613380432129, "learning_rate": 9.809098639455784e-06, "loss": 26.7873, "step": 23138 }, { "epoch": 550.9313432835821, "grad_norm": 21.328771591186523, "learning_rate": 9.808673469387756e-06, "loss": 26.8257, "step": 23139 }, { "epoch": 550.955223880597, "grad_norm": 21.85343360900879, "learning_rate": 9.808248299319729e-06, "loss": 26.4202, "step": 23140 }, { "epoch": 550.9791044776119, "grad_norm": 18.101791381835938, "learning_rate": 9.8078231292517e-06, "loss": 27.2397, "step": 23141 }, { "epoch": 551.0, "grad_norm": 21.275150299072266, "learning_rate": 9.807397959183674e-06, "loss": 23.4403, "step": 23142 }, { "epoch": 551.0238805970149, "grad_norm": 24.425575256347656, "learning_rate": 9.806972789115648e-06, "loss": 26.1261, "step": 23143 }, { "epoch": 551.0477611940298, "grad_norm": 22.24608612060547, "learning_rate": 9.80654761904762e-06, "loss": 27.6304, "step": 23144 }, { "epoch": 551.0716417910447, "grad_norm": 23.44178009033203, "learning_rate": 9.806122448979593e-06, "loss": 26.6671, "step": 23145 }, { "epoch": 551.0955223880597, "grad_norm": 24.367143630981445, "learning_rate": 9.805697278911566e-06, "loss": 25.7995, "step": 23146 }, { "epoch": 551.1194029850747, "grad_norm": 22.366479873657227, "learning_rate": 9.805272108843538e-06, "loss": 25.6966, "step": 23147 }, { "epoch": 551.1432835820896, "grad_norm": 23.0607852935791, "learning_rate": 9.804846938775512e-06, "loss": 27.0115, "step": 23148 }, { "epoch": 551.1671641791045, "grad_norm": 21.187355041503906, "learning_rate": 9.804421768707483e-06, "loss": 26.9822, "step": 23149 }, { "epoch": 551.1910447761194, "grad_norm": 19.54372787475586, "learning_rate": 9.803996598639457e-06, "loss": 26.3266, "step": 23150 }, { "epoch": 551.2149253731343, "grad_norm": 26.077943801879883, "learning_rate": 9.803571428571428e-06, "loss": 26.2043, "step": 23151 }, { "epoch": 551.2388059701492, "grad_norm": 23.991180419921875, "learning_rate": 9.803146258503402e-06, "loss": 26.4589, "step": 23152 }, { "epoch": 551.2626865671642, "grad_norm": 21.91676902770996, "learning_rate": 9.802721088435375e-06, "loss": 26.1631, "step": 23153 }, { "epoch": 551.2865671641791, "grad_norm": NaN, "learning_rate": 9.802295918367349e-06, "loss": 43.4002, "step": 23154 }, { "epoch": 551.310447761194, "grad_norm": 23.622753143310547, "learning_rate": 9.802295918367349e-06, "loss": 27.0796, "step": 23155 }, { "epoch": 551.334328358209, "grad_norm": 28.837556838989258, "learning_rate": 9.80187074829932e-06, "loss": 27.6227, "step": 23156 }, { "epoch": 551.3582089552239, "grad_norm": 26.748538970947266, "learning_rate": 9.801445578231294e-06, "loss": 25.8171, "step": 23157 }, { "epoch": 551.3820895522388, "grad_norm": 23.56424331665039, "learning_rate": 9.801020408163266e-06, "loss": 26.9865, "step": 23158 }, { "epoch": 551.4059701492537, "grad_norm": 30.473098754882812, "learning_rate": 9.80059523809524e-06, "loss": 26.3323, "step": 23159 }, { "epoch": 551.4298507462687, "grad_norm": 24.970640182495117, "learning_rate": 9.800170068027211e-06, "loss": 26.9041, "step": 23160 }, { "epoch": 551.4537313432836, "grad_norm": 22.914976119995117, "learning_rate": 9.799744897959184e-06, "loss": 27.2978, "step": 23161 }, { "epoch": 551.4776119402985, "grad_norm": 25.1602840423584, "learning_rate": 9.799319727891158e-06, "loss": 26.671, "step": 23162 }, { "epoch": 551.5014925373134, "grad_norm": 28.781038284301758, "learning_rate": 9.79889455782313e-06, "loss": 26.872, "step": 23163 }, { "epoch": 551.5253731343283, "grad_norm": 20.689260482788086, "learning_rate": 9.798469387755103e-06, "loss": 26.8574, "step": 23164 }, { "epoch": 551.5492537313432, "grad_norm": 23.619779586791992, "learning_rate": 9.798044217687077e-06, "loss": 27.32, "step": 23165 }, { "epoch": 551.5731343283583, "grad_norm": 29.56684684753418, "learning_rate": 9.797619047619048e-06, "loss": 26.0825, "step": 23166 }, { "epoch": 551.5970149253732, "grad_norm": 21.955678939819336, "learning_rate": 9.797193877551022e-06, "loss": 26.5474, "step": 23167 }, { "epoch": 551.6208955223881, "grad_norm": 22.76906967163086, "learning_rate": 9.796768707482993e-06, "loss": 27.5334, "step": 23168 }, { "epoch": 551.644776119403, "grad_norm": 29.94280242919922, "learning_rate": 9.796343537414967e-06, "loss": 26.6865, "step": 23169 }, { "epoch": 551.6686567164179, "grad_norm": 26.073335647583008, "learning_rate": 9.795918367346939e-06, "loss": 28.1389, "step": 23170 }, { "epoch": 551.6925373134328, "grad_norm": 22.582971572875977, "learning_rate": 9.795493197278912e-06, "loss": 27.1921, "step": 23171 }, { "epoch": 551.7164179104477, "grad_norm": 28.547958374023438, "learning_rate": 9.795068027210886e-06, "loss": 26.4313, "step": 23172 }, { "epoch": 551.7402985074627, "grad_norm": 30.060951232910156, "learning_rate": 9.794642857142857e-06, "loss": 26.6968, "step": 23173 }, { "epoch": 551.7641791044776, "grad_norm": 19.238950729370117, "learning_rate": 9.79421768707483e-06, "loss": 26.937, "step": 23174 }, { "epoch": 551.7880597014926, "grad_norm": 25.914609909057617, "learning_rate": 9.793792517006804e-06, "loss": 25.9997, "step": 23175 }, { "epoch": 551.8119402985075, "grad_norm": 28.622140884399414, "learning_rate": 9.793367346938776e-06, "loss": 26.1521, "step": 23176 }, { "epoch": 551.8358208955224, "grad_norm": 19.21538734436035, "learning_rate": 9.79294217687075e-06, "loss": 26.621, "step": 23177 }, { "epoch": 551.8597014925373, "grad_norm": 22.553592681884766, "learning_rate": 9.792517006802721e-06, "loss": 26.4321, "step": 23178 }, { "epoch": 551.8835820895522, "grad_norm": 29.21376609802246, "learning_rate": 9.792091836734695e-06, "loss": 27.5846, "step": 23179 }, { "epoch": 551.9074626865672, "grad_norm": 22.489356994628906, "learning_rate": 9.791666666666666e-06, "loss": 27.7074, "step": 23180 }, { "epoch": 551.9313432835821, "grad_norm": 20.625944137573242, "learning_rate": 9.79124149659864e-06, "loss": 27.5289, "step": 23181 }, { "epoch": 551.955223880597, "grad_norm": 26.809602737426758, "learning_rate": 9.790816326530613e-06, "loss": 26.3487, "step": 23182 }, { "epoch": 551.9791044776119, "grad_norm": 19.698688507080078, "learning_rate": 9.790391156462585e-06, "loss": 28.2878, "step": 23183 }, { "epoch": 552.0, "grad_norm": 21.33458709716797, "learning_rate": 9.789965986394558e-06, "loss": 23.1352, "step": 23184 }, { "epoch": 552.0238805970149, "grad_norm": 19.613479614257812, "learning_rate": 9.789540816326532e-06, "loss": 26.9606, "step": 23185 }, { "epoch": 552.0477611940298, "grad_norm": 29.175310134887695, "learning_rate": 9.789115646258505e-06, "loss": 26.9196, "step": 23186 }, { "epoch": 552.0716417910447, "grad_norm": 22.801074981689453, "learning_rate": 9.788690476190477e-06, "loss": 25.3321, "step": 23187 }, { "epoch": 552.0955223880597, "grad_norm": 28.533605575561523, "learning_rate": 9.78826530612245e-06, "loss": 26.2336, "step": 23188 }, { "epoch": 552.1194029850747, "grad_norm": 23.426956176757812, "learning_rate": 9.787840136054422e-06, "loss": 26.8209, "step": 23189 }, { "epoch": 552.1432835820896, "grad_norm": 23.982030868530273, "learning_rate": 9.787414965986394e-06, "loss": 27.9589, "step": 23190 }, { "epoch": 552.1671641791045, "grad_norm": 24.91910743713379, "learning_rate": 9.786989795918368e-06, "loss": 26.5541, "step": 23191 }, { "epoch": 552.1910447761194, "grad_norm": 25.104036331176758, "learning_rate": 9.786564625850341e-06, "loss": 26.5907, "step": 23192 }, { "epoch": 552.2149253731343, "grad_norm": 23.585145950317383, "learning_rate": 9.786139455782314e-06, "loss": 27.2511, "step": 23193 }, { "epoch": 552.2388059701492, "grad_norm": 19.284912109375, "learning_rate": 9.785714285714286e-06, "loss": 27.594, "step": 23194 }, { "epoch": 552.2626865671642, "grad_norm": 25.284812927246094, "learning_rate": 9.78528911564626e-06, "loss": 26.3137, "step": 23195 }, { "epoch": 552.2865671641791, "grad_norm": 25.30999183654785, "learning_rate": 9.784863945578233e-06, "loss": 25.7517, "step": 23196 }, { "epoch": 552.310447761194, "grad_norm": 21.004079818725586, "learning_rate": 9.784438775510205e-06, "loss": 26.64, "step": 23197 }, { "epoch": 552.334328358209, "grad_norm": 19.800817489624023, "learning_rate": 9.784013605442178e-06, "loss": 27.3497, "step": 23198 }, { "epoch": 552.3582089552239, "grad_norm": 27.62127685546875, "learning_rate": 9.78358843537415e-06, "loss": 26.0379, "step": 23199 }, { "epoch": 552.3820895522388, "grad_norm": 27.012426376342773, "learning_rate": 9.783163265306123e-06, "loss": 27.6457, "step": 23200 }, { "epoch": 552.4059701492537, "grad_norm": 22.314891815185547, "learning_rate": 9.782738095238095e-06, "loss": 25.7557, "step": 23201 }, { "epoch": 552.4298507462687, "grad_norm": 20.646827697753906, "learning_rate": 9.782312925170069e-06, "loss": 26.9554, "step": 23202 }, { "epoch": 552.4537313432836, "grad_norm": 24.4808349609375, "learning_rate": 9.781887755102042e-06, "loss": 27.0642, "step": 23203 }, { "epoch": 552.4776119402985, "grad_norm": 23.65561294555664, "learning_rate": 9.781462585034014e-06, "loss": 26.832, "step": 23204 }, { "epoch": 552.5014925373134, "grad_norm": 19.699853897094727, "learning_rate": 9.781037414965987e-06, "loss": 27.8202, "step": 23205 }, { "epoch": 552.5253731343283, "grad_norm": 24.844799041748047, "learning_rate": 9.78061224489796e-06, "loss": 26.9127, "step": 23206 }, { "epoch": 552.5492537313432, "grad_norm": 27.748868942260742, "learning_rate": 9.780187074829933e-06, "loss": 27.2514, "step": 23207 }, { "epoch": 552.5731343283583, "grad_norm": 21.646263122558594, "learning_rate": 9.779761904761906e-06, "loss": 25.8531, "step": 23208 }, { "epoch": 552.5970149253732, "grad_norm": 22.192548751831055, "learning_rate": 9.779336734693878e-06, "loss": 26.1922, "step": 23209 }, { "epoch": 552.6208955223881, "grad_norm": 31.024192810058594, "learning_rate": 9.778911564625851e-06, "loss": 27.1376, "step": 23210 }, { "epoch": 552.644776119403, "grad_norm": 23.21217918395996, "learning_rate": 9.778486394557823e-06, "loss": 26.4848, "step": 23211 }, { "epoch": 552.6686567164179, "grad_norm": 22.290607452392578, "learning_rate": 9.778061224489796e-06, "loss": 26.199, "step": 23212 }, { "epoch": 552.6925373134328, "grad_norm": 28.92233657836914, "learning_rate": 9.77763605442177e-06, "loss": 26.6219, "step": 23213 }, { "epoch": 552.7164179104477, "grad_norm": 29.54973602294922, "learning_rate": 9.777210884353743e-06, "loss": 27.1995, "step": 23214 }, { "epoch": 552.7402985074627, "grad_norm": 19.796241760253906, "learning_rate": 9.776785714285715e-06, "loss": 26.4115, "step": 23215 }, { "epoch": 552.7641791044776, "grad_norm": 24.339929580688477, "learning_rate": 9.776360544217688e-06, "loss": 26.7238, "step": 23216 }, { "epoch": 552.7880597014926, "grad_norm": 28.18512725830078, "learning_rate": 9.77593537414966e-06, "loss": 26.1556, "step": 23217 }, { "epoch": 552.8119402985075, "grad_norm": 18.920501708984375, "learning_rate": 9.775510204081634e-06, "loss": 27.133, "step": 23218 }, { "epoch": 552.8358208955224, "grad_norm": NaN, "learning_rate": 9.775085034013605e-06, "loss": 23.0158, "step": 23219 }, { "epoch": 552.8597014925373, "grad_norm": 23.507221221923828, "learning_rate": 9.775085034013605e-06, "loss": 26.5095, "step": 23220 }, { "epoch": 552.8835820895522, "grad_norm": 26.70237922668457, "learning_rate": 9.774659863945579e-06, "loss": 27.2127, "step": 23221 }, { "epoch": 552.9074626865672, "grad_norm": 24.57735824584961, "learning_rate": 9.774234693877552e-06, "loss": 26.719, "step": 23222 }, { "epoch": 552.9313432835821, "grad_norm": 19.88035774230957, "learning_rate": 9.773809523809524e-06, "loss": 27.112, "step": 23223 }, { "epoch": 552.955223880597, "grad_norm": 24.205886840820312, "learning_rate": 9.773384353741497e-06, "loss": 26.5845, "step": 23224 }, { "epoch": 552.9791044776119, "grad_norm": NaN, "learning_rate": 9.772959183673471e-06, "loss": 48.3396, "step": 23225 }, { "epoch": 553.0, "grad_norm": 23.955015182495117, "learning_rate": 9.772959183673471e-06, "loss": 24.226, "step": 23226 }, { "epoch": 553.0238805970149, "grad_norm": 19.974395751953125, "learning_rate": 9.772534013605443e-06, "loss": 26.1802, "step": 23227 }, { "epoch": 553.0477611940298, "grad_norm": 23.343358993530273, "learning_rate": 9.772108843537416e-06, "loss": 26.5532, "step": 23228 }, { "epoch": 553.0716417910447, "grad_norm": 20.342151641845703, "learning_rate": 9.771683673469388e-06, "loss": 27.1503, "step": 23229 }, { "epoch": 553.0955223880597, "grad_norm": 25.41259002685547, "learning_rate": 9.771258503401361e-06, "loss": 26.3433, "step": 23230 }, { "epoch": 553.1194029850747, "grad_norm": 26.440706253051758, "learning_rate": 9.770833333333333e-06, "loss": 27.5754, "step": 23231 }, { "epoch": 553.1432835820896, "grad_norm": 21.061298370361328, "learning_rate": 9.770408163265307e-06, "loss": 26.4013, "step": 23232 }, { "epoch": 553.1671641791045, "grad_norm": 22.252832412719727, "learning_rate": 9.76998299319728e-06, "loss": 26.7129, "step": 23233 }, { "epoch": 553.1910447761194, "grad_norm": 20.58338737487793, "learning_rate": 9.769557823129252e-06, "loss": 26.0195, "step": 23234 }, { "epoch": 553.2149253731343, "grad_norm": 23.640586853027344, "learning_rate": 9.769132653061225e-06, "loss": 25.8207, "step": 23235 }, { "epoch": 553.2388059701492, "grad_norm": 21.997180938720703, "learning_rate": 9.768707482993199e-06, "loss": 26.9882, "step": 23236 }, { "epoch": 553.2626865671642, "grad_norm": 24.50812339782715, "learning_rate": 9.768282312925172e-06, "loss": 26.6655, "step": 23237 }, { "epoch": 553.2865671641791, "grad_norm": 21.092844009399414, "learning_rate": 9.767857142857144e-06, "loss": 26.0067, "step": 23238 }, { "epoch": 553.310447761194, "grad_norm": 27.765716552734375, "learning_rate": 9.767431972789117e-06, "loss": 27.1966, "step": 23239 }, { "epoch": 553.334328358209, "grad_norm": 25.302780151367188, "learning_rate": 9.767006802721089e-06, "loss": 27.7254, "step": 23240 }, { "epoch": 553.3582089552239, "grad_norm": 20.325105667114258, "learning_rate": 9.76658163265306e-06, "loss": 26.9484, "step": 23241 }, { "epoch": 553.3820895522388, "grad_norm": 30.660839080810547, "learning_rate": 9.766156462585034e-06, "loss": 27.7853, "step": 23242 }, { "epoch": 553.4059701492537, "grad_norm": 28.226287841796875, "learning_rate": 9.765731292517008e-06, "loss": 27.1198, "step": 23243 }, { "epoch": 553.4298507462687, "grad_norm": 21.383699417114258, "learning_rate": 9.765306122448981e-06, "loss": 26.1207, "step": 23244 }, { "epoch": 553.4537313432836, "grad_norm": 25.88629150390625, "learning_rate": 9.764880952380953e-06, "loss": 26.8459, "step": 23245 }, { "epoch": 553.4776119402985, "grad_norm": 31.894081115722656, "learning_rate": 9.764455782312926e-06, "loss": 25.8107, "step": 23246 }, { "epoch": 553.5014925373134, "grad_norm": 22.16810417175293, "learning_rate": 9.7640306122449e-06, "loss": 27.4056, "step": 23247 }, { "epoch": 553.5253731343283, "grad_norm": 34.5290412902832, "learning_rate": 9.763605442176872e-06, "loss": 27.3146, "step": 23248 }, { "epoch": 553.5492537313432, "grad_norm": 32.151512145996094, "learning_rate": 9.763180272108845e-06, "loss": 27.3724, "step": 23249 }, { "epoch": 553.5731343283583, "grad_norm": 21.228046417236328, "learning_rate": 9.762755102040817e-06, "loss": 24.9474, "step": 23250 }, { "epoch": 553.5970149253732, "grad_norm": 41.6654052734375, "learning_rate": 9.76232993197279e-06, "loss": 26.9829, "step": 23251 }, { "epoch": 553.6208955223881, "grad_norm": 27.169994354248047, "learning_rate": 9.761904761904762e-06, "loss": 26.3301, "step": 23252 }, { "epoch": 553.644776119403, "grad_norm": 37.770023345947266, "learning_rate": 9.761479591836735e-06, "loss": 26.6202, "step": 23253 }, { "epoch": 553.6686567164179, "grad_norm": 29.378576278686523, "learning_rate": 9.761054421768709e-06, "loss": 26.3574, "step": 23254 }, { "epoch": 553.6925373134328, "grad_norm": 35.22139358520508, "learning_rate": 9.76062925170068e-06, "loss": 26.3158, "step": 23255 }, { "epoch": 553.7164179104477, "grad_norm": 31.528770446777344, "learning_rate": 9.760204081632654e-06, "loss": 27.057, "step": 23256 }, { "epoch": 553.7402985074627, "grad_norm": 30.852102279663086, "learning_rate": 9.759778911564627e-06, "loss": 27.4528, "step": 23257 }, { "epoch": 553.7641791044776, "grad_norm": 35.68628692626953, "learning_rate": 9.7593537414966e-06, "loss": 28.2765, "step": 23258 }, { "epoch": 553.7880597014926, "grad_norm": 25.633359909057617, "learning_rate": 9.758928571428573e-06, "loss": 28.4917, "step": 23259 }, { "epoch": 553.8119402985075, "grad_norm": 37.772438049316406, "learning_rate": 9.758503401360544e-06, "loss": 26.8292, "step": 23260 }, { "epoch": 553.8358208955224, "grad_norm": 24.40919303894043, "learning_rate": 9.758078231292518e-06, "loss": 25.6009, "step": 23261 }, { "epoch": 553.8597014925373, "grad_norm": 43.200439453125, "learning_rate": 9.75765306122449e-06, "loss": 26.7473, "step": 23262 }, { "epoch": 553.8835820895522, "grad_norm": 30.848594665527344, "learning_rate": 9.757227891156463e-06, "loss": 26.8838, "step": 23263 }, { "epoch": 553.9074626865672, "grad_norm": 35.38853073120117, "learning_rate": 9.756802721088437e-06, "loss": 26.7669, "step": 23264 }, { "epoch": 553.9313432835821, "grad_norm": 30.56417465209961, "learning_rate": 9.75637755102041e-06, "loss": 27.1119, "step": 23265 }, { "epoch": 553.955223880597, "grad_norm": 26.0107479095459, "learning_rate": 9.755952380952382e-06, "loss": 25.6785, "step": 23266 }, { "epoch": 553.9791044776119, "grad_norm": 37.30509948730469, "learning_rate": 9.755527210884355e-06, "loss": 26.9306, "step": 23267 }, { "epoch": 554.0, "grad_norm": 22.97418975830078, "learning_rate": 9.755102040816327e-06, "loss": 23.2, "step": 23268 }, { "epoch": 554.0238805970149, "grad_norm": 39.398681640625, "learning_rate": 9.7546768707483e-06, "loss": 26.7748, "step": 23269 }, { "epoch": 554.0477611940298, "grad_norm": 25.795520782470703, "learning_rate": 9.754251700680272e-06, "loss": 26.3103, "step": 23270 }, { "epoch": 554.0716417910447, "grad_norm": 46.675052642822266, "learning_rate": 9.753826530612246e-06, "loss": 27.342, "step": 23271 }, { "epoch": 554.0955223880597, "grad_norm": 29.136455535888672, "learning_rate": 9.753401360544217e-06, "loss": 26.7686, "step": 23272 }, { "epoch": 554.1194029850747, "grad_norm": 47.1674690246582, "learning_rate": 9.75297619047619e-06, "loss": 26.7398, "step": 23273 }, { "epoch": 554.1432835820896, "grad_norm": 33.41481018066406, "learning_rate": 9.752551020408164e-06, "loss": 25.4461, "step": 23274 }, { "epoch": 554.1671641791045, "grad_norm": 43.85054397583008, "learning_rate": 9.752125850340138e-06, "loss": 26.1519, "step": 23275 }, { "epoch": 554.1910447761194, "grad_norm": 34.75920867919922, "learning_rate": 9.75170068027211e-06, "loss": 26.5309, "step": 23276 }, { "epoch": 554.2149253731343, "grad_norm": 48.437259674072266, "learning_rate": 9.751275510204083e-06, "loss": 27.254, "step": 23277 }, { "epoch": 554.2388059701492, "grad_norm": 36.25301742553711, "learning_rate": 9.750850340136055e-06, "loss": 26.3407, "step": 23278 }, { "epoch": 554.2626865671642, "grad_norm": 49.48542785644531, "learning_rate": 9.750425170068028e-06, "loss": 27.3725, "step": 23279 }, { "epoch": 554.2865671641791, "grad_norm": 43.85820007324219, "learning_rate": 9.75e-06, "loss": 25.9211, "step": 23280 }, { "epoch": 554.310447761194, "grad_norm": 42.87506103515625, "learning_rate": 9.749574829931973e-06, "loss": 26.0229, "step": 23281 }, { "epoch": 554.334328358209, "grad_norm": 44.62392044067383, "learning_rate": 9.749149659863947e-06, "loss": 27.83, "step": 23282 }, { "epoch": 554.3582089552239, "grad_norm": 38.360965728759766, "learning_rate": 9.748724489795918e-06, "loss": 26.9094, "step": 23283 }, { "epoch": 554.3820895522388, "grad_norm": 37.21182632446289, "learning_rate": 9.748299319727892e-06, "loss": 26.7967, "step": 23284 }, { "epoch": 554.4059701492537, "grad_norm": 39.86935806274414, "learning_rate": 9.747874149659865e-06, "loss": 26.5697, "step": 23285 }, { "epoch": 554.4298507462687, "grad_norm": 34.82604217529297, "learning_rate": 9.747448979591837e-06, "loss": 27.6769, "step": 23286 }, { "epoch": 554.4537313432836, "grad_norm": 43.03139114379883, "learning_rate": 9.74702380952381e-06, "loss": 26.9981, "step": 23287 }, { "epoch": 554.4776119402985, "grad_norm": 39.490936279296875, "learning_rate": 9.746598639455784e-06, "loss": 26.8851, "step": 23288 }, { "epoch": 554.5014925373134, "grad_norm": 47.94719696044922, "learning_rate": 9.746173469387756e-06, "loss": 28.1823, "step": 23289 }, { "epoch": 554.5253731343283, "grad_norm": 40.36114501953125, "learning_rate": 9.745748299319728e-06, "loss": 25.8916, "step": 23290 }, { "epoch": 554.5492537313432, "grad_norm": 47.409889221191406, "learning_rate": 9.745323129251701e-06, "loss": 25.7807, "step": 23291 }, { "epoch": 554.5731343283583, "grad_norm": 44.42271423339844, "learning_rate": 9.744897959183674e-06, "loss": 27.3314, "step": 23292 }, { "epoch": 554.5970149253732, "grad_norm": 41.485816955566406, "learning_rate": 9.744472789115646e-06, "loss": 27.3099, "step": 23293 }, { "epoch": 554.6208955223881, "grad_norm": 40.51455307006836, "learning_rate": 9.74404761904762e-06, "loss": 26.9637, "step": 23294 }, { "epoch": 554.644776119403, "grad_norm": 37.47166061401367, "learning_rate": 9.743622448979593e-06, "loss": 26.8751, "step": 23295 }, { "epoch": 554.6686567164179, "grad_norm": 35.11647415161133, "learning_rate": 9.743197278911567e-06, "loss": 27.1881, "step": 23296 }, { "epoch": 554.6925373134328, "grad_norm": 39.99231719970703, "learning_rate": 9.742772108843538e-06, "loss": 25.8469, "step": 23297 }, { "epoch": 554.7164179104477, "grad_norm": 35.37990188598633, "learning_rate": 9.742346938775512e-06, "loss": 27.6573, "step": 23298 }, { "epoch": 554.7402985074627, "grad_norm": 42.724151611328125, "learning_rate": 9.741921768707483e-06, "loss": 25.4755, "step": 23299 }, { "epoch": 554.7641791044776, "grad_norm": 38.98291015625, "learning_rate": 9.741496598639457e-06, "loss": 26.1959, "step": 23300 }, { "epoch": 554.7880597014926, "grad_norm": 43.2686882019043, "learning_rate": 9.741071428571429e-06, "loss": 26.0769, "step": 23301 }, { "epoch": 554.8119402985075, "grad_norm": 35.896114349365234, "learning_rate": 9.740646258503402e-06, "loss": 26.5831, "step": 23302 }, { "epoch": 554.8358208955224, "grad_norm": 42.473838806152344, "learning_rate": 9.740221088435376e-06, "loss": 26.1494, "step": 23303 }, { "epoch": 554.8597014925373, "grad_norm": 37.97227096557617, "learning_rate": 9.739795918367347e-06, "loss": 26.4449, "step": 23304 }, { "epoch": 554.8835820895522, "grad_norm": 46.01294708251953, "learning_rate": 9.73937074829932e-06, "loss": 27.0682, "step": 23305 }, { "epoch": 554.9074626865672, "grad_norm": 42.848426818847656, "learning_rate": 9.738945578231294e-06, "loss": 26.1521, "step": 23306 }, { "epoch": 554.9313432835821, "grad_norm": 37.734710693359375, "learning_rate": 9.738520408163266e-06, "loss": 27.0734, "step": 23307 }, { "epoch": 554.955223880597, "grad_norm": 41.29497146606445, "learning_rate": 9.73809523809524e-06, "loss": 27.5037, "step": 23308 }, { "epoch": 554.9791044776119, "grad_norm": 37.79869079589844, "learning_rate": 9.737670068027211e-06, "loss": 26.0212, "step": 23309 }, { "epoch": 555.0, "grad_norm": 29.877809524536133, "learning_rate": 9.737244897959185e-06, "loss": 23.7207, "step": 23310 }, { "epoch": 555.0238805970149, "grad_norm": 42.006797790527344, "learning_rate": 9.736819727891156e-06, "loss": 27.4501, "step": 23311 }, { "epoch": 555.0477611940298, "grad_norm": 34.94815444946289, "learning_rate": 9.73639455782313e-06, "loss": 27.2041, "step": 23312 }, { "epoch": 555.0716417910447, "grad_norm": 44.6822509765625, "learning_rate": 9.735969387755103e-06, "loss": 25.5074, "step": 23313 }, { "epoch": 555.0955223880597, "grad_norm": 36.776771545410156, "learning_rate": 9.735544217687075e-06, "loss": 26.255, "step": 23314 }, { "epoch": 555.1194029850747, "grad_norm": 38.68332290649414, "learning_rate": 9.735119047619048e-06, "loss": 25.4031, "step": 23315 }, { "epoch": 555.1432835820896, "grad_norm": 35.432865142822266, "learning_rate": 9.734693877551022e-06, "loss": 26.7791, "step": 23316 }, { "epoch": 555.1671641791045, "grad_norm": 40.801231384277344, "learning_rate": 9.734268707482994e-06, "loss": 26.7503, "step": 23317 }, { "epoch": 555.1910447761194, "grad_norm": 34.09660339355469, "learning_rate": 9.733843537414967e-06, "loss": 26.569, "step": 23318 }, { "epoch": 555.2149253731343, "grad_norm": 38.680328369140625, "learning_rate": 9.733418367346939e-06, "loss": 27.0134, "step": 23319 }, { "epoch": 555.2388059701492, "grad_norm": 37.9785041809082, "learning_rate": 9.732993197278912e-06, "loss": 26.8832, "step": 23320 }, { "epoch": 555.2626865671642, "grad_norm": 38.32898712158203, "learning_rate": 9.732568027210884e-06, "loss": 26.4791, "step": 23321 }, { "epoch": 555.2865671641791, "grad_norm": 35.66883087158203, "learning_rate": 9.732142857142858e-06, "loss": 26.9603, "step": 23322 }, { "epoch": 555.310447761194, "grad_norm": 42.385520935058594, "learning_rate": 9.731717687074831e-06, "loss": 26.9402, "step": 23323 }, { "epoch": 555.334328358209, "grad_norm": 32.81938171386719, "learning_rate": 9.731292517006804e-06, "loss": 27.1107, "step": 23324 }, { "epoch": 555.3582089552239, "grad_norm": 36.41639709472656, "learning_rate": 9.730867346938776e-06, "loss": 25.5401, "step": 23325 }, { "epoch": 555.3820895522388, "grad_norm": 33.9809684753418, "learning_rate": 9.73044217687075e-06, "loss": 26.7968, "step": 23326 }, { "epoch": 555.4059701492537, "grad_norm": 46.745033264160156, "learning_rate": 9.730017006802723e-06, "loss": 27.2899, "step": 23327 }, { "epoch": 555.4298507462687, "grad_norm": 38.37321472167969, "learning_rate": 9.729591836734695e-06, "loss": 26.7111, "step": 23328 }, { "epoch": 555.4537313432836, "grad_norm": 37.90778732299805, "learning_rate": 9.729166666666667e-06, "loss": 26.8808, "step": 23329 }, { "epoch": 555.4776119402985, "grad_norm": 38.079383850097656, "learning_rate": 9.72874149659864e-06, "loss": 27.4088, "step": 23330 }, { "epoch": 555.5014925373134, "grad_norm": 38.98528289794922, "learning_rate": 9.728316326530613e-06, "loss": 26.7514, "step": 23331 }, { "epoch": 555.5253731343283, "grad_norm": 34.466163635253906, "learning_rate": 9.727891156462585e-06, "loss": 26.1195, "step": 23332 }, { "epoch": 555.5492537313432, "grad_norm": 44.46697998046875, "learning_rate": 9.727465986394559e-06, "loss": 27.2456, "step": 23333 }, { "epoch": 555.5731343283583, "grad_norm": 38.03745651245117, "learning_rate": 9.727040816326532e-06, "loss": 26.7802, "step": 23334 }, { "epoch": 555.5970149253732, "grad_norm": 38.761497497558594, "learning_rate": 9.726615646258504e-06, "loss": 27.0495, "step": 23335 }, { "epoch": 555.6208955223881, "grad_norm": 37.48834991455078, "learning_rate": 9.726190476190477e-06, "loss": 27.8018, "step": 23336 }, { "epoch": 555.644776119403, "grad_norm": 38.93128204345703, "learning_rate": 9.72576530612245e-06, "loss": 26.7339, "step": 23337 }, { "epoch": 555.6686567164179, "grad_norm": 34.739845275878906, "learning_rate": 9.725340136054422e-06, "loss": 26.4621, "step": 23338 }, { "epoch": 555.6925373134328, "grad_norm": 41.784236907958984, "learning_rate": 9.724914965986394e-06, "loss": 25.9695, "step": 23339 }, { "epoch": 555.7164179104477, "grad_norm": 37.71913528442383, "learning_rate": 9.724489795918368e-06, "loss": 27.2568, "step": 23340 }, { "epoch": 555.7402985074627, "grad_norm": 39.94352340698242, "learning_rate": 9.724064625850341e-06, "loss": 26.8902, "step": 23341 }, { "epoch": 555.7641791044776, "grad_norm": 37.479393005371094, "learning_rate": 9.723639455782313e-06, "loss": 26.5927, "step": 23342 }, { "epoch": 555.7880597014926, "grad_norm": 35.0898323059082, "learning_rate": 9.723214285714286e-06, "loss": 26.4204, "step": 23343 }, { "epoch": 555.8119402985075, "grad_norm": 33.63698959350586, "learning_rate": 9.72278911564626e-06, "loss": 26.5491, "step": 23344 }, { "epoch": 555.8358208955224, "grad_norm": 42.4615592956543, "learning_rate": 9.722363945578233e-06, "loss": 26.5485, "step": 23345 }, { "epoch": 555.8597014925373, "grad_norm": 34.70098114013672, "learning_rate": 9.721938775510205e-06, "loss": 26.7322, "step": 23346 }, { "epoch": 555.8835820895522, "grad_norm": 41.08753204345703, "learning_rate": 9.721513605442178e-06, "loss": 26.2938, "step": 23347 }, { "epoch": 555.9074626865672, "grad_norm": 36.34037780761719, "learning_rate": 9.72108843537415e-06, "loss": 25.9481, "step": 23348 }, { "epoch": 555.9313432835821, "grad_norm": 40.09077072143555, "learning_rate": 9.720663265306124e-06, "loss": 27.4316, "step": 23349 }, { "epoch": 555.955223880597, "grad_norm": 32.94746780395508, "learning_rate": 9.720238095238095e-06, "loss": 26.2247, "step": 23350 }, { "epoch": 555.9791044776119, "grad_norm": 38.50681686401367, "learning_rate": 9.719812925170069e-06, "loss": 25.6113, "step": 23351 }, { "epoch": 556.0, "grad_norm": 32.77855682373047, "learning_rate": 9.719387755102042e-06, "loss": 23.341, "step": 23352 }, { "epoch": 556.0238805970149, "grad_norm": 42.094505310058594, "learning_rate": 9.718962585034014e-06, "loss": 26.4318, "step": 23353 }, { "epoch": 556.0477611940298, "grad_norm": 40.028465270996094, "learning_rate": 9.718537414965987e-06, "loss": 26.2582, "step": 23354 }, { "epoch": 556.0716417910447, "grad_norm": 42.60651779174805, "learning_rate": 9.718112244897961e-06, "loss": 26.0491, "step": 23355 }, { "epoch": 556.0955223880597, "grad_norm": 38.449180603027344, "learning_rate": 9.717687074829933e-06, "loss": 26.6049, "step": 23356 }, { "epoch": 556.1194029850747, "grad_norm": 39.6091194152832, "learning_rate": 9.717261904761906e-06, "loss": 26.2914, "step": 23357 }, { "epoch": 556.1432835820896, "grad_norm": 34.58478546142578, "learning_rate": 9.716836734693878e-06, "loss": 27.2521, "step": 23358 }, { "epoch": 556.1671641791045, "grad_norm": 38.47686767578125, "learning_rate": 9.716411564625851e-06, "loss": 27.7727, "step": 23359 }, { "epoch": 556.1910447761194, "grad_norm": 29.725627899169922, "learning_rate": 9.715986394557823e-06, "loss": 27.0688, "step": 23360 }, { "epoch": 556.2149253731343, "grad_norm": 41.7398681640625, "learning_rate": 9.715561224489797e-06, "loss": 26.0333, "step": 23361 }, { "epoch": 556.2388059701492, "grad_norm": 33.20061111450195, "learning_rate": 9.71513605442177e-06, "loss": 27.0753, "step": 23362 }, { "epoch": 556.2626865671642, "grad_norm": 38.68074417114258, "learning_rate": 9.714710884353742e-06, "loss": 27.3724, "step": 23363 }, { "epoch": 556.2865671641791, "grad_norm": 30.487937927246094, "learning_rate": 9.714285714285715e-06, "loss": 26.881, "step": 23364 }, { "epoch": 556.310447761194, "grad_norm": 35.829689025878906, "learning_rate": 9.713860544217689e-06, "loss": 25.6362, "step": 23365 }, { "epoch": 556.334328358209, "grad_norm": 31.873289108276367, "learning_rate": 9.71343537414966e-06, "loss": 26.9256, "step": 23366 }, { "epoch": 556.3582089552239, "grad_norm": 39.309288024902344, "learning_rate": 9.713010204081634e-06, "loss": 26.7186, "step": 23367 }, { "epoch": 556.3820895522388, "grad_norm": 32.810394287109375, "learning_rate": 9.712585034013606e-06, "loss": 26.0083, "step": 23368 }, { "epoch": 556.4059701492537, "grad_norm": 41.72153091430664, "learning_rate": 9.712159863945579e-06, "loss": 27.3589, "step": 23369 }, { "epoch": 556.4298507462687, "grad_norm": 39.56908416748047, "learning_rate": 9.71173469387755e-06, "loss": 27.2925, "step": 23370 }, { "epoch": 556.4537313432836, "grad_norm": 39.324676513671875, "learning_rate": 9.711309523809524e-06, "loss": 26.2259, "step": 23371 }, { "epoch": 556.4776119402985, "grad_norm": 36.649349212646484, "learning_rate": 9.710884353741498e-06, "loss": 26.8334, "step": 23372 }, { "epoch": 556.5014925373134, "grad_norm": 35.94498062133789, "learning_rate": 9.710459183673471e-06, "loss": 25.575, "step": 23373 }, { "epoch": 556.5253731343283, "grad_norm": 27.940011978149414, "learning_rate": 9.710034013605443e-06, "loss": 26.0353, "step": 23374 }, { "epoch": 556.5492537313432, "grad_norm": 36.63886260986328, "learning_rate": 9.709608843537416e-06, "loss": 27.4627, "step": 23375 }, { "epoch": 556.5731343283583, "grad_norm": 25.218708038330078, "learning_rate": 9.70918367346939e-06, "loss": 26.2817, "step": 23376 }, { "epoch": 556.5970149253732, "grad_norm": 38.785606384277344, "learning_rate": 9.708758503401362e-06, "loss": 26.3044, "step": 23377 }, { "epoch": 556.6208955223881, "grad_norm": 31.12144660949707, "learning_rate": 9.708333333333333e-06, "loss": 26.1822, "step": 23378 }, { "epoch": 556.644776119403, "grad_norm": 39.452239990234375, "learning_rate": 9.707908163265307e-06, "loss": 26.6543, "step": 23379 }, { "epoch": 556.6686567164179, "grad_norm": 35.05186080932617, "learning_rate": 9.707482993197278e-06, "loss": 25.9734, "step": 23380 }, { "epoch": 556.6925373134328, "grad_norm": 40.453338623046875, "learning_rate": 9.707057823129252e-06, "loss": 27.8834, "step": 23381 }, { "epoch": 556.7164179104477, "grad_norm": 32.459449768066406, "learning_rate": 9.706632653061225e-06, "loss": 26.7118, "step": 23382 }, { "epoch": 556.7402985074627, "grad_norm": 38.76224136352539, "learning_rate": 9.706207482993199e-06, "loss": 27.1582, "step": 23383 }, { "epoch": 556.7641791044776, "grad_norm": 33.08258056640625, "learning_rate": 9.70578231292517e-06, "loss": 26.682, "step": 23384 }, { "epoch": 556.7880597014926, "grad_norm": 36.442352294921875, "learning_rate": 9.705357142857144e-06, "loss": 26.5371, "step": 23385 }, { "epoch": 556.8119402985075, "grad_norm": 31.3975830078125, "learning_rate": 9.704931972789117e-06, "loss": 26.3419, "step": 23386 }, { "epoch": 556.8358208955224, "grad_norm": 38.908416748046875, "learning_rate": 9.70450680272109e-06, "loss": 27.0671, "step": 23387 }, { "epoch": 556.8597014925373, "grad_norm": 30.213533401489258, "learning_rate": 9.704081632653061e-06, "loss": 26.9, "step": 23388 }, { "epoch": 556.8835820895522, "grad_norm": 35.888465881347656, "learning_rate": 9.703656462585034e-06, "loss": 27.475, "step": 23389 }, { "epoch": 556.9074626865672, "grad_norm": 27.077762603759766, "learning_rate": 9.703231292517008e-06, "loss": 25.9305, "step": 23390 }, { "epoch": 556.9313432835821, "grad_norm": 38.23672866821289, "learning_rate": 9.70280612244898e-06, "loss": 26.3817, "step": 23391 }, { "epoch": 556.955223880597, "grad_norm": 29.382871627807617, "learning_rate": 9.702380952380953e-06, "loss": 26.1582, "step": 23392 }, { "epoch": 556.9791044776119, "grad_norm": 33.82618713378906, "learning_rate": 9.701955782312927e-06, "loss": 26.8479, "step": 23393 }, { "epoch": 557.0, "grad_norm": 26.306079864501953, "learning_rate": 9.701530612244898e-06, "loss": 24.3226, "step": 23394 }, { "epoch": 557.0238805970149, "grad_norm": NaN, "learning_rate": 9.701105442176872e-06, "loss": 47.1293, "step": 23395 }, { "epoch": 557.0477611940298, "grad_norm": 31.781518936157227, "learning_rate": 9.701105442176872e-06, "loss": 27.0778, "step": 23396 }, { "epoch": 557.0716417910447, "grad_norm": 32.13970184326172, "learning_rate": 9.700680272108845e-06, "loss": 27.6892, "step": 23397 }, { "epoch": 557.0955223880597, "grad_norm": 27.933650970458984, "learning_rate": 9.700255102040817e-06, "loss": 26.9876, "step": 23398 }, { "epoch": 557.1194029850747, "grad_norm": 27.703584671020508, "learning_rate": 9.69982993197279e-06, "loss": 26.4798, "step": 23399 }, { "epoch": 557.1432835820896, "grad_norm": 24.464845657348633, "learning_rate": 9.699404761904762e-06, "loss": 26.9148, "step": 23400 }, { "epoch": 557.1671641791045, "grad_norm": 26.23798179626465, "learning_rate": 9.698979591836736e-06, "loss": 27.0259, "step": 23401 }, { "epoch": 557.1910447761194, "grad_norm": 27.316373825073242, "learning_rate": 9.698554421768707e-06, "loss": 26.5356, "step": 23402 }, { "epoch": 557.2149253731343, "grad_norm": 21.149913787841797, "learning_rate": 9.69812925170068e-06, "loss": 27.0681, "step": 23403 }, { "epoch": 557.2388059701492, "grad_norm": 25.997827529907227, "learning_rate": 9.697704081632654e-06, "loss": 26.7021, "step": 23404 }, { "epoch": 557.2626865671642, "grad_norm": 25.28985595703125, "learning_rate": 9.697278911564628e-06, "loss": 26.4065, "step": 23405 }, { "epoch": 557.2865671641791, "grad_norm": 22.50048828125, "learning_rate": 9.6968537414966e-06, "loss": 27.1324, "step": 23406 }, { "epoch": 557.310447761194, "grad_norm": 23.647062301635742, "learning_rate": 9.696428571428573e-06, "loss": 25.5492, "step": 23407 }, { "epoch": 557.334328358209, "grad_norm": 23.377147674560547, "learning_rate": 9.696003401360545e-06, "loss": 27.3549, "step": 23408 }, { "epoch": 557.3582089552239, "grad_norm": 24.625423431396484, "learning_rate": 9.695578231292518e-06, "loss": 27.5636, "step": 23409 }, { "epoch": 557.3820895522388, "grad_norm": 23.172574996948242, "learning_rate": 9.69515306122449e-06, "loss": 26.4804, "step": 23410 }, { "epoch": 557.4059701492537, "grad_norm": 26.130126953125, "learning_rate": 9.694727891156463e-06, "loss": 27.299, "step": 23411 }, { "epoch": 557.4298507462687, "grad_norm": NaN, "learning_rate": 9.694302721088437e-06, "loss": 22.5573, "step": 23412 }, { "epoch": 557.4537313432836, "grad_norm": 22.168424606323242, "learning_rate": 9.694302721088437e-06, "loss": 27.5369, "step": 23413 }, { "epoch": 557.4776119402985, "grad_norm": 22.72999382019043, "learning_rate": 9.693877551020408e-06, "loss": 26.1985, "step": 23414 }, { "epoch": 557.5014925373134, "grad_norm": 22.641036987304688, "learning_rate": 9.693452380952382e-06, "loss": 26.9516, "step": 23415 }, { "epoch": 557.5253731343283, "grad_norm": 19.72484588623047, "learning_rate": 9.693027210884355e-06, "loss": 27.5126, "step": 23416 }, { "epoch": 557.5492537313432, "grad_norm": 26.650623321533203, "learning_rate": 9.692602040816327e-06, "loss": 25.2128, "step": 23417 }, { "epoch": 557.5731343283583, "grad_norm": 24.93844985961914, "learning_rate": 9.6921768707483e-06, "loss": 27.1775, "step": 23418 }, { "epoch": 557.5970149253732, "grad_norm": 24.692888259887695, "learning_rate": 9.691751700680272e-06, "loss": 26.6257, "step": 23419 }, { "epoch": 557.6208955223881, "grad_norm": 23.82071304321289, "learning_rate": 9.691326530612246e-06, "loss": 26.1611, "step": 23420 }, { "epoch": 557.644776119403, "grad_norm": 20.455490112304688, "learning_rate": 9.690901360544218e-06, "loss": 26.6977, "step": 23421 }, { "epoch": 557.6686567164179, "grad_norm": 24.085735321044922, "learning_rate": 9.690476190476191e-06, "loss": 26.9005, "step": 23422 }, { "epoch": 557.6925373134328, "grad_norm": 21.47185516357422, "learning_rate": 9.690051020408164e-06, "loss": 25.7139, "step": 23423 }, { "epoch": 557.7164179104477, "grad_norm": 22.937623977661133, "learning_rate": 9.689625850340136e-06, "loss": 26.3554, "step": 23424 }, { "epoch": 557.7402985074627, "grad_norm": 20.344087600708008, "learning_rate": 9.68920068027211e-06, "loss": 26.4805, "step": 23425 }, { "epoch": 557.7641791044776, "grad_norm": 30.05604362487793, "learning_rate": 9.688775510204083e-06, "loss": 26.9571, "step": 23426 }, { "epoch": 557.7880597014926, "grad_norm": 20.913618087768555, "learning_rate": 9.688350340136056e-06, "loss": 25.7418, "step": 23427 }, { "epoch": 557.8119402985075, "grad_norm": 22.318445205688477, "learning_rate": 9.687925170068028e-06, "loss": 26.336, "step": 23428 }, { "epoch": 557.8358208955224, "grad_norm": 21.78098487854004, "learning_rate": 9.6875e-06, "loss": 26.8911, "step": 23429 }, { "epoch": 557.8597014925373, "grad_norm": 21.395206451416016, "learning_rate": 9.687074829931973e-06, "loss": 27.1464, "step": 23430 }, { "epoch": 557.8835820895522, "grad_norm": 22.668529510498047, "learning_rate": 9.686649659863945e-06, "loss": 27.0101, "step": 23431 }, { "epoch": 557.9074626865672, "grad_norm": 23.389375686645508, "learning_rate": 9.686224489795919e-06, "loss": 26.5747, "step": 23432 }, { "epoch": 557.9313432835821, "grad_norm": 21.325698852539062, "learning_rate": 9.685799319727892e-06, "loss": 26.4943, "step": 23433 }, { "epoch": 557.955223880597, "grad_norm": 23.199058532714844, "learning_rate": 9.685374149659866e-06, "loss": 27.0143, "step": 23434 }, { "epoch": 557.9791044776119, "grad_norm": 22.641616821289062, "learning_rate": 9.684948979591837e-06, "loss": 27.9513, "step": 23435 }, { "epoch": 558.0, "grad_norm": 17.809022903442383, "learning_rate": 9.68452380952381e-06, "loss": 22.9683, "step": 23436 }, { "epoch": 558.0238805970149, "grad_norm": 23.902198791503906, "learning_rate": 9.684098639455784e-06, "loss": 26.732, "step": 23437 }, { "epoch": 558.0477611940298, "grad_norm": 27.889766693115234, "learning_rate": 9.683673469387756e-06, "loss": 26.4781, "step": 23438 }, { "epoch": 558.0716417910447, "grad_norm": 21.632522583007812, "learning_rate": 9.683248299319728e-06, "loss": 27.2515, "step": 23439 }, { "epoch": 558.0955223880597, "grad_norm": 23.8851318359375, "learning_rate": 9.682823129251701e-06, "loss": 25.5213, "step": 23440 }, { "epoch": 558.1194029850747, "grad_norm": 25.77159881591797, "learning_rate": 9.682397959183675e-06, "loss": 28.0042, "step": 23441 }, { "epoch": 558.1432835820896, "grad_norm": 20.833959579467773, "learning_rate": 9.681972789115646e-06, "loss": 26.9595, "step": 23442 }, { "epoch": 558.1671641791045, "grad_norm": 23.879878997802734, "learning_rate": 9.68154761904762e-06, "loss": 27.5921, "step": 23443 }, { "epoch": 558.1910447761194, "grad_norm": 25.668033599853516, "learning_rate": 9.681122448979593e-06, "loss": 26.4615, "step": 23444 }, { "epoch": 558.2149253731343, "grad_norm": 23.994531631469727, "learning_rate": 9.680697278911565e-06, "loss": 26.7609, "step": 23445 }, { "epoch": 558.2388059701492, "grad_norm": 21.377443313598633, "learning_rate": 9.680272108843538e-06, "loss": 26.9813, "step": 23446 }, { "epoch": 558.2626865671642, "grad_norm": 23.410104751586914, "learning_rate": 9.679846938775512e-06, "loss": 26.754, "step": 23447 }, { "epoch": 558.2865671641791, "grad_norm": 22.51956558227539, "learning_rate": 9.679421768707484e-06, "loss": 27.9537, "step": 23448 }, { "epoch": 558.310447761194, "grad_norm": 21.752050399780273, "learning_rate": 9.678996598639457e-06, "loss": 26.4445, "step": 23449 }, { "epoch": 558.334328358209, "grad_norm": 19.525142669677734, "learning_rate": 9.678571428571429e-06, "loss": 25.5764, "step": 23450 }, { "epoch": 558.3582089552239, "grad_norm": 22.664981842041016, "learning_rate": 9.678146258503402e-06, "loss": 25.5785, "step": 23451 }, { "epoch": 558.3820895522388, "grad_norm": 22.346824645996094, "learning_rate": 9.677721088435374e-06, "loss": 27.7901, "step": 23452 }, { "epoch": 558.4059701492537, "grad_norm": 23.330322265625, "learning_rate": 9.677295918367347e-06, "loss": 27.1397, "step": 23453 }, { "epoch": 558.4298507462687, "grad_norm": 22.45599937438965, "learning_rate": 9.676870748299321e-06, "loss": 26.8396, "step": 23454 }, { "epoch": 558.4537313432836, "grad_norm": 23.150222778320312, "learning_rate": 9.676445578231294e-06, "loss": 26.2244, "step": 23455 }, { "epoch": 558.4776119402985, "grad_norm": 22.715320587158203, "learning_rate": 9.676020408163266e-06, "loss": 26.2017, "step": 23456 }, { "epoch": 558.5014925373134, "grad_norm": 22.330331802368164, "learning_rate": 9.67559523809524e-06, "loss": 25.9757, "step": 23457 }, { "epoch": 558.5253731343283, "grad_norm": 23.513750076293945, "learning_rate": 9.675170068027211e-06, "loss": 26.7391, "step": 23458 }, { "epoch": 558.5492537313432, "grad_norm": 22.0711612701416, "learning_rate": 9.674744897959185e-06, "loss": 26.6273, "step": 23459 }, { "epoch": 558.5731343283583, "grad_norm": 21.44964027404785, "learning_rate": 9.674319727891157e-06, "loss": 27.1573, "step": 23460 }, { "epoch": 558.5970149253732, "grad_norm": 22.6213321685791, "learning_rate": 9.67389455782313e-06, "loss": 26.3224, "step": 23461 }, { "epoch": 558.6208955223881, "grad_norm": 21.28236198425293, "learning_rate": 9.673469387755103e-06, "loss": 26.5529, "step": 23462 }, { "epoch": 558.644776119403, "grad_norm": 19.950298309326172, "learning_rate": 9.673044217687075e-06, "loss": 26.138, "step": 23463 }, { "epoch": 558.6686567164179, "grad_norm": 25.557241439819336, "learning_rate": 9.672619047619049e-06, "loss": 27.0672, "step": 23464 }, { "epoch": 558.6925373134328, "grad_norm": 21.473756790161133, "learning_rate": 9.672193877551022e-06, "loss": 26.8822, "step": 23465 }, { "epoch": 558.7164179104477, "grad_norm": 23.96717071533203, "learning_rate": 9.671768707482994e-06, "loss": 26.993, "step": 23466 }, { "epoch": 558.7402985074627, "grad_norm": 19.824419021606445, "learning_rate": 9.671343537414967e-06, "loss": 27.2442, "step": 23467 }, { "epoch": 558.7641791044776, "grad_norm": 26.511062622070312, "learning_rate": 9.670918367346939e-06, "loss": 27.4919, "step": 23468 }, { "epoch": 558.7880597014926, "grad_norm": 21.6457462310791, "learning_rate": 9.670493197278912e-06, "loss": 26.8626, "step": 23469 }, { "epoch": 558.8119402985075, "grad_norm": 24.867124557495117, "learning_rate": 9.670068027210884e-06, "loss": 27.6245, "step": 23470 }, { "epoch": 558.8358208955224, "grad_norm": 23.0361270904541, "learning_rate": 9.669642857142858e-06, "loss": 26.8359, "step": 23471 }, { "epoch": 558.8597014925373, "grad_norm": 22.939023971557617, "learning_rate": 9.669217687074831e-06, "loss": 27.4419, "step": 23472 }, { "epoch": 558.8835820895522, "grad_norm": 19.473115921020508, "learning_rate": 9.668792517006803e-06, "loss": 26.2743, "step": 23473 }, { "epoch": 558.9074626865672, "grad_norm": 21.90241813659668, "learning_rate": 9.668367346938776e-06, "loss": 26.2099, "step": 23474 }, { "epoch": 558.9313432835821, "grad_norm": 20.432287216186523, "learning_rate": 9.66794217687075e-06, "loss": 26.1591, "step": 23475 }, { "epoch": 558.955223880597, "grad_norm": 20.800071716308594, "learning_rate": 9.667517006802723e-06, "loss": 26.9922, "step": 23476 }, { "epoch": 558.9791044776119, "grad_norm": 20.50545310974121, "learning_rate": 9.667091836734695e-06, "loss": 27.6572, "step": 23477 }, { "epoch": 559.0, "grad_norm": 20.381742477416992, "learning_rate": 9.666666666666667e-06, "loss": 23.9211, "step": 23478 }, { "epoch": 559.0238805970149, "grad_norm": 26.875795364379883, "learning_rate": 9.66624149659864e-06, "loss": 26.974, "step": 23479 }, { "epoch": 559.0477611940298, "grad_norm": 22.34898567199707, "learning_rate": 9.665816326530612e-06, "loss": 26.2245, "step": 23480 }, { "epoch": 559.0716417910447, "grad_norm": 25.5328369140625, "learning_rate": 9.665391156462585e-06, "loss": 25.8994, "step": 23481 }, { "epoch": 559.0955223880597, "grad_norm": 25.548038482666016, "learning_rate": 9.664965986394559e-06, "loss": 26.8673, "step": 23482 }, { "epoch": 559.1194029850747, "grad_norm": 23.521812438964844, "learning_rate": 9.66454081632653e-06, "loss": 25.9625, "step": 23483 }, { "epoch": 559.1432835820896, "grad_norm": 22.54533576965332, "learning_rate": 9.664115646258504e-06, "loss": 26.3911, "step": 23484 }, { "epoch": 559.1671641791045, "grad_norm": 23.1918888092041, "learning_rate": 9.663690476190477e-06, "loss": 27.3017, "step": 23485 }, { "epoch": 559.1910447761194, "grad_norm": 27.790245056152344, "learning_rate": 9.663265306122451e-06, "loss": 27.4232, "step": 23486 }, { "epoch": 559.2149253731343, "grad_norm": 23.54196548461914, "learning_rate": 9.662840136054423e-06, "loss": 27.4519, "step": 23487 }, { "epoch": 559.2388059701492, "grad_norm": 20.048526763916016, "learning_rate": 9.662414965986396e-06, "loss": 26.0576, "step": 23488 }, { "epoch": 559.2626865671642, "grad_norm": 22.943103790283203, "learning_rate": 9.661989795918368e-06, "loss": 27.471, "step": 23489 }, { "epoch": 559.2865671641791, "grad_norm": 26.151500701904297, "learning_rate": 9.66156462585034e-06, "loss": 26.6634, "step": 23490 }, { "epoch": 559.310447761194, "grad_norm": 21.27530288696289, "learning_rate": 9.661139455782313e-06, "loss": 28.1442, "step": 23491 }, { "epoch": 559.334328358209, "grad_norm": 22.15818977355957, "learning_rate": 9.660714285714287e-06, "loss": 26.9028, "step": 23492 }, { "epoch": 559.3582089552239, "grad_norm": 22.97620391845703, "learning_rate": 9.66028911564626e-06, "loss": 26.3547, "step": 23493 }, { "epoch": 559.3820895522388, "grad_norm": 27.140077590942383, "learning_rate": 9.659863945578232e-06, "loss": 26.4364, "step": 23494 }, { "epoch": 559.4059701492537, "grad_norm": 21.387941360473633, "learning_rate": 9.659438775510205e-06, "loss": 25.9361, "step": 23495 }, { "epoch": 559.4298507462687, "grad_norm": 26.88277816772461, "learning_rate": 9.659013605442179e-06, "loss": 27.3891, "step": 23496 }, { "epoch": 559.4537313432836, "grad_norm": 22.36930274963379, "learning_rate": 9.65858843537415e-06, "loss": 26.9317, "step": 23497 }, { "epoch": 559.4776119402985, "grad_norm": 33.13166809082031, "learning_rate": 9.658163265306124e-06, "loss": 26.7937, "step": 23498 }, { "epoch": 559.5014925373134, "grad_norm": 21.51146697998047, "learning_rate": 9.657738095238096e-06, "loss": 26.0667, "step": 23499 }, { "epoch": 559.5253731343283, "grad_norm": 28.953887939453125, "learning_rate": 9.657312925170069e-06, "loss": 27.4694, "step": 23500 }, { "epoch": 559.5492537313432, "grad_norm": 26.367673873901367, "learning_rate": 9.65688775510204e-06, "loss": 27.4737, "step": 23501 }, { "epoch": 559.5731343283583, "grad_norm": 25.037479400634766, "learning_rate": 9.656462585034014e-06, "loss": 26.4272, "step": 23502 }, { "epoch": 559.5970149253732, "grad_norm": 24.962305068969727, "learning_rate": 9.656037414965988e-06, "loss": 27.3249, "step": 23503 }, { "epoch": 559.6208955223881, "grad_norm": 23.59852409362793, "learning_rate": 9.65561224489796e-06, "loss": 26.2311, "step": 23504 }, { "epoch": 559.644776119403, "grad_norm": 23.450002670288086, "learning_rate": 9.655187074829933e-06, "loss": 26.8132, "step": 23505 }, { "epoch": 559.6686567164179, "grad_norm": 26.193180084228516, "learning_rate": 9.654761904761906e-06, "loss": 27.6959, "step": 23506 }, { "epoch": 559.6925373134328, "grad_norm": 19.342546463012695, "learning_rate": 9.654336734693878e-06, "loss": 26.5438, "step": 23507 }, { "epoch": 559.7164179104477, "grad_norm": 24.329862594604492, "learning_rate": 9.653911564625852e-06, "loss": 26.2056, "step": 23508 }, { "epoch": 559.7402985074627, "grad_norm": 20.235631942749023, "learning_rate": 9.653486394557823e-06, "loss": 26.3653, "step": 23509 }, { "epoch": 559.7641791044776, "grad_norm": 21.953914642333984, "learning_rate": 9.653061224489797e-06, "loss": 26.9957, "step": 23510 }, { "epoch": 559.7880597014926, "grad_norm": 23.27764320373535, "learning_rate": 9.652636054421768e-06, "loss": 26.7447, "step": 23511 }, { "epoch": 559.8119402985075, "grad_norm": 24.91969108581543, "learning_rate": 9.652210884353742e-06, "loss": 26.1085, "step": 23512 }, { "epoch": 559.8358208955224, "grad_norm": 26.48617172241211, "learning_rate": 9.651785714285715e-06, "loss": 26.8796, "step": 23513 }, { "epoch": 559.8597014925373, "grad_norm": 20.129688262939453, "learning_rate": 9.651360544217689e-06, "loss": 27.3376, "step": 23514 }, { "epoch": 559.8835820895522, "grad_norm": 33.15237808227539, "learning_rate": 9.65093537414966e-06, "loss": 27.0032, "step": 23515 }, { "epoch": 559.9074626865672, "grad_norm": 21.58848762512207, "learning_rate": 9.650510204081634e-06, "loss": 26.1864, "step": 23516 }, { "epoch": 559.9313432835821, "grad_norm": 25.37381362915039, "learning_rate": 9.650085034013606e-06, "loss": 26.4771, "step": 23517 }, { "epoch": 559.955223880597, "grad_norm": 26.385150909423828, "learning_rate": 9.64965986394558e-06, "loss": 26.5652, "step": 23518 }, { "epoch": 559.9791044776119, "grad_norm": 22.514446258544922, "learning_rate": 9.649234693877551e-06, "loss": 26.9709, "step": 23519 }, { "epoch": 560.0, "grad_norm": 20.769372940063477, "learning_rate": 9.648809523809524e-06, "loss": 22.934, "step": 23520 }, { "epoch": 560.0, "step": 23520, "total_flos": 1.1561832325471516e+18, "train_loss": 0.9634926647556071, "train_runtime": 25661.7095, "train_samples_per_second": 116.793, "train_steps_per_second": 0.917 }, { "epoch": 560.0238805970149, "grad_norm": 26.255491256713867, "learning_rate": 1e-05, "loss": 26.5734, "step": 23521 }, { "epoch": 560.0477611940298, "grad_norm": Infinity, "learning_rate": 9.999582289055974e-06, "loss": 32.7472, "step": 23522 }, { "epoch": 560.0716417910447, "grad_norm": Infinity, "learning_rate": 9.999582289055974e-06, "loss": 32.2115, "step": 23523 }, { "epoch": 560.0955223880597, "grad_norm": 394.327392578125, "learning_rate": 9.999582289055974e-06, "loss": 33.2226, "step": 23524 }, { "epoch": 560.1194029850747, "grad_norm": 161.38314819335938, "learning_rate": 9.999164578111947e-06, "loss": 31.6317, "step": 23525 }, { "epoch": 560.1432835820896, "grad_norm": 126.87483978271484, "learning_rate": 9.99874686716792e-06, "loss": 30.1443, "step": 23526 }, { "epoch": 560.1671641791045, "grad_norm": 76.22613525390625, "learning_rate": 9.998329156223894e-06, "loss": 28.8646, "step": 23527 }, { "epoch": 560.1910447761194, "grad_norm": 56.914939880371094, "learning_rate": 9.997911445279867e-06, "loss": 27.8159, "step": 23528 }, { "epoch": 560.2149253731343, "grad_norm": 52.20094299316406, "learning_rate": 9.99749373433584e-06, "loss": 28.9411, "step": 23529 }, { "epoch": 560.2388059701492, "grad_norm": 47.898040771484375, "learning_rate": 9.997076023391813e-06, "loss": 26.8566, "step": 23530 }, { "epoch": 560.2626865671642, "grad_norm": 44.364994049072266, "learning_rate": 9.996658312447786e-06, "loss": 27.0471, "step": 23531 }, { "epoch": 560.2865671641791, "grad_norm": 47.417320251464844, "learning_rate": 9.996240601503761e-06, "loss": 27.523, "step": 23532 }, { "epoch": 560.310447761194, "grad_norm": 38.120140075683594, "learning_rate": 9.995822890559733e-06, "loss": 28.056, "step": 23533 }, { "epoch": 560.334328358209, "grad_norm": 30.051715850830078, "learning_rate": 9.995405179615708e-06, "loss": 27.1312, "step": 23534 }, { "epoch": 560.3582089552239, "grad_norm": 37.537593841552734, "learning_rate": 9.99498746867168e-06, "loss": 26.8672, "step": 23535 }, { "epoch": 560.3820895522388, "grad_norm": 33.44776916503906, "learning_rate": 9.994569757727654e-06, "loss": 26.3561, "step": 23536 }, { "epoch": 560.4059701492537, "grad_norm": 25.32264518737793, "learning_rate": 9.994152046783626e-06, "loss": 27.2234, "step": 23537 }, { "epoch": 560.4298507462687, "grad_norm": 30.813364028930664, "learning_rate": 9.9937343358396e-06, "loss": 27.3545, "step": 23538 }, { "epoch": 560.4537313432836, "grad_norm": 44.555686950683594, "learning_rate": 9.993316624895572e-06, "loss": 25.874, "step": 23539 }, { "epoch": 560.4776119402985, "grad_norm": 23.835121154785156, "learning_rate": 9.992898913951547e-06, "loss": 26.151, "step": 23540 }, { "epoch": 560.5014925373134, "grad_norm": 31.222606658935547, "learning_rate": 9.992481203007518e-06, "loss": 27.1653, "step": 23541 }, { "epoch": 560.5253731343283, "grad_norm": 38.91552734375, "learning_rate": 9.992063492063493e-06, "loss": 27.3511, "step": 23542 }, { "epoch": 560.5492537313432, "grad_norm": 25.727649688720703, "learning_rate": 9.991645781119465e-06, "loss": 27.3803, "step": 23543 }, { "epoch": 560.5731343283583, "grad_norm": 41.207149505615234, "learning_rate": 9.99122807017544e-06, "loss": 25.7318, "step": 23544 }, { "epoch": 560.5970149253732, "grad_norm": 26.149003982543945, "learning_rate": 9.990810359231413e-06, "loss": 26.3984, "step": 23545 }, { "epoch": 560.6208955223881, "grad_norm": 33.237815856933594, "learning_rate": 9.990392648287386e-06, "loss": 25.9815, "step": 23546 }, { "epoch": 560.644776119403, "grad_norm": 31.196027755737305, "learning_rate": 9.98997493734336e-06, "loss": 26.833, "step": 23547 }, { "epoch": 560.6686567164179, "grad_norm": 25.543935775756836, "learning_rate": 9.989557226399333e-06, "loss": 26.7305, "step": 23548 }, { "epoch": 560.6925373134328, "grad_norm": NaN, "learning_rate": 9.989139515455306e-06, "loss": 29.5646, "step": 23549 }, { "epoch": 560.7164179104477, "grad_norm": 30.809814453125, "learning_rate": 9.989139515455306e-06, "loss": 26.763, "step": 23550 }, { "epoch": 560.7402985074627, "grad_norm": 28.54103660583496, "learning_rate": 9.988721804511279e-06, "loss": 26.0337, "step": 23551 }, { "epoch": 560.7641791044776, "grad_norm": 25.84275245666504, "learning_rate": 9.988304093567252e-06, "loss": 26.8045, "step": 23552 }, { "epoch": 560.7880597014926, "grad_norm": 34.48431396484375, "learning_rate": 9.987886382623225e-06, "loss": 26.834, "step": 23553 }, { "epoch": 560.8119402985075, "grad_norm": 23.485387802124023, "learning_rate": 9.987468671679199e-06, "loss": 26.227, "step": 23554 }, { "epoch": 560.8358208955224, "grad_norm": 31.5029239654541, "learning_rate": 9.987050960735172e-06, "loss": 26.3552, "step": 23555 }, { "epoch": 560.8597014925373, "grad_norm": 24.59949493408203, "learning_rate": 9.986633249791145e-06, "loss": 26.9063, "step": 23556 }, { "epoch": 560.8835820895522, "grad_norm": 31.3134765625, "learning_rate": 9.986215538847118e-06, "loss": 26.3359, "step": 23557 }, { "epoch": 560.9074626865672, "grad_norm": 24.788082122802734, "learning_rate": 9.985797827903091e-06, "loss": 26.2917, "step": 23558 }, { "epoch": 560.9313432835821, "grad_norm": 33.93471145629883, "learning_rate": 9.985380116959065e-06, "loss": 27.2515, "step": 23559 }, { "epoch": 560.955223880597, "grad_norm": 24.208168029785156, "learning_rate": 9.984962406015038e-06, "loss": 26.3668, "step": 23560 }, { "epoch": 560.9791044776119, "grad_norm": 32.07076644897461, "learning_rate": 9.984544695071011e-06, "loss": 28.1203, "step": 23561 }, { "epoch": 561.0, "grad_norm": 24.097423553466797, "learning_rate": 9.984126984126986e-06, "loss": 23.3021, "step": 23562 }, { "epoch": 561.0238805970149, "grad_norm": 30.14097785949707, "learning_rate": 9.983709273182957e-06, "loss": 26.2071, "step": 23563 }, { "epoch": 561.0477611940298, "grad_norm": 30.369443893432617, "learning_rate": 9.983291562238932e-06, "loss": 27.0279, "step": 23564 }, { "epoch": 561.0716417910447, "grad_norm": 28.13910484313965, "learning_rate": 9.982873851294905e-06, "loss": 26.6422, "step": 23565 }, { "epoch": 561.0955223880597, "grad_norm": 26.59886932373047, "learning_rate": 9.982456140350879e-06, "loss": 26.7479, "step": 23566 }, { "epoch": 561.1194029850747, "grad_norm": 22.135549545288086, "learning_rate": 9.982038429406852e-06, "loss": 26.4651, "step": 23567 }, { "epoch": 561.1432835820896, "grad_norm": 30.92716407775879, "learning_rate": 9.981620718462825e-06, "loss": 27.6129, "step": 23568 }, { "epoch": 561.1671641791045, "grad_norm": 21.301116943359375, "learning_rate": 9.981203007518798e-06, "loss": 26.3877, "step": 23569 }, { "epoch": 561.1910447761194, "grad_norm": 33.429534912109375, "learning_rate": 9.980785296574771e-06, "loss": 27.3824, "step": 23570 }, { "epoch": 561.2149253731343, "grad_norm": 28.99327278137207, "learning_rate": 9.980367585630745e-06, "loss": 26.0585, "step": 23571 }, { "epoch": 561.2388059701492, "grad_norm": 22.86519432067871, "learning_rate": 9.979949874686718e-06, "loss": 26.3583, "step": 23572 }, { "epoch": 561.2626865671642, "grad_norm": 33.3988151550293, "learning_rate": 9.979532163742691e-06, "loss": 27.284, "step": 23573 }, { "epoch": 561.2865671641791, "grad_norm": 30.524402618408203, "learning_rate": 9.979114452798664e-06, "loss": 26.5236, "step": 23574 }, { "epoch": 561.310447761194, "grad_norm": 20.62517547607422, "learning_rate": 9.978696741854637e-06, "loss": 26.6884, "step": 23575 }, { "epoch": 561.334328358209, "grad_norm": 24.612627029418945, "learning_rate": 9.97827903091061e-06, "loss": 26.1028, "step": 23576 }, { "epoch": 561.3582089552239, "grad_norm": 26.009998321533203, "learning_rate": 9.977861319966584e-06, "loss": 26.0744, "step": 23577 }, { "epoch": 561.3820895522388, "grad_norm": 22.0828914642334, "learning_rate": 9.977443609022557e-06, "loss": 26.3747, "step": 23578 }, { "epoch": 561.4059701492537, "grad_norm": 21.158578872680664, "learning_rate": 9.97702589807853e-06, "loss": 26.86, "step": 23579 }, { "epoch": 561.4298507462687, "grad_norm": 23.312891006469727, "learning_rate": 9.976608187134503e-06, "loss": 26.0915, "step": 23580 }, { "epoch": 561.4537313432836, "grad_norm": 24.50956916809082, "learning_rate": 9.976190476190477e-06, "loss": 26.6483, "step": 23581 }, { "epoch": 561.4776119402985, "grad_norm": 21.31568717956543, "learning_rate": 9.97577276524645e-06, "loss": 26.9736, "step": 23582 }, { "epoch": 561.5014925373134, "grad_norm": 24.23200798034668, "learning_rate": 9.975355054302423e-06, "loss": 26.8156, "step": 23583 }, { "epoch": 561.5253731343283, "grad_norm": 21.449459075927734, "learning_rate": 9.974937343358396e-06, "loss": 26.7222, "step": 23584 }, { "epoch": 561.5492537313432, "grad_norm": 20.1185302734375, "learning_rate": 9.97451963241437e-06, "loss": 25.7863, "step": 23585 }, { "epoch": 561.5731343283583, "grad_norm": 23.14252471923828, "learning_rate": 9.974101921470344e-06, "loss": 26.6586, "step": 23586 }, { "epoch": 561.5970149253732, "grad_norm": 23.049753189086914, "learning_rate": 9.973684210526316e-06, "loss": 26.8728, "step": 23587 }, { "epoch": 561.6208955223881, "grad_norm": 26.362287521362305, "learning_rate": 9.97326649958229e-06, "loss": 26.398, "step": 23588 }, { "epoch": 561.644776119403, "grad_norm": 22.63477325439453, "learning_rate": 9.972848788638262e-06, "loss": 27.4229, "step": 23589 }, { "epoch": 561.6686567164179, "grad_norm": 27.045448303222656, "learning_rate": 9.972431077694237e-06, "loss": 25.9074, "step": 23590 }, { "epoch": 561.6925373134328, "grad_norm": 26.925687789916992, "learning_rate": 9.97201336675021e-06, "loss": 26.7855, "step": 23591 }, { "epoch": 561.7164179104477, "grad_norm": 25.48073959350586, "learning_rate": 9.971595655806183e-06, "loss": 27.3364, "step": 23592 }, { "epoch": 561.7402985074627, "grad_norm": 25.456918716430664, "learning_rate": 9.971177944862157e-06, "loss": 26.8154, "step": 23593 }, { "epoch": 561.7641791044776, "grad_norm": 18.99306869506836, "learning_rate": 9.97076023391813e-06, "loss": 25.8579, "step": 23594 }, { "epoch": 561.7880597014926, "grad_norm": 27.361736297607422, "learning_rate": 9.970342522974103e-06, "loss": 26.4155, "step": 23595 }, { "epoch": 561.8119402985075, "grad_norm": 28.508228302001953, "learning_rate": 9.969924812030076e-06, "loss": 26.5938, "step": 23596 }, { "epoch": 561.8358208955224, "grad_norm": 22.71171760559082, "learning_rate": 9.96950710108605e-06, "loss": 26.7301, "step": 23597 }, { "epoch": 561.8597014925373, "grad_norm": 26.349992752075195, "learning_rate": 9.969089390142023e-06, "loss": 26.5137, "step": 23598 }, { "epoch": 561.8835820895522, "grad_norm": 21.160863876342773, "learning_rate": 9.968671679197996e-06, "loss": 26.2691, "step": 23599 }, { "epoch": 561.9074626865672, "grad_norm": 23.326492309570312, "learning_rate": 9.968253968253969e-06, "loss": 26.8047, "step": 23600 }, { "epoch": 561.9313432835821, "grad_norm": 25.24452018737793, "learning_rate": 9.967836257309942e-06, "loss": 26.6302, "step": 23601 }, { "epoch": 561.955223880597, "grad_norm": 23.558279037475586, "learning_rate": 9.967418546365915e-06, "loss": 25.8925, "step": 23602 }, { "epoch": 561.9791044776119, "grad_norm": 22.902400970458984, "learning_rate": 9.967000835421889e-06, "loss": 25.4934, "step": 23603 }, { "epoch": 562.0, "grad_norm": NaN, "learning_rate": 9.966583124477862e-06, "loss": 23.5609, "step": 23604 }, { "epoch": 562.0238805970149, "grad_norm": 30.663074493408203, "learning_rate": 9.966583124477862e-06, "loss": 26.2393, "step": 23605 }, { "epoch": 562.0477611940298, "grad_norm": 24.07648277282715, "learning_rate": 9.966165413533837e-06, "loss": 26.9277, "step": 23606 }, { "epoch": 562.0716417910447, "grad_norm": 23.12439727783203, "learning_rate": 9.965747702589808e-06, "loss": 26.8794, "step": 23607 }, { "epoch": 562.0955223880597, "grad_norm": 21.676387786865234, "learning_rate": 9.965329991645783e-06, "loss": 26.3695, "step": 23608 }, { "epoch": 562.1194029850747, "grad_norm": 21.274797439575195, "learning_rate": 9.964912280701755e-06, "loss": 26.5485, "step": 23609 }, { "epoch": 562.1432835820896, "grad_norm": 20.806303024291992, "learning_rate": 9.96449456975773e-06, "loss": 26.6569, "step": 23610 }, { "epoch": 562.1671641791045, "grad_norm": 23.204885482788086, "learning_rate": 9.964076858813701e-06, "loss": 26.231, "step": 23611 }, { "epoch": 562.1910447761194, "grad_norm": NaN, "learning_rate": 9.963659147869676e-06, "loss": 23.341, "step": 23612 }, { "epoch": 562.2149253731343, "grad_norm": 23.160419464111328, "learning_rate": 9.963659147869676e-06, "loss": 26.3663, "step": 23613 }, { "epoch": 562.2388059701492, "grad_norm": 22.380102157592773, "learning_rate": 9.963241436925647e-06, "loss": 25.4257, "step": 23614 }, { "epoch": 562.2626865671642, "grad_norm": 23.842082977294922, "learning_rate": 9.962823725981622e-06, "loss": 26.4665, "step": 23615 }, { "epoch": 562.2865671641791, "grad_norm": 23.077119827270508, "learning_rate": 9.962406015037594e-06, "loss": 26.746, "step": 23616 }, { "epoch": 562.310447761194, "grad_norm": 26.763202667236328, "learning_rate": 9.961988304093569e-06, "loss": 26.4032, "step": 23617 }, { "epoch": 562.334328358209, "grad_norm": NaN, "learning_rate": 9.96157059314954e-06, "loss": 26.5164, "step": 23618 }, { "epoch": 562.3582089552239, "grad_norm": 25.73220443725586, "learning_rate": 9.96157059314954e-06, "loss": 26.2785, "step": 23619 }, { "epoch": 562.3820895522388, "grad_norm": 21.650012969970703, "learning_rate": 9.961152882205515e-06, "loss": 26.9273, "step": 23620 }, { "epoch": 562.4059701492537, "grad_norm": 21.64678192138672, "learning_rate": 9.960735171261487e-06, "loss": 26.4496, "step": 23621 }, { "epoch": 562.4298507462687, "grad_norm": 22.60647201538086, "learning_rate": 9.960317460317462e-06, "loss": 26.8542, "step": 23622 }, { "epoch": 562.4537313432836, "grad_norm": 20.466136932373047, "learning_rate": 9.959899749373435e-06, "loss": 25.7979, "step": 23623 }, { "epoch": 562.4776119402985, "grad_norm": 25.00999641418457, "learning_rate": 9.959482038429408e-06, "loss": 26.7623, "step": 23624 }, { "epoch": 562.5014925373134, "grad_norm": 22.553794860839844, "learning_rate": 9.959064327485381e-06, "loss": 26.6372, "step": 23625 }, { "epoch": 562.5253731343283, "grad_norm": 26.19355010986328, "learning_rate": 9.958646616541354e-06, "loss": 27.4986, "step": 23626 }, { "epoch": 562.5492537313432, "grad_norm": 26.826427459716797, "learning_rate": 9.958228905597328e-06, "loss": 27.0185, "step": 23627 }, { "epoch": 562.5731343283583, "grad_norm": 23.22182846069336, "learning_rate": 9.9578111946533e-06, "loss": 26.802, "step": 23628 }, { "epoch": 562.5970149253732, "grad_norm": 23.338768005371094, "learning_rate": 9.957393483709274e-06, "loss": 26.5343, "step": 23629 }, { "epoch": 562.6208955223881, "grad_norm": 22.88638687133789, "learning_rate": 9.956975772765247e-06, "loss": 26.3177, "step": 23630 }, { "epoch": 562.644776119403, "grad_norm": 22.587753295898438, "learning_rate": 9.95655806182122e-06, "loss": 26.5895, "step": 23631 }, { "epoch": 562.6686567164179, "grad_norm": 26.443870544433594, "learning_rate": 9.956140350877194e-06, "loss": 26.8485, "step": 23632 }, { "epoch": 562.6925373134328, "grad_norm": 22.400760650634766, "learning_rate": 9.955722639933167e-06, "loss": 25.5362, "step": 23633 }, { "epoch": 562.7164179104477, "grad_norm": 20.51897430419922, "learning_rate": 9.95530492898914e-06, "loss": 26.8251, "step": 23634 }, { "epoch": 562.7402985074627, "grad_norm": 20.079404830932617, "learning_rate": 9.954887218045113e-06, "loss": 27.2624, "step": 23635 }, { "epoch": 562.7641791044776, "grad_norm": 19.95222282409668, "learning_rate": 9.954469507101086e-06, "loss": 27.571, "step": 23636 }, { "epoch": 562.7880597014926, "grad_norm": 27.25938606262207, "learning_rate": 9.95405179615706e-06, "loss": 26.921, "step": 23637 }, { "epoch": 562.8119402985075, "grad_norm": 27.11653709411621, "learning_rate": 9.953634085213033e-06, "loss": 25.8823, "step": 23638 }, { "epoch": 562.8358208955224, "grad_norm": 20.083728790283203, "learning_rate": 9.953216374269008e-06, "loss": 24.838, "step": 23639 }, { "epoch": 562.8597014925373, "grad_norm": 23.624923706054688, "learning_rate": 9.95279866332498e-06, "loss": 26.8125, "step": 23640 }, { "epoch": 562.8835820895522, "grad_norm": 27.02467155456543, "learning_rate": 9.952380952380954e-06, "loss": 25.6447, "step": 23641 }, { "epoch": 562.9074626865672, "grad_norm": 26.210933685302734, "learning_rate": 9.951963241436926e-06, "loss": 25.7848, "step": 23642 }, { "epoch": 562.9313432835821, "grad_norm": 20.29302406311035, "learning_rate": 9.9515455304929e-06, "loss": 27.1087, "step": 23643 }, { "epoch": 562.955223880597, "grad_norm": 25.29659080505371, "learning_rate": 9.951127819548872e-06, "loss": 26.1348, "step": 23644 }, { "epoch": 562.9791044776119, "grad_norm": 27.183080673217773, "learning_rate": 9.950710108604847e-06, "loss": 26.8815, "step": 23645 }, { "epoch": 563.0, "grad_norm": 22.49205780029297, "learning_rate": 9.950292397660818e-06, "loss": 23.3639, "step": 23646 }, { "epoch": 563.0238805970149, "grad_norm": 21.46384620666504, "learning_rate": 9.949874686716793e-06, "loss": 26.6871, "step": 23647 }, { "epoch": 563.0477611940298, "grad_norm": 26.02153778076172, "learning_rate": 9.949456975772766e-06, "loss": 25.9556, "step": 23648 }, { "epoch": 563.0716417910447, "grad_norm": 26.766128540039062, "learning_rate": 9.94903926482874e-06, "loss": 26.403, "step": 23649 }, { "epoch": 563.0955223880597, "grad_norm": 26.451826095581055, "learning_rate": 9.948621553884713e-06, "loss": 25.8983, "step": 23650 }, { "epoch": 563.1194029850747, "grad_norm": 19.235509872436523, "learning_rate": 9.948203842940686e-06, "loss": 25.9219, "step": 23651 }, { "epoch": 563.1432835820896, "grad_norm": 23.49021339416504, "learning_rate": 9.94778613199666e-06, "loss": 27.4971, "step": 23652 }, { "epoch": 563.1671641791045, "grad_norm": 21.75162124633789, "learning_rate": 9.947368421052632e-06, "loss": 25.8018, "step": 23653 }, { "epoch": 563.1910447761194, "grad_norm": 23.461639404296875, "learning_rate": 9.946950710108606e-06, "loss": 26.8817, "step": 23654 }, { "epoch": 563.2149253731343, "grad_norm": 24.30748176574707, "learning_rate": 9.946532999164579e-06, "loss": 25.854, "step": 23655 }, { "epoch": 563.2388059701492, "grad_norm": 26.175756454467773, "learning_rate": 9.946115288220552e-06, "loss": 26.346, "step": 23656 }, { "epoch": 563.2626865671642, "grad_norm": 22.66387367248535, "learning_rate": 9.945697577276525e-06, "loss": 25.9412, "step": 23657 }, { "epoch": 563.2865671641791, "grad_norm": 22.722688674926758, "learning_rate": 9.945279866332498e-06, "loss": 26.5764, "step": 23658 }, { "epoch": 563.310447761194, "grad_norm": NaN, "learning_rate": 9.944862155388472e-06, "loss": 34.9563, "step": 23659 }, { "epoch": 563.334328358209, "grad_norm": 28.983909606933594, "learning_rate": 9.944862155388472e-06, "loss": 26.6631, "step": 23660 }, { "epoch": 563.3582089552239, "grad_norm": 29.25253677368164, "learning_rate": 9.944444444444445e-06, "loss": 26.775, "step": 23661 }, { "epoch": 563.3820895522388, "grad_norm": 21.919052124023438, "learning_rate": 9.944026733500418e-06, "loss": 26.6358, "step": 23662 }, { "epoch": 563.4059701492537, "grad_norm": 25.973270416259766, "learning_rate": 9.943609022556391e-06, "loss": 26.6166, "step": 23663 }, { "epoch": 563.4298507462687, "grad_norm": 33.11449432373047, "learning_rate": 9.943191311612364e-06, "loss": 26.7435, "step": 23664 }, { "epoch": 563.4537313432836, "grad_norm": 19.36798095703125, "learning_rate": 9.942773600668338e-06, "loss": 26.9445, "step": 23665 }, { "epoch": 563.4776119402985, "grad_norm": 31.558591842651367, "learning_rate": 9.942355889724311e-06, "loss": 25.7842, "step": 23666 }, { "epoch": 563.5014925373134, "grad_norm": 25.96759796142578, "learning_rate": 9.941938178780284e-06, "loss": 25.475, "step": 23667 }, { "epoch": 563.5253731343283, "grad_norm": 22.82520294189453, "learning_rate": 9.941520467836257e-06, "loss": 25.8942, "step": 23668 }, { "epoch": 563.5492537313432, "grad_norm": 25.997966766357422, "learning_rate": 9.941102756892232e-06, "loss": 25.8902, "step": 23669 }, { "epoch": 563.5731343283583, "grad_norm": 30.352434158325195, "learning_rate": 9.940685045948205e-06, "loss": 26.8582, "step": 23670 }, { "epoch": 563.5970149253732, "grad_norm": 23.510778427124023, "learning_rate": 9.940267335004179e-06, "loss": 25.9671, "step": 23671 }, { "epoch": 563.6208955223881, "grad_norm": 20.746623992919922, "learning_rate": 9.939849624060152e-06, "loss": 25.6796, "step": 23672 }, { "epoch": 563.644776119403, "grad_norm": 29.746822357177734, "learning_rate": 9.939431913116125e-06, "loss": 27.5798, "step": 23673 }, { "epoch": 563.6686567164179, "grad_norm": 32.431583404541016, "learning_rate": 9.939014202172098e-06, "loss": 27.2809, "step": 23674 }, { "epoch": 563.6925373134328, "grad_norm": 21.95711326599121, "learning_rate": 9.938596491228071e-06, "loss": 26.2787, "step": 23675 }, { "epoch": 563.7164179104477, "grad_norm": 32.56563949584961, "learning_rate": 9.938178780284045e-06, "loss": 25.4225, "step": 23676 }, { "epoch": 563.7402985074627, "grad_norm": 28.277259826660156, "learning_rate": 9.937761069340018e-06, "loss": 26.7222, "step": 23677 }, { "epoch": 563.7641791044776, "grad_norm": 28.10365867614746, "learning_rate": 9.937343358395991e-06, "loss": 26.2137, "step": 23678 }, { "epoch": 563.7880597014926, "grad_norm": 25.3802547454834, "learning_rate": 9.936925647451964e-06, "loss": 25.9959, "step": 23679 }, { "epoch": 563.8119402985075, "grad_norm": 34.679996490478516, "learning_rate": 9.936507936507937e-06, "loss": 27.1194, "step": 23680 }, { "epoch": 563.8358208955224, "grad_norm": 24.221187591552734, "learning_rate": 9.93609022556391e-06, "loss": 26.295, "step": 23681 }, { "epoch": 563.8597014925373, "grad_norm": 36.25589370727539, "learning_rate": 9.935672514619884e-06, "loss": 27.4401, "step": 23682 }, { "epoch": 563.8835820895522, "grad_norm": 30.20199966430664, "learning_rate": 9.935254803675857e-06, "loss": 27.0049, "step": 23683 }, { "epoch": 563.9074626865672, "grad_norm": 27.73431396484375, "learning_rate": 9.93483709273183e-06, "loss": 25.843, "step": 23684 }, { "epoch": 563.9313432835821, "grad_norm": 31.855018615722656, "learning_rate": 9.934419381787803e-06, "loss": 26.8736, "step": 23685 }, { "epoch": 563.955223880597, "grad_norm": 25.846391677856445, "learning_rate": 9.934001670843777e-06, "loss": 26.8266, "step": 23686 }, { "epoch": 563.9791044776119, "grad_norm": 25.250991821289062, "learning_rate": 9.93358395989975e-06, "loss": 26.8002, "step": 23687 }, { "epoch": 564.0, "grad_norm": 26.443342208862305, "learning_rate": 9.933166248955723e-06, "loss": 24.1152, "step": 23688 }, { "epoch": 564.0238805970149, "grad_norm": 25.90817642211914, "learning_rate": 9.932748538011698e-06, "loss": 25.7425, "step": 23689 }, { "epoch": 564.0477611940298, "grad_norm": 30.38178062438965, "learning_rate": 9.93233082706767e-06, "loss": 25.809, "step": 23690 }, { "epoch": 564.0716417910447, "grad_norm": 23.270000457763672, "learning_rate": 9.931913116123644e-06, "loss": 26.2533, "step": 23691 }, { "epoch": 564.0955223880597, "grad_norm": 33.90614700317383, "learning_rate": 9.931495405179616e-06, "loss": 26.2127, "step": 23692 }, { "epoch": 564.1194029850747, "grad_norm": 26.58173179626465, "learning_rate": 9.93107769423559e-06, "loss": 26.7998, "step": 23693 }, { "epoch": 564.1432835820896, "grad_norm": 33.245765686035156, "learning_rate": 9.930659983291562e-06, "loss": 26.8807, "step": 23694 }, { "epoch": 564.1671641791045, "grad_norm": 27.575122833251953, "learning_rate": 9.930242272347537e-06, "loss": 27.1788, "step": 23695 }, { "epoch": 564.1910447761194, "grad_norm": 33.524635314941406, "learning_rate": 9.929824561403509e-06, "loss": 25.8086, "step": 23696 }, { "epoch": 564.2149253731343, "grad_norm": 29.29264259338379, "learning_rate": 9.929406850459483e-06, "loss": 25.9399, "step": 23697 }, { "epoch": 564.2388059701492, "grad_norm": 30.490549087524414, "learning_rate": 9.928989139515457e-06, "loss": 26.5216, "step": 23698 }, { "epoch": 564.2626865671642, "grad_norm": 27.55715560913086, "learning_rate": 9.92857142857143e-06, "loss": 26.4164, "step": 23699 }, { "epoch": 564.2865671641791, "grad_norm": 31.067838668823242, "learning_rate": 9.928153717627403e-06, "loss": 25.2571, "step": 23700 }, { "epoch": 564.310447761194, "grad_norm": 32.191688537597656, "learning_rate": 9.927736006683376e-06, "loss": 25.7794, "step": 23701 }, { "epoch": 564.334328358209, "grad_norm": 25.4544734954834, "learning_rate": 9.92731829573935e-06, "loss": 26.727, "step": 23702 }, { "epoch": 564.3582089552239, "grad_norm": 24.4661865234375, "learning_rate": 9.926900584795323e-06, "loss": 26.6651, "step": 23703 }, { "epoch": 564.3820895522388, "grad_norm": 28.834213256835938, "learning_rate": 9.926482873851296e-06, "loss": 27.4696, "step": 23704 }, { "epoch": 564.4059701492537, "grad_norm": 29.1453914642334, "learning_rate": 9.926065162907269e-06, "loss": 27.3567, "step": 23705 }, { "epoch": 564.4298507462687, "grad_norm": 25.30147361755371, "learning_rate": 9.925647451963242e-06, "loss": 26.7306, "step": 23706 }, { "epoch": 564.4537313432836, "grad_norm": 21.967741012573242, "learning_rate": 9.925229741019215e-06, "loss": 26.3134, "step": 23707 }, { "epoch": 564.4776119402985, "grad_norm": 27.185609817504883, "learning_rate": 9.924812030075189e-06, "loss": 26.7591, "step": 23708 }, { "epoch": 564.5014925373134, "grad_norm": 20.552715301513672, "learning_rate": 9.924394319131162e-06, "loss": 25.9522, "step": 23709 }, { "epoch": 564.5253731343283, "grad_norm": 24.75069808959961, "learning_rate": 9.923976608187135e-06, "loss": 27.2835, "step": 23710 }, { "epoch": 564.5492537313432, "grad_norm": 23.13471794128418, "learning_rate": 9.923558897243108e-06, "loss": 26.6129, "step": 23711 }, { "epoch": 564.5731343283583, "grad_norm": 25.378582000732422, "learning_rate": 9.923141186299083e-06, "loss": 26.2405, "step": 23712 }, { "epoch": 564.5970149253732, "grad_norm": 22.67595100402832, "learning_rate": 9.922723475355055e-06, "loss": 26.8134, "step": 23713 }, { "epoch": 564.6208955223881, "grad_norm": 26.86991310119629, "learning_rate": 9.92230576441103e-06, "loss": 26.6897, "step": 23714 }, { "epoch": 564.644776119403, "grad_norm": 24.39398956298828, "learning_rate": 9.921888053467001e-06, "loss": 25.9367, "step": 23715 }, { "epoch": 564.6686567164179, "grad_norm": 25.693429946899414, "learning_rate": 9.921470342522976e-06, "loss": 26.6609, "step": 23716 }, { "epoch": 564.6925373134328, "grad_norm": 22.935562133789062, "learning_rate": 9.921052631578947e-06, "loss": 26.0862, "step": 23717 }, { "epoch": 564.7164179104477, "grad_norm": 21.470413208007812, "learning_rate": 9.920634920634922e-06, "loss": 26.8785, "step": 23718 }, { "epoch": 564.7402985074627, "grad_norm": 22.697294235229492, "learning_rate": 9.920217209690894e-06, "loss": 26.6917, "step": 23719 }, { "epoch": 564.7641791044776, "grad_norm": 23.184507369995117, "learning_rate": 9.919799498746869e-06, "loss": 25.4668, "step": 23720 }, { "epoch": 564.7880597014926, "grad_norm": 24.89699935913086, "learning_rate": 9.91938178780284e-06, "loss": 25.1113, "step": 23721 }, { "epoch": 564.8119402985075, "grad_norm": 23.82237434387207, "learning_rate": 9.918964076858815e-06, "loss": 26.8289, "step": 23722 }, { "epoch": 564.8358208955224, "grad_norm": 25.322357177734375, "learning_rate": 9.918546365914787e-06, "loss": 26.8817, "step": 23723 }, { "epoch": 564.8597014925373, "grad_norm": 24.796249389648438, "learning_rate": 9.918128654970762e-06, "loss": 26.9794, "step": 23724 }, { "epoch": 564.8835820895522, "grad_norm": 22.427818298339844, "learning_rate": 9.917710944026733e-06, "loss": 25.4344, "step": 23725 }, { "epoch": 564.9074626865672, "grad_norm": 20.43692398071289, "learning_rate": 9.917293233082708e-06, "loss": 26.0368, "step": 23726 }, { "epoch": 564.9313432835821, "grad_norm": 23.06389808654785, "learning_rate": 9.916875522138681e-06, "loss": 27.0646, "step": 23727 }, { "epoch": 564.955223880597, "grad_norm": 27.793733596801758, "learning_rate": 9.916457811194654e-06, "loss": 26.5561, "step": 23728 }, { "epoch": 564.9791044776119, "grad_norm": 20.584028244018555, "learning_rate": 9.916040100250628e-06, "loss": 26.888, "step": 23729 }, { "epoch": 565.0, "grad_norm": 20.552404403686523, "learning_rate": 9.9156223893066e-06, "loss": 23.5392, "step": 23730 }, { "epoch": 565.0238805970149, "grad_norm": 29.950559616088867, "learning_rate": 9.915204678362574e-06, "loss": 27.1168, "step": 23731 }, { "epoch": 565.0477611940298, "grad_norm": 23.59206771850586, "learning_rate": 9.914786967418547e-06, "loss": 26.4111, "step": 23732 }, { "epoch": 565.0716417910447, "grad_norm": 21.800989151000977, "learning_rate": 9.91436925647452e-06, "loss": 25.714, "step": 23733 }, { "epoch": 565.0955223880597, "grad_norm": 27.313610076904297, "learning_rate": 9.913951545530494e-06, "loss": 26.7948, "step": 23734 }, { "epoch": 565.1194029850747, "grad_norm": 28.16474151611328, "learning_rate": 9.913533834586467e-06, "loss": 25.9635, "step": 23735 }, { "epoch": 565.1432835820896, "grad_norm": 24.574480056762695, "learning_rate": 9.91311612364244e-06, "loss": 26.7496, "step": 23736 }, { "epoch": 565.1671641791045, "grad_norm": 19.585813522338867, "learning_rate": 9.912698412698413e-06, "loss": 26.0735, "step": 23737 }, { "epoch": 565.1910447761194, "grad_norm": 25.81985092163086, "learning_rate": 9.912280701754386e-06, "loss": 27.2188, "step": 23738 }, { "epoch": 565.2149253731343, "grad_norm": 27.314002990722656, "learning_rate": 9.91186299081036e-06, "loss": 26.3101, "step": 23739 }, { "epoch": 565.2388059701492, "grad_norm": 22.767860412597656, "learning_rate": 9.911445279866333e-06, "loss": 26.5679, "step": 23740 }, { "epoch": 565.2626865671642, "grad_norm": 20.395307540893555, "learning_rate": 9.911027568922308e-06, "loss": 26.9513, "step": 23741 }, { "epoch": 565.2865671641791, "grad_norm": 37.375858306884766, "learning_rate": 9.910609857978279e-06, "loss": 25.4959, "step": 23742 }, { "epoch": 565.310447761194, "grad_norm": 23.700416564941406, "learning_rate": 9.910192147034254e-06, "loss": 27.1365, "step": 23743 }, { "epoch": 565.334328358209, "grad_norm": 30.24735450744629, "learning_rate": 9.909774436090226e-06, "loss": 25.541, "step": 23744 }, { "epoch": 565.3582089552239, "grad_norm": 34.74346923828125, "learning_rate": 9.9093567251462e-06, "loss": 25.9061, "step": 23745 }, { "epoch": 565.3820895522388, "grad_norm": 23.324203491210938, "learning_rate": 9.908939014202172e-06, "loss": 26.917, "step": 23746 }, { "epoch": 565.4059701492537, "grad_norm": 32.456146240234375, "learning_rate": 9.908521303258147e-06, "loss": 27.1887, "step": 23747 }, { "epoch": 565.4298507462687, "grad_norm": 29.9266414642334, "learning_rate": 9.908103592314118e-06, "loss": 26.8595, "step": 23748 }, { "epoch": 565.4537313432836, "grad_norm": 21.539011001586914, "learning_rate": 9.907685881370093e-06, "loss": 26.4977, "step": 23749 }, { "epoch": 565.4776119402985, "grad_norm": 28.92084312438965, "learning_rate": 9.907268170426066e-06, "loss": 25.4429, "step": 23750 }, { "epoch": 565.5014925373134, "grad_norm": 28.529151916503906, "learning_rate": 9.90685045948204e-06, "loss": 26.4127, "step": 23751 }, { "epoch": 565.5253731343283, "grad_norm": 20.454988479614258, "learning_rate": 9.906432748538013e-06, "loss": 27.4506, "step": 23752 }, { "epoch": 565.5492537313432, "grad_norm": NaN, "learning_rate": 9.906015037593986e-06, "loss": 45.7109, "step": 23753 }, { "epoch": 565.5731343283583, "grad_norm": 28.240163803100586, "learning_rate": 9.906015037593986e-06, "loss": 26.0852, "step": 23754 }, { "epoch": 565.5970149253732, "grad_norm": 29.44090461730957, "learning_rate": 9.90559732664996e-06, "loss": 26.6621, "step": 23755 }, { "epoch": 565.6208955223881, "grad_norm": 21.105379104614258, "learning_rate": 9.905179615705932e-06, "loss": 26.6984, "step": 23756 }, { "epoch": 565.644776119403, "grad_norm": 20.51219367980957, "learning_rate": 9.904761904761906e-06, "loss": 26.3774, "step": 23757 }, { "epoch": 565.6686567164179, "grad_norm": 24.400819778442383, "learning_rate": 9.904344193817879e-06, "loss": 26.8398, "step": 23758 }, { "epoch": 565.6925373134328, "grad_norm": 25.335538864135742, "learning_rate": 9.903926482873852e-06, "loss": 25.9729, "step": 23759 }, { "epoch": 565.7164179104477, "grad_norm": 22.544496536254883, "learning_rate": 9.903508771929825e-06, "loss": 26.6304, "step": 23760 }, { "epoch": 565.7402985074627, "grad_norm": 21.3834228515625, "learning_rate": 9.903091060985798e-06, "loss": 26.82, "step": 23761 }, { "epoch": 565.7641791044776, "grad_norm": 21.863222122192383, "learning_rate": 9.902673350041772e-06, "loss": 25.066, "step": 23762 }, { "epoch": 565.7880597014926, "grad_norm": 21.65203285217285, "learning_rate": 9.902255639097745e-06, "loss": 26.4391, "step": 23763 }, { "epoch": 565.8119402985075, "grad_norm": 25.525148391723633, "learning_rate": 9.901837928153718e-06, "loss": 26.7204, "step": 23764 }, { "epoch": 565.8358208955224, "grad_norm": 21.591636657714844, "learning_rate": 9.901420217209691e-06, "loss": 26.6277, "step": 23765 }, { "epoch": 565.8597014925373, "grad_norm": 24.74223518371582, "learning_rate": 9.901002506265664e-06, "loss": 26.3815, "step": 23766 }, { "epoch": 565.8835820895522, "grad_norm": 24.8945255279541, "learning_rate": 9.900584795321638e-06, "loss": 25.9986, "step": 23767 }, { "epoch": 565.9074626865672, "grad_norm": NaN, "learning_rate": 9.90016708437761e-06, "loss": 40.3311, "step": 23768 }, { "epoch": 565.9313432835821, "grad_norm": 22.683549880981445, "learning_rate": 9.90016708437761e-06, "loss": 25.851, "step": 23769 }, { "epoch": 565.955223880597, "grad_norm": 24.68444061279297, "learning_rate": 9.899749373433584e-06, "loss": 26.1236, "step": 23770 }, { "epoch": 565.9791044776119, "grad_norm": 21.616886138916016, "learning_rate": 9.899331662489559e-06, "loss": 26.8605, "step": 23771 }, { "epoch": 566.0, "grad_norm": 22.75284767150879, "learning_rate": 9.898913951545532e-06, "loss": 22.0762, "step": 23772 }, { "epoch": 566.0238805970149, "grad_norm": 21.76479148864746, "learning_rate": 9.898496240601505e-06, "loss": 27.1164, "step": 23773 }, { "epoch": 566.0477611940298, "grad_norm": 26.77754783630371, "learning_rate": 9.898078529657478e-06, "loss": 25.7543, "step": 23774 }, { "epoch": 566.0716417910447, "grad_norm": 24.262187957763672, "learning_rate": 9.897660818713452e-06, "loss": 26.2617, "step": 23775 }, { "epoch": 566.0955223880597, "grad_norm": 21.66884422302246, "learning_rate": 9.897243107769425e-06, "loss": 27.0564, "step": 23776 }, { "epoch": 566.1194029850747, "grad_norm": 23.957138061523438, "learning_rate": 9.896825396825398e-06, "loss": 26.5792, "step": 23777 }, { "epoch": 566.1432835820896, "grad_norm": 24.19512367248535, "learning_rate": 9.896407685881371e-06, "loss": 26.6686, "step": 23778 }, { "epoch": 566.1671641791045, "grad_norm": 33.72188949584961, "learning_rate": 9.895989974937344e-06, "loss": 26.7725, "step": 23779 }, { "epoch": 566.1910447761194, "grad_norm": 26.611392974853516, "learning_rate": 9.895572263993318e-06, "loss": 26.9072, "step": 23780 }, { "epoch": 566.2149253731343, "grad_norm": 23.203113555908203, "learning_rate": 9.895154553049291e-06, "loss": 26.343, "step": 23781 }, { "epoch": 566.2388059701492, "grad_norm": 23.41036033630371, "learning_rate": 9.894736842105264e-06, "loss": 27.1599, "step": 23782 }, { "epoch": 566.2626865671642, "grad_norm": 31.533239364624023, "learning_rate": 9.894319131161237e-06, "loss": 27.0212, "step": 23783 }, { "epoch": 566.2865671641791, "grad_norm": 22.478673934936523, "learning_rate": 9.89390142021721e-06, "loss": 25.8017, "step": 23784 }, { "epoch": 566.310447761194, "grad_norm": 27.432357788085938, "learning_rate": 9.893483709273184e-06, "loss": 26.9019, "step": 23785 }, { "epoch": 566.334328358209, "grad_norm": 26.323720932006836, "learning_rate": 9.893065998329157e-06, "loss": 25.4232, "step": 23786 }, { "epoch": 566.3582089552239, "grad_norm": 29.570405960083008, "learning_rate": 9.89264828738513e-06, "loss": 25.6695, "step": 23787 }, { "epoch": 566.3820895522388, "grad_norm": 24.054838180541992, "learning_rate": 9.892230576441103e-06, "loss": 26.6879, "step": 23788 }, { "epoch": 566.4059701492537, "grad_norm": 27.681907653808594, "learning_rate": 9.891812865497076e-06, "loss": 25.5181, "step": 23789 }, { "epoch": 566.4298507462687, "grad_norm": 23.428884506225586, "learning_rate": 9.89139515455305e-06, "loss": 26.663, "step": 23790 }, { "epoch": 566.4537313432836, "grad_norm": 27.2926025390625, "learning_rate": 9.890977443609023e-06, "loss": 26.5025, "step": 23791 }, { "epoch": 566.4776119402985, "grad_norm": 23.921537399291992, "learning_rate": 9.890559732664998e-06, "loss": 26.0019, "step": 23792 }, { "epoch": 566.5014925373134, "grad_norm": 24.471771240234375, "learning_rate": 9.89014202172097e-06, "loss": 26.0416, "step": 23793 }, { "epoch": 566.5253731343283, "grad_norm": 21.048561096191406, "learning_rate": 9.889724310776944e-06, "loss": 26.287, "step": 23794 }, { "epoch": 566.5492537313432, "grad_norm": 21.85687255859375, "learning_rate": 9.889306599832916e-06, "loss": 26.6016, "step": 23795 }, { "epoch": 566.5731343283583, "grad_norm": 27.359785079956055, "learning_rate": 9.88888888888889e-06, "loss": 26.8981, "step": 23796 }, { "epoch": 566.5970149253732, "grad_norm": 29.195331573486328, "learning_rate": 9.888471177944862e-06, "loss": 27.1879, "step": 23797 }, { "epoch": 566.6208955223881, "grad_norm": 22.776803970336914, "learning_rate": 9.888053467000837e-06, "loss": 26.0333, "step": 23798 }, { "epoch": 566.644776119403, "grad_norm": 23.32175636291504, "learning_rate": 9.887635756056808e-06, "loss": 26.4177, "step": 23799 }, { "epoch": 566.6686567164179, "grad_norm": 25.30919647216797, "learning_rate": 9.887218045112783e-06, "loss": 25.8426, "step": 23800 }, { "epoch": 566.6925373134328, "grad_norm": 29.665197372436523, "learning_rate": 9.886800334168755e-06, "loss": 26.2675, "step": 23801 }, { "epoch": 566.7164179104477, "grad_norm": 20.515344619750977, "learning_rate": 9.88638262322473e-06, "loss": 24.8977, "step": 23802 }, { "epoch": 566.7402985074627, "grad_norm": 29.57931900024414, "learning_rate": 9.885964912280703e-06, "loss": 26.1702, "step": 23803 }, { "epoch": 566.7641791044776, "grad_norm": 26.30927848815918, "learning_rate": 9.885547201336676e-06, "loss": 27.2483, "step": 23804 }, { "epoch": 566.7880597014926, "grad_norm": 23.971118927001953, "learning_rate": 9.88512949039265e-06, "loss": 25.7944, "step": 23805 }, { "epoch": 566.8119402985075, "grad_norm": 23.730464935302734, "learning_rate": 9.884711779448623e-06, "loss": 25.9248, "step": 23806 }, { "epoch": 566.8358208955224, "grad_norm": 33.00741958618164, "learning_rate": 9.884294068504596e-06, "loss": 26.4039, "step": 23807 }, { "epoch": 566.8597014925373, "grad_norm": 23.741291046142578, "learning_rate": 9.883876357560569e-06, "loss": 26.2516, "step": 23808 }, { "epoch": 566.8835820895522, "grad_norm": 28.061382293701172, "learning_rate": 9.883458646616542e-06, "loss": 27.5737, "step": 23809 }, { "epoch": 566.9074626865672, "grad_norm": 34.39457321166992, "learning_rate": 9.883040935672515e-06, "loss": 27.0964, "step": 23810 }, { "epoch": 566.9313432835821, "grad_norm": 26.787654876708984, "learning_rate": 9.882623224728489e-06, "loss": 25.95, "step": 23811 }, { "epoch": 566.955223880597, "grad_norm": 23.230384826660156, "learning_rate": 9.882205513784462e-06, "loss": 25.0261, "step": 23812 }, { "epoch": 566.9791044776119, "grad_norm": 36.9458122253418, "learning_rate": 9.881787802840435e-06, "loss": 26.8098, "step": 23813 }, { "epoch": 567.0, "grad_norm": 20.746456146240234, "learning_rate": 9.881370091896408e-06, "loss": 23.3568, "step": 23814 }, { "epoch": 567.0238805970149, "grad_norm": 32.61921310424805, "learning_rate": 9.880952380952381e-06, "loss": 26.2276, "step": 23815 }, { "epoch": 567.0477611940298, "grad_norm": 31.973731994628906, "learning_rate": 9.880534670008355e-06, "loss": 26.8241, "step": 23816 }, { "epoch": 567.0716417910447, "grad_norm": 23.668720245361328, "learning_rate": 9.88011695906433e-06, "loss": 26.9811, "step": 23817 }, { "epoch": 567.0955223880597, "grad_norm": 37.304298400878906, "learning_rate": 9.879699248120301e-06, "loss": 25.4223, "step": 23818 }, { "epoch": 567.1194029850747, "grad_norm": 26.362661361694336, "learning_rate": 9.879281537176276e-06, "loss": 26.0631, "step": 23819 }, { "epoch": 567.1432835820896, "grad_norm": 33.12252426147461, "learning_rate": 9.878863826232247e-06, "loss": 26.373, "step": 23820 }, { "epoch": 567.1671641791045, "grad_norm": 32.89870071411133, "learning_rate": 9.878446115288222e-06, "loss": 25.5514, "step": 23821 }, { "epoch": 567.1910447761194, "grad_norm": 24.49205207824707, "learning_rate": 9.878028404344194e-06, "loss": 25.7396, "step": 23822 }, { "epoch": 567.2149253731343, "grad_norm": 42.28169250488281, "learning_rate": 9.877610693400169e-06, "loss": 26.3106, "step": 23823 }, { "epoch": 567.2388059701492, "grad_norm": 29.378843307495117, "learning_rate": 9.87719298245614e-06, "loss": 26.2003, "step": 23824 }, { "epoch": 567.2626865671642, "grad_norm": 48.37864685058594, "learning_rate": 9.876775271512115e-06, "loss": 26.1866, "step": 23825 }, { "epoch": 567.2865671641791, "grad_norm": 27.85256576538086, "learning_rate": 9.876357560568087e-06, "loss": 25.7191, "step": 23826 }, { "epoch": 567.310447761194, "grad_norm": 38.11613082885742, "learning_rate": 9.875939849624061e-06, "loss": 25.8988, "step": 23827 }, { "epoch": 567.334328358209, "grad_norm": 25.297067642211914, "learning_rate": 9.875522138680033e-06, "loss": 26.4947, "step": 23828 }, { "epoch": 567.3582089552239, "grad_norm": 39.448875427246094, "learning_rate": 9.875104427736008e-06, "loss": 26.9062, "step": 23829 }, { "epoch": 567.3820895522388, "grad_norm": 32.145172119140625, "learning_rate": 9.87468671679198e-06, "loss": 25.6169, "step": 23830 }, { "epoch": 567.4059701492537, "grad_norm": 27.65107536315918, "learning_rate": 9.874269005847954e-06, "loss": 26.4955, "step": 23831 }, { "epoch": 567.4298507462687, "grad_norm": 30.114639282226562, "learning_rate": 9.873851294903927e-06, "loss": 26.4125, "step": 23832 }, { "epoch": 567.4537313432836, "grad_norm": 28.678020477294922, "learning_rate": 9.8734335839599e-06, "loss": 26.9537, "step": 23833 }, { "epoch": 567.4776119402985, "grad_norm": 21.79130744934082, "learning_rate": 9.873015873015874e-06, "loss": 26.0688, "step": 23834 }, { "epoch": 567.5014925373134, "grad_norm": 27.869319915771484, "learning_rate": 9.872598162071847e-06, "loss": 26.6602, "step": 23835 }, { "epoch": 567.5253731343283, "grad_norm": 25.20841407775879, "learning_rate": 9.87218045112782e-06, "loss": 26.213, "step": 23836 }, { "epoch": 567.5492537313432, "grad_norm": 22.583965301513672, "learning_rate": 9.871762740183793e-06, "loss": 26.8763, "step": 23837 }, { "epoch": 567.5731343283583, "grad_norm": 24.410783767700195, "learning_rate": 9.871345029239767e-06, "loss": 25.866, "step": 23838 }, { "epoch": 567.5970149253732, "grad_norm": 30.948387145996094, "learning_rate": 9.87092731829574e-06, "loss": 26.5676, "step": 23839 }, { "epoch": 567.6208955223881, "grad_norm": 21.999759674072266, "learning_rate": 9.870509607351713e-06, "loss": 26.0784, "step": 23840 }, { "epoch": 567.644776119403, "grad_norm": 24.823606491088867, "learning_rate": 9.870091896407686e-06, "loss": 26.9853, "step": 23841 }, { "epoch": 567.6686567164179, "grad_norm": 30.108613967895508, "learning_rate": 9.86967418546366e-06, "loss": 26.2027, "step": 23842 }, { "epoch": 567.6925373134328, "grad_norm": 23.505809783935547, "learning_rate": 9.869256474519633e-06, "loss": 26.8185, "step": 23843 }, { "epoch": 567.7164179104477, "grad_norm": 19.372089385986328, "learning_rate": 9.868838763575606e-06, "loss": 27.1556, "step": 23844 }, { "epoch": 567.7402985074627, "grad_norm": 28.037391662597656, "learning_rate": 9.868421052631579e-06, "loss": 25.5598, "step": 23845 }, { "epoch": 567.7641791044776, "grad_norm": 27.682588577270508, "learning_rate": 9.868003341687554e-06, "loss": 26.9076, "step": 23846 }, { "epoch": 567.7880597014926, "grad_norm": 19.882556915283203, "learning_rate": 9.867585630743525e-06, "loss": 26.1105, "step": 23847 }, { "epoch": 567.8119402985075, "grad_norm": 27.421144485473633, "learning_rate": 9.8671679197995e-06, "loss": 25.5298, "step": 23848 }, { "epoch": 567.8358208955224, "grad_norm": 31.762453079223633, "learning_rate": 9.866750208855472e-06, "loss": 26.8555, "step": 23849 }, { "epoch": 567.8597014925373, "grad_norm": 21.555099487304688, "learning_rate": 9.866332497911447e-06, "loss": 27.3945, "step": 23850 }, { "epoch": 567.8835820895522, "grad_norm": 24.008394241333008, "learning_rate": 9.86591478696742e-06, "loss": 26.2444, "step": 23851 }, { "epoch": 567.9074626865672, "grad_norm": 32.28863525390625, "learning_rate": 9.865497076023393e-06, "loss": 26.2454, "step": 23852 }, { "epoch": 567.9313432835821, "grad_norm": 23.118669509887695, "learning_rate": 9.865079365079366e-06, "loss": 26.4815, "step": 23853 }, { "epoch": 567.955223880597, "grad_norm": 22.90361213684082, "learning_rate": 9.86466165413534e-06, "loss": 26.4859, "step": 23854 }, { "epoch": 567.9791044776119, "grad_norm": 22.06544303894043, "learning_rate": 9.864243943191313e-06, "loss": 27.1104, "step": 23855 }, { "epoch": 568.0, "grad_norm": 25.045108795166016, "learning_rate": 9.863826232247286e-06, "loss": 22.3956, "step": 23856 }, { "epoch": 568.0238805970149, "grad_norm": 26.431283950805664, "learning_rate": 9.86340852130326e-06, "loss": 27.0567, "step": 23857 }, { "epoch": 568.0477611940298, "grad_norm": 20.926355361938477, "learning_rate": 9.862990810359232e-06, "loss": 26.5017, "step": 23858 }, { "epoch": 568.0716417910447, "grad_norm": 33.461769104003906, "learning_rate": 9.862573099415206e-06, "loss": 27.63, "step": 23859 }, { "epoch": 568.0955223880597, "grad_norm": 25.31022834777832, "learning_rate": 9.862155388471179e-06, "loss": 26.1127, "step": 23860 }, { "epoch": 568.1194029850747, "grad_norm": 27.705821990966797, "learning_rate": 9.861737677527152e-06, "loss": 27.0382, "step": 23861 }, { "epoch": 568.1432835820896, "grad_norm": 23.213760375976562, "learning_rate": 9.861319966583125e-06, "loss": 25.5124, "step": 23862 }, { "epoch": 568.1671641791045, "grad_norm": 30.25445556640625, "learning_rate": 9.860902255639098e-06, "loss": 25.9037, "step": 23863 }, { "epoch": 568.1910447761194, "grad_norm": 23.24781036376953, "learning_rate": 9.860484544695072e-06, "loss": 26.0301, "step": 23864 }, { "epoch": 568.2149253731343, "grad_norm": 22.812480926513672, "learning_rate": 9.860066833751045e-06, "loss": 25.3951, "step": 23865 }, { "epoch": 568.2388059701492, "grad_norm": 26.467082977294922, "learning_rate": 9.859649122807018e-06, "loss": 26.6548, "step": 23866 }, { "epoch": 568.2626865671642, "grad_norm": 28.423986434936523, "learning_rate": 9.859231411862991e-06, "loss": 25.5615, "step": 23867 }, { "epoch": 568.2865671641791, "grad_norm": 24.0599365234375, "learning_rate": 9.858813700918964e-06, "loss": 26.7302, "step": 23868 }, { "epoch": 568.310447761194, "grad_norm": 27.203506469726562, "learning_rate": 9.858395989974938e-06, "loss": 26.4695, "step": 23869 }, { "epoch": 568.334328358209, "grad_norm": 23.717056274414062, "learning_rate": 9.85797827903091e-06, "loss": 25.6773, "step": 23870 }, { "epoch": 568.3582089552239, "grad_norm": 31.209680557250977, "learning_rate": 9.857560568086884e-06, "loss": 27.1165, "step": 23871 }, { "epoch": 568.3820895522388, "grad_norm": 23.342193603515625, "learning_rate": 9.857142857142859e-06, "loss": 27.0693, "step": 23872 }, { "epoch": 568.4059701492537, "grad_norm": 22.662874221801758, "learning_rate": 9.85672514619883e-06, "loss": 25.8355, "step": 23873 }, { "epoch": 568.4298507462687, "grad_norm": 26.853635787963867, "learning_rate": 9.856307435254805e-06, "loss": 26.0491, "step": 23874 }, { "epoch": 568.4537313432836, "grad_norm": 20.288026809692383, "learning_rate": 9.855889724310778e-06, "loss": 26.3124, "step": 23875 }, { "epoch": 568.4776119402985, "grad_norm": 24.453645706176758, "learning_rate": 9.855472013366752e-06, "loss": 26.4866, "step": 23876 }, { "epoch": 568.5014925373134, "grad_norm": 24.523523330688477, "learning_rate": 9.855054302422725e-06, "loss": 26.769, "step": 23877 }, { "epoch": 568.5253731343283, "grad_norm": 27.624801635742188, "learning_rate": 9.854636591478698e-06, "loss": 25.4402, "step": 23878 }, { "epoch": 568.5492537313432, "grad_norm": 22.38642120361328, "learning_rate": 9.854218880534671e-06, "loss": 26.7069, "step": 23879 }, { "epoch": 568.5731343283583, "grad_norm": 26.98256492614746, "learning_rate": 9.853801169590644e-06, "loss": 26.0063, "step": 23880 }, { "epoch": 568.5970149253732, "grad_norm": 28.403945922851562, "learning_rate": 9.853383458646618e-06, "loss": 26.1671, "step": 23881 }, { "epoch": 568.6208955223881, "grad_norm": 22.268957138061523, "learning_rate": 9.85296574770259e-06, "loss": 26.6158, "step": 23882 }, { "epoch": 568.644776119403, "grad_norm": 23.984603881835938, "learning_rate": 9.852548036758564e-06, "loss": 26.0094, "step": 23883 }, { "epoch": 568.6686567164179, "grad_norm": 24.53106689453125, "learning_rate": 9.852130325814537e-06, "loss": 26.9294, "step": 23884 }, { "epoch": 568.6925373134328, "grad_norm": 22.009431838989258, "learning_rate": 9.85171261487051e-06, "loss": 25.9849, "step": 23885 }, { "epoch": 568.7164179104477, "grad_norm": 22.293556213378906, "learning_rate": 9.851294903926484e-06, "loss": 25.8994, "step": 23886 }, { "epoch": 568.7402985074627, "grad_norm": 22.927831649780273, "learning_rate": 9.850877192982457e-06, "loss": 25.5854, "step": 23887 }, { "epoch": 568.7641791044776, "grad_norm": 21.137184143066406, "learning_rate": 9.85045948203843e-06, "loss": 26.858, "step": 23888 }, { "epoch": 568.7880597014926, "grad_norm": 21.094701766967773, "learning_rate": 9.850041771094403e-06, "loss": 25.8237, "step": 23889 }, { "epoch": 568.8119402985075, "grad_norm": 21.14961051940918, "learning_rate": 9.849624060150376e-06, "loss": 26.5344, "step": 23890 }, { "epoch": 568.8358208955224, "grad_norm": 25.183141708374023, "learning_rate": 9.849206349206351e-06, "loss": 26.5489, "step": 23891 }, { "epoch": 568.8597014925373, "grad_norm": 24.516653060913086, "learning_rate": 9.848788638262323e-06, "loss": 26.3094, "step": 23892 }, { "epoch": 568.8835820895522, "grad_norm": 22.52709197998047, "learning_rate": 9.848370927318298e-06, "loss": 26.3296, "step": 23893 }, { "epoch": 568.9074626865672, "grad_norm": 19.3737735748291, "learning_rate": 9.84795321637427e-06, "loss": 26.8094, "step": 23894 }, { "epoch": 568.9313432835821, "grad_norm": 23.426040649414062, "learning_rate": 9.847535505430244e-06, "loss": 25.9427, "step": 23895 }, { "epoch": 568.955223880597, "grad_norm": 24.755640029907227, "learning_rate": 9.847117794486216e-06, "loss": 25.9815, "step": 23896 }, { "epoch": 568.9791044776119, "grad_norm": 30.128999710083008, "learning_rate": 9.84670008354219e-06, "loss": 26.3705, "step": 23897 }, { "epoch": 569.0, "grad_norm": 18.356645584106445, "learning_rate": 9.846282372598162e-06, "loss": 23.6146, "step": 23898 }, { "epoch": 569.0238805970149, "grad_norm": 27.41018295288086, "learning_rate": 9.845864661654137e-06, "loss": 26.5304, "step": 23899 }, { "epoch": 569.0477611940298, "grad_norm": 34.44643020629883, "learning_rate": 9.845446950710108e-06, "loss": 26.209, "step": 23900 }, { "epoch": 569.0716417910447, "grad_norm": 21.803659439086914, "learning_rate": 9.845029239766083e-06, "loss": 25.7817, "step": 23901 }, { "epoch": 569.0955223880597, "grad_norm": 26.30161476135254, "learning_rate": 9.844611528822055e-06, "loss": 26.047, "step": 23902 }, { "epoch": 569.1194029850747, "grad_norm": 35.21712875366211, "learning_rate": 9.84419381787803e-06, "loss": 26.7004, "step": 23903 }, { "epoch": 569.1432835820896, "grad_norm": 24.308612823486328, "learning_rate": 9.843776106934003e-06, "loss": 27.2733, "step": 23904 }, { "epoch": 569.1671641791045, "grad_norm": 31.982866287231445, "learning_rate": 9.843358395989976e-06, "loss": 26.5746, "step": 23905 }, { "epoch": 569.1910447761194, "grad_norm": 26.804380416870117, "learning_rate": 9.84294068504595e-06, "loss": 27.0236, "step": 23906 }, { "epoch": 569.2149253731343, "grad_norm": 24.77034568786621, "learning_rate": 9.842522974101923e-06, "loss": 26.0737, "step": 23907 }, { "epoch": 569.2388059701492, "grad_norm": 23.245311737060547, "learning_rate": 9.842105263157896e-06, "loss": 25.2782, "step": 23908 }, { "epoch": 569.2626865671642, "grad_norm": 36.48830795288086, "learning_rate": 9.841687552213869e-06, "loss": 25.8022, "step": 23909 }, { "epoch": 569.2865671641791, "grad_norm": 23.598806381225586, "learning_rate": 9.841269841269842e-06, "loss": 26.8316, "step": 23910 }, { "epoch": 569.310447761194, "grad_norm": 40.69245910644531, "learning_rate": 9.840852130325815e-06, "loss": 26.2641, "step": 23911 }, { "epoch": 569.334328358209, "grad_norm": 28.051515579223633, "learning_rate": 9.840434419381789e-06, "loss": 26.4625, "step": 23912 }, { "epoch": 569.3582089552239, "grad_norm": NaN, "learning_rate": 9.840016708437762e-06, "loss": 34.5396, "step": 23913 }, { "epoch": 569.3820895522388, "grad_norm": 29.037803649902344, "learning_rate": 9.840016708437762e-06, "loss": 26.3539, "step": 23914 }, { "epoch": 569.4059701492537, "grad_norm": NaN, "learning_rate": 9.839598997493735e-06, "loss": 39.6859, "step": 23915 }, { "epoch": 569.4298507462687, "grad_norm": 34.411563873291016, "learning_rate": 9.839598997493735e-06, "loss": 25.3494, "step": 23916 }, { "epoch": 569.4537313432836, "grad_norm": 23.01874351501465, "learning_rate": 9.839181286549708e-06, "loss": 26.366, "step": 23917 }, { "epoch": 569.4776119402985, "grad_norm": 48.12674331665039, "learning_rate": 9.838763575605681e-06, "loss": 26.1515, "step": 23918 }, { "epoch": 569.5014925373134, "grad_norm": 32.58781814575195, "learning_rate": 9.838345864661655e-06, "loss": 26.2076, "step": 23919 }, { "epoch": 569.5253731343283, "grad_norm": 48.90977096557617, "learning_rate": 9.837928153717628e-06, "loss": 25.9473, "step": 23920 }, { "epoch": 569.5492537313432, "grad_norm": 40.48601531982422, "learning_rate": 9.837510442773601e-06, "loss": 25.9983, "step": 23921 }, { "epoch": 569.5731343283583, "grad_norm": 52.31279373168945, "learning_rate": 9.837092731829576e-06, "loss": 25.7318, "step": 23922 }, { "epoch": 569.5970149253732, "grad_norm": 45.004764556884766, "learning_rate": 9.836675020885547e-06, "loss": 25.8271, "step": 23923 }, { "epoch": 569.6208955223881, "grad_norm": 42.57500076293945, "learning_rate": 9.836257309941522e-06, "loss": 26.6654, "step": 23924 }, { "epoch": 569.644776119403, "grad_norm": 39.146263122558594, "learning_rate": 9.835839598997494e-06, "loss": 25.9079, "step": 23925 }, { "epoch": 569.6686567164179, "grad_norm": 40.32451629638672, "learning_rate": 9.835421888053469e-06, "loss": 27.1731, "step": 23926 }, { "epoch": 569.6925373134328, "grad_norm": 32.661354064941406, "learning_rate": 9.83500417710944e-06, "loss": 26.1625, "step": 23927 }, { "epoch": 569.7164179104477, "grad_norm": 37.228363037109375, "learning_rate": 9.834586466165415e-06, "loss": 26.526, "step": 23928 }, { "epoch": 569.7402985074627, "grad_norm": 31.09703254699707, "learning_rate": 9.834168755221387e-06, "loss": 26.443, "step": 23929 }, { "epoch": 569.7641791044776, "grad_norm": 41.54597854614258, "learning_rate": 9.833751044277361e-06, "loss": 27.6506, "step": 23930 }, { "epoch": 569.7880597014926, "grad_norm": 32.10820770263672, "learning_rate": 9.833333333333333e-06, "loss": 26.7484, "step": 23931 }, { "epoch": 569.8119402985075, "grad_norm": 40.70206069946289, "learning_rate": 9.832915622389308e-06, "loss": 25.9274, "step": 23932 }, { "epoch": 569.8358208955224, "grad_norm": 33.74925231933594, "learning_rate": 9.832497911445281e-06, "loss": 26.9701, "step": 23933 }, { "epoch": 569.8597014925373, "grad_norm": 38.592037200927734, "learning_rate": 9.832080200501254e-06, "loss": 27.2839, "step": 23934 }, { "epoch": 569.8835820895522, "grad_norm": 32.93434143066406, "learning_rate": 9.831662489557227e-06, "loss": 25.594, "step": 23935 }, { "epoch": 569.9074626865672, "grad_norm": 31.15291976928711, "learning_rate": 9.8312447786132e-06, "loss": 26.3383, "step": 23936 }, { "epoch": 569.9313432835821, "grad_norm": 30.818117141723633, "learning_rate": 9.830827067669174e-06, "loss": 26.3346, "step": 23937 }, { "epoch": 569.955223880597, "grad_norm": 24.00845718383789, "learning_rate": 9.830409356725147e-06, "loss": 25.7952, "step": 23938 }, { "epoch": 569.9791044776119, "grad_norm": 35.22734832763672, "learning_rate": 9.82999164578112e-06, "loss": 26.0524, "step": 23939 }, { "epoch": 570.0, "grad_norm": 19.58418846130371, "learning_rate": 9.829573934837093e-06, "loss": 23.0361, "step": 23940 }, { "epoch": 570.0, "step": 23940, "total_flos": 1.1767922002436908e+18, "train_loss": 0.4666626299234261, "train_runtime": 12820.6805, "train_samples_per_second": 237.947, "train_steps_per_second": 1.867 }, { "epoch": 570.0238805970149, "grad_norm": 30.193344116210938, "learning_rate": 1e-05, "loss": 25.2667, "step": 23941 }, { "epoch": 570.0477611940298, "grad_norm": Infinity, "learning_rate": 9.999596448748991e-06, "loss": 35.0593, "step": 23942 }, { "epoch": 570.0716417910447, "grad_norm": Infinity, "learning_rate": 9.999596448748991e-06, "loss": 35.3714, "step": 23943 }, { "epoch": 570.0955223880597, "grad_norm": 511.2779235839844, "learning_rate": 9.999596448748991e-06, "loss": 36.1882, "step": 23944 }, { "epoch": 570.1194029850747, "grad_norm": NaN, "learning_rate": 9.999192897497983e-06, "loss": 40.2555, "step": 23945 }, { "epoch": 570.1432835820896, "grad_norm": 270.35540771484375, "learning_rate": 9.999192897497983e-06, "loss": 32.2942, "step": 23946 }, { "epoch": 570.1671641791045, "grad_norm": 112.23412322998047, "learning_rate": 9.998789346246974e-06, "loss": 29.3251, "step": 23947 }, { "epoch": 570.1910447761194, "grad_norm": 115.56561279296875, "learning_rate": 9.998385794995966e-06, "loss": 27.0258, "step": 23948 }, { "epoch": 570.2149253731343, "grad_norm": 70.28520965576172, "learning_rate": 9.997982243744958e-06, "loss": 28.6172, "step": 23949 }, { "epoch": 570.2388059701492, "grad_norm": 66.74665069580078, "learning_rate": 9.997578692493948e-06, "loss": 27.0061, "step": 23950 }, { "epoch": 570.2626865671642, "grad_norm": 59.5788459777832, "learning_rate": 9.997175141242938e-06, "loss": 27.0403, "step": 23951 }, { "epoch": 570.2865671641791, "grad_norm": 49.978946685791016, "learning_rate": 9.996771589991929e-06, "loss": 27.4091, "step": 23952 }, { "epoch": 570.310447761194, "grad_norm": 49.20445251464844, "learning_rate": 9.996368038740921e-06, "loss": 27.9772, "step": 23953 }, { "epoch": 570.334328358209, "grad_norm": 39.7263298034668, "learning_rate": 9.995964487489911e-06, "loss": 26.5932, "step": 23954 }, { "epoch": 570.3582089552239, "grad_norm": 33.18780517578125, "learning_rate": 9.995560936238903e-06, "loss": 26.9544, "step": 23955 }, { "epoch": 570.3820895522388, "grad_norm": 36.03767395019531, "learning_rate": 9.995157384987895e-06, "loss": 27.6122, "step": 23956 }, { "epoch": 570.4059701492537, "grad_norm": 29.83487319946289, "learning_rate": 9.994753833736886e-06, "loss": 26.9484, "step": 23957 }, { "epoch": 570.4298507462687, "grad_norm": 29.18062973022461, "learning_rate": 9.994350282485876e-06, "loss": 26.2349, "step": 23958 }, { "epoch": 570.4537313432836, "grad_norm": 27.747690200805664, "learning_rate": 9.993946731234868e-06, "loss": 26.317, "step": 23959 }, { "epoch": 570.4776119402985, "grad_norm": 25.323944091796875, "learning_rate": 9.993543179983859e-06, "loss": 25.8144, "step": 23960 }, { "epoch": 570.5014925373134, "grad_norm": 24.29176902770996, "learning_rate": 9.993139628732849e-06, "loss": 26.3103, "step": 23961 }, { "epoch": 570.5253731343283, "grad_norm": 24.347991943359375, "learning_rate": 9.992736077481841e-06, "loss": 26.539, "step": 23962 }, { "epoch": 570.5492537313432, "grad_norm": 25.916732788085938, "learning_rate": 9.992332526230833e-06, "loss": 26.3588, "step": 23963 }, { "epoch": 570.5731343283583, "grad_norm": 30.141239166259766, "learning_rate": 9.991928974979823e-06, "loss": 26.2859, "step": 23964 }, { "epoch": 570.5970149253732, "grad_norm": 23.581836700439453, "learning_rate": 9.991525423728814e-06, "loss": 25.9106, "step": 23965 }, { "epoch": 570.6208955223881, "grad_norm": 22.125450134277344, "learning_rate": 9.991121872477806e-06, "loss": 26.2524, "step": 23966 }, { "epoch": 570.644776119403, "grad_norm": 20.94761848449707, "learning_rate": 9.990718321226796e-06, "loss": 26.6476, "step": 23967 }, { "epoch": 570.6686567164179, "grad_norm": 21.223426818847656, "learning_rate": 9.990314769975787e-06, "loss": 26.1099, "step": 23968 }, { "epoch": 570.6925373134328, "grad_norm": 24.430906295776367, "learning_rate": 9.989911218724779e-06, "loss": 26.4795, "step": 23969 }, { "epoch": 570.7164179104477, "grad_norm": 21.18742561340332, "learning_rate": 9.98950766747377e-06, "loss": 26.9429, "step": 23970 }, { "epoch": 570.7402985074627, "grad_norm": 26.01275634765625, "learning_rate": 9.989104116222761e-06, "loss": 26.6424, "step": 23971 }, { "epoch": 570.7641791044776, "grad_norm": 29.28586196899414, "learning_rate": 9.988700564971753e-06, "loss": 26.3757, "step": 23972 }, { "epoch": 570.7880597014926, "grad_norm": 23.061616897583008, "learning_rate": 9.988297013720744e-06, "loss": 26.4372, "step": 23973 }, { "epoch": 570.8119402985075, "grad_norm": 22.411911010742188, "learning_rate": 9.987893462469734e-06, "loss": 26.5806, "step": 23974 }, { "epoch": 570.8358208955224, "grad_norm": 33.1296501159668, "learning_rate": 9.987489911218726e-06, "loss": 26.7805, "step": 23975 }, { "epoch": 570.8597014925373, "grad_norm": 24.064136505126953, "learning_rate": 9.987086359967716e-06, "loss": 26.0189, "step": 23976 }, { "epoch": 570.8835820895522, "grad_norm": 22.974924087524414, "learning_rate": 9.986682808716708e-06, "loss": 26.159, "step": 23977 }, { "epoch": 570.9074626865672, "grad_norm": 24.003713607788086, "learning_rate": 9.986279257465699e-06, "loss": 26.1804, "step": 23978 }, { "epoch": 570.9313432835821, "grad_norm": 19.772377014160156, "learning_rate": 9.98587570621469e-06, "loss": 25.0821, "step": 23979 }, { "epoch": 570.955223880597, "grad_norm": 24.541913986206055, "learning_rate": 9.985472154963681e-06, "loss": 26.5659, "step": 23980 }, { "epoch": 570.9791044776119, "grad_norm": 25.030941009521484, "learning_rate": 9.985068603712672e-06, "loss": 25.5245, "step": 23981 }, { "epoch": 571.0, "grad_norm": 23.272247314453125, "learning_rate": 9.984665052461664e-06, "loss": 22.9441, "step": 23982 }, { "epoch": 571.0238805970149, "grad_norm": 21.212961196899414, "learning_rate": 9.984261501210654e-06, "loss": 26.7891, "step": 23983 }, { "epoch": 571.0477611940298, "grad_norm": 23.754220962524414, "learning_rate": 9.983857949959646e-06, "loss": 26.9083, "step": 23984 }, { "epoch": 571.0716417910447, "grad_norm": 22.96526527404785, "learning_rate": 9.983454398708636e-06, "loss": 25.5479, "step": 23985 }, { "epoch": 571.0955223880597, "grad_norm": 22.94721221923828, "learning_rate": 9.983050847457628e-06, "loss": 27.0763, "step": 23986 }, { "epoch": 571.1194029850747, "grad_norm": 24.373004913330078, "learning_rate": 9.982647296206619e-06, "loss": 26.5276, "step": 23987 }, { "epoch": 571.1432835820896, "grad_norm": 20.585973739624023, "learning_rate": 9.98224374495561e-06, "loss": 25.1078, "step": 23988 }, { "epoch": 571.1671641791045, "grad_norm": 24.114551544189453, "learning_rate": 9.981840193704601e-06, "loss": 26.0694, "step": 23989 }, { "epoch": 571.1910447761194, "grad_norm": 26.512006759643555, "learning_rate": 9.981436642453592e-06, "loss": 27.3142, "step": 23990 }, { "epoch": 571.2149253731343, "grad_norm": 21.08365821838379, "learning_rate": 9.981033091202584e-06, "loss": 25.4212, "step": 23991 }, { "epoch": 571.2388059701492, "grad_norm": 23.667583465576172, "learning_rate": 9.980629539951576e-06, "loss": 27.547, "step": 23992 }, { "epoch": 571.2626865671642, "grad_norm": 21.026180267333984, "learning_rate": 9.980225988700566e-06, "loss": 26.1989, "step": 23993 }, { "epoch": 571.2865671641791, "grad_norm": 20.369022369384766, "learning_rate": 9.979822437449557e-06, "loss": 27.6384, "step": 23994 }, { "epoch": 571.310447761194, "grad_norm": 22.792266845703125, "learning_rate": 9.979418886198547e-06, "loss": 26.7557, "step": 23995 }, { "epoch": 571.334328358209, "grad_norm": 31.986400604248047, "learning_rate": 9.979015334947539e-06, "loss": 25.6464, "step": 23996 }, { "epoch": 571.3582089552239, "grad_norm": 24.96759796142578, "learning_rate": 9.978611783696531e-06, "loss": 26.6908, "step": 23997 }, { "epoch": 571.3820895522388, "grad_norm": 22.031883239746094, "learning_rate": 9.978208232445521e-06, "loss": 26.218, "step": 23998 }, { "epoch": 571.4059701492537, "grad_norm": 28.17102813720703, "learning_rate": 9.977804681194513e-06, "loss": 25.7434, "step": 23999 }, { "epoch": 571.4298507462687, "grad_norm": 27.292449951171875, "learning_rate": 9.977401129943504e-06, "loss": 25.7213, "step": 24000 }, { "epoch": 571.4537313432836, "grad_norm": 19.219881057739258, "learning_rate": 9.976997578692494e-06, "loss": 26.255, "step": 24001 }, { "epoch": 571.4776119402985, "grad_norm": 23.764001846313477, "learning_rate": 9.976594027441486e-06, "loss": 26.3665, "step": 24002 }, { "epoch": 571.5014925373134, "grad_norm": 24.13587760925293, "learning_rate": 9.976190476190477e-06, "loss": 25.6273, "step": 24003 }, { "epoch": 571.5253731343283, "grad_norm": 22.029754638671875, "learning_rate": 9.975786924939469e-06, "loss": 26.9161, "step": 24004 }, { "epoch": 571.5492537313432, "grad_norm": 21.84878158569336, "learning_rate": 9.975383373688459e-06, "loss": 26.4977, "step": 24005 }, { "epoch": 571.5731343283583, "grad_norm": 21.676511764526367, "learning_rate": 9.974979822437451e-06, "loss": 25.497, "step": 24006 }, { "epoch": 571.5970149253732, "grad_norm": 23.52407455444336, "learning_rate": 9.974576271186441e-06, "loss": 26.528, "step": 24007 }, { "epoch": 571.6208955223881, "grad_norm": 32.32431411743164, "learning_rate": 9.974172719935432e-06, "loss": 25.3513, "step": 24008 }, { "epoch": 571.644776119403, "grad_norm": 24.137542724609375, "learning_rate": 9.973769168684424e-06, "loss": 26.683, "step": 24009 }, { "epoch": 571.6686567164179, "grad_norm": 22.614944458007812, "learning_rate": 9.973365617433414e-06, "loss": 26.5924, "step": 24010 }, { "epoch": 571.6925373134328, "grad_norm": 25.70018768310547, "learning_rate": 9.972962066182406e-06, "loss": 26.1927, "step": 24011 }, { "epoch": 571.7164179104477, "grad_norm": 27.24943733215332, "learning_rate": 9.972558514931397e-06, "loss": 26.439, "step": 24012 }, { "epoch": 571.7402985074627, "grad_norm": 26.981307983398438, "learning_rate": 9.972154963680389e-06, "loss": 25.9861, "step": 24013 }, { "epoch": 571.7641791044776, "grad_norm": 20.645151138305664, "learning_rate": 9.971751412429379e-06, "loss": 26.6204, "step": 24014 }, { "epoch": 571.7880597014926, "grad_norm": 23.221248626708984, "learning_rate": 9.971347861178371e-06, "loss": 26.3232, "step": 24015 }, { "epoch": 571.8119402985075, "grad_norm": 21.14935302734375, "learning_rate": 9.970944309927362e-06, "loss": 24.6416, "step": 24016 }, { "epoch": 571.8358208955224, "grad_norm": 28.34040641784668, "learning_rate": 9.970540758676352e-06, "loss": 26.2769, "step": 24017 }, { "epoch": 571.8597014925373, "grad_norm": 23.77415657043457, "learning_rate": 9.970137207425344e-06, "loss": 26.4697, "step": 24018 }, { "epoch": 571.8835820895522, "grad_norm": 24.574047088623047, "learning_rate": 9.969733656174336e-06, "loss": 26.0816, "step": 24019 }, { "epoch": 571.9074626865672, "grad_norm": 23.53451919555664, "learning_rate": 9.969330104923326e-06, "loss": 26.3559, "step": 24020 }, { "epoch": 571.9313432835821, "grad_norm": 21.579662322998047, "learning_rate": 9.968926553672317e-06, "loss": 26.1107, "step": 24021 }, { "epoch": 571.955223880597, "grad_norm": 26.272085189819336, "learning_rate": 9.968523002421309e-06, "loss": 26.1981, "step": 24022 }, { "epoch": 571.9791044776119, "grad_norm": 24.16188621520996, "learning_rate": 9.9681194511703e-06, "loss": 26.3567, "step": 24023 }, { "epoch": 572.0, "grad_norm": 18.441164016723633, "learning_rate": 9.96771589991929e-06, "loss": 23.126, "step": 24024 }, { "epoch": 572.0238805970149, "grad_norm": 25.07834243774414, "learning_rate": 9.967312348668282e-06, "loss": 25.8125, "step": 24025 }, { "epoch": 572.0477611940298, "grad_norm": 24.944927215576172, "learning_rate": 9.966908797417274e-06, "loss": 26.4034, "step": 24026 }, { "epoch": 572.0716417910447, "grad_norm": 22.60517692565918, "learning_rate": 9.966505246166264e-06, "loss": 26.2051, "step": 24027 }, { "epoch": 572.0955223880597, "grad_norm": 18.674816131591797, "learning_rate": 9.966101694915256e-06, "loss": 25.331, "step": 24028 }, { "epoch": 572.1194029850747, "grad_norm": 20.34892463684082, "learning_rate": 9.965698143664246e-06, "loss": 26.4111, "step": 24029 }, { "epoch": 572.1432835820896, "grad_norm": 23.972251892089844, "learning_rate": 9.965294592413237e-06, "loss": 26.7123, "step": 24030 }, { "epoch": 572.1671641791045, "grad_norm": 22.86956787109375, "learning_rate": 9.964891041162227e-06, "loss": 25.9988, "step": 24031 }, { "epoch": 572.1910447761194, "grad_norm": 27.162931442260742, "learning_rate": 9.96448748991122e-06, "loss": 26.3858, "step": 24032 }, { "epoch": 572.2149253731343, "grad_norm": 22.02787208557129, "learning_rate": 9.964083938660211e-06, "loss": 25.776, "step": 24033 }, { "epoch": 572.2388059701492, "grad_norm": 23.386877059936523, "learning_rate": 9.963680387409202e-06, "loss": 26.301, "step": 24034 }, { "epoch": 572.2626865671642, "grad_norm": 24.412508010864258, "learning_rate": 9.963276836158194e-06, "loss": 26.5974, "step": 24035 }, { "epoch": 572.2865671641791, "grad_norm": NaN, "learning_rate": 9.962873284907184e-06, "loss": 38.6336, "step": 24036 }, { "epoch": 572.310447761194, "grad_norm": 29.396345138549805, "learning_rate": 9.962873284907184e-06, "loss": 26.4778, "step": 24037 }, { "epoch": 572.334328358209, "grad_norm": 23.032835006713867, "learning_rate": 9.962469733656175e-06, "loss": 26.1654, "step": 24038 }, { "epoch": 572.3582089552239, "grad_norm": 21.683156967163086, "learning_rate": 9.962066182405167e-06, "loss": 26.4057, "step": 24039 }, { "epoch": 572.3820895522388, "grad_norm": 25.860557556152344, "learning_rate": 9.961662631154157e-06, "loss": 27.1678, "step": 24040 }, { "epoch": 572.4059701492537, "grad_norm": 27.968830108642578, "learning_rate": 9.961259079903149e-06, "loss": 26.0479, "step": 24041 }, { "epoch": 572.4298507462687, "grad_norm": 24.918210983276367, "learning_rate": 9.96085552865214e-06, "loss": 27.1622, "step": 24042 }, { "epoch": 572.4537313432836, "grad_norm": 18.79123878479004, "learning_rate": 9.960451977401131e-06, "loss": 25.7008, "step": 24043 }, { "epoch": 572.4776119402985, "grad_norm": 22.398956298828125, "learning_rate": 9.960048426150122e-06, "loss": 26.4414, "step": 24044 }, { "epoch": 572.5014925373134, "grad_norm": 19.748933792114258, "learning_rate": 9.959644874899112e-06, "loss": 25.4496, "step": 24045 }, { "epoch": 572.5253731343283, "grad_norm": 26.312820434570312, "learning_rate": 9.959241323648104e-06, "loss": 27.2233, "step": 24046 }, { "epoch": 572.5492537313432, "grad_norm": 23.85513687133789, "learning_rate": 9.958837772397095e-06, "loss": 25.2183, "step": 24047 }, { "epoch": 572.5731343283583, "grad_norm": 24.512001037597656, "learning_rate": 9.958434221146087e-06, "loss": 25.5819, "step": 24048 }, { "epoch": 572.5970149253732, "grad_norm": 23.426240921020508, "learning_rate": 9.958030669895079e-06, "loss": 26.3059, "step": 24049 }, { "epoch": 572.6208955223881, "grad_norm": 23.811784744262695, "learning_rate": 9.957627118644069e-06, "loss": 26.7618, "step": 24050 }, { "epoch": 572.644776119403, "grad_norm": 21.203752517700195, "learning_rate": 9.95722356739306e-06, "loss": 27.1102, "step": 24051 }, { "epoch": 572.6686567164179, "grad_norm": 21.730989456176758, "learning_rate": 9.956820016142052e-06, "loss": 25.8231, "step": 24052 }, { "epoch": 572.6925373134328, "grad_norm": 22.406150817871094, "learning_rate": 9.956416464891042e-06, "loss": 26.0786, "step": 24053 }, { "epoch": 572.7164179104477, "grad_norm": 25.631507873535156, "learning_rate": 9.956012913640032e-06, "loss": 26.1103, "step": 24054 }, { "epoch": 572.7402985074627, "grad_norm": 30.963092803955078, "learning_rate": 9.955609362389024e-06, "loss": 25.9865, "step": 24055 }, { "epoch": 572.7641791044776, "grad_norm": 23.31107521057129, "learning_rate": 9.955205811138016e-06, "loss": 25.9798, "step": 24056 }, { "epoch": 572.7880597014926, "grad_norm": 22.566333770751953, "learning_rate": 9.954802259887007e-06, "loss": 26.028, "step": 24057 }, { "epoch": 572.8119402985075, "grad_norm": 23.53976058959961, "learning_rate": 9.954398708635997e-06, "loss": 27.1741, "step": 24058 }, { "epoch": 572.8358208955224, "grad_norm": 29.447227478027344, "learning_rate": 9.95399515738499e-06, "loss": 25.9107, "step": 24059 }, { "epoch": 572.8597014925373, "grad_norm": 24.463165283203125, "learning_rate": 9.95359160613398e-06, "loss": 26.9676, "step": 24060 }, { "epoch": 572.8835820895522, "grad_norm": 21.23699951171875, "learning_rate": 9.95318805488297e-06, "loss": 26.4905, "step": 24061 }, { "epoch": 572.9074626865672, "grad_norm": 21.914012908935547, "learning_rate": 9.952784503631962e-06, "loss": 25.3534, "step": 24062 }, { "epoch": 572.9313432835821, "grad_norm": 21.197917938232422, "learning_rate": 9.952380952380954e-06, "loss": 26.2932, "step": 24063 }, { "epoch": 572.955223880597, "grad_norm": 24.31838035583496, "learning_rate": 9.951977401129944e-06, "loss": 26.7211, "step": 24064 }, { "epoch": 572.9791044776119, "grad_norm": 23.255504608154297, "learning_rate": 9.951573849878935e-06, "loss": 25.8493, "step": 24065 }, { "epoch": 573.0, "grad_norm": 20.842851638793945, "learning_rate": 9.951170298627927e-06, "loss": 23.0911, "step": 24066 }, { "epoch": 573.0238805970149, "grad_norm": 23.88947868347168, "learning_rate": 9.950766747376917e-06, "loss": 26.912, "step": 24067 }, { "epoch": 573.0477611940298, "grad_norm": 28.783008575439453, "learning_rate": 9.950363196125908e-06, "loss": 26.506, "step": 24068 }, { "epoch": 573.0716417910447, "grad_norm": 24.746326446533203, "learning_rate": 9.9499596448749e-06, "loss": 25.5665, "step": 24069 }, { "epoch": 573.0955223880597, "grad_norm": 20.54570960998535, "learning_rate": 9.949556093623892e-06, "loss": 25.6216, "step": 24070 }, { "epoch": 573.1194029850747, "grad_norm": 21.675840377807617, "learning_rate": 9.949152542372882e-06, "loss": 25.1262, "step": 24071 }, { "epoch": 573.1432835820896, "grad_norm": 20.872629165649414, "learning_rate": 9.948748991121874e-06, "loss": 26.3844, "step": 24072 }, { "epoch": 573.1671641791045, "grad_norm": 25.81085205078125, "learning_rate": 9.948345439870865e-06, "loss": 25.619, "step": 24073 }, { "epoch": 573.1910447761194, "grad_norm": 26.320589065551758, "learning_rate": 9.947941888619855e-06, "loss": 25.8909, "step": 24074 }, { "epoch": 573.2149253731343, "grad_norm": 23.139320373535156, "learning_rate": 9.947538337368847e-06, "loss": 26.0643, "step": 24075 }, { "epoch": 573.2388059701492, "grad_norm": 25.61953353881836, "learning_rate": 9.947134786117837e-06, "loss": 25.6219, "step": 24076 }, { "epoch": 573.2626865671642, "grad_norm": 19.67869758605957, "learning_rate": 9.94673123486683e-06, "loss": 26.0409, "step": 24077 }, { "epoch": 573.2865671641791, "grad_norm": 28.441448211669922, "learning_rate": 9.94632768361582e-06, "loss": 26.8511, "step": 24078 }, { "epoch": 573.310447761194, "grad_norm": 24.314929962158203, "learning_rate": 9.945924132364812e-06, "loss": 25.9387, "step": 24079 }, { "epoch": 573.334328358209, "grad_norm": 24.933263778686523, "learning_rate": 9.945520581113802e-06, "loss": 26.2331, "step": 24080 }, { "epoch": 573.3582089552239, "grad_norm": 22.949386596679688, "learning_rate": 9.945117029862793e-06, "loss": 26.8475, "step": 24081 }, { "epoch": 573.3820895522388, "grad_norm": 22.225521087646484, "learning_rate": 9.944713478611785e-06, "loss": 25.9992, "step": 24082 }, { "epoch": 573.4059701492537, "grad_norm": 33.52238082885742, "learning_rate": 9.944309927360775e-06, "loss": 27.0388, "step": 24083 }, { "epoch": 573.4298507462687, "grad_norm": 21.10393714904785, "learning_rate": 9.943906376109767e-06, "loss": 25.7574, "step": 24084 }, { "epoch": 573.4537313432836, "grad_norm": 23.103307723999023, "learning_rate": 9.943502824858759e-06, "loss": 25.982, "step": 24085 }, { "epoch": 573.4776119402985, "grad_norm": 31.789321899414062, "learning_rate": 9.94309927360775e-06, "loss": 26.2107, "step": 24086 }, { "epoch": 573.5014925373134, "grad_norm": 26.790449142456055, "learning_rate": 9.94269572235674e-06, "loss": 26.2648, "step": 24087 }, { "epoch": 573.5253731343283, "grad_norm": 23.800857543945312, "learning_rate": 9.94229217110573e-06, "loss": 25.7805, "step": 24088 }, { "epoch": 573.5492537313432, "grad_norm": 20.004297256469727, "learning_rate": 9.941888619854722e-06, "loss": 25.1267, "step": 24089 }, { "epoch": 573.5731343283583, "grad_norm": 23.895984649658203, "learning_rate": 9.941485068603713e-06, "loss": 27.2898, "step": 24090 }, { "epoch": 573.5970149253732, "grad_norm": 24.850526809692383, "learning_rate": 9.941081517352705e-06, "loss": 25.6838, "step": 24091 }, { "epoch": 573.6208955223881, "grad_norm": 23.890548706054688, "learning_rate": 9.940677966101697e-06, "loss": 25.843, "step": 24092 }, { "epoch": 573.644776119403, "grad_norm": 27.77503204345703, "learning_rate": 9.940274414850687e-06, "loss": 25.917, "step": 24093 }, { "epoch": 573.6686567164179, "grad_norm": 23.257705688476562, "learning_rate": 9.939870863599677e-06, "loss": 26.4869, "step": 24094 }, { "epoch": 573.6925373134328, "grad_norm": 28.826934814453125, "learning_rate": 9.93946731234867e-06, "loss": 26.6706, "step": 24095 }, { "epoch": 573.7164179104477, "grad_norm": 24.84096336364746, "learning_rate": 9.93906376109766e-06, "loss": 26.2399, "step": 24096 }, { "epoch": 573.7402985074627, "grad_norm": 25.174442291259766, "learning_rate": 9.93866020984665e-06, "loss": 26.2324, "step": 24097 }, { "epoch": 573.7641791044776, "grad_norm": 21.762331008911133, "learning_rate": 9.938256658595642e-06, "loss": 26.3897, "step": 24098 }, { "epoch": 573.7880597014926, "grad_norm": 24.059003829956055, "learning_rate": 9.937853107344634e-06, "loss": 26.7416, "step": 24099 }, { "epoch": 573.8119402985075, "grad_norm": 21.178558349609375, "learning_rate": 9.937449556093625e-06, "loss": 25.4664, "step": 24100 }, { "epoch": 573.8358208955224, "grad_norm": 22.714265823364258, "learning_rate": 9.937046004842615e-06, "loss": 26.2357, "step": 24101 }, { "epoch": 573.8597014925373, "grad_norm": 24.024551391601562, "learning_rate": 9.936642453591607e-06, "loss": 26.6572, "step": 24102 }, { "epoch": 573.8835820895522, "grad_norm": 25.171255111694336, "learning_rate": 9.936238902340598e-06, "loss": 26.8321, "step": 24103 }, { "epoch": 573.9074626865672, "grad_norm": 23.179468154907227, "learning_rate": 9.93583535108959e-06, "loss": 25.5316, "step": 24104 }, { "epoch": 573.9313432835821, "grad_norm": 22.864025115966797, "learning_rate": 9.93543179983858e-06, "loss": 27.2418, "step": 24105 }, { "epoch": 573.955223880597, "grad_norm": 21.143701553344727, "learning_rate": 9.935028248587572e-06, "loss": 26.2614, "step": 24106 }, { "epoch": 573.9791044776119, "grad_norm": 21.40639305114746, "learning_rate": 9.934624697336562e-06, "loss": 26.5752, "step": 24107 }, { "epoch": 574.0, "grad_norm": 21.089744567871094, "learning_rate": 9.934221146085555e-06, "loss": 23.1321, "step": 24108 }, { "epoch": 574.0238805970149, "grad_norm": 26.302915573120117, "learning_rate": 9.933817594834545e-06, "loss": 26.3128, "step": 24109 }, { "epoch": 574.0477611940298, "grad_norm": 21.847110748291016, "learning_rate": 9.933414043583535e-06, "loss": 26.0925, "step": 24110 }, { "epoch": 574.0716417910447, "grad_norm": 22.883712768554688, "learning_rate": 9.933010492332527e-06, "loss": 25.8776, "step": 24111 }, { "epoch": 574.0955223880597, "grad_norm": 21.186979293823242, "learning_rate": 9.932606941081518e-06, "loss": 26.4052, "step": 24112 }, { "epoch": 574.1194029850747, "grad_norm": 24.178756713867188, "learning_rate": 9.93220338983051e-06, "loss": 25.5099, "step": 24113 }, { "epoch": 574.1432835820896, "grad_norm": 23.507671356201172, "learning_rate": 9.9317998385795e-06, "loss": 25.9339, "step": 24114 }, { "epoch": 574.1671641791045, "grad_norm": 26.260761260986328, "learning_rate": 9.931396287328492e-06, "loss": 24.7878, "step": 24115 }, { "epoch": 574.1910447761194, "grad_norm": 25.100126266479492, "learning_rate": 9.930992736077483e-06, "loss": 26.2212, "step": 24116 }, { "epoch": 574.2149253731343, "grad_norm": 23.443241119384766, "learning_rate": 9.930589184826473e-06, "loss": 25.9411, "step": 24117 }, { "epoch": 574.2388059701492, "grad_norm": 25.517316818237305, "learning_rate": 9.930185633575465e-06, "loss": 26.3735, "step": 24118 }, { "epoch": 574.2626865671642, "grad_norm": 21.19073486328125, "learning_rate": 9.929782082324455e-06, "loss": 25.3462, "step": 24119 }, { "epoch": 574.2865671641791, "grad_norm": 27.664058685302734, "learning_rate": 9.929378531073447e-06, "loss": 24.8596, "step": 24120 }, { "epoch": 574.310447761194, "grad_norm": 26.616334915161133, "learning_rate": 9.928974979822438e-06, "loss": 26.6212, "step": 24121 }, { "epoch": 574.334328358209, "grad_norm": 27.095014572143555, "learning_rate": 9.92857142857143e-06, "loss": 27.0339, "step": 24122 }, { "epoch": 574.3582089552239, "grad_norm": 21.17949676513672, "learning_rate": 9.92816787732042e-06, "loss": 25.9344, "step": 24123 }, { "epoch": 574.3820895522388, "grad_norm": 21.71689224243164, "learning_rate": 9.92776432606941e-06, "loss": 26.0455, "step": 24124 }, { "epoch": 574.4059701492537, "grad_norm": 23.791555404663086, "learning_rate": 9.927360774818403e-06, "loss": 26.3468, "step": 24125 }, { "epoch": 574.4298507462687, "grad_norm": 21.414264678955078, "learning_rate": 9.926957223567395e-06, "loss": 27.2251, "step": 24126 }, { "epoch": 574.4537313432836, "grad_norm": 25.190500259399414, "learning_rate": 9.926553672316385e-06, "loss": 26.7521, "step": 24127 }, { "epoch": 574.4776119402985, "grad_norm": 23.50606346130371, "learning_rate": 9.926150121065377e-06, "loss": 26.0774, "step": 24128 }, { "epoch": 574.5014925373134, "grad_norm": 22.292131423950195, "learning_rate": 9.925746569814367e-06, "loss": 26.8934, "step": 24129 }, { "epoch": 574.5253731343283, "grad_norm": 22.090383529663086, "learning_rate": 9.925343018563358e-06, "loss": 24.7211, "step": 24130 }, { "epoch": 574.5492537313432, "grad_norm": 22.971866607666016, "learning_rate": 9.92493946731235e-06, "loss": 26.6281, "step": 24131 }, { "epoch": 574.5731343283583, "grad_norm": 23.84174919128418, "learning_rate": 9.92453591606134e-06, "loss": 26.0551, "step": 24132 }, { "epoch": 574.5970149253732, "grad_norm": 20.67957305908203, "learning_rate": 9.924132364810332e-06, "loss": 25.7299, "step": 24133 }, { "epoch": 574.6208955223881, "grad_norm": 26.593875885009766, "learning_rate": 9.923728813559323e-06, "loss": 26.5227, "step": 24134 }, { "epoch": 574.644776119403, "grad_norm": 20.144285202026367, "learning_rate": 9.923325262308315e-06, "loss": 25.8312, "step": 24135 }, { "epoch": 574.6686567164179, "grad_norm": 32.20690155029297, "learning_rate": 9.922921711057305e-06, "loss": 26.1126, "step": 24136 }, { "epoch": 574.6925373134328, "grad_norm": 29.16192054748535, "learning_rate": 9.922518159806296e-06, "loss": 26.3806, "step": 24137 }, { "epoch": 574.7164179104477, "grad_norm": 24.11377716064453, "learning_rate": 9.922114608555288e-06, "loss": 26.1153, "step": 24138 }, { "epoch": 574.7402985074627, "grad_norm": 23.88627052307129, "learning_rate": 9.921711057304278e-06, "loss": 25.6155, "step": 24139 }, { "epoch": 574.7641791044776, "grad_norm": 25.38883399963379, "learning_rate": 9.92130750605327e-06, "loss": 26.2066, "step": 24140 }, { "epoch": 574.7880597014926, "grad_norm": 29.78312110900879, "learning_rate": 9.92090395480226e-06, "loss": 26.0272, "step": 24141 }, { "epoch": 574.8119402985075, "grad_norm": 29.156667709350586, "learning_rate": 9.920500403551252e-06, "loss": 25.4514, "step": 24142 }, { "epoch": 574.8358208955224, "grad_norm": 20.495092391967773, "learning_rate": 9.920096852300243e-06, "loss": 26.919, "step": 24143 }, { "epoch": 574.8597014925373, "grad_norm": 33.71098709106445, "learning_rate": 9.919693301049233e-06, "loss": 27.5382, "step": 24144 }, { "epoch": 574.8835820895522, "grad_norm": 27.680313110351562, "learning_rate": 9.919289749798225e-06, "loss": 27.0673, "step": 24145 }, { "epoch": 574.9074626865672, "grad_norm": 22.805252075195312, "learning_rate": 9.918886198547216e-06, "loss": 25.3541, "step": 24146 }, { "epoch": 574.9313432835821, "grad_norm": 22.15680694580078, "learning_rate": 9.918482647296208e-06, "loss": 26.5238, "step": 24147 }, { "epoch": 574.955223880597, "grad_norm": 28.623735427856445, "learning_rate": 9.9180790960452e-06, "loss": 27.1977, "step": 24148 }, { "epoch": 574.9791044776119, "grad_norm": 25.844993591308594, "learning_rate": 9.91767554479419e-06, "loss": 26.056, "step": 24149 }, { "epoch": 575.0, "grad_norm": 20.232728958129883, "learning_rate": 9.91727199354318e-06, "loss": 22.5509, "step": 24150 }, { "epoch": 575.0238805970149, "grad_norm": 22.322906494140625, "learning_rate": 9.916868442292173e-06, "loss": 25.931, "step": 24151 }, { "epoch": 575.0477611940298, "grad_norm": 21.993743896484375, "learning_rate": 9.916464891041163e-06, "loss": 26.9082, "step": 24152 }, { "epoch": 575.0716417910447, "grad_norm": 20.991506576538086, "learning_rate": 9.916061339790153e-06, "loss": 25.8223, "step": 24153 }, { "epoch": 575.0955223880597, "grad_norm": 22.311983108520508, "learning_rate": 9.915657788539145e-06, "loss": 25.677, "step": 24154 }, { "epoch": 575.1194029850747, "grad_norm": 24.9354190826416, "learning_rate": 9.915254237288137e-06, "loss": 25.0819, "step": 24155 }, { "epoch": 575.1432835820896, "grad_norm": 24.773122787475586, "learning_rate": 9.914850686037128e-06, "loss": 26.9128, "step": 24156 }, { "epoch": 575.1671641791045, "grad_norm": 21.21263313293457, "learning_rate": 9.914447134786118e-06, "loss": 25.448, "step": 24157 }, { "epoch": 575.1910447761194, "grad_norm": 22.196163177490234, "learning_rate": 9.91404358353511e-06, "loss": 25.8818, "step": 24158 }, { "epoch": 575.2149253731343, "grad_norm": 23.80371856689453, "learning_rate": 9.9136400322841e-06, "loss": 26.1007, "step": 24159 }, { "epoch": 575.2388059701492, "grad_norm": 28.52412223815918, "learning_rate": 9.913236481033091e-06, "loss": 26.5244, "step": 24160 }, { "epoch": 575.2626865671642, "grad_norm": 23.43604850769043, "learning_rate": 9.912832929782083e-06, "loss": 26.4392, "step": 24161 }, { "epoch": 575.2865671641791, "grad_norm": 20.977386474609375, "learning_rate": 9.912429378531075e-06, "loss": 25.6302, "step": 24162 }, { "epoch": 575.310447761194, "grad_norm": 25.210773468017578, "learning_rate": 9.912025827280065e-06, "loss": 25.9142, "step": 24163 }, { "epoch": 575.334328358209, "grad_norm": 25.076990127563477, "learning_rate": 9.911622276029057e-06, "loss": 26.9875, "step": 24164 }, { "epoch": 575.3582089552239, "grad_norm": 25.54120635986328, "learning_rate": 9.911218724778048e-06, "loss": 25.7949, "step": 24165 }, { "epoch": 575.3820895522388, "grad_norm": 23.30572509765625, "learning_rate": 9.910815173527038e-06, "loss": 26.1684, "step": 24166 }, { "epoch": 575.4059701492537, "grad_norm": 21.42876625061035, "learning_rate": 9.910411622276029e-06, "loss": 26.1654, "step": 24167 }, { "epoch": 575.4298507462687, "grad_norm": 20.250606536865234, "learning_rate": 9.91000807102502e-06, "loss": 26.176, "step": 24168 }, { "epoch": 575.4537313432836, "grad_norm": 23.873958587646484, "learning_rate": 9.909604519774013e-06, "loss": 26.4697, "step": 24169 }, { "epoch": 575.4776119402985, "grad_norm": 22.984445571899414, "learning_rate": 9.909200968523003e-06, "loss": 25.5956, "step": 24170 }, { "epoch": 575.5014925373134, "grad_norm": 27.90502166748047, "learning_rate": 9.908797417271995e-06, "loss": 26.1954, "step": 24171 }, { "epoch": 575.5253731343283, "grad_norm": 24.59435272216797, "learning_rate": 9.908393866020986e-06, "loss": 26.8012, "step": 24172 }, { "epoch": 575.5492537313432, "grad_norm": 21.542463302612305, "learning_rate": 9.907990314769976e-06, "loss": 25.775, "step": 24173 }, { "epoch": 575.5731343283583, "grad_norm": 25.20526123046875, "learning_rate": 9.907586763518968e-06, "loss": 25.707, "step": 24174 }, { "epoch": 575.5970149253732, "grad_norm": 20.976503372192383, "learning_rate": 9.907183212267958e-06, "loss": 25.525, "step": 24175 }, { "epoch": 575.6208955223881, "grad_norm": 20.37404441833496, "learning_rate": 9.90677966101695e-06, "loss": 24.6446, "step": 24176 }, { "epoch": 575.644776119403, "grad_norm": 19.336965560913086, "learning_rate": 9.90637610976594e-06, "loss": 26.7801, "step": 24177 }, { "epoch": 575.6686567164179, "grad_norm": 19.967987060546875, "learning_rate": 9.905972558514933e-06, "loss": 25.4081, "step": 24178 }, { "epoch": 575.6925373134328, "grad_norm": 25.215801239013672, "learning_rate": 9.905569007263923e-06, "loss": 27.0092, "step": 24179 }, { "epoch": 575.7164179104477, "grad_norm": 22.57303237915039, "learning_rate": 9.905165456012914e-06, "loss": 27.1091, "step": 24180 }, { "epoch": 575.7402985074627, "grad_norm": 19.155336380004883, "learning_rate": 9.904761904761906e-06, "loss": 25.46, "step": 24181 }, { "epoch": 575.7641791044776, "grad_norm": 23.237895965576172, "learning_rate": 9.904358353510896e-06, "loss": 26.0899, "step": 24182 }, { "epoch": 575.7880597014926, "grad_norm": 27.396194458007812, "learning_rate": 9.903954802259888e-06, "loss": 27.3771, "step": 24183 }, { "epoch": 575.8119402985075, "grad_norm": 24.11444854736328, "learning_rate": 9.90355125100888e-06, "loss": 26.9532, "step": 24184 }, { "epoch": 575.8358208955224, "grad_norm": 25.83213996887207, "learning_rate": 9.90314769975787e-06, "loss": 25.2889, "step": 24185 }, { "epoch": 575.8597014925373, "grad_norm": 22.742656707763672, "learning_rate": 9.90274414850686e-06, "loss": 26.3567, "step": 24186 }, { "epoch": 575.8835820895522, "grad_norm": 20.968530654907227, "learning_rate": 9.902340597255853e-06, "loss": 27.0767, "step": 24187 }, { "epoch": 575.9074626865672, "grad_norm": 23.059701919555664, "learning_rate": 9.901937046004843e-06, "loss": 25.7867, "step": 24188 }, { "epoch": 575.9313432835821, "grad_norm": 20.781890869140625, "learning_rate": 9.901533494753834e-06, "loss": 25.4601, "step": 24189 }, { "epoch": 575.955223880597, "grad_norm": 21.089384078979492, "learning_rate": 9.901129943502826e-06, "loss": 26.6087, "step": 24190 }, { "epoch": 575.9791044776119, "grad_norm": 21.92460823059082, "learning_rate": 9.900726392251818e-06, "loss": 25.9893, "step": 24191 }, { "epoch": 576.0, "grad_norm": 27.633859634399414, "learning_rate": 9.900322841000808e-06, "loss": 22.9107, "step": 24192 }, { "epoch": 576.0238805970149, "grad_norm": 24.27855110168457, "learning_rate": 9.899919289749798e-06, "loss": 26.0993, "step": 24193 }, { "epoch": 576.0477611940298, "grad_norm": 24.00995635986328, "learning_rate": 9.89951573849879e-06, "loss": 25.4255, "step": 24194 }, { "epoch": 576.0716417910447, "grad_norm": 23.29939079284668, "learning_rate": 9.899112187247781e-06, "loss": 26.241, "step": 24195 }, { "epoch": 576.0955223880597, "grad_norm": 26.296524047851562, "learning_rate": 9.898708635996771e-06, "loss": 24.9991, "step": 24196 }, { "epoch": 576.1194029850747, "grad_norm": 37.72909164428711, "learning_rate": 9.898305084745763e-06, "loss": 26.6598, "step": 24197 }, { "epoch": 576.1432835820896, "grad_norm": 19.486417770385742, "learning_rate": 9.897901533494755e-06, "loss": 25.2479, "step": 24198 }, { "epoch": 576.1671641791045, "grad_norm": 36.57334518432617, "learning_rate": 9.897497982243746e-06, "loss": 25.301, "step": 24199 }, { "epoch": 576.1910447761194, "grad_norm": 25.518373489379883, "learning_rate": 9.897094430992736e-06, "loss": 25.5581, "step": 24200 }, { "epoch": 576.2149253731343, "grad_norm": 28.71117401123047, "learning_rate": 9.896690879741728e-06, "loss": 26.4934, "step": 24201 }, { "epoch": 576.2388059701492, "grad_norm": 26.295974731445312, "learning_rate": 9.896287328490719e-06, "loss": 25.7336, "step": 24202 }, { "epoch": 576.2626865671642, "grad_norm": 30.834056854248047, "learning_rate": 9.89588377723971e-06, "loss": 26.1055, "step": 24203 }, { "epoch": 576.2865671641791, "grad_norm": 27.119916915893555, "learning_rate": 9.895480225988701e-06, "loss": 26.9774, "step": 24204 }, { "epoch": 576.310447761194, "grad_norm": 23.101593017578125, "learning_rate": 9.895076674737693e-06, "loss": 26.3573, "step": 24205 }, { "epoch": 576.334328358209, "grad_norm": 32.946468353271484, "learning_rate": 9.894673123486683e-06, "loss": 26.5645, "step": 24206 }, { "epoch": 576.3582089552239, "grad_norm": 28.253629684448242, "learning_rate": 9.894269572235676e-06, "loss": 26.7415, "step": 24207 }, { "epoch": 576.3820895522388, "grad_norm": 23.134624481201172, "learning_rate": 9.893866020984666e-06, "loss": 25.2559, "step": 24208 }, { "epoch": 576.4059701492537, "grad_norm": 28.6090087890625, "learning_rate": 9.893462469733656e-06, "loss": 26.0049, "step": 24209 }, { "epoch": 576.4298507462687, "grad_norm": 27.757505416870117, "learning_rate": 9.893058918482648e-06, "loss": 26.0127, "step": 24210 }, { "epoch": 576.4537313432836, "grad_norm": 24.23702621459961, "learning_rate": 9.892655367231639e-06, "loss": 26.5059, "step": 24211 }, { "epoch": 576.4776119402985, "grad_norm": 24.041000366210938, "learning_rate": 9.89225181598063e-06, "loss": 25.6247, "step": 24212 }, { "epoch": 576.5014925373134, "grad_norm": 33.139923095703125, "learning_rate": 9.891848264729621e-06, "loss": 26.0654, "step": 24213 }, { "epoch": 576.5253731343283, "grad_norm": 25.00043296813965, "learning_rate": 9.891444713478613e-06, "loss": 26.8566, "step": 24214 }, { "epoch": 576.5492537313432, "grad_norm": 21.081710815429688, "learning_rate": 9.891041162227604e-06, "loss": 25.6806, "step": 24215 }, { "epoch": 576.5731343283583, "grad_norm": 27.378856658935547, "learning_rate": 9.890637610976594e-06, "loss": 25.7553, "step": 24216 }, { "epoch": 576.5970149253732, "grad_norm": 29.733448028564453, "learning_rate": 9.890234059725586e-06, "loss": 26.0282, "step": 24217 }, { "epoch": 576.6208955223881, "grad_norm": 20.743431091308594, "learning_rate": 9.889830508474576e-06, "loss": 26.3404, "step": 24218 }, { "epoch": 576.644776119403, "grad_norm": 24.288854598999023, "learning_rate": 9.889426957223568e-06, "loss": 25.447, "step": 24219 }, { "epoch": 576.6686567164179, "grad_norm": 25.996915817260742, "learning_rate": 9.88902340597256e-06, "loss": 25.7725, "step": 24220 }, { "epoch": 576.6925373134328, "grad_norm": 28.017393112182617, "learning_rate": 9.88861985472155e-06, "loss": 25.8946, "step": 24221 }, { "epoch": 576.7164179104477, "grad_norm": 21.24134635925293, "learning_rate": 9.888216303470541e-06, "loss": 26.7417, "step": 24222 }, { "epoch": 576.7402985074627, "grad_norm": 22.633209228515625, "learning_rate": 9.887812752219532e-06, "loss": 26.5589, "step": 24223 }, { "epoch": 576.7641791044776, "grad_norm": 24.822765350341797, "learning_rate": 9.887409200968524e-06, "loss": 25.5851, "step": 24224 }, { "epoch": 576.7880597014926, "grad_norm": 29.977224349975586, "learning_rate": 9.887005649717516e-06, "loss": 25.2401, "step": 24225 }, { "epoch": 576.8119402985075, "grad_norm": NaN, "learning_rate": 9.886602098466506e-06, "loss": 28.6111, "step": 24226 }, { "epoch": 576.8358208955224, "grad_norm": 22.276187896728516, "learning_rate": 9.886602098466506e-06, "loss": 26.3968, "step": 24227 }, { "epoch": 576.8597014925373, "grad_norm": 19.267215728759766, "learning_rate": 9.886198547215498e-06, "loss": 26.1588, "step": 24228 }, { "epoch": 576.8835820895522, "grad_norm": 20.242109298706055, "learning_rate": 9.885794995964488e-06, "loss": 26.6027, "step": 24229 }, { "epoch": 576.9074626865672, "grad_norm": 23.113723754882812, "learning_rate": 9.885391444713479e-06, "loss": 26.6478, "step": 24230 }, { "epoch": 576.9313432835821, "grad_norm": 24.462846755981445, "learning_rate": 9.884987893462471e-06, "loss": 26.3754, "step": 24231 }, { "epoch": 576.955223880597, "grad_norm": 20.949182510375977, "learning_rate": 9.884584342211461e-06, "loss": 26.2779, "step": 24232 }, { "epoch": 576.9791044776119, "grad_norm": 30.058650970458984, "learning_rate": 9.884180790960453e-06, "loss": 26.5661, "step": 24233 }, { "epoch": 577.0, "grad_norm": 28.42578887939453, "learning_rate": 9.883777239709444e-06, "loss": 24.0771, "step": 24234 }, { "epoch": 577.0238805970149, "grad_norm": 22.077268600463867, "learning_rate": 9.883373688458436e-06, "loss": 25.4105, "step": 24235 }, { "epoch": 577.0477611940298, "grad_norm": 26.45479393005371, "learning_rate": 9.882970137207426e-06, "loss": 25.341, "step": 24236 }, { "epoch": 577.0716417910447, "grad_norm": 26.880840301513672, "learning_rate": 9.882566585956417e-06, "loss": 26.0538, "step": 24237 }, { "epoch": 577.0955223880597, "grad_norm": 29.150930404663086, "learning_rate": 9.882163034705409e-06, "loss": 25.3408, "step": 24238 }, { "epoch": 577.1194029850747, "grad_norm": 20.98368263244629, "learning_rate": 9.881759483454399e-06, "loss": 25.1606, "step": 24239 }, { "epoch": 577.1432835820896, "grad_norm": 26.83998680114746, "learning_rate": 9.881355932203391e-06, "loss": 25.6146, "step": 24240 }, { "epoch": 577.1671641791045, "grad_norm": 25.10946273803711, "learning_rate": 9.880952380952381e-06, "loss": 26.423, "step": 24241 }, { "epoch": 577.1910447761194, "grad_norm": 21.77827262878418, "learning_rate": 9.880548829701373e-06, "loss": 25.8885, "step": 24242 }, { "epoch": 577.2149253731343, "grad_norm": 21.667938232421875, "learning_rate": 9.880145278450364e-06, "loss": 26.2442, "step": 24243 }, { "epoch": 577.2388059701492, "grad_norm": 24.184541702270508, "learning_rate": 9.879741727199356e-06, "loss": 26.2533, "step": 24244 }, { "epoch": 577.2626865671642, "grad_norm": 31.423175811767578, "learning_rate": 9.879338175948346e-06, "loss": 27.5175, "step": 24245 }, { "epoch": 577.2865671641791, "grad_norm": 24.77033233642578, "learning_rate": 9.878934624697337e-06, "loss": 25.9532, "step": 24246 }, { "epoch": 577.310447761194, "grad_norm": 21.27184295654297, "learning_rate": 9.878531073446329e-06, "loss": 24.8154, "step": 24247 }, { "epoch": 577.334328358209, "grad_norm": 32.21891403198242, "learning_rate": 9.87812752219532e-06, "loss": 26.4465, "step": 24248 }, { "epoch": 577.3582089552239, "grad_norm": 30.890832901000977, "learning_rate": 9.877723970944311e-06, "loss": 26.1392, "step": 24249 }, { "epoch": 577.3820895522388, "grad_norm": 19.568174362182617, "learning_rate": 9.877320419693301e-06, "loss": 25.9405, "step": 24250 }, { "epoch": 577.4059701492537, "grad_norm": 22.912548065185547, "learning_rate": 9.876916868442294e-06, "loss": 26.0771, "step": 24251 }, { "epoch": 577.4298507462687, "grad_norm": 25.530912399291992, "learning_rate": 9.876513317191284e-06, "loss": 26.5706, "step": 24252 }, { "epoch": 577.4537313432836, "grad_norm": 23.462488174438477, "learning_rate": 9.876109765940274e-06, "loss": 26.1721, "step": 24253 }, { "epoch": 577.4776119402985, "grad_norm": 23.213850021362305, "learning_rate": 9.875706214689266e-06, "loss": 24.9426, "step": 24254 }, { "epoch": 577.5014925373134, "grad_norm": 20.441198348999023, "learning_rate": 9.875302663438258e-06, "loss": 27.1047, "step": 24255 }, { "epoch": 577.5253731343283, "grad_norm": 20.49765968322754, "learning_rate": 9.874899112187249e-06, "loss": 25.4928, "step": 24256 }, { "epoch": 577.5492537313432, "grad_norm": 21.341079711914062, "learning_rate": 9.874495560936239e-06, "loss": 25.8964, "step": 24257 }, { "epoch": 577.5731343283583, "grad_norm": 23.20456314086914, "learning_rate": 9.874092009685231e-06, "loss": 26.0103, "step": 24258 }, { "epoch": 577.5970149253732, "grad_norm": 24.6489200592041, "learning_rate": 9.873688458434222e-06, "loss": 26.5501, "step": 24259 }, { "epoch": 577.6208955223881, "grad_norm": 26.167984008789062, "learning_rate": 9.873284907183212e-06, "loss": 25.8295, "step": 24260 }, { "epoch": 577.644776119403, "grad_norm": 25.409072875976562, "learning_rate": 9.872881355932204e-06, "loss": 27.0118, "step": 24261 }, { "epoch": 577.6686567164179, "grad_norm": 21.98760986328125, "learning_rate": 9.872477804681196e-06, "loss": 26.3258, "step": 24262 }, { "epoch": 577.6925373134328, "grad_norm": 20.215179443359375, "learning_rate": 9.872074253430186e-06, "loss": 26.061, "step": 24263 }, { "epoch": 577.7164179104477, "grad_norm": 21.61227035522461, "learning_rate": 9.871670702179178e-06, "loss": 25.4823, "step": 24264 }, { "epoch": 577.7402985074627, "grad_norm": 25.235151290893555, "learning_rate": 9.871267150928169e-06, "loss": 25.0481, "step": 24265 }, { "epoch": 577.7641791044776, "grad_norm": 30.680025100708008, "learning_rate": 9.87086359967716e-06, "loss": 26.8394, "step": 24266 }, { "epoch": 577.7880597014926, "grad_norm": 25.141204833984375, "learning_rate": 9.870460048426151e-06, "loss": 27.0426, "step": 24267 }, { "epoch": 577.8119402985075, "grad_norm": 19.33418083190918, "learning_rate": 9.870056497175142e-06, "loss": 25.9249, "step": 24268 }, { "epoch": 577.8358208955224, "grad_norm": 30.321638107299805, "learning_rate": 9.869652945924134e-06, "loss": 25.5524, "step": 24269 }, { "epoch": 577.8597014925373, "grad_norm": 29.581995010375977, "learning_rate": 9.869249394673124e-06, "loss": 26.3754, "step": 24270 }, { "epoch": 577.8835820895522, "grad_norm": 24.0113468170166, "learning_rate": 9.868845843422116e-06, "loss": 26.7511, "step": 24271 }, { "epoch": 577.9074626865672, "grad_norm": 24.194473266601562, "learning_rate": 9.868442292171107e-06, "loss": 25.8113, "step": 24272 }, { "epoch": 577.9313432835821, "grad_norm": 37.20102310180664, "learning_rate": 9.868038740920097e-06, "loss": 26.2726, "step": 24273 }, { "epoch": 577.955223880597, "grad_norm": 20.964021682739258, "learning_rate": 9.867635189669089e-06, "loss": 25.1926, "step": 24274 }, { "epoch": 577.9791044776119, "grad_norm": 42.6253776550293, "learning_rate": 9.86723163841808e-06, "loss": 26.8801, "step": 24275 }, { "epoch": 578.0, "grad_norm": 28.623554229736328, "learning_rate": 9.866828087167071e-06, "loss": 24.0609, "step": 24276 }, { "epoch": 578.0238805970149, "grad_norm": 38.894203186035156, "learning_rate": 9.866424535916063e-06, "loss": 25.6569, "step": 24277 }, { "epoch": 578.0477611940298, "grad_norm": 33.78681945800781, "learning_rate": 9.866020984665054e-06, "loss": 26.1968, "step": 24278 }, { "epoch": 578.0716417910447, "grad_norm": 28.782487869262695, "learning_rate": 9.865617433414044e-06, "loss": 26.4103, "step": 24279 }, { "epoch": 578.0955223880597, "grad_norm": 48.110599517822266, "learning_rate": 9.865213882163035e-06, "loss": 27.0297, "step": 24280 }, { "epoch": 578.1194029850747, "grad_norm": 31.361919403076172, "learning_rate": 9.864810330912027e-06, "loss": 25.9312, "step": 24281 }, { "epoch": 578.1432835820896, "grad_norm": 50.586307525634766, "learning_rate": 9.864406779661017e-06, "loss": 26.309, "step": 24282 }, { "epoch": 578.1671641791045, "grad_norm": 40.667564392089844, "learning_rate": 9.864003228410009e-06, "loss": 25.7865, "step": 24283 }, { "epoch": 578.1910447761194, "grad_norm": 52.46881866455078, "learning_rate": 9.863599677159001e-06, "loss": 24.68, "step": 24284 }, { "epoch": 578.2149253731343, "grad_norm": 42.30326461791992, "learning_rate": 9.863196125907991e-06, "loss": 25.5462, "step": 24285 }, { "epoch": 578.2388059701492, "grad_norm": 50.60557556152344, "learning_rate": 9.862792574656982e-06, "loss": 28.0072, "step": 24286 }, { "epoch": 578.2626865671642, "grad_norm": 46.72174835205078, "learning_rate": 9.862389023405974e-06, "loss": 25.9093, "step": 24287 }, { "epoch": 578.2865671641791, "grad_norm": 42.248069763183594, "learning_rate": 9.861985472154964e-06, "loss": 26.2697, "step": 24288 }, { "epoch": 578.310447761194, "grad_norm": 39.542137145996094, "learning_rate": 9.861581920903955e-06, "loss": 26.2257, "step": 24289 }, { "epoch": 578.334328358209, "grad_norm": 40.8088264465332, "learning_rate": 9.861178369652947e-06, "loss": 26.2354, "step": 24290 }, { "epoch": 578.3582089552239, "grad_norm": 33.23478317260742, "learning_rate": 9.860774818401939e-06, "loss": 25.7435, "step": 24291 }, { "epoch": 578.3820895522388, "grad_norm": 48.01786804199219, "learning_rate": 9.860371267150929e-06, "loss": 26.2833, "step": 24292 }, { "epoch": 578.4059701492537, "grad_norm": 39.83214569091797, "learning_rate": 9.85996771589992e-06, "loss": 26.6156, "step": 24293 }, { "epoch": 578.4298507462687, "grad_norm": 47.306068420410156, "learning_rate": 9.859564164648912e-06, "loss": 26.1824, "step": 24294 }, { "epoch": 578.4537313432836, "grad_norm": NaN, "learning_rate": 9.859160613397902e-06, "loss": 42.0217, "step": 24295 }, { "epoch": 578.4776119402985, "grad_norm": 42.440406799316406, "learning_rate": 9.859160613397902e-06, "loss": 26.138, "step": 24296 }, { "epoch": 578.5014925373134, "grad_norm": 44.7374153137207, "learning_rate": 9.858757062146892e-06, "loss": 26.501, "step": 24297 }, { "epoch": 578.5253731343283, "grad_norm": 41.082130432128906, "learning_rate": 9.858353510895884e-06, "loss": 26.2245, "step": 24298 }, { "epoch": 578.5492537313432, "grad_norm": 43.1970329284668, "learning_rate": 9.857949959644876e-06, "loss": 26.0262, "step": 24299 }, { "epoch": 578.5731343283583, "grad_norm": 39.7103385925293, "learning_rate": 9.857546408393867e-06, "loss": 25.4855, "step": 24300 }, { "epoch": 578.5970149253732, "grad_norm": 44.98000717163086, "learning_rate": 9.857142857142859e-06, "loss": 25.9327, "step": 24301 }, { "epoch": 578.6208955223881, "grad_norm": 38.69963073730469, "learning_rate": 9.85673930589185e-06, "loss": 26.5657, "step": 24302 }, { "epoch": 578.644776119403, "grad_norm": 43.54710388183594, "learning_rate": 9.85633575464084e-06, "loss": 25.5979, "step": 24303 }, { "epoch": 578.6686567164179, "grad_norm": 38.8855094909668, "learning_rate": 9.855932203389832e-06, "loss": 26.5245, "step": 24304 }, { "epoch": 578.6925373134328, "grad_norm": 43.57744598388672, "learning_rate": 9.855528652138822e-06, "loss": 26.9419, "step": 24305 }, { "epoch": 578.7164179104477, "grad_norm": 35.181270599365234, "learning_rate": 9.855125100887814e-06, "loss": 26.2575, "step": 24306 }, { "epoch": 578.7402985074627, "grad_norm": 41.91697692871094, "learning_rate": 9.854721549636804e-06, "loss": 25.8151, "step": 24307 }, { "epoch": 578.7641791044776, "grad_norm": 37.03630447387695, "learning_rate": 9.854317998385797e-06, "loss": 24.7906, "step": 24308 }, { "epoch": 578.7880597014926, "grad_norm": 46.335540771484375, "learning_rate": 9.853914447134787e-06, "loss": 25.422, "step": 24309 }, { "epoch": 578.8119402985075, "grad_norm": 42.71828079223633, "learning_rate": 9.853510895883777e-06, "loss": 25.7957, "step": 24310 }, { "epoch": 578.8358208955224, "grad_norm": 40.856266021728516, "learning_rate": 9.85310734463277e-06, "loss": 26.228, "step": 24311 }, { "epoch": 578.8597014925373, "grad_norm": 40.25164794921875, "learning_rate": 9.85270379338176e-06, "loss": 25.1414, "step": 24312 }, { "epoch": 578.8835820895522, "grad_norm": 37.5943717956543, "learning_rate": 9.852300242130752e-06, "loss": 26.4408, "step": 24313 }, { "epoch": 578.9074626865672, "grad_norm": 34.698734283447266, "learning_rate": 9.851896690879742e-06, "loss": 25.4681, "step": 24314 }, { "epoch": 578.9313432835821, "grad_norm": 47.627105712890625, "learning_rate": 9.851493139628734e-06, "loss": 26.1259, "step": 24315 }, { "epoch": 578.955223880597, "grad_norm": 41.3843879699707, "learning_rate": 9.851089588377725e-06, "loss": 25.8089, "step": 24316 }, { "epoch": 578.9791044776119, "grad_norm": 41.02177047729492, "learning_rate": 9.850686037126715e-06, "loss": 25.3028, "step": 24317 }, { "epoch": 579.0, "grad_norm": 35.24673080444336, "learning_rate": 9.850282485875707e-06, "loss": 22.4419, "step": 24318 }, { "epoch": 579.0238805970149, "grad_norm": 36.79585266113281, "learning_rate": 9.849878934624697e-06, "loss": 25.6291, "step": 24319 }, { "epoch": 579.0477611940298, "grad_norm": 32.13479995727539, "learning_rate": 9.84947538337369e-06, "loss": 25.5493, "step": 24320 }, { "epoch": 579.0716417910447, "grad_norm": 45.72109603881836, "learning_rate": 9.849071832122681e-06, "loss": 25.4253, "step": 24321 }, { "epoch": 579.0955223880597, "grad_norm": 40.34111404418945, "learning_rate": 9.848668280871672e-06, "loss": 25.6974, "step": 24322 }, { "epoch": 579.1194029850747, "grad_norm": 39.82575988769531, "learning_rate": 9.848264729620662e-06, "loss": 26.8754, "step": 24323 }, { "epoch": 579.1432835820896, "grad_norm": 40.520381927490234, "learning_rate": 9.847861178369654e-06, "loss": 24.9471, "step": 24324 }, { "epoch": 579.1671641791045, "grad_norm": 38.79893112182617, "learning_rate": 9.847457627118645e-06, "loss": 26.9048, "step": 24325 }, { "epoch": 579.1910447761194, "grad_norm": 35.58498001098633, "learning_rate": 9.847054075867637e-06, "loss": 26.6545, "step": 24326 }, { "epoch": 579.2149253731343, "grad_norm": 41.20244598388672, "learning_rate": 9.846650524616627e-06, "loss": 25.914, "step": 24327 }, { "epoch": 579.2388059701492, "grad_norm": 33.95054626464844, "learning_rate": 9.846246973365619e-06, "loss": 25.8116, "step": 24328 }, { "epoch": 579.2626865671642, "grad_norm": 44.657596588134766, "learning_rate": 9.84584342211461e-06, "loss": 26.3628, "step": 24329 }, { "epoch": 579.2865671641791, "grad_norm": 35.741153717041016, "learning_rate": 9.8454398708636e-06, "loss": 25.9778, "step": 24330 }, { "epoch": 579.310447761194, "grad_norm": 40.448978424072266, "learning_rate": 9.845036319612592e-06, "loss": 25.7301, "step": 24331 }, { "epoch": 579.334328358209, "grad_norm": 36.45576095581055, "learning_rate": 9.844632768361582e-06, "loss": 26.9968, "step": 24332 }, { "epoch": 579.3582089552239, "grad_norm": 42.242469787597656, "learning_rate": 9.844229217110574e-06, "loss": 25.6596, "step": 24333 }, { "epoch": 579.3820895522388, "grad_norm": 39.32024383544922, "learning_rate": 9.843825665859565e-06, "loss": 26.2102, "step": 24334 }, { "epoch": 579.4059701492537, "grad_norm": 42.509674072265625, "learning_rate": 9.843422114608557e-06, "loss": 25.2685, "step": 24335 }, { "epoch": 579.4298507462687, "grad_norm": 36.51924514770508, "learning_rate": 9.843018563357547e-06, "loss": 25.5354, "step": 24336 }, { "epoch": 579.4537313432836, "grad_norm": 40.83980941772461, "learning_rate": 9.842615012106538e-06, "loss": 25.8731, "step": 24337 }, { "epoch": 579.4776119402985, "grad_norm": 34.00606918334961, "learning_rate": 9.84221146085553e-06, "loss": 26.1242, "step": 24338 }, { "epoch": 579.5014925373134, "grad_norm": 40.37692642211914, "learning_rate": 9.84180790960452e-06, "loss": 24.7412, "step": 24339 }, { "epoch": 579.5253731343283, "grad_norm": 35.402915954589844, "learning_rate": 9.841404358353512e-06, "loss": 26.9768, "step": 24340 }, { "epoch": 579.5492537313432, "grad_norm": 42.388343811035156, "learning_rate": 9.841000807102502e-06, "loss": 25.8351, "step": 24341 }, { "epoch": 579.5731343283583, "grad_norm": 38.46949005126953, "learning_rate": 9.840597255851494e-06, "loss": 25.2451, "step": 24342 }, { "epoch": 579.5970149253732, "grad_norm": 40.253173828125, "learning_rate": 9.840193704600485e-06, "loss": 25.3762, "step": 24343 }, { "epoch": 579.6208955223881, "grad_norm": 36.45711898803711, "learning_rate": 9.839790153349477e-06, "loss": 25.369, "step": 24344 }, { "epoch": 579.644776119403, "grad_norm": 39.64451599121094, "learning_rate": 9.839386602098467e-06, "loss": 26.6922, "step": 24345 }, { "epoch": 579.6686567164179, "grad_norm": 35.69731903076172, "learning_rate": 9.838983050847458e-06, "loss": 25.9966, "step": 24346 }, { "epoch": 579.6925373134328, "grad_norm": 44.520076751708984, "learning_rate": 9.83857949959645e-06, "loss": 26.1306, "step": 24347 }, { "epoch": 579.7164179104477, "grad_norm": 35.1386604309082, "learning_rate": 9.83817594834544e-06, "loss": 26.6648, "step": 24348 }, { "epoch": 579.7402985074627, "grad_norm": 43.063499450683594, "learning_rate": 9.837772397094432e-06, "loss": 27.1043, "step": 24349 }, { "epoch": 579.7641791044776, "grad_norm": 37.07965087890625, "learning_rate": 9.837368845843422e-06, "loss": 25.3399, "step": 24350 }, { "epoch": 579.7880597014926, "grad_norm": 39.18414306640625, "learning_rate": 9.836965294592415e-06, "loss": 26.117, "step": 24351 }, { "epoch": 579.8119402985075, "grad_norm": NaN, "learning_rate": 9.836561743341405e-06, "loss": 44.4591, "step": 24352 }, { "epoch": 579.8358208955224, "grad_norm": 32.0230712890625, "learning_rate": 9.836561743341405e-06, "loss": 25.9422, "step": 24353 }, { "epoch": 579.8597014925373, "grad_norm": 42.18384552001953, "learning_rate": 9.836158192090395e-06, "loss": 25.7583, "step": 24354 }, { "epoch": 579.8835820895522, "grad_norm": 35.70800018310547, "learning_rate": 9.835754640839387e-06, "loss": 26.727, "step": 24355 }, { "epoch": 579.9074626865672, "grad_norm": 44.64503860473633, "learning_rate": 9.83535108958838e-06, "loss": 26.9302, "step": 24356 }, { "epoch": 579.9313432835821, "grad_norm": 34.72065734863281, "learning_rate": 9.83494753833737e-06, "loss": 26.9807, "step": 24357 }, { "epoch": 579.955223880597, "grad_norm": 38.93190383911133, "learning_rate": 9.834543987086362e-06, "loss": 25.3657, "step": 24358 }, { "epoch": 579.9791044776119, "grad_norm": 35.26409912109375, "learning_rate": 9.834140435835352e-06, "loss": 25.3315, "step": 24359 }, { "epoch": 580.0, "grad_norm": 35.74294662475586, "learning_rate": 9.833736884584343e-06, "loss": 22.876, "step": 24360 }, { "epoch": 580.0238805970149, "grad_norm": 36.351318359375, "learning_rate": 9.833333333333333e-06, "loss": 25.8496, "step": 24361 }, { "epoch": 580.0477611940298, "grad_norm": 35.3722038269043, "learning_rate": 9.832929782082325e-06, "loss": 26.3848, "step": 24362 }, { "epoch": 580.0716417910447, "grad_norm": 30.765153884887695, "learning_rate": 9.832526230831317e-06, "loss": 25.3719, "step": 24363 }, { "epoch": 580.0955223880597, "grad_norm": 35.37749481201172, "learning_rate": 9.832122679580307e-06, "loss": 25.6217, "step": 24364 }, { "epoch": 580.1194029850747, "grad_norm": 30.13596534729004, "learning_rate": 9.8317191283293e-06, "loss": 26.2312, "step": 24365 }, { "epoch": 580.1432835820896, "grad_norm": 38.33512496948242, "learning_rate": 9.83131557707829e-06, "loss": 26.3174, "step": 24366 }, { "epoch": 580.1671641791045, "grad_norm": 35.33651351928711, "learning_rate": 9.83091202582728e-06, "loss": 26.4668, "step": 24367 }, { "epoch": 580.1910447761194, "grad_norm": 40.0327033996582, "learning_rate": 9.830508474576272e-06, "loss": 25.9023, "step": 24368 }, { "epoch": 580.2149253731343, "grad_norm": 38.003204345703125, "learning_rate": 9.830104923325263e-06, "loss": 26.5375, "step": 24369 }, { "epoch": 580.2388059701492, "grad_norm": 35.34696960449219, "learning_rate": 9.829701372074255e-06, "loss": 27.0991, "step": 24370 }, { "epoch": 580.2626865671642, "grad_norm": 32.42076873779297, "learning_rate": 9.829297820823245e-06, "loss": 26.8309, "step": 24371 }, { "epoch": 580.2865671641791, "grad_norm": 35.067989349365234, "learning_rate": 9.828894269572237e-06, "loss": 25.2841, "step": 24372 }, { "epoch": 580.310447761194, "grad_norm": 29.482826232910156, "learning_rate": 9.828490718321228e-06, "loss": 24.9303, "step": 24373 }, { "epoch": 580.334328358209, "grad_norm": 41.3367805480957, "learning_rate": 9.828087167070218e-06, "loss": 26.2656, "step": 24374 }, { "epoch": 580.3582089552239, "grad_norm": 35.24522399902344, "learning_rate": 9.82768361581921e-06, "loss": 26.6382, "step": 24375 }, { "epoch": 580.3820895522388, "grad_norm": 36.94068145751953, "learning_rate": 9.8272800645682e-06, "loss": 24.6068, "step": 24376 }, { "epoch": 580.4059701492537, "grad_norm": 35.999908447265625, "learning_rate": 9.826876513317192e-06, "loss": 27.3574, "step": 24377 }, { "epoch": 580.4298507462687, "grad_norm": 35.36229705810547, "learning_rate": 9.826472962066184e-06, "loss": 26.1537, "step": 24378 }, { "epoch": 580.4537313432836, "grad_norm": 30.853734970092773, "learning_rate": 9.826069410815175e-06, "loss": 25.1849, "step": 24379 }, { "epoch": 580.4776119402985, "grad_norm": 36.929229736328125, "learning_rate": 9.825665859564165e-06, "loss": 25.913, "step": 24380 }, { "epoch": 580.5014925373134, "grad_norm": 30.963043212890625, "learning_rate": 9.825262308313157e-06, "loss": 25.6884, "step": 24381 }, { "epoch": 580.5253731343283, "grad_norm": 40.29777145385742, "learning_rate": 9.824858757062148e-06, "loss": 26.207, "step": 24382 }, { "epoch": 580.5492537313432, "grad_norm": 30.370567321777344, "learning_rate": 9.824455205811138e-06, "loss": 25.3299, "step": 24383 }, { "epoch": 580.5731343283583, "grad_norm": 37.300716400146484, "learning_rate": 9.82405165456013e-06, "loss": 25.9618, "step": 24384 }, { "epoch": 580.5970149253732, "grad_norm": NaN, "learning_rate": 9.823648103309122e-06, "loss": 26.6273, "step": 24385 }, { "epoch": 580.6208955223881, "grad_norm": 33.033084869384766, "learning_rate": 9.823648103309122e-06, "loss": 26.5122, "step": 24386 }, { "epoch": 580.644776119403, "grad_norm": 35.0532112121582, "learning_rate": 9.823244552058112e-06, "loss": 25.3584, "step": 24387 }, { "epoch": 580.6686567164179, "grad_norm": 32.36638259887695, "learning_rate": 9.822841000807103e-06, "loss": 25.2591, "step": 24388 }, { "epoch": 580.6925373134328, "grad_norm": 31.572265625, "learning_rate": 9.822437449556095e-06, "loss": 27.4964, "step": 24389 }, { "epoch": 580.7164179104477, "grad_norm": 28.017118453979492, "learning_rate": 9.822033898305085e-06, "loss": 25.1772, "step": 24390 }, { "epoch": 580.7402985074627, "grad_norm": 29.795686721801758, "learning_rate": 9.821630347054076e-06, "loss": 26.9749, "step": 24391 }, { "epoch": 580.7641791044776, "grad_norm": 26.194974899291992, "learning_rate": 9.821226795803068e-06, "loss": 25.8875, "step": 24392 }, { "epoch": 580.7880597014926, "grad_norm": 29.715530395507812, "learning_rate": 9.82082324455206e-06, "loss": 25.4979, "step": 24393 }, { "epoch": 580.8119402985075, "grad_norm": 23.867475509643555, "learning_rate": 9.82041969330105e-06, "loss": 25.5996, "step": 24394 }, { "epoch": 580.8358208955224, "grad_norm": 25.0240478515625, "learning_rate": 9.82001614205004e-06, "loss": 26.0134, "step": 24395 }, { "epoch": 580.8597014925373, "grad_norm": 24.46703338623047, "learning_rate": 9.819612590799033e-06, "loss": 25.6797, "step": 24396 }, { "epoch": 580.8835820895522, "grad_norm": 25.778980255126953, "learning_rate": 9.819209039548023e-06, "loss": 25.5582, "step": 24397 }, { "epoch": 580.9074626865672, "grad_norm": 25.899660110473633, "learning_rate": 9.818805488297013e-06, "loss": 25.7628, "step": 24398 }, { "epoch": 580.9313432835821, "grad_norm": 24.644716262817383, "learning_rate": 9.818401937046005e-06, "loss": 25.5296, "step": 24399 }, { "epoch": 580.955223880597, "grad_norm": 22.42902946472168, "learning_rate": 9.817998385794997e-06, "loss": 26.1107, "step": 24400 }, { "epoch": 580.9791044776119, "grad_norm": 28.277116775512695, "learning_rate": 9.817594834543988e-06, "loss": 26.0095, "step": 24401 }, { "epoch": 581.0, "grad_norm": 21.57637596130371, "learning_rate": 9.81719128329298e-06, "loss": 22.5018, "step": 24402 }, { "epoch": 581.0238805970149, "grad_norm": 27.61351203918457, "learning_rate": 9.81678773204197e-06, "loss": 26.1534, "step": 24403 }, { "epoch": 581.0477611940298, "grad_norm": 21.82658576965332, "learning_rate": 9.81638418079096e-06, "loss": 26.1399, "step": 24404 }, { "epoch": 581.0716417910447, "grad_norm": 27.59967041015625, "learning_rate": 9.815980629539953e-06, "loss": 25.5481, "step": 24405 }, { "epoch": 581.0955223880597, "grad_norm": 24.987524032592773, "learning_rate": 9.815577078288943e-06, "loss": 25.3064, "step": 24406 }, { "epoch": 581.1194029850747, "grad_norm": 26.58732032775879, "learning_rate": 9.815173527037935e-06, "loss": 25.5666, "step": 24407 }, { "epoch": 581.1432835820896, "grad_norm": 26.30469512939453, "learning_rate": 9.814769975786925e-06, "loss": 26.2216, "step": 24408 }, { "epoch": 581.1671641791045, "grad_norm": 31.859657287597656, "learning_rate": 9.814366424535918e-06, "loss": 25.2246, "step": 24409 }, { "epoch": 581.1910447761194, "grad_norm": 23.056110382080078, "learning_rate": 9.813962873284908e-06, "loss": 26.017, "step": 24410 }, { "epoch": 581.2149253731343, "grad_norm": 27.42281150817871, "learning_rate": 9.813559322033898e-06, "loss": 26.3378, "step": 24411 }, { "epoch": 581.2388059701492, "grad_norm": 30.183202743530273, "learning_rate": 9.81315577078289e-06, "loss": 25.5112, "step": 24412 }, { "epoch": 581.2626865671642, "grad_norm": 24.34949493408203, "learning_rate": 9.81275221953188e-06, "loss": 26.7144, "step": 24413 }, { "epoch": 581.2865671641791, "grad_norm": 26.42401885986328, "learning_rate": 9.812348668280873e-06, "loss": 24.6351, "step": 24414 }, { "epoch": 581.310447761194, "grad_norm": 29.026399612426758, "learning_rate": 9.811945117029865e-06, "loss": 26.7357, "step": 24415 }, { "epoch": 581.334328358209, "grad_norm": 23.629470825195312, "learning_rate": 9.811541565778855e-06, "loss": 25.2155, "step": 24416 }, { "epoch": 581.3582089552239, "grad_norm": 29.794118881225586, "learning_rate": 9.811138014527846e-06, "loss": 26.2714, "step": 24417 }, { "epoch": 581.3820895522388, "grad_norm": 27.444660186767578, "learning_rate": 9.810734463276836e-06, "loss": 26.0444, "step": 24418 }, { "epoch": 581.4059701492537, "grad_norm": 26.139053344726562, "learning_rate": 9.810330912025828e-06, "loss": 26.0747, "step": 24419 }, { "epoch": 581.4298507462687, "grad_norm": 26.46725082397461, "learning_rate": 9.809927360774818e-06, "loss": 25.5462, "step": 24420 }, { "epoch": 581.4537313432836, "grad_norm": 22.8532657623291, "learning_rate": 9.80952380952381e-06, "loss": 26.158, "step": 24421 }, { "epoch": 581.4776119402985, "grad_norm": 26.606639862060547, "learning_rate": 9.809120258272802e-06, "loss": 26.8532, "step": 24422 }, { "epoch": 581.5014925373134, "grad_norm": 22.454097747802734, "learning_rate": 9.808716707021793e-06, "loss": 26.5464, "step": 24423 }, { "epoch": 581.5253731343283, "grad_norm": 20.92378044128418, "learning_rate": 9.808313155770783e-06, "loss": 25.7654, "step": 24424 }, { "epoch": 581.5492537313432, "grad_norm": 24.866594314575195, "learning_rate": 9.807909604519775e-06, "loss": 26.007, "step": 24425 }, { "epoch": 581.5731343283583, "grad_norm": 24.2376708984375, "learning_rate": 9.807506053268766e-06, "loss": 25.8941, "step": 24426 }, { "epoch": 581.5970149253732, "grad_norm": 25.77760124206543, "learning_rate": 9.807102502017756e-06, "loss": 26.4854, "step": 24427 }, { "epoch": 581.6208955223881, "grad_norm": NaN, "learning_rate": 9.806698950766748e-06, "loss": 35.9411, "step": 24428 }, { "epoch": 581.644776119403, "grad_norm": 22.37326431274414, "learning_rate": 9.806698950766748e-06, "loss": 23.9512, "step": 24429 }, { "epoch": 581.6686567164179, "grad_norm": 21.178653717041016, "learning_rate": 9.80629539951574e-06, "loss": 25.7085, "step": 24430 }, { "epoch": 581.6925373134328, "grad_norm": 22.774295806884766, "learning_rate": 9.80589184826473e-06, "loss": 25.4254, "step": 24431 }, { "epoch": 581.7164179104477, "grad_norm": 22.571651458740234, "learning_rate": 9.805488297013721e-06, "loss": 26.6635, "step": 24432 }, { "epoch": 581.7402985074627, "grad_norm": 22.538297653198242, "learning_rate": 9.805084745762713e-06, "loss": 26.1619, "step": 24433 }, { "epoch": 581.7641791044776, "grad_norm": 24.157346725463867, "learning_rate": 9.804681194511703e-06, "loss": 26.5613, "step": 24434 }, { "epoch": 581.7880597014926, "grad_norm": 19.317853927612305, "learning_rate": 9.804277643260695e-06, "loss": 26.4616, "step": 24435 }, { "epoch": 581.8119402985075, "grad_norm": 22.397842407226562, "learning_rate": 9.803874092009686e-06, "loss": 26.0067, "step": 24436 }, { "epoch": 581.8358208955224, "grad_norm": 22.150562286376953, "learning_rate": 9.803470540758678e-06, "loss": 26.639, "step": 24437 }, { "epoch": 581.8597014925373, "grad_norm": 22.107318878173828, "learning_rate": 9.803066989507668e-06, "loss": 26.6367, "step": 24438 }, { "epoch": 581.8835820895522, "grad_norm": 24.694072723388672, "learning_rate": 9.80266343825666e-06, "loss": 26.1821, "step": 24439 }, { "epoch": 581.9074626865672, "grad_norm": 30.164819717407227, "learning_rate": 9.80225988700565e-06, "loss": 26.7824, "step": 24440 }, { "epoch": 581.9313432835821, "grad_norm": 24.200082778930664, "learning_rate": 9.801856335754641e-06, "loss": 25.5074, "step": 24441 }, { "epoch": 581.955223880597, "grad_norm": 26.07074737548828, "learning_rate": 9.801452784503633e-06, "loss": 25.9307, "step": 24442 }, { "epoch": 581.9791044776119, "grad_norm": 23.705331802368164, "learning_rate": 9.801049233252623e-06, "loss": 26.26, "step": 24443 }, { "epoch": 582.0, "grad_norm": 28.394094467163086, "learning_rate": 9.800645682001615e-06, "loss": 22.205, "step": 24444 }, { "epoch": 582.0238805970149, "grad_norm": 23.692485809326172, "learning_rate": 9.800242130750606e-06, "loss": 25.7224, "step": 24445 }, { "epoch": 582.0477611940298, "grad_norm": 32.96034240722656, "learning_rate": 9.799838579499598e-06, "loss": 26.0433, "step": 24446 }, { "epoch": 582.0716417910447, "grad_norm": 26.276552200317383, "learning_rate": 9.799435028248588e-06, "loss": 26.6248, "step": 24447 }, { "epoch": 582.0955223880597, "grad_norm": 25.00955581665039, "learning_rate": 9.799031476997579e-06, "loss": 25.318, "step": 24448 }, { "epoch": 582.1194029850747, "grad_norm": 36.655025482177734, "learning_rate": 9.79862792574657e-06, "loss": 25.8541, "step": 24449 }, { "epoch": 582.1432835820896, "grad_norm": 25.300626754760742, "learning_rate": 9.798224374495561e-06, "loss": 25.7979, "step": 24450 }, { "epoch": 582.1671641791045, "grad_norm": 25.91741180419922, "learning_rate": 9.797820823244553e-06, "loss": 25.485, "step": 24451 }, { "epoch": 582.1910447761194, "grad_norm": 33.45578384399414, "learning_rate": 9.797417271993545e-06, "loss": 25.9918, "step": 24452 }, { "epoch": 582.2149253731343, "grad_norm": 24.104305267333984, "learning_rate": 9.797013720742536e-06, "loss": 26.3269, "step": 24453 }, { "epoch": 582.2388059701492, "grad_norm": 27.494102478027344, "learning_rate": 9.796610169491526e-06, "loss": 26.4015, "step": 24454 }, { "epoch": 582.2626865671642, "grad_norm": 29.21506690979004, "learning_rate": 9.796206618240516e-06, "loss": 25.8633, "step": 24455 }, { "epoch": 582.2865671641791, "grad_norm": 22.91203498840332, "learning_rate": 9.795803066989508e-06, "loss": 25.9097, "step": 24456 }, { "epoch": 582.310447761194, "grad_norm": 27.230588912963867, "learning_rate": 9.7953995157385e-06, "loss": 25.6334, "step": 24457 }, { "epoch": 582.334328358209, "grad_norm": 27.879961013793945, "learning_rate": 9.79499596448749e-06, "loss": 25.702, "step": 24458 }, { "epoch": 582.3582089552239, "grad_norm": 24.769521713256836, "learning_rate": 9.794592413236483e-06, "loss": 25.7191, "step": 24459 }, { "epoch": 582.3820895522388, "grad_norm": 19.24974822998047, "learning_rate": 9.794188861985473e-06, "loss": 25.1324, "step": 24460 }, { "epoch": 582.4059701492537, "grad_norm": 23.08672332763672, "learning_rate": 9.793785310734464e-06, "loss": 25.7407, "step": 24461 }, { "epoch": 582.4298507462687, "grad_norm": 23.771713256835938, "learning_rate": 9.793381759483456e-06, "loss": 25.2486, "step": 24462 }, { "epoch": 582.4537313432836, "grad_norm": 31.64727783203125, "learning_rate": 9.792978208232446e-06, "loss": 25.9107, "step": 24463 }, { "epoch": 582.4776119402985, "grad_norm": 24.014074325561523, "learning_rate": 9.792574656981438e-06, "loss": 25.2879, "step": 24464 }, { "epoch": 582.5014925373134, "grad_norm": 22.849716186523438, "learning_rate": 9.792171105730428e-06, "loss": 25.9584, "step": 24465 }, { "epoch": 582.5253731343283, "grad_norm": 27.887630462646484, "learning_rate": 9.79176755447942e-06, "loss": 26.2971, "step": 24466 }, { "epoch": 582.5492537313432, "grad_norm": 26.222806930541992, "learning_rate": 9.79136400322841e-06, "loss": 26.0464, "step": 24467 }, { "epoch": 582.5731343283583, "grad_norm": 26.51136016845703, "learning_rate": 9.790960451977401e-06, "loss": 27.077, "step": 24468 }, { "epoch": 582.5970149253732, "grad_norm": 21.140390396118164, "learning_rate": 9.790556900726393e-06, "loss": 26.97, "step": 24469 }, { "epoch": 582.6208955223881, "grad_norm": 29.479379653930664, "learning_rate": 9.790153349475384e-06, "loss": 25.2589, "step": 24470 }, { "epoch": 582.644776119403, "grad_norm": 22.94227409362793, "learning_rate": 9.789749798224376e-06, "loss": 24.3188, "step": 24471 }, { "epoch": 582.6686567164179, "grad_norm": 27.414764404296875, "learning_rate": 9.789346246973366e-06, "loss": 26.3945, "step": 24472 }, { "epoch": 582.6925373134328, "grad_norm": 29.075517654418945, "learning_rate": 9.788942695722358e-06, "loss": 26.2763, "step": 24473 }, { "epoch": 582.7164179104477, "grad_norm": 22.82202911376953, "learning_rate": 9.788539144471349e-06, "loss": 26.1042, "step": 24474 }, { "epoch": 582.7402985074627, "grad_norm": 29.129688262939453, "learning_rate": 9.788135593220339e-06, "loss": 27.3891, "step": 24475 }, { "epoch": 582.7641791044776, "grad_norm": 23.167558670043945, "learning_rate": 9.787732041969331e-06, "loss": 25.8926, "step": 24476 }, { "epoch": 582.7880597014926, "grad_norm": 28.02666664123535, "learning_rate": 9.787328490718321e-06, "loss": 25.9286, "step": 24477 }, { "epoch": 582.8119402985075, "grad_norm": 26.493375778198242, "learning_rate": 9.786924939467313e-06, "loss": 26.1112, "step": 24478 }, { "epoch": 582.8358208955224, "grad_norm": 24.483457565307617, "learning_rate": 9.786521388216305e-06, "loss": 26.2353, "step": 24479 }, { "epoch": 582.8597014925373, "grad_norm": 27.450105667114258, "learning_rate": 9.786117836965296e-06, "loss": 25.7043, "step": 24480 }, { "epoch": 582.8835820895522, "grad_norm": 23.689550399780273, "learning_rate": 9.785714285714286e-06, "loss": 26.0288, "step": 24481 }, { "epoch": 582.9074626865672, "grad_norm": 26.828569412231445, "learning_rate": 9.785310734463278e-06, "loss": 26.6619, "step": 24482 }, { "epoch": 582.9313432835821, "grad_norm": 21.48996925354004, "learning_rate": 9.784907183212269e-06, "loss": 25.8146, "step": 24483 }, { "epoch": 582.955223880597, "grad_norm": 25.73160171508789, "learning_rate": 9.784503631961259e-06, "loss": 25.8638, "step": 24484 }, { "epoch": 582.9791044776119, "grad_norm": 24.520671844482422, "learning_rate": 9.784100080710251e-06, "loss": 25.1585, "step": 24485 }, { "epoch": 583.0, "grad_norm": 20.779743194580078, "learning_rate": 9.783696529459243e-06, "loss": 24.0136, "step": 24486 }, { "epoch": 583.0238805970149, "grad_norm": 24.840673446655273, "learning_rate": 9.783292978208233e-06, "loss": 26.8713, "step": 24487 }, { "epoch": 583.0477611940298, "grad_norm": NaN, "learning_rate": 9.782889426957224e-06, "loss": 45.6856, "step": 24488 }, { "epoch": 583.0716417910447, "grad_norm": 24.5146484375, "learning_rate": 9.782889426957224e-06, "loss": 25.3559, "step": 24489 }, { "epoch": 583.0955223880597, "grad_norm": 26.159759521484375, "learning_rate": 9.782485875706216e-06, "loss": 26.6939, "step": 24490 }, { "epoch": 583.1194029850747, "grad_norm": 24.484975814819336, "learning_rate": 9.782082324455206e-06, "loss": 25.7242, "step": 24491 }, { "epoch": 583.1432835820896, "grad_norm": 25.851167678833008, "learning_rate": 9.781678773204197e-06, "loss": 25.297, "step": 24492 }, { "epoch": 583.1671641791045, "grad_norm": 20.824731826782227, "learning_rate": 9.781275221953189e-06, "loss": 25.6516, "step": 24493 }, { "epoch": 583.1910447761194, "grad_norm": 24.765382766723633, "learning_rate": 9.78087167070218e-06, "loss": 26.1118, "step": 24494 }, { "epoch": 583.2149253731343, "grad_norm": 24.576892852783203, "learning_rate": 9.780468119451171e-06, "loss": 25.6221, "step": 24495 }, { "epoch": 583.2388059701492, "grad_norm": 23.470373153686523, "learning_rate": 9.780064568200163e-06, "loss": 26.1059, "step": 24496 }, { "epoch": 583.2626865671642, "grad_norm": 28.47627830505371, "learning_rate": 9.779661016949154e-06, "loss": 26.5295, "step": 24497 }, { "epoch": 583.2865671641791, "grad_norm": 19.377901077270508, "learning_rate": 9.779257465698144e-06, "loss": 25.1515, "step": 24498 }, { "epoch": 583.310447761194, "grad_norm": 25.84722900390625, "learning_rate": 9.778853914447134e-06, "loss": 25.7796, "step": 24499 }, { "epoch": 583.334328358209, "grad_norm": 25.915218353271484, "learning_rate": 9.778450363196126e-06, "loss": 27.1315, "step": 24500 }, { "epoch": 583.3582089552239, "grad_norm": 26.644569396972656, "learning_rate": 9.778046811945118e-06, "loss": 25.6175, "step": 24501 }, { "epoch": 583.3820895522388, "grad_norm": 22.823020935058594, "learning_rate": 9.777643260694109e-06, "loss": 26.1772, "step": 24502 }, { "epoch": 583.4059701492537, "grad_norm": 20.889068603515625, "learning_rate": 9.7772397094431e-06, "loss": 25.3648, "step": 24503 }, { "epoch": 583.4298507462687, "grad_norm": 23.727008819580078, "learning_rate": 9.776836158192091e-06, "loss": 26.1178, "step": 24504 }, { "epoch": 583.4537313432836, "grad_norm": 21.645435333251953, "learning_rate": 9.776432606941082e-06, "loss": 26.1976, "step": 24505 }, { "epoch": 583.4776119402985, "grad_norm": 23.96001434326172, "learning_rate": 9.776029055690074e-06, "loss": 26.1851, "step": 24506 }, { "epoch": 583.5014925373134, "grad_norm": 24.269643783569336, "learning_rate": 9.775625504439064e-06, "loss": 26.1764, "step": 24507 }, { "epoch": 583.5253731343283, "grad_norm": NaN, "learning_rate": 9.775221953188056e-06, "loss": 39.0039, "step": 24508 }, { "epoch": 583.5492537313432, "grad_norm": 20.57512855529785, "learning_rate": 9.775221953188056e-06, "loss": 26.21, "step": 24509 }, { "epoch": 583.5731343283583, "grad_norm": 22.069801330566406, "learning_rate": 9.774818401937048e-06, "loss": 25.8568, "step": 24510 }, { "epoch": 583.5970149253732, "grad_norm": 20.338821411132812, "learning_rate": 9.774414850686038e-06, "loss": 26.377, "step": 24511 }, { "epoch": 583.6208955223881, "grad_norm": 22.835693359375, "learning_rate": 9.774011299435029e-06, "loss": 26.0489, "step": 24512 }, { "epoch": 583.644776119403, "grad_norm": 24.25733757019043, "learning_rate": 9.77360774818402e-06, "loss": 25.9732, "step": 24513 }, { "epoch": 583.6686567164179, "grad_norm": 20.793434143066406, "learning_rate": 9.773204196933011e-06, "loss": 25.6473, "step": 24514 }, { "epoch": 583.6925373134328, "grad_norm": 22.297283172607422, "learning_rate": 9.772800645682002e-06, "loss": 24.9373, "step": 24515 }, { "epoch": 583.7164179104477, "grad_norm": 23.697107315063477, "learning_rate": 9.772397094430994e-06, "loss": 24.5092, "step": 24516 }, { "epoch": 583.7402985074627, "grad_norm": 21.790321350097656, "learning_rate": 9.771993543179986e-06, "loss": 26.0889, "step": 24517 }, { "epoch": 583.7641791044776, "grad_norm": 24.06023406982422, "learning_rate": 9.771589991928976e-06, "loss": 26.4275, "step": 24518 }, { "epoch": 583.7880597014926, "grad_norm": 27.50176429748535, "learning_rate": 9.771186440677967e-06, "loss": 25.8419, "step": 24519 }, { "epoch": 583.8119402985075, "grad_norm": 28.17275619506836, "learning_rate": 9.770782889426959e-06, "loss": 26.1458, "step": 24520 }, { "epoch": 583.8358208955224, "grad_norm": 19.488229751586914, "learning_rate": 9.770379338175949e-06, "loss": 25.5123, "step": 24521 }, { "epoch": 583.8597014925373, "grad_norm": 31.61526107788086, "learning_rate": 9.76997578692494e-06, "loss": 26.0959, "step": 24522 }, { "epoch": 583.8835820895522, "grad_norm": 33.698482513427734, "learning_rate": 9.769572235673931e-06, "loss": 25.8873, "step": 24523 }, { "epoch": 583.9074626865672, "grad_norm": 20.80262565612793, "learning_rate": 9.769168684422923e-06, "loss": 24.977, "step": 24524 }, { "epoch": 583.9313432835821, "grad_norm": 35.16957092285156, "learning_rate": 9.768765133171914e-06, "loss": 25.7814, "step": 24525 }, { "epoch": 583.955223880597, "grad_norm": 26.059839248657227, "learning_rate": 9.768361581920904e-06, "loss": 25.5937, "step": 24526 }, { "epoch": 583.9791044776119, "grad_norm": 27.297861099243164, "learning_rate": 9.767958030669896e-06, "loss": 26.6894, "step": 24527 }, { "epoch": 584.0, "grad_norm": 26.637611389160156, "learning_rate": 9.767554479418887e-06, "loss": 22.4952, "step": 24528 }, { "epoch": 584.0238805970149, "grad_norm": 26.009828567504883, "learning_rate": 9.767150928167877e-06, "loss": 25.8571, "step": 24529 }, { "epoch": 584.0477611940298, "grad_norm": 24.861997604370117, "learning_rate": 9.766747376916869e-06, "loss": 25.8289, "step": 24530 }, { "epoch": 584.0716417910447, "grad_norm": 26.03972053527832, "learning_rate": 9.766343825665861e-06, "loss": 25.1554, "step": 24531 }, { "epoch": 584.0955223880597, "grad_norm": 23.18956756591797, "learning_rate": 9.765940274414851e-06, "loss": 25.9184, "step": 24532 }, { "epoch": 584.1194029850747, "grad_norm": 26.988967895507812, "learning_rate": 9.765536723163844e-06, "loss": 27.0819, "step": 24533 }, { "epoch": 584.1432835820896, "grad_norm": 33.62266540527344, "learning_rate": 9.765133171912834e-06, "loss": 26.4171, "step": 24534 }, { "epoch": 584.1671641791045, "grad_norm": 21.30532455444336, "learning_rate": 9.764729620661824e-06, "loss": 25.2701, "step": 24535 }, { "epoch": 584.1910447761194, "grad_norm": 31.464004516601562, "learning_rate": 9.764326069410816e-06, "loss": 25.5168, "step": 24536 }, { "epoch": 584.2149253731343, "grad_norm": NaN, "learning_rate": 9.763922518159807e-06, "loss": 38.4994, "step": 24537 }, { "epoch": 584.2388059701492, "grad_norm": 32.162750244140625, "learning_rate": 9.763922518159807e-06, "loss": 26.2951, "step": 24538 }, { "epoch": 584.2626865671642, "grad_norm": 22.810697555541992, "learning_rate": 9.763518966908799e-06, "loss": 25.9173, "step": 24539 }, { "epoch": 584.2865671641791, "grad_norm": 35.042510986328125, "learning_rate": 9.763115415657789e-06, "loss": 26.0953, "step": 24540 }, { "epoch": 584.310447761194, "grad_norm": 28.63813591003418, "learning_rate": 9.762711864406781e-06, "loss": 26.4358, "step": 24541 }, { "epoch": 584.334328358209, "grad_norm": 24.727294921875, "learning_rate": 9.762308313155772e-06, "loss": 26.3001, "step": 24542 }, { "epoch": 584.3582089552239, "grad_norm": 41.99859619140625, "learning_rate": 9.761904761904762e-06, "loss": 25.6589, "step": 24543 }, { "epoch": 584.3820895522388, "grad_norm": 29.437156677246094, "learning_rate": 9.761501210653754e-06, "loss": 25.4593, "step": 24544 }, { "epoch": 584.4059701492537, "grad_norm": 43.8251953125, "learning_rate": 9.761097659402744e-06, "loss": 25.2506, "step": 24545 }, { "epoch": 584.4298507462687, "grad_norm": 32.51760482788086, "learning_rate": 9.760694108151736e-06, "loss": 26.2082, "step": 24546 }, { "epoch": 584.4537313432836, "grad_norm": 46.370521545410156, "learning_rate": 9.760290556900727e-06, "loss": 26.4104, "step": 24547 }, { "epoch": 584.4776119402985, "grad_norm": 30.58124542236328, "learning_rate": 9.759887005649719e-06, "loss": 23.988, "step": 24548 }, { "epoch": 584.5014925373134, "grad_norm": 43.06747055053711, "learning_rate": 9.75948345439871e-06, "loss": 26.5351, "step": 24549 }, { "epoch": 584.5253731343283, "grad_norm": 32.64202117919922, "learning_rate": 9.7590799031477e-06, "loss": 25.8628, "step": 24550 }, { "epoch": 584.5492537313432, "grad_norm": 41.09425735473633, "learning_rate": 9.758676351896692e-06, "loss": 26.015, "step": 24551 }, { "epoch": 584.5731343283583, "grad_norm": 29.04777717590332, "learning_rate": 9.758272800645682e-06, "loss": 26.5636, "step": 24552 }, { "epoch": 584.5970149253732, "grad_norm": 32.82481384277344, "learning_rate": 9.757869249394674e-06, "loss": 26.3606, "step": 24553 }, { "epoch": 584.6208955223881, "grad_norm": 33.78539276123047, "learning_rate": 9.757465698143666e-06, "loss": 25.5074, "step": 24554 }, { "epoch": 584.644776119403, "grad_norm": 26.175601959228516, "learning_rate": 9.757062146892657e-06, "loss": 25.2004, "step": 24555 }, { "epoch": 584.6686567164179, "grad_norm": 42.17500305175781, "learning_rate": 9.756658595641647e-06, "loss": 27.2455, "step": 24556 }, { "epoch": 584.6925373134328, "grad_norm": 29.725669860839844, "learning_rate": 9.756255044390637e-06, "loss": 26.2867, "step": 24557 }, { "epoch": 584.7164179104477, "grad_norm": 46.25615310668945, "learning_rate": 9.75585149313963e-06, "loss": 25.7312, "step": 24558 }, { "epoch": 584.7402985074627, "grad_norm": 34.736846923828125, "learning_rate": 9.755447941888621e-06, "loss": 25.4942, "step": 24559 }, { "epoch": 584.7641791044776, "grad_norm": 46.48570251464844, "learning_rate": 9.755044390637612e-06, "loss": 25.4824, "step": 24560 }, { "epoch": 584.7880597014926, "grad_norm": 36.145164489746094, "learning_rate": 9.754640839386604e-06, "loss": 27.1171, "step": 24561 }, { "epoch": 584.8119402985075, "grad_norm": 52.038570404052734, "learning_rate": 9.754237288135594e-06, "loss": 25.4528, "step": 24562 }, { "epoch": 584.8358208955224, "grad_norm": 38.67026138305664, "learning_rate": 9.753833736884585e-06, "loss": 26.7234, "step": 24563 }, { "epoch": 584.8597014925373, "grad_norm": 51.10511779785156, "learning_rate": 9.753430185633577e-06, "loss": 25.0709, "step": 24564 }, { "epoch": 584.8835820895522, "grad_norm": 47.28604507446289, "learning_rate": 9.753026634382567e-06, "loss": 26.1222, "step": 24565 }, { "epoch": 584.9074626865672, "grad_norm": 37.597599029541016, "learning_rate": 9.752623083131559e-06, "loss": 25.463, "step": 24566 }, { "epoch": 584.9313432835821, "grad_norm": 36.40436935424805, "learning_rate": 9.75221953188055e-06, "loss": 25.068, "step": 24567 }, { "epoch": 584.955223880597, "grad_norm": 41.407222747802734, "learning_rate": 9.751815980629541e-06, "loss": 26.3523, "step": 24568 }, { "epoch": 584.9791044776119, "grad_norm": 35.92876434326172, "learning_rate": 9.751412429378532e-06, "loss": 26.5921, "step": 24569 }, { "epoch": 585.0, "grad_norm": 41.61369323730469, "learning_rate": 9.751008878127522e-06, "loss": 22.2543, "step": 24570 }, { "epoch": 585.0238805970149, "grad_norm": NaN, "learning_rate": 9.750605326876514e-06, "loss": 29.249, "step": 24571 }, { "epoch": 585.0477611940298, "grad_norm": 43.26472091674805, "learning_rate": 9.750605326876514e-06, "loss": 26.4406, "step": 24572 }, { "epoch": 585.0716417910447, "grad_norm": 43.21359634399414, "learning_rate": 9.750201775625505e-06, "loss": 25.8937, "step": 24573 }, { "epoch": 585.0955223880597, "grad_norm": 38.407039642333984, "learning_rate": 9.749798224374497e-06, "loss": 25.411, "step": 24574 }, { "epoch": 585.1194029850747, "grad_norm": 42.677188873291016, "learning_rate": 9.749394673123487e-06, "loss": 26.2245, "step": 24575 }, { "epoch": 585.1432835820896, "grad_norm": 34.69563293457031, "learning_rate": 9.748991121872479e-06, "loss": 25.7404, "step": 24576 }, { "epoch": 585.1671641791045, "grad_norm": 45.96746063232422, "learning_rate": 9.74858757062147e-06, "loss": 26.0998, "step": 24577 }, { "epoch": 585.1910447761194, "grad_norm": 39.66915512084961, "learning_rate": 9.748184019370462e-06, "loss": 25.5889, "step": 24578 }, { "epoch": 585.2149253731343, "grad_norm": 40.189918518066406, "learning_rate": 9.747780468119452e-06, "loss": 24.9975, "step": 24579 }, { "epoch": 585.2388059701492, "grad_norm": 40.431583404541016, "learning_rate": 9.747376916868442e-06, "loss": 25.2487, "step": 24580 }, { "epoch": 585.2626865671642, "grad_norm": 39.49040222167969, "learning_rate": 9.746973365617434e-06, "loss": 25.887, "step": 24581 }, { "epoch": 585.2865671641791, "grad_norm": 30.466411590576172, "learning_rate": 9.746569814366425e-06, "loss": 25.8333, "step": 24582 }, { "epoch": 585.310447761194, "grad_norm": 47.894039154052734, "learning_rate": 9.746166263115417e-06, "loss": 25.6534, "step": 24583 }, { "epoch": 585.334328358209, "grad_norm": 37.659339904785156, "learning_rate": 9.745762711864407e-06, "loss": 26.3017, "step": 24584 }, { "epoch": 585.3582089552239, "grad_norm": 44.940914154052734, "learning_rate": 9.7453591606134e-06, "loss": 25.7883, "step": 24585 }, { "epoch": 585.3820895522388, "grad_norm": 42.62520217895508, "learning_rate": 9.74495560936239e-06, "loss": 25.7588, "step": 24586 }, { "epoch": 585.4059701492537, "grad_norm": 36.27079772949219, "learning_rate": 9.74455205811138e-06, "loss": 26.8295, "step": 24587 }, { "epoch": 585.4298507462687, "grad_norm": 36.72421646118164, "learning_rate": 9.744148506860372e-06, "loss": 25.7727, "step": 24588 }, { "epoch": 585.4537313432836, "grad_norm": 42.6402587890625, "learning_rate": 9.743744955609364e-06, "loss": 25.6423, "step": 24589 }, { "epoch": 585.4776119402985, "grad_norm": 32.8854866027832, "learning_rate": 9.743341404358354e-06, "loss": 25.4074, "step": 24590 }, { "epoch": 585.5014925373134, "grad_norm": 43.56322479248047, "learning_rate": 9.742937853107347e-06, "loss": 25.6009, "step": 24591 }, { "epoch": 585.5253731343283, "grad_norm": 35.991310119628906, "learning_rate": 9.742534301856337e-06, "loss": 26.157, "step": 24592 }, { "epoch": 585.5492537313432, "grad_norm": 43.44573211669922, "learning_rate": 9.742130750605327e-06, "loss": 26.1089, "step": 24593 }, { "epoch": 585.5731343283583, "grad_norm": 35.83282470703125, "learning_rate": 9.741727199354318e-06, "loss": 24.7264, "step": 24594 }, { "epoch": 585.5970149253732, "grad_norm": 39.83034896850586, "learning_rate": 9.74132364810331e-06, "loss": 26.3399, "step": 24595 }, { "epoch": 585.6208955223881, "grad_norm": 36.233978271484375, "learning_rate": 9.740920096852302e-06, "loss": 25.4349, "step": 24596 }, { "epoch": 585.644776119403, "grad_norm": 43.33831787109375, "learning_rate": 9.740516545601292e-06, "loss": 25.4655, "step": 24597 }, { "epoch": 585.6686567164179, "grad_norm": 38.355350494384766, "learning_rate": 9.740112994350284e-06, "loss": 26.0184, "step": 24598 }, { "epoch": 585.6925373134328, "grad_norm": 39.94011306762695, "learning_rate": 9.739709443099275e-06, "loss": 26.4566, "step": 24599 }, { "epoch": 585.7164179104477, "grad_norm": 36.522132873535156, "learning_rate": 9.739305891848265e-06, "loss": 26.5675, "step": 24600 }, { "epoch": 585.7402985074627, "grad_norm": 39.99588394165039, "learning_rate": 9.738902340597257e-06, "loss": 26.0309, "step": 24601 }, { "epoch": 585.7641791044776, "grad_norm": 34.210811614990234, "learning_rate": 9.738498789346247e-06, "loss": 26.5714, "step": 24602 }, { "epoch": 585.7880597014926, "grad_norm": 44.4177360534668, "learning_rate": 9.73809523809524e-06, "loss": 26.6315, "step": 24603 }, { "epoch": 585.8119402985075, "grad_norm": 36.08319854736328, "learning_rate": 9.73769168684423e-06, "loss": 25.6566, "step": 24604 }, { "epoch": 585.8358208955224, "grad_norm": 42.086524963378906, "learning_rate": 9.737288135593222e-06, "loss": 26.3462, "step": 24605 }, { "epoch": 585.8597014925373, "grad_norm": 42.89249038696289, "learning_rate": 9.736884584342212e-06, "loss": 27.1162, "step": 24606 }, { "epoch": 585.8835820895522, "grad_norm": 38.59859085083008, "learning_rate": 9.736481033091203e-06, "loss": 26.2108, "step": 24607 }, { "epoch": 585.9074626865672, "grad_norm": 34.027191162109375, "learning_rate": 9.736077481840195e-06, "loss": 25.4312, "step": 24608 }, { "epoch": 585.9313432835821, "grad_norm": 41.09379959106445, "learning_rate": 9.735673930589185e-06, "loss": 26.7559, "step": 24609 }, { "epoch": 585.955223880597, "grad_norm": 32.74630355834961, "learning_rate": 9.735270379338177e-06, "loss": 26.0178, "step": 24610 }, { "epoch": 585.9791044776119, "grad_norm": 43.25794219970703, "learning_rate": 9.734866828087169e-06, "loss": 25.6515, "step": 24611 }, { "epoch": 586.0, "grad_norm": 34.86247253417969, "learning_rate": 9.73446327683616e-06, "loss": 23.2108, "step": 24612 }, { "epoch": 586.0238805970149, "grad_norm": 40.81575012207031, "learning_rate": 9.73405972558515e-06, "loss": 26.2687, "step": 24613 }, { "epoch": 586.0477611940298, "grad_norm": 37.84720993041992, "learning_rate": 9.733656174334142e-06, "loss": 24.8797, "step": 24614 }, { "epoch": 586.0716417910447, "grad_norm": 43.54043960571289, "learning_rate": 9.733252623083132e-06, "loss": 25.7996, "step": 24615 }, { "epoch": 586.0955223880597, "grad_norm": 37.07904052734375, "learning_rate": 9.732849071832123e-06, "loss": 25.3908, "step": 24616 }, { "epoch": 586.1194029850747, "grad_norm": 37.61185073852539, "learning_rate": 9.732445520581115e-06, "loss": 25.2111, "step": 24617 }, { "epoch": 586.1432835820896, "grad_norm": 40.292205810546875, "learning_rate": 9.732041969330107e-06, "loss": 26.3784, "step": 24618 }, { "epoch": 586.1671641791045, "grad_norm": 42.68985366821289, "learning_rate": 9.731638418079097e-06, "loss": 25.8939, "step": 24619 }, { "epoch": 586.1910447761194, "grad_norm": 31.833606719970703, "learning_rate": 9.731234866828088e-06, "loss": 25.2029, "step": 24620 }, { "epoch": 586.2149253731343, "grad_norm": 41.21000671386719, "learning_rate": 9.73083131557708e-06, "loss": 27.2667, "step": 24621 }, { "epoch": 586.2388059701492, "grad_norm": 36.61752700805664, "learning_rate": 9.73042776432607e-06, "loss": 26.252, "step": 24622 }, { "epoch": 586.2626865671642, "grad_norm": 42.807735443115234, "learning_rate": 9.73002421307506e-06, "loss": 26.1446, "step": 24623 }, { "epoch": 586.2865671641791, "grad_norm": 37.60398483276367, "learning_rate": 9.729620661824052e-06, "loss": 26.0102, "step": 24624 }, { "epoch": 586.310447761194, "grad_norm": 42.36183166503906, "learning_rate": 9.729217110573044e-06, "loss": 25.7253, "step": 24625 }, { "epoch": 586.334328358209, "grad_norm": 37.31015396118164, "learning_rate": 9.728813559322035e-06, "loss": 25.7699, "step": 24626 }, { "epoch": 586.3582089552239, "grad_norm": 41.05231857299805, "learning_rate": 9.728410008071025e-06, "loss": 25.9175, "step": 24627 }, { "epoch": 586.3820895522388, "grad_norm": 34.441158294677734, "learning_rate": 9.728006456820017e-06, "loss": 26.0244, "step": 24628 }, { "epoch": 586.4059701492537, "grad_norm": 39.97518539428711, "learning_rate": 9.727602905569008e-06, "loss": 25.8294, "step": 24629 }, { "epoch": 586.4298507462687, "grad_norm": 33.8151741027832, "learning_rate": 9.727199354317998e-06, "loss": 26.3512, "step": 24630 }, { "epoch": 586.4537313432836, "grad_norm": 42.42046356201172, "learning_rate": 9.72679580306699e-06, "loss": 25.2631, "step": 24631 }, { "epoch": 586.4776119402985, "grad_norm": 39.93988800048828, "learning_rate": 9.726392251815982e-06, "loss": 25.3851, "step": 24632 }, { "epoch": 586.5014925373134, "grad_norm": 37.59446716308594, "learning_rate": 9.725988700564972e-06, "loss": 26.3432, "step": 24633 }, { "epoch": 586.5253731343283, "grad_norm": 39.115211486816406, "learning_rate": 9.725585149313965e-06, "loss": 26.5349, "step": 24634 }, { "epoch": 586.5492537313432, "grad_norm": 37.68468475341797, "learning_rate": 9.725181598062955e-06, "loss": 26.2362, "step": 24635 }, { "epoch": 586.5731343283583, "grad_norm": 33.40425109863281, "learning_rate": 9.724778046811945e-06, "loss": 26.3725, "step": 24636 }, { "epoch": 586.5970149253732, "grad_norm": 38.56224822998047, "learning_rate": 9.724374495560937e-06, "loss": 25.4085, "step": 24637 }, { "epoch": 586.6208955223881, "grad_norm": 32.63430404663086, "learning_rate": 9.723970944309928e-06, "loss": 26.4516, "step": 24638 }, { "epoch": 586.644776119403, "grad_norm": 38.71021270751953, "learning_rate": 9.72356739305892e-06, "loss": 26.2306, "step": 24639 }, { "epoch": 586.6686567164179, "grad_norm": 37.68341827392578, "learning_rate": 9.72316384180791e-06, "loss": 27.2151, "step": 24640 }, { "epoch": 586.6925373134328, "grad_norm": 40.96088409423828, "learning_rate": 9.722760290556902e-06, "loss": 25.8228, "step": 24641 }, { "epoch": 586.7164179104477, "grad_norm": 36.18296813964844, "learning_rate": 9.722356739305893e-06, "loss": 25.7762, "step": 24642 }, { "epoch": 586.7402985074627, "grad_norm": 47.22551345825195, "learning_rate": 9.721953188054883e-06, "loss": 25.8277, "step": 24643 }, { "epoch": 586.7641791044776, "grad_norm": NaN, "learning_rate": 9.721549636803875e-06, "loss": 38.347, "step": 24644 }, { "epoch": 586.7880597014926, "grad_norm": 39.047401428222656, "learning_rate": 9.721549636803875e-06, "loss": 25.7949, "step": 24645 }, { "epoch": 586.8119402985075, "grad_norm": 40.93299102783203, "learning_rate": 9.721146085552865e-06, "loss": 27.0083, "step": 24646 }, { "epoch": 586.8358208955224, "grad_norm": 37.23798751831055, "learning_rate": 9.720742534301857e-06, "loss": 25.8492, "step": 24647 }, { "epoch": 586.8597014925373, "grad_norm": 36.167423248291016, "learning_rate": 9.72033898305085e-06, "loss": 25.0553, "step": 24648 }, { "epoch": 586.8835820895522, "grad_norm": 35.24478530883789, "learning_rate": 9.71993543179984e-06, "loss": 26.769, "step": 24649 }, { "epoch": 586.9074626865672, "grad_norm": 40.67232894897461, "learning_rate": 9.71953188054883e-06, "loss": 26.1508, "step": 24650 }, { "epoch": 586.9313432835821, "grad_norm": 34.756752014160156, "learning_rate": 9.71912832929782e-06, "loss": 26.4622, "step": 24651 }, { "epoch": 586.955223880597, "grad_norm": 39.78337478637695, "learning_rate": 9.718724778046813e-06, "loss": 25.8525, "step": 24652 }, { "epoch": 586.9791044776119, "grad_norm": 35.094356536865234, "learning_rate": 9.718321226795803e-06, "loss": 26.0893, "step": 24653 }, { "epoch": 587.0, "grad_norm": 33.49990463256836, "learning_rate": 9.717917675544795e-06, "loss": 22.5796, "step": 24654 }, { "epoch": 587.0238805970149, "grad_norm": 32.90514373779297, "learning_rate": 9.717514124293787e-06, "loss": 25.6185, "step": 24655 }, { "epoch": 587.0477611940298, "grad_norm": 41.225032806396484, "learning_rate": 9.717110573042778e-06, "loss": 26.0264, "step": 24656 }, { "epoch": 587.0716417910447, "grad_norm": 35.7687873840332, "learning_rate": 9.716707021791768e-06, "loss": 26.2254, "step": 24657 }, { "epoch": 587.0955223880597, "grad_norm": 35.199092864990234, "learning_rate": 9.71630347054076e-06, "loss": 25.9611, "step": 24658 }, { "epoch": 587.1194029850747, "grad_norm": 33.63139724731445, "learning_rate": 9.71589991928975e-06, "loss": 26.5918, "step": 24659 }, { "epoch": 587.1432835820896, "grad_norm": 38.649208068847656, "learning_rate": 9.71549636803874e-06, "loss": 24.9147, "step": 24660 }, { "epoch": 587.1671641791045, "grad_norm": 33.001625061035156, "learning_rate": 9.715092816787733e-06, "loss": 26.9053, "step": 24661 }, { "epoch": 587.1910447761194, "grad_norm": 45.36368179321289, "learning_rate": 9.714689265536725e-06, "loss": 26.0223, "step": 24662 }, { "epoch": 587.2149253731343, "grad_norm": 34.52979278564453, "learning_rate": 9.714285714285715e-06, "loss": 25.6348, "step": 24663 }, { "epoch": 587.2388059701492, "grad_norm": 36.893733978271484, "learning_rate": 9.713882163034706e-06, "loss": 26.1461, "step": 24664 }, { "epoch": 587.2626865671642, "grad_norm": 32.64133071899414, "learning_rate": 9.713478611783698e-06, "loss": 25.5956, "step": 24665 }, { "epoch": 587.2865671641791, "grad_norm": 35.74478530883789, "learning_rate": 9.713075060532688e-06, "loss": 25.858, "step": 24666 }, { "epoch": 587.310447761194, "grad_norm": 30.200468063354492, "learning_rate": 9.71267150928168e-06, "loss": 26.3666, "step": 24667 }, { "epoch": 587.334328358209, "grad_norm": 40.89311599731445, "learning_rate": 9.71226795803067e-06, "loss": 26.758, "step": 24668 }, { "epoch": 587.3582089552239, "grad_norm": 33.98561477661133, "learning_rate": 9.711864406779662e-06, "loss": 25.6933, "step": 24669 }, { "epoch": 587.3820895522388, "grad_norm": 38.80975341796875, "learning_rate": 9.711460855528653e-06, "loss": 26.127, "step": 24670 }, { "epoch": 587.4059701492537, "grad_norm": 31.422046661376953, "learning_rate": 9.711057304277645e-06, "loss": 25.7733, "step": 24671 }, { "epoch": 587.4298507462687, "grad_norm": 31.526445388793945, "learning_rate": 9.710653753026635e-06, "loss": 25.6824, "step": 24672 }, { "epoch": 587.4537313432836, "grad_norm": 30.435800552368164, "learning_rate": 9.710250201775626e-06, "loss": 26.8437, "step": 24673 }, { "epoch": 587.4776119402985, "grad_norm": 39.78872299194336, "learning_rate": 9.709846650524618e-06, "loss": 26.7349, "step": 24674 }, { "epoch": 587.5014925373134, "grad_norm": 30.54741668701172, "learning_rate": 9.709443099273608e-06, "loss": 25.9134, "step": 24675 }, { "epoch": 587.5253731343283, "grad_norm": 33.28197479248047, "learning_rate": 9.7090395480226e-06, "loss": 25.6512, "step": 24676 }, { "epoch": 587.5492537313432, "grad_norm": 29.899456024169922, "learning_rate": 9.70863599677159e-06, "loss": 25.7924, "step": 24677 }, { "epoch": 587.5731343283583, "grad_norm": 34.584693908691406, "learning_rate": 9.708232445520583e-06, "loss": 26.9679, "step": 24678 }, { "epoch": 587.5970149253732, "grad_norm": 27.348249435424805, "learning_rate": 9.707828894269573e-06, "loss": 25.209, "step": 24679 }, { "epoch": 587.6208955223881, "grad_norm": 35.02487564086914, "learning_rate": 9.707425343018563e-06, "loss": 26.281, "step": 24680 }, { "epoch": 587.644776119403, "grad_norm": 26.517881393432617, "learning_rate": 9.707021791767555e-06, "loss": 26.0729, "step": 24681 }, { "epoch": 587.6686567164179, "grad_norm": 36.899566650390625, "learning_rate": 9.706618240516546e-06, "loss": 27.1841, "step": 24682 }, { "epoch": 587.6925373134328, "grad_norm": 25.399940490722656, "learning_rate": 9.706214689265538e-06, "loss": 25.5128, "step": 24683 }, { "epoch": 587.7164179104477, "grad_norm": 36.873931884765625, "learning_rate": 9.705811138014528e-06, "loss": 27.1701, "step": 24684 }, { "epoch": 587.7402985074627, "grad_norm": 30.730371475219727, "learning_rate": 9.70540758676352e-06, "loss": 26.9802, "step": 24685 }, { "epoch": 587.7641791044776, "grad_norm": 34.20701599121094, "learning_rate": 9.70500403551251e-06, "loss": 26.6263, "step": 24686 }, { "epoch": 587.7880597014926, "grad_norm": 29.05893325805664, "learning_rate": 9.704600484261501e-06, "loss": 25.7005, "step": 24687 }, { "epoch": 587.8119402985075, "grad_norm": 32.58463668823242, "learning_rate": 9.704196933010493e-06, "loss": 26.7319, "step": 24688 }, { "epoch": 587.8358208955224, "grad_norm": 27.28272819519043, "learning_rate": 9.703793381759485e-06, "loss": 26.3147, "step": 24689 }, { "epoch": 587.8597014925373, "grad_norm": 33.5809211730957, "learning_rate": 9.703389830508475e-06, "loss": 26.2305, "step": 24690 }, { "epoch": 587.8835820895522, "grad_norm": 26.055068969726562, "learning_rate": 9.702986279257468e-06, "loss": 26.4775, "step": 24691 }, { "epoch": 587.9074626865672, "grad_norm": 30.527408599853516, "learning_rate": 9.702582728006458e-06, "loss": 26.5182, "step": 24692 }, { "epoch": 587.9313432835821, "grad_norm": 23.693635940551758, "learning_rate": 9.702179176755448e-06, "loss": 25.9359, "step": 24693 }, { "epoch": 587.955223880597, "grad_norm": 28.379199981689453, "learning_rate": 9.70177562550444e-06, "loss": 26.8988, "step": 24694 }, { "epoch": 587.9791044776119, "grad_norm": 27.96656036376953, "learning_rate": 9.70137207425343e-06, "loss": 26.4724, "step": 24695 }, { "epoch": 588.0, "grad_norm": 26.61273193359375, "learning_rate": 9.700968523002423e-06, "loss": 22.3443, "step": 24696 }, { "epoch": 588.0238805970149, "grad_norm": 28.933629989624023, "learning_rate": 9.700564971751413e-06, "loss": 26.9539, "step": 24697 }, { "epoch": 588.0477611940298, "grad_norm": 33.955657958984375, "learning_rate": 9.700161420500405e-06, "loss": 27.0229, "step": 24698 }, { "epoch": 588.0716417910447, "grad_norm": 24.66825294494629, "learning_rate": 9.699757869249396e-06, "loss": 26.4478, "step": 24699 }, { "epoch": 588.0955223880597, "grad_norm": 33.16463088989258, "learning_rate": 9.699354317998386e-06, "loss": 27.1187, "step": 24700 }, { "epoch": 588.1194029850747, "grad_norm": 24.612686157226562, "learning_rate": 9.698950766747378e-06, "loss": 26.6854, "step": 24701 }, { "epoch": 588.1432835820896, "grad_norm": 26.001401901245117, "learning_rate": 9.698547215496368e-06, "loss": 27.0425, "step": 24702 }, { "epoch": 588.1671641791045, "grad_norm": 27.305707931518555, "learning_rate": 9.69814366424536e-06, "loss": 27.0524, "step": 24703 }, { "epoch": 588.1910447761194, "grad_norm": 23.11536407470703, "learning_rate": 9.69774011299435e-06, "loss": 26.5236, "step": 24704 }, { "epoch": 588.2149253731343, "grad_norm": 25.623634338378906, "learning_rate": 9.697336561743343e-06, "loss": 26.6795, "step": 24705 }, { "epoch": 588.2388059701492, "grad_norm": 32.394264221191406, "learning_rate": 9.696933010492333e-06, "loss": 26.4814, "step": 24706 }, { "epoch": 588.2626865671642, "grad_norm": 22.99032974243164, "learning_rate": 9.696529459241324e-06, "loss": 25.7196, "step": 24707 }, { "epoch": 588.2865671641791, "grad_norm": 23.22537612915039, "learning_rate": 9.696125907990316e-06, "loss": 25.5383, "step": 24708 }, { "epoch": 588.310447761194, "grad_norm": 24.892866134643555, "learning_rate": 9.695722356739306e-06, "loss": 26.3771, "step": 24709 }, { "epoch": 588.334328358209, "grad_norm": 27.669692993164062, "learning_rate": 9.695318805488298e-06, "loss": 26.8883, "step": 24710 }, { "epoch": 588.3582089552239, "grad_norm": 22.54668617248535, "learning_rate": 9.69491525423729e-06, "loss": 26.2576, "step": 24711 }, { "epoch": 588.3820895522388, "grad_norm": 25.75528335571289, "learning_rate": 9.69451170298628e-06, "loss": 26.3891, "step": 24712 }, { "epoch": 588.4059701492537, "grad_norm": 31.255844116210938, "learning_rate": 9.694108151735271e-06, "loss": 26.6056, "step": 24713 }, { "epoch": 588.4298507462687, "grad_norm": 24.882234573364258, "learning_rate": 9.693704600484263e-06, "loss": 26.2398, "step": 24714 }, { "epoch": 588.4537313432836, "grad_norm": 29.758243560791016, "learning_rate": 9.693301049233253e-06, "loss": 25.9742, "step": 24715 }, { "epoch": 588.4776119402985, "grad_norm": 29.26739501953125, "learning_rate": 9.692897497982244e-06, "loss": 26.3768, "step": 24716 }, { "epoch": 588.5014925373134, "grad_norm": 25.25251007080078, "learning_rate": 9.692493946731236e-06, "loss": 25.8869, "step": 24717 }, { "epoch": 588.5253731343283, "grad_norm": 28.098785400390625, "learning_rate": 9.692090395480228e-06, "loss": 25.9004, "step": 24718 }, { "epoch": 588.5492537313432, "grad_norm": 27.13605499267578, "learning_rate": 9.691686844229218e-06, "loss": 26.8347, "step": 24719 }, { "epoch": 588.5731343283583, "grad_norm": 23.49510955810547, "learning_rate": 9.691283292978209e-06, "loss": 25.5257, "step": 24720 }, { "epoch": 588.5970149253732, "grad_norm": 20.181053161621094, "learning_rate": 9.6908797417272e-06, "loss": 26.6207, "step": 24721 }, { "epoch": 588.6208955223881, "grad_norm": 24.287778854370117, "learning_rate": 9.690476190476191e-06, "loss": 25.3487, "step": 24722 }, { "epoch": 588.644776119403, "grad_norm": 22.617843627929688, "learning_rate": 9.690072639225181e-06, "loss": 26.2803, "step": 24723 }, { "epoch": 588.6686567164179, "grad_norm": 20.5438175201416, "learning_rate": 9.689669087974173e-06, "loss": 25.3903, "step": 24724 }, { "epoch": 588.6925373134328, "grad_norm": 20.67898941040039, "learning_rate": 9.689265536723165e-06, "loss": 26.868, "step": 24725 }, { "epoch": 588.7164179104477, "grad_norm": 22.90618896484375, "learning_rate": 9.688861985472156e-06, "loss": 26.1688, "step": 24726 }, { "epoch": 588.7402985074627, "grad_norm": 21.959964752197266, "learning_rate": 9.688458434221148e-06, "loss": 26.9576, "step": 24727 }, { "epoch": 588.7641791044776, "grad_norm": 21.654979705810547, "learning_rate": 9.688054882970138e-06, "loss": 25.7321, "step": 24728 }, { "epoch": 588.7880597014926, "grad_norm": 22.840957641601562, "learning_rate": 9.687651331719129e-06, "loss": 26.1805, "step": 24729 }, { "epoch": 588.8119402985075, "grad_norm": 21.548410415649414, "learning_rate": 9.687247780468119e-06, "loss": 24.0202, "step": 24730 }, { "epoch": 588.8358208955224, "grad_norm": 20.094371795654297, "learning_rate": 9.686844229217111e-06, "loss": 25.9843, "step": 24731 }, { "epoch": 588.8597014925373, "grad_norm": 21.973751068115234, "learning_rate": 9.686440677966103e-06, "loss": 25.5635, "step": 24732 }, { "epoch": 588.8835820895522, "grad_norm": 21.247291564941406, "learning_rate": 9.686037126715093e-06, "loss": 25.1751, "step": 24733 }, { "epoch": 588.9074626865672, "grad_norm": 23.29511260986328, "learning_rate": 9.685633575464086e-06, "loss": 25.3996, "step": 24734 }, { "epoch": 588.9313432835821, "grad_norm": 22.234058380126953, "learning_rate": 9.685230024213076e-06, "loss": 26.1625, "step": 24735 }, { "epoch": 588.955223880597, "grad_norm": 22.82220458984375, "learning_rate": 9.684826472962066e-06, "loss": 27.2192, "step": 24736 }, { "epoch": 588.9791044776119, "grad_norm": 25.65250015258789, "learning_rate": 9.684422921711058e-06, "loss": 25.235, "step": 24737 }, { "epoch": 589.0, "grad_norm": 21.713010787963867, "learning_rate": 9.684019370460049e-06, "loss": 22.3742, "step": 24738 }, { "epoch": 589.0238805970149, "grad_norm": 21.282495498657227, "learning_rate": 9.68361581920904e-06, "loss": 26.8962, "step": 24739 }, { "epoch": 589.0477611940298, "grad_norm": 23.180152893066406, "learning_rate": 9.683212267958031e-06, "loss": 26.1422, "step": 24740 }, { "epoch": 589.0716417910447, "grad_norm": 26.520540237426758, "learning_rate": 9.682808716707023e-06, "loss": 26.3579, "step": 24741 }, { "epoch": 589.0955223880597, "grad_norm": 21.047975540161133, "learning_rate": 9.682405165456014e-06, "loss": 26.5561, "step": 24742 }, { "epoch": 589.1194029850747, "grad_norm": 23.61358642578125, "learning_rate": 9.682001614205004e-06, "loss": 26.1243, "step": 24743 }, { "epoch": 589.1432835820896, "grad_norm": 20.484519958496094, "learning_rate": 9.681598062953996e-06, "loss": 26.4518, "step": 24744 }, { "epoch": 589.1671641791045, "grad_norm": 24.00567626953125, "learning_rate": 9.681194511702986e-06, "loss": 26.3545, "step": 24745 }, { "epoch": 589.1910447761194, "grad_norm": 23.419187545776367, "learning_rate": 9.680790960451978e-06, "loss": 26.0908, "step": 24746 }, { "epoch": 589.2149253731343, "grad_norm": 21.72808837890625, "learning_rate": 9.68038740920097e-06, "loss": 25.706, "step": 24747 }, { "epoch": 589.2388059701492, "grad_norm": 22.583450317382812, "learning_rate": 9.679983857949961e-06, "loss": 26.7549, "step": 24748 }, { "epoch": 589.2626865671642, "grad_norm": NaN, "learning_rate": 9.679580306698951e-06, "loss": 32.6615, "step": 24749 }, { "epoch": 589.2865671641791, "grad_norm": 27.05158805847168, "learning_rate": 9.679580306698951e-06, "loss": 25.999, "step": 24750 }, { "epoch": 589.310447761194, "grad_norm": 28.120515823364258, "learning_rate": 9.679176755447943e-06, "loss": 27.0762, "step": 24751 }, { "epoch": 589.334328358209, "grad_norm": 24.040719985961914, "learning_rate": 9.678773204196934e-06, "loss": 27.1818, "step": 24752 }, { "epoch": 589.3582089552239, "grad_norm": 23.633686065673828, "learning_rate": 9.678369652945924e-06, "loss": 26.5832, "step": 24753 }, { "epoch": 589.3820895522388, "grad_norm": 23.62124252319336, "learning_rate": 9.677966101694916e-06, "loss": 26.1629, "step": 24754 }, { "epoch": 589.4059701492537, "grad_norm": 28.338857650756836, "learning_rate": 9.677562550443908e-06, "loss": 26.2909, "step": 24755 }, { "epoch": 589.4298507462687, "grad_norm": 22.411848068237305, "learning_rate": 9.677158999192899e-06, "loss": 26.0076, "step": 24756 }, { "epoch": 589.4537313432836, "grad_norm": 19.79324722290039, "learning_rate": 9.676755447941889e-06, "loss": 26.1195, "step": 24757 }, { "epoch": 589.4776119402985, "grad_norm": 25.054922103881836, "learning_rate": 9.676351896690881e-06, "loss": 26.6152, "step": 24758 }, { "epoch": 589.5014925373134, "grad_norm": 28.257783889770508, "learning_rate": 9.675948345439871e-06, "loss": 26.7215, "step": 24759 }, { "epoch": 589.5253731343283, "grad_norm": 21.235565185546875, "learning_rate": 9.675544794188862e-06, "loss": 25.9134, "step": 24760 }, { "epoch": 589.5492537313432, "grad_norm": 18.014190673828125, "learning_rate": 9.675141242937854e-06, "loss": 26.7793, "step": 24761 }, { "epoch": 589.5731343283583, "grad_norm": 24.018096923828125, "learning_rate": 9.674737691686846e-06, "loss": 26.3828, "step": 24762 }, { "epoch": 589.5970149253732, "grad_norm": 23.911205291748047, "learning_rate": 9.674334140435836e-06, "loss": 27.074, "step": 24763 }, { "epoch": 589.6208955223881, "grad_norm": 19.758451461791992, "learning_rate": 9.673930589184827e-06, "loss": 26.9736, "step": 24764 }, { "epoch": 589.644776119403, "grad_norm": 21.613731384277344, "learning_rate": 9.673527037933819e-06, "loss": 25.4549, "step": 24765 }, { "epoch": 589.6686567164179, "grad_norm": 29.909374237060547, "learning_rate": 9.673123486682809e-06, "loss": 27.5047, "step": 24766 }, { "epoch": 589.6925373134328, "grad_norm": 19.978282928466797, "learning_rate": 9.672719935431801e-06, "loss": 25.7333, "step": 24767 }, { "epoch": 589.7164179104477, "grad_norm": 20.975845336914062, "learning_rate": 9.672316384180791e-06, "loss": 26.0749, "step": 24768 }, { "epoch": 589.7402985074627, "grad_norm": 29.678686141967773, "learning_rate": 9.671912832929783e-06, "loss": 27.1436, "step": 24769 }, { "epoch": 589.7641791044776, "grad_norm": 23.37542724609375, "learning_rate": 9.671509281678774e-06, "loss": 26.655, "step": 24770 }, { "epoch": 589.7880597014926, "grad_norm": 21.359844207763672, "learning_rate": 9.671105730427766e-06, "loss": 25.551, "step": 24771 }, { "epoch": 589.8119402985075, "grad_norm": 28.425365447998047, "learning_rate": 9.670702179176756e-06, "loss": 26.7114, "step": 24772 }, { "epoch": 589.8358208955224, "grad_norm": 25.74860382080078, "learning_rate": 9.670298627925747e-06, "loss": 27.0873, "step": 24773 }, { "epoch": 589.8597014925373, "grad_norm": 20.7000675201416, "learning_rate": 9.669895076674739e-06, "loss": 26.5215, "step": 24774 }, { "epoch": 589.8835820895522, "grad_norm": 29.431015014648438, "learning_rate": 9.669491525423729e-06, "loss": 26.693, "step": 24775 }, { "epoch": 589.9074626865672, "grad_norm": 21.63443374633789, "learning_rate": 9.669087974172721e-06, "loss": 26.2695, "step": 24776 }, { "epoch": 589.9313432835821, "grad_norm": 25.825551986694336, "learning_rate": 9.668684422921711e-06, "loss": 27.2596, "step": 24777 }, { "epoch": 589.955223880597, "grad_norm": 21.472169876098633, "learning_rate": 9.668280871670704e-06, "loss": 25.9065, "step": 24778 }, { "epoch": 589.9791044776119, "grad_norm": 28.493507385253906, "learning_rate": 9.667877320419694e-06, "loss": 27.215, "step": 24779 }, { "epoch": 590.0, "grad_norm": 20.225736618041992, "learning_rate": 9.667473769168684e-06, "loss": 24.3542, "step": 24780 }, { "epoch": 590.0, "step": 24780, "total_flos": 1.2181008110327127e+18, "train_loss": 0.8897820170989817, "train_runtime": 25671.2183, "train_samples_per_second": 123.005, "train_steps_per_second": 0.965 }, { "epoch": 590.0238805970149, "grad_norm": 32.38751983642578, "learning_rate": 1e-05, "loss": 26.5261, "step": 24781 }, { "epoch": 590.0477611940298, "grad_norm": Infinity, "learning_rate": 9.999603174603175e-06, "loss": 30.9317, "step": 24782 }, { "epoch": 590.0716417910447, "grad_norm": 308.18304443359375, "learning_rate": 9.999603174603175e-06, "loss": 30.3594, "step": 24783 }, { "epoch": 590.0955223880597, "grad_norm": 170.1776580810547, "learning_rate": 9.99920634920635e-06, "loss": 29.6381, "step": 24784 }, { "epoch": 590.1194029850747, "grad_norm": 83.29058837890625, "learning_rate": 9.998809523809524e-06, "loss": 28.7973, "step": 24785 }, { "epoch": 590.1432835820896, "grad_norm": 103.14496612548828, "learning_rate": 9.998412698412699e-06, "loss": 27.4475, "step": 24786 }, { "epoch": 590.1671641791045, "grad_norm": 54.1573371887207, "learning_rate": 9.998015873015874e-06, "loss": 27.5086, "step": 24787 }, { "epoch": 590.1910447761194, "grad_norm": 83.75999450683594, "learning_rate": 9.997619047619048e-06, "loss": 27.8085, "step": 24788 }, { "epoch": 590.2149253731343, "grad_norm": 48.15753936767578, "learning_rate": 9.997222222222223e-06, "loss": 24.7527, "step": 24789 }, { "epoch": 590.2388059701492, "grad_norm": 92.9489974975586, "learning_rate": 9.996825396825399e-06, "loss": 25.9382, "step": 24790 }, { "epoch": 590.2626865671642, "grad_norm": 67.59130096435547, "learning_rate": 9.996428571428572e-06, "loss": 25.7968, "step": 24791 }, { "epoch": 590.2865671641791, "grad_norm": 73.17889404296875, "learning_rate": 9.996031746031746e-06, "loss": 28.1064, "step": 24792 }, { "epoch": 590.310447761194, "grad_norm": 63.96194076538086, "learning_rate": 9.99563492063492e-06, "loss": 27.2321, "step": 24793 }, { "epoch": 590.334328358209, "grad_norm": 47.63359451293945, "learning_rate": 9.995238095238095e-06, "loss": 27.0386, "step": 24794 }, { "epoch": 590.3582089552239, "grad_norm": 46.89753723144531, "learning_rate": 9.994841269841272e-06, "loss": 26.147, "step": 24795 }, { "epoch": 590.3820895522388, "grad_norm": 41.82035446166992, "learning_rate": 9.994444444444446e-06, "loss": 26.9495, "step": 24796 }, { "epoch": 590.4059701492537, "grad_norm": 36.30079650878906, "learning_rate": 9.99404761904762e-06, "loss": 26.8518, "step": 24797 }, { "epoch": 590.4298507462687, "grad_norm": 41.66610336303711, "learning_rate": 9.993650793650793e-06, "loss": 27.3526, "step": 24798 }, { "epoch": 590.4537313432836, "grad_norm": 28.27171516418457, "learning_rate": 9.993253968253968e-06, "loss": 25.6981, "step": 24799 }, { "epoch": 590.4776119402985, "grad_norm": 36.82769775390625, "learning_rate": 9.992857142857144e-06, "loss": 26.5702, "step": 24800 }, { "epoch": 590.5014925373134, "grad_norm": 31.696352005004883, "learning_rate": 9.992460317460319e-06, "loss": 25.8408, "step": 24801 }, { "epoch": 590.5253731343283, "grad_norm": 27.81403160095215, "learning_rate": 9.992063492063493e-06, "loss": 26.7864, "step": 24802 }, { "epoch": 590.5492537313432, "grad_norm": 33.93839645385742, "learning_rate": 9.991666666666668e-06, "loss": 26.5667, "step": 24803 }, { "epoch": 590.5731343283583, "grad_norm": 25.36647605895996, "learning_rate": 9.991269841269842e-06, "loss": 25.6366, "step": 24804 }, { "epoch": 590.5970149253732, "grad_norm": 30.93791389465332, "learning_rate": 9.990873015873017e-06, "loss": 26.0666, "step": 24805 }, { "epoch": 590.6208955223881, "grad_norm": 30.572669982910156, "learning_rate": 9.990476190476191e-06, "loss": 25.8127, "step": 24806 }, { "epoch": 590.644776119403, "grad_norm": 24.368446350097656, "learning_rate": 9.990079365079366e-06, "loss": 25.4055, "step": 24807 }, { "epoch": 590.6686567164179, "grad_norm": 24.62511444091797, "learning_rate": 9.98968253968254e-06, "loss": 25.212, "step": 24808 }, { "epoch": 590.6925373134328, "grad_norm": 26.832115173339844, "learning_rate": 9.989285714285715e-06, "loss": 25.0662, "step": 24809 }, { "epoch": 590.7164179104477, "grad_norm": 22.718965530395508, "learning_rate": 9.98888888888889e-06, "loss": 26.1209, "step": 24810 }, { "epoch": 590.7402985074627, "grad_norm": 26.248214721679688, "learning_rate": 9.988492063492064e-06, "loss": 26.3261, "step": 24811 }, { "epoch": 590.7641791044776, "grad_norm": 25.842761993408203, "learning_rate": 9.988095238095239e-06, "loss": 25.7934, "step": 24812 }, { "epoch": 590.7880597014926, "grad_norm": 24.885391235351562, "learning_rate": 9.987698412698413e-06, "loss": 26.315, "step": 24813 }, { "epoch": 590.8119402985075, "grad_norm": 22.091886520385742, "learning_rate": 9.987301587301588e-06, "loss": 25.3459, "step": 24814 }, { "epoch": 590.8358208955224, "grad_norm": 25.159875869750977, "learning_rate": 9.986904761904764e-06, "loss": 26.8305, "step": 24815 }, { "epoch": 590.8597014925373, "grad_norm": 22.594758987426758, "learning_rate": 9.986507936507937e-06, "loss": 26.7207, "step": 24816 }, { "epoch": 590.8835820895522, "grad_norm": 26.021387100219727, "learning_rate": 9.986111111111111e-06, "loss": 25.4475, "step": 24817 }, { "epoch": 590.9074626865672, "grad_norm": 26.40793800354004, "learning_rate": 9.985714285714286e-06, "loss": 25.1644, "step": 24818 }, { "epoch": 590.9313432835821, "grad_norm": 27.049848556518555, "learning_rate": 9.98531746031746e-06, "loss": 25.449, "step": 24819 }, { "epoch": 590.955223880597, "grad_norm": 25.380956649780273, "learning_rate": 9.984920634920637e-06, "loss": 26.2029, "step": 24820 }, { "epoch": 590.9791044776119, "grad_norm": 24.72477149963379, "learning_rate": 9.984523809523811e-06, "loss": 25.826, "step": 24821 }, { "epoch": 591.0, "grad_norm": 20.87173080444336, "learning_rate": 9.984126984126986e-06, "loss": 21.6045, "step": 24822 }, { "epoch": 591.0238805970149, "grad_norm": 24.914087295532227, "learning_rate": 9.983730158730159e-06, "loss": 25.5598, "step": 24823 }, { "epoch": 591.0477611940298, "grad_norm": 23.642045974731445, "learning_rate": 9.983333333333333e-06, "loss": 25.333, "step": 24824 }, { "epoch": 591.0716417910447, "grad_norm": 20.315439224243164, "learning_rate": 9.98293650793651e-06, "loss": 25.4591, "step": 24825 }, { "epoch": 591.0955223880597, "grad_norm": 23.875192642211914, "learning_rate": 9.982539682539684e-06, "loss": 26.0289, "step": 24826 }, { "epoch": 591.1194029850747, "grad_norm": 21.876815795898438, "learning_rate": 9.982142857142858e-06, "loss": 26.0793, "step": 24827 }, { "epoch": 591.1432835820896, "grad_norm": 24.537233352661133, "learning_rate": 9.981746031746033e-06, "loss": 24.8444, "step": 24828 }, { "epoch": 591.1671641791045, "grad_norm": 29.5328311920166, "learning_rate": 9.981349206349208e-06, "loss": 24.9453, "step": 24829 }, { "epoch": 591.1910447761194, "grad_norm": 28.186058044433594, "learning_rate": 9.980952380952382e-06, "loss": 26.934, "step": 24830 }, { "epoch": 591.2149253731343, "grad_norm": 24.6148681640625, "learning_rate": 9.980555555555557e-06, "loss": 25.4354, "step": 24831 }, { "epoch": 591.2388059701492, "grad_norm": 27.353029251098633, "learning_rate": 9.980158730158731e-06, "loss": 27.4815, "step": 24832 }, { "epoch": 591.2626865671642, "grad_norm": 26.900039672851562, "learning_rate": 9.979761904761906e-06, "loss": 27.507, "step": 24833 }, { "epoch": 591.2865671641791, "grad_norm": 22.690462112426758, "learning_rate": 9.97936507936508e-06, "loss": 25.1526, "step": 24834 }, { "epoch": 591.310447761194, "grad_norm": NaN, "learning_rate": 9.978968253968255e-06, "loss": 40.046, "step": 24835 }, { "epoch": 591.334328358209, "grad_norm": 26.094011306762695, "learning_rate": 9.978968253968255e-06, "loss": 25.7396, "step": 24836 }, { "epoch": 591.3582089552239, "grad_norm": 27.837017059326172, "learning_rate": 9.97857142857143e-06, "loss": 26.2075, "step": 24837 }, { "epoch": 591.3820895522388, "grad_norm": 24.98420524597168, "learning_rate": 9.978174603174604e-06, "loss": 25.3582, "step": 24838 }, { "epoch": 591.4059701492537, "grad_norm": 24.316162109375, "learning_rate": 9.977777777777778e-06, "loss": 25.9011, "step": 24839 }, { "epoch": 591.4298507462687, "grad_norm": 24.92269515991211, "learning_rate": 9.977380952380953e-06, "loss": 26.1315, "step": 24840 }, { "epoch": 591.4537313432836, "grad_norm": 25.060258865356445, "learning_rate": 9.976984126984128e-06, "loss": 25.7465, "step": 24841 }, { "epoch": 591.4776119402985, "grad_norm": 28.091259002685547, "learning_rate": 9.976587301587302e-06, "loss": 25.6749, "step": 24842 }, { "epoch": 591.5014925373134, "grad_norm": 25.803117752075195, "learning_rate": 9.976190476190477e-06, "loss": 26.0419, "step": 24843 }, { "epoch": 591.5253731343283, "grad_norm": 25.82796859741211, "learning_rate": 9.975793650793651e-06, "loss": 25.3111, "step": 24844 }, { "epoch": 591.5492537313432, "grad_norm": 25.189868927001953, "learning_rate": 9.975396825396826e-06, "loss": 25.5209, "step": 24845 }, { "epoch": 591.5731343283583, "grad_norm": 23.93549156188965, "learning_rate": 9.975000000000002e-06, "loss": 25.0733, "step": 24846 }, { "epoch": 591.5970149253732, "grad_norm": 22.82111358642578, "learning_rate": 9.974603174603176e-06, "loss": 24.876, "step": 24847 }, { "epoch": 591.6208955223881, "grad_norm": 24.573532104492188, "learning_rate": 9.97420634920635e-06, "loss": 26.6107, "step": 24848 }, { "epoch": 591.644776119403, "grad_norm": 23.865331649780273, "learning_rate": 9.973809523809524e-06, "loss": 27.045, "step": 24849 }, { "epoch": 591.6686567164179, "grad_norm": 24.804426193237305, "learning_rate": 9.973412698412698e-06, "loss": 25.9354, "step": 24850 }, { "epoch": 591.6925373134328, "grad_norm": 29.551023483276367, "learning_rate": 9.973015873015875e-06, "loss": 25.9428, "step": 24851 }, { "epoch": 591.7164179104477, "grad_norm": 22.42382049560547, "learning_rate": 9.972619047619049e-06, "loss": 26.1557, "step": 24852 }, { "epoch": 591.7402985074627, "grad_norm": 22.469646453857422, "learning_rate": 9.972222222222224e-06, "loss": 25.771, "step": 24853 }, { "epoch": 591.7641791044776, "grad_norm": 20.678512573242188, "learning_rate": 9.971825396825398e-06, "loss": 25.2619, "step": 24854 }, { "epoch": 591.7880597014926, "grad_norm": 23.119787216186523, "learning_rate": 9.971428571428571e-06, "loss": 25.8464, "step": 24855 }, { "epoch": 591.8119402985075, "grad_norm": 26.09597396850586, "learning_rate": 9.971031746031747e-06, "loss": 25.4637, "step": 24856 }, { "epoch": 591.8358208955224, "grad_norm": 28.86103057861328, "learning_rate": 9.970634920634922e-06, "loss": 25.9405, "step": 24857 }, { "epoch": 591.8597014925373, "grad_norm": 23.647422790527344, "learning_rate": 9.970238095238096e-06, "loss": 26.5306, "step": 24858 }, { "epoch": 591.8835820895522, "grad_norm": 20.792186737060547, "learning_rate": 9.969841269841271e-06, "loss": 25.6604, "step": 24859 }, { "epoch": 591.9074626865672, "grad_norm": 25.55927848815918, "learning_rate": 9.969444444444445e-06, "loss": 26.295, "step": 24860 }, { "epoch": 591.9313432835821, "grad_norm": 30.858434677124023, "learning_rate": 9.96904761904762e-06, "loss": 26.3215, "step": 24861 }, { "epoch": 591.955223880597, "grad_norm": 24.268409729003906, "learning_rate": 9.968650793650795e-06, "loss": 25.0376, "step": 24862 }, { "epoch": 591.9791044776119, "grad_norm": 21.838285446166992, "learning_rate": 9.968253968253969e-06, "loss": 24.9367, "step": 24863 }, { "epoch": 592.0, "grad_norm": 22.223636627197266, "learning_rate": 9.967857142857144e-06, "loss": 20.9882, "step": 24864 }, { "epoch": 592.0238805970149, "grad_norm": 34.9276237487793, "learning_rate": 9.967460317460318e-06, "loss": 24.8214, "step": 24865 }, { "epoch": 592.0477611940298, "grad_norm": 25.057830810546875, "learning_rate": 9.967063492063493e-06, "loss": 26.1019, "step": 24866 }, { "epoch": 592.0716417910447, "grad_norm": 21.665557861328125, "learning_rate": 9.966666666666667e-06, "loss": 26.6416, "step": 24867 }, { "epoch": 592.0955223880597, "grad_norm": 24.499387741088867, "learning_rate": 9.966269841269842e-06, "loss": 25.4419, "step": 24868 }, { "epoch": 592.1194029850747, "grad_norm": 23.511585235595703, "learning_rate": 9.965873015873016e-06, "loss": 24.4169, "step": 24869 }, { "epoch": 592.1432835820896, "grad_norm": 20.998411178588867, "learning_rate": 9.965476190476191e-06, "loss": 26.6439, "step": 24870 }, { "epoch": 592.1671641791045, "grad_norm": 27.615819931030273, "learning_rate": 9.965079365079365e-06, "loss": 26.5077, "step": 24871 }, { "epoch": 592.1910447761194, "grad_norm": 25.752527236938477, "learning_rate": 9.964682539682542e-06, "loss": 25.555, "step": 24872 }, { "epoch": 592.2149253731343, "grad_norm": 26.167125701904297, "learning_rate": 9.964285714285714e-06, "loss": 26.2654, "step": 24873 }, { "epoch": 592.2388059701492, "grad_norm": 27.942628860473633, "learning_rate": 9.963888888888889e-06, "loss": 25.1397, "step": 24874 }, { "epoch": 592.2626865671642, "grad_norm": 20.057777404785156, "learning_rate": 9.963492063492064e-06, "loss": 24.5627, "step": 24875 }, { "epoch": 592.2865671641791, "grad_norm": 28.692611694335938, "learning_rate": 9.963095238095238e-06, "loss": 26.2874, "step": 24876 }, { "epoch": 592.310447761194, "grad_norm": 25.661327362060547, "learning_rate": 9.962698412698414e-06, "loss": 26.457, "step": 24877 }, { "epoch": 592.334328358209, "grad_norm": 28.75472068786621, "learning_rate": 9.962301587301589e-06, "loss": 26.2982, "step": 24878 }, { "epoch": 592.3582089552239, "grad_norm": 22.969066619873047, "learning_rate": 9.961904761904763e-06, "loss": 24.2406, "step": 24879 }, { "epoch": 592.3820895522388, "grad_norm": 24.886077880859375, "learning_rate": 9.961507936507936e-06, "loss": 25.4487, "step": 24880 }, { "epoch": 592.4059701492537, "grad_norm": 32.79336166381836, "learning_rate": 9.96111111111111e-06, "loss": 25.3264, "step": 24881 }, { "epoch": 592.4298507462687, "grad_norm": 21.684770584106445, "learning_rate": 9.960714285714287e-06, "loss": 25.2006, "step": 24882 }, { "epoch": 592.4537313432836, "grad_norm": 25.352096557617188, "learning_rate": 9.960317460317462e-06, "loss": 26.4193, "step": 24883 }, { "epoch": 592.4776119402985, "grad_norm": 24.256072998046875, "learning_rate": 9.959920634920636e-06, "loss": 25.2909, "step": 24884 }, { "epoch": 592.5014925373134, "grad_norm": 26.264633178710938, "learning_rate": 9.95952380952381e-06, "loss": 26.7519, "step": 24885 }, { "epoch": 592.5253731343283, "grad_norm": 27.71848487854004, "learning_rate": 9.959126984126985e-06, "loss": 25.5537, "step": 24886 }, { "epoch": 592.5492537313432, "grad_norm": 21.39444351196289, "learning_rate": 9.95873015873016e-06, "loss": 25.8987, "step": 24887 }, { "epoch": 592.5731343283583, "grad_norm": 28.044570922851562, "learning_rate": 9.958333333333334e-06, "loss": 26.4733, "step": 24888 }, { "epoch": 592.5970149253732, "grad_norm": 23.895475387573242, "learning_rate": 9.957936507936509e-06, "loss": 25.4726, "step": 24889 }, { "epoch": 592.6208955223881, "grad_norm": 21.789249420166016, "learning_rate": 9.957539682539683e-06, "loss": 25.0356, "step": 24890 }, { "epoch": 592.644776119403, "grad_norm": 26.668081283569336, "learning_rate": 9.957142857142858e-06, "loss": 25.8718, "step": 24891 }, { "epoch": 592.6686567164179, "grad_norm": 24.450868606567383, "learning_rate": 9.956746031746032e-06, "loss": 26.9938, "step": 24892 }, { "epoch": 592.6925373134328, "grad_norm": 24.368562698364258, "learning_rate": 9.956349206349207e-06, "loss": 26.4511, "step": 24893 }, { "epoch": 592.7164179104477, "grad_norm": 26.46340560913086, "learning_rate": 9.955952380952382e-06, "loss": 25.7887, "step": 24894 }, { "epoch": 592.7402985074627, "grad_norm": 21.85275650024414, "learning_rate": 9.955555555555556e-06, "loss": 24.6155, "step": 24895 }, { "epoch": 592.7641791044776, "grad_norm": 32.05070495605469, "learning_rate": 9.95515873015873e-06, "loss": 24.6373, "step": 24896 }, { "epoch": 592.7880597014926, "grad_norm": 24.3539981842041, "learning_rate": 9.954761904761905e-06, "loss": 26.5635, "step": 24897 }, { "epoch": 592.8119402985075, "grad_norm": 23.658493041992188, "learning_rate": 9.95436507936508e-06, "loss": 24.3329, "step": 24898 }, { "epoch": 592.8358208955224, "grad_norm": 28.50235939025879, "learning_rate": 9.953968253968254e-06, "loss": 25.6617, "step": 24899 }, { "epoch": 592.8597014925373, "grad_norm": 30.11471176147461, "learning_rate": 9.953571428571429e-06, "loss": 25.8374, "step": 24900 }, { "epoch": 592.8835820895522, "grad_norm": 22.72588348388672, "learning_rate": 9.953174603174603e-06, "loss": 25.6179, "step": 24901 }, { "epoch": 592.9074626865672, "grad_norm": 27.251739501953125, "learning_rate": 9.95277777777778e-06, "loss": 25.5995, "step": 24902 }, { "epoch": 592.9313432835821, "grad_norm": 32.791255950927734, "learning_rate": 9.952380952380954e-06, "loss": 26.0102, "step": 24903 }, { "epoch": 592.955223880597, "grad_norm": 22.213632583618164, "learning_rate": 9.951984126984127e-06, "loss": 25.2462, "step": 24904 }, { "epoch": 592.9791044776119, "grad_norm": 24.99026107788086, "learning_rate": 9.951587301587301e-06, "loss": 25.8888, "step": 24905 }, { "epoch": 593.0, "grad_norm": 26.444826126098633, "learning_rate": 9.951190476190476e-06, "loss": 22.9073, "step": 24906 }, { "epoch": 593.0238805970149, "grad_norm": 28.267274856567383, "learning_rate": 9.950793650793652e-06, "loss": 25.5913, "step": 24907 }, { "epoch": 593.0477611940298, "grad_norm": 22.606760025024414, "learning_rate": 9.950396825396827e-06, "loss": 26.5444, "step": 24908 }, { "epoch": 593.0716417910447, "grad_norm": 24.310060501098633, "learning_rate": 9.950000000000001e-06, "loss": 25.1689, "step": 24909 }, { "epoch": 593.0955223880597, "grad_norm": 25.905433654785156, "learning_rate": 9.949603174603176e-06, "loss": 25.0083, "step": 24910 }, { "epoch": 593.1194029850747, "grad_norm": 24.08650779724121, "learning_rate": 9.94920634920635e-06, "loss": 26.2696, "step": 24911 }, { "epoch": 593.1432835820896, "grad_norm": 26.446935653686523, "learning_rate": 9.948809523809525e-06, "loss": 26.0374, "step": 24912 }, { "epoch": 593.1671641791045, "grad_norm": 20.422761917114258, "learning_rate": 9.9484126984127e-06, "loss": 25.8238, "step": 24913 }, { "epoch": 593.1910447761194, "grad_norm": 27.821630477905273, "learning_rate": 9.948015873015874e-06, "loss": 25.0718, "step": 24914 }, { "epoch": 593.2149253731343, "grad_norm": 27.36674690246582, "learning_rate": 9.947619047619049e-06, "loss": 25.9552, "step": 24915 }, { "epoch": 593.2388059701492, "grad_norm": 29.559370040893555, "learning_rate": 9.947222222222223e-06, "loss": 26.0264, "step": 24916 }, { "epoch": 593.2626865671642, "grad_norm": 22.626440048217773, "learning_rate": 9.946825396825398e-06, "loss": 26.0338, "step": 24917 }, { "epoch": 593.2865671641791, "grad_norm": 28.20386505126953, "learning_rate": 9.946428571428572e-06, "loss": 25.0621, "step": 24918 }, { "epoch": 593.310447761194, "grad_norm": 32.93440628051758, "learning_rate": 9.946031746031747e-06, "loss": 25.205, "step": 24919 }, { "epoch": 593.334328358209, "grad_norm": 22.089387893676758, "learning_rate": 9.945634920634921e-06, "loss": 24.7795, "step": 24920 }, { "epoch": 593.3582089552239, "grad_norm": 28.19015884399414, "learning_rate": 9.945238095238096e-06, "loss": 25.7681, "step": 24921 }, { "epoch": 593.3820895522388, "grad_norm": 29.941843032836914, "learning_rate": 9.94484126984127e-06, "loss": 25.2763, "step": 24922 }, { "epoch": 593.4059701492537, "grad_norm": 25.613557815551758, "learning_rate": 9.944444444444445e-06, "loss": 25.3587, "step": 24923 }, { "epoch": 593.4298507462687, "grad_norm": 26.70645523071289, "learning_rate": 9.94404761904762e-06, "loss": 25.6439, "step": 24924 }, { "epoch": 593.4537313432836, "grad_norm": NaN, "learning_rate": 9.943650793650794e-06, "loss": 21.5729, "step": 24925 }, { "epoch": 593.4776119402985, "grad_norm": 38.24277114868164, "learning_rate": 9.943650793650794e-06, "loss": 25.7615, "step": 24926 }, { "epoch": 593.5014925373134, "grad_norm": 22.880823135375977, "learning_rate": 9.943253968253968e-06, "loss": 25.5452, "step": 24927 }, { "epoch": 593.5253731343283, "grad_norm": 29.924650192260742, "learning_rate": 9.942857142857145e-06, "loss": 25.717, "step": 24928 }, { "epoch": 593.5492537313432, "grad_norm": 32.731773376464844, "learning_rate": 9.94246031746032e-06, "loss": 26.3031, "step": 24929 }, { "epoch": 593.5731343283583, "grad_norm": 24.089282989501953, "learning_rate": 9.942063492063492e-06, "loss": 26.451, "step": 24930 }, { "epoch": 593.5970149253732, "grad_norm": 30.0961971282959, "learning_rate": 9.941666666666667e-06, "loss": 24.3998, "step": 24931 }, { "epoch": 593.6208955223881, "grad_norm": 36.34827423095703, "learning_rate": 9.941269841269841e-06, "loss": 26.6747, "step": 24932 }, { "epoch": 593.644776119403, "grad_norm": 23.323444366455078, "learning_rate": 9.940873015873017e-06, "loss": 25.2262, "step": 24933 }, { "epoch": 593.6686567164179, "grad_norm": 39.25984573364258, "learning_rate": 9.940476190476192e-06, "loss": 26.4286, "step": 24934 }, { "epoch": 593.6925373134328, "grad_norm": 27.57270622253418, "learning_rate": 9.940079365079366e-06, "loss": 26.1606, "step": 24935 }, { "epoch": 593.7164179104477, "grad_norm": 25.9527645111084, "learning_rate": 9.939682539682541e-06, "loss": 25.6895, "step": 24936 }, { "epoch": 593.7402985074627, "grad_norm": 34.87306213378906, "learning_rate": 9.939285714285714e-06, "loss": 25.4387, "step": 24937 }, { "epoch": 593.7641791044776, "grad_norm": 24.651517868041992, "learning_rate": 9.93888888888889e-06, "loss": 25.8853, "step": 24938 }, { "epoch": 593.7880597014926, "grad_norm": 36.87623596191406, "learning_rate": 9.938492063492065e-06, "loss": 26.4484, "step": 24939 }, { "epoch": 593.8119402985075, "grad_norm": 29.530839920043945, "learning_rate": 9.93809523809524e-06, "loss": 25.2559, "step": 24940 }, { "epoch": 593.8358208955224, "grad_norm": 23.337617874145508, "learning_rate": 9.937698412698414e-06, "loss": 25.0679, "step": 24941 }, { "epoch": 593.8597014925373, "grad_norm": 32.77561569213867, "learning_rate": 9.937301587301588e-06, "loss": 26.0102, "step": 24942 }, { "epoch": 593.8835820895522, "grad_norm": 26.334997177124023, "learning_rate": 9.936904761904763e-06, "loss": 26.0961, "step": 24943 }, { "epoch": 593.9074626865672, "grad_norm": 25.03455924987793, "learning_rate": 9.936507936507937e-06, "loss": 25.1034, "step": 24944 }, { "epoch": 593.9313432835821, "grad_norm": NaN, "learning_rate": 9.936111111111112e-06, "loss": 30.6319, "step": 24945 }, { "epoch": 593.955223880597, "grad_norm": 32.942222595214844, "learning_rate": 9.936111111111112e-06, "loss": 26.009, "step": 24946 }, { "epoch": 593.9791044776119, "grad_norm": 26.386043548583984, "learning_rate": 9.935714285714286e-06, "loss": 25.6896, "step": 24947 }, { "epoch": 594.0, "grad_norm": 22.751941680908203, "learning_rate": 9.935317460317461e-06, "loss": 22.7754, "step": 24948 }, { "epoch": 594.0238805970149, "grad_norm": 29.11665153503418, "learning_rate": 9.934920634920636e-06, "loss": 26.3463, "step": 24949 }, { "epoch": 594.0477611940298, "grad_norm": 26.54667091369629, "learning_rate": 9.93452380952381e-06, "loss": 26.9463, "step": 24950 }, { "epoch": 594.0716417910447, "grad_norm": 22.238555908203125, "learning_rate": 9.934126984126985e-06, "loss": 25.328, "step": 24951 }, { "epoch": 594.0955223880597, "grad_norm": 31.46116065979004, "learning_rate": 9.933730158730159e-06, "loss": 26.5246, "step": 24952 }, { "epoch": 594.1194029850747, "grad_norm": 23.44728660583496, "learning_rate": 9.933333333333334e-06, "loss": 25.4817, "step": 24953 }, { "epoch": 594.1432835820896, "grad_norm": 23.11781120300293, "learning_rate": 9.93293650793651e-06, "loss": 24.8494, "step": 24954 }, { "epoch": 594.1671641791045, "grad_norm": 36.521366119384766, "learning_rate": 9.932539682539684e-06, "loss": 25.5776, "step": 24955 }, { "epoch": 594.1910447761194, "grad_norm": 26.55750274658203, "learning_rate": 9.932142857142857e-06, "loss": 25.827, "step": 24956 }, { "epoch": 594.2149253731343, "grad_norm": 27.58326530456543, "learning_rate": 9.931746031746032e-06, "loss": 24.78, "step": 24957 }, { "epoch": 594.2388059701492, "grad_norm": 32.70005798339844, "learning_rate": 9.931349206349206e-06, "loss": 25.3682, "step": 24958 }, { "epoch": 594.2626865671642, "grad_norm": 22.713151931762695, "learning_rate": 9.930952380952383e-06, "loss": 24.6241, "step": 24959 }, { "epoch": 594.2865671641791, "grad_norm": 38.38673400878906, "learning_rate": 9.930555555555557e-06, "loss": 25.8458, "step": 24960 }, { "epoch": 594.310447761194, "grad_norm": 25.81012535095215, "learning_rate": 9.930158730158732e-06, "loss": 26.1331, "step": 24961 }, { "epoch": 594.334328358209, "grad_norm": 28.882944107055664, "learning_rate": 9.929761904761906e-06, "loss": 24.8167, "step": 24962 }, { "epoch": 594.3582089552239, "grad_norm": 38.33415603637695, "learning_rate": 9.929365079365079e-06, "loss": 25.1231, "step": 24963 }, { "epoch": 594.3820895522388, "grad_norm": 24.157337188720703, "learning_rate": 9.928968253968255e-06, "loss": 25.916, "step": 24964 }, { "epoch": 594.4059701492537, "grad_norm": 46.813594818115234, "learning_rate": 9.92857142857143e-06, "loss": 26.0817, "step": 24965 }, { "epoch": 594.4298507462687, "grad_norm": 29.57288932800293, "learning_rate": 9.928174603174604e-06, "loss": 26.3842, "step": 24966 }, { "epoch": 594.4537313432836, "grad_norm": 45.883628845214844, "learning_rate": 9.927777777777779e-06, "loss": 26.3784, "step": 24967 }, { "epoch": 594.4776119402985, "grad_norm": 32.666141510009766, "learning_rate": 9.927380952380953e-06, "loss": 26.5298, "step": 24968 }, { "epoch": 594.5014925373134, "grad_norm": 53.34564971923828, "learning_rate": 9.926984126984128e-06, "loss": 24.9997, "step": 24969 }, { "epoch": 594.5253731343283, "grad_norm": 42.81193923950195, "learning_rate": 9.926587301587303e-06, "loss": 25.3662, "step": 24970 }, { "epoch": 594.5492537313432, "grad_norm": 45.649234771728516, "learning_rate": 9.926190476190477e-06, "loss": 25.5977, "step": 24971 }, { "epoch": 594.5731343283583, "grad_norm": 43.08897018432617, "learning_rate": 9.925793650793652e-06, "loss": 26.3928, "step": 24972 }, { "epoch": 594.5970149253732, "grad_norm": 40.90314483642578, "learning_rate": 9.925396825396826e-06, "loss": 24.9929, "step": 24973 }, { "epoch": 594.6208955223881, "grad_norm": 36.69975662231445, "learning_rate": 9.925e-06, "loss": 25.6582, "step": 24974 }, { "epoch": 594.644776119403, "grad_norm": 45.9951286315918, "learning_rate": 9.924603174603175e-06, "loss": 26.4403, "step": 24975 }, { "epoch": 594.6686567164179, "grad_norm": 40.0910758972168, "learning_rate": 9.92420634920635e-06, "loss": 26.369, "step": 24976 }, { "epoch": 594.6925373134328, "grad_norm": 46.12454605102539, "learning_rate": 9.923809523809524e-06, "loss": 25.1833, "step": 24977 }, { "epoch": 594.7164179104477, "grad_norm": 41.463890075683594, "learning_rate": 9.923412698412699e-06, "loss": 25.7716, "step": 24978 }, { "epoch": 594.7402985074627, "grad_norm": 41.699405670166016, "learning_rate": 9.923015873015875e-06, "loss": 26.7883, "step": 24979 }, { "epoch": 594.7641791044776, "grad_norm": 37.37868881225586, "learning_rate": 9.922619047619048e-06, "loss": 24.5722, "step": 24980 }, { "epoch": 594.7880597014926, "grad_norm": 39.5391960144043, "learning_rate": 9.922222222222222e-06, "loss": 25.0872, "step": 24981 }, { "epoch": 594.8119402985075, "grad_norm": 37.511375427246094, "learning_rate": 9.921825396825397e-06, "loss": 26.2676, "step": 24982 }, { "epoch": 594.8358208955224, "grad_norm": 49.18541717529297, "learning_rate": 9.921428571428572e-06, "loss": 25.7513, "step": 24983 }, { "epoch": 594.8597014925373, "grad_norm": 43.0590934753418, "learning_rate": 9.921031746031748e-06, "loss": 25.6461, "step": 24984 }, { "epoch": 594.8835820895522, "grad_norm": 46.377925872802734, "learning_rate": 9.920634920634922e-06, "loss": 24.3578, "step": 24985 }, { "epoch": 594.9074626865672, "grad_norm": 41.44746017456055, "learning_rate": 9.920238095238097e-06, "loss": 25.47, "step": 24986 }, { "epoch": 594.9313432835821, "grad_norm": 39.85419845581055, "learning_rate": 9.91984126984127e-06, "loss": 25.159, "step": 24987 }, { "epoch": 594.955223880597, "grad_norm": 36.79846954345703, "learning_rate": 9.919444444444444e-06, "loss": 24.5556, "step": 24988 }, { "epoch": 594.9791044776119, "grad_norm": 47.0821418762207, "learning_rate": 9.91904761904762e-06, "loss": 25.3444, "step": 24989 }, { "epoch": 595.0, "grad_norm": 32.914642333984375, "learning_rate": 9.918650793650795e-06, "loss": 22.6339, "step": 24990 }, { "epoch": 595.0238805970149, "grad_norm": 46.31789779663086, "learning_rate": 9.91825396825397e-06, "loss": 25.4658, "step": 24991 }, { "epoch": 595.0477611940298, "grad_norm": 42.928775787353516, "learning_rate": 9.917857142857144e-06, "loss": 26.1511, "step": 24992 }, { "epoch": 595.0716417910447, "grad_norm": 38.32334518432617, "learning_rate": 9.917460317460319e-06, "loss": 24.8834, "step": 24993 }, { "epoch": 595.0955223880597, "grad_norm": 38.501556396484375, "learning_rate": 9.917063492063493e-06, "loss": 25.8271, "step": 24994 }, { "epoch": 595.1194029850747, "grad_norm": 43.30253982543945, "learning_rate": 9.916666666666668e-06, "loss": 25.7048, "step": 24995 }, { "epoch": 595.1432835820896, "grad_norm": 33.72343444824219, "learning_rate": 9.916269841269842e-06, "loss": 25.6887, "step": 24996 }, { "epoch": 595.1671641791045, "grad_norm": 46.45766067504883, "learning_rate": 9.915873015873017e-06, "loss": 25.5647, "step": 24997 }, { "epoch": 595.1910447761194, "grad_norm": 35.734012603759766, "learning_rate": 9.915476190476191e-06, "loss": 24.5918, "step": 24998 }, { "epoch": 595.2149253731343, "grad_norm": 40.43130874633789, "learning_rate": 9.915079365079366e-06, "loss": 25.0282, "step": 24999 }, { "epoch": 595.2388059701492, "grad_norm": 38.481143951416016, "learning_rate": 9.91468253968254e-06, "loss": 25.3052, "step": 25000 }, { "epoch": 595.2626865671642, "grad_norm": 40.76797103881836, "learning_rate": 9.914285714285715e-06, "loss": 26.4088, "step": 25001 }, { "epoch": 595.2865671641791, "grad_norm": 38.45075607299805, "learning_rate": 9.91388888888889e-06, "loss": 24.4304, "step": 25002 }, { "epoch": 595.310447761194, "grad_norm": 44.005252838134766, "learning_rate": 9.913492063492064e-06, "loss": 25.0914, "step": 25003 }, { "epoch": 595.334328358209, "grad_norm": 36.03594970703125, "learning_rate": 9.91309523809524e-06, "loss": 25.6347, "step": 25004 }, { "epoch": 595.3582089552239, "grad_norm": 46.996299743652344, "learning_rate": 9.912698412698413e-06, "loss": 26.489, "step": 25005 }, { "epoch": 595.3820895522388, "grad_norm": 37.67258834838867, "learning_rate": 9.912301587301588e-06, "loss": 25.2793, "step": 25006 }, { "epoch": 595.4059701492537, "grad_norm": 39.09611892700195, "learning_rate": 9.911904761904762e-06, "loss": 25.9274, "step": 25007 }, { "epoch": 595.4298507462687, "grad_norm": 38.05245590209961, "learning_rate": 9.911507936507937e-06, "loss": 25.065, "step": 25008 }, { "epoch": 595.4537313432836, "grad_norm": 43.641212463378906, "learning_rate": 9.911111111111113e-06, "loss": 25.6702, "step": 25009 }, { "epoch": 595.4776119402985, "grad_norm": 39.434104919433594, "learning_rate": 9.910714285714288e-06, "loss": 25.2334, "step": 25010 }, { "epoch": 595.5014925373134, "grad_norm": 40.656959533691406, "learning_rate": 9.910317460317462e-06, "loss": 26.5871, "step": 25011 }, { "epoch": 595.5253731343283, "grad_norm": 34.247982025146484, "learning_rate": 9.909920634920635e-06, "loss": 26.2656, "step": 25012 }, { "epoch": 595.5492537313432, "grad_norm": 44.038726806640625, "learning_rate": 9.90952380952381e-06, "loss": 25.4938, "step": 25013 }, { "epoch": 595.5731343283583, "grad_norm": 35.6322135925293, "learning_rate": 9.909126984126986e-06, "loss": 26.6141, "step": 25014 }, { "epoch": 595.5970149253732, "grad_norm": 40.97134780883789, "learning_rate": 9.90873015873016e-06, "loss": 26.2444, "step": 25015 }, { "epoch": 595.6208955223881, "grad_norm": 39.32062530517578, "learning_rate": 9.908333333333335e-06, "loss": 25.4767, "step": 25016 }, { "epoch": 595.644776119403, "grad_norm": 42.71369171142578, "learning_rate": 9.90793650793651e-06, "loss": 24.6329, "step": 25017 }, { "epoch": 595.6686567164179, "grad_norm": 39.34294509887695, "learning_rate": 9.907539682539684e-06, "loss": 25.8026, "step": 25018 }, { "epoch": 595.6925373134328, "grad_norm": 42.66548538208008, "learning_rate": 9.907142857142858e-06, "loss": 25.8585, "step": 25019 }, { "epoch": 595.7164179104477, "grad_norm": 38.43756866455078, "learning_rate": 9.906746031746033e-06, "loss": 24.2186, "step": 25020 }, { "epoch": 595.7402985074627, "grad_norm": 39.67866134643555, "learning_rate": 9.906349206349207e-06, "loss": 25.9156, "step": 25021 }, { "epoch": 595.7641791044776, "grad_norm": 36.9583740234375, "learning_rate": 9.905952380952382e-06, "loss": 25.0043, "step": 25022 }, { "epoch": 595.7880597014926, "grad_norm": 39.36587905883789, "learning_rate": 9.905555555555557e-06, "loss": 26.6169, "step": 25023 }, { "epoch": 595.8119402985075, "grad_norm": 36.89835739135742, "learning_rate": 9.905158730158731e-06, "loss": 24.7224, "step": 25024 }, { "epoch": 595.8358208955224, "grad_norm": 40.41022491455078, "learning_rate": 9.904761904761906e-06, "loss": 25.8029, "step": 25025 }, { "epoch": 595.8597014925373, "grad_norm": 35.006011962890625, "learning_rate": 9.90436507936508e-06, "loss": 25.9267, "step": 25026 }, { "epoch": 595.8835820895522, "grad_norm": 41.57292556762695, "learning_rate": 9.903968253968255e-06, "loss": 25.0556, "step": 25027 }, { "epoch": 595.9074626865672, "grad_norm": 35.257118225097656, "learning_rate": 9.90357142857143e-06, "loss": 24.7119, "step": 25028 }, { "epoch": 595.9313432835821, "grad_norm": 41.396575927734375, "learning_rate": 9.903174603174604e-06, "loss": 25.9333, "step": 25029 }, { "epoch": 595.955223880597, "grad_norm": 34.646583557128906, "learning_rate": 9.902777777777778e-06, "loss": 25.0524, "step": 25030 }, { "epoch": 595.9791044776119, "grad_norm": 40.87548828125, "learning_rate": 9.902380952380953e-06, "loss": 26.2721, "step": 25031 }, { "epoch": 596.0, "grad_norm": 33.67090606689453, "learning_rate": 9.901984126984127e-06, "loss": 23.0072, "step": 25032 }, { "epoch": 596.0238805970149, "grad_norm": 42.37968444824219, "learning_rate": 9.901587301587302e-06, "loss": 25.6844, "step": 25033 }, { "epoch": 596.0477611940298, "grad_norm": 38.849308013916016, "learning_rate": 9.901190476190476e-06, "loss": 25.6466, "step": 25034 }, { "epoch": 596.0716417910447, "grad_norm": 38.646759033203125, "learning_rate": 9.900793650793653e-06, "loss": 25.3404, "step": 25035 }, { "epoch": 596.0955223880597, "grad_norm": 35.141075134277344, "learning_rate": 9.900396825396826e-06, "loss": 24.8942, "step": 25036 }, { "epoch": 596.1194029850747, "grad_norm": 39.860633850097656, "learning_rate": 9.9e-06, "loss": 26.2251, "step": 25037 }, { "epoch": 596.1432835820896, "grad_norm": 33.31079864501953, "learning_rate": 9.899603174603175e-06, "loss": 25.0424, "step": 25038 }, { "epoch": 596.1671641791045, "grad_norm": 44.40277099609375, "learning_rate": 9.89920634920635e-06, "loss": 24.7606, "step": 25039 }, { "epoch": 596.1910447761194, "grad_norm": 35.99314880371094, "learning_rate": 9.898809523809525e-06, "loss": 24.8776, "step": 25040 }, { "epoch": 596.2149253731343, "grad_norm": 40.922630310058594, "learning_rate": 9.8984126984127e-06, "loss": 25.9541, "step": 25041 }, { "epoch": 596.2388059701492, "grad_norm": 38.0330924987793, "learning_rate": 9.898015873015874e-06, "loss": 25.0688, "step": 25042 }, { "epoch": 596.2626865671642, "grad_norm": 38.05756378173828, "learning_rate": 9.897619047619047e-06, "loss": 25.1169, "step": 25043 }, { "epoch": 596.2865671641791, "grad_norm": 33.30294418334961, "learning_rate": 9.897222222222222e-06, "loss": 25.1271, "step": 25044 }, { "epoch": 596.310447761194, "grad_norm": 41.80462646484375, "learning_rate": 9.896825396825398e-06, "loss": 25.6627, "step": 25045 }, { "epoch": 596.334328358209, "grad_norm": 36.42918395996094, "learning_rate": 9.896428571428573e-06, "loss": 25.1272, "step": 25046 }, { "epoch": 596.3582089552239, "grad_norm": 44.78289031982422, "learning_rate": 9.896031746031747e-06, "loss": 25.7735, "step": 25047 }, { "epoch": 596.3820895522388, "grad_norm": 37.69222640991211, "learning_rate": 9.895634920634922e-06, "loss": 26.1291, "step": 25048 }, { "epoch": 596.4059701492537, "grad_norm": 38.12914276123047, "learning_rate": 9.895238095238096e-06, "loss": 26.0779, "step": 25049 }, { "epoch": 596.4298507462687, "grad_norm": 36.81379318237305, "learning_rate": 9.89484126984127e-06, "loss": 25.4936, "step": 25050 }, { "epoch": 596.4537313432836, "grad_norm": 41.91215896606445, "learning_rate": 9.894444444444445e-06, "loss": 26.0658, "step": 25051 }, { "epoch": 596.4776119402985, "grad_norm": 33.176734924316406, "learning_rate": 9.89404761904762e-06, "loss": 23.6145, "step": 25052 }, { "epoch": 596.5014925373134, "grad_norm": 42.8675537109375, "learning_rate": 9.893650793650794e-06, "loss": 26.1725, "step": 25053 }, { "epoch": 596.5253731343283, "grad_norm": 37.59535598754883, "learning_rate": 9.893253968253969e-06, "loss": 26.1289, "step": 25054 }, { "epoch": 596.5492537313432, "grad_norm": 39.78119659423828, "learning_rate": 9.892857142857143e-06, "loss": 25.8567, "step": 25055 }, { "epoch": 596.5731343283583, "grad_norm": 36.71355056762695, "learning_rate": 9.892460317460318e-06, "loss": 25.4232, "step": 25056 }, { "epoch": 596.5970149253732, "grad_norm": 33.673370361328125, "learning_rate": 9.892063492063493e-06, "loss": 24.9532, "step": 25057 }, { "epoch": 596.6208955223881, "grad_norm": 36.023075103759766, "learning_rate": 9.891666666666667e-06, "loss": 25.9994, "step": 25058 }, { "epoch": 596.644776119403, "grad_norm": 35.83855438232422, "learning_rate": 9.891269841269842e-06, "loss": 25.5821, "step": 25059 }, { "epoch": 596.6686567164179, "grad_norm": 31.636924743652344, "learning_rate": 9.890873015873018e-06, "loss": 25.084, "step": 25060 }, { "epoch": 596.6925373134328, "grad_norm": 40.64546203613281, "learning_rate": 9.89047619047619e-06, "loss": 25.683, "step": 25061 }, { "epoch": 596.7164179104477, "grad_norm": 31.962682723999023, "learning_rate": 9.890079365079365e-06, "loss": 25.302, "step": 25062 }, { "epoch": 596.7402985074627, "grad_norm": 46.648223876953125, "learning_rate": 9.88968253968254e-06, "loss": 25.5082, "step": 25063 }, { "epoch": 596.7641791044776, "grad_norm": 39.109397888183594, "learning_rate": 9.889285714285714e-06, "loss": 25.923, "step": 25064 }, { "epoch": 596.7880597014926, "grad_norm": 38.02949142456055, "learning_rate": 9.88888888888889e-06, "loss": 26.829, "step": 25065 }, { "epoch": 596.8119402985075, "grad_norm": 35.798709869384766, "learning_rate": 9.888492063492065e-06, "loss": 26.4239, "step": 25066 }, { "epoch": 596.8358208955224, "grad_norm": 36.35650634765625, "learning_rate": 9.88809523809524e-06, "loss": 25.1754, "step": 25067 }, { "epoch": 596.8597014925373, "grad_norm": 28.304527282714844, "learning_rate": 9.887698412698413e-06, "loss": 24.9193, "step": 25068 }, { "epoch": 596.8835820895522, "grad_norm": 42.42804718017578, "learning_rate": 9.887301587301587e-06, "loss": 26.7942, "step": 25069 }, { "epoch": 596.9074626865672, "grad_norm": 33.032203674316406, "learning_rate": 9.886904761904763e-06, "loss": 25.6152, "step": 25070 }, { "epoch": 596.9313432835821, "grad_norm": 44.681556701660156, "learning_rate": 9.886507936507938e-06, "loss": 25.1966, "step": 25071 }, { "epoch": 596.955223880597, "grad_norm": 37.717044830322266, "learning_rate": 9.886111111111112e-06, "loss": 25.7979, "step": 25072 }, { "epoch": 596.9791044776119, "grad_norm": 36.644012451171875, "learning_rate": 9.885714285714287e-06, "loss": 25.8104, "step": 25073 }, { "epoch": 597.0, "grad_norm": 31.46599578857422, "learning_rate": 9.885317460317461e-06, "loss": 22.6042, "step": 25074 }, { "epoch": 597.0238805970149, "grad_norm": 30.582611083984375, "learning_rate": 9.884920634920636e-06, "loss": 24.6854, "step": 25075 }, { "epoch": 597.0477611940298, "grad_norm": 28.086528778076172, "learning_rate": 9.88452380952381e-06, "loss": 25.985, "step": 25076 }, { "epoch": 597.0716417910447, "grad_norm": 29.19880485534668, "learning_rate": 9.884126984126985e-06, "loss": 25.4386, "step": 25077 }, { "epoch": 597.0955223880597, "grad_norm": 25.83917808532715, "learning_rate": 9.88373015873016e-06, "loss": 24.957, "step": 25078 }, { "epoch": 597.1194029850747, "grad_norm": 29.66777229309082, "learning_rate": 9.883333333333334e-06, "loss": 24.274, "step": 25079 }, { "epoch": 597.1432835820896, "grad_norm": 23.795969009399414, "learning_rate": 9.882936507936509e-06, "loss": 26.5618, "step": 25080 }, { "epoch": 597.1671641791045, "grad_norm": 32.852447509765625, "learning_rate": 9.882539682539683e-06, "loss": 25.866, "step": 25081 }, { "epoch": 597.1910447761194, "grad_norm": 25.9959716796875, "learning_rate": 9.882142857142858e-06, "loss": 25.3711, "step": 25082 }, { "epoch": 597.2149253731343, "grad_norm": 32.09193420410156, "learning_rate": 9.881746031746032e-06, "loss": 25.2498, "step": 25083 }, { "epoch": 597.2388059701492, "grad_norm": 29.49506950378418, "learning_rate": 9.881349206349207e-06, "loss": 25.7886, "step": 25084 }, { "epoch": 597.2626865671642, "grad_norm": 27.297449111938477, "learning_rate": 9.880952380952381e-06, "loss": 26.0071, "step": 25085 }, { "epoch": 597.2865671641791, "grad_norm": 28.932405471801758, "learning_rate": 9.880555555555556e-06, "loss": 26.2067, "step": 25086 }, { "epoch": 597.310447761194, "grad_norm": 24.374874114990234, "learning_rate": 9.88015873015873e-06, "loss": 24.8459, "step": 25087 }, { "epoch": 597.334328358209, "grad_norm": 27.32842254638672, "learning_rate": 9.879761904761905e-06, "loss": 26.1432, "step": 25088 }, { "epoch": 597.3582089552239, "grad_norm": 26.23664665222168, "learning_rate": 9.87936507936508e-06, "loss": 25.3268, "step": 25089 }, { "epoch": 597.3820895522388, "grad_norm": 25.65001106262207, "learning_rate": 9.878968253968256e-06, "loss": 25.7002, "step": 25090 }, { "epoch": 597.4059701492537, "grad_norm": 22.17331314086914, "learning_rate": 9.87857142857143e-06, "loss": 25.5867, "step": 25091 }, { "epoch": 597.4298507462687, "grad_norm": 28.787059783935547, "learning_rate": 9.878174603174603e-06, "loss": 27.0788, "step": 25092 }, { "epoch": 597.4537313432836, "grad_norm": 23.757028579711914, "learning_rate": 9.877777777777778e-06, "loss": 25.5819, "step": 25093 }, { "epoch": 597.4776119402985, "grad_norm": 25.576171875, "learning_rate": 9.877380952380952e-06, "loss": 24.891, "step": 25094 }, { "epoch": 597.5014925373134, "grad_norm": 25.235645294189453, "learning_rate": 9.876984126984128e-06, "loss": 25.641, "step": 25095 }, { "epoch": 597.5253731343283, "grad_norm": 26.9622802734375, "learning_rate": 9.876587301587303e-06, "loss": 25.2795, "step": 25096 }, { "epoch": 597.5492537313432, "grad_norm": 23.157024383544922, "learning_rate": 9.876190476190478e-06, "loss": 25.0667, "step": 25097 }, { "epoch": 597.5731343283583, "grad_norm": 24.688461303710938, "learning_rate": 9.875793650793652e-06, "loss": 25.5018, "step": 25098 }, { "epoch": 597.5970149253732, "grad_norm": 26.19953155517578, "learning_rate": 9.875396825396825e-06, "loss": 25.0266, "step": 25099 }, { "epoch": 597.6208955223881, "grad_norm": 25.499740600585938, "learning_rate": 9.875000000000001e-06, "loss": 25.2352, "step": 25100 }, { "epoch": 597.644776119403, "grad_norm": 27.977405548095703, "learning_rate": 9.874603174603176e-06, "loss": 25.5019, "step": 25101 }, { "epoch": 597.6686567164179, "grad_norm": 26.174951553344727, "learning_rate": 9.87420634920635e-06, "loss": 25.2382, "step": 25102 }, { "epoch": 597.6925373134328, "grad_norm": 28.070632934570312, "learning_rate": 9.873809523809525e-06, "loss": 25.4692, "step": 25103 }, { "epoch": 597.7164179104477, "grad_norm": 22.17469024658203, "learning_rate": 9.8734126984127e-06, "loss": 24.5121, "step": 25104 }, { "epoch": 597.7402985074627, "grad_norm": 26.832414627075195, "learning_rate": 9.873015873015874e-06, "loss": 25.4579, "step": 25105 }, { "epoch": 597.7641791044776, "grad_norm": 26.160648345947266, "learning_rate": 9.872619047619048e-06, "loss": 25.5267, "step": 25106 }, { "epoch": 597.7880597014926, "grad_norm": 21.82392120361328, "learning_rate": 9.872222222222223e-06, "loss": 25.8582, "step": 25107 }, { "epoch": 597.8119402985075, "grad_norm": 26.009929656982422, "learning_rate": 9.871825396825397e-06, "loss": 26.3615, "step": 25108 }, { "epoch": 597.8358208955224, "grad_norm": 23.422176361083984, "learning_rate": 9.871428571428572e-06, "loss": 25.0212, "step": 25109 }, { "epoch": 597.8597014925373, "grad_norm": 23.58262825012207, "learning_rate": 9.871031746031747e-06, "loss": 25.2309, "step": 25110 }, { "epoch": 597.8835820895522, "grad_norm": 24.457157135009766, "learning_rate": 9.870634920634921e-06, "loss": 26.1426, "step": 25111 }, { "epoch": 597.9074626865672, "grad_norm": 23.516891479492188, "learning_rate": 9.870238095238096e-06, "loss": 25.6417, "step": 25112 }, { "epoch": 597.9313432835821, "grad_norm": 25.33465003967285, "learning_rate": 9.86984126984127e-06, "loss": 26.6205, "step": 25113 }, { "epoch": 597.955223880597, "grad_norm": 23.903474807739258, "learning_rate": 9.869444444444445e-06, "loss": 26.1871, "step": 25114 }, { "epoch": 597.9791044776119, "grad_norm": 24.448122024536133, "learning_rate": 9.869047619047621e-06, "loss": 26.0555, "step": 25115 }, { "epoch": 598.0, "grad_norm": 23.065427780151367, "learning_rate": 9.868650793650795e-06, "loss": 22.2825, "step": 25116 }, { "epoch": 598.0238805970149, "grad_norm": 23.766305923461914, "learning_rate": 9.868253968253968e-06, "loss": 25.0172, "step": 25117 }, { "epoch": 598.0477611940298, "grad_norm": 23.139814376831055, "learning_rate": 9.867857142857143e-06, "loss": 25.3835, "step": 25118 }, { "epoch": 598.0716417910447, "grad_norm": 27.666072845458984, "learning_rate": 9.867460317460317e-06, "loss": 25.2817, "step": 25119 }, { "epoch": 598.0955223880597, "grad_norm": 25.044002532958984, "learning_rate": 9.867063492063494e-06, "loss": 25.6817, "step": 25120 }, { "epoch": 598.1194029850747, "grad_norm": 23.992816925048828, "learning_rate": 9.866666666666668e-06, "loss": 25.4401, "step": 25121 }, { "epoch": 598.1432835820896, "grad_norm": 27.28566551208496, "learning_rate": 9.866269841269843e-06, "loss": 24.9626, "step": 25122 }, { "epoch": 598.1671641791045, "grad_norm": 25.054506301879883, "learning_rate": 9.865873015873017e-06, "loss": 26.0114, "step": 25123 }, { "epoch": 598.1910447761194, "grad_norm": 27.13926124572754, "learning_rate": 9.86547619047619e-06, "loss": 25.4918, "step": 25124 }, { "epoch": 598.2149253731343, "grad_norm": 22.303207397460938, "learning_rate": 9.865079365079366e-06, "loss": 26.1319, "step": 25125 }, { "epoch": 598.2388059701492, "grad_norm": 22.21714973449707, "learning_rate": 9.864682539682541e-06, "loss": 25.9117, "step": 25126 }, { "epoch": 598.2626865671642, "grad_norm": 21.011526107788086, "learning_rate": 9.864285714285715e-06, "loss": 25.6466, "step": 25127 }, { "epoch": 598.2865671641791, "grad_norm": 24.67301368713379, "learning_rate": 9.86388888888889e-06, "loss": 25.2675, "step": 25128 }, { "epoch": 598.310447761194, "grad_norm": 24.699827194213867, "learning_rate": 9.863492063492065e-06, "loss": 26.1075, "step": 25129 }, { "epoch": 598.334328358209, "grad_norm": 23.35677146911621, "learning_rate": 9.863095238095239e-06, "loss": 25.0482, "step": 25130 }, { "epoch": 598.3582089552239, "grad_norm": 32.84406280517578, "learning_rate": 9.862698412698414e-06, "loss": 25.6349, "step": 25131 }, { "epoch": 598.3820895522388, "grad_norm": 24.900012969970703, "learning_rate": 9.862301587301588e-06, "loss": 25.8691, "step": 25132 }, { "epoch": 598.4059701492537, "grad_norm": 25.50873374938965, "learning_rate": 9.861904761904763e-06, "loss": 25.1796, "step": 25133 }, { "epoch": 598.4298507462687, "grad_norm": 25.960886001586914, "learning_rate": 9.861507936507937e-06, "loss": 26.0463, "step": 25134 }, { "epoch": 598.4537313432836, "grad_norm": 23.060821533203125, "learning_rate": 9.861111111111112e-06, "loss": 25.8076, "step": 25135 }, { "epoch": 598.4776119402985, "grad_norm": 22.331501007080078, "learning_rate": 9.860714285714286e-06, "loss": 25.0703, "step": 25136 }, { "epoch": 598.5014925373134, "grad_norm": 26.534748077392578, "learning_rate": 9.86031746031746e-06, "loss": 25.2348, "step": 25137 }, { "epoch": 598.5253731343283, "grad_norm": 19.820270538330078, "learning_rate": 9.859920634920635e-06, "loss": 25.1114, "step": 25138 }, { "epoch": 598.5492537313432, "grad_norm": 24.783946990966797, "learning_rate": 9.85952380952381e-06, "loss": 25.6965, "step": 25139 }, { "epoch": 598.5731343283583, "grad_norm": 23.71938133239746, "learning_rate": 9.859126984126986e-06, "loss": 25.3078, "step": 25140 }, { "epoch": 598.5970149253732, "grad_norm": 23.27475357055664, "learning_rate": 9.858730158730159e-06, "loss": 26.1994, "step": 25141 }, { "epoch": 598.6208955223881, "grad_norm": 22.04896354675293, "learning_rate": 9.858333333333334e-06, "loss": 25.8483, "step": 25142 }, { "epoch": 598.644776119403, "grad_norm": 29.425796508789062, "learning_rate": 9.857936507936508e-06, "loss": 25.0837, "step": 25143 }, { "epoch": 598.6686567164179, "grad_norm": 24.742719650268555, "learning_rate": 9.857539682539683e-06, "loss": 25.5731, "step": 25144 }, { "epoch": 598.6925373134328, "grad_norm": 24.48812484741211, "learning_rate": 9.857142857142859e-06, "loss": 24.6962, "step": 25145 }, { "epoch": 598.7164179104477, "grad_norm": 24.64676284790039, "learning_rate": 9.856746031746033e-06, "loss": 24.6084, "step": 25146 }, { "epoch": 598.7402985074627, "grad_norm": 23.083940505981445, "learning_rate": 9.856349206349208e-06, "loss": 25.1208, "step": 25147 }, { "epoch": 598.7641791044776, "grad_norm": 25.45944595336914, "learning_rate": 9.85595238095238e-06, "loss": 25.3855, "step": 25148 }, { "epoch": 598.7880597014926, "grad_norm": 23.419382095336914, "learning_rate": 9.855555555555555e-06, "loss": 26.5566, "step": 25149 }, { "epoch": 598.8119402985075, "grad_norm": 20.61183738708496, "learning_rate": 9.855158730158732e-06, "loss": 24.7122, "step": 25150 }, { "epoch": 598.8358208955224, "grad_norm": 23.730514526367188, "learning_rate": 9.854761904761906e-06, "loss": 25.7459, "step": 25151 }, { "epoch": 598.8597014925373, "grad_norm": 24.134597778320312, "learning_rate": 9.85436507936508e-06, "loss": 25.9107, "step": 25152 }, { "epoch": 598.8835820895522, "grad_norm": 23.78508758544922, "learning_rate": 9.853968253968255e-06, "loss": 25.4407, "step": 25153 }, { "epoch": 598.9074626865672, "grad_norm": 25.0212345123291, "learning_rate": 9.85357142857143e-06, "loss": 25.7349, "step": 25154 }, { "epoch": 598.9313432835821, "grad_norm": 27.858970642089844, "learning_rate": 9.853174603174604e-06, "loss": 26.1467, "step": 25155 }, { "epoch": 598.955223880597, "grad_norm": 27.504737854003906, "learning_rate": 9.852777777777779e-06, "loss": 26.1136, "step": 25156 }, { "epoch": 598.9791044776119, "grad_norm": 24.58822250366211, "learning_rate": 9.852380952380953e-06, "loss": 25.3519, "step": 25157 }, { "epoch": 599.0, "grad_norm": 22.54213523864746, "learning_rate": 9.851984126984128e-06, "loss": 22.8534, "step": 25158 }, { "epoch": 599.0238805970149, "grad_norm": 26.073135375976562, "learning_rate": 9.851587301587302e-06, "loss": 26.3418, "step": 25159 }, { "epoch": 599.0477611940298, "grad_norm": 26.062599182128906, "learning_rate": 9.851190476190477e-06, "loss": 25.766, "step": 25160 }, { "epoch": 599.0716417910447, "grad_norm": 24.84092140197754, "learning_rate": 9.850793650793651e-06, "loss": 25.5588, "step": 25161 }, { "epoch": 599.0955223880597, "grad_norm": 27.507949829101562, "learning_rate": 9.850396825396826e-06, "loss": 25.2416, "step": 25162 }, { "epoch": 599.1194029850747, "grad_norm": 23.31418800354004, "learning_rate": 9.85e-06, "loss": 25.6784, "step": 25163 }, { "epoch": 599.1432835820896, "grad_norm": 26.86326026916504, "learning_rate": 9.849603174603175e-06, "loss": 24.9782, "step": 25164 }, { "epoch": 599.1671641791045, "grad_norm": 22.96787452697754, "learning_rate": 9.849206349206351e-06, "loss": 25.8026, "step": 25165 }, { "epoch": 599.1910447761194, "grad_norm": 29.18767738342285, "learning_rate": 9.848809523809524e-06, "loss": 25.1953, "step": 25166 }, { "epoch": 599.2149253731343, "grad_norm": 27.67605209350586, "learning_rate": 9.848412698412699e-06, "loss": 25.2273, "step": 25167 }, { "epoch": 599.2388059701492, "grad_norm": 22.301931381225586, "learning_rate": 9.848015873015873e-06, "loss": 24.5411, "step": 25168 }, { "epoch": 599.2626865671642, "grad_norm": 23.93730926513672, "learning_rate": 9.847619047619048e-06, "loss": 24.7437, "step": 25169 }, { "epoch": 599.2865671641791, "grad_norm": 27.02720832824707, "learning_rate": 9.847222222222224e-06, "loss": 24.8887, "step": 25170 }, { "epoch": 599.310447761194, "grad_norm": 27.196428298950195, "learning_rate": 9.846825396825399e-06, "loss": 25.1869, "step": 25171 }, { "epoch": 599.334328358209, "grad_norm": 26.442428588867188, "learning_rate": 9.846428571428573e-06, "loss": 24.4948, "step": 25172 }, { "epoch": 599.3582089552239, "grad_norm": 24.558757781982422, "learning_rate": 9.846031746031746e-06, "loss": 25.9516, "step": 25173 }, { "epoch": 599.3820895522388, "grad_norm": 25.334247589111328, "learning_rate": 9.84563492063492e-06, "loss": 25.4194, "step": 25174 }, { "epoch": 599.4059701492537, "grad_norm": 27.9658145904541, "learning_rate": 9.845238095238097e-06, "loss": 26.2463, "step": 25175 }, { "epoch": 599.4298507462687, "grad_norm": 22.390424728393555, "learning_rate": 9.844841269841271e-06, "loss": 25.3799, "step": 25176 }, { "epoch": 599.4537313432836, "grad_norm": 22.65277099609375, "learning_rate": 9.844444444444446e-06, "loss": 25.8985, "step": 25177 }, { "epoch": 599.4776119402985, "grad_norm": 23.889314651489258, "learning_rate": 9.84404761904762e-06, "loss": 26.4533, "step": 25178 }, { "epoch": 599.5014925373134, "grad_norm": 22.63577651977539, "learning_rate": 9.843650793650795e-06, "loss": 25.006, "step": 25179 }, { "epoch": 599.5253731343283, "grad_norm": NaN, "learning_rate": 9.843253968253968e-06, "loss": 35.1263, "step": 25180 }, { "epoch": 599.5492537313432, "grad_norm": 24.152027130126953, "learning_rate": 9.843253968253968e-06, "loss": 24.9555, "step": 25181 }, { "epoch": 599.5731343283583, "grad_norm": 20.276412963867188, "learning_rate": 9.842857142857144e-06, "loss": 25.6892, "step": 25182 }, { "epoch": 599.5970149253732, "grad_norm": 21.342002868652344, "learning_rate": 9.842460317460319e-06, "loss": 24.7717, "step": 25183 }, { "epoch": 599.6208955223881, "grad_norm": 23.860883712768555, "learning_rate": 9.842063492063493e-06, "loss": 24.8257, "step": 25184 }, { "epoch": 599.644776119403, "grad_norm": 26.820240020751953, "learning_rate": 9.841666666666668e-06, "loss": 26.3246, "step": 25185 }, { "epoch": 599.6686567164179, "grad_norm": 24.886865615844727, "learning_rate": 9.841269841269842e-06, "loss": 25.5546, "step": 25186 }, { "epoch": 599.6925373134328, "grad_norm": 27.94231414794922, "learning_rate": 9.840873015873017e-06, "loss": 26.742, "step": 25187 }, { "epoch": 599.7164179104477, "grad_norm": 21.123388290405273, "learning_rate": 9.840476190476191e-06, "loss": 25.3113, "step": 25188 }, { "epoch": 599.7402985074627, "grad_norm": 30.372873306274414, "learning_rate": 9.840079365079366e-06, "loss": 25.5324, "step": 25189 }, { "epoch": 599.7641791044776, "grad_norm": 25.508926391601562, "learning_rate": 9.83968253968254e-06, "loss": 25.8396, "step": 25190 }, { "epoch": 599.7880597014926, "grad_norm": 28.383529663085938, "learning_rate": 9.839285714285715e-06, "loss": 25.6595, "step": 25191 }, { "epoch": 599.8119402985075, "grad_norm": 26.774394989013672, "learning_rate": 9.83888888888889e-06, "loss": 26.1137, "step": 25192 }, { "epoch": 599.8358208955224, "grad_norm": 25.883054733276367, "learning_rate": 9.838492063492064e-06, "loss": 24.9768, "step": 25193 }, { "epoch": 599.8597014925373, "grad_norm": 25.622344970703125, "learning_rate": 9.838095238095238e-06, "loss": 24.9676, "step": 25194 }, { "epoch": 599.8835820895522, "grad_norm": 28.175193786621094, "learning_rate": 9.837698412698413e-06, "loss": 25.9297, "step": 25195 }, { "epoch": 599.9074626865672, "grad_norm": 23.887847900390625, "learning_rate": 9.837301587301588e-06, "loss": 25.5425, "step": 25196 }, { "epoch": 599.9313432835821, "grad_norm": 23.83967399597168, "learning_rate": 9.836904761904764e-06, "loss": 25.6778, "step": 25197 }, { "epoch": 599.955223880597, "grad_norm": 24.4652042388916, "learning_rate": 9.836507936507937e-06, "loss": 24.9608, "step": 25198 }, { "epoch": 599.9791044776119, "grad_norm": 24.708646774291992, "learning_rate": 9.836111111111111e-06, "loss": 25.6439, "step": 25199 }, { "epoch": 600.0, "grad_norm": 22.628244400024414, "learning_rate": 9.835714285714286e-06, "loss": 22.6238, "step": 25200 }, { "epoch": 600.0, "step": 25200, "total_flos": 1.2387709733236424e+18, "train_loss": 0.4282282575728401, "train_runtime": 12824.2723, "train_samples_per_second": 250.4, "train_steps_per_second": 1.965 }, { "epoch": 600.0238805970149, "grad_norm": 23.073057174682617, "learning_rate": 1e-05, "loss": 24.47, "step": 25201 }, { "epoch": 600.0477611940298, "grad_norm": Infinity, "learning_rate": 9.99960967993755e-06, "loss": 30.9551, "step": 25202 }, { "epoch": 600.0716417910447, "grad_norm": 337.4909973144531, "learning_rate": 9.99960967993755e-06, "loss": 32.3724, "step": 25203 }, { "epoch": 600.0955223880597, "grad_norm": 174.58084106445312, "learning_rate": 9.999219359875098e-06, "loss": 29.9205, "step": 25204 }, { "epoch": 600.1194029850747, "grad_norm": 90.24905395507812, "learning_rate": 9.998829039812648e-06, "loss": 28.2067, "step": 25205 }, { "epoch": 600.1432835820896, "grad_norm": 91.64534759521484, "learning_rate": 9.998438719750197e-06, "loss": 26.5233, "step": 25206 }, { "epoch": 600.1671641791045, "grad_norm": 63.391700744628906, "learning_rate": 9.998048399687746e-06, "loss": 27.6983, "step": 25207 }, { "epoch": 600.1910447761194, "grad_norm": 59.01126480102539, "learning_rate": 9.997658079625293e-06, "loss": 26.7203, "step": 25208 }, { "epoch": 600.2149253731343, "grad_norm": 72.64952087402344, "learning_rate": 9.997267759562843e-06, "loss": 26.3295, "step": 25209 }, { "epoch": 600.2388059701492, "grad_norm": 46.562774658203125, "learning_rate": 9.996877439500391e-06, "loss": 26.6047, "step": 25210 }, { "epoch": 600.2626865671642, "grad_norm": 60.67850875854492, "learning_rate": 9.99648711943794e-06, "loss": 25.9829, "step": 25211 }, { "epoch": 600.2865671641791, "grad_norm": 36.499778747558594, "learning_rate": 9.996096799375489e-06, "loss": 26.3729, "step": 25212 }, { "epoch": 600.310447761194, "grad_norm": 47.30533981323242, "learning_rate": 9.995706479313037e-06, "loss": 24.5883, "step": 25213 }, { "epoch": 600.334328358209, "grad_norm": 35.27845764160156, "learning_rate": 9.995316159250586e-06, "loss": 26.0408, "step": 25214 }, { "epoch": 600.3582089552239, "grad_norm": 30.36895179748535, "learning_rate": 9.994925839188136e-06, "loss": 25.988, "step": 25215 }, { "epoch": 600.3820895522388, "grad_norm": 43.20551300048828, "learning_rate": 9.994535519125685e-06, "loss": 25.5942, "step": 25216 }, { "epoch": 600.4059701492537, "grad_norm": 32.01940155029297, "learning_rate": 9.994145199063233e-06, "loss": 26.2761, "step": 25217 }, { "epoch": 600.4298507462687, "grad_norm": 25.979251861572266, "learning_rate": 9.99375487900078e-06, "loss": 24.9816, "step": 25218 }, { "epoch": 600.4537313432836, "grad_norm": 29.35100746154785, "learning_rate": 9.99336455893833e-06, "loss": 26.2646, "step": 25219 }, { "epoch": 600.4776119402985, "grad_norm": 25.36323356628418, "learning_rate": 9.99297423887588e-06, "loss": 25.8652, "step": 25220 }, { "epoch": 600.5014925373134, "grad_norm": 26.753320693969727, "learning_rate": 9.992583918813428e-06, "loss": 25.9317, "step": 25221 }, { "epoch": 600.5253731343283, "grad_norm": 26.48851203918457, "learning_rate": 9.992193598750977e-06, "loss": 25.161, "step": 25222 }, { "epoch": 600.5492537313432, "grad_norm": 24.544984817504883, "learning_rate": 9.991803278688525e-06, "loss": 25.6468, "step": 25223 }, { "epoch": 600.5731343283583, "grad_norm": NaN, "learning_rate": 9.991412958626074e-06, "loss": 34.9704, "step": 25224 }, { "epoch": 600.5970149253732, "grad_norm": 27.457212448120117, "learning_rate": 9.991412958626074e-06, "loss": 26.0894, "step": 25225 }, { "epoch": 600.6208955223881, "grad_norm": 23.630407333374023, "learning_rate": 9.991022638563624e-06, "loss": 25.4254, "step": 25226 }, { "epoch": 600.644776119403, "grad_norm": 25.954763412475586, "learning_rate": 9.990632318501173e-06, "loss": 25.3184, "step": 25227 }, { "epoch": 600.6686567164179, "grad_norm": 25.39680290222168, "learning_rate": 9.990241998438721e-06, "loss": 25.7438, "step": 25228 }, { "epoch": 600.6925373134328, "grad_norm": 28.862239837646484, "learning_rate": 9.989851678376268e-06, "loss": 26.3577, "step": 25229 }, { "epoch": 600.7164179104477, "grad_norm": 22.95964241027832, "learning_rate": 9.989461358313819e-06, "loss": 25.3863, "step": 25230 }, { "epoch": 600.7402985074627, "grad_norm": 23.24620819091797, "learning_rate": 9.989071038251367e-06, "loss": 25.6442, "step": 25231 }, { "epoch": 600.7641791044776, "grad_norm": 22.58769416809082, "learning_rate": 9.988680718188916e-06, "loss": 25.9007, "step": 25232 }, { "epoch": 600.7880597014926, "grad_norm": 24.49824333190918, "learning_rate": 9.988290398126464e-06, "loss": 25.1904, "step": 25233 }, { "epoch": 600.8119402985075, "grad_norm": 28.481000900268555, "learning_rate": 9.987900078064013e-06, "loss": 25.4335, "step": 25234 }, { "epoch": 600.8358208955224, "grad_norm": 27.306537628173828, "learning_rate": 9.987509758001562e-06, "loss": 26.2449, "step": 25235 }, { "epoch": 600.8597014925373, "grad_norm": 24.15239906311035, "learning_rate": 9.987119437939112e-06, "loss": 25.2776, "step": 25236 }, { "epoch": 600.8835820895522, "grad_norm": 25.032730102539062, "learning_rate": 9.98672911787666e-06, "loss": 26.0761, "step": 25237 }, { "epoch": 600.9074626865672, "grad_norm": 34.23324203491211, "learning_rate": 9.98633879781421e-06, "loss": 24.7806, "step": 25238 }, { "epoch": 600.9313432835821, "grad_norm": 24.042741775512695, "learning_rate": 9.985948477751756e-06, "loss": 25.5528, "step": 25239 }, { "epoch": 600.955223880597, "grad_norm": 29.08383560180664, "learning_rate": 9.985558157689306e-06, "loss": 26.0603, "step": 25240 }, { "epoch": 600.9791044776119, "grad_norm": 29.164005279541016, "learning_rate": 9.985167837626855e-06, "loss": 25.0459, "step": 25241 }, { "epoch": 601.0, "grad_norm": 22.892629623413086, "learning_rate": 9.984777517564404e-06, "loss": 21.7517, "step": 25242 }, { "epoch": 601.0238805970149, "grad_norm": 25.1090145111084, "learning_rate": 9.984387197501952e-06, "loss": 26.3292, "step": 25243 }, { "epoch": 601.0477611940298, "grad_norm": 26.843229293823242, "learning_rate": 9.983996877439501e-06, "loss": 25.8835, "step": 25244 }, { "epoch": 601.0716417910447, "grad_norm": 25.6456356048584, "learning_rate": 9.98360655737705e-06, "loss": 24.6888, "step": 25245 }, { "epoch": 601.0955223880597, "grad_norm": 21.013362884521484, "learning_rate": 9.9832162373146e-06, "loss": 24.9382, "step": 25246 }, { "epoch": 601.1194029850747, "grad_norm": 22.74300193786621, "learning_rate": 9.982825917252148e-06, "loss": 24.6078, "step": 25247 }, { "epoch": 601.1432835820896, "grad_norm": 27.286649703979492, "learning_rate": 9.982435597189697e-06, "loss": 25.725, "step": 25248 }, { "epoch": 601.1671641791045, "grad_norm": 22.73012924194336, "learning_rate": 9.982045277127244e-06, "loss": 25.3718, "step": 25249 }, { "epoch": 601.1910447761194, "grad_norm": 25.393972396850586, "learning_rate": 9.981654957064794e-06, "loss": 25.562, "step": 25250 }, { "epoch": 601.2149253731343, "grad_norm": 32.594261169433594, "learning_rate": 9.981264637002343e-06, "loss": 26.3896, "step": 25251 }, { "epoch": 601.2388059701492, "grad_norm": 26.182994842529297, "learning_rate": 9.980874316939891e-06, "loss": 25.5458, "step": 25252 }, { "epoch": 601.2626865671642, "grad_norm": 26.04961585998535, "learning_rate": 9.98048399687744e-06, "loss": 24.8029, "step": 25253 }, { "epoch": 601.2865671641791, "grad_norm": 32.65851593017578, "learning_rate": 9.980093676814989e-06, "loss": 26.2618, "step": 25254 }, { "epoch": 601.310447761194, "grad_norm": 27.464126586914062, "learning_rate": 9.979703356752537e-06, "loss": 25.4757, "step": 25255 }, { "epoch": 601.334328358209, "grad_norm": 21.68206214904785, "learning_rate": 9.979313036690088e-06, "loss": 25.3281, "step": 25256 }, { "epoch": 601.3582089552239, "grad_norm": 27.363697052001953, "learning_rate": 9.978922716627636e-06, "loss": 25.2848, "step": 25257 }, { "epoch": 601.3820895522388, "grad_norm": 25.779977798461914, "learning_rate": 9.978532396565185e-06, "loss": 26.2434, "step": 25258 }, { "epoch": 601.4059701492537, "grad_norm": 22.09343719482422, "learning_rate": 9.978142076502732e-06, "loss": 25.4654, "step": 25259 }, { "epoch": 601.4298507462687, "grad_norm": 30.66204833984375, "learning_rate": 9.977751756440282e-06, "loss": 25.512, "step": 25260 }, { "epoch": 601.4537313432836, "grad_norm": 27.142324447631836, "learning_rate": 9.97736143637783e-06, "loss": 25.1293, "step": 25261 }, { "epoch": 601.4776119402985, "grad_norm": 23.878868103027344, "learning_rate": 9.97697111631538e-06, "loss": 24.3574, "step": 25262 }, { "epoch": 601.5014925373134, "grad_norm": 28.65682601928711, "learning_rate": 9.976580796252928e-06, "loss": 25.6393, "step": 25263 }, { "epoch": 601.5253731343283, "grad_norm": 26.60750961303711, "learning_rate": 9.976190476190477e-06, "loss": 25.3125, "step": 25264 }, { "epoch": 601.5492537313432, "grad_norm": 23.9840030670166, "learning_rate": 9.975800156128025e-06, "loss": 25.4759, "step": 25265 }, { "epoch": 601.5731343283583, "grad_norm": 23.126829147338867, "learning_rate": 9.975409836065576e-06, "loss": 26.1646, "step": 25266 }, { "epoch": 601.5970149253732, "grad_norm": 28.92863655090332, "learning_rate": 9.975019516003124e-06, "loss": 24.2605, "step": 25267 }, { "epoch": 601.6208955223881, "grad_norm": 25.690805435180664, "learning_rate": 9.974629195940673e-06, "loss": 25.9357, "step": 25268 }, { "epoch": 601.644776119403, "grad_norm": 23.715782165527344, "learning_rate": 9.97423887587822e-06, "loss": 25.4938, "step": 25269 }, { "epoch": 601.6686567164179, "grad_norm": 21.6058292388916, "learning_rate": 9.97384855581577e-06, "loss": 25.7428, "step": 25270 }, { "epoch": 601.6925373134328, "grad_norm": 31.098527908325195, "learning_rate": 9.973458235753319e-06, "loss": 25.5548, "step": 25271 }, { "epoch": 601.7164179104477, "grad_norm": 23.623291015625, "learning_rate": 9.973067915690867e-06, "loss": 26.0281, "step": 25272 }, { "epoch": 601.7402985074627, "grad_norm": 23.31321144104004, "learning_rate": 9.972677595628416e-06, "loss": 25.6486, "step": 25273 }, { "epoch": 601.7641791044776, "grad_norm": 20.67629623413086, "learning_rate": 9.972287275565964e-06, "loss": 24.8319, "step": 25274 }, { "epoch": 601.7880597014926, "grad_norm": 23.897207260131836, "learning_rate": 9.971896955503513e-06, "loss": 25.312, "step": 25275 }, { "epoch": 601.8119402985075, "grad_norm": 22.303232192993164, "learning_rate": 9.971506635441063e-06, "loss": 25.8765, "step": 25276 }, { "epoch": 601.8358208955224, "grad_norm": 24.765939712524414, "learning_rate": 9.971116315378612e-06, "loss": 25.3122, "step": 25277 }, { "epoch": 601.8597014925373, "grad_norm": 26.297208786010742, "learning_rate": 9.97072599531616e-06, "loss": 25.8374, "step": 25278 }, { "epoch": 601.8835820895522, "grad_norm": 24.237567901611328, "learning_rate": 9.970335675253708e-06, "loss": 24.6975, "step": 25279 }, { "epoch": 601.9074626865672, "grad_norm": 25.387493133544922, "learning_rate": 9.969945355191258e-06, "loss": 25.8755, "step": 25280 }, { "epoch": 601.9313432835821, "grad_norm": 24.752119064331055, "learning_rate": 9.969555035128806e-06, "loss": 25.9075, "step": 25281 }, { "epoch": 601.955223880597, "grad_norm": 21.772205352783203, "learning_rate": 9.969164715066355e-06, "loss": 25.342, "step": 25282 }, { "epoch": 601.9791044776119, "grad_norm": 22.90241241455078, "learning_rate": 9.968774395003904e-06, "loss": 25.3921, "step": 25283 }, { "epoch": 602.0, "grad_norm": 23.35023307800293, "learning_rate": 9.968384074941452e-06, "loss": 21.6328, "step": 25284 }, { "epoch": 602.0238805970149, "grad_norm": NaN, "learning_rate": 9.967993754879003e-06, "loss": 22.8121, "step": 25285 }, { "epoch": 602.0477611940298, "grad_norm": 24.78314208984375, "learning_rate": 9.967993754879003e-06, "loss": 24.9306, "step": 25286 }, { "epoch": 602.0716417910447, "grad_norm": 21.92508316040039, "learning_rate": 9.967603434816551e-06, "loss": 24.2665, "step": 25287 }, { "epoch": 602.0955223880597, "grad_norm": 29.315948486328125, "learning_rate": 9.9672131147541e-06, "loss": 25.2374, "step": 25288 }, { "epoch": 602.1194029850747, "grad_norm": 29.42236328125, "learning_rate": 9.966822794691648e-06, "loss": 24.9918, "step": 25289 }, { "epoch": 602.1432835820896, "grad_norm": 22.185544967651367, "learning_rate": 9.966432474629197e-06, "loss": 24.0583, "step": 25290 }, { "epoch": 602.1671641791045, "grad_norm": 30.228050231933594, "learning_rate": 9.966042154566746e-06, "loss": 25.1459, "step": 25291 }, { "epoch": 602.1910447761194, "grad_norm": 32.263851165771484, "learning_rate": 9.965651834504294e-06, "loss": 24.5795, "step": 25292 }, { "epoch": 602.2149253731343, "grad_norm": 21.5002384185791, "learning_rate": 9.965261514441843e-06, "loss": 24.5242, "step": 25293 }, { "epoch": 602.2388059701492, "grad_norm": 32.42888259887695, "learning_rate": 9.964871194379392e-06, "loss": 26.0043, "step": 25294 }, { "epoch": 602.2626865671642, "grad_norm": 24.42958641052246, "learning_rate": 9.96448087431694e-06, "loss": 24.8578, "step": 25295 }, { "epoch": 602.2865671641791, "grad_norm": 25.789329528808594, "learning_rate": 9.96409055425449e-06, "loss": 25.4749, "step": 25296 }, { "epoch": 602.310447761194, "grad_norm": 33.564353942871094, "learning_rate": 9.963700234192039e-06, "loss": 26.0309, "step": 25297 }, { "epoch": 602.334328358209, "grad_norm": 25.389881134033203, "learning_rate": 9.963309914129588e-06, "loss": 26.8327, "step": 25298 }, { "epoch": 602.3582089552239, "grad_norm": 30.222270965576172, "learning_rate": 9.962919594067136e-06, "loss": 24.9734, "step": 25299 }, { "epoch": 602.3820895522388, "grad_norm": 26.240842819213867, "learning_rate": 9.962529274004685e-06, "loss": 25.2605, "step": 25300 }, { "epoch": 602.4059701492537, "grad_norm": 31.5482120513916, "learning_rate": 9.962138953942234e-06, "loss": 26.1579, "step": 25301 }, { "epoch": 602.4298507462687, "grad_norm": 26.31661033630371, "learning_rate": 9.961748633879782e-06, "loss": 25.4614, "step": 25302 }, { "epoch": 602.4537313432836, "grad_norm": 22.539907455444336, "learning_rate": 9.96135831381733e-06, "loss": 25.4925, "step": 25303 }, { "epoch": 602.4776119402985, "grad_norm": 32.85356140136719, "learning_rate": 9.96096799375488e-06, "loss": 26.6173, "step": 25304 }, { "epoch": 602.5014925373134, "grad_norm": 23.269336700439453, "learning_rate": 9.960577673692428e-06, "loss": 25.7646, "step": 25305 }, { "epoch": 602.5253731343283, "grad_norm": 29.837646484375, "learning_rate": 9.960187353629978e-06, "loss": 26.2866, "step": 25306 }, { "epoch": 602.5492537313432, "grad_norm": 29.720081329345703, "learning_rate": 9.959797033567527e-06, "loss": 24.2604, "step": 25307 }, { "epoch": 602.5731343283583, "grad_norm": 28.570817947387695, "learning_rate": 9.959406713505076e-06, "loss": 26.0141, "step": 25308 }, { "epoch": 602.5970149253732, "grad_norm": 23.962709426879883, "learning_rate": 9.959016393442624e-06, "loss": 25.1983, "step": 25309 }, { "epoch": 602.6208955223881, "grad_norm": 27.109094619750977, "learning_rate": 9.958626073380173e-06, "loss": 25.1074, "step": 25310 }, { "epoch": 602.644776119403, "grad_norm": 28.446969985961914, "learning_rate": 9.958235753317721e-06, "loss": 25.9575, "step": 25311 }, { "epoch": 602.6686567164179, "grad_norm": 23.37527084350586, "learning_rate": 9.95784543325527e-06, "loss": 26.0034, "step": 25312 }, { "epoch": 602.6925373134328, "grad_norm": 25.87799644470215, "learning_rate": 9.957455113192819e-06, "loss": 25.5864, "step": 25313 }, { "epoch": 602.7164179104477, "grad_norm": 22.242685317993164, "learning_rate": 9.957064793130367e-06, "loss": 25.3962, "step": 25314 }, { "epoch": 602.7402985074627, "grad_norm": 29.238134384155273, "learning_rate": 9.956674473067916e-06, "loss": 25.146, "step": 25315 }, { "epoch": 602.7641791044776, "grad_norm": 21.910146713256836, "learning_rate": 9.956284153005466e-06, "loss": 24.9752, "step": 25316 }, { "epoch": 602.7880597014926, "grad_norm": 29.16541290283203, "learning_rate": 9.955893832943015e-06, "loss": 26.0432, "step": 25317 }, { "epoch": 602.8119402985075, "grad_norm": 23.731882095336914, "learning_rate": 9.955503512880563e-06, "loss": 25.6971, "step": 25318 }, { "epoch": 602.8358208955224, "grad_norm": 25.641420364379883, "learning_rate": 9.955113192818112e-06, "loss": 25.8792, "step": 25319 }, { "epoch": 602.8597014925373, "grad_norm": 23.287992477416992, "learning_rate": 9.95472287275566e-06, "loss": 24.9486, "step": 25320 }, { "epoch": 602.8835820895522, "grad_norm": 22.729612350463867, "learning_rate": 9.95433255269321e-06, "loss": 25.3627, "step": 25321 }, { "epoch": 602.9074626865672, "grad_norm": 26.697757720947266, "learning_rate": 9.953942232630758e-06, "loss": 24.9195, "step": 25322 }, { "epoch": 602.9313432835821, "grad_norm": 23.083293914794922, "learning_rate": 9.953551912568307e-06, "loss": 24.9879, "step": 25323 }, { "epoch": 602.955223880597, "grad_norm": 28.775314331054688, "learning_rate": 9.953161592505855e-06, "loss": 25.7734, "step": 25324 }, { "epoch": 602.9791044776119, "grad_norm": 21.337034225463867, "learning_rate": 9.952771272443404e-06, "loss": 25.5818, "step": 25325 }, { "epoch": 603.0, "grad_norm": 26.069002151489258, "learning_rate": 9.952380952380954e-06, "loss": 21.9452, "step": 25326 }, { "epoch": 603.0238805970149, "grad_norm": 26.105669021606445, "learning_rate": 9.951990632318503e-06, "loss": 25.6587, "step": 25327 }, { "epoch": 603.0477611940298, "grad_norm": 26.580331802368164, "learning_rate": 9.951600312256051e-06, "loss": 24.8188, "step": 25328 }, { "epoch": 603.0716417910447, "grad_norm": 25.72101402282715, "learning_rate": 9.9512099921936e-06, "loss": 24.9504, "step": 25329 }, { "epoch": 603.0955223880597, "grad_norm": 25.49736213684082, "learning_rate": 9.950819672131149e-06, "loss": 25.6547, "step": 25330 }, { "epoch": 603.1194029850747, "grad_norm": 26.611846923828125, "learning_rate": 9.950429352068697e-06, "loss": 25.1895, "step": 25331 }, { "epoch": 603.1432835820896, "grad_norm": 27.15961265563965, "learning_rate": 9.950039032006246e-06, "loss": 25.065, "step": 25332 }, { "epoch": 603.1671641791045, "grad_norm": 25.270959854125977, "learning_rate": 9.949648711943794e-06, "loss": 24.2858, "step": 25333 }, { "epoch": 603.1910447761194, "grad_norm": 23.885774612426758, "learning_rate": 9.949258391881343e-06, "loss": 24.9663, "step": 25334 }, { "epoch": 603.2149253731343, "grad_norm": 28.08515739440918, "learning_rate": 9.948868071818892e-06, "loss": 25.0734, "step": 25335 }, { "epoch": 603.2388059701492, "grad_norm": 22.54444122314453, "learning_rate": 9.948477751756442e-06, "loss": 25.2646, "step": 25336 }, { "epoch": 603.2626865671642, "grad_norm": 25.082895278930664, "learning_rate": 9.94808743169399e-06, "loss": 24.9976, "step": 25337 }, { "epoch": 603.2865671641791, "grad_norm": 21.055728912353516, "learning_rate": 9.947697111631539e-06, "loss": 25.034, "step": 25338 }, { "epoch": 603.310447761194, "grad_norm": 22.95960807800293, "learning_rate": 9.947306791569088e-06, "loss": 25.9014, "step": 25339 }, { "epoch": 603.334328358209, "grad_norm": 26.127471923828125, "learning_rate": 9.946916471506636e-06, "loss": 25.38, "step": 25340 }, { "epoch": 603.3582089552239, "grad_norm": 28.119775772094727, "learning_rate": 9.946526151444185e-06, "loss": 25.2657, "step": 25341 }, { "epoch": 603.3820895522388, "grad_norm": 31.674331665039062, "learning_rate": 9.946135831381734e-06, "loss": 24.5059, "step": 25342 }, { "epoch": 603.4059701492537, "grad_norm": 21.247745513916016, "learning_rate": 9.945745511319282e-06, "loss": 25.8477, "step": 25343 }, { "epoch": 603.4298507462687, "grad_norm": 24.3935489654541, "learning_rate": 9.945355191256831e-06, "loss": 25.1651, "step": 25344 }, { "epoch": 603.4537313432836, "grad_norm": 31.470129013061523, "learning_rate": 9.94496487119438e-06, "loss": 25.4226, "step": 25345 }, { "epoch": 603.4776119402985, "grad_norm": 26.692007064819336, "learning_rate": 9.94457455113193e-06, "loss": 25.696, "step": 25346 }, { "epoch": 603.5014925373134, "grad_norm": 21.69451332092285, "learning_rate": 9.944184231069478e-06, "loss": 25.8647, "step": 25347 }, { "epoch": 603.5253731343283, "grad_norm": 31.942323684692383, "learning_rate": 9.943793911007027e-06, "loss": 25.6661, "step": 25348 }, { "epoch": 603.5492537313432, "grad_norm": 28.34360694885254, "learning_rate": 9.943403590944576e-06, "loss": 25.3782, "step": 25349 }, { "epoch": 603.5731343283583, "grad_norm": 24.63170623779297, "learning_rate": 9.943013270882124e-06, "loss": 26.0342, "step": 25350 }, { "epoch": 603.5970149253732, "grad_norm": 36.260284423828125, "learning_rate": 9.942622950819673e-06, "loss": 25.7649, "step": 25351 }, { "epoch": 603.6208955223881, "grad_norm": 28.36099624633789, "learning_rate": 9.942232630757221e-06, "loss": 25.0328, "step": 25352 }, { "epoch": 603.644776119403, "grad_norm": 24.43423843383789, "learning_rate": 9.94184231069477e-06, "loss": 25.0307, "step": 25353 }, { "epoch": 603.6686567164179, "grad_norm": 26.7183895111084, "learning_rate": 9.941451990632319e-06, "loss": 25.4882, "step": 25354 }, { "epoch": 603.6925373134328, "grad_norm": 32.023895263671875, "learning_rate": 9.941061670569867e-06, "loss": 25.6517, "step": 25355 }, { "epoch": 603.7164179104477, "grad_norm": 20.21428871154785, "learning_rate": 9.940671350507418e-06, "loss": 25.5365, "step": 25356 }, { "epoch": 603.7402985074627, "grad_norm": 26.70046043395996, "learning_rate": 9.940281030444966e-06, "loss": 25.0257, "step": 25357 }, { "epoch": 603.7641791044776, "grad_norm": 33.28660583496094, "learning_rate": 9.939890710382515e-06, "loss": 26.3886, "step": 25358 }, { "epoch": 603.7880597014926, "grad_norm": 23.459604263305664, "learning_rate": 9.939500390320063e-06, "loss": 25.6913, "step": 25359 }, { "epoch": 603.8119402985075, "grad_norm": 21.47509765625, "learning_rate": 9.939110070257612e-06, "loss": 24.3408, "step": 25360 }, { "epoch": 603.8358208955224, "grad_norm": 32.901588439941406, "learning_rate": 9.93871975019516e-06, "loss": 25.8797, "step": 25361 }, { "epoch": 603.8597014925373, "grad_norm": 27.954391479492188, "learning_rate": 9.93832943013271e-06, "loss": 25.7333, "step": 25362 }, { "epoch": 603.8835820895522, "grad_norm": 22.767715454101562, "learning_rate": 9.937939110070258e-06, "loss": 26.1302, "step": 25363 }, { "epoch": 603.9074626865672, "grad_norm": 28.76542091369629, "learning_rate": 9.937548790007807e-06, "loss": 25.5724, "step": 25364 }, { "epoch": 603.9313432835821, "grad_norm": 29.49973487854004, "learning_rate": 9.937158469945357e-06, "loss": 25.1673, "step": 25365 }, { "epoch": 603.955223880597, "grad_norm": 22.6486759185791, "learning_rate": 9.936768149882905e-06, "loss": 25.3602, "step": 25366 }, { "epoch": 603.9791044776119, "grad_norm": 24.762557983398438, "learning_rate": 9.936377829820454e-06, "loss": 24.9197, "step": 25367 }, { "epoch": 604.0, "grad_norm": 24.34908676147461, "learning_rate": 9.935987509758003e-06, "loss": 22.877, "step": 25368 }, { "epoch": 604.0238805970149, "grad_norm": 24.768648147583008, "learning_rate": 9.935597189695551e-06, "loss": 25.8962, "step": 25369 }, { "epoch": 604.0477611940298, "grad_norm": 23.82158660888672, "learning_rate": 9.9352068696331e-06, "loss": 25.7049, "step": 25370 }, { "epoch": 604.0716417910447, "grad_norm": 21.150848388671875, "learning_rate": 9.934816549570649e-06, "loss": 26.079, "step": 25371 }, { "epoch": 604.0955223880597, "grad_norm": 23.54196548461914, "learning_rate": 9.934426229508197e-06, "loss": 25.6496, "step": 25372 }, { "epoch": 604.1194029850747, "grad_norm": 23.79225730895996, "learning_rate": 9.934035909445746e-06, "loss": 25.4849, "step": 25373 }, { "epoch": 604.1432835820896, "grad_norm": 23.08950424194336, "learning_rate": 9.933645589383294e-06, "loss": 25.172, "step": 25374 }, { "epoch": 604.1671641791045, "grad_norm": 19.915943145751953, "learning_rate": 9.933255269320845e-06, "loss": 25.6702, "step": 25375 }, { "epoch": 604.1910447761194, "grad_norm": 27.851221084594727, "learning_rate": 9.932864949258393e-06, "loss": 24.1238, "step": 25376 }, { "epoch": 604.2149253731343, "grad_norm": 20.97184181213379, "learning_rate": 9.932474629195942e-06, "loss": 24.8914, "step": 25377 }, { "epoch": 604.2388059701492, "grad_norm": 30.542709350585938, "learning_rate": 9.93208430913349e-06, "loss": 25.5489, "step": 25378 }, { "epoch": 604.2626865671642, "grad_norm": 26.838359832763672, "learning_rate": 9.93169398907104e-06, "loss": 25.9372, "step": 25379 }, { "epoch": 604.2865671641791, "grad_norm": 23.38106918334961, "learning_rate": 9.931303669008588e-06, "loss": 26.2913, "step": 25380 }, { "epoch": 604.310447761194, "grad_norm": 23.594844818115234, "learning_rate": 9.930913348946136e-06, "loss": 24.453, "step": 25381 }, { "epoch": 604.334328358209, "grad_norm": 28.93843650817871, "learning_rate": 9.930523028883685e-06, "loss": 25.9981, "step": 25382 }, { "epoch": 604.3582089552239, "grad_norm": 24.465314865112305, "learning_rate": 9.930132708821234e-06, "loss": 25.6178, "step": 25383 }, { "epoch": 604.3820895522388, "grad_norm": 23.687274932861328, "learning_rate": 9.929742388758782e-06, "loss": 25.8336, "step": 25384 }, { "epoch": 604.4059701492537, "grad_norm": 34.38788986206055, "learning_rate": 9.929352068696333e-06, "loss": 25.2804, "step": 25385 }, { "epoch": 604.4298507462687, "grad_norm": 23.750019073486328, "learning_rate": 9.928961748633881e-06, "loss": 24.784, "step": 25386 }, { "epoch": 604.4537313432836, "grad_norm": 23.371463775634766, "learning_rate": 9.92857142857143e-06, "loss": 25.8545, "step": 25387 }, { "epoch": 604.4776119402985, "grad_norm": 32.78957748413086, "learning_rate": 9.928181108508978e-06, "loss": 26.0861, "step": 25388 }, { "epoch": 604.5014925373134, "grad_norm": 24.566471099853516, "learning_rate": 9.927790788446527e-06, "loss": 25.4662, "step": 25389 }, { "epoch": 604.5253731343283, "grad_norm": 25.741518020629883, "learning_rate": 9.927400468384076e-06, "loss": 25.9837, "step": 25390 }, { "epoch": 604.5492537313432, "grad_norm": 28.28160285949707, "learning_rate": 9.927010148321624e-06, "loss": 25.0581, "step": 25391 }, { "epoch": 604.5731343283583, "grad_norm": 25.738037109375, "learning_rate": 9.926619828259173e-06, "loss": 24.6654, "step": 25392 }, { "epoch": 604.5970149253732, "grad_norm": 21.11688232421875, "learning_rate": 9.926229508196722e-06, "loss": 25.7802, "step": 25393 }, { "epoch": 604.6208955223881, "grad_norm": 26.948467254638672, "learning_rate": 9.92583918813427e-06, "loss": 25.8682, "step": 25394 }, { "epoch": 604.644776119403, "grad_norm": 28.640905380249023, "learning_rate": 9.92544886807182e-06, "loss": 25.224, "step": 25395 }, { "epoch": 604.6686567164179, "grad_norm": 24.635211944580078, "learning_rate": 9.925058548009369e-06, "loss": 24.8489, "step": 25396 }, { "epoch": 604.6925373134328, "grad_norm": 22.29700469970703, "learning_rate": 9.924668227946918e-06, "loss": 25.3874, "step": 25397 }, { "epoch": 604.7164179104477, "grad_norm": 22.53318214416504, "learning_rate": 9.924277907884466e-06, "loss": 25.3039, "step": 25398 }, { "epoch": 604.7402985074627, "grad_norm": 22.219810485839844, "learning_rate": 9.923887587822015e-06, "loss": 24.6556, "step": 25399 }, { "epoch": 604.7641791044776, "grad_norm": 27.302358627319336, "learning_rate": 9.923497267759564e-06, "loss": 25.0738, "step": 25400 }, { "epoch": 604.7880597014926, "grad_norm": 24.54825210571289, "learning_rate": 9.923106947697112e-06, "loss": 25.7797, "step": 25401 }, { "epoch": 604.8119402985075, "grad_norm": 22.452058792114258, "learning_rate": 9.92271662763466e-06, "loss": 24.502, "step": 25402 }, { "epoch": 604.8358208955224, "grad_norm": 21.530773162841797, "learning_rate": 9.92232630757221e-06, "loss": 24.2042, "step": 25403 }, { "epoch": 604.8597014925373, "grad_norm": 24.979511260986328, "learning_rate": 9.921935987509758e-06, "loss": 25.6647, "step": 25404 }, { "epoch": 604.8835820895522, "grad_norm": 23.81471824645996, "learning_rate": 9.921545667447308e-06, "loss": 24.3325, "step": 25405 }, { "epoch": 604.9074626865672, "grad_norm": 22.6749324798584, "learning_rate": 9.921155347384857e-06, "loss": 25.6678, "step": 25406 }, { "epoch": 604.9313432835821, "grad_norm": 20.739168167114258, "learning_rate": 9.920765027322406e-06, "loss": 25.2294, "step": 25407 }, { "epoch": 604.955223880597, "grad_norm": 24.252458572387695, "learning_rate": 9.920374707259954e-06, "loss": 25.2469, "step": 25408 }, { "epoch": 604.9791044776119, "grad_norm": 25.795175552368164, "learning_rate": 9.919984387197503e-06, "loss": 25.3455, "step": 25409 }, { "epoch": 605.0, "grad_norm": 21.93946647644043, "learning_rate": 9.919594067135051e-06, "loss": 22.077, "step": 25410 }, { "epoch": 605.0238805970149, "grad_norm": 24.054697036743164, "learning_rate": 9.9192037470726e-06, "loss": 24.3907, "step": 25411 }, { "epoch": 605.0477611940298, "grad_norm": 19.69785499572754, "learning_rate": 9.918813427010149e-06, "loss": 25.6029, "step": 25412 }, { "epoch": 605.0716417910447, "grad_norm": 23.090755462646484, "learning_rate": 9.918423106947697e-06, "loss": 26.0986, "step": 25413 }, { "epoch": 605.0955223880597, "grad_norm": 22.396129608154297, "learning_rate": 9.918032786885246e-06, "loss": 26.0728, "step": 25414 }, { "epoch": 605.1194029850747, "grad_norm": 24.90531349182129, "learning_rate": 9.917642466822796e-06, "loss": 25.8323, "step": 25415 }, { "epoch": 605.1432835820896, "grad_norm": 21.791433334350586, "learning_rate": 9.917252146760345e-06, "loss": 25.2666, "step": 25416 }, { "epoch": 605.1671641791045, "grad_norm": 22.720182418823242, "learning_rate": 9.916861826697893e-06, "loss": 24.6951, "step": 25417 }, { "epoch": 605.1910447761194, "grad_norm": 22.602121353149414, "learning_rate": 9.916471506635442e-06, "loss": 25.7815, "step": 25418 }, { "epoch": 605.2149253731343, "grad_norm": 29.794843673706055, "learning_rate": 9.91608118657299e-06, "loss": 24.3196, "step": 25419 }, { "epoch": 605.2388059701492, "grad_norm": 27.180782318115234, "learning_rate": 9.91569086651054e-06, "loss": 25.4764, "step": 25420 }, { "epoch": 605.2626865671642, "grad_norm": 21.611343383789062, "learning_rate": 9.915300546448088e-06, "loss": 25.5835, "step": 25421 }, { "epoch": 605.2865671641791, "grad_norm": 19.98012351989746, "learning_rate": 9.914910226385636e-06, "loss": 24.8438, "step": 25422 }, { "epoch": 605.310447761194, "grad_norm": 22.28644371032715, "learning_rate": 9.914519906323185e-06, "loss": 26.4991, "step": 25423 }, { "epoch": 605.334328358209, "grad_norm": 20.963783264160156, "learning_rate": 9.914129586260734e-06, "loss": 24.2675, "step": 25424 }, { "epoch": 605.3582089552239, "grad_norm": 20.81569480895996, "learning_rate": 9.913739266198284e-06, "loss": 24.9756, "step": 25425 }, { "epoch": 605.3820895522388, "grad_norm": 22.248781204223633, "learning_rate": 9.913348946135833e-06, "loss": 25.2955, "step": 25426 }, { "epoch": 605.4059701492537, "grad_norm": 25.36286163330078, "learning_rate": 9.912958626073381e-06, "loss": 25.7043, "step": 25427 }, { "epoch": 605.4298507462687, "grad_norm": 22.679466247558594, "learning_rate": 9.91256830601093e-06, "loss": 24.6004, "step": 25428 }, { "epoch": 605.4537313432836, "grad_norm": 25.45107078552246, "learning_rate": 9.912177985948479e-06, "loss": 24.5775, "step": 25429 }, { "epoch": 605.4776119402985, "grad_norm": 22.04071044921875, "learning_rate": 9.911787665886027e-06, "loss": 25.6196, "step": 25430 }, { "epoch": 605.5014925373134, "grad_norm": 24.541973114013672, "learning_rate": 9.911397345823576e-06, "loss": 25.4959, "step": 25431 }, { "epoch": 605.5253731343283, "grad_norm": 24.244237899780273, "learning_rate": 9.911007025761124e-06, "loss": 25.0749, "step": 25432 }, { "epoch": 605.5492537313432, "grad_norm": 24.483753204345703, "learning_rate": 9.910616705698673e-06, "loss": 25.1984, "step": 25433 }, { "epoch": 605.5731343283583, "grad_norm": 23.428966522216797, "learning_rate": 9.910226385636222e-06, "loss": 24.7594, "step": 25434 }, { "epoch": 605.5970149253732, "grad_norm": 24.966047286987305, "learning_rate": 9.909836065573772e-06, "loss": 26.1009, "step": 25435 }, { "epoch": 605.6208955223881, "grad_norm": 29.053747177124023, "learning_rate": 9.90944574551132e-06, "loss": 25.1132, "step": 25436 }, { "epoch": 605.644776119403, "grad_norm": 31.83943748474121, "learning_rate": 9.909055425448869e-06, "loss": 26.0321, "step": 25437 }, { "epoch": 605.6686567164179, "grad_norm": 23.647754669189453, "learning_rate": 9.908665105386418e-06, "loss": 25.5087, "step": 25438 }, { "epoch": 605.6925373134328, "grad_norm": 33.11470031738281, "learning_rate": 9.908274785323966e-06, "loss": 25.326, "step": 25439 }, { "epoch": 605.7164179104477, "grad_norm": 29.39484405517578, "learning_rate": 9.907884465261515e-06, "loss": 25.2832, "step": 25440 }, { "epoch": 605.7402985074627, "grad_norm": 23.177297592163086, "learning_rate": 9.907494145199064e-06, "loss": 25.0774, "step": 25441 }, { "epoch": 605.7641791044776, "grad_norm": 30.19684410095215, "learning_rate": 9.907103825136612e-06, "loss": 25.143, "step": 25442 }, { "epoch": 605.7880597014926, "grad_norm": 27.684965133666992, "learning_rate": 9.90671350507416e-06, "loss": 24.6438, "step": 25443 }, { "epoch": 605.8119402985075, "grad_norm": 21.740859985351562, "learning_rate": 9.906323185011711e-06, "loss": 25.9214, "step": 25444 }, { "epoch": 605.8358208955224, "grad_norm": 22.596054077148438, "learning_rate": 9.90593286494926e-06, "loss": 24.506, "step": 25445 }, { "epoch": 605.8597014925373, "grad_norm": 23.899045944213867, "learning_rate": 9.905542544886808e-06, "loss": 25.9311, "step": 25446 }, { "epoch": 605.8835820895522, "grad_norm": 26.7908878326416, "learning_rate": 9.905152224824357e-06, "loss": 26.0751, "step": 25447 }, { "epoch": 605.9074626865672, "grad_norm": 26.55275535583496, "learning_rate": 9.904761904761906e-06, "loss": 25.1235, "step": 25448 }, { "epoch": 605.9313432835821, "grad_norm": 24.953807830810547, "learning_rate": 9.904371584699454e-06, "loss": 26.256, "step": 25449 }, { "epoch": 605.955223880597, "grad_norm": 22.06060028076172, "learning_rate": 9.903981264637003e-06, "loss": 24.7696, "step": 25450 }, { "epoch": 605.9791044776119, "grad_norm": 22.635913848876953, "learning_rate": 9.903590944574551e-06, "loss": 25.7477, "step": 25451 }, { "epoch": 606.0, "grad_norm": 22.171985626220703, "learning_rate": 9.9032006245121e-06, "loss": 21.7339, "step": 25452 }, { "epoch": 606.0238805970149, "grad_norm": 23.75839614868164, "learning_rate": 9.902810304449649e-06, "loss": 26.0003, "step": 25453 }, { "epoch": 606.0477611940298, "grad_norm": 22.36174774169922, "learning_rate": 9.902419984387199e-06, "loss": 25.2082, "step": 25454 }, { "epoch": 606.0716417910447, "grad_norm": 26.380908966064453, "learning_rate": 9.902029664324748e-06, "loss": 24.987, "step": 25455 }, { "epoch": 606.0955223880597, "grad_norm": 24.884618759155273, "learning_rate": 9.901639344262296e-06, "loss": 25.2254, "step": 25456 }, { "epoch": 606.1194029850747, "grad_norm": 24.45013999938965, "learning_rate": 9.901249024199845e-06, "loss": 25.535, "step": 25457 }, { "epoch": 606.1432835820896, "grad_norm": 22.078426361083984, "learning_rate": 9.900858704137393e-06, "loss": 24.3858, "step": 25458 }, { "epoch": 606.1671641791045, "grad_norm": 21.113130569458008, "learning_rate": 9.900468384074942e-06, "loss": 25.9577, "step": 25459 }, { "epoch": 606.1910447761194, "grad_norm": 26.67395782470703, "learning_rate": 9.90007806401249e-06, "loss": 24.3968, "step": 25460 }, { "epoch": 606.2149253731343, "grad_norm": 29.51879119873047, "learning_rate": 9.89968774395004e-06, "loss": 25.9138, "step": 25461 }, { "epoch": 606.2388059701492, "grad_norm": 19.34926986694336, "learning_rate": 9.899297423887588e-06, "loss": 24.6674, "step": 25462 }, { "epoch": 606.2626865671642, "grad_norm": 36.34192657470703, "learning_rate": 9.898907103825137e-06, "loss": 25.8715, "step": 25463 }, { "epoch": 606.2865671641791, "grad_norm": 29.10862159729004, "learning_rate": 9.898516783762687e-06, "loss": 25.1501, "step": 25464 }, { "epoch": 606.310447761194, "grad_norm": 26.004749298095703, "learning_rate": 9.898126463700235e-06, "loss": 25.1524, "step": 25465 }, { "epoch": 606.334328358209, "grad_norm": 32.490718841552734, "learning_rate": 9.897736143637784e-06, "loss": 25.9564, "step": 25466 }, { "epoch": 606.3582089552239, "grad_norm": 32.59740447998047, "learning_rate": 9.897345823575333e-06, "loss": 24.8277, "step": 25467 }, { "epoch": 606.3820895522388, "grad_norm": 27.013883590698242, "learning_rate": 9.896955503512881e-06, "loss": 25.8106, "step": 25468 }, { "epoch": 606.4059701492537, "grad_norm": 24.20184898376465, "learning_rate": 9.89656518345043e-06, "loss": 25.2283, "step": 25469 }, { "epoch": 606.4298507462687, "grad_norm": 30.53286361694336, "learning_rate": 9.89617486338798e-06, "loss": 26.3797, "step": 25470 }, { "epoch": 606.4537313432836, "grad_norm": 25.33515739440918, "learning_rate": 9.895784543325527e-06, "loss": 24.3347, "step": 25471 }, { "epoch": 606.4776119402985, "grad_norm": 26.192121505737305, "learning_rate": 9.895394223263076e-06, "loss": 24.9707, "step": 25472 }, { "epoch": 606.5014925373134, "grad_norm": 25.44937515258789, "learning_rate": 9.895003903200624e-06, "loss": 25.9869, "step": 25473 }, { "epoch": 606.5253731343283, "grad_norm": 23.086130142211914, "learning_rate": 9.894613583138175e-06, "loss": 24.18, "step": 25474 }, { "epoch": 606.5492537313432, "grad_norm": 27.953718185424805, "learning_rate": 9.894223263075723e-06, "loss": 25.2677, "step": 25475 }, { "epoch": 606.5731343283583, "grad_norm": 27.436752319335938, "learning_rate": 9.893832943013272e-06, "loss": 26.1839, "step": 25476 }, { "epoch": 606.5970149253732, "grad_norm": 20.546165466308594, "learning_rate": 9.89344262295082e-06, "loss": 25.2743, "step": 25477 }, { "epoch": 606.6208955223881, "grad_norm": 26.954730987548828, "learning_rate": 9.89305230288837e-06, "loss": 24.2864, "step": 25478 }, { "epoch": 606.644776119403, "grad_norm": 28.78386878967285, "learning_rate": 9.892661982825918e-06, "loss": 25.3638, "step": 25479 }, { "epoch": 606.6686567164179, "grad_norm": 24.949113845825195, "learning_rate": 9.892271662763468e-06, "loss": 25.34, "step": 25480 }, { "epoch": 606.6925373134328, "grad_norm": 21.693727493286133, "learning_rate": 9.891881342701015e-06, "loss": 25.6599, "step": 25481 }, { "epoch": 606.7164179104477, "grad_norm": 23.004314422607422, "learning_rate": 9.891491022638564e-06, "loss": 24.8117, "step": 25482 }, { "epoch": 606.7402985074627, "grad_norm": 22.43446159362793, "learning_rate": 9.891100702576112e-06, "loss": 24.9404, "step": 25483 }, { "epoch": 606.7641791044776, "grad_norm": 30.287145614624023, "learning_rate": 9.890710382513663e-06, "loss": 25.5859, "step": 25484 }, { "epoch": 606.7880597014926, "grad_norm": 26.567161560058594, "learning_rate": 9.890320062451211e-06, "loss": 25.6725, "step": 25485 }, { "epoch": 606.8119402985075, "grad_norm": 18.930328369140625, "learning_rate": 9.88992974238876e-06, "loss": 25.0928, "step": 25486 }, { "epoch": 606.8358208955224, "grad_norm": 24.11359214782715, "learning_rate": 9.889539422326308e-06, "loss": 25.2193, "step": 25487 }, { "epoch": 606.8597014925373, "grad_norm": 35.72883224487305, "learning_rate": 9.889149102263857e-06, "loss": 25.6212, "step": 25488 }, { "epoch": 606.8835820895522, "grad_norm": 23.187461853027344, "learning_rate": 9.888758782201406e-06, "loss": 26.0305, "step": 25489 }, { "epoch": 606.9074626865672, "grad_norm": 22.7213191986084, "learning_rate": 9.888368462138956e-06, "loss": 24.6629, "step": 25490 }, { "epoch": 606.9313432835821, "grad_norm": 28.147794723510742, "learning_rate": 9.887978142076503e-06, "loss": 25.6639, "step": 25491 }, { "epoch": 606.955223880597, "grad_norm": 26.872093200683594, "learning_rate": 9.887587822014052e-06, "loss": 24.7394, "step": 25492 }, { "epoch": 606.9791044776119, "grad_norm": 25.364891052246094, "learning_rate": 9.8871975019516e-06, "loss": 25.5847, "step": 25493 }, { "epoch": 607.0, "grad_norm": 20.620765686035156, "learning_rate": 9.88680718188915e-06, "loss": 21.7132, "step": 25494 }, { "epoch": 607.0238805970149, "grad_norm": 22.88150978088379, "learning_rate": 9.886416861826699e-06, "loss": 26.6486, "step": 25495 }, { "epoch": 607.0477611940298, "grad_norm": 20.45751190185547, "learning_rate": 9.886026541764248e-06, "loss": 26.1807, "step": 25496 }, { "epoch": 607.0716417910447, "grad_norm": 25.948596954345703, "learning_rate": 9.885636221701796e-06, "loss": 25.2907, "step": 25497 }, { "epoch": 607.0955223880597, "grad_norm": 24.330801010131836, "learning_rate": 9.885245901639345e-06, "loss": 25.377, "step": 25498 }, { "epoch": 607.1194029850747, "grad_norm": 22.788928985595703, "learning_rate": 9.884855581576894e-06, "loss": 24.9426, "step": 25499 }, { "epoch": 607.1432835820896, "grad_norm": 23.00371551513672, "learning_rate": 9.884465261514444e-06, "loss": 25.1052, "step": 25500 }, { "epoch": 607.1671641791045, "grad_norm": 23.77033805847168, "learning_rate": 9.88407494145199e-06, "loss": 25.0581, "step": 25501 }, { "epoch": 607.1910447761194, "grad_norm": 22.43863868713379, "learning_rate": 9.88368462138954e-06, "loss": 24.9577, "step": 25502 }, { "epoch": 607.2149253731343, "grad_norm": 25.590076446533203, "learning_rate": 9.883294301327088e-06, "loss": 25.615, "step": 25503 }, { "epoch": 607.2388059701492, "grad_norm": 21.24906349182129, "learning_rate": 9.882903981264638e-06, "loss": 25.1388, "step": 25504 }, { "epoch": 607.2626865671642, "grad_norm": 29.904142379760742, "learning_rate": 9.882513661202187e-06, "loss": 24.7442, "step": 25505 }, { "epoch": 607.2865671641791, "grad_norm": 32.56971740722656, "learning_rate": 9.882123341139736e-06, "loss": 25.4384, "step": 25506 }, { "epoch": 607.310447761194, "grad_norm": 27.28400230407715, "learning_rate": 9.881733021077284e-06, "loss": 25.5275, "step": 25507 }, { "epoch": 607.334328358209, "grad_norm": 26.52834701538086, "learning_rate": 9.881342701014833e-06, "loss": 24.996, "step": 25508 }, { "epoch": 607.3582089552239, "grad_norm": 40.56324768066406, "learning_rate": 9.880952380952381e-06, "loss": 24.8566, "step": 25509 }, { "epoch": 607.3820895522388, "grad_norm": 26.332483291625977, "learning_rate": 9.880562060889932e-06, "loss": 24.1649, "step": 25510 }, { "epoch": 607.4059701492537, "grad_norm": 40.1367073059082, "learning_rate": 9.880171740827479e-06, "loss": 24.7971, "step": 25511 }, { "epoch": 607.4298507462687, "grad_norm": 35.280818939208984, "learning_rate": 9.879781420765027e-06, "loss": 26.3821, "step": 25512 }, { "epoch": 607.4537313432836, "grad_norm": 29.55442237854004, "learning_rate": 9.879391100702576e-06, "loss": 24.4943, "step": 25513 }, { "epoch": 607.4776119402985, "grad_norm": 37.04280090332031, "learning_rate": 9.879000780640126e-06, "loss": 25.14, "step": 25514 }, { "epoch": 607.5014925373134, "grad_norm": 24.97962760925293, "learning_rate": 9.878610460577675e-06, "loss": 25.0595, "step": 25515 }, { "epoch": 607.5253731343283, "grad_norm": 41.6199951171875, "learning_rate": 9.878220140515223e-06, "loss": 26.1297, "step": 25516 }, { "epoch": 607.5492537313432, "grad_norm": 30.03373146057129, "learning_rate": 9.877829820452772e-06, "loss": 24.6604, "step": 25517 }, { "epoch": 607.5731343283583, "grad_norm": 36.85375213623047, "learning_rate": 9.87743950039032e-06, "loss": 24.6415, "step": 25518 }, { "epoch": 607.5970149253732, "grad_norm": 33.74565505981445, "learning_rate": 9.87704918032787e-06, "loss": 25.0959, "step": 25519 }, { "epoch": 607.6208955223881, "grad_norm": 25.890426635742188, "learning_rate": 9.87665886026542e-06, "loss": 25.1889, "step": 25520 }, { "epoch": 607.644776119403, "grad_norm": 45.339988708496094, "learning_rate": 9.876268540202966e-06, "loss": 25.1377, "step": 25521 }, { "epoch": 607.6686567164179, "grad_norm": 29.542776107788086, "learning_rate": 9.875878220140515e-06, "loss": 24.4035, "step": 25522 }, { "epoch": 607.6925373134328, "grad_norm": 36.981197357177734, "learning_rate": 9.875487900078065e-06, "loss": 25.7365, "step": 25523 }, { "epoch": 607.7164179104477, "grad_norm": 30.78512954711914, "learning_rate": 9.875097580015614e-06, "loss": 24.926, "step": 25524 }, { "epoch": 607.7402985074627, "grad_norm": 30.746496200561523, "learning_rate": 9.874707259953163e-06, "loss": 25.7196, "step": 25525 }, { "epoch": 607.7641791044776, "grad_norm": 43.969093322753906, "learning_rate": 9.874316939890711e-06, "loss": 25.7208, "step": 25526 }, { "epoch": 607.7880597014926, "grad_norm": 29.186382293701172, "learning_rate": 9.87392661982826e-06, "loss": 25.1007, "step": 25527 }, { "epoch": 607.8119402985075, "grad_norm": 47.83816146850586, "learning_rate": 9.873536299765808e-06, "loss": 25.2476, "step": 25528 }, { "epoch": 607.8358208955224, "grad_norm": 32.961116790771484, "learning_rate": 9.873145979703359e-06, "loss": 26.2056, "step": 25529 }, { "epoch": 607.8597014925373, "grad_norm": 56.277008056640625, "learning_rate": 9.872755659640907e-06, "loss": 25.6692, "step": 25530 }, { "epoch": 607.8835820895522, "grad_norm": 36.40987014770508, "learning_rate": 9.872365339578454e-06, "loss": 24.3047, "step": 25531 }, { "epoch": 607.9074626865672, "grad_norm": 63.646644592285156, "learning_rate": 9.871975019516003e-06, "loss": 25.3312, "step": 25532 }, { "epoch": 607.9313432835821, "grad_norm": 67.00436401367188, "learning_rate": 9.871584699453553e-06, "loss": 26.0302, "step": 25533 }, { "epoch": 607.955223880597, "grad_norm": 29.919971466064453, "learning_rate": 9.871194379391102e-06, "loss": 25.1985, "step": 25534 }, { "epoch": 607.9791044776119, "grad_norm": 39.47930145263672, "learning_rate": 9.87080405932865e-06, "loss": 24.8802, "step": 25535 }, { "epoch": 608.0, "grad_norm": 30.086702346801758, "learning_rate": 9.870413739266199e-06, "loss": 22.7714, "step": 25536 }, { "epoch": 608.0238805970149, "grad_norm": 28.491975784301758, "learning_rate": 9.870023419203748e-06, "loss": 25.0503, "step": 25537 }, { "epoch": 608.0477611940298, "grad_norm": 45.581581115722656, "learning_rate": 9.869633099141296e-06, "loss": 25.129, "step": 25538 }, { "epoch": 608.0716417910447, "grad_norm": 35.86201477050781, "learning_rate": 9.869242779078847e-06, "loss": 24.9986, "step": 25539 }, { "epoch": 608.0955223880597, "grad_norm": 55.162071228027344, "learning_rate": 9.868852459016395e-06, "loss": 24.7016, "step": 25540 }, { "epoch": 608.1194029850747, "grad_norm": 46.01496887207031, "learning_rate": 9.868462138953942e-06, "loss": 24.7492, "step": 25541 }, { "epoch": 608.1432835820896, "grad_norm": 46.031089782714844, "learning_rate": 9.86807181889149e-06, "loss": 25.2049, "step": 25542 }, { "epoch": 608.1671641791045, "grad_norm": 45.283119201660156, "learning_rate": 9.867681498829041e-06, "loss": 25.6378, "step": 25543 }, { "epoch": 608.1910447761194, "grad_norm": 42.441585540771484, "learning_rate": 9.86729117876659e-06, "loss": 24.713, "step": 25544 }, { "epoch": 608.2149253731343, "grad_norm": 36.776153564453125, "learning_rate": 9.866900858704138e-06, "loss": 25.344, "step": 25545 }, { "epoch": 608.2388059701492, "grad_norm": 48.09455871582031, "learning_rate": 9.866510538641687e-06, "loss": 25.5968, "step": 25546 }, { "epoch": 608.2626865671642, "grad_norm": 40.65133285522461, "learning_rate": 9.866120218579236e-06, "loss": 24.5276, "step": 25547 }, { "epoch": 608.2865671641791, "grad_norm": 48.02949142456055, "learning_rate": 9.865729898516784e-06, "loss": 24.7639, "step": 25548 }, { "epoch": 608.310447761194, "grad_norm": 46.6374397277832, "learning_rate": 9.865339578454335e-06, "loss": 25.0713, "step": 25549 }, { "epoch": 608.334328358209, "grad_norm": 38.023075103759766, "learning_rate": 9.864949258391883e-06, "loss": 25.8499, "step": 25550 }, { "epoch": 608.3582089552239, "grad_norm": 34.73591613769531, "learning_rate": 9.86455893832943e-06, "loss": 24.6451, "step": 25551 }, { "epoch": 608.3820895522388, "grad_norm": 43.980899810791016, "learning_rate": 9.864168618266979e-06, "loss": 24.3878, "step": 25552 }, { "epoch": 608.4059701492537, "grad_norm": 39.1434211730957, "learning_rate": 9.863778298204529e-06, "loss": 24.842, "step": 25553 }, { "epoch": 608.4298507462687, "grad_norm": 43.52046203613281, "learning_rate": 9.863387978142078e-06, "loss": 25.8955, "step": 25554 }, { "epoch": 608.4537313432836, "grad_norm": 42.185794830322266, "learning_rate": 9.862997658079626e-06, "loss": 24.3269, "step": 25555 }, { "epoch": 608.4776119402985, "grad_norm": 42.85014724731445, "learning_rate": 9.862607338017175e-06, "loss": 25.4997, "step": 25556 }, { "epoch": 608.5014925373134, "grad_norm": 37.37519073486328, "learning_rate": 9.862217017954723e-06, "loss": 25.0605, "step": 25557 }, { "epoch": 608.5253731343283, "grad_norm": 38.054290771484375, "learning_rate": 9.861826697892272e-06, "loss": 24.9323, "step": 25558 }, { "epoch": 608.5492537313432, "grad_norm": 30.635469436645508, "learning_rate": 9.861436377829822e-06, "loss": 23.9498, "step": 25559 }, { "epoch": 608.5731343283583, "grad_norm": 48.011474609375, "learning_rate": 9.861046057767371e-06, "loss": 24.6117, "step": 25560 }, { "epoch": 608.5970149253732, "grad_norm": 37.32808303833008, "learning_rate": 9.860655737704918e-06, "loss": 26.4729, "step": 25561 }, { "epoch": 608.6208955223881, "grad_norm": 42.975257873535156, "learning_rate": 9.860265417642467e-06, "loss": 24.9591, "step": 25562 }, { "epoch": 608.644776119403, "grad_norm": 40.93128967285156, "learning_rate": 9.859875097580017e-06, "loss": 24.3708, "step": 25563 }, { "epoch": 608.6686567164179, "grad_norm": 43.22646713256836, "learning_rate": 9.859484777517565e-06, "loss": 26.1691, "step": 25564 }, { "epoch": 608.6925373134328, "grad_norm": 37.55772018432617, "learning_rate": 9.859094457455114e-06, "loss": 24.5051, "step": 25565 }, { "epoch": 608.7164179104477, "grad_norm": 42.93619918823242, "learning_rate": 9.858704137392663e-06, "loss": 26.0061, "step": 25566 }, { "epoch": 608.7402985074627, "grad_norm": 36.41124725341797, "learning_rate": 9.858313817330211e-06, "loss": 25.8298, "step": 25567 }, { "epoch": 608.7641791044776, "grad_norm": 43.60440444946289, "learning_rate": 9.85792349726776e-06, "loss": 25.7044, "step": 25568 }, { "epoch": 608.7880597014926, "grad_norm": 36.794044494628906, "learning_rate": 9.85753317720531e-06, "loss": 25.307, "step": 25569 }, { "epoch": 608.8119402985075, "grad_norm": 38.97314453125, "learning_rate": 9.857142857142859e-06, "loss": 26.3901, "step": 25570 }, { "epoch": 608.8358208955224, "grad_norm": 34.44176483154297, "learning_rate": 9.856752537080406e-06, "loss": 25.8168, "step": 25571 }, { "epoch": 608.8597014925373, "grad_norm": 37.483360290527344, "learning_rate": 9.856362217017954e-06, "loss": 25.8442, "step": 25572 }, { "epoch": 608.8835820895522, "grad_norm": 30.55111312866211, "learning_rate": 9.855971896955505e-06, "loss": 24.749, "step": 25573 }, { "epoch": 608.9074626865672, "grad_norm": 38.54761505126953, "learning_rate": 9.855581576893053e-06, "loss": 23.953, "step": 25574 }, { "epoch": 608.9313432835821, "grad_norm": 29.73417091369629, "learning_rate": 9.855191256830602e-06, "loss": 25.5851, "step": 25575 }, { "epoch": 608.955223880597, "grad_norm": 41.780548095703125, "learning_rate": 9.85480093676815e-06, "loss": 26.1442, "step": 25576 }, { "epoch": 608.9791044776119, "grad_norm": 31.17083740234375, "learning_rate": 9.8544106167057e-06, "loss": 25.3258, "step": 25577 }, { "epoch": 609.0, "grad_norm": 33.97274398803711, "learning_rate": 9.854020296643248e-06, "loss": 22.779, "step": 25578 }, { "epoch": 609.0238805970149, "grad_norm": 32.742393493652344, "learning_rate": 9.853629976580798e-06, "loss": 25.3953, "step": 25579 }, { "epoch": 609.0477611940298, "grad_norm": 39.73828887939453, "learning_rate": 9.853239656518347e-06, "loss": 24.0822, "step": 25580 }, { "epoch": 609.0716417910447, "grad_norm": 30.852848052978516, "learning_rate": 9.852849336455894e-06, "loss": 23.9897, "step": 25581 }, { "epoch": 609.0955223880597, "grad_norm": 39.87790298461914, "learning_rate": 9.852459016393442e-06, "loss": 24.9162, "step": 25582 }, { "epoch": 609.1194029850747, "grad_norm": 35.04434585571289, "learning_rate": 9.852068696330993e-06, "loss": 25.454, "step": 25583 }, { "epoch": 609.1432835820896, "grad_norm": 34.69393539428711, "learning_rate": 9.851678376268541e-06, "loss": 25.3237, "step": 25584 }, { "epoch": 609.1671641791045, "grad_norm": 33.38497543334961, "learning_rate": 9.85128805620609e-06, "loss": 24.7944, "step": 25585 }, { "epoch": 609.1910447761194, "grad_norm": 35.11175537109375, "learning_rate": 9.850897736143638e-06, "loss": 25.3514, "step": 25586 }, { "epoch": 609.2149253731343, "grad_norm": 29.96240997314453, "learning_rate": 9.850507416081187e-06, "loss": 25.5848, "step": 25587 }, { "epoch": 609.2388059701492, "grad_norm": 33.54533004760742, "learning_rate": 9.850117096018736e-06, "loss": 25.2976, "step": 25588 }, { "epoch": 609.2626865671642, "grad_norm": 29.880247116088867, "learning_rate": 9.849726775956286e-06, "loss": 26.0233, "step": 25589 }, { "epoch": 609.2865671641791, "grad_norm": 32.85261154174805, "learning_rate": 9.849336455893835e-06, "loss": 24.8787, "step": 25590 }, { "epoch": 609.310447761194, "grad_norm": 27.391265869140625, "learning_rate": 9.848946135831381e-06, "loss": 24.4917, "step": 25591 }, { "epoch": 609.334328358209, "grad_norm": 36.940452575683594, "learning_rate": 9.84855581576893e-06, "loss": 25.7634, "step": 25592 }, { "epoch": 609.3582089552239, "grad_norm": 27.73214340209961, "learning_rate": 9.84816549570648e-06, "loss": 24.7708, "step": 25593 }, { "epoch": 609.3820895522388, "grad_norm": 40.06153869628906, "learning_rate": 9.847775175644029e-06, "loss": 24.6121, "step": 25594 }, { "epoch": 609.4059701492537, "grad_norm": 33.15467071533203, "learning_rate": 9.847384855581578e-06, "loss": 25.7712, "step": 25595 }, { "epoch": 609.4298507462687, "grad_norm": 38.94450378417969, "learning_rate": 9.846994535519126e-06, "loss": 25.7544, "step": 25596 }, { "epoch": 609.4537313432836, "grad_norm": 38.363895416259766, "learning_rate": 9.846604215456675e-06, "loss": 24.9173, "step": 25597 }, { "epoch": 609.4776119402985, "grad_norm": 30.43694496154785, "learning_rate": 9.846213895394223e-06, "loss": 26.4114, "step": 25598 }, { "epoch": 609.5014925373134, "grad_norm": 31.585241317749023, "learning_rate": 9.845823575331774e-06, "loss": 25.154, "step": 25599 }, { "epoch": 609.5253731343283, "grad_norm": 30.759761810302734, "learning_rate": 9.845433255269322e-06, "loss": 25.9474, "step": 25600 }, { "epoch": 609.5492537313432, "grad_norm": 35.551387786865234, "learning_rate": 9.84504293520687e-06, "loss": 24.8467, "step": 25601 }, { "epoch": 609.5731343283583, "grad_norm": 27.29801368713379, "learning_rate": 9.84465261514442e-06, "loss": 25.5874, "step": 25602 }, { "epoch": 609.5970149253732, "grad_norm": 35.65155792236328, "learning_rate": 9.844262295081968e-06, "loss": 24.9885, "step": 25603 }, { "epoch": 609.6208955223881, "grad_norm": 31.137264251708984, "learning_rate": 9.843871975019517e-06, "loss": 25.2915, "step": 25604 }, { "epoch": 609.644776119403, "grad_norm": 24.855215072631836, "learning_rate": 9.843481654957066e-06, "loss": 25.6261, "step": 25605 }, { "epoch": 609.6686567164179, "grad_norm": 32.134132385253906, "learning_rate": 9.843091334894614e-06, "loss": 24.8965, "step": 25606 }, { "epoch": 609.6925373134328, "grad_norm": 23.48820686340332, "learning_rate": 9.842701014832163e-06, "loss": 25.3864, "step": 25607 }, { "epoch": 609.7164179104477, "grad_norm": 35.26432418823242, "learning_rate": 9.842310694769713e-06, "loss": 24.2943, "step": 25608 }, { "epoch": 609.7402985074627, "grad_norm": 27.209312438964844, "learning_rate": 9.841920374707262e-06, "loss": 24.4714, "step": 25609 }, { "epoch": 609.7641791044776, "grad_norm": 36.613521575927734, "learning_rate": 9.84153005464481e-06, "loss": 24.6335, "step": 25610 }, { "epoch": 609.7880597014926, "grad_norm": 31.906259536743164, "learning_rate": 9.841139734582357e-06, "loss": 26.3843, "step": 25611 }, { "epoch": 609.8119402985075, "grad_norm": 31.463607788085938, "learning_rate": 9.840749414519908e-06, "loss": 25.7246, "step": 25612 }, { "epoch": 609.8358208955224, "grad_norm": 28.88726234436035, "learning_rate": 9.840359094457456e-06, "loss": 24.3507, "step": 25613 }, { "epoch": 609.8597014925373, "grad_norm": 30.97195816040039, "learning_rate": 9.839968774395005e-06, "loss": 25.7659, "step": 25614 }, { "epoch": 609.8835820895522, "grad_norm": 25.57737159729004, "learning_rate": 9.839578454332553e-06, "loss": 25.5268, "step": 25615 }, { "epoch": 609.9074626865672, "grad_norm": 33.579124450683594, "learning_rate": 9.839188134270102e-06, "loss": 24.1534, "step": 25616 }, { "epoch": 609.9313432835821, "grad_norm": 26.089975357055664, "learning_rate": 9.83879781420765e-06, "loss": 24.9026, "step": 25617 }, { "epoch": 609.955223880597, "grad_norm": NaN, "learning_rate": 9.838407494145201e-06, "loss": 43.9617, "step": 25618 }, { "epoch": 609.9791044776119, "grad_norm": 29.04308319091797, "learning_rate": 9.838407494145201e-06, "loss": 25.5953, "step": 25619 }, { "epoch": 610.0, "grad_norm": 26.338823318481445, "learning_rate": 9.83801717408275e-06, "loss": 23.1293, "step": 25620 }, { "epoch": 610.0, "step": 25620, "total_flos": 1.2594273123352143e+18, "train_loss": 0.416262620841033, "train_runtime": 12834.8706, "train_samples_per_second": 254.363, "train_steps_per_second": 1.996 }, { "epoch": 610.0238805970149, "grad_norm": 26.39642333984375, "learning_rate": 1e-05, "loss": 25.3171, "step": 25621 }, { "epoch": 610.0477611940298, "grad_norm": Infinity, "learning_rate": 9.999615975422428e-06, "loss": 31.312, "step": 25622 }, { "epoch": 610.0716417910447, "grad_norm": 309.4290771484375, "learning_rate": 9.999615975422428e-06, "loss": 32.1714, "step": 25623 }, { "epoch": 610.0955223880597, "grad_norm": 159.0628204345703, "learning_rate": 9.999231950844855e-06, "loss": 29.2898, "step": 25624 }, { "epoch": 610.1194029850747, "grad_norm": 107.7866439819336, "learning_rate": 9.998847926267282e-06, "loss": 28.2585, "step": 25625 }, { "epoch": 610.1432835820896, "grad_norm": 94.59452056884766, "learning_rate": 9.99846390168971e-06, "loss": 25.8618, "step": 25626 }, { "epoch": 610.1671641791045, "grad_norm": 62.13037109375, "learning_rate": 9.998079877112135e-06, "loss": 25.5489, "step": 25627 }, { "epoch": 610.1910447761194, "grad_norm": 64.3987045288086, "learning_rate": 9.997695852534564e-06, "loss": 25.1758, "step": 25628 }, { "epoch": 610.2149253731343, "grad_norm": 61.516014099121094, "learning_rate": 9.99731182795699e-06, "loss": 25.7476, "step": 25629 }, { "epoch": 610.2388059701492, "grad_norm": 44.50913619995117, "learning_rate": 9.996927803379417e-06, "loss": 25.6829, "step": 25630 }, { "epoch": 610.2626865671642, "grad_norm": 48.24209213256836, "learning_rate": 9.996543778801844e-06, "loss": 26.8866, "step": 25631 }, { "epoch": 610.2865671641791, "grad_norm": 45.295318603515625, "learning_rate": 9.996159754224271e-06, "loss": 26.4253, "step": 25632 }, { "epoch": 610.310447761194, "grad_norm": 33.64358139038086, "learning_rate": 9.995775729646698e-06, "loss": 26.0992, "step": 25633 }, { "epoch": 610.334328358209, "grad_norm": 34.28087615966797, "learning_rate": 9.995391705069125e-06, "loss": 25.6483, "step": 25634 }, { "epoch": 610.3582089552239, "grad_norm": 44.77628707885742, "learning_rate": 9.995007680491553e-06, "loss": 25.0555, "step": 25635 }, { "epoch": 610.3820895522388, "grad_norm": 31.71843719482422, "learning_rate": 9.99462365591398e-06, "loss": 26.1006, "step": 25636 }, { "epoch": 610.4059701492537, "grad_norm": 33.89853286743164, "learning_rate": 9.994239631336407e-06, "loss": 26.2409, "step": 25637 }, { "epoch": 610.4298507462687, "grad_norm": 36.94978713989258, "learning_rate": 9.993855606758833e-06, "loss": 24.9622, "step": 25638 }, { "epoch": 610.4537313432836, "grad_norm": 26.379831314086914, "learning_rate": 9.99347158218126e-06, "loss": 24.3259, "step": 25639 }, { "epoch": 610.4776119402985, "grad_norm": 31.460132598876953, "learning_rate": 9.993087557603689e-06, "loss": 26.536, "step": 25640 }, { "epoch": 610.5014925373134, "grad_norm": 32.3940315246582, "learning_rate": 9.992703533026114e-06, "loss": 24.9, "step": 25641 }, { "epoch": 610.5253731343283, "grad_norm": 29.971956253051758, "learning_rate": 9.992319508448541e-06, "loss": 25.0432, "step": 25642 }, { "epoch": 610.5492537313432, "grad_norm": 26.527297973632812, "learning_rate": 9.991935483870968e-06, "loss": 25.5842, "step": 25643 }, { "epoch": 610.5731343283583, "grad_norm": 27.106637954711914, "learning_rate": 9.991551459293396e-06, "loss": 25.1456, "step": 25644 }, { "epoch": 610.5970149253732, "grad_norm": 32.90644454956055, "learning_rate": 9.991167434715823e-06, "loss": 25.513, "step": 25645 }, { "epoch": 610.6208955223881, "grad_norm": 22.6689510345459, "learning_rate": 9.99078341013825e-06, "loss": 24.2313, "step": 25646 }, { "epoch": 610.644776119403, "grad_norm": 26.570556640625, "learning_rate": 9.990399385560676e-06, "loss": 25.9447, "step": 25647 }, { "epoch": 610.6686567164179, "grad_norm": 25.21335792541504, "learning_rate": 9.990015360983104e-06, "loss": 24.9702, "step": 25648 }, { "epoch": 610.6925373134328, "grad_norm": 30.679506301879883, "learning_rate": 9.989631336405532e-06, "loss": 26.0709, "step": 25649 }, { "epoch": 610.7164179104477, "grad_norm": 24.535852432250977, "learning_rate": 9.989247311827957e-06, "loss": 25.9287, "step": 25650 }, { "epoch": 610.7402985074627, "grad_norm": 26.010141372680664, "learning_rate": 9.988863287250384e-06, "loss": 25.1345, "step": 25651 }, { "epoch": 610.7641791044776, "grad_norm": 25.797096252441406, "learning_rate": 9.988479262672812e-06, "loss": 24.5339, "step": 25652 }, { "epoch": 610.7880597014926, "grad_norm": 26.222095489501953, "learning_rate": 9.988095238095239e-06, "loss": 24.6427, "step": 25653 }, { "epoch": 610.8119402985075, "grad_norm": 25.174264907836914, "learning_rate": 9.987711213517666e-06, "loss": 25.1054, "step": 25654 }, { "epoch": 610.8358208955224, "grad_norm": 24.023622512817383, "learning_rate": 9.987327188940093e-06, "loss": 25.5762, "step": 25655 }, { "epoch": 610.8597014925373, "grad_norm": 23.0316104888916, "learning_rate": 9.98694316436252e-06, "loss": 24.9728, "step": 25656 }, { "epoch": 610.8835820895522, "grad_norm": 23.167949676513672, "learning_rate": 9.986559139784947e-06, "loss": 25.9061, "step": 25657 }, { "epoch": 610.9074626865672, "grad_norm": 23.63789939880371, "learning_rate": 9.986175115207373e-06, "loss": 25.6914, "step": 25658 }, { "epoch": 610.9313432835821, "grad_norm": 21.00596046447754, "learning_rate": 9.985791090629802e-06, "loss": 25.4782, "step": 25659 }, { "epoch": 610.955223880597, "grad_norm": 24.41181182861328, "learning_rate": 9.985407066052229e-06, "loss": 25.1093, "step": 25660 }, { "epoch": 610.9791044776119, "grad_norm": 24.943506240844727, "learning_rate": 9.985023041474655e-06, "loss": 25.0664, "step": 25661 }, { "epoch": 611.0, "grad_norm": 24.367935180664062, "learning_rate": 9.984639016897082e-06, "loss": 21.9733, "step": 25662 }, { "epoch": 611.0238805970149, "grad_norm": 27.567567825317383, "learning_rate": 9.98425499231951e-06, "loss": 25.3815, "step": 25663 }, { "epoch": 611.0477611940298, "grad_norm": 20.51948356628418, "learning_rate": 9.983870967741936e-06, "loss": 24.3226, "step": 25664 }, { "epoch": 611.0716417910447, "grad_norm": 23.099084854125977, "learning_rate": 9.983486943164363e-06, "loss": 25.2221, "step": 25665 }, { "epoch": 611.0955223880597, "grad_norm": 21.180395126342773, "learning_rate": 9.98310291858679e-06, "loss": 26.3877, "step": 25666 }, { "epoch": 611.1194029850747, "grad_norm": 23.44700813293457, "learning_rate": 9.982718894009218e-06, "loss": 25.8687, "step": 25667 }, { "epoch": 611.1432835820896, "grad_norm": 21.925050735473633, "learning_rate": 9.982334869431645e-06, "loss": 24.9464, "step": 25668 }, { "epoch": 611.1671641791045, "grad_norm": 21.687721252441406, "learning_rate": 9.981950844854072e-06, "loss": 25.0728, "step": 25669 }, { "epoch": 611.1910447761194, "grad_norm": 22.469396591186523, "learning_rate": 9.981566820276498e-06, "loss": 25.1482, "step": 25670 }, { "epoch": 611.2149253731343, "grad_norm": 21.204984664916992, "learning_rate": 9.981182795698926e-06, "loss": 25.0368, "step": 25671 }, { "epoch": 611.2388059701492, "grad_norm": 26.084671020507812, "learning_rate": 9.980798771121352e-06, "loss": 25.1276, "step": 25672 }, { "epoch": 611.2626865671642, "grad_norm": 23.70815658569336, "learning_rate": 9.98041474654378e-06, "loss": 24.7399, "step": 25673 }, { "epoch": 611.2865671641791, "grad_norm": 27.56496238708496, "learning_rate": 9.980030721966206e-06, "loss": 25.32, "step": 25674 }, { "epoch": 611.310447761194, "grad_norm": 24.951242446899414, "learning_rate": 9.979646697388634e-06, "loss": 25.3563, "step": 25675 }, { "epoch": 611.334328358209, "grad_norm": 19.4776554107666, "learning_rate": 9.97926267281106e-06, "loss": 25.4425, "step": 25676 }, { "epoch": 611.3582089552239, "grad_norm": 25.940279006958008, "learning_rate": 9.978878648233488e-06, "loss": 24.9546, "step": 25677 }, { "epoch": 611.3820895522388, "grad_norm": 28.162708282470703, "learning_rate": 9.978494623655915e-06, "loss": 25.3697, "step": 25678 }, { "epoch": 611.4059701492537, "grad_norm": 28.221967697143555, "learning_rate": 9.978110599078342e-06, "loss": 24.77, "step": 25679 }, { "epoch": 611.4298507462687, "grad_norm": 25.061935424804688, "learning_rate": 9.97772657450077e-06, "loss": 24.1835, "step": 25680 }, { "epoch": 611.4537313432836, "grad_norm": 24.419485092163086, "learning_rate": 9.977342549923195e-06, "loss": 24.8014, "step": 25681 }, { "epoch": 611.4776119402985, "grad_norm": 30.198593139648438, "learning_rate": 9.976958525345622e-06, "loss": 24.8441, "step": 25682 }, { "epoch": 611.5014925373134, "grad_norm": 25.07428741455078, "learning_rate": 9.976574500768051e-06, "loss": 25.762, "step": 25683 }, { "epoch": 611.5253731343283, "grad_norm": 23.741594314575195, "learning_rate": 9.976190476190477e-06, "loss": 26.7813, "step": 25684 }, { "epoch": 611.5492537313432, "grad_norm": 22.810001373291016, "learning_rate": 9.975806451612904e-06, "loss": 25.2584, "step": 25685 }, { "epoch": 611.5731343283583, "grad_norm": 26.202970504760742, "learning_rate": 9.975422427035331e-06, "loss": 23.7902, "step": 25686 }, { "epoch": 611.5970149253732, "grad_norm": 30.7081298828125, "learning_rate": 9.975038402457758e-06, "loss": 24.9684, "step": 25687 }, { "epoch": 611.6208955223881, "grad_norm": 22.706403732299805, "learning_rate": 9.974654377880185e-06, "loss": 24.4078, "step": 25688 }, { "epoch": 611.644776119403, "grad_norm": 19.941679000854492, "learning_rate": 9.974270353302613e-06, "loss": 24.8527, "step": 25689 }, { "epoch": 611.6686567164179, "grad_norm": 23.745763778686523, "learning_rate": 9.973886328725038e-06, "loss": 25.1575, "step": 25690 }, { "epoch": 611.6925373134328, "grad_norm": 21.63069725036621, "learning_rate": 9.973502304147467e-06, "loss": 25.1039, "step": 25691 }, { "epoch": 611.7164179104477, "grad_norm": 29.761520385742188, "learning_rate": 9.973118279569894e-06, "loss": 25.1675, "step": 25692 }, { "epoch": 611.7402985074627, "grad_norm": 23.274063110351562, "learning_rate": 9.97273425499232e-06, "loss": 25.3472, "step": 25693 }, { "epoch": 611.7641791044776, "grad_norm": 23.35721206665039, "learning_rate": 9.972350230414747e-06, "loss": 25.6053, "step": 25694 }, { "epoch": 611.7880597014926, "grad_norm": 21.578100204467773, "learning_rate": 9.971966205837174e-06, "loss": 24.6398, "step": 25695 }, { "epoch": 611.8119402985075, "grad_norm": 21.768587112426758, "learning_rate": 9.971582181259601e-06, "loss": 25.1454, "step": 25696 }, { "epoch": 611.8358208955224, "grad_norm": 30.734169006347656, "learning_rate": 9.971198156682028e-06, "loss": 25.418, "step": 25697 }, { "epoch": 611.8597014925373, "grad_norm": 25.426969528198242, "learning_rate": 9.970814132104456e-06, "loss": 25.1583, "step": 25698 }, { "epoch": 611.8835820895522, "grad_norm": 20.37607192993164, "learning_rate": 9.970430107526883e-06, "loss": 24.8014, "step": 25699 }, { "epoch": 611.9074626865672, "grad_norm": 23.506933212280273, "learning_rate": 9.97004608294931e-06, "loss": 25.4396, "step": 25700 }, { "epoch": 611.9313432835821, "grad_norm": 28.65123176574707, "learning_rate": 9.969662058371735e-06, "loss": 25.2095, "step": 25701 }, { "epoch": 611.955223880597, "grad_norm": NaN, "learning_rate": 9.969278033794164e-06, "loss": 22.0065, "step": 25702 }, { "epoch": 611.9791044776119, "grad_norm": 21.61505126953125, "learning_rate": 9.969278033794164e-06, "loss": 25.6607, "step": 25703 }, { "epoch": 612.0, "grad_norm": 24.227508544921875, "learning_rate": 9.968894009216592e-06, "loss": 23.6654, "step": 25704 }, { "epoch": 612.0238805970149, "grad_norm": 32.30353927612305, "learning_rate": 9.968509984639017e-06, "loss": 24.8336, "step": 25705 }, { "epoch": 612.0477611940298, "grad_norm": 22.874370574951172, "learning_rate": 9.968125960061444e-06, "loss": 25.1942, "step": 25706 }, { "epoch": 612.0716417910447, "grad_norm": 22.680349349975586, "learning_rate": 9.967741935483871e-06, "loss": 25.5204, "step": 25707 }, { "epoch": 612.0955223880597, "grad_norm": 31.924339294433594, "learning_rate": 9.967357910906299e-06, "loss": 24.5853, "step": 25708 }, { "epoch": 612.1194029850747, "grad_norm": 27.711156845092773, "learning_rate": 9.966973886328726e-06, "loss": 24.4867, "step": 25709 }, { "epoch": 612.1432835820896, "grad_norm": 21.649337768554688, "learning_rate": 9.966589861751153e-06, "loss": 24.9042, "step": 25710 }, { "epoch": 612.1671641791045, "grad_norm": 28.23440170288086, "learning_rate": 9.96620583717358e-06, "loss": 24.8428, "step": 25711 }, { "epoch": 612.1910447761194, "grad_norm": 30.537273406982422, "learning_rate": 9.965821812596007e-06, "loss": 24.5187, "step": 25712 }, { "epoch": 612.2149253731343, "grad_norm": 20.512601852416992, "learning_rate": 9.965437788018435e-06, "loss": 25.8371, "step": 25713 }, { "epoch": 612.2388059701492, "grad_norm": 28.07915496826172, "learning_rate": 9.96505376344086e-06, "loss": 25.1439, "step": 25714 }, { "epoch": 612.2626865671642, "grad_norm": 31.754671096801758, "learning_rate": 9.964669738863289e-06, "loss": 25.7853, "step": 25715 }, { "epoch": 612.2865671641791, "grad_norm": 21.81911277770996, "learning_rate": 9.964285714285714e-06, "loss": 24.3309, "step": 25716 }, { "epoch": 612.310447761194, "grad_norm": 27.207365036010742, "learning_rate": 9.963901689708142e-06, "loss": 24.2833, "step": 25717 }, { "epoch": 612.334328358209, "grad_norm": 31.895978927612305, "learning_rate": 9.963517665130569e-06, "loss": 25.0107, "step": 25718 }, { "epoch": 612.3582089552239, "grad_norm": 24.51824951171875, "learning_rate": 9.963133640552996e-06, "loss": 26.0842, "step": 25719 }, { "epoch": 612.3820895522388, "grad_norm": 24.146604537963867, "learning_rate": 9.962749615975423e-06, "loss": 25.0536, "step": 25720 }, { "epoch": 612.4059701492537, "grad_norm": 28.29360580444336, "learning_rate": 9.96236559139785e-06, "loss": 24.7569, "step": 25721 }, { "epoch": 612.4298507462687, "grad_norm": 31.11864471435547, "learning_rate": 9.961981566820278e-06, "loss": 24.7343, "step": 25722 }, { "epoch": 612.4537313432836, "grad_norm": 25.677345275878906, "learning_rate": 9.961597542242705e-06, "loss": 26.2318, "step": 25723 }, { "epoch": 612.4776119402985, "grad_norm": 23.14234733581543, "learning_rate": 9.961213517665132e-06, "loss": 25.5714, "step": 25724 }, { "epoch": 612.5014925373134, "grad_norm": 31.862394332885742, "learning_rate": 9.960829493087558e-06, "loss": 26.3871, "step": 25725 }, { "epoch": 612.5253731343283, "grad_norm": 24.678388595581055, "learning_rate": 9.960445468509985e-06, "loss": 25.7023, "step": 25726 }, { "epoch": 612.5492537313432, "grad_norm": 24.836063385009766, "learning_rate": 9.960061443932414e-06, "loss": 25.0483, "step": 25727 }, { "epoch": 612.5731343283583, "grad_norm": 25.453025817871094, "learning_rate": 9.959677419354839e-06, "loss": 24.8595, "step": 25728 }, { "epoch": 612.5970149253732, "grad_norm": 31.3270320892334, "learning_rate": 9.959293394777266e-06, "loss": 25.7441, "step": 25729 }, { "epoch": 612.6208955223881, "grad_norm": 25.450618743896484, "learning_rate": 9.958909370199693e-06, "loss": 25.205, "step": 25730 }, { "epoch": 612.644776119403, "grad_norm": 20.942358016967773, "learning_rate": 9.95852534562212e-06, "loss": 25.0345, "step": 25731 }, { "epoch": 612.6686567164179, "grad_norm": 28.489992141723633, "learning_rate": 9.958141321044548e-06, "loss": 25.2219, "step": 25732 }, { "epoch": 612.6925373134328, "grad_norm": 27.494064331054688, "learning_rate": 9.957757296466975e-06, "loss": 24.654, "step": 25733 }, { "epoch": 612.7164179104477, "grad_norm": 21.845125198364258, "learning_rate": 9.957373271889402e-06, "loss": 25.1134, "step": 25734 }, { "epoch": 612.7402985074627, "grad_norm": 22.9196720123291, "learning_rate": 9.95698924731183e-06, "loss": 25.6232, "step": 25735 }, { "epoch": 612.7641791044776, "grad_norm": 26.916860580444336, "learning_rate": 9.956605222734255e-06, "loss": 25.4988, "step": 25736 }, { "epoch": 612.7880597014926, "grad_norm": 30.398229598999023, "learning_rate": 9.956221198156682e-06, "loss": 24.7537, "step": 25737 }, { "epoch": 612.8119402985075, "grad_norm": 20.612260818481445, "learning_rate": 9.955837173579111e-06, "loss": 24.7095, "step": 25738 }, { "epoch": 612.8358208955224, "grad_norm": 23.466632843017578, "learning_rate": 9.955453149001537e-06, "loss": 25.2862, "step": 25739 }, { "epoch": 612.8597014925373, "grad_norm": 23.051654815673828, "learning_rate": 9.955069124423964e-06, "loss": 25.5201, "step": 25740 }, { "epoch": 612.8835820895522, "grad_norm": 23.083698272705078, "learning_rate": 9.954685099846391e-06, "loss": 24.7951, "step": 25741 }, { "epoch": 612.9074626865672, "grad_norm": 21.45365333557129, "learning_rate": 9.954301075268818e-06, "loss": 25.7687, "step": 25742 }, { "epoch": 612.9313432835821, "grad_norm": 20.186498641967773, "learning_rate": 9.953917050691245e-06, "loss": 24.9371, "step": 25743 }, { "epoch": 612.955223880597, "grad_norm": 19.819618225097656, "learning_rate": 9.953533026113672e-06, "loss": 24.8331, "step": 25744 }, { "epoch": 612.9791044776119, "grad_norm": 27.523269653320312, "learning_rate": 9.953149001536098e-06, "loss": 25.0457, "step": 25745 }, { "epoch": 613.0, "grad_norm": 22.85436248779297, "learning_rate": 9.952764976958527e-06, "loss": 20.9556, "step": 25746 }, { "epoch": 613.0238805970149, "grad_norm": 24.3552303314209, "learning_rate": 9.952380952380954e-06, "loss": 25.4036, "step": 25747 }, { "epoch": 613.0477611940298, "grad_norm": 21.634634017944336, "learning_rate": 9.95199692780338e-06, "loss": 24.7454, "step": 25748 }, { "epoch": 613.0716417910447, "grad_norm": 22.948402404785156, "learning_rate": 9.951612903225807e-06, "loss": 24.4699, "step": 25749 }, { "epoch": 613.0955223880597, "grad_norm": 29.949249267578125, "learning_rate": 9.951228878648234e-06, "loss": 25.4315, "step": 25750 }, { "epoch": 613.1194029850747, "grad_norm": 26.683277130126953, "learning_rate": 9.950844854070661e-06, "loss": 25.0519, "step": 25751 }, { "epoch": 613.1432835820896, "grad_norm": 23.747163772583008, "learning_rate": 9.950460829493088e-06, "loss": 24.84, "step": 25752 }, { "epoch": 613.1671641791045, "grad_norm": 23.506847381591797, "learning_rate": 9.950076804915516e-06, "loss": 25.6467, "step": 25753 }, { "epoch": 613.1910447761194, "grad_norm": 26.976110458374023, "learning_rate": 9.949692780337943e-06, "loss": 25.1644, "step": 25754 }, { "epoch": 613.2149253731343, "grad_norm": 28.289905548095703, "learning_rate": 9.94930875576037e-06, "loss": 25.0538, "step": 25755 }, { "epoch": 613.2388059701492, "grad_norm": 20.88368797302246, "learning_rate": 9.948924731182797e-06, "loss": 24.0384, "step": 25756 }, { "epoch": 613.2626865671642, "grad_norm": 25.4835262298584, "learning_rate": 9.948540706605223e-06, "loss": 24.973, "step": 25757 }, { "epoch": 613.2865671641791, "grad_norm": 23.59998893737793, "learning_rate": 9.948156682027651e-06, "loss": 24.4305, "step": 25758 }, { "epoch": 613.310447761194, "grad_norm": 31.828563690185547, "learning_rate": 9.947772657450077e-06, "loss": 24.9301, "step": 25759 }, { "epoch": 613.334328358209, "grad_norm": 22.931869506835938, "learning_rate": 9.947388632872504e-06, "loss": 25.4619, "step": 25760 }, { "epoch": 613.3582089552239, "grad_norm": 28.25804328918457, "learning_rate": 9.947004608294931e-06, "loss": 25.316, "step": 25761 }, { "epoch": 613.3820895522388, "grad_norm": 23.73213768005371, "learning_rate": 9.946620583717359e-06, "loss": 25.6076, "step": 25762 }, { "epoch": 613.4059701492537, "grad_norm": 25.72173500061035, "learning_rate": 9.946236559139786e-06, "loss": 24.8358, "step": 25763 }, { "epoch": 613.4298507462687, "grad_norm": 25.58971405029297, "learning_rate": 9.945852534562213e-06, "loss": 26.549, "step": 25764 }, { "epoch": 613.4537313432836, "grad_norm": 31.33917999267578, "learning_rate": 9.94546850998464e-06, "loss": 24.6849, "step": 25765 }, { "epoch": 613.4776119402985, "grad_norm": 27.255338668823242, "learning_rate": 9.945084485407067e-06, "loss": 25.683, "step": 25766 }, { "epoch": 613.5014925373134, "grad_norm": 25.269556045532227, "learning_rate": 9.944700460829495e-06, "loss": 25.1584, "step": 25767 }, { "epoch": 613.5253731343283, "grad_norm": 29.96344566345215, "learning_rate": 9.94431643625192e-06, "loss": 25.1128, "step": 25768 }, { "epoch": 613.5492537313432, "grad_norm": 29.732757568359375, "learning_rate": 9.943932411674347e-06, "loss": 24.0092, "step": 25769 }, { "epoch": 613.5731343283583, "grad_norm": 22.5306396484375, "learning_rate": 9.943548387096776e-06, "loss": 25.277, "step": 25770 }, { "epoch": 613.5970149253732, "grad_norm": 26.6087703704834, "learning_rate": 9.943164362519202e-06, "loss": 25.4824, "step": 25771 }, { "epoch": 613.6208955223881, "grad_norm": 25.60525131225586, "learning_rate": 9.942780337941629e-06, "loss": 24.8133, "step": 25772 }, { "epoch": 613.644776119403, "grad_norm": 21.344419479370117, "learning_rate": 9.942396313364056e-06, "loss": 24.8143, "step": 25773 }, { "epoch": 613.6686567164179, "grad_norm": 22.69523048400879, "learning_rate": 9.942012288786483e-06, "loss": 25.0987, "step": 25774 }, { "epoch": 613.6925373134328, "grad_norm": 24.040748596191406, "learning_rate": 9.94162826420891e-06, "loss": 25.1395, "step": 25775 }, { "epoch": 613.7164179104477, "grad_norm": 27.60848617553711, "learning_rate": 9.941244239631338e-06, "loss": 25.8018, "step": 25776 }, { "epoch": 613.7402985074627, "grad_norm": 28.18586540222168, "learning_rate": 9.940860215053765e-06, "loss": 25.3245, "step": 25777 }, { "epoch": 613.7641791044776, "grad_norm": 25.10994529724121, "learning_rate": 9.940476190476192e-06, "loss": 24.8952, "step": 25778 }, { "epoch": 613.7880597014926, "grad_norm": 25.178232192993164, "learning_rate": 9.940092165898617e-06, "loss": 25.1943, "step": 25779 }, { "epoch": 613.8119402985075, "grad_norm": 19.672317504882812, "learning_rate": 9.939708141321045e-06, "loss": 25.0819, "step": 25780 }, { "epoch": 613.8358208955224, "grad_norm": 23.228126525878906, "learning_rate": 9.939324116743474e-06, "loss": 24.9149, "step": 25781 }, { "epoch": 613.8597014925373, "grad_norm": 20.608182907104492, "learning_rate": 9.938940092165899e-06, "loss": 23.8747, "step": 25782 }, { "epoch": 613.8835820895522, "grad_norm": 24.53752326965332, "learning_rate": 9.938556067588326e-06, "loss": 25.8779, "step": 25783 }, { "epoch": 613.9074626865672, "grad_norm": 23.684057235717773, "learning_rate": 9.938172043010753e-06, "loss": 25.3506, "step": 25784 }, { "epoch": 613.9313432835821, "grad_norm": 25.373605728149414, "learning_rate": 9.93778801843318e-06, "loss": 25.3865, "step": 25785 }, { "epoch": 613.955223880597, "grad_norm": 21.496883392333984, "learning_rate": 9.937403993855608e-06, "loss": 25.0861, "step": 25786 }, { "epoch": 613.9791044776119, "grad_norm": 23.9390811920166, "learning_rate": 9.937019969278035e-06, "loss": 25.9098, "step": 25787 }, { "epoch": 614.0, "grad_norm": 23.3063907623291, "learning_rate": 9.93663594470046e-06, "loss": 21.52, "step": 25788 }, { "epoch": 614.0238805970149, "grad_norm": 30.053810119628906, "learning_rate": 9.93625192012289e-06, "loss": 24.9349, "step": 25789 }, { "epoch": 614.0477611940298, "grad_norm": 23.73406982421875, "learning_rate": 9.935867895545317e-06, "loss": 25.1478, "step": 25790 }, { "epoch": 614.0716417910447, "grad_norm": 23.836132049560547, "learning_rate": 9.935483870967742e-06, "loss": 25.4325, "step": 25791 }, { "epoch": 614.0955223880597, "grad_norm": 22.802785873413086, "learning_rate": 9.93509984639017e-06, "loss": 24.7543, "step": 25792 }, { "epoch": 614.1194029850747, "grad_norm": 27.399946212768555, "learning_rate": 9.934715821812596e-06, "loss": 24.813, "step": 25793 }, { "epoch": 614.1432835820896, "grad_norm": 25.158945083618164, "learning_rate": 9.934331797235024e-06, "loss": 24.6352, "step": 25794 }, { "epoch": 614.1671641791045, "grad_norm": 25.72072410583496, "learning_rate": 9.93394777265745e-06, "loss": 25.5725, "step": 25795 }, { "epoch": 614.1910447761194, "grad_norm": 24.136680603027344, "learning_rate": 9.933563748079878e-06, "loss": 24.6923, "step": 25796 }, { "epoch": 614.2149253731343, "grad_norm": 21.66455078125, "learning_rate": 9.933179723502305e-06, "loss": 25.1676, "step": 25797 }, { "epoch": 614.2388059701492, "grad_norm": 22.397045135498047, "learning_rate": 9.932795698924732e-06, "loss": 23.9389, "step": 25798 }, { "epoch": 614.2626865671642, "grad_norm": 29.140689849853516, "learning_rate": 9.93241167434716e-06, "loss": 24.9766, "step": 25799 }, { "epoch": 614.2865671641791, "grad_norm": 29.949115753173828, "learning_rate": 9.932027649769585e-06, "loss": 24.9934, "step": 25800 }, { "epoch": 614.310447761194, "grad_norm": 21.1999454498291, "learning_rate": 9.931643625192014e-06, "loss": 24.4986, "step": 25801 }, { "epoch": 614.334328358209, "grad_norm": 21.01217269897461, "learning_rate": 9.93125960061444e-06, "loss": 24.4981, "step": 25802 }, { "epoch": 614.3582089552239, "grad_norm": 20.92207908630371, "learning_rate": 9.930875576036867e-06, "loss": 25.1189, "step": 25803 }, { "epoch": 614.3820895522388, "grad_norm": 25.667728424072266, "learning_rate": 9.930491551459294e-06, "loss": 24.9454, "step": 25804 }, { "epoch": 614.4059701492537, "grad_norm": 23.551116943359375, "learning_rate": 9.930107526881721e-06, "loss": 25.4729, "step": 25805 }, { "epoch": 614.4298507462687, "grad_norm": 25.834924697875977, "learning_rate": 9.929723502304148e-06, "loss": 25.1119, "step": 25806 }, { "epoch": 614.4537313432836, "grad_norm": 23.914325714111328, "learning_rate": 9.929339477726575e-06, "loss": 26.1311, "step": 25807 }, { "epoch": 614.4776119402985, "grad_norm": 23.225597381591797, "learning_rate": 9.928955453149003e-06, "loss": 25.2604, "step": 25808 }, { "epoch": 614.5014925373134, "grad_norm": 22.19977378845215, "learning_rate": 9.92857142857143e-06, "loss": 24.2445, "step": 25809 }, { "epoch": 614.5253731343283, "grad_norm": 30.02191925048828, "learning_rate": 9.928187403993857e-06, "loss": 24.6253, "step": 25810 }, { "epoch": 614.5492537313432, "grad_norm": 25.48715591430664, "learning_rate": 9.927803379416283e-06, "loss": 25.09, "step": 25811 }, { "epoch": 614.5731343283583, "grad_norm": 23.099884033203125, "learning_rate": 9.927419354838711e-06, "loss": 25.6773, "step": 25812 }, { "epoch": 614.5970149253732, "grad_norm": 23.06089973449707, "learning_rate": 9.927035330261137e-06, "loss": 24.2976, "step": 25813 }, { "epoch": 614.6208955223881, "grad_norm": 21.688173294067383, "learning_rate": 9.926651305683564e-06, "loss": 25.5297, "step": 25814 }, { "epoch": 614.644776119403, "grad_norm": 22.716089248657227, "learning_rate": 9.926267281105991e-06, "loss": 25.576, "step": 25815 }, { "epoch": 614.6686567164179, "grad_norm": 21.75605583190918, "learning_rate": 9.925883256528418e-06, "loss": 25.0744, "step": 25816 }, { "epoch": 614.6925373134328, "grad_norm": 24.06793785095215, "learning_rate": 9.925499231950846e-06, "loss": 25.7653, "step": 25817 }, { "epoch": 614.7164179104477, "grad_norm": 25.46147346496582, "learning_rate": 9.925115207373273e-06, "loss": 25.0495, "step": 25818 }, { "epoch": 614.7402985074627, "grad_norm": 32.08125305175781, "learning_rate": 9.9247311827957e-06, "loss": 25.4516, "step": 25819 }, { "epoch": 614.7641791044776, "grad_norm": 27.551956176757812, "learning_rate": 9.924347158218127e-06, "loss": 24.5681, "step": 25820 }, { "epoch": 614.7880597014926, "grad_norm": 21.545560836791992, "learning_rate": 9.923963133640554e-06, "loss": 24.8366, "step": 25821 }, { "epoch": 614.8119402985075, "grad_norm": 22.23763656616211, "learning_rate": 9.92357910906298e-06, "loss": 24.8862, "step": 25822 }, { "epoch": 614.8358208955224, "grad_norm": 21.67057991027832, "learning_rate": 9.923195084485407e-06, "loss": 25.6276, "step": 25823 }, { "epoch": 614.8597014925373, "grad_norm": 24.673255920410156, "learning_rate": 9.922811059907836e-06, "loss": 25.4496, "step": 25824 }, { "epoch": 614.8835820895522, "grad_norm": 24.544931411743164, "learning_rate": 9.922427035330262e-06, "loss": 25.8737, "step": 25825 }, { "epoch": 614.9074626865672, "grad_norm": NaN, "learning_rate": 9.922043010752689e-06, "loss": 40.9799, "step": 25826 }, { "epoch": 614.9313432835821, "grad_norm": 25.613018035888672, "learning_rate": 9.922043010752689e-06, "loss": 24.3605, "step": 25827 }, { "epoch": 614.955223880597, "grad_norm": 25.144878387451172, "learning_rate": 9.921658986175116e-06, "loss": 24.1217, "step": 25828 }, { "epoch": 614.9791044776119, "grad_norm": NaN, "learning_rate": 9.921274961597543e-06, "loss": 35.4049, "step": 25829 }, { "epoch": 615.0, "grad_norm": 20.39946937561035, "learning_rate": 9.921274961597543e-06, "loss": 22.6066, "step": 25830 }, { "epoch": 615.0238805970149, "grad_norm": 23.51532554626465, "learning_rate": 9.92089093701997e-06, "loss": 25.7509, "step": 25831 }, { "epoch": 615.0477611940298, "grad_norm": 24.471641540527344, "learning_rate": 9.920506912442397e-06, "loss": 24.3331, "step": 25832 }, { "epoch": 615.0716417910447, "grad_norm": 20.977481842041016, "learning_rate": 9.920122887864823e-06, "loss": 24.9634, "step": 25833 }, { "epoch": 615.0955223880597, "grad_norm": 23.81809425354004, "learning_rate": 9.919738863287252e-06, "loss": 25.6126, "step": 25834 }, { "epoch": 615.1194029850747, "grad_norm": 22.861698150634766, "learning_rate": 9.919354838709679e-06, "loss": 25.251, "step": 25835 }, { "epoch": 615.1432835820896, "grad_norm": 25.065780639648438, "learning_rate": 9.918970814132105e-06, "loss": 24.2578, "step": 25836 }, { "epoch": 615.1671641791045, "grad_norm": 22.244686126708984, "learning_rate": 9.918586789554532e-06, "loss": 25.2533, "step": 25837 }, { "epoch": 615.1910447761194, "grad_norm": 23.90375518798828, "learning_rate": 9.918202764976959e-06, "loss": 25.537, "step": 25838 }, { "epoch": 615.2149253731343, "grad_norm": 27.376728057861328, "learning_rate": 9.917818740399386e-06, "loss": 26.066, "step": 25839 }, { "epoch": 615.2388059701492, "grad_norm": 21.63084602355957, "learning_rate": 9.917434715821813e-06, "loss": 24.2485, "step": 25840 }, { "epoch": 615.2626865671642, "grad_norm": 26.530149459838867, "learning_rate": 9.91705069124424e-06, "loss": 24.3003, "step": 25841 }, { "epoch": 615.2865671641791, "grad_norm": 21.106172561645508, "learning_rate": 9.916666666666668e-06, "loss": 24.946, "step": 25842 }, { "epoch": 615.310447761194, "grad_norm": 21.449193954467773, "learning_rate": 9.916282642089095e-06, "loss": 24.5491, "step": 25843 }, { "epoch": 615.334328358209, "grad_norm": 23.364200592041016, "learning_rate": 9.915898617511522e-06, "loss": 25.1385, "step": 25844 }, { "epoch": 615.3582089552239, "grad_norm": 22.762107849121094, "learning_rate": 9.91551459293395e-06, "loss": 25.0412, "step": 25845 }, { "epoch": 615.3820895522388, "grad_norm": 23.364274978637695, "learning_rate": 9.915130568356376e-06, "loss": 25.1087, "step": 25846 }, { "epoch": 615.4059701492537, "grad_norm": 21.875333786010742, "learning_rate": 9.914746543778802e-06, "loss": 25.0238, "step": 25847 }, { "epoch": 615.4298507462687, "grad_norm": 21.92527961730957, "learning_rate": 9.91436251920123e-06, "loss": 24.8952, "step": 25848 }, { "epoch": 615.4537313432836, "grad_norm": 26.59933853149414, "learning_rate": 9.913978494623658e-06, "loss": 25.0713, "step": 25849 }, { "epoch": 615.4776119402985, "grad_norm": 27.756755828857422, "learning_rate": 9.913594470046084e-06, "loss": 25.1393, "step": 25850 }, { "epoch": 615.5014925373134, "grad_norm": 24.92581558227539, "learning_rate": 9.91321044546851e-06, "loss": 25.087, "step": 25851 }, { "epoch": 615.5253731343283, "grad_norm": 23.31959342956543, "learning_rate": 9.912826420890938e-06, "loss": 24.8795, "step": 25852 }, { "epoch": 615.5492537313432, "grad_norm": 22.462547302246094, "learning_rate": 9.912442396313365e-06, "loss": 25.941, "step": 25853 }, { "epoch": 615.5731343283583, "grad_norm": 24.011972427368164, "learning_rate": 9.912058371735792e-06, "loss": 25.3673, "step": 25854 }, { "epoch": 615.5970149253732, "grad_norm": 31.931514739990234, "learning_rate": 9.91167434715822e-06, "loss": 24.388, "step": 25855 }, { "epoch": 615.6208955223881, "grad_norm": 25.86977195739746, "learning_rate": 9.911290322580645e-06, "loss": 24.8173, "step": 25856 }, { "epoch": 615.644776119403, "grad_norm": 23.85453224182129, "learning_rate": 9.910906298003074e-06, "loss": 25.403, "step": 25857 }, { "epoch": 615.6686567164179, "grad_norm": 22.548362731933594, "learning_rate": 9.9105222734255e-06, "loss": 24.5856, "step": 25858 }, { "epoch": 615.6925373134328, "grad_norm": 25.237770080566406, "learning_rate": 9.910138248847927e-06, "loss": 24.7636, "step": 25859 }, { "epoch": 615.7164179104477, "grad_norm": 23.571475982666016, "learning_rate": 9.909754224270354e-06, "loss": 25.5746, "step": 25860 }, { "epoch": 615.7402985074627, "grad_norm": 25.894695281982422, "learning_rate": 9.909370199692781e-06, "loss": 25.0338, "step": 25861 }, { "epoch": 615.7641791044776, "grad_norm": 28.105260848999023, "learning_rate": 9.908986175115208e-06, "loss": 24.9404, "step": 25862 }, { "epoch": 615.7880597014926, "grad_norm": 23.500511169433594, "learning_rate": 9.908602150537635e-06, "loss": 24.57, "step": 25863 }, { "epoch": 615.8119402985075, "grad_norm": 25.01885223388672, "learning_rate": 9.908218125960063e-06, "loss": 24.8731, "step": 25864 }, { "epoch": 615.8358208955224, "grad_norm": 26.28655433654785, "learning_rate": 9.90783410138249e-06, "loss": 24.2693, "step": 25865 }, { "epoch": 615.8597014925373, "grad_norm": 25.973299026489258, "learning_rate": 9.907450076804917e-06, "loss": 25.2257, "step": 25866 }, { "epoch": 615.8835820895522, "grad_norm": 28.673370361328125, "learning_rate": 9.907066052227342e-06, "loss": 25.7517, "step": 25867 }, { "epoch": 615.9074626865672, "grad_norm": 23.950685501098633, "learning_rate": 9.90668202764977e-06, "loss": 25.764, "step": 25868 }, { "epoch": 615.9313432835821, "grad_norm": 21.926387786865234, "learning_rate": 9.906298003072199e-06, "loss": 24.3519, "step": 25869 }, { "epoch": 615.955223880597, "grad_norm": 28.563783645629883, "learning_rate": 9.905913978494624e-06, "loss": 25.2148, "step": 25870 }, { "epoch": 615.9791044776119, "grad_norm": 29.97212028503418, "learning_rate": 9.905529953917051e-06, "loss": 26.0365, "step": 25871 }, { "epoch": 616.0, "grad_norm": 24.337642669677734, "learning_rate": 9.905145929339478e-06, "loss": 21.6293, "step": 25872 }, { "epoch": 616.0238805970149, "grad_norm": 22.39923095703125, "learning_rate": 9.904761904761906e-06, "loss": 26.0749, "step": 25873 }, { "epoch": 616.0477611940298, "grad_norm": 21.88098907470703, "learning_rate": 9.904377880184333e-06, "loss": 24.3167, "step": 25874 }, { "epoch": 616.0716417910447, "grad_norm": 22.443824768066406, "learning_rate": 9.90399385560676e-06, "loss": 24.4285, "step": 25875 }, { "epoch": 616.0955223880597, "grad_norm": 26.93584632873535, "learning_rate": 9.903609831029185e-06, "loss": 25.3044, "step": 25876 }, { "epoch": 616.1194029850747, "grad_norm": 29.194143295288086, "learning_rate": 9.903225806451614e-06, "loss": 25.7455, "step": 25877 }, { "epoch": 616.1432835820896, "grad_norm": 25.81365966796875, "learning_rate": 9.902841781874042e-06, "loss": 24.8697, "step": 25878 }, { "epoch": 616.1671641791045, "grad_norm": 22.024860382080078, "learning_rate": 9.902457757296467e-06, "loss": 25.3971, "step": 25879 }, { "epoch": 616.1910447761194, "grad_norm": 23.38484764099121, "learning_rate": 9.902073732718894e-06, "loss": 24.9038, "step": 25880 }, { "epoch": 616.2149253731343, "grad_norm": 22.608699798583984, "learning_rate": 9.901689708141321e-06, "loss": 25.4285, "step": 25881 }, { "epoch": 616.2388059701492, "grad_norm": 26.60794448852539, "learning_rate": 9.901305683563749e-06, "loss": 25.3053, "step": 25882 }, { "epoch": 616.2626865671642, "grad_norm": 29.128110885620117, "learning_rate": 9.900921658986176e-06, "loss": 25.6509, "step": 25883 }, { "epoch": 616.2865671641791, "grad_norm": 23.693424224853516, "learning_rate": 9.900537634408603e-06, "loss": 24.5737, "step": 25884 }, { "epoch": 616.310447761194, "grad_norm": 30.832626342773438, "learning_rate": 9.90015360983103e-06, "loss": 24.9429, "step": 25885 }, { "epoch": 616.334328358209, "grad_norm": 24.79467010498047, "learning_rate": 9.899769585253457e-06, "loss": 24.5678, "step": 25886 }, { "epoch": 616.3582089552239, "grad_norm": 24.95270347595215, "learning_rate": 9.899385560675883e-06, "loss": 24.6768, "step": 25887 }, { "epoch": 616.3820895522388, "grad_norm": 28.31185531616211, "learning_rate": 9.899001536098312e-06, "loss": 25.3132, "step": 25888 }, { "epoch": 616.4059701492537, "grad_norm": 23.00187873840332, "learning_rate": 9.898617511520739e-06, "loss": 23.9812, "step": 25889 }, { "epoch": 616.4298507462687, "grad_norm": 28.210512161254883, "learning_rate": 9.898233486943164e-06, "loss": 24.9542, "step": 25890 }, { "epoch": 616.4537313432836, "grad_norm": 25.016206741333008, "learning_rate": 9.897849462365592e-06, "loss": 25.4082, "step": 25891 }, { "epoch": 616.4776119402985, "grad_norm": 27.343931198120117, "learning_rate": 9.897465437788019e-06, "loss": 24.8737, "step": 25892 }, { "epoch": 616.5014925373134, "grad_norm": 22.16849708557129, "learning_rate": 9.897081413210446e-06, "loss": 25.4046, "step": 25893 }, { "epoch": 616.5253731343283, "grad_norm": 21.12938690185547, "learning_rate": 9.896697388632873e-06, "loss": 23.9487, "step": 25894 }, { "epoch": 616.5492537313432, "grad_norm": 28.2277774810791, "learning_rate": 9.8963133640553e-06, "loss": 25.1268, "step": 25895 }, { "epoch": 616.5731343283583, "grad_norm": 26.067367553710938, "learning_rate": 9.895929339477728e-06, "loss": 25.021, "step": 25896 }, { "epoch": 616.5970149253732, "grad_norm": 23.959903717041016, "learning_rate": 9.895545314900155e-06, "loss": 25.2828, "step": 25897 }, { "epoch": 616.6208955223881, "grad_norm": 23.634510040283203, "learning_rate": 9.895161290322582e-06, "loss": 24.8179, "step": 25898 }, { "epoch": 616.644776119403, "grad_norm": 20.9796199798584, "learning_rate": 9.894777265745008e-06, "loss": 24.437, "step": 25899 }, { "epoch": 616.6686567164179, "grad_norm": 29.286033630371094, "learning_rate": 9.894393241167436e-06, "loss": 25.2722, "step": 25900 }, { "epoch": 616.6925373134328, "grad_norm": 24.03057861328125, "learning_rate": 9.894009216589862e-06, "loss": 24.6581, "step": 25901 }, { "epoch": 616.7164179104477, "grad_norm": 32.90248489379883, "learning_rate": 9.893625192012289e-06, "loss": 25.1521, "step": 25902 }, { "epoch": 616.7402985074627, "grad_norm": 27.884071350097656, "learning_rate": 9.893241167434716e-06, "loss": 25.6828, "step": 25903 }, { "epoch": 616.7641791044776, "grad_norm": 20.178741455078125, "learning_rate": 9.892857142857143e-06, "loss": 25.1421, "step": 25904 }, { "epoch": 616.7880597014926, "grad_norm": 25.42621612548828, "learning_rate": 9.89247311827957e-06, "loss": 25.045, "step": 25905 }, { "epoch": 616.8119402985075, "grad_norm": 25.598617553710938, "learning_rate": 9.892089093701998e-06, "loss": 24.7444, "step": 25906 }, { "epoch": 616.8358208955224, "grad_norm": 30.169647216796875, "learning_rate": 9.891705069124425e-06, "loss": 24.7332, "step": 25907 }, { "epoch": 616.8597014925373, "grad_norm": 27.881765365600586, "learning_rate": 9.891321044546852e-06, "loss": 24.4374, "step": 25908 }, { "epoch": 616.8835820895522, "grad_norm": 19.62690544128418, "learning_rate": 9.89093701996928e-06, "loss": 24.6214, "step": 25909 }, { "epoch": 616.9074626865672, "grad_norm": 25.853788375854492, "learning_rate": 9.890552995391705e-06, "loss": 25.6305, "step": 25910 }, { "epoch": 616.9313432835821, "grad_norm": 28.979040145874023, "learning_rate": 9.890168970814132e-06, "loss": 25.4077, "step": 25911 }, { "epoch": 616.955223880597, "grad_norm": 26.943544387817383, "learning_rate": 9.889784946236561e-06, "loss": 24.7726, "step": 25912 }, { "epoch": 616.9791044776119, "grad_norm": 22.37078285217285, "learning_rate": 9.889400921658987e-06, "loss": 24.6278, "step": 25913 }, { "epoch": 617.0, "grad_norm": 21.427488327026367, "learning_rate": 9.889016897081414e-06, "loss": 22.5782, "step": 25914 }, { "epoch": 617.0238805970149, "grad_norm": 27.343067169189453, "learning_rate": 9.888632872503841e-06, "loss": 24.5695, "step": 25915 }, { "epoch": 617.0477611940298, "grad_norm": 30.033567428588867, "learning_rate": 9.888248847926268e-06, "loss": 25.1746, "step": 25916 }, { "epoch": 617.0716417910447, "grad_norm": 23.928268432617188, "learning_rate": 9.887864823348695e-06, "loss": 25.4044, "step": 25917 }, { "epoch": 617.0955223880597, "grad_norm": 24.25472068786621, "learning_rate": 9.887480798771122e-06, "loss": 25.2018, "step": 25918 }, { "epoch": 617.1194029850747, "grad_norm": 21.383451461791992, "learning_rate": 9.88709677419355e-06, "loss": 24.6284, "step": 25919 }, { "epoch": 617.1432835820896, "grad_norm": 26.792692184448242, "learning_rate": 9.886712749615977e-06, "loss": 25.1361, "step": 25920 }, { "epoch": 617.1671641791045, "grad_norm": 28.30308723449707, "learning_rate": 9.886328725038404e-06, "loss": 24.6429, "step": 25921 }, { "epoch": 617.1910447761194, "grad_norm": 26.449058532714844, "learning_rate": 9.88594470046083e-06, "loss": 25.3479, "step": 25922 }, { "epoch": 617.2149253731343, "grad_norm": 23.323781967163086, "learning_rate": 9.885560675883258e-06, "loss": 24.5575, "step": 25923 }, { "epoch": 617.2388059701492, "grad_norm": 23.20481300354004, "learning_rate": 9.885176651305684e-06, "loss": 24.8203, "step": 25924 }, { "epoch": 617.2626865671642, "grad_norm": 24.901885986328125, "learning_rate": 9.884792626728111e-06, "loss": 25.1178, "step": 25925 }, { "epoch": 617.2865671641791, "grad_norm": 25.649229049682617, "learning_rate": 9.884408602150538e-06, "loss": 25.3976, "step": 25926 }, { "epoch": 617.310447761194, "grad_norm": 23.629505157470703, "learning_rate": 9.884024577572966e-06, "loss": 24.5522, "step": 25927 }, { "epoch": 617.334328358209, "grad_norm": 22.891794204711914, "learning_rate": 9.883640552995393e-06, "loss": 23.9187, "step": 25928 }, { "epoch": 617.3582089552239, "grad_norm": 20.995445251464844, "learning_rate": 9.88325652841782e-06, "loss": 24.9666, "step": 25929 }, { "epoch": 617.3820895522388, "grad_norm": 22.010400772094727, "learning_rate": 9.882872503840245e-06, "loss": 24.4379, "step": 25930 }, { "epoch": 617.4059701492537, "grad_norm": 21.883441925048828, "learning_rate": 9.882488479262674e-06, "loss": 25.2411, "step": 25931 }, { "epoch": 617.4298507462687, "grad_norm": 26.569250106811523, "learning_rate": 9.882104454685101e-06, "loss": 24.9856, "step": 25932 }, { "epoch": 617.4537313432836, "grad_norm": 25.192140579223633, "learning_rate": 9.881720430107527e-06, "loss": 24.9829, "step": 25933 }, { "epoch": 617.4776119402985, "grad_norm": 23.4812068939209, "learning_rate": 9.881336405529954e-06, "loss": 24.883, "step": 25934 }, { "epoch": 617.5014925373134, "grad_norm": 22.680740356445312, "learning_rate": 9.880952380952381e-06, "loss": 24.6093, "step": 25935 }, { "epoch": 617.5253731343283, "grad_norm": 20.53631591796875, "learning_rate": 9.880568356374809e-06, "loss": 24.5448, "step": 25936 }, { "epoch": 617.5492537313432, "grad_norm": 23.485292434692383, "learning_rate": 9.880184331797236e-06, "loss": 25.3392, "step": 25937 }, { "epoch": 617.5731343283583, "grad_norm": 23.505504608154297, "learning_rate": 9.879800307219663e-06, "loss": 25.7244, "step": 25938 }, { "epoch": 617.5970149253732, "grad_norm": 33.47627258300781, "learning_rate": 9.87941628264209e-06, "loss": 25.7765, "step": 25939 }, { "epoch": 617.6208955223881, "grad_norm": 29.959434509277344, "learning_rate": 9.879032258064517e-06, "loss": 24.7406, "step": 25940 }, { "epoch": 617.644776119403, "grad_norm": 21.542659759521484, "learning_rate": 9.878648233486945e-06, "loss": 24.5231, "step": 25941 }, { "epoch": 617.6686567164179, "grad_norm": 22.39794158935547, "learning_rate": 9.87826420890937e-06, "loss": 25.2593, "step": 25942 }, { "epoch": 617.6925373134328, "grad_norm": 23.176267623901367, "learning_rate": 9.877880184331799e-06, "loss": 24.5613, "step": 25943 }, { "epoch": 617.7164179104477, "grad_norm": 21.965164184570312, "learning_rate": 9.877496159754224e-06, "loss": 24.7006, "step": 25944 }, { "epoch": 617.7402985074627, "grad_norm": 29.394760131835938, "learning_rate": 9.877112135176652e-06, "loss": 25.9681, "step": 25945 }, { "epoch": 617.7641791044776, "grad_norm": 28.614957809448242, "learning_rate": 9.876728110599079e-06, "loss": 25.1183, "step": 25946 }, { "epoch": 617.7880597014926, "grad_norm": 24.878740310668945, "learning_rate": 9.876344086021506e-06, "loss": 25.2998, "step": 25947 }, { "epoch": 617.8119402985075, "grad_norm": 26.000125885009766, "learning_rate": 9.875960061443933e-06, "loss": 25.5186, "step": 25948 }, { "epoch": 617.8358208955224, "grad_norm": 22.580163955688477, "learning_rate": 9.87557603686636e-06, "loss": 24.4847, "step": 25949 }, { "epoch": 617.8597014925373, "grad_norm": 34.844566345214844, "learning_rate": 9.875192012288788e-06, "loss": 26.2095, "step": 25950 }, { "epoch": 617.8835820895522, "grad_norm": 23.92089080810547, "learning_rate": 9.874807987711215e-06, "loss": 24.6749, "step": 25951 }, { "epoch": 617.9074626865672, "grad_norm": 24.449602127075195, "learning_rate": 9.874423963133642e-06, "loss": 24.5773, "step": 25952 }, { "epoch": 617.9313432835821, "grad_norm": 31.131711959838867, "learning_rate": 9.874039938556067e-06, "loss": 24.6809, "step": 25953 }, { "epoch": 617.955223880597, "grad_norm": 31.51456642150879, "learning_rate": 9.873655913978495e-06, "loss": 25.016, "step": 25954 }, { "epoch": 617.9791044776119, "grad_norm": 26.432167053222656, "learning_rate": 9.873271889400924e-06, "loss": 25.3497, "step": 25955 }, { "epoch": 618.0, "grad_norm": 24.621665954589844, "learning_rate": 9.872887864823349e-06, "loss": 21.5112, "step": 25956 }, { "epoch": 618.0238805970149, "grad_norm": 34.08124542236328, "learning_rate": 9.872503840245776e-06, "loss": 25.3088, "step": 25957 }, { "epoch": 618.0477611940298, "grad_norm": 26.25657844543457, "learning_rate": 9.872119815668203e-06, "loss": 26.1483, "step": 25958 }, { "epoch": 618.0716417910447, "grad_norm": 28.74955177307129, "learning_rate": 9.87173579109063e-06, "loss": 24.7146, "step": 25959 }, { "epoch": 618.0955223880597, "grad_norm": 32.78278732299805, "learning_rate": 9.871351766513058e-06, "loss": 24.5566, "step": 25960 }, { "epoch": 618.1194029850747, "grad_norm": 24.39505958557129, "learning_rate": 9.870967741935485e-06, "loss": 25.3519, "step": 25961 }, { "epoch": 618.1432835820896, "grad_norm": 28.549152374267578, "learning_rate": 9.870583717357912e-06, "loss": 25.8539, "step": 25962 }, { "epoch": 618.1671641791045, "grad_norm": 29.98374366760254, "learning_rate": 9.87019969278034e-06, "loss": 24.7284, "step": 25963 }, { "epoch": 618.1910447761194, "grad_norm": 28.7457218170166, "learning_rate": 9.869815668202765e-06, "loss": 25.9225, "step": 25964 }, { "epoch": 618.2149253731343, "grad_norm": 25.495697021484375, "learning_rate": 9.869431643625192e-06, "loss": 25.2425, "step": 25965 }, { "epoch": 618.2388059701492, "grad_norm": 29.72812271118164, "learning_rate": 9.869047619047621e-06, "loss": 26.4882, "step": 25966 }, { "epoch": 618.2626865671642, "grad_norm": 27.81902503967285, "learning_rate": 9.868663594470046e-06, "loss": 24.1938, "step": 25967 }, { "epoch": 618.2865671641791, "grad_norm": 21.481189727783203, "learning_rate": 9.868279569892474e-06, "loss": 24.6413, "step": 25968 }, { "epoch": 618.310447761194, "grad_norm": 32.722625732421875, "learning_rate": 9.8678955453149e-06, "loss": 25.03, "step": 25969 }, { "epoch": 618.334328358209, "grad_norm": 33.36281967163086, "learning_rate": 9.867511520737328e-06, "loss": 25.1152, "step": 25970 }, { "epoch": 618.3582089552239, "grad_norm": 23.085208892822266, "learning_rate": 9.867127496159755e-06, "loss": 24.8535, "step": 25971 }, { "epoch": 618.3820895522388, "grad_norm": 26.29661750793457, "learning_rate": 9.866743471582182e-06, "loss": 24.6655, "step": 25972 }, { "epoch": 618.4059701492537, "grad_norm": 30.18625259399414, "learning_rate": 9.866359447004608e-06, "loss": 25.8679, "step": 25973 }, { "epoch": 618.4298507462687, "grad_norm": 28.55936050415039, "learning_rate": 9.865975422427037e-06, "loss": 25.1863, "step": 25974 }, { "epoch": 618.4537313432836, "grad_norm": 20.35788345336914, "learning_rate": 9.865591397849464e-06, "loss": 24.4035, "step": 25975 }, { "epoch": 618.4776119402985, "grad_norm": 30.649593353271484, "learning_rate": 9.86520737327189e-06, "loss": 23.2881, "step": 25976 }, { "epoch": 618.5014925373134, "grad_norm": 31.722864151000977, "learning_rate": 9.864823348694317e-06, "loss": 24.447, "step": 25977 }, { "epoch": 618.5253731343283, "grad_norm": 21.22374153137207, "learning_rate": 9.864439324116744e-06, "loss": 24.2586, "step": 25978 }, { "epoch": 618.5492537313432, "grad_norm": 34.81007766723633, "learning_rate": 9.864055299539171e-06, "loss": 25.1302, "step": 25979 }, { "epoch": 618.5731343283583, "grad_norm": 32.4544677734375, "learning_rate": 9.863671274961598e-06, "loss": 25.0124, "step": 25980 }, { "epoch": 618.5970149253732, "grad_norm": 24.17289161682129, "learning_rate": 9.863287250384025e-06, "loss": 24.9602, "step": 25981 }, { "epoch": 618.6208955223881, "grad_norm": 36.94866180419922, "learning_rate": 9.862903225806453e-06, "loss": 24.9397, "step": 25982 }, { "epoch": 618.644776119403, "grad_norm": 27.09461784362793, "learning_rate": 9.86251920122888e-06, "loss": 24.4348, "step": 25983 }, { "epoch": 618.6686567164179, "grad_norm": 30.65574073791504, "learning_rate": 9.862135176651307e-06, "loss": 25.0496, "step": 25984 }, { "epoch": 618.6925373134328, "grad_norm": 33.181209564208984, "learning_rate": 9.861751152073733e-06, "loss": 24.1992, "step": 25985 }, { "epoch": 618.7164179104477, "grad_norm": 28.99519157409668, "learning_rate": 9.861367127496161e-06, "loss": 25.0167, "step": 25986 }, { "epoch": 618.7402985074627, "grad_norm": 22.570842742919922, "learning_rate": 9.860983102918587e-06, "loss": 25.923, "step": 25987 }, { "epoch": 618.7641791044776, "grad_norm": 30.530895233154297, "learning_rate": 9.860599078341014e-06, "loss": 24.2531, "step": 25988 }, { "epoch": 618.7880597014926, "grad_norm": 25.236764907836914, "learning_rate": 9.860215053763441e-06, "loss": 24.0065, "step": 25989 }, { "epoch": 618.8119402985075, "grad_norm": 25.186128616333008, "learning_rate": 9.859831029185868e-06, "loss": 25.2745, "step": 25990 }, { "epoch": 618.8358208955224, "grad_norm": 30.087661743164062, "learning_rate": 9.859447004608296e-06, "loss": 24.8907, "step": 25991 }, { "epoch": 618.8597014925373, "grad_norm": 21.27625274658203, "learning_rate": 9.859062980030723e-06, "loss": 24.1917, "step": 25992 }, { "epoch": 618.8835820895522, "grad_norm": 23.741966247558594, "learning_rate": 9.85867895545315e-06, "loss": 26.134, "step": 25993 }, { "epoch": 618.9074626865672, "grad_norm": 24.155847549438477, "learning_rate": 9.858294930875577e-06, "loss": 24.1211, "step": 25994 }, { "epoch": 618.9313432835821, "grad_norm": 29.60780143737793, "learning_rate": 9.857910906298004e-06, "loss": 24.9149, "step": 25995 }, { "epoch": 618.955223880597, "grad_norm": 30.745830535888672, "learning_rate": 9.85752688172043e-06, "loss": 25.4975, "step": 25996 }, { "epoch": 618.9791044776119, "grad_norm": 21.55984878540039, "learning_rate": 9.857142857142859e-06, "loss": 25.0149, "step": 25997 }, { "epoch": 619.0, "grad_norm": 21.854862213134766, "learning_rate": 9.856758832565286e-06, "loss": 21.4604, "step": 25998 }, { "epoch": 619.0238805970149, "grad_norm": 32.564449310302734, "learning_rate": 9.856374807987712e-06, "loss": 23.1587, "step": 25999 }, { "epoch": 619.0477611940298, "grad_norm": 25.952119827270508, "learning_rate": 9.855990783410139e-06, "loss": 23.9943, "step": 26000 }, { "epoch": 619.0716417910447, "grad_norm": 20.017864227294922, "learning_rate": 9.855606758832566e-06, "loss": 25.3455, "step": 26001 }, { "epoch": 619.0955223880597, "grad_norm": 24.779722213745117, "learning_rate": 9.855222734254993e-06, "loss": 25.4832, "step": 26002 }, { "epoch": 619.1194029850747, "grad_norm": 27.805391311645508, "learning_rate": 9.85483870967742e-06, "loss": 25.3593, "step": 26003 }, { "epoch": 619.1432835820896, "grad_norm": 25.947172164916992, "learning_rate": 9.854454685099847e-06, "loss": 24.7731, "step": 26004 }, { "epoch": 619.1671641791045, "grad_norm": 22.29290199279785, "learning_rate": 9.854070660522275e-06, "loss": 25.309, "step": 26005 }, { "epoch": 619.1910447761194, "grad_norm": 28.606996536254883, "learning_rate": 9.853686635944702e-06, "loss": 24.9502, "step": 26006 }, { "epoch": 619.2149253731343, "grad_norm": 27.381324768066406, "learning_rate": 9.853302611367127e-06, "loss": 24.283, "step": 26007 }, { "epoch": 619.2388059701492, "grad_norm": 25.392391204833984, "learning_rate": 9.852918586789555e-06, "loss": 24.0311, "step": 26008 }, { "epoch": 619.2626865671642, "grad_norm": 23.742725372314453, "learning_rate": 9.852534562211983e-06, "loss": 24.2903, "step": 26009 }, { "epoch": 619.2865671641791, "grad_norm": 26.02241325378418, "learning_rate": 9.852150537634409e-06, "loss": 24.5964, "step": 26010 }, { "epoch": 619.310447761194, "grad_norm": 32.36456298828125, "learning_rate": 9.851766513056836e-06, "loss": 25.2989, "step": 26011 }, { "epoch": 619.334328358209, "grad_norm": 22.022724151611328, "learning_rate": 9.851382488479263e-06, "loss": 24.1219, "step": 26012 }, { "epoch": 619.3582089552239, "grad_norm": 24.274524688720703, "learning_rate": 9.85099846390169e-06, "loss": 25.5169, "step": 26013 }, { "epoch": 619.3820895522388, "grad_norm": 29.47658348083496, "learning_rate": 9.850614439324118e-06, "loss": 24.2422, "step": 26014 }, { "epoch": 619.4059701492537, "grad_norm": 31.295873641967773, "learning_rate": 9.850230414746545e-06, "loss": 26.3945, "step": 26015 }, { "epoch": 619.4298507462687, "grad_norm": 19.1476993560791, "learning_rate": 9.84984639016897e-06, "loss": 25.2435, "step": 26016 }, { "epoch": 619.4537313432836, "grad_norm": 25.354576110839844, "learning_rate": 9.8494623655914e-06, "loss": 25.4244, "step": 26017 }, { "epoch": 619.4776119402985, "grad_norm": 27.52989959716797, "learning_rate": 9.849078341013826e-06, "loss": 25.2977, "step": 26018 }, { "epoch": 619.5014925373134, "grad_norm": 23.498943328857422, "learning_rate": 9.848694316436252e-06, "loss": 25.194, "step": 26019 }, { "epoch": 619.5253731343283, "grad_norm": 23.270952224731445, "learning_rate": 9.84831029185868e-06, "loss": 26.0503, "step": 26020 }, { "epoch": 619.5492537313432, "grad_norm": 20.42378807067871, "learning_rate": 9.847926267281106e-06, "loss": 24.8222, "step": 26021 }, { "epoch": 619.5731343283583, "grad_norm": 25.991498947143555, "learning_rate": 9.847542242703534e-06, "loss": 24.8071, "step": 26022 }, { "epoch": 619.5970149253732, "grad_norm": 26.26662826538086, "learning_rate": 9.84715821812596e-06, "loss": 24.7436, "step": 26023 }, { "epoch": 619.6208955223881, "grad_norm": 27.015769958496094, "learning_rate": 9.846774193548388e-06, "loss": 24.5145, "step": 26024 }, { "epoch": 619.644776119403, "grad_norm": 23.608522415161133, "learning_rate": 9.846390168970815e-06, "loss": 25.224, "step": 26025 }, { "epoch": 619.6686567164179, "grad_norm": 22.61237907409668, "learning_rate": 9.846006144393242e-06, "loss": 24.8421, "step": 26026 }, { "epoch": 619.6925373134328, "grad_norm": 22.119319915771484, "learning_rate": 9.84562211981567e-06, "loss": 24.34, "step": 26027 }, { "epoch": 619.7164179104477, "grad_norm": 24.56681251525879, "learning_rate": 9.845238095238097e-06, "loss": 24.3727, "step": 26028 }, { "epoch": 619.7402985074627, "grad_norm": 22.02560043334961, "learning_rate": 9.844854070660524e-06, "loss": 25.0078, "step": 26029 }, { "epoch": 619.7641791044776, "grad_norm": 26.37422752380371, "learning_rate": 9.84447004608295e-06, "loss": 25.1196, "step": 26030 }, { "epoch": 619.7880597014926, "grad_norm": 28.63333511352539, "learning_rate": 9.844086021505377e-06, "loss": 24.9746, "step": 26031 }, { "epoch": 619.8119402985075, "grad_norm": 29.054927825927734, "learning_rate": 9.843701996927806e-06, "loss": 25.588, "step": 26032 }, { "epoch": 619.8358208955224, "grad_norm": 23.010398864746094, "learning_rate": 9.843317972350231e-06, "loss": 25.0218, "step": 26033 }, { "epoch": 619.8597014925373, "grad_norm": 23.78619956970215, "learning_rate": 9.842933947772658e-06, "loss": 24.3829, "step": 26034 }, { "epoch": 619.8835820895522, "grad_norm": 26.832828521728516, "learning_rate": 9.842549923195085e-06, "loss": 25.1618, "step": 26035 }, { "epoch": 619.9074626865672, "grad_norm": 33.49582290649414, "learning_rate": 9.842165898617513e-06, "loss": 24.9733, "step": 26036 }, { "epoch": 619.9313432835821, "grad_norm": 28.33132553100586, "learning_rate": 9.84178187403994e-06, "loss": 25.9713, "step": 26037 }, { "epoch": 619.955223880597, "grad_norm": 23.55602264404297, "learning_rate": 9.841397849462367e-06, "loss": 25.2471, "step": 26038 }, { "epoch": 619.9791044776119, "grad_norm": 23.90228843688965, "learning_rate": 9.841013824884792e-06, "loss": 24.5309, "step": 26039 }, { "epoch": 620.0, "grad_norm": 27.77632713317871, "learning_rate": 9.840629800307221e-06, "loss": 21.49, "step": 26040 }, { "epoch": 620.0, "step": 26040, "total_flos": 1.280060656853238e+18, "train_loss": 0.4050079777859689, "train_runtime": 12833.4654, "train_samples_per_second": 258.561, "train_steps_per_second": 2.029 }, { "epoch": 620.0238805970149, "grad_norm": 24.91378402709961, "learning_rate": 1e-05, "loss": 25.035, "step": 26041 }, { "epoch": 620.0477611940298, "grad_norm": Infinity, "learning_rate": 9.999627976190476e-06, "loss": 31.1743, "step": 26042 }, { "epoch": 620.0716417910447, "grad_norm": 342.8687744140625, "learning_rate": 9.999627976190476e-06, "loss": 31.1834, "step": 26043 }, { "epoch": 620.0955223880597, "grad_norm": 190.4541778564453, "learning_rate": 9.999255952380954e-06, "loss": 30.0994, "step": 26044 }, { "epoch": 620.1194029850747, "grad_norm": 94.81751251220703, "learning_rate": 9.998883928571429e-06, "loss": 27.6317, "step": 26045 }, { "epoch": 620.1432835820896, "grad_norm": 87.42115020751953, "learning_rate": 9.998511904761904e-06, "loss": 26.8965, "step": 26046 }, { "epoch": 620.1671641791045, "grad_norm": 59.28572082519531, "learning_rate": 9.998139880952382e-06, "loss": 25.6263, "step": 26047 }, { "epoch": 620.1910447761194, "grad_norm": 58.285736083984375, "learning_rate": 9.997767857142859e-06, "loss": 25.4774, "step": 26048 }, { "epoch": 620.2149253731343, "grad_norm": 54.15262222290039, "learning_rate": 9.997395833333334e-06, "loss": 25.5026, "step": 26049 }, { "epoch": 620.2388059701492, "grad_norm": 45.90610122680664, "learning_rate": 9.99702380952381e-06, "loss": 25.3933, "step": 26050 }, { "epoch": 620.2626865671642, "grad_norm": 48.85770797729492, "learning_rate": 9.996651785714287e-06, "loss": 25.4971, "step": 26051 }, { "epoch": 620.2865671641791, "grad_norm": 34.313507080078125, "learning_rate": 9.996279761904763e-06, "loss": 24.1296, "step": 26052 }, { "epoch": 620.310447761194, "grad_norm": 41.775447845458984, "learning_rate": 9.995907738095238e-06, "loss": 25.5097, "step": 26053 }, { "epoch": 620.334328358209, "grad_norm": 31.128746032714844, "learning_rate": 9.995535714285715e-06, "loss": 26.071, "step": 26054 }, { "epoch": 620.3582089552239, "grad_norm": 30.956989288330078, "learning_rate": 9.995163690476192e-06, "loss": 25.3951, "step": 26055 }, { "epoch": 620.3820895522388, "grad_norm": 31.0334415435791, "learning_rate": 9.994791666666668e-06, "loss": 25.6565, "step": 26056 }, { "epoch": 620.4059701492537, "grad_norm": 33.700077056884766, "learning_rate": 9.994419642857143e-06, "loss": 25.432, "step": 26057 }, { "epoch": 620.4298507462687, "grad_norm": 27.85845947265625, "learning_rate": 9.99404761904762e-06, "loss": 24.723, "step": 26058 }, { "epoch": 620.4537313432836, "grad_norm": 22.763154983520508, "learning_rate": 9.993675595238096e-06, "loss": 24.2133, "step": 26059 }, { "epoch": 620.4776119402985, "grad_norm": 24.908203125, "learning_rate": 9.993303571428572e-06, "loss": 23.8575, "step": 26060 }, { "epoch": 620.5014925373134, "grad_norm": 26.25194549560547, "learning_rate": 9.992931547619049e-06, "loss": 25.424, "step": 26061 }, { "epoch": 620.5253731343283, "grad_norm": 23.371143341064453, "learning_rate": 9.992559523809524e-06, "loss": 24.9313, "step": 26062 }, { "epoch": 620.5492537313432, "grad_norm": NaN, "learning_rate": 9.992187500000001e-06, "loss": 24.4705, "step": 26063 }, { "epoch": 620.5731343283583, "grad_norm": 26.544981002807617, "learning_rate": 9.992187500000001e-06, "loss": 25.3854, "step": 26064 }, { "epoch": 620.5970149253732, "grad_norm": 25.982515335083008, "learning_rate": 9.991815476190477e-06, "loss": 25.2457, "step": 26065 }, { "epoch": 620.6208955223881, "grad_norm": 25.3665714263916, "learning_rate": 9.991443452380954e-06, "loss": 24.6075, "step": 26066 }, { "epoch": 620.644776119403, "grad_norm": 33.857421875, "learning_rate": 9.99107142857143e-06, "loss": 24.876, "step": 26067 }, { "epoch": 620.6686567164179, "grad_norm": 31.53282928466797, "learning_rate": 9.990699404761905e-06, "loss": 24.8016, "step": 26068 }, { "epoch": 620.6925373134328, "grad_norm": 25.362796783447266, "learning_rate": 9.990327380952382e-06, "loss": 24.0917, "step": 26069 }, { "epoch": 620.7164179104477, "grad_norm": 31.01021957397461, "learning_rate": 9.989955357142858e-06, "loss": 24.5128, "step": 26070 }, { "epoch": 620.7402985074627, "grad_norm": 34.345027923583984, "learning_rate": 9.989583333333333e-06, "loss": 25.013, "step": 26071 }, { "epoch": 620.7641791044776, "grad_norm": 23.64490509033203, "learning_rate": 9.98921130952381e-06, "loss": 24.4069, "step": 26072 }, { "epoch": 620.7880597014926, "grad_norm": 28.894901275634766, "learning_rate": 9.988839285714286e-06, "loss": 25.7174, "step": 26073 }, { "epoch": 620.8119402985075, "grad_norm": 28.29454803466797, "learning_rate": 9.988467261904763e-06, "loss": 25.1675, "step": 26074 }, { "epoch": 620.8358208955224, "grad_norm": 29.37328338623047, "learning_rate": 9.988095238095239e-06, "loss": 25.5043, "step": 26075 }, { "epoch": 620.8597014925373, "grad_norm": 24.891460418701172, "learning_rate": 9.987723214285714e-06, "loss": 25.7651, "step": 26076 }, { "epoch": 620.8835820895522, "grad_norm": 23.43007469177246, "learning_rate": 9.987351190476191e-06, "loss": 25.0585, "step": 26077 }, { "epoch": 620.9074626865672, "grad_norm": 25.210540771484375, "learning_rate": 9.986979166666667e-06, "loss": 26.3477, "step": 26078 }, { "epoch": 620.9313432835821, "grad_norm": 23.890453338623047, "learning_rate": 9.986607142857142e-06, "loss": 26.2726, "step": 26079 }, { "epoch": 620.955223880597, "grad_norm": 24.704011917114258, "learning_rate": 9.98623511904762e-06, "loss": 24.6064, "step": 26080 }, { "epoch": 620.9791044776119, "grad_norm": 26.745969772338867, "learning_rate": 9.985863095238097e-06, "loss": 24.9557, "step": 26081 }, { "epoch": 621.0, "grad_norm": 24.702239990234375, "learning_rate": 9.985491071428572e-06, "loss": 21.4275, "step": 26082 }, { "epoch": 621.0238805970149, "grad_norm": 21.973514556884766, "learning_rate": 9.985119047619048e-06, "loss": 24.1587, "step": 26083 }, { "epoch": 621.0477611940298, "grad_norm": 24.771589279174805, "learning_rate": 9.984747023809525e-06, "loss": 25.6406, "step": 26084 }, { "epoch": 621.0716417910447, "grad_norm": 34.5574951171875, "learning_rate": 9.984375e-06, "loss": 24.4506, "step": 26085 }, { "epoch": 621.0955223880597, "grad_norm": 26.46080780029297, "learning_rate": 9.984002976190476e-06, "loss": 24.6919, "step": 26086 }, { "epoch": 621.1194029850747, "grad_norm": 25.035846710205078, "learning_rate": 9.983630952380953e-06, "loss": 24.1896, "step": 26087 }, { "epoch": 621.1432835820896, "grad_norm": 25.299291610717773, "learning_rate": 9.98325892857143e-06, "loss": 25.4818, "step": 26088 }, { "epoch": 621.1671641791045, "grad_norm": 26.573305130004883, "learning_rate": 9.982886904761906e-06, "loss": 25.3754, "step": 26089 }, { "epoch": 621.1910447761194, "grad_norm": 26.418306350708008, "learning_rate": 9.982514880952381e-06, "loss": 25.6248, "step": 26090 }, { "epoch": 621.2149253731343, "grad_norm": 23.831689834594727, "learning_rate": 9.982142857142858e-06, "loss": 25.0234, "step": 26091 }, { "epoch": 621.2388059701492, "grad_norm": 22.85289764404297, "learning_rate": 9.981770833333334e-06, "loss": 24.2576, "step": 26092 }, { "epoch": 621.2626865671642, "grad_norm": 25.334184646606445, "learning_rate": 9.98139880952381e-06, "loss": 24.3009, "step": 26093 }, { "epoch": 621.2865671641791, "grad_norm": 23.915695190429688, "learning_rate": 9.981026785714287e-06, "loss": 25.4901, "step": 26094 }, { "epoch": 621.310447761194, "grad_norm": 25.55776023864746, "learning_rate": 9.980654761904762e-06, "loss": 25.2344, "step": 26095 }, { "epoch": 621.334328358209, "grad_norm": 20.857139587402344, "learning_rate": 9.98028273809524e-06, "loss": 25.5688, "step": 26096 }, { "epoch": 621.3582089552239, "grad_norm": 25.633827209472656, "learning_rate": 9.979910714285715e-06, "loss": 25.2934, "step": 26097 }, { "epoch": 621.3820895522388, "grad_norm": 24.880081176757812, "learning_rate": 9.979538690476192e-06, "loss": 23.825, "step": 26098 }, { "epoch": 621.4059701492537, "grad_norm": 23.499269485473633, "learning_rate": 9.979166666666668e-06, "loss": 24.5471, "step": 26099 }, { "epoch": 621.4298507462687, "grad_norm": 23.67032814025879, "learning_rate": 9.978794642857143e-06, "loss": 24.571, "step": 26100 }, { "epoch": 621.4537313432836, "grad_norm": 24.918148040771484, "learning_rate": 9.97842261904762e-06, "loss": 25.3747, "step": 26101 }, { "epoch": 621.4776119402985, "grad_norm": 21.892433166503906, "learning_rate": 9.978050595238096e-06, "loss": 24.8911, "step": 26102 }, { "epoch": 621.5014925373134, "grad_norm": 25.2368221282959, "learning_rate": 9.977678571428571e-06, "loss": 24.67, "step": 26103 }, { "epoch": 621.5253731343283, "grad_norm": 31.559696197509766, "learning_rate": 9.977306547619048e-06, "loss": 24.3542, "step": 26104 }, { "epoch": 621.5492537313432, "grad_norm": 27.29726791381836, "learning_rate": 9.976934523809526e-06, "loss": 24.8717, "step": 26105 }, { "epoch": 621.5731343283583, "grad_norm": 22.19944190979004, "learning_rate": 9.976562500000001e-06, "loss": 24.952, "step": 26106 }, { "epoch": 621.5970149253732, "grad_norm": 23.618799209594727, "learning_rate": 9.976190476190477e-06, "loss": 24.7138, "step": 26107 }, { "epoch": 621.6208955223881, "grad_norm": 25.116117477416992, "learning_rate": 9.975818452380954e-06, "loss": 25.5623, "step": 26108 }, { "epoch": 621.644776119403, "grad_norm": 26.472307205200195, "learning_rate": 9.97544642857143e-06, "loss": 24.5134, "step": 26109 }, { "epoch": 621.6686567164179, "grad_norm": 26.89175796508789, "learning_rate": 9.975074404761905e-06, "loss": 24.6014, "step": 26110 }, { "epoch": 621.6925373134328, "grad_norm": 24.83146095275879, "learning_rate": 9.974702380952382e-06, "loss": 25.1977, "step": 26111 }, { "epoch": 621.7164179104477, "grad_norm": 25.08694076538086, "learning_rate": 9.974330357142859e-06, "loss": 25.0876, "step": 26112 }, { "epoch": 621.7402985074627, "grad_norm": 23.336130142211914, "learning_rate": 9.973958333333335e-06, "loss": 24.971, "step": 26113 }, { "epoch": 621.7641791044776, "grad_norm": 24.394306182861328, "learning_rate": 9.97358630952381e-06, "loss": 24.8309, "step": 26114 }, { "epoch": 621.7880597014926, "grad_norm": 31.172273635864258, "learning_rate": 9.973214285714287e-06, "loss": 24.3962, "step": 26115 }, { "epoch": 621.8119402985075, "grad_norm": 28.983915328979492, "learning_rate": 9.972842261904763e-06, "loss": 24.5994, "step": 26116 }, { "epoch": 621.8358208955224, "grad_norm": 20.831514358520508, "learning_rate": 9.972470238095238e-06, "loss": 24.0557, "step": 26117 }, { "epoch": 621.8597014925373, "grad_norm": 31.9395809173584, "learning_rate": 9.972098214285716e-06, "loss": 25.2284, "step": 26118 }, { "epoch": 621.8835820895522, "grad_norm": 35.17112731933594, "learning_rate": 9.971726190476191e-06, "loss": 24.6708, "step": 26119 }, { "epoch": 621.9074626865672, "grad_norm": 23.15226936340332, "learning_rate": 9.971354166666668e-06, "loss": 25.647, "step": 26120 }, { "epoch": 621.9313432835821, "grad_norm": 35.16173553466797, "learning_rate": 9.970982142857144e-06, "loss": 26.0648, "step": 26121 }, { "epoch": 621.955223880597, "grad_norm": 33.65373229980469, "learning_rate": 9.970610119047621e-06, "loss": 25.544, "step": 26122 }, { "epoch": 621.9791044776119, "grad_norm": 21.786731719970703, "learning_rate": 9.970238095238096e-06, "loss": 24.8758, "step": 26123 }, { "epoch": 622.0, "grad_norm": 38.278690338134766, "learning_rate": 9.969866071428572e-06, "loss": 21.1523, "step": 26124 }, { "epoch": 622.0238805970149, "grad_norm": 28.062040328979492, "learning_rate": 9.969494047619049e-06, "loss": 26.255, "step": 26125 }, { "epoch": 622.0477611940298, "grad_norm": 41.07844543457031, "learning_rate": 9.969122023809525e-06, "loss": 24.494, "step": 26126 }, { "epoch": 622.0716417910447, "grad_norm": 30.691818237304688, "learning_rate": 9.96875e-06, "loss": 25.2049, "step": 26127 }, { "epoch": 622.0955223880597, "grad_norm": 31.57940673828125, "learning_rate": 9.968377976190477e-06, "loss": 24.4413, "step": 26128 }, { "epoch": 622.1194029850747, "grad_norm": 34.96574783325195, "learning_rate": 9.968005952380953e-06, "loss": 25.2057, "step": 26129 }, { "epoch": 622.1432835820896, "grad_norm": 25.324403762817383, "learning_rate": 9.96763392857143e-06, "loss": 23.9624, "step": 26130 }, { "epoch": 622.1671641791045, "grad_norm": 36.9755973815918, "learning_rate": 9.967261904761905e-06, "loss": 24.9352, "step": 26131 }, { "epoch": 622.1910447761194, "grad_norm": 32.03201675415039, "learning_rate": 9.966889880952381e-06, "loss": 24.9014, "step": 26132 }, { "epoch": 622.2149253731343, "grad_norm": 33.23040008544922, "learning_rate": 9.966517857142858e-06, "loss": 25.9594, "step": 26133 }, { "epoch": 622.2388059701492, "grad_norm": 35.23188781738281, "learning_rate": 9.966145833333334e-06, "loss": 24.951, "step": 26134 }, { "epoch": 622.2626865671642, "grad_norm": 28.250337600708008, "learning_rate": 9.965773809523809e-06, "loss": 25.8672, "step": 26135 }, { "epoch": 622.2865671641791, "grad_norm": 41.3726692199707, "learning_rate": 9.965401785714286e-06, "loss": 25.5118, "step": 26136 }, { "epoch": 622.310447761194, "grad_norm": 31.452219009399414, "learning_rate": 9.965029761904763e-06, "loss": 25.2419, "step": 26137 }, { "epoch": 622.334328358209, "grad_norm": 28.901996612548828, "learning_rate": 9.964657738095239e-06, "loss": 24.4828, "step": 26138 }, { "epoch": 622.3582089552239, "grad_norm": 42.492332458496094, "learning_rate": 9.964285714285714e-06, "loss": 24.8663, "step": 26139 }, { "epoch": 622.3820895522388, "grad_norm": 28.535715103149414, "learning_rate": 9.963913690476192e-06, "loss": 24.4247, "step": 26140 }, { "epoch": 622.4059701492537, "grad_norm": 37.35414505004883, "learning_rate": 9.963541666666667e-06, "loss": 24.2972, "step": 26141 }, { "epoch": 622.4298507462687, "grad_norm": 26.287925720214844, "learning_rate": 9.963169642857143e-06, "loss": 23.9414, "step": 26142 }, { "epoch": 622.4537313432836, "grad_norm": 39.0321044921875, "learning_rate": 9.96279761904762e-06, "loss": 24.5359, "step": 26143 }, { "epoch": 622.4776119402985, "grad_norm": 30.517288208007812, "learning_rate": 9.962425595238097e-06, "loss": 23.7032, "step": 26144 }, { "epoch": 622.5014925373134, "grad_norm": 30.642763137817383, "learning_rate": 9.962053571428573e-06, "loss": 24.8484, "step": 26145 }, { "epoch": 622.5253731343283, "grad_norm": 36.33723831176758, "learning_rate": 9.961681547619048e-06, "loss": 25.5902, "step": 26146 }, { "epoch": 622.5492537313432, "grad_norm": 23.820173263549805, "learning_rate": 9.961309523809525e-06, "loss": 25.0757, "step": 26147 }, { "epoch": 622.5731343283583, "grad_norm": 31.761919021606445, "learning_rate": 9.9609375e-06, "loss": 26.2149, "step": 26148 }, { "epoch": 622.5970149253732, "grad_norm": 27.708927154541016, "learning_rate": 9.960565476190476e-06, "loss": 25.2541, "step": 26149 }, { "epoch": 622.6208955223881, "grad_norm": 27.316608428955078, "learning_rate": 9.960193452380953e-06, "loss": 25.9627, "step": 26150 }, { "epoch": 622.644776119403, "grad_norm": 25.586172103881836, "learning_rate": 9.959821428571429e-06, "loss": 23.5837, "step": 26151 }, { "epoch": 622.6686567164179, "grad_norm": 28.683391571044922, "learning_rate": 9.959449404761904e-06, "loss": 24.4049, "step": 26152 }, { "epoch": 622.6925373134328, "grad_norm": 26.972593307495117, "learning_rate": 9.959077380952382e-06, "loss": 24.6797, "step": 26153 }, { "epoch": 622.7164179104477, "grad_norm": 23.700153350830078, "learning_rate": 9.958705357142859e-06, "loss": 24.6944, "step": 26154 }, { "epoch": 622.7402985074627, "grad_norm": 28.03346824645996, "learning_rate": 9.958333333333334e-06, "loss": 24.4762, "step": 26155 }, { "epoch": 622.7641791044776, "grad_norm": 27.623281478881836, "learning_rate": 9.95796130952381e-06, "loss": 25.0586, "step": 26156 }, { "epoch": 622.7880597014926, "grad_norm": 24.915176391601562, "learning_rate": 9.957589285714287e-06, "loss": 25.4393, "step": 26157 }, { "epoch": 622.8119402985075, "grad_norm": 24.086997985839844, "learning_rate": 9.957217261904762e-06, "loss": 24.6765, "step": 26158 }, { "epoch": 622.8358208955224, "grad_norm": 34.52686309814453, "learning_rate": 9.956845238095238e-06, "loss": 24.1498, "step": 26159 }, { "epoch": 622.8597014925373, "grad_norm": 24.30274200439453, "learning_rate": 9.956473214285715e-06, "loss": 24.585, "step": 26160 }, { "epoch": 622.8835820895522, "grad_norm": 30.797683715820312, "learning_rate": 9.956101190476192e-06, "loss": 24.0085, "step": 26161 }, { "epoch": 622.9074626865672, "grad_norm": 26.958532333374023, "learning_rate": 9.955729166666668e-06, "loss": 24.3729, "step": 26162 }, { "epoch": 622.9313432835821, "grad_norm": 30.99280548095703, "learning_rate": 9.955357142857143e-06, "loss": 25.2502, "step": 26163 }, { "epoch": 622.955223880597, "grad_norm": 24.40467643737793, "learning_rate": 9.95498511904762e-06, "loss": 24.8558, "step": 26164 }, { "epoch": 622.9791044776119, "grad_norm": 27.682308197021484, "learning_rate": 9.954613095238096e-06, "loss": 25.1101, "step": 26165 }, { "epoch": 623.0, "grad_norm": 21.96601104736328, "learning_rate": 9.954241071428571e-06, "loss": 21.3423, "step": 26166 }, { "epoch": 623.0238805970149, "grad_norm": 27.269691467285156, "learning_rate": 9.953869047619049e-06, "loss": 24.8075, "step": 26167 }, { "epoch": 623.0477611940298, "grad_norm": 24.425676345825195, "learning_rate": 9.953497023809524e-06, "loss": 24.5174, "step": 26168 }, { "epoch": 623.0716417910447, "grad_norm": 25.62434196472168, "learning_rate": 9.953125000000001e-06, "loss": 24.7581, "step": 26169 }, { "epoch": 623.0955223880597, "grad_norm": 25.065420150756836, "learning_rate": 9.952752976190477e-06, "loss": 24.7947, "step": 26170 }, { "epoch": 623.1194029850747, "grad_norm": 22.053775787353516, "learning_rate": 9.952380952380954e-06, "loss": 24.461, "step": 26171 }, { "epoch": 623.1432835820896, "grad_norm": 24.989675521850586, "learning_rate": 9.95200892857143e-06, "loss": 24.3799, "step": 26172 }, { "epoch": 623.1671641791045, "grad_norm": 24.917587280273438, "learning_rate": 9.951636904761905e-06, "loss": 24.5078, "step": 26173 }, { "epoch": 623.1910447761194, "grad_norm": 22.615922927856445, "learning_rate": 9.951264880952382e-06, "loss": 24.4051, "step": 26174 }, { "epoch": 623.2149253731343, "grad_norm": 25.998004913330078, "learning_rate": 9.950892857142858e-06, "loss": 24.966, "step": 26175 }, { "epoch": 623.2388059701492, "grad_norm": 23.970062255859375, "learning_rate": 9.950520833333333e-06, "loss": 25.095, "step": 26176 }, { "epoch": 623.2626865671642, "grad_norm": 27.032665252685547, "learning_rate": 9.95014880952381e-06, "loss": 25.3818, "step": 26177 }, { "epoch": 623.2865671641791, "grad_norm": 23.917381286621094, "learning_rate": 9.949776785714288e-06, "loss": 25.6007, "step": 26178 }, { "epoch": 623.310447761194, "grad_norm": 21.65546989440918, "learning_rate": 9.949404761904763e-06, "loss": 24.2326, "step": 26179 }, { "epoch": 623.334328358209, "grad_norm": 23.565580368041992, "learning_rate": 9.949032738095239e-06, "loss": 23.6481, "step": 26180 }, { "epoch": 623.3582089552239, "grad_norm": 25.22085189819336, "learning_rate": 9.948660714285716e-06, "loss": 26.2647, "step": 26181 }, { "epoch": 623.3820895522388, "grad_norm": 24.539743423461914, "learning_rate": 9.948288690476191e-06, "loss": 24.5061, "step": 26182 }, { "epoch": 623.4059701492537, "grad_norm": 24.893327713012695, "learning_rate": 9.947916666666667e-06, "loss": 25.2971, "step": 26183 }, { "epoch": 623.4298507462687, "grad_norm": 21.352903366088867, "learning_rate": 9.947544642857144e-06, "loss": 24.2108, "step": 26184 }, { "epoch": 623.4537313432836, "grad_norm": 23.817258834838867, "learning_rate": 9.947172619047621e-06, "loss": 24.9458, "step": 26185 }, { "epoch": 623.4776119402985, "grad_norm": 21.70363426208496, "learning_rate": 9.946800595238097e-06, "loss": 25.5279, "step": 26186 }, { "epoch": 623.5014925373134, "grad_norm": 26.546266555786133, "learning_rate": 9.946428571428572e-06, "loss": 25.6024, "step": 26187 }, { "epoch": 623.5253731343283, "grad_norm": 23.855884552001953, "learning_rate": 9.946056547619048e-06, "loss": 25.127, "step": 26188 }, { "epoch": 623.5492537313432, "grad_norm": 21.637657165527344, "learning_rate": 9.945684523809525e-06, "loss": 24.4339, "step": 26189 }, { "epoch": 623.5731343283583, "grad_norm": 27.520000457763672, "learning_rate": 9.9453125e-06, "loss": 25.1477, "step": 26190 }, { "epoch": 623.5970149253732, "grad_norm": 23.13272476196289, "learning_rate": 9.944940476190476e-06, "loss": 25.1561, "step": 26191 }, { "epoch": 623.6208955223881, "grad_norm": 23.93512535095215, "learning_rate": 9.944568452380953e-06, "loss": 25.4654, "step": 26192 }, { "epoch": 623.644776119403, "grad_norm": 20.5714054107666, "learning_rate": 9.94419642857143e-06, "loss": 23.7676, "step": 26193 }, { "epoch": 623.6686567164179, "grad_norm": 24.93540382385254, "learning_rate": 9.943824404761906e-06, "loss": 23.9326, "step": 26194 }, { "epoch": 623.6925373134328, "grad_norm": 23.0811710357666, "learning_rate": 9.943452380952381e-06, "loss": 24.4098, "step": 26195 }, { "epoch": 623.7164179104477, "grad_norm": 25.89657211303711, "learning_rate": 9.943080357142858e-06, "loss": 24.9272, "step": 26196 }, { "epoch": 623.7402985074627, "grad_norm": 27.41640853881836, "learning_rate": 9.942708333333334e-06, "loss": 25.6491, "step": 26197 }, { "epoch": 623.7641791044776, "grad_norm": 30.205127716064453, "learning_rate": 9.94233630952381e-06, "loss": 24.6456, "step": 26198 }, { "epoch": 623.7880597014926, "grad_norm": 26.06348991394043, "learning_rate": 9.941964285714287e-06, "loss": 24.7027, "step": 26199 }, { "epoch": 623.8119402985075, "grad_norm": 24.002147674560547, "learning_rate": 9.941592261904762e-06, "loss": 25.0488, "step": 26200 }, { "epoch": 623.8358208955224, "grad_norm": 22.69706153869629, "learning_rate": 9.94122023809524e-06, "loss": 24.7165, "step": 26201 }, { "epoch": 623.8597014925373, "grad_norm": 24.934734344482422, "learning_rate": 9.940848214285715e-06, "loss": 25.3127, "step": 26202 }, { "epoch": 623.8835820895522, "grad_norm": 27.45754623413086, "learning_rate": 9.940476190476192e-06, "loss": 24.9832, "step": 26203 }, { "epoch": 623.9074626865672, "grad_norm": 29.029735565185547, "learning_rate": 9.940104166666667e-06, "loss": 24.8286, "step": 26204 }, { "epoch": 623.9313432835821, "grad_norm": 20.424327850341797, "learning_rate": 9.939732142857143e-06, "loss": 25.373, "step": 26205 }, { "epoch": 623.955223880597, "grad_norm": 26.03881072998047, "learning_rate": 9.93936011904762e-06, "loss": 24.4542, "step": 26206 }, { "epoch": 623.9791044776119, "grad_norm": 32.24114990234375, "learning_rate": 9.938988095238096e-06, "loss": 24.6572, "step": 26207 }, { "epoch": 624.0, "grad_norm": 24.39130973815918, "learning_rate": 9.938616071428571e-06, "loss": 20.7967, "step": 26208 }, { "epoch": 624.0238805970149, "grad_norm": 23.160137176513672, "learning_rate": 9.938244047619048e-06, "loss": 24.208, "step": 26209 }, { "epoch": 624.0477611940298, "grad_norm": 24.81633949279785, "learning_rate": 9.937872023809525e-06, "loss": 23.6972, "step": 26210 }, { "epoch": 624.0716417910447, "grad_norm": 29.318784713745117, "learning_rate": 9.937500000000001e-06, "loss": 24.5352, "step": 26211 }, { "epoch": 624.0955223880597, "grad_norm": 25.646860122680664, "learning_rate": 9.937127976190476e-06, "loss": 24.1436, "step": 26212 }, { "epoch": 624.1194029850747, "grad_norm": 22.81551170349121, "learning_rate": 9.936755952380954e-06, "loss": 25.3824, "step": 26213 }, { "epoch": 624.1432835820896, "grad_norm": 25.782062530517578, "learning_rate": 9.93638392857143e-06, "loss": 25.5302, "step": 26214 }, { "epoch": 624.1671641791045, "grad_norm": 29.818784713745117, "learning_rate": 9.936011904761905e-06, "loss": 24.7878, "step": 26215 }, { "epoch": 624.1910447761194, "grad_norm": 30.66541290283203, "learning_rate": 9.935639880952382e-06, "loss": 24.537, "step": 26216 }, { "epoch": 624.2149253731343, "grad_norm": 25.2702693939209, "learning_rate": 9.935267857142859e-06, "loss": 25.6277, "step": 26217 }, { "epoch": 624.2388059701492, "grad_norm": 23.65883445739746, "learning_rate": 9.934895833333335e-06, "loss": 24.8512, "step": 26218 }, { "epoch": 624.2626865671642, "grad_norm": 24.66923713684082, "learning_rate": 9.93452380952381e-06, "loss": 24.0302, "step": 26219 }, { "epoch": 624.2865671641791, "grad_norm": 26.437881469726562, "learning_rate": 9.934151785714287e-06, "loss": 25.5298, "step": 26220 }, { "epoch": 624.310447761194, "grad_norm": 25.250146865844727, "learning_rate": 9.933779761904763e-06, "loss": 24.9936, "step": 26221 }, { "epoch": 624.334328358209, "grad_norm": 25.13810157775879, "learning_rate": 9.933407738095238e-06, "loss": 25.3266, "step": 26222 }, { "epoch": 624.3582089552239, "grad_norm": 25.23234748840332, "learning_rate": 9.933035714285715e-06, "loss": 25.1103, "step": 26223 }, { "epoch": 624.3820895522388, "grad_norm": 29.54981803894043, "learning_rate": 9.932663690476191e-06, "loss": 25.1696, "step": 26224 }, { "epoch": 624.4059701492537, "grad_norm": 25.61219024658203, "learning_rate": 9.932291666666668e-06, "loss": 24.9651, "step": 26225 }, { "epoch": 624.4298507462687, "grad_norm": 25.34833526611328, "learning_rate": 9.931919642857144e-06, "loss": 25.9805, "step": 26226 }, { "epoch": 624.4537313432836, "grad_norm": 21.056711196899414, "learning_rate": 9.93154761904762e-06, "loss": 25.1762, "step": 26227 }, { "epoch": 624.4776119402985, "grad_norm": 21.44442367553711, "learning_rate": 9.931175595238096e-06, "loss": 24.7618, "step": 26228 }, { "epoch": 624.5014925373134, "grad_norm": 27.038555145263672, "learning_rate": 9.930803571428572e-06, "loss": 24.2113, "step": 26229 }, { "epoch": 624.5253731343283, "grad_norm": 26.864185333251953, "learning_rate": 9.930431547619049e-06, "loss": 24.2102, "step": 26230 }, { "epoch": 624.5492537313432, "grad_norm": 29.11811065673828, "learning_rate": 9.930059523809524e-06, "loss": 25.2511, "step": 26231 }, { "epoch": 624.5731343283583, "grad_norm": 23.782861709594727, "learning_rate": 9.9296875e-06, "loss": 24.5572, "step": 26232 }, { "epoch": 624.5970149253732, "grad_norm": 24.423126220703125, "learning_rate": 9.929315476190477e-06, "loss": 24.2013, "step": 26233 }, { "epoch": 624.6208955223881, "grad_norm": 25.43351173400879, "learning_rate": 9.928943452380954e-06, "loss": 23.83, "step": 26234 }, { "epoch": 624.644776119403, "grad_norm": 26.8736572265625, "learning_rate": 9.92857142857143e-06, "loss": 24.3147, "step": 26235 }, { "epoch": 624.6686567164179, "grad_norm": 25.589479446411133, "learning_rate": 9.928199404761905e-06, "loss": 25.2411, "step": 26236 }, { "epoch": 624.6925373134328, "grad_norm": 24.361083984375, "learning_rate": 9.927827380952383e-06, "loss": 24.0134, "step": 26237 }, { "epoch": 624.7164179104477, "grad_norm": 26.538875579833984, "learning_rate": 9.927455357142858e-06, "loss": 24.0433, "step": 26238 }, { "epoch": 624.7402985074627, "grad_norm": 25.471370697021484, "learning_rate": 9.927083333333334e-06, "loss": 25.2658, "step": 26239 }, { "epoch": 624.7641791044776, "grad_norm": 29.073253631591797, "learning_rate": 9.92671130952381e-06, "loss": 25.7129, "step": 26240 }, { "epoch": 624.7880597014926, "grad_norm": 23.623449325561523, "learning_rate": 9.926339285714288e-06, "loss": 24.8227, "step": 26241 }, { "epoch": 624.8119402985075, "grad_norm": 29.939706802368164, "learning_rate": 9.925967261904763e-06, "loss": 25.9399, "step": 26242 }, { "epoch": 624.8358208955224, "grad_norm": 26.805702209472656, "learning_rate": 9.925595238095239e-06, "loss": 24.4864, "step": 26243 }, { "epoch": 624.8597014925373, "grad_norm": 29.85672950744629, "learning_rate": 9.925223214285716e-06, "loss": 24.6916, "step": 26244 }, { "epoch": 624.8835820895522, "grad_norm": 28.68791961669922, "learning_rate": 9.924851190476192e-06, "loss": 25.0446, "step": 26245 }, { "epoch": 624.9074626865672, "grad_norm": 24.394943237304688, "learning_rate": 9.924479166666667e-06, "loss": 24.3227, "step": 26246 }, { "epoch": 624.9313432835821, "grad_norm": 31.705686569213867, "learning_rate": 9.924107142857143e-06, "loss": 23.6384, "step": 26247 }, { "epoch": 624.955223880597, "grad_norm": 34.00606918334961, "learning_rate": 9.92373511904762e-06, "loss": 24.7858, "step": 26248 }, { "epoch": 624.9791044776119, "grad_norm": 24.961383819580078, "learning_rate": 9.923363095238097e-06, "loss": 25.3181, "step": 26249 }, { "epoch": 625.0, "grad_norm": 25.91278648376465, "learning_rate": 9.922991071428572e-06, "loss": 21.8728, "step": 26250 }, { "epoch": 625.0238805970149, "grad_norm": 28.382904052734375, "learning_rate": 9.922619047619048e-06, "loss": 23.037, "step": 26251 }, { "epoch": 625.0477611940298, "grad_norm": 23.964353561401367, "learning_rate": 9.922247023809525e-06, "loss": 25.3735, "step": 26252 }, { "epoch": 625.0716417910447, "grad_norm": 29.455690383911133, "learning_rate": 9.921875e-06, "loss": 25.0235, "step": 26253 }, { "epoch": 625.0955223880597, "grad_norm": 29.5518798828125, "learning_rate": 9.921502976190476e-06, "loss": 23.7772, "step": 26254 }, { "epoch": 625.1194029850747, "grad_norm": 28.207279205322266, "learning_rate": 9.921130952380953e-06, "loss": 24.4098, "step": 26255 }, { "epoch": 625.1432835820896, "grad_norm": 22.911775588989258, "learning_rate": 9.920758928571429e-06, "loss": 24.6499, "step": 26256 }, { "epoch": 625.1671641791045, "grad_norm": 28.369247436523438, "learning_rate": 9.920386904761904e-06, "loss": 25.17, "step": 26257 }, { "epoch": 625.1910447761194, "grad_norm": 25.387388229370117, "learning_rate": 9.920014880952381e-06, "loss": 25.857, "step": 26258 }, { "epoch": 625.2149253731343, "grad_norm": 25.431726455688477, "learning_rate": 9.919642857142859e-06, "loss": 25.028, "step": 26259 }, { "epoch": 625.2388059701492, "grad_norm": 22.9133243560791, "learning_rate": 9.919270833333334e-06, "loss": 24.3015, "step": 26260 }, { "epoch": 625.2626865671642, "grad_norm": 23.307003021240234, "learning_rate": 9.91889880952381e-06, "loss": 26.1238, "step": 26261 }, { "epoch": 625.2865671641791, "grad_norm": 23.48640251159668, "learning_rate": 9.918526785714287e-06, "loss": 24.5402, "step": 26262 }, { "epoch": 625.310447761194, "grad_norm": 21.947586059570312, "learning_rate": 9.918154761904762e-06, "loss": 24.253, "step": 26263 }, { "epoch": 625.334328358209, "grad_norm": 24.44500160217285, "learning_rate": 9.917782738095238e-06, "loss": 25.0272, "step": 26264 }, { "epoch": 625.3582089552239, "grad_norm": 20.736799240112305, "learning_rate": 9.917410714285715e-06, "loss": 25.4359, "step": 26265 }, { "epoch": 625.3820895522388, "grad_norm": 26.814987182617188, "learning_rate": 9.917038690476192e-06, "loss": 24.2251, "step": 26266 }, { "epoch": 625.4059701492537, "grad_norm": 25.870620727539062, "learning_rate": 9.916666666666668e-06, "loss": 24.4073, "step": 26267 }, { "epoch": 625.4298507462687, "grad_norm": 28.053050994873047, "learning_rate": 9.916294642857143e-06, "loss": 24.6775, "step": 26268 }, { "epoch": 625.4537313432836, "grad_norm": 23.044513702392578, "learning_rate": 9.91592261904762e-06, "loss": 24.0751, "step": 26269 }, { "epoch": 625.4776119402985, "grad_norm": 28.800580978393555, "learning_rate": 9.915550595238096e-06, "loss": 25.7556, "step": 26270 }, { "epoch": 625.5014925373134, "grad_norm": 33.92456817626953, "learning_rate": 9.915178571428571e-06, "loss": 24.4037, "step": 26271 }, { "epoch": 625.5253731343283, "grad_norm": 28.196022033691406, "learning_rate": 9.914806547619049e-06, "loss": 24.4607, "step": 26272 }, { "epoch": 625.5492537313432, "grad_norm": 21.123517990112305, "learning_rate": 9.914434523809524e-06, "loss": 24.2536, "step": 26273 }, { "epoch": 625.5731343283583, "grad_norm": 25.854751586914062, "learning_rate": 9.914062500000001e-06, "loss": 25.9238, "step": 26274 }, { "epoch": 625.5970149253732, "grad_norm": 31.504497528076172, "learning_rate": 9.913690476190477e-06, "loss": 25.211, "step": 26275 }, { "epoch": 625.6208955223881, "grad_norm": 26.072492599487305, "learning_rate": 9.913318452380954e-06, "loss": 23.8266, "step": 26276 }, { "epoch": 625.644776119403, "grad_norm": 21.04237937927246, "learning_rate": 9.91294642857143e-06, "loss": 23.6498, "step": 26277 }, { "epoch": 625.6686567164179, "grad_norm": 27.169776916503906, "learning_rate": 9.912574404761905e-06, "loss": 25.6683, "step": 26278 }, { "epoch": 625.6925373134328, "grad_norm": 38.49479293823242, "learning_rate": 9.912202380952382e-06, "loss": 25.0689, "step": 26279 }, { "epoch": 625.7164179104477, "grad_norm": 24.1158504486084, "learning_rate": 9.911830357142858e-06, "loss": 24.9777, "step": 26280 }, { "epoch": 625.7402985074627, "grad_norm": 36.262596130371094, "learning_rate": 9.911458333333333e-06, "loss": 24.9557, "step": 26281 }, { "epoch": 625.7641791044776, "grad_norm": 32.53903579711914, "learning_rate": 9.91108630952381e-06, "loss": 24.6404, "step": 26282 }, { "epoch": 625.7880597014926, "grad_norm": 25.171184539794922, "learning_rate": 9.910714285714288e-06, "loss": 24.3611, "step": 26283 }, { "epoch": 625.8119402985075, "grad_norm": 42.09287643432617, "learning_rate": 9.910342261904763e-06, "loss": 24.1288, "step": 26284 }, { "epoch": 625.8358208955224, "grad_norm": 28.79418182373047, "learning_rate": 9.909970238095238e-06, "loss": 24.6508, "step": 26285 }, { "epoch": 625.8597014925373, "grad_norm": 32.476776123046875, "learning_rate": 9.909598214285716e-06, "loss": 25.3883, "step": 26286 }, { "epoch": 625.8835820895522, "grad_norm": 41.55902862548828, "learning_rate": 9.909226190476191e-06, "loss": 23.6617, "step": 26287 }, { "epoch": 625.9074626865672, "grad_norm": 27.39820098876953, "learning_rate": 9.908854166666667e-06, "loss": 24.9353, "step": 26288 }, { "epoch": 625.9313432835821, "grad_norm": 54.63384246826172, "learning_rate": 9.908482142857144e-06, "loss": 26.0302, "step": 26289 }, { "epoch": 625.955223880597, "grad_norm": 35.04170608520508, "learning_rate": 9.908110119047621e-06, "loss": 25.2224, "step": 26290 }, { "epoch": 625.9791044776119, "grad_norm": 55.96820831298828, "learning_rate": 9.907738095238097e-06, "loss": 24.6894, "step": 26291 }, { "epoch": 626.0, "grad_norm": 38.488136291503906, "learning_rate": 9.907366071428572e-06, "loss": 22.1716, "step": 26292 }, { "epoch": 626.0238805970149, "grad_norm": 52.314605712890625, "learning_rate": 9.90699404761905e-06, "loss": 24.2805, "step": 26293 }, { "epoch": 626.0477611940298, "grad_norm": 43.419944763183594, "learning_rate": 9.906622023809525e-06, "loss": 25.1744, "step": 26294 }, { "epoch": 626.0716417910447, "grad_norm": 47.1335334777832, "learning_rate": 9.90625e-06, "loss": 25.7141, "step": 26295 }, { "epoch": 626.0955223880597, "grad_norm": 46.716278076171875, "learning_rate": 9.905877976190477e-06, "loss": 25.4719, "step": 26296 }, { "epoch": 626.1194029850747, "grad_norm": 48.573604583740234, "learning_rate": 9.905505952380953e-06, "loss": 25.3355, "step": 26297 }, { "epoch": 626.1432835820896, "grad_norm": 45.22209930419922, "learning_rate": 9.90513392857143e-06, "loss": 24.7212, "step": 26298 }, { "epoch": 626.1671641791045, "grad_norm": 48.6837272644043, "learning_rate": 9.904761904761906e-06, "loss": 24.1505, "step": 26299 }, { "epoch": 626.1910447761194, "grad_norm": 45.32852554321289, "learning_rate": 9.904389880952383e-06, "loss": 24.07, "step": 26300 }, { "epoch": 626.2149253731343, "grad_norm": 43.113746643066406, "learning_rate": 9.904017857142858e-06, "loss": 24.417, "step": 26301 }, { "epoch": 626.2388059701492, "grad_norm": 40.37260437011719, "learning_rate": 9.903645833333334e-06, "loss": 24.6242, "step": 26302 }, { "epoch": 626.2626865671642, "grad_norm": 48.70083999633789, "learning_rate": 9.90327380952381e-06, "loss": 24.6742, "step": 26303 }, { "epoch": 626.2865671641791, "grad_norm": 40.80581283569336, "learning_rate": 9.902901785714286e-06, "loss": 25.8223, "step": 26304 }, { "epoch": 626.310447761194, "grad_norm": 48.669490814208984, "learning_rate": 9.902529761904762e-06, "loss": 24.119, "step": 26305 }, { "epoch": 626.334328358209, "grad_norm": 44.2794075012207, "learning_rate": 9.90215773809524e-06, "loss": 24.4372, "step": 26306 }, { "epoch": 626.3582089552239, "grad_norm": 44.434791564941406, "learning_rate": 9.901785714285715e-06, "loss": 24.133, "step": 26307 }, { "epoch": 626.3820895522388, "grad_norm": 41.451534271240234, "learning_rate": 9.901413690476192e-06, "loss": 25.0111, "step": 26308 }, { "epoch": 626.4059701492537, "grad_norm": 47.300994873046875, "learning_rate": 9.901041666666667e-06, "loss": 25.0251, "step": 26309 }, { "epoch": 626.4298507462687, "grad_norm": 38.66123962402344, "learning_rate": 9.900669642857143e-06, "loss": 24.3928, "step": 26310 }, { "epoch": 626.4537313432836, "grad_norm": 46.88286209106445, "learning_rate": 9.90029761904762e-06, "loss": 24.7414, "step": 26311 }, { "epoch": 626.4776119402985, "grad_norm": 44.52467346191406, "learning_rate": 9.899925595238096e-06, "loss": 25.228, "step": 26312 }, { "epoch": 626.5014925373134, "grad_norm": 44.708309173583984, "learning_rate": 9.899553571428571e-06, "loss": 25.0445, "step": 26313 }, { "epoch": 626.5253731343283, "grad_norm": 43.34814453125, "learning_rate": 9.899181547619048e-06, "loss": 24.6711, "step": 26314 }, { "epoch": 626.5492537313432, "grad_norm": 42.328819274902344, "learning_rate": 9.898809523809525e-06, "loss": 24.8445, "step": 26315 }, { "epoch": 626.5731343283583, "grad_norm": 38.41672897338867, "learning_rate": 9.898437500000001e-06, "loss": 25.297, "step": 26316 }, { "epoch": 626.5970149253732, "grad_norm": NaN, "learning_rate": 9.898065476190476e-06, "loss": 29.261, "step": 26317 }, { "epoch": 626.6208955223881, "grad_norm": 45.19789123535156, "learning_rate": 9.898065476190476e-06, "loss": 25.6546, "step": 26318 }, { "epoch": 626.644776119403, "grad_norm": 37.06144714355469, "learning_rate": 9.897693452380954e-06, "loss": 24.5754, "step": 26319 }, { "epoch": 626.6686567164179, "grad_norm": 49.76111602783203, "learning_rate": 9.897321428571429e-06, "loss": 24.9053, "step": 26320 }, { "epoch": 626.6925373134328, "grad_norm": 38.915557861328125, "learning_rate": 9.896949404761905e-06, "loss": 24.0628, "step": 26321 }, { "epoch": 626.7164179104477, "grad_norm": 45.64610290527344, "learning_rate": 9.896577380952382e-06, "loss": 24.3142, "step": 26322 }, { "epoch": 626.7402985074627, "grad_norm": 43.72671127319336, "learning_rate": 9.896205357142859e-06, "loss": 25.045, "step": 26323 }, { "epoch": 626.7641791044776, "grad_norm": 43.71349334716797, "learning_rate": 9.895833333333334e-06, "loss": 23.9096, "step": 26324 }, { "epoch": 626.7880597014926, "grad_norm": 40.033546447753906, "learning_rate": 9.89546130952381e-06, "loss": 24.8217, "step": 26325 }, { "epoch": 626.8119402985075, "grad_norm": 44.5277214050293, "learning_rate": 9.895089285714287e-06, "loss": 24.0957, "step": 26326 }, { "epoch": 626.8358208955224, "grad_norm": 37.202178955078125, "learning_rate": 9.894717261904763e-06, "loss": 24.7057, "step": 26327 }, { "epoch": 626.8597014925373, "grad_norm": 45.44336700439453, "learning_rate": 9.894345238095238e-06, "loss": 24.6792, "step": 26328 }, { "epoch": 626.8835820895522, "grad_norm": 39.90263748168945, "learning_rate": 9.893973214285715e-06, "loss": 24.798, "step": 26329 }, { "epoch": 626.9074626865672, "grad_norm": 52.987457275390625, "learning_rate": 9.89360119047619e-06, "loss": 24.0877, "step": 26330 }, { "epoch": 626.9313432835821, "grad_norm": 53.64413070678711, "learning_rate": 9.893229166666668e-06, "loss": 25.0061, "step": 26331 }, { "epoch": 626.955223880597, "grad_norm": 31.701366424560547, "learning_rate": 9.892857142857143e-06, "loss": 24.6495, "step": 26332 }, { "epoch": 626.9791044776119, "grad_norm": NaN, "learning_rate": 9.89248511904762e-06, "loss": 28.9293, "step": 26333 }, { "epoch": 627.0, "grad_norm": 26.012569427490234, "learning_rate": 9.89248511904762e-06, "loss": 21.8265, "step": 26334 }, { "epoch": 627.0238805970149, "grad_norm": 31.096445083618164, "learning_rate": 9.892113095238096e-06, "loss": 24.4368, "step": 26335 }, { "epoch": 627.0477611940298, "grad_norm": 24.574275970458984, "learning_rate": 9.891741071428572e-06, "loss": 25.5098, "step": 26336 }, { "epoch": 627.0716417910447, "grad_norm": 31.449804306030273, "learning_rate": 9.891369047619049e-06, "loss": 25.1465, "step": 26337 }, { "epoch": 627.0955223880597, "grad_norm": 25.445524215698242, "learning_rate": 9.890997023809524e-06, "loss": 24.0813, "step": 26338 }, { "epoch": 627.1194029850747, "grad_norm": 31.196739196777344, "learning_rate": 9.890625e-06, "loss": 25.1208, "step": 26339 }, { "epoch": 627.1432835820896, "grad_norm": 22.18548011779785, "learning_rate": 9.890252976190477e-06, "loss": 26.0526, "step": 26340 }, { "epoch": 627.1671641791045, "grad_norm": 31.828235626220703, "learning_rate": 9.889880952380954e-06, "loss": 24.967, "step": 26341 }, { "epoch": 627.1910447761194, "grad_norm": 22.823535919189453, "learning_rate": 9.88950892857143e-06, "loss": 24.5114, "step": 26342 }, { "epoch": 627.2149253731343, "grad_norm": 33.83493423461914, "learning_rate": 9.889136904761905e-06, "loss": 24.376, "step": 26343 }, { "epoch": 627.2388059701492, "grad_norm": 24.617420196533203, "learning_rate": 9.888764880952382e-06, "loss": 24.5254, "step": 26344 }, { "epoch": 627.2626865671642, "grad_norm": 33.599395751953125, "learning_rate": 9.888392857142858e-06, "loss": 24.7164, "step": 26345 }, { "epoch": 627.2865671641791, "grad_norm": 27.620431900024414, "learning_rate": 9.888020833333333e-06, "loss": 23.6419, "step": 26346 }, { "epoch": 627.310447761194, "grad_norm": 28.967958450317383, "learning_rate": 9.88764880952381e-06, "loss": 24.6206, "step": 26347 }, { "epoch": 627.334328358209, "grad_norm": 26.579023361206055, "learning_rate": 9.887276785714288e-06, "loss": 24.8356, "step": 26348 }, { "epoch": 627.3582089552239, "grad_norm": 28.76621437072754, "learning_rate": 9.886904761904763e-06, "loss": 23.3118, "step": 26349 }, { "epoch": 627.3820895522388, "grad_norm": 26.175262451171875, "learning_rate": 9.886532738095239e-06, "loss": 25.0438, "step": 26350 }, { "epoch": 627.4059701492537, "grad_norm": 29.011743545532227, "learning_rate": 9.886160714285716e-06, "loss": 24.8605, "step": 26351 }, { "epoch": 627.4298507462687, "grad_norm": 26.052589416503906, "learning_rate": 9.885788690476191e-06, "loss": 25.3307, "step": 26352 }, { "epoch": 627.4537313432836, "grad_norm": 31.418912887573242, "learning_rate": 9.885416666666667e-06, "loss": 25.2397, "step": 26353 }, { "epoch": 627.4776119402985, "grad_norm": 28.675981521606445, "learning_rate": 9.885044642857144e-06, "loss": 24.7294, "step": 26354 }, { "epoch": 627.5014925373134, "grad_norm": 27.361602783203125, "learning_rate": 9.88467261904762e-06, "loss": 24.6308, "step": 26355 }, { "epoch": 627.5253731343283, "grad_norm": 24.882740020751953, "learning_rate": 9.884300595238097e-06, "loss": 24.6787, "step": 26356 }, { "epoch": 627.5492537313432, "grad_norm": 24.19281005859375, "learning_rate": 9.883928571428572e-06, "loss": 23.7113, "step": 26357 }, { "epoch": 627.5731343283583, "grad_norm": 24.612768173217773, "learning_rate": 9.88355654761905e-06, "loss": 24.4792, "step": 26358 }, { "epoch": 627.5970149253732, "grad_norm": 28.082002639770508, "learning_rate": 9.883184523809525e-06, "loss": 25.0392, "step": 26359 }, { "epoch": 627.6208955223881, "grad_norm": 26.898231506347656, "learning_rate": 9.8828125e-06, "loss": 24.3406, "step": 26360 }, { "epoch": 627.644776119403, "grad_norm": 28.95717430114746, "learning_rate": 9.882440476190478e-06, "loss": 24.6224, "step": 26361 }, { "epoch": 627.6686567164179, "grad_norm": 29.322729110717773, "learning_rate": 9.882068452380953e-06, "loss": 26.0338, "step": 26362 }, { "epoch": 627.6925373134328, "grad_norm": 23.786596298217773, "learning_rate": 9.881696428571429e-06, "loss": 23.1837, "step": 26363 }, { "epoch": 627.7164179104477, "grad_norm": 31.378787994384766, "learning_rate": 9.881324404761904e-06, "loss": 24.5854, "step": 26364 }, { "epoch": 627.7402985074627, "grad_norm": 30.12535285949707, "learning_rate": 9.880952380952381e-06, "loss": 25.2144, "step": 26365 }, { "epoch": 627.7641791044776, "grad_norm": 24.577856063842773, "learning_rate": 9.880580357142859e-06, "loss": 24.8971, "step": 26366 }, { "epoch": 627.7880597014926, "grad_norm": 27.22940444946289, "learning_rate": 9.880208333333334e-06, "loss": 24.5696, "step": 26367 }, { "epoch": 627.8119402985075, "grad_norm": 27.332473754882812, "learning_rate": 9.87983630952381e-06, "loss": 25.101, "step": 26368 }, { "epoch": 627.8358208955224, "grad_norm": 26.889965057373047, "learning_rate": 9.879464285714287e-06, "loss": 24.1095, "step": 26369 }, { "epoch": 627.8597014925373, "grad_norm": 24.91248321533203, "learning_rate": 9.879092261904762e-06, "loss": 24.8139, "step": 26370 }, { "epoch": 627.8835820895522, "grad_norm": 27.902786254882812, "learning_rate": 9.878720238095238e-06, "loss": 24.7726, "step": 26371 }, { "epoch": 627.9074626865672, "grad_norm": 24.272830963134766, "learning_rate": 9.878348214285715e-06, "loss": 25.426, "step": 26372 }, { "epoch": 627.9313432835821, "grad_norm": 26.145450592041016, "learning_rate": 9.877976190476192e-06, "loss": 24.6276, "step": 26373 }, { "epoch": 627.955223880597, "grad_norm": 25.821060180664062, "learning_rate": 9.877604166666668e-06, "loss": 24.9371, "step": 26374 }, { "epoch": 627.9791044776119, "grad_norm": 26.76931381225586, "learning_rate": 9.877232142857143e-06, "loss": 24.3045, "step": 26375 }, { "epoch": 628.0, "grad_norm": 23.519826889038086, "learning_rate": 9.87686011904762e-06, "loss": 21.663, "step": 26376 }, { "epoch": 628.0238805970149, "grad_norm": 25.898805618286133, "learning_rate": 9.876488095238096e-06, "loss": 24.7714, "step": 26377 }, { "epoch": 628.0477611940298, "grad_norm": 20.830474853515625, "learning_rate": 9.876116071428571e-06, "loss": 24.8285, "step": 26378 }, { "epoch": 628.0716417910447, "grad_norm": 27.327030181884766, "learning_rate": 9.875744047619048e-06, "loss": 23.6876, "step": 26379 }, { "epoch": 628.0955223880597, "grad_norm": 24.586181640625, "learning_rate": 9.875372023809524e-06, "loss": 25.7695, "step": 26380 }, { "epoch": 628.1194029850747, "grad_norm": 22.805221557617188, "learning_rate": 9.875000000000001e-06, "loss": 24.3214, "step": 26381 }, { "epoch": 628.1432835820896, "grad_norm": 24.490230560302734, "learning_rate": 9.874627976190477e-06, "loss": 25.2218, "step": 26382 }, { "epoch": 628.1671641791045, "grad_norm": 26.484508514404297, "learning_rate": 9.874255952380954e-06, "loss": 24.1054, "step": 26383 }, { "epoch": 628.1910447761194, "grad_norm": 26.139108657836914, "learning_rate": 9.87388392857143e-06, "loss": 24.2791, "step": 26384 }, { "epoch": 628.2149253731343, "grad_norm": 24.011751174926758, "learning_rate": 9.873511904761905e-06, "loss": 23.5438, "step": 26385 }, { "epoch": 628.2388059701492, "grad_norm": 30.714567184448242, "learning_rate": 9.873139880952382e-06, "loss": 25.2879, "step": 26386 }, { "epoch": 628.2626865671642, "grad_norm": 35.487640380859375, "learning_rate": 9.872767857142858e-06, "loss": 25.1002, "step": 26387 }, { "epoch": 628.2865671641791, "grad_norm": 22.74915313720703, "learning_rate": 9.872395833333333e-06, "loss": 25.4723, "step": 26388 }, { "epoch": 628.310447761194, "grad_norm": 32.621089935302734, "learning_rate": 9.87202380952381e-06, "loss": 23.4929, "step": 26389 }, { "epoch": 628.334328358209, "grad_norm": 33.53009033203125, "learning_rate": 9.871651785714287e-06, "loss": 25.5104, "step": 26390 }, { "epoch": 628.3582089552239, "grad_norm": 22.9473934173584, "learning_rate": 9.871279761904763e-06, "loss": 23.9531, "step": 26391 }, { "epoch": 628.3820895522388, "grad_norm": 30.4063777923584, "learning_rate": 9.870907738095238e-06, "loss": 24.6976, "step": 26392 }, { "epoch": 628.4059701492537, "grad_norm": 30.195960998535156, "learning_rate": 9.870535714285716e-06, "loss": 25.7822, "step": 26393 }, { "epoch": 628.4298507462687, "grad_norm": 23.066911697387695, "learning_rate": 9.870163690476191e-06, "loss": 23.9884, "step": 26394 }, { "epoch": 628.4537313432836, "grad_norm": 27.024877548217773, "learning_rate": 9.869791666666667e-06, "loss": 25.2832, "step": 26395 }, { "epoch": 628.4776119402985, "grad_norm": 35.23518753051758, "learning_rate": 9.869419642857144e-06, "loss": 24.9478, "step": 26396 }, { "epoch": 628.5014925373134, "grad_norm": 24.550039291381836, "learning_rate": 9.869047619047621e-06, "loss": 25.1045, "step": 26397 }, { "epoch": 628.5253731343283, "grad_norm": 26.605953216552734, "learning_rate": 9.868675595238096e-06, "loss": 24.5193, "step": 26398 }, { "epoch": 628.5492537313432, "grad_norm": 27.591476440429688, "learning_rate": 9.868303571428572e-06, "loss": 24.1728, "step": 26399 }, { "epoch": 628.5731343283583, "grad_norm": 26.01827049255371, "learning_rate": 9.867931547619049e-06, "loss": 25.3505, "step": 26400 }, { "epoch": 628.5970149253732, "grad_norm": 33.576759338378906, "learning_rate": 9.867559523809525e-06, "loss": 24.2564, "step": 26401 }, { "epoch": 628.6208955223881, "grad_norm": 26.83687973022461, "learning_rate": 9.8671875e-06, "loss": 23.7815, "step": 26402 }, { "epoch": 628.644776119403, "grad_norm": 36.65134048461914, "learning_rate": 9.866815476190477e-06, "loss": 24.9816, "step": 26403 }, { "epoch": 628.6686567164179, "grad_norm": 23.956750869750977, "learning_rate": 9.866443452380953e-06, "loss": 24.296, "step": 26404 }, { "epoch": 628.6925373134328, "grad_norm": 43.069580078125, "learning_rate": 9.86607142857143e-06, "loss": 25.0971, "step": 26405 }, { "epoch": 628.7164179104477, "grad_norm": 25.07878303527832, "learning_rate": 9.865699404761906e-06, "loss": 24.0747, "step": 26406 }, { "epoch": 628.7402985074627, "grad_norm": 36.92786407470703, "learning_rate": 9.865327380952383e-06, "loss": 25.3645, "step": 26407 }, { "epoch": 628.7641791044776, "grad_norm": 26.87118148803711, "learning_rate": 9.864955357142858e-06, "loss": 24.1947, "step": 26408 }, { "epoch": 628.7880597014926, "grad_norm": 29.392969131469727, "learning_rate": 9.864583333333334e-06, "loss": 24.3771, "step": 26409 }, { "epoch": 628.8119402985075, "grad_norm": 27.214941024780273, "learning_rate": 9.864211309523811e-06, "loss": 24.2583, "step": 26410 }, { "epoch": 628.8358208955224, "grad_norm": 27.848140716552734, "learning_rate": 9.863839285714286e-06, "loss": 24.3935, "step": 26411 }, { "epoch": 628.8597014925373, "grad_norm": 28.083253860473633, "learning_rate": 9.863467261904762e-06, "loss": 24.9061, "step": 26412 }, { "epoch": 628.8835820895522, "grad_norm": 31.001998901367188, "learning_rate": 9.863095238095239e-06, "loss": 25.377, "step": 26413 }, { "epoch": 628.9074626865672, "grad_norm": 31.080110549926758, "learning_rate": 9.862723214285716e-06, "loss": 24.2066, "step": 26414 }, { "epoch": 628.9313432835821, "grad_norm": 28.662025451660156, "learning_rate": 9.862351190476192e-06, "loss": 25.1315, "step": 26415 }, { "epoch": 628.955223880597, "grad_norm": 27.598478317260742, "learning_rate": 9.861979166666667e-06, "loss": 24.6269, "step": 26416 }, { "epoch": 628.9791044776119, "grad_norm": 31.439809799194336, "learning_rate": 9.861607142857144e-06, "loss": 25.2736, "step": 26417 }, { "epoch": 629.0, "grad_norm": 22.392900466918945, "learning_rate": 9.86123511904762e-06, "loss": 22.0993, "step": 26418 }, { "epoch": 629.0238805970149, "grad_norm": 31.41524314880371, "learning_rate": 9.860863095238095e-06, "loss": 24.9713, "step": 26419 }, { "epoch": 629.0477611940298, "grad_norm": 31.840463638305664, "learning_rate": 9.860491071428571e-06, "loss": 24.6524, "step": 26420 }, { "epoch": 629.0716417910447, "grad_norm": 23.808565139770508, "learning_rate": 9.860119047619048e-06, "loss": 24.408, "step": 26421 }, { "epoch": 629.0955223880597, "grad_norm": 28.091983795166016, "learning_rate": 9.859747023809525e-06, "loss": 23.7734, "step": 26422 }, { "epoch": 629.1194029850747, "grad_norm": 29.38776969909668, "learning_rate": 9.859375e-06, "loss": 25.2626, "step": 26423 }, { "epoch": 629.1432835820896, "grad_norm": 23.964481353759766, "learning_rate": 9.859002976190476e-06, "loss": 24.2101, "step": 26424 }, { "epoch": 629.1671641791045, "grad_norm": 21.922164916992188, "learning_rate": 9.858630952380953e-06, "loss": 24.3118, "step": 26425 }, { "epoch": 629.1910447761194, "grad_norm": 26.3013973236084, "learning_rate": 9.858258928571429e-06, "loss": 23.0549, "step": 26426 }, { "epoch": 629.2149253731343, "grad_norm": 23.664180755615234, "learning_rate": 9.857886904761904e-06, "loss": 25.1825, "step": 26427 }, { "epoch": 629.2388059701492, "grad_norm": NaN, "learning_rate": 9.857514880952382e-06, "loss": 38.8369, "step": 26428 }, { "epoch": 629.2626865671642, "grad_norm": 27.10687828063965, "learning_rate": 9.857514880952382e-06, "loss": 24.8752, "step": 26429 }, { "epoch": 629.2865671641791, "grad_norm": 21.014942169189453, "learning_rate": 9.857142857142859e-06, "loss": 25.428, "step": 26430 }, { "epoch": 629.310447761194, "grad_norm": 25.638525009155273, "learning_rate": 9.856770833333334e-06, "loss": 24.9805, "step": 26431 }, { "epoch": 629.334328358209, "grad_norm": 26.734024047851562, "learning_rate": 9.85639880952381e-06, "loss": 23.9427, "step": 26432 }, { "epoch": 629.3582089552239, "grad_norm": 24.08184814453125, "learning_rate": 9.856026785714287e-06, "loss": 23.622, "step": 26433 }, { "epoch": 629.3820895522388, "grad_norm": 23.911325454711914, "learning_rate": 9.855654761904763e-06, "loss": 24.4866, "step": 26434 }, { "epoch": 629.4059701492537, "grad_norm": 23.26405143737793, "learning_rate": 9.855282738095238e-06, "loss": 24.8037, "step": 26435 }, { "epoch": 629.4298507462687, "grad_norm": 23.327701568603516, "learning_rate": 9.854910714285715e-06, "loss": 24.5641, "step": 26436 }, { "epoch": 629.4537313432836, "grad_norm": 26.068674087524414, "learning_rate": 9.85453869047619e-06, "loss": 25.0691, "step": 26437 }, { "epoch": 629.4776119402985, "grad_norm": 27.973793029785156, "learning_rate": 9.854166666666668e-06, "loss": 24.4818, "step": 26438 }, { "epoch": 629.5014925373134, "grad_norm": 26.360668182373047, "learning_rate": 9.853794642857143e-06, "loss": 23.9077, "step": 26439 }, { "epoch": 629.5253731343283, "grad_norm": 21.998777389526367, "learning_rate": 9.85342261904762e-06, "loss": 24.5932, "step": 26440 }, { "epoch": 629.5492537313432, "grad_norm": 35.697792053222656, "learning_rate": 9.853050595238096e-06, "loss": 25.4514, "step": 26441 }, { "epoch": 629.5731343283583, "grad_norm": 28.95992660522461, "learning_rate": 9.852678571428572e-06, "loss": 24.6417, "step": 26442 }, { "epoch": 629.5970149253732, "grad_norm": 23.305116653442383, "learning_rate": 9.852306547619049e-06, "loss": 24.8534, "step": 26443 }, { "epoch": 629.6208955223881, "grad_norm": 30.633604049682617, "learning_rate": 9.851934523809524e-06, "loss": 25.2015, "step": 26444 }, { "epoch": 629.644776119403, "grad_norm": 24.97882080078125, "learning_rate": 9.8515625e-06, "loss": 24.0787, "step": 26445 }, { "epoch": 629.6686567164179, "grad_norm": 27.341264724731445, "learning_rate": 9.851190476190477e-06, "loss": 24.5162, "step": 26446 }, { "epoch": 629.6925373134328, "grad_norm": 33.41053009033203, "learning_rate": 9.850818452380954e-06, "loss": 24.676, "step": 26447 }, { "epoch": 629.7164179104477, "grad_norm": 27.160913467407227, "learning_rate": 9.85044642857143e-06, "loss": 25.1796, "step": 26448 }, { "epoch": 629.7402985074627, "grad_norm": 23.923627853393555, "learning_rate": 9.850074404761905e-06, "loss": 23.6267, "step": 26449 }, { "epoch": 629.7641791044776, "grad_norm": 24.031518936157227, "learning_rate": 9.849702380952382e-06, "loss": 24.8219, "step": 26450 }, { "epoch": 629.7880597014926, "grad_norm": 22.570480346679688, "learning_rate": 9.849330357142858e-06, "loss": 24.5633, "step": 26451 }, { "epoch": 629.8119402985075, "grad_norm": 28.253448486328125, "learning_rate": 9.848958333333333e-06, "loss": 26.395, "step": 26452 }, { "epoch": 629.8358208955224, "grad_norm": 21.554536819458008, "learning_rate": 9.84858630952381e-06, "loss": 24.2322, "step": 26453 }, { "epoch": 629.8597014925373, "grad_norm": 26.194549560546875, "learning_rate": 9.848214285714288e-06, "loss": 23.8677, "step": 26454 }, { "epoch": 629.8835820895522, "grad_norm": 26.529996871948242, "learning_rate": 9.847842261904763e-06, "loss": 25.4494, "step": 26455 }, { "epoch": 629.9074626865672, "grad_norm": 25.49509048461914, "learning_rate": 9.847470238095239e-06, "loss": 24.9582, "step": 26456 }, { "epoch": 629.9313432835821, "grad_norm": 28.625038146972656, "learning_rate": 9.847098214285716e-06, "loss": 24.8438, "step": 26457 }, { "epoch": 629.955223880597, "grad_norm": 24.68073844909668, "learning_rate": 9.846726190476191e-06, "loss": 24.4319, "step": 26458 }, { "epoch": 629.9791044776119, "grad_norm": 27.182823181152344, "learning_rate": 9.846354166666667e-06, "loss": 25.3679, "step": 26459 }, { "epoch": 630.0, "grad_norm": 28.905681610107422, "learning_rate": 9.845982142857144e-06, "loss": 22.5602, "step": 26460 }, { "epoch": 630.0238805970149, "grad_norm": 23.45807456970215, "learning_rate": 9.84561011904762e-06, "loss": 24.9118, "step": 26461 }, { "epoch": 630.0477611940298, "grad_norm": 23.83633804321289, "learning_rate": 9.845238095238097e-06, "loss": 24.3709, "step": 26462 }, { "epoch": 630.0716417910447, "grad_norm": 27.519569396972656, "learning_rate": 9.844866071428572e-06, "loss": 23.5474, "step": 26463 }, { "epoch": 630.0955223880597, "grad_norm": 27.721044540405273, "learning_rate": 9.84449404761905e-06, "loss": 25.0601, "step": 26464 }, { "epoch": 630.1194029850747, "grad_norm": 27.67167091369629, "learning_rate": 9.844122023809525e-06, "loss": 24.241, "step": 26465 }, { "epoch": 630.1432835820896, "grad_norm": 22.3553466796875, "learning_rate": 9.84375e-06, "loss": 25.0499, "step": 26466 }, { "epoch": 630.1671641791045, "grad_norm": 33.444278717041016, "learning_rate": 9.843377976190478e-06, "loss": 25.2318, "step": 26467 }, { "epoch": 630.1910447761194, "grad_norm": 25.638553619384766, "learning_rate": 9.843005952380953e-06, "loss": 24.6407, "step": 26468 }, { "epoch": 630.2149253731343, "grad_norm": 30.25693130493164, "learning_rate": 9.842633928571429e-06, "loss": 24.404, "step": 26469 }, { "epoch": 630.2388059701492, "grad_norm": 27.42254066467285, "learning_rate": 9.842261904761906e-06, "loss": 25.2573, "step": 26470 }, { "epoch": 630.2626865671642, "grad_norm": 29.580293655395508, "learning_rate": 9.841889880952383e-06, "loss": 24.7377, "step": 26471 }, { "epoch": 630.2865671641791, "grad_norm": 25.172914505004883, "learning_rate": 9.841517857142858e-06, "loss": 23.4843, "step": 26472 }, { "epoch": 630.310447761194, "grad_norm": 27.641674041748047, "learning_rate": 9.841145833333334e-06, "loss": 24.6391, "step": 26473 }, { "epoch": 630.334328358209, "grad_norm": 38.046058654785156, "learning_rate": 9.840773809523811e-06, "loss": 24.5949, "step": 26474 }, { "epoch": 630.3582089552239, "grad_norm": 23.581073760986328, "learning_rate": 9.840401785714287e-06, "loss": 24.9498, "step": 26475 }, { "epoch": 630.3820895522388, "grad_norm": 29.353961944580078, "learning_rate": 9.840029761904762e-06, "loss": 24.5778, "step": 26476 }, { "epoch": 630.4059701492537, "grad_norm": 33.42658996582031, "learning_rate": 9.83965773809524e-06, "loss": 24.4856, "step": 26477 }, { "epoch": 630.4298507462687, "grad_norm": 23.142004013061523, "learning_rate": 9.839285714285715e-06, "loss": 23.3785, "step": 26478 }, { "epoch": 630.4537313432836, "grad_norm": 26.48701286315918, "learning_rate": 9.838913690476192e-06, "loss": 24.1132, "step": 26479 }, { "epoch": 630.4776119402985, "grad_norm": 27.23042106628418, "learning_rate": 9.838541666666668e-06, "loss": 25.0327, "step": 26480 }, { "epoch": 630.5014925373134, "grad_norm": 28.40250587463379, "learning_rate": 9.838169642857143e-06, "loss": 25.3913, "step": 26481 }, { "epoch": 630.5253731343283, "grad_norm": 23.236806869506836, "learning_rate": 9.83779761904762e-06, "loss": 25.005, "step": 26482 }, { "epoch": 630.5492537313432, "grad_norm": 26.97603416442871, "learning_rate": 9.837425595238096e-06, "loss": 23.0592, "step": 26483 }, { "epoch": 630.5731343283583, "grad_norm": 34.8483772277832, "learning_rate": 9.837053571428571e-06, "loss": 24.1805, "step": 26484 }, { "epoch": 630.5970149253732, "grad_norm": 23.159347534179688, "learning_rate": 9.836681547619048e-06, "loss": 24.4235, "step": 26485 }, { "epoch": 630.6208955223881, "grad_norm": 25.63663673400879, "learning_rate": 9.836309523809524e-06, "loss": 24.8011, "step": 26486 }, { "epoch": 630.644776119403, "grad_norm": NaN, "learning_rate": 9.835937500000001e-06, "loss": 21.3201, "step": 26487 }, { "epoch": 630.6686567164179, "grad_norm": 29.063201904296875, "learning_rate": 9.835937500000001e-06, "loss": 24.7633, "step": 26488 }, { "epoch": 630.6925373134328, "grad_norm": 32.81318664550781, "learning_rate": 9.835565476190477e-06, "loss": 25.4424, "step": 26489 }, { "epoch": 630.7164179104477, "grad_norm": 24.659286499023438, "learning_rate": 9.835193452380954e-06, "loss": 24.7782, "step": 26490 }, { "epoch": 630.7402985074627, "grad_norm": 27.090320587158203, "learning_rate": 9.83482142857143e-06, "loss": 25.7436, "step": 26491 }, { "epoch": 630.7641791044776, "grad_norm": 32.61868667602539, "learning_rate": 9.834449404761905e-06, "loss": 24.763, "step": 26492 }, { "epoch": 630.7880597014926, "grad_norm": 26.582548141479492, "learning_rate": 9.834077380952382e-06, "loss": 24.8281, "step": 26493 }, { "epoch": 630.8119402985075, "grad_norm": 23.021286010742188, "learning_rate": 9.833705357142857e-06, "loss": 24.4077, "step": 26494 }, { "epoch": 630.8358208955224, "grad_norm": 28.4708194732666, "learning_rate": 9.833333333333333e-06, "loss": 24.8806, "step": 26495 }, { "epoch": 630.8597014925373, "grad_norm": 24.36382484436035, "learning_rate": 9.83296130952381e-06, "loss": 24.485, "step": 26496 }, { "epoch": 630.8835820895522, "grad_norm": 26.302494049072266, "learning_rate": 9.832589285714287e-06, "loss": 24.2215, "step": 26497 }, { "epoch": 630.9074626865672, "grad_norm": 24.67487144470215, "learning_rate": 9.832217261904763e-06, "loss": 24.755, "step": 26498 }, { "epoch": 630.9313432835821, "grad_norm": 24.746475219726562, "learning_rate": 9.831845238095238e-06, "loss": 24.7178, "step": 26499 }, { "epoch": 630.955223880597, "grad_norm": 24.380250930786133, "learning_rate": 9.831473214285715e-06, "loss": 24.934, "step": 26500 }, { "epoch": 630.9791044776119, "grad_norm": 23.954273223876953, "learning_rate": 9.831101190476191e-06, "loss": 24.5837, "step": 26501 }, { "epoch": 631.0, "grad_norm": 20.658815383911133, "learning_rate": 9.830729166666666e-06, "loss": 21.9483, "step": 26502 }, { "epoch": 631.0238805970149, "grad_norm": 26.089475631713867, "learning_rate": 9.830357142857144e-06, "loss": 25.0707, "step": 26503 }, { "epoch": 631.0477611940298, "grad_norm": 25.379066467285156, "learning_rate": 9.829985119047621e-06, "loss": 23.8517, "step": 26504 }, { "epoch": 631.0716417910447, "grad_norm": 24.036169052124023, "learning_rate": 9.829613095238096e-06, "loss": 24.1024, "step": 26505 }, { "epoch": 631.0955223880597, "grad_norm": 23.87770652770996, "learning_rate": 9.829241071428572e-06, "loss": 25.3663, "step": 26506 }, { "epoch": 631.1194029850747, "grad_norm": 23.43189239501953, "learning_rate": 9.828869047619049e-06, "loss": 24.7099, "step": 26507 }, { "epoch": 631.1432835820896, "grad_norm": 20.958942413330078, "learning_rate": 9.828497023809525e-06, "loss": 23.9343, "step": 26508 }, { "epoch": 631.1671641791045, "grad_norm": 25.732044219970703, "learning_rate": 9.828125e-06, "loss": 24.371, "step": 26509 }, { "epoch": 631.1910447761194, "grad_norm": 24.153810501098633, "learning_rate": 9.827752976190477e-06, "loss": 25.5039, "step": 26510 }, { "epoch": 631.2149253731343, "grad_norm": 31.15625762939453, "learning_rate": 9.827380952380953e-06, "loss": 24.0234, "step": 26511 }, { "epoch": 631.2388059701492, "grad_norm": 29.137760162353516, "learning_rate": 9.82700892857143e-06, "loss": 26.0788, "step": 26512 }, { "epoch": 631.2626865671642, "grad_norm": 25.35007667541504, "learning_rate": 9.826636904761905e-06, "loss": 24.8941, "step": 26513 }, { "epoch": 631.2865671641791, "grad_norm": 30.898296356201172, "learning_rate": 9.826264880952383e-06, "loss": 23.9466, "step": 26514 }, { "epoch": 631.310447761194, "grad_norm": 32.138248443603516, "learning_rate": 9.825892857142858e-06, "loss": 24.7388, "step": 26515 }, { "epoch": 631.334328358209, "grad_norm": 24.10903549194336, "learning_rate": 9.825520833333334e-06, "loss": 25.5685, "step": 26516 }, { "epoch": 631.3582089552239, "grad_norm": 27.12180519104004, "learning_rate": 9.82514880952381e-06, "loss": 24.6306, "step": 26517 }, { "epoch": 631.3820895522388, "grad_norm": 28.70182991027832, "learning_rate": 9.824776785714286e-06, "loss": 25.1962, "step": 26518 }, { "epoch": 631.4059701492537, "grad_norm": 27.004968643188477, "learning_rate": 9.824404761904762e-06, "loss": 24.2362, "step": 26519 }, { "epoch": 631.4298507462687, "grad_norm": 22.726083755493164, "learning_rate": 9.824032738095239e-06, "loss": 23.7693, "step": 26520 }, { "epoch": 631.4537313432836, "grad_norm": 24.0318603515625, "learning_rate": 9.823660714285716e-06, "loss": 25.1604, "step": 26521 }, { "epoch": 631.4776119402985, "grad_norm": 35.54570770263672, "learning_rate": 9.823288690476192e-06, "loss": 24.3076, "step": 26522 }, { "epoch": 631.5014925373134, "grad_norm": 28.46152687072754, "learning_rate": 9.822916666666667e-06, "loss": 24.4865, "step": 26523 }, { "epoch": 631.5253731343283, "grad_norm": 27.81413459777832, "learning_rate": 9.822544642857144e-06, "loss": 24.6478, "step": 26524 }, { "epoch": 631.5492537313432, "grad_norm": 24.406192779541016, "learning_rate": 9.82217261904762e-06, "loss": 23.4839, "step": 26525 }, { "epoch": 631.5731343283583, "grad_norm": 26.614959716796875, "learning_rate": 9.821800595238095e-06, "loss": 24.6842, "step": 26526 }, { "epoch": 631.5970149253732, "grad_norm": 23.143566131591797, "learning_rate": 9.821428571428573e-06, "loss": 23.7225, "step": 26527 }, { "epoch": 631.6208955223881, "grad_norm": 23.56361198425293, "learning_rate": 9.82105654761905e-06, "loss": 24.736, "step": 26528 }, { "epoch": 631.644776119403, "grad_norm": 24.092937469482422, "learning_rate": 9.820684523809525e-06, "loss": 25.5978, "step": 26529 }, { "epoch": 631.6686567164179, "grad_norm": 23.794906616210938, "learning_rate": 9.8203125e-06, "loss": 24.5074, "step": 26530 }, { "epoch": 631.6925373134328, "grad_norm": 28.41253662109375, "learning_rate": 9.819940476190478e-06, "loss": 24.5377, "step": 26531 }, { "epoch": 631.7164179104477, "grad_norm": 25.916873931884766, "learning_rate": 9.819568452380953e-06, "loss": 24.7462, "step": 26532 }, { "epoch": 631.7402985074627, "grad_norm": 22.260616302490234, "learning_rate": 9.819196428571429e-06, "loss": 24.7637, "step": 26533 }, { "epoch": 631.7641791044776, "grad_norm": 23.332265853881836, "learning_rate": 9.818824404761906e-06, "loss": 24.4965, "step": 26534 }, { "epoch": 631.7880597014926, "grad_norm": 24.538864135742188, "learning_rate": 9.818452380952382e-06, "loss": 24.9219, "step": 26535 }, { "epoch": 631.8119402985075, "grad_norm": 21.08180809020996, "learning_rate": 9.818080357142859e-06, "loss": 24.3212, "step": 26536 }, { "epoch": 631.8358208955224, "grad_norm": 21.909700393676758, "learning_rate": 9.817708333333334e-06, "loss": 23.8551, "step": 26537 }, { "epoch": 631.8597014925373, "grad_norm": 28.71913719177246, "learning_rate": 9.81733630952381e-06, "loss": 25.3731, "step": 26538 }, { "epoch": 631.8835820895522, "grad_norm": 25.43705940246582, "learning_rate": 9.816964285714287e-06, "loss": 24.0302, "step": 26539 }, { "epoch": 631.9074626865672, "grad_norm": 27.447799682617188, "learning_rate": 9.816592261904762e-06, "loss": 24.596, "step": 26540 }, { "epoch": 631.9313432835821, "grad_norm": 21.860702514648438, "learning_rate": 9.816220238095238e-06, "loss": 23.6185, "step": 26541 }, { "epoch": 631.955223880597, "grad_norm": 22.26009178161621, "learning_rate": 9.815848214285715e-06, "loss": 24.3025, "step": 26542 }, { "epoch": 631.9791044776119, "grad_norm": 24.331600189208984, "learning_rate": 9.81547619047619e-06, "loss": 25.4386, "step": 26543 }, { "epoch": 632.0, "grad_norm": 25.623231887817383, "learning_rate": 9.815104166666668e-06, "loss": 21.6997, "step": 26544 }, { "epoch": 632.0238805970149, "grad_norm": 23.076889038085938, "learning_rate": 9.814732142857143e-06, "loss": 24.1549, "step": 26545 }, { "epoch": 632.0477611940298, "grad_norm": NaN, "learning_rate": 9.81436011904762e-06, "loss": 42.9196, "step": 26546 }, { "epoch": 632.0716417910447, "grad_norm": 29.46230125427246, "learning_rate": 9.81436011904762e-06, "loss": 24.48, "step": 26547 }, { "epoch": 632.0955223880597, "grad_norm": 32.89728927612305, "learning_rate": 9.813988095238096e-06, "loss": 24.6366, "step": 26548 }, { "epoch": 632.1194029850747, "grad_norm": 26.74151039123535, "learning_rate": 9.813616071428571e-06, "loss": 24.1428, "step": 26549 }, { "epoch": 632.1432835820896, "grad_norm": 25.37149429321289, "learning_rate": 9.813244047619049e-06, "loss": 24.7496, "step": 26550 }, { "epoch": 632.1671641791045, "grad_norm": 34.9299201965332, "learning_rate": 9.812872023809524e-06, "loss": 24.1532, "step": 26551 }, { "epoch": 632.1910447761194, "grad_norm": 24.024446487426758, "learning_rate": 9.8125e-06, "loss": 23.8382, "step": 26552 }, { "epoch": 632.2149253731343, "grad_norm": 30.083049774169922, "learning_rate": 9.812127976190477e-06, "loss": 24.6757, "step": 26553 }, { "epoch": 632.2388059701492, "grad_norm": 35.108943939208984, "learning_rate": 9.811755952380954e-06, "loss": 25.3931, "step": 26554 }, { "epoch": 632.2626865671642, "grad_norm": 26.57042121887207, "learning_rate": 9.81138392857143e-06, "loss": 25.6866, "step": 26555 }, { "epoch": 632.2865671641791, "grad_norm": 28.703781127929688, "learning_rate": 9.811011904761905e-06, "loss": 24.1925, "step": 26556 }, { "epoch": 632.310447761194, "grad_norm": 32.58632278442383, "learning_rate": 9.810639880952382e-06, "loss": 24.9055, "step": 26557 }, { "epoch": 632.334328358209, "grad_norm": 23.426956176757812, "learning_rate": 9.810267857142858e-06, "loss": 25.0317, "step": 26558 }, { "epoch": 632.3582089552239, "grad_norm": 24.31167984008789, "learning_rate": 9.809895833333333e-06, "loss": 24.2893, "step": 26559 }, { "epoch": 632.3820895522388, "grad_norm": 30.857587814331055, "learning_rate": 9.80952380952381e-06, "loss": 23.8383, "step": 26560 }, { "epoch": 632.4059701492537, "grad_norm": 24.66522216796875, "learning_rate": 9.809151785714288e-06, "loss": 25.4805, "step": 26561 }, { "epoch": 632.4298507462687, "grad_norm": 23.978893280029297, "learning_rate": 9.808779761904763e-06, "loss": 24.7597, "step": 26562 }, { "epoch": 632.4537313432836, "grad_norm": 28.850318908691406, "learning_rate": 9.808407738095239e-06, "loss": 24.5864, "step": 26563 }, { "epoch": 632.4776119402985, "grad_norm": 29.15174674987793, "learning_rate": 9.808035714285716e-06, "loss": 24.7175, "step": 26564 }, { "epoch": 632.5014925373134, "grad_norm": 25.133121490478516, "learning_rate": 9.807663690476191e-06, "loss": 24.937, "step": 26565 }, { "epoch": 632.5253731343283, "grad_norm": 26.74774169921875, "learning_rate": 9.807291666666667e-06, "loss": 24.6663, "step": 26566 }, { "epoch": 632.5492537313432, "grad_norm": 23.924102783203125, "learning_rate": 9.806919642857144e-06, "loss": 24.3463, "step": 26567 }, { "epoch": 632.5731343283583, "grad_norm": 26.176528930664062, "learning_rate": 9.80654761904762e-06, "loss": 23.8901, "step": 26568 }, { "epoch": 632.5970149253732, "grad_norm": 21.71010971069336, "learning_rate": 9.806175595238097e-06, "loss": 24.9558, "step": 26569 }, { "epoch": 632.6208955223881, "grad_norm": 25.804967880249023, "learning_rate": 9.805803571428572e-06, "loss": 24.6187, "step": 26570 }, { "epoch": 632.644776119403, "grad_norm": 22.492341995239258, "learning_rate": 9.80543154761905e-06, "loss": 25.1681, "step": 26571 }, { "epoch": 632.6686567164179, "grad_norm": 29.911855697631836, "learning_rate": 9.805059523809525e-06, "loss": 24.256, "step": 26572 }, { "epoch": 632.6925373134328, "grad_norm": 25.859050750732422, "learning_rate": 9.8046875e-06, "loss": 24.5156, "step": 26573 }, { "epoch": 632.7164179104477, "grad_norm": 23.560871124267578, "learning_rate": 9.804315476190477e-06, "loss": 25.0364, "step": 26574 }, { "epoch": 632.7402985074627, "grad_norm": 28.436748504638672, "learning_rate": 9.803943452380953e-06, "loss": 24.9066, "step": 26575 }, { "epoch": 632.7641791044776, "grad_norm": 32.80308532714844, "learning_rate": 9.803571428571428e-06, "loss": 24.5116, "step": 26576 }, { "epoch": 632.7880597014926, "grad_norm": 25.503278732299805, "learning_rate": 9.803199404761906e-06, "loss": 23.8845, "step": 26577 }, { "epoch": 632.8119402985075, "grad_norm": 23.940357208251953, "learning_rate": 9.802827380952383e-06, "loss": 24.5755, "step": 26578 }, { "epoch": 632.8358208955224, "grad_norm": 23.751285552978516, "learning_rate": 9.802455357142858e-06, "loss": 24.3852, "step": 26579 }, { "epoch": 632.8597014925373, "grad_norm": 29.350662231445312, "learning_rate": 9.802083333333334e-06, "loss": 24.66, "step": 26580 }, { "epoch": 632.8835820895522, "grad_norm": 24.49724769592285, "learning_rate": 9.801711309523811e-06, "loss": 24.6462, "step": 26581 }, { "epoch": 632.9074626865672, "grad_norm": 29.604188919067383, "learning_rate": 9.801339285714287e-06, "loss": 24.1578, "step": 26582 }, { "epoch": 632.9313432835821, "grad_norm": 26.694780349731445, "learning_rate": 9.800967261904762e-06, "loss": 24.2829, "step": 26583 }, { "epoch": 632.955223880597, "grad_norm": 27.14252471923828, "learning_rate": 9.80059523809524e-06, "loss": 24.6327, "step": 26584 }, { "epoch": 632.9791044776119, "grad_norm": 23.4996395111084, "learning_rate": 9.800223214285715e-06, "loss": 23.5861, "step": 26585 }, { "epoch": 633.0, "grad_norm": 23.14702606201172, "learning_rate": 9.799851190476192e-06, "loss": 21.3571, "step": 26586 }, { "epoch": 633.0238805970149, "grad_norm": 24.145986557006836, "learning_rate": 9.799479166666667e-06, "loss": 24.4647, "step": 26587 }, { "epoch": 633.0477611940298, "grad_norm": 31.26276206970215, "learning_rate": 9.799107142857145e-06, "loss": 24.9372, "step": 26588 }, { "epoch": 633.0716417910447, "grad_norm": 24.713775634765625, "learning_rate": 9.79873511904762e-06, "loss": 23.8556, "step": 26589 }, { "epoch": 633.0955223880597, "grad_norm": 26.147598266601562, "learning_rate": 9.798363095238096e-06, "loss": 24.551, "step": 26590 }, { "epoch": 633.1194029850747, "grad_norm": 30.35661506652832, "learning_rate": 9.797991071428573e-06, "loss": 24.0108, "step": 26591 }, { "epoch": 633.1432835820896, "grad_norm": 29.79447364807129, "learning_rate": 9.797619047619048e-06, "loss": 25.6783, "step": 26592 }, { "epoch": 633.1671641791045, "grad_norm": 22.812028884887695, "learning_rate": 9.797247023809524e-06, "loss": 24.9052, "step": 26593 }, { "epoch": 633.1910447761194, "grad_norm": 22.72399139404297, "learning_rate": 9.796875000000001e-06, "loss": 24.3247, "step": 26594 }, { "epoch": 633.2149253731343, "grad_norm": 23.527502059936523, "learning_rate": 9.796502976190476e-06, "loss": 24.8575, "step": 26595 }, { "epoch": 633.2388059701492, "grad_norm": 22.425312042236328, "learning_rate": 9.796130952380954e-06, "loss": 24.7808, "step": 26596 }, { "epoch": 633.2626865671642, "grad_norm": 26.117782592773438, "learning_rate": 9.795758928571429e-06, "loss": 24.4552, "step": 26597 }, { "epoch": 633.2865671641791, "grad_norm": 21.831253051757812, "learning_rate": 9.795386904761905e-06, "loss": 24.7289, "step": 26598 }, { "epoch": 633.310447761194, "grad_norm": 24.17723846435547, "learning_rate": 9.795014880952382e-06, "loss": 23.8469, "step": 26599 }, { "epoch": 633.334328358209, "grad_norm": 23.592124938964844, "learning_rate": 9.794642857142857e-06, "loss": 24.5166, "step": 26600 }, { "epoch": 633.3582089552239, "grad_norm": 26.86852264404297, "learning_rate": 9.794270833333333e-06, "loss": 23.7471, "step": 26601 }, { "epoch": 633.3820895522388, "grad_norm": 24.938283920288086, "learning_rate": 9.79389880952381e-06, "loss": 25.2131, "step": 26602 }, { "epoch": 633.4059701492537, "grad_norm": 23.635848999023438, "learning_rate": 9.793526785714287e-06, "loss": 24.0811, "step": 26603 }, { "epoch": 633.4298507462687, "grad_norm": 25.18374252319336, "learning_rate": 9.793154761904763e-06, "loss": 24.7186, "step": 26604 }, { "epoch": 633.4537313432836, "grad_norm": 30.426042556762695, "learning_rate": 9.792782738095238e-06, "loss": 24.4027, "step": 26605 }, { "epoch": 633.4776119402985, "grad_norm": 23.14556121826172, "learning_rate": 9.792410714285715e-06, "loss": 22.9134, "step": 26606 }, { "epoch": 633.5014925373134, "grad_norm": 23.88039779663086, "learning_rate": 9.792038690476191e-06, "loss": 25.1095, "step": 26607 }, { "epoch": 633.5253731343283, "grad_norm": 24.98709487915039, "learning_rate": 9.791666666666666e-06, "loss": 24.2889, "step": 26608 }, { "epoch": 633.5492537313432, "grad_norm": 24.319272994995117, "learning_rate": 9.791294642857144e-06, "loss": 24.2507, "step": 26609 }, { "epoch": 633.5731343283583, "grad_norm": 22.63819694519043, "learning_rate": 9.79092261904762e-06, "loss": 24.1076, "step": 26610 }, { "epoch": 633.5970149253732, "grad_norm": 24.399860382080078, "learning_rate": 9.790550595238096e-06, "loss": 23.9693, "step": 26611 }, { "epoch": 633.6208955223881, "grad_norm": 20.688108444213867, "learning_rate": 9.790178571428572e-06, "loss": 24.8365, "step": 26612 }, { "epoch": 633.644776119403, "grad_norm": 24.122526168823242, "learning_rate": 9.789806547619049e-06, "loss": 25.4516, "step": 26613 }, { "epoch": 633.6686567164179, "grad_norm": 28.114620208740234, "learning_rate": 9.789434523809524e-06, "loss": 24.3341, "step": 26614 }, { "epoch": 633.6925373134328, "grad_norm": 24.988117218017578, "learning_rate": 9.7890625e-06, "loss": 25.1872, "step": 26615 }, { "epoch": 633.7164179104477, "grad_norm": 21.917449951171875, "learning_rate": 9.788690476190477e-06, "loss": 24.3513, "step": 26616 }, { "epoch": 633.7402985074627, "grad_norm": 28.205787658691406, "learning_rate": 9.788318452380953e-06, "loss": 24.3958, "step": 26617 }, { "epoch": 633.7641791044776, "grad_norm": 26.902259826660156, "learning_rate": 9.78794642857143e-06, "loss": 24.8609, "step": 26618 }, { "epoch": 633.7880597014926, "grad_norm": 28.6626033782959, "learning_rate": 9.787574404761905e-06, "loss": 24.4776, "step": 26619 }, { "epoch": 633.8119402985075, "grad_norm": 24.58899688720703, "learning_rate": 9.787202380952382e-06, "loss": 25.1811, "step": 26620 }, { "epoch": 633.8358208955224, "grad_norm": 22.43996810913086, "learning_rate": 9.786830357142858e-06, "loss": 24.5605, "step": 26621 }, { "epoch": 633.8597014925373, "grad_norm": 32.93098068237305, "learning_rate": 9.786458333333333e-06, "loss": 25.5278, "step": 26622 }, { "epoch": 633.8835820895522, "grad_norm": 29.73679542541504, "learning_rate": 9.78608630952381e-06, "loss": 24.7403, "step": 26623 }, { "epoch": 633.9074626865672, "grad_norm": 21.915843963623047, "learning_rate": 9.785714285714286e-06, "loss": 24.5504, "step": 26624 }, { "epoch": 633.9313432835821, "grad_norm": 24.776214599609375, "learning_rate": 9.785342261904762e-06, "loss": 24.6751, "step": 26625 }, { "epoch": 633.955223880597, "grad_norm": 30.478174209594727, "learning_rate": 9.784970238095239e-06, "loss": 24.4375, "step": 26626 }, { "epoch": 633.9791044776119, "grad_norm": 24.889284133911133, "learning_rate": 9.784598214285716e-06, "loss": 23.7654, "step": 26627 }, { "epoch": 634.0, "grad_norm": 22.991058349609375, "learning_rate": 9.784226190476192e-06, "loss": 20.9881, "step": 26628 }, { "epoch": 634.0238805970149, "grad_norm": 21.61377716064453, "learning_rate": 9.783854166666667e-06, "loss": 24.2521, "step": 26629 }, { "epoch": 634.0477611940298, "grad_norm": 26.499591827392578, "learning_rate": 9.783482142857144e-06, "loss": 24.4748, "step": 26630 }, { "epoch": 634.0716417910447, "grad_norm": 27.13996124267578, "learning_rate": 9.78311011904762e-06, "loss": 24.5581, "step": 26631 }, { "epoch": 634.0955223880597, "grad_norm": 25.5087833404541, "learning_rate": 9.782738095238095e-06, "loss": 24.7112, "step": 26632 }, { "epoch": 634.1194029850747, "grad_norm": 25.2069034576416, "learning_rate": 9.782366071428572e-06, "loss": 24.168, "step": 26633 }, { "epoch": 634.1432835820896, "grad_norm": 21.098857879638672, "learning_rate": 9.78199404761905e-06, "loss": 25.5098, "step": 26634 }, { "epoch": 634.1671641791045, "grad_norm": 26.832904815673828, "learning_rate": 9.781622023809525e-06, "loss": 24.8213, "step": 26635 }, { "epoch": 634.1910447761194, "grad_norm": 24.602773666381836, "learning_rate": 9.78125e-06, "loss": 24.2892, "step": 26636 }, { "epoch": 634.2149253731343, "grad_norm": 28.634923934936523, "learning_rate": 9.780877976190478e-06, "loss": 24.3915, "step": 26637 }, { "epoch": 634.2388059701492, "grad_norm": 27.540084838867188, "learning_rate": 9.780505952380953e-06, "loss": 24.4716, "step": 26638 }, { "epoch": 634.2626865671642, "grad_norm": 28.301795959472656, "learning_rate": 9.780133928571429e-06, "loss": 25.2487, "step": 26639 }, { "epoch": 634.2865671641791, "grad_norm": 21.996583938598633, "learning_rate": 9.779761904761906e-06, "loss": 24.5379, "step": 26640 }, { "epoch": 634.310447761194, "grad_norm": 24.413022994995117, "learning_rate": 9.779389880952381e-06, "loss": 24.3835, "step": 26641 }, { "epoch": 634.334328358209, "grad_norm": 26.04973030090332, "learning_rate": 9.779017857142859e-06, "loss": 24.3166, "step": 26642 }, { "epoch": 634.3582089552239, "grad_norm": 28.560104370117188, "learning_rate": 9.778645833333334e-06, "loss": 24.0727, "step": 26643 }, { "epoch": 634.3820895522388, "grad_norm": NaN, "learning_rate": 9.778273809523811e-06, "loss": 30.2105, "step": 26644 }, { "epoch": 634.4059701492537, "grad_norm": 23.567365646362305, "learning_rate": 9.778273809523811e-06, "loss": 24.976, "step": 26645 }, { "epoch": 634.4298507462687, "grad_norm": 25.748470306396484, "learning_rate": 9.777901785714287e-06, "loss": 25.0014, "step": 26646 }, { "epoch": 634.4537313432836, "grad_norm": 28.938861846923828, "learning_rate": 9.777529761904762e-06, "loss": 23.2731, "step": 26647 }, { "epoch": 634.4776119402985, "grad_norm": 26.58571434020996, "learning_rate": 9.77715773809524e-06, "loss": 24.6666, "step": 26648 }, { "epoch": 634.5014925373134, "grad_norm": 22.75620460510254, "learning_rate": 9.776785714285715e-06, "loss": 24.7515, "step": 26649 }, { "epoch": 634.5253731343283, "grad_norm": 32.92923355102539, "learning_rate": 9.77641369047619e-06, "loss": 24.1892, "step": 26650 }, { "epoch": 634.5492537313432, "grad_norm": 28.327030181884766, "learning_rate": 9.776041666666668e-06, "loss": 24.2879, "step": 26651 }, { "epoch": 634.5731343283583, "grad_norm": 23.865522384643555, "learning_rate": 9.775669642857145e-06, "loss": 24.1462, "step": 26652 }, { "epoch": 634.5970149253732, "grad_norm": 30.8858585357666, "learning_rate": 9.77529761904762e-06, "loss": 24.7356, "step": 26653 }, { "epoch": 634.6208955223881, "grad_norm": 34.65692138671875, "learning_rate": 9.774925595238096e-06, "loss": 24.4764, "step": 26654 }, { "epoch": 634.644776119403, "grad_norm": 22.017671585083008, "learning_rate": 9.774553571428571e-06, "loss": 24.898, "step": 26655 }, { "epoch": 634.6686567164179, "grad_norm": 28.72623062133789, "learning_rate": 9.774181547619049e-06, "loss": 23.8041, "step": 26656 }, { "epoch": 634.6925373134328, "grad_norm": 35.10506820678711, "learning_rate": 9.773809523809524e-06, "loss": 24.2199, "step": 26657 }, { "epoch": 634.7164179104477, "grad_norm": 23.961441040039062, "learning_rate": 9.7734375e-06, "loss": 24.0235, "step": 26658 }, { "epoch": 634.7402985074627, "grad_norm": 23.720813751220703, "learning_rate": 9.773065476190477e-06, "loss": 24.2843, "step": 26659 }, { "epoch": 634.7641791044776, "grad_norm": 25.083972930908203, "learning_rate": 9.772693452380954e-06, "loss": 24.2116, "step": 26660 }, { "epoch": 634.7880597014926, "grad_norm": 29.39017105102539, "learning_rate": 9.77232142857143e-06, "loss": 24.4133, "step": 26661 }, { "epoch": 634.8119402985075, "grad_norm": 26.65843391418457, "learning_rate": 9.771949404761905e-06, "loss": 24.9729, "step": 26662 }, { "epoch": 634.8358208955224, "grad_norm": 23.767152786254883, "learning_rate": 9.771577380952382e-06, "loss": 24.1386, "step": 26663 }, { "epoch": 634.8597014925373, "grad_norm": 22.073551177978516, "learning_rate": 9.771205357142858e-06, "loss": 25.1776, "step": 26664 }, { "epoch": 634.8835820895522, "grad_norm": 23.853620529174805, "learning_rate": 9.770833333333333e-06, "loss": 24.7433, "step": 26665 }, { "epoch": 634.9074626865672, "grad_norm": 22.086177825927734, "learning_rate": 9.77046130952381e-06, "loss": 24.086, "step": 26666 }, { "epoch": 634.9313432835821, "grad_norm": 28.894014358520508, "learning_rate": 9.770089285714287e-06, "loss": 24.6406, "step": 26667 }, { "epoch": 634.955223880597, "grad_norm": 32.68827438354492, "learning_rate": 9.769717261904763e-06, "loss": 25.233, "step": 26668 }, { "epoch": 634.9791044776119, "grad_norm": 25.308706283569336, "learning_rate": 9.769345238095238e-06, "loss": 24.9022, "step": 26669 }, { "epoch": 635.0, "grad_norm": 21.311107635498047, "learning_rate": 9.768973214285716e-06, "loss": 21.4711, "step": 26670 }, { "epoch": 635.0238805970149, "grad_norm": 25.36104393005371, "learning_rate": 9.768601190476191e-06, "loss": 25.0665, "step": 26671 }, { "epoch": 635.0477611940298, "grad_norm": 28.775667190551758, "learning_rate": 9.768229166666667e-06, "loss": 24.8703, "step": 26672 }, { "epoch": 635.0716417910447, "grad_norm": 21.873777389526367, "learning_rate": 9.767857142857144e-06, "loss": 23.784, "step": 26673 }, { "epoch": 635.0955223880597, "grad_norm": 25.38401222229004, "learning_rate": 9.76748511904762e-06, "loss": 24.3457, "step": 26674 }, { "epoch": 635.1194029850747, "grad_norm": 33.230892181396484, "learning_rate": 9.767113095238097e-06, "loss": 24.6772, "step": 26675 }, { "epoch": 635.1432835820896, "grad_norm": 25.493534088134766, "learning_rate": 9.766741071428572e-06, "loss": 24.1062, "step": 26676 }, { "epoch": 635.1671641791045, "grad_norm": 22.10563087463379, "learning_rate": 9.76636904761905e-06, "loss": 23.8775, "step": 26677 }, { "epoch": 635.1910447761194, "grad_norm": 25.386348724365234, "learning_rate": 9.765997023809525e-06, "loss": 24.5588, "step": 26678 }, { "epoch": 635.2149253731343, "grad_norm": 30.902835845947266, "learning_rate": 9.765625e-06, "loss": 24.0422, "step": 26679 }, { "epoch": 635.2388059701492, "grad_norm": 26.98641014099121, "learning_rate": 9.765252976190477e-06, "loss": 24.3455, "step": 26680 }, { "epoch": 635.2626865671642, "grad_norm": 25.595069885253906, "learning_rate": 9.764880952380953e-06, "loss": 24.8886, "step": 26681 }, { "epoch": 635.2865671641791, "grad_norm": 36.632659912109375, "learning_rate": 9.764508928571428e-06, "loss": 24.5854, "step": 26682 }, { "epoch": 635.310447761194, "grad_norm": 25.73783302307129, "learning_rate": 9.764136904761906e-06, "loss": 22.7445, "step": 26683 }, { "epoch": 635.334328358209, "grad_norm": 23.876745223999023, "learning_rate": 9.763764880952383e-06, "loss": 24.5032, "step": 26684 }, { "epoch": 635.3582089552239, "grad_norm": 40.27981185913086, "learning_rate": 9.763392857142858e-06, "loss": 24.6458, "step": 26685 }, { "epoch": 635.3820895522388, "grad_norm": 25.388874053955078, "learning_rate": 9.763020833333334e-06, "loss": 24.5371, "step": 26686 }, { "epoch": 635.4059701492537, "grad_norm": 31.079683303833008, "learning_rate": 9.762648809523811e-06, "loss": 24.7579, "step": 26687 }, { "epoch": 635.4298507462687, "grad_norm": 34.017032623291016, "learning_rate": 9.762276785714286e-06, "loss": 24.7578, "step": 26688 }, { "epoch": 635.4537313432836, "grad_norm": 23.168676376342773, "learning_rate": 9.761904761904762e-06, "loss": 23.945, "step": 26689 }, { "epoch": 635.4776119402985, "grad_norm": 33.181541442871094, "learning_rate": 9.761532738095239e-06, "loss": 24.2583, "step": 26690 }, { "epoch": 635.5014925373134, "grad_norm": 29.131032943725586, "learning_rate": 9.761160714285715e-06, "loss": 24.4825, "step": 26691 }, { "epoch": 635.5253731343283, "grad_norm": 27.92920684814453, "learning_rate": 9.760788690476192e-06, "loss": 23.8616, "step": 26692 }, { "epoch": 635.5492537313432, "grad_norm": 37.59626007080078, "learning_rate": 9.760416666666667e-06, "loss": 25.5866, "step": 26693 }, { "epoch": 635.5731343283583, "grad_norm": 29.873828887939453, "learning_rate": 9.760044642857144e-06, "loss": 24.3352, "step": 26694 }, { "epoch": 635.5970149253732, "grad_norm": 28.565269470214844, "learning_rate": 9.75967261904762e-06, "loss": 24.6677, "step": 26695 }, { "epoch": 635.6208955223881, "grad_norm": 28.92363166809082, "learning_rate": 9.759300595238095e-06, "loss": 25.5012, "step": 26696 }, { "epoch": 635.644776119403, "grad_norm": 29.2810115814209, "learning_rate": 9.758928571428573e-06, "loss": 24.9747, "step": 26697 }, { "epoch": 635.6686567164179, "grad_norm": 22.388505935668945, "learning_rate": 9.758556547619048e-06, "loss": 24.0698, "step": 26698 }, { "epoch": 635.6925373134328, "grad_norm": 31.790252685546875, "learning_rate": 9.758184523809524e-06, "loss": 24.2371, "step": 26699 }, { "epoch": 635.7164179104477, "grad_norm": 27.50443458557129, "learning_rate": 9.757812500000001e-06, "loss": 24.6255, "step": 26700 }, { "epoch": 635.7402985074627, "grad_norm": 23.417757034301758, "learning_rate": 9.757440476190478e-06, "loss": 24.6437, "step": 26701 }, { "epoch": 635.7641791044776, "grad_norm": 30.429866790771484, "learning_rate": 9.757068452380954e-06, "loss": 23.756, "step": 26702 }, { "epoch": 635.7880597014926, "grad_norm": 30.873659133911133, "learning_rate": 9.756696428571429e-06, "loss": 23.9502, "step": 26703 }, { "epoch": 635.8119402985075, "grad_norm": 24.58136749267578, "learning_rate": 9.756324404761906e-06, "loss": 24.7842, "step": 26704 }, { "epoch": 635.8358208955224, "grad_norm": 22.029361724853516, "learning_rate": 9.755952380952382e-06, "loss": 24.9483, "step": 26705 }, { "epoch": 635.8597014925373, "grad_norm": 33.79682922363281, "learning_rate": 9.755580357142857e-06, "loss": 24.4075, "step": 26706 }, { "epoch": 635.8835820895522, "grad_norm": 25.494802474975586, "learning_rate": 9.755208333333334e-06, "loss": 23.684, "step": 26707 }, { "epoch": 635.9074626865672, "grad_norm": 26.206022262573242, "learning_rate": 9.754836309523812e-06, "loss": 24.6896, "step": 26708 }, { "epoch": 635.9313432835821, "grad_norm": 33.68046951293945, "learning_rate": 9.754464285714287e-06, "loss": 24.9597, "step": 26709 }, { "epoch": 635.955223880597, "grad_norm": 31.90056610107422, "learning_rate": 9.754092261904763e-06, "loss": 24.5608, "step": 26710 }, { "epoch": 635.9791044776119, "grad_norm": 21.37799644470215, "learning_rate": 9.753720238095238e-06, "loss": 25.0132, "step": 26711 }, { "epoch": 636.0, "grad_norm": 28.289352416992188, "learning_rate": 9.753348214285715e-06, "loss": 21.7779, "step": 26712 }, { "epoch": 636.0238805970149, "grad_norm": NaN, "learning_rate": 9.75297619047619e-06, "loss": 40.2852, "step": 26713 }, { "epoch": 636.0477611940298, "grad_norm": 27.398441314697266, "learning_rate": 9.75297619047619e-06, "loss": 25.7057, "step": 26714 }, { "epoch": 636.0716417910447, "grad_norm": 27.475481033325195, "learning_rate": 9.752604166666666e-06, "loss": 24.3434, "step": 26715 }, { "epoch": 636.0955223880597, "grad_norm": 22.882389068603516, "learning_rate": 9.752232142857143e-06, "loss": 23.7945, "step": 26716 }, { "epoch": 636.1194029850747, "grad_norm": 26.770885467529297, "learning_rate": 9.75186011904762e-06, "loss": 25.1476, "step": 26717 }, { "epoch": 636.1432835820896, "grad_norm": 28.269367218017578, "learning_rate": 9.751488095238096e-06, "loss": 24.9079, "step": 26718 }, { "epoch": 636.1671641791045, "grad_norm": 25.782562255859375, "learning_rate": 9.751116071428572e-06, "loss": 23.8786, "step": 26719 }, { "epoch": 636.1910447761194, "grad_norm": 24.294086456298828, "learning_rate": 9.750744047619049e-06, "loss": 24.38, "step": 26720 }, { "epoch": 636.2149253731343, "grad_norm": 23.295495986938477, "learning_rate": 9.750372023809524e-06, "loss": 23.644, "step": 26721 }, { "epoch": 636.2388059701492, "grad_norm": 24.36777687072754, "learning_rate": 9.75e-06, "loss": 25.2652, "step": 26722 }, { "epoch": 636.2626865671642, "grad_norm": 26.608198165893555, "learning_rate": 9.749627976190477e-06, "loss": 24.5588, "step": 26723 }, { "epoch": 636.2865671641791, "grad_norm": 30.375051498413086, "learning_rate": 9.749255952380953e-06, "loss": 25.3316, "step": 26724 }, { "epoch": 636.310447761194, "grad_norm": 25.625030517578125, "learning_rate": 9.74888392857143e-06, "loss": 23.4468, "step": 26725 }, { "epoch": 636.334328358209, "grad_norm": 21.608673095703125, "learning_rate": 9.748511904761905e-06, "loss": 24.395, "step": 26726 }, { "epoch": 636.3582089552239, "grad_norm": 21.38640785217285, "learning_rate": 9.748139880952382e-06, "loss": 24.4456, "step": 26727 }, { "epoch": 636.3820895522388, "grad_norm": 28.74233627319336, "learning_rate": 9.747767857142858e-06, "loss": 25.1858, "step": 26728 }, { "epoch": 636.4059701492537, "grad_norm": 23.535215377807617, "learning_rate": 9.747395833333333e-06, "loss": 24.8088, "step": 26729 }, { "epoch": 636.4298507462687, "grad_norm": 23.179779052734375, "learning_rate": 9.74702380952381e-06, "loss": 24.8643, "step": 26730 }, { "epoch": 636.4537313432836, "grad_norm": 24.203962326049805, "learning_rate": 9.746651785714286e-06, "loss": 23.9345, "step": 26731 }, { "epoch": 636.4776119402985, "grad_norm": 26.96550750732422, "learning_rate": 9.746279761904762e-06, "loss": 24.2181, "step": 26732 }, { "epoch": 636.5014925373134, "grad_norm": 28.056982040405273, "learning_rate": 9.745907738095239e-06, "loss": 23.8087, "step": 26733 }, { "epoch": 636.5253731343283, "grad_norm": 25.257722854614258, "learning_rate": 9.745535714285716e-06, "loss": 25.1521, "step": 26734 }, { "epoch": 636.5492537313432, "grad_norm": 24.534692764282227, "learning_rate": 9.745163690476191e-06, "loss": 24.5904, "step": 26735 }, { "epoch": 636.5731343283583, "grad_norm": 22.31742286682129, "learning_rate": 9.744791666666667e-06, "loss": 23.5031, "step": 26736 }, { "epoch": 636.5970149253732, "grad_norm": 27.282033920288086, "learning_rate": 9.744419642857144e-06, "loss": 24.2722, "step": 26737 }, { "epoch": 636.6208955223881, "grad_norm": 23.719669342041016, "learning_rate": 9.74404761904762e-06, "loss": 24.5759, "step": 26738 }, { "epoch": 636.644776119403, "grad_norm": 28.17474365234375, "learning_rate": 9.743675595238095e-06, "loss": 25.8131, "step": 26739 }, { "epoch": 636.6686567164179, "grad_norm": 28.88218879699707, "learning_rate": 9.743303571428572e-06, "loss": 24.3997, "step": 26740 }, { "epoch": 636.6925373134328, "grad_norm": 21.484294891357422, "learning_rate": 9.74293154761905e-06, "loss": 24.0748, "step": 26741 }, { "epoch": 636.7164179104477, "grad_norm": 24.787044525146484, "learning_rate": 9.742559523809525e-06, "loss": 24.5576, "step": 26742 }, { "epoch": 636.7402985074627, "grad_norm": 30.256078720092773, "learning_rate": 9.7421875e-06, "loss": 24.2446, "step": 26743 }, { "epoch": 636.7641791044776, "grad_norm": 25.64661407470703, "learning_rate": 9.741815476190478e-06, "loss": 24.2444, "step": 26744 }, { "epoch": 636.7880597014926, "grad_norm": 22.060195922851562, "learning_rate": 9.741443452380953e-06, "loss": 25.3614, "step": 26745 }, { "epoch": 636.8119402985075, "grad_norm": 27.50933837890625, "learning_rate": 9.741071428571429e-06, "loss": 23.4462, "step": 26746 }, { "epoch": 636.8358208955224, "grad_norm": 32.87361145019531, "learning_rate": 9.740699404761906e-06, "loss": 24.4322, "step": 26747 }, { "epoch": 636.8597014925373, "grad_norm": 25.638866424560547, "learning_rate": 9.740327380952381e-06, "loss": 25.0017, "step": 26748 }, { "epoch": 636.8835820895522, "grad_norm": 28.514604568481445, "learning_rate": 9.739955357142859e-06, "loss": 23.8382, "step": 26749 }, { "epoch": 636.9074626865672, "grad_norm": 31.259599685668945, "learning_rate": 9.739583333333334e-06, "loss": 24.6234, "step": 26750 }, { "epoch": 636.9313432835821, "grad_norm": 25.827632904052734, "learning_rate": 9.739211309523811e-06, "loss": 23.0577, "step": 26751 }, { "epoch": 636.955223880597, "grad_norm": 22.222227096557617, "learning_rate": 9.738839285714287e-06, "loss": 24.36, "step": 26752 }, { "epoch": 636.9791044776119, "grad_norm": 27.584550857543945, "learning_rate": 9.738467261904762e-06, "loss": 23.8668, "step": 26753 }, { "epoch": 637.0, "grad_norm": 25.154268264770508, "learning_rate": 9.73809523809524e-06, "loss": 21.0565, "step": 26754 }, { "epoch": 637.0238805970149, "grad_norm": 28.072492599487305, "learning_rate": 9.737723214285715e-06, "loss": 25.3391, "step": 26755 }, { "epoch": 637.0477611940298, "grad_norm": 23.01315689086914, "learning_rate": 9.73735119047619e-06, "loss": 24.2035, "step": 26756 }, { "epoch": 637.0716417910447, "grad_norm": 31.06144905090332, "learning_rate": 9.736979166666668e-06, "loss": 24.0617, "step": 26757 }, { "epoch": 637.0955223880597, "grad_norm": 27.932861328125, "learning_rate": 9.736607142857145e-06, "loss": 24.4281, "step": 26758 }, { "epoch": 637.1194029850747, "grad_norm": 22.93927001953125, "learning_rate": 9.73623511904762e-06, "loss": 24.5668, "step": 26759 }, { "epoch": 637.1432835820896, "grad_norm": 30.56427001953125, "learning_rate": 9.735863095238096e-06, "loss": 24.7501, "step": 26760 }, { "epoch": 637.1671641791045, "grad_norm": 32.922420501708984, "learning_rate": 9.735491071428573e-06, "loss": 24.3132, "step": 26761 }, { "epoch": 637.1910447761194, "grad_norm": 22.417070388793945, "learning_rate": 9.735119047619048e-06, "loss": 24.0608, "step": 26762 }, { "epoch": 637.2149253731343, "grad_norm": 31.090923309326172, "learning_rate": 9.734747023809524e-06, "loss": 24.4064, "step": 26763 }, { "epoch": 637.2388059701492, "grad_norm": 36.04600143432617, "learning_rate": 9.734375000000001e-06, "loss": 24.6205, "step": 26764 }, { "epoch": 637.2626865671642, "grad_norm": 24.12148666381836, "learning_rate": 9.734002976190478e-06, "loss": 24.967, "step": 26765 }, { "epoch": 637.2865671641791, "grad_norm": 31.835174560546875, "learning_rate": 9.733630952380954e-06, "loss": 24.6259, "step": 26766 }, { "epoch": 637.310447761194, "grad_norm": 27.37590217590332, "learning_rate": 9.73325892857143e-06, "loss": 24.001, "step": 26767 }, { "epoch": 637.334328358209, "grad_norm": 22.6307315826416, "learning_rate": 9.732886904761907e-06, "loss": 24.9113, "step": 26768 }, { "epoch": 637.3582089552239, "grad_norm": 37.53612518310547, "learning_rate": 9.732514880952382e-06, "loss": 24.4318, "step": 26769 }, { "epoch": 637.3820895522388, "grad_norm": 29.519813537597656, "learning_rate": 9.732142857142858e-06, "loss": 25.2529, "step": 26770 }, { "epoch": 637.4059701492537, "grad_norm": 27.45328140258789, "learning_rate": 9.731770833333333e-06, "loss": 23.4506, "step": 26771 }, { "epoch": 637.4298507462687, "grad_norm": 37.55928421020508, "learning_rate": 9.73139880952381e-06, "loss": 24.5991, "step": 26772 }, { "epoch": 637.4537313432836, "grad_norm": 27.59527587890625, "learning_rate": 9.731026785714287e-06, "loss": 24.8171, "step": 26773 }, { "epoch": 637.4776119402985, "grad_norm": 25.661592483520508, "learning_rate": 9.730654761904763e-06, "loss": 24.867, "step": 26774 }, { "epoch": 637.5014925373134, "grad_norm": 27.062664031982422, "learning_rate": 9.730282738095238e-06, "loss": 24.2484, "step": 26775 }, { "epoch": 637.5253731343283, "grad_norm": 31.88229751586914, "learning_rate": 9.729910714285716e-06, "loss": 24.6977, "step": 26776 }, { "epoch": 637.5492537313432, "grad_norm": 25.097238540649414, "learning_rate": 9.729538690476191e-06, "loss": 25.1243, "step": 26777 }, { "epoch": 637.5731343283583, "grad_norm": 26.037860870361328, "learning_rate": 9.729166666666667e-06, "loss": 24.4942, "step": 26778 }, { "epoch": 637.5970149253732, "grad_norm": 24.415632247924805, "learning_rate": 9.728794642857144e-06, "loss": 24.2437, "step": 26779 }, { "epoch": 637.6208955223881, "grad_norm": 29.612117767333984, "learning_rate": 9.72842261904762e-06, "loss": 24.4037, "step": 26780 }, { "epoch": 637.644776119403, "grad_norm": 23.84958267211914, "learning_rate": 9.728050595238096e-06, "loss": 24.1257, "step": 26781 }, { "epoch": 637.6686567164179, "grad_norm": 26.16590690612793, "learning_rate": 9.727678571428572e-06, "loss": 23.9392, "step": 26782 }, { "epoch": 637.6925373134328, "grad_norm": 27.00301742553711, "learning_rate": 9.727306547619049e-06, "loss": 24.7022, "step": 26783 }, { "epoch": 637.7164179104477, "grad_norm": 23.370786666870117, "learning_rate": 9.726934523809525e-06, "loss": 24.2557, "step": 26784 }, { "epoch": 637.7402985074627, "grad_norm": 28.229995727539062, "learning_rate": 9.7265625e-06, "loss": 24.0205, "step": 26785 }, { "epoch": 637.7641791044776, "grad_norm": 25.81377601623535, "learning_rate": 9.726190476190477e-06, "loss": 24.1106, "step": 26786 }, { "epoch": 637.7880597014926, "grad_norm": 29.834672927856445, "learning_rate": 9.725818452380953e-06, "loss": 25.0796, "step": 26787 }, { "epoch": 637.8119402985075, "grad_norm": 21.043336868286133, "learning_rate": 9.725446428571428e-06, "loss": 23.1301, "step": 26788 }, { "epoch": 637.8358208955224, "grad_norm": 27.81207275390625, "learning_rate": 9.725074404761905e-06, "loss": 24.7008, "step": 26789 }, { "epoch": 637.8597014925373, "grad_norm": 29.01840591430664, "learning_rate": 9.724702380952383e-06, "loss": 24.5552, "step": 26790 }, { "epoch": 637.8835820895522, "grad_norm": 28.768152236938477, "learning_rate": 9.724330357142858e-06, "loss": 23.7128, "step": 26791 }, { "epoch": 637.9074626865672, "grad_norm": 23.408058166503906, "learning_rate": 9.723958333333334e-06, "loss": 24.3382, "step": 26792 }, { "epoch": 637.9313432835821, "grad_norm": 22.471420288085938, "learning_rate": 9.72358630952381e-06, "loss": 24.3298, "step": 26793 }, { "epoch": 637.955223880597, "grad_norm": 24.64818572998047, "learning_rate": 9.723214285714286e-06, "loss": 24.4392, "step": 26794 }, { "epoch": 637.9791044776119, "grad_norm": 22.899646759033203, "learning_rate": 9.722842261904762e-06, "loss": 23.7523, "step": 26795 }, { "epoch": 638.0, "grad_norm": 25.450275421142578, "learning_rate": 9.722470238095239e-06, "loss": 21.2106, "step": 26796 }, { "epoch": 638.0238805970149, "grad_norm": 25.73255157470703, "learning_rate": 9.722098214285715e-06, "loss": 24.5739, "step": 26797 }, { "epoch": 638.0477611940298, "grad_norm": 25.004520416259766, "learning_rate": 9.721726190476192e-06, "loss": 24.7414, "step": 26798 }, { "epoch": 638.0716417910447, "grad_norm": 24.58650016784668, "learning_rate": 9.721354166666667e-06, "loss": 25.1026, "step": 26799 }, { "epoch": 638.0955223880597, "grad_norm": 21.49562644958496, "learning_rate": 9.720982142857144e-06, "loss": 23.7857, "step": 26800 }, { "epoch": 638.1194029850747, "grad_norm": 26.85505485534668, "learning_rate": 9.72061011904762e-06, "loss": 26.0518, "step": 26801 }, { "epoch": 638.1432835820896, "grad_norm": 29.01259422302246, "learning_rate": 9.720238095238095e-06, "loss": 24.055, "step": 26802 }, { "epoch": 638.1671641791045, "grad_norm": 24.24563980102539, "learning_rate": 9.719866071428573e-06, "loss": 23.3087, "step": 26803 }, { "epoch": 638.1910447761194, "grad_norm": 27.472576141357422, "learning_rate": 9.719494047619048e-06, "loss": 23.7133, "step": 26804 }, { "epoch": 638.2149253731343, "grad_norm": 24.589475631713867, "learning_rate": 9.719122023809524e-06, "loss": 24.9012, "step": 26805 }, { "epoch": 638.2388059701492, "grad_norm": 25.94295310974121, "learning_rate": 9.71875e-06, "loss": 24.3566, "step": 26806 }, { "epoch": 638.2626865671642, "grad_norm": 25.371692657470703, "learning_rate": 9.718377976190478e-06, "loss": 23.549, "step": 26807 }, { "epoch": 638.2865671641791, "grad_norm": 28.113435745239258, "learning_rate": 9.718005952380953e-06, "loss": 24.4356, "step": 26808 }, { "epoch": 638.310447761194, "grad_norm": 20.815563201904297, "learning_rate": 9.717633928571429e-06, "loss": 23.4156, "step": 26809 }, { "epoch": 638.334328358209, "grad_norm": 25.521400451660156, "learning_rate": 9.717261904761906e-06, "loss": 24.5504, "step": 26810 }, { "epoch": 638.3582089552239, "grad_norm": 27.78317642211914, "learning_rate": 9.716889880952382e-06, "loss": 23.8473, "step": 26811 }, { "epoch": 638.3820895522388, "grad_norm": 20.588539123535156, "learning_rate": 9.716517857142857e-06, "loss": 24.9697, "step": 26812 }, { "epoch": 638.4059701492537, "grad_norm": 30.457809448242188, "learning_rate": 9.716145833333334e-06, "loss": 24.5783, "step": 26813 }, { "epoch": 638.4298507462687, "grad_norm": 23.7593994140625, "learning_rate": 9.715773809523812e-06, "loss": 23.394, "step": 26814 }, { "epoch": 638.4537313432836, "grad_norm": 28.608503341674805, "learning_rate": 9.715401785714287e-06, "loss": 24.6513, "step": 26815 }, { "epoch": 638.4776119402985, "grad_norm": 27.108848571777344, "learning_rate": 9.715029761904762e-06, "loss": 24.5734, "step": 26816 }, { "epoch": 638.5014925373134, "grad_norm": 22.619768142700195, "learning_rate": 9.71465773809524e-06, "loss": 24.2726, "step": 26817 }, { "epoch": 638.5253731343283, "grad_norm": 30.14516830444336, "learning_rate": 9.714285714285715e-06, "loss": 24.8022, "step": 26818 }, { "epoch": 638.5492537313432, "grad_norm": 25.301523208618164, "learning_rate": 9.71391369047619e-06, "loss": 25.1436, "step": 26819 }, { "epoch": 638.5731343283583, "grad_norm": 32.325462341308594, "learning_rate": 9.713541666666668e-06, "loss": 24.336, "step": 26820 }, { "epoch": 638.5970149253732, "grad_norm": 26.873737335205078, "learning_rate": 9.713169642857143e-06, "loss": 23.4314, "step": 26821 }, { "epoch": 638.6208955223881, "grad_norm": 26.979829788208008, "learning_rate": 9.71279761904762e-06, "loss": 23.7818, "step": 26822 }, { "epoch": 638.644776119403, "grad_norm": 26.812257766723633, "learning_rate": 9.712425595238096e-06, "loss": 24.3387, "step": 26823 }, { "epoch": 638.6686567164179, "grad_norm": 22.594440460205078, "learning_rate": 9.712053571428573e-06, "loss": 23.9196, "step": 26824 }, { "epoch": 638.6925373134328, "grad_norm": 29.114402770996094, "learning_rate": 9.711681547619049e-06, "loss": 24.0105, "step": 26825 }, { "epoch": 638.7164179104477, "grad_norm": 23.58099937438965, "learning_rate": 9.711309523809524e-06, "loss": 23.9298, "step": 26826 }, { "epoch": 638.7402985074627, "grad_norm": 30.74314308166504, "learning_rate": 9.710937500000001e-06, "loss": 24.483, "step": 26827 }, { "epoch": 638.7641791044776, "grad_norm": 24.85072135925293, "learning_rate": 9.710565476190477e-06, "loss": 24.2985, "step": 26828 }, { "epoch": 638.7880597014926, "grad_norm": 29.688499450683594, "learning_rate": 9.710193452380952e-06, "loss": 24.3621, "step": 26829 }, { "epoch": 638.8119402985075, "grad_norm": 26.328838348388672, "learning_rate": 9.70982142857143e-06, "loss": 25.3076, "step": 26830 }, { "epoch": 638.8358208955224, "grad_norm": 27.890846252441406, "learning_rate": 9.709449404761905e-06, "loss": 25.2402, "step": 26831 }, { "epoch": 638.8597014925373, "grad_norm": 27.554807662963867, "learning_rate": 9.709077380952382e-06, "loss": 24.706, "step": 26832 }, { "epoch": 638.8835820895522, "grad_norm": 29.23007583618164, "learning_rate": 9.708705357142858e-06, "loss": 25.024, "step": 26833 }, { "epoch": 638.9074626865672, "grad_norm": 32.205204010009766, "learning_rate": 9.708333333333333e-06, "loss": 24.7056, "step": 26834 }, { "epoch": 638.9313432835821, "grad_norm": 23.304231643676758, "learning_rate": 9.70796130952381e-06, "loss": 23.676, "step": 26835 }, { "epoch": 638.955223880597, "grad_norm": 26.240468978881836, "learning_rate": 9.707589285714286e-06, "loss": 25.0716, "step": 26836 }, { "epoch": 638.9791044776119, "grad_norm": 26.691740036010742, "learning_rate": 9.707217261904761e-06, "loss": 24.3877, "step": 26837 }, { "epoch": 639.0, "grad_norm": 26.89805030822754, "learning_rate": 9.706845238095239e-06, "loss": 21.4802, "step": 26838 }, { "epoch": 639.0238805970149, "grad_norm": 25.85698890686035, "learning_rate": 9.706473214285716e-06, "loss": 24.7814, "step": 26839 }, { "epoch": 639.0477611940298, "grad_norm": 22.81261444091797, "learning_rate": 9.706101190476191e-06, "loss": 24.8212, "step": 26840 }, { "epoch": 639.0716417910447, "grad_norm": 27.725204467773438, "learning_rate": 9.705729166666667e-06, "loss": 23.722, "step": 26841 }, { "epoch": 639.0955223880597, "grad_norm": 32.116329193115234, "learning_rate": 9.705357142857144e-06, "loss": 24.5475, "step": 26842 }, { "epoch": 639.1194029850747, "grad_norm": 29.992908477783203, "learning_rate": 9.70498511904762e-06, "loss": 23.7901, "step": 26843 }, { "epoch": 639.1432835820896, "grad_norm": 23.69184112548828, "learning_rate": 9.704613095238095e-06, "loss": 24.2015, "step": 26844 }, { "epoch": 639.1671641791045, "grad_norm": 33.6051025390625, "learning_rate": 9.704241071428572e-06, "loss": 24.8433, "step": 26845 }, { "epoch": 639.1910447761194, "grad_norm": 29.0351505279541, "learning_rate": 9.70386904761905e-06, "loss": 24.5241, "step": 26846 }, { "epoch": 639.2149253731343, "grad_norm": 23.261098861694336, "learning_rate": 9.703497023809525e-06, "loss": 23.9859, "step": 26847 }, { "epoch": 639.2388059701492, "grad_norm": 25.86594009399414, "learning_rate": 9.703125e-06, "loss": 24.4569, "step": 26848 }, { "epoch": 639.2626865671642, "grad_norm": 22.295520782470703, "learning_rate": 9.702752976190478e-06, "loss": 23.7172, "step": 26849 }, { "epoch": 639.2865671641791, "grad_norm": 36.461753845214844, "learning_rate": 9.702380952380953e-06, "loss": 24.7201, "step": 26850 }, { "epoch": 639.310447761194, "grad_norm": 25.248790740966797, "learning_rate": 9.702008928571429e-06, "loss": 24.5876, "step": 26851 }, { "epoch": 639.334328358209, "grad_norm": 28.757646560668945, "learning_rate": 9.701636904761906e-06, "loss": 24.0484, "step": 26852 }, { "epoch": 639.3582089552239, "grad_norm": 31.703176498413086, "learning_rate": 9.701264880952381e-06, "loss": 23.1152, "step": 26853 }, { "epoch": 639.3820895522388, "grad_norm": 26.923845291137695, "learning_rate": 9.700892857142858e-06, "loss": 24.5997, "step": 26854 }, { "epoch": 639.4059701492537, "grad_norm": 28.21604347229004, "learning_rate": 9.700520833333334e-06, "loss": 24.8167, "step": 26855 }, { "epoch": 639.4298507462687, "grad_norm": 42.024696350097656, "learning_rate": 9.700148809523811e-06, "loss": 24.0088, "step": 26856 }, { "epoch": 639.4537313432836, "grad_norm": 24.794185638427734, "learning_rate": 9.699776785714287e-06, "loss": 23.9267, "step": 26857 }, { "epoch": 639.4776119402985, "grad_norm": 34.815574645996094, "learning_rate": 9.699404761904762e-06, "loss": 23.4334, "step": 26858 }, { "epoch": 639.5014925373134, "grad_norm": 32.674320220947266, "learning_rate": 9.69903273809524e-06, "loss": 25.2813, "step": 26859 }, { "epoch": 639.5253731343283, "grad_norm": 27.373300552368164, "learning_rate": 9.698660714285715e-06, "loss": 24.7456, "step": 26860 }, { "epoch": 639.5492537313432, "grad_norm": 31.323671340942383, "learning_rate": 9.69828869047619e-06, "loss": 24.3301, "step": 26861 }, { "epoch": 639.5731343283583, "grad_norm": 32.19162368774414, "learning_rate": 9.697916666666667e-06, "loss": 24.429, "step": 26862 }, { "epoch": 639.5970149253732, "grad_norm": 30.053760528564453, "learning_rate": 9.697544642857145e-06, "loss": 25.0995, "step": 26863 }, { "epoch": 639.6208955223881, "grad_norm": 20.503440856933594, "learning_rate": 9.69717261904762e-06, "loss": 24.3127, "step": 26864 }, { "epoch": 639.644776119403, "grad_norm": 27.681734085083008, "learning_rate": 9.696800595238096e-06, "loss": 24.9434, "step": 26865 }, { "epoch": 639.6686567164179, "grad_norm": 28.722747802734375, "learning_rate": 9.696428571428573e-06, "loss": 24.7079, "step": 26866 }, { "epoch": 639.6925373134328, "grad_norm": 25.06566619873047, "learning_rate": 9.696056547619048e-06, "loss": 24.4119, "step": 26867 }, { "epoch": 639.7164179104477, "grad_norm": 22.65522575378418, "learning_rate": 9.695684523809524e-06, "loss": 24.023, "step": 26868 }, { "epoch": 639.7402985074627, "grad_norm": 23.77244758605957, "learning_rate": 9.695312500000001e-06, "loss": 24.3023, "step": 26869 }, { "epoch": 639.7641791044776, "grad_norm": 24.856107711791992, "learning_rate": 9.694940476190478e-06, "loss": 23.4585, "step": 26870 }, { "epoch": 639.7880597014926, "grad_norm": 25.37268829345703, "learning_rate": 9.694568452380954e-06, "loss": 24.6216, "step": 26871 }, { "epoch": 639.8119402985075, "grad_norm": 33.29642868041992, "learning_rate": 9.69419642857143e-06, "loss": 24.1753, "step": 26872 }, { "epoch": 639.8358208955224, "grad_norm": 24.925979614257812, "learning_rate": 9.693824404761906e-06, "loss": 23.6175, "step": 26873 }, { "epoch": 639.8597014925373, "grad_norm": 25.245452880859375, "learning_rate": 9.693452380952382e-06, "loss": 24.5317, "step": 26874 }, { "epoch": 639.8835820895522, "grad_norm": 25.16192626953125, "learning_rate": 9.693080357142857e-06, "loss": 23.9515, "step": 26875 }, { "epoch": 639.9074626865672, "grad_norm": 28.372631072998047, "learning_rate": 9.692708333333335e-06, "loss": 24.2485, "step": 26876 }, { "epoch": 639.9313432835821, "grad_norm": 33.75870895385742, "learning_rate": 9.69233630952381e-06, "loss": 24.5071, "step": 26877 }, { "epoch": 639.955223880597, "grad_norm": 22.499496459960938, "learning_rate": 9.691964285714287e-06, "loss": 25.1671, "step": 26878 }, { "epoch": 639.9791044776119, "grad_norm": 30.794221878051758, "learning_rate": 9.691592261904763e-06, "loss": 24.8667, "step": 26879 }, { "epoch": 640.0, "grad_norm": 25.250890731811523, "learning_rate": 9.69122023809524e-06, "loss": 21.4669, "step": 26880 }, { "epoch": 640.0, "step": 26880, "total_flos": 1.321405580333805e+18, "train_loss": 0.7709448340393248, "train_runtime": 25718.4164, "train_samples_per_second": 133.184, "train_steps_per_second": 1.045 }, { "epoch": 640.0238805970149, "grad_norm": 24.928321838378906, "learning_rate": 1e-05, "loss": 24.8925, "step": 26881 }, { "epoch": 640.0477611940298, "grad_norm": Infinity, "learning_rate": 9.999639249639251e-06, "loss": 31.4496, "step": 26882 }, { "epoch": 640.0716417910447, "grad_norm": 321.63201904296875, "learning_rate": 9.999639249639251e-06, "loss": 30.96, "step": 26883 }, { "epoch": 640.0955223880597, "grad_norm": 152.88729858398438, "learning_rate": 9.9992784992785e-06, "loss": 28.0617, "step": 26884 }, { "epoch": 640.1194029850747, "grad_norm": 88.36662292480469, "learning_rate": 9.99891774891775e-06, "loss": 25.2652, "step": 26885 }, { "epoch": 640.1432835820896, "grad_norm": 75.94825744628906, "learning_rate": 9.998556998557e-06, "loss": 26.4386, "step": 26886 }, { "epoch": 640.1671641791045, "grad_norm": 78.24170684814453, "learning_rate": 9.998196248196248e-06, "loss": 24.1587, "step": 26887 }, { "epoch": 640.1910447761194, "grad_norm": 76.0237045288086, "learning_rate": 9.997835497835499e-06, "loss": 24.8778, "step": 26888 }, { "epoch": 640.2149253731343, "grad_norm": 52.09082794189453, "learning_rate": 9.997474747474749e-06, "loss": 26.3009, "step": 26889 }, { "epoch": 640.2388059701492, "grad_norm": 55.52709197998047, "learning_rate": 9.997113997113997e-06, "loss": 24.3429, "step": 26890 }, { "epoch": 640.2626865671642, "grad_norm": 50.308616638183594, "learning_rate": 9.996753246753248e-06, "loss": 25.1901, "step": 26891 }, { "epoch": 640.2865671641791, "grad_norm": 36.14228439331055, "learning_rate": 9.996392496392498e-06, "loss": 24.1843, "step": 26892 }, { "epoch": 640.310447761194, "grad_norm": 34.843719482421875, "learning_rate": 9.996031746031746e-06, "loss": 24.3265, "step": 26893 }, { "epoch": 640.334328358209, "grad_norm": 47.0025749206543, "learning_rate": 9.995670995670996e-06, "loss": 24.8591, "step": 26894 }, { "epoch": 640.3582089552239, "grad_norm": 41.13364028930664, "learning_rate": 9.995310245310245e-06, "loss": 24.7527, "step": 26895 }, { "epoch": 640.3820895522388, "grad_norm": 27.59086036682129, "learning_rate": 9.994949494949497e-06, "loss": 24.888, "step": 26896 }, { "epoch": 640.4059701492537, "grad_norm": 36.17591094970703, "learning_rate": 9.994588744588745e-06, "loss": 23.4807, "step": 26897 }, { "epoch": 640.4298507462687, "grad_norm": 34.188018798828125, "learning_rate": 9.994227994227996e-06, "loss": 24.7569, "step": 26898 }, { "epoch": 640.4537313432836, "grad_norm": NaN, "learning_rate": 9.993867243867244e-06, "loss": 25.2127, "step": 26899 }, { "epoch": 640.4776119402985, "grad_norm": 27.968191146850586, "learning_rate": 9.993867243867244e-06, "loss": 24.395, "step": 26900 }, { "epoch": 640.5014925373134, "grad_norm": 30.78780174255371, "learning_rate": 9.993506493506494e-06, "loss": 24.8972, "step": 26901 }, { "epoch": 640.5253731343283, "grad_norm": 35.5263557434082, "learning_rate": 9.993145743145743e-06, "loss": 24.9105, "step": 26902 }, { "epoch": 640.5492537313432, "grad_norm": 29.958518981933594, "learning_rate": 9.992784992784995e-06, "loss": 23.8995, "step": 26903 }, { "epoch": 640.5731343283583, "grad_norm": 30.844274520874023, "learning_rate": 9.992424242424243e-06, "loss": 23.7377, "step": 26904 }, { "epoch": 640.5970149253732, "grad_norm": 28.826095581054688, "learning_rate": 9.992063492063493e-06, "loss": 25.2219, "step": 26905 }, { "epoch": 640.6208955223881, "grad_norm": 23.138059616088867, "learning_rate": 9.991702741702742e-06, "loss": 24.2672, "step": 26906 }, { "epoch": 640.644776119403, "grad_norm": 28.437253952026367, "learning_rate": 9.991341991341992e-06, "loss": 24.1799, "step": 26907 }, { "epoch": 640.6686567164179, "grad_norm": 29.532930374145508, "learning_rate": 9.990981240981242e-06, "loss": 24.4114, "step": 26908 }, { "epoch": 640.6925373134328, "grad_norm": 25.18813705444336, "learning_rate": 9.990620490620492e-06, "loss": 25.6406, "step": 26909 }, { "epoch": 640.7164179104477, "grad_norm": 26.35684585571289, "learning_rate": 9.990259740259741e-06, "loss": 24.4624, "step": 26910 }, { "epoch": 640.7402985074627, "grad_norm": 26.31411361694336, "learning_rate": 9.989898989898991e-06, "loss": 24.9666, "step": 26911 }, { "epoch": 640.7641791044776, "grad_norm": 24.570539474487305, "learning_rate": 9.98953823953824e-06, "loss": 24.5484, "step": 26912 }, { "epoch": 640.7880597014926, "grad_norm": 27.26763153076172, "learning_rate": 9.98917748917749e-06, "loss": 24.1455, "step": 26913 }, { "epoch": 640.8119402985075, "grad_norm": 32.089805603027344, "learning_rate": 9.98881673881674e-06, "loss": 24.5577, "step": 26914 }, { "epoch": 640.8358208955224, "grad_norm": 35.302040100097656, "learning_rate": 9.98845598845599e-06, "loss": 24.9565, "step": 26915 }, { "epoch": 640.8597014925373, "grad_norm": 22.158662796020508, "learning_rate": 9.988095238095239e-06, "loss": 24.5375, "step": 26916 }, { "epoch": 640.8835820895522, "grad_norm": 36.87960433959961, "learning_rate": 9.987734487734489e-06, "loss": 24.3397, "step": 26917 }, { "epoch": 640.9074626865672, "grad_norm": 29.687061309814453, "learning_rate": 9.987373737373737e-06, "loss": 23.9994, "step": 26918 }, { "epoch": 640.9313432835821, "grad_norm": 26.87071418762207, "learning_rate": 9.987012987012988e-06, "loss": 24.3131, "step": 26919 }, { "epoch": 640.955223880597, "grad_norm": 29.02485466003418, "learning_rate": 9.986652236652238e-06, "loss": 25.1087, "step": 26920 }, { "epoch": 640.9791044776119, "grad_norm": 31.923995971679688, "learning_rate": 9.986291486291488e-06, "loss": 23.6066, "step": 26921 }, { "epoch": 641.0, "grad_norm": 22.033334732055664, "learning_rate": 9.985930735930737e-06, "loss": 22.4866, "step": 26922 }, { "epoch": 641.0238805970149, "grad_norm": 25.437795639038086, "learning_rate": 9.985569985569987e-06, "loss": 25.1613, "step": 26923 }, { "epoch": 641.0477611940298, "grad_norm": 21.967941284179688, "learning_rate": 9.985209235209235e-06, "loss": 23.9105, "step": 26924 }, { "epoch": 641.0716417910447, "grad_norm": 26.970626831054688, "learning_rate": 9.984848484848485e-06, "loss": 24.1694, "step": 26925 }, { "epoch": 641.0955223880597, "grad_norm": 28.04083251953125, "learning_rate": 9.984487734487736e-06, "loss": 25.5129, "step": 26926 }, { "epoch": 641.1194029850747, "grad_norm": 26.387624740600586, "learning_rate": 9.984126984126986e-06, "loss": 23.914, "step": 26927 }, { "epoch": 641.1432835820896, "grad_norm": 26.66200065612793, "learning_rate": 9.983766233766234e-06, "loss": 24.2375, "step": 26928 }, { "epoch": 641.1671641791045, "grad_norm": 24.590618133544922, "learning_rate": 9.983405483405484e-06, "loss": 24.6828, "step": 26929 }, { "epoch": 641.1910447761194, "grad_norm": NaN, "learning_rate": 9.983044733044733e-06, "loss": 54.4568, "step": 26930 }, { "epoch": 641.2149253731343, "grad_norm": 35.544532775878906, "learning_rate": 9.983044733044733e-06, "loss": 24.165, "step": 26931 }, { "epoch": 641.2388059701492, "grad_norm": 32.51929473876953, "learning_rate": 9.982683982683983e-06, "loss": 24.7543, "step": 26932 }, { "epoch": 641.2626865671642, "grad_norm": 25.896137237548828, "learning_rate": 9.982323232323233e-06, "loss": 24.4958, "step": 26933 }, { "epoch": 641.2865671641791, "grad_norm": 30.933406829833984, "learning_rate": 9.981962481962482e-06, "loss": 24.078, "step": 26934 }, { "epoch": 641.310447761194, "grad_norm": 35.41303253173828, "learning_rate": 9.981601731601732e-06, "loss": 23.0704, "step": 26935 }, { "epoch": 641.334328358209, "grad_norm": 23.333410263061523, "learning_rate": 9.981240981240982e-06, "loss": 24.0691, "step": 26936 }, { "epoch": 641.3582089552239, "grad_norm": 23.86208152770996, "learning_rate": 9.980880230880232e-06, "loss": 23.2205, "step": 26937 }, { "epoch": 641.3820895522388, "grad_norm": 32.075706481933594, "learning_rate": 9.980519480519481e-06, "loss": 23.8732, "step": 26938 }, { "epoch": 641.4059701492537, "grad_norm": 29.29692268371582, "learning_rate": 9.980158730158731e-06, "loss": 24.9904, "step": 26939 }, { "epoch": 641.4298507462687, "grad_norm": 26.072864532470703, "learning_rate": 9.97979797979798e-06, "loss": 25.0498, "step": 26940 }, { "epoch": 641.4537313432836, "grad_norm": 26.663606643676758, "learning_rate": 9.97943722943723e-06, "loss": 25.5096, "step": 26941 }, { "epoch": 641.4776119402985, "grad_norm": 31.263084411621094, "learning_rate": 9.97907647907648e-06, "loss": 24.3053, "step": 26942 }, { "epoch": 641.5014925373134, "grad_norm": 26.593734741210938, "learning_rate": 9.97871572871573e-06, "loss": 24.2737, "step": 26943 }, { "epoch": 641.5253731343283, "grad_norm": 27.479001998901367, "learning_rate": 9.978354978354979e-06, "loss": 24.942, "step": 26944 }, { "epoch": 641.5492537313432, "grad_norm": 22.47988510131836, "learning_rate": 9.977994227994229e-06, "loss": 24.7647, "step": 26945 }, { "epoch": 641.5731343283583, "grad_norm": 26.630617141723633, "learning_rate": 9.977633477633477e-06, "loss": 24.1685, "step": 26946 }, { "epoch": 641.5970149253732, "grad_norm": 24.412918090820312, "learning_rate": 9.977272727272728e-06, "loss": 24.2759, "step": 26947 }, { "epoch": 641.6208955223881, "grad_norm": 29.683259963989258, "learning_rate": 9.976911976911978e-06, "loss": 23.6591, "step": 26948 }, { "epoch": 641.644776119403, "grad_norm": 23.648815155029297, "learning_rate": 9.976551226551228e-06, "loss": 24.5678, "step": 26949 }, { "epoch": 641.6686567164179, "grad_norm": 24.530168533325195, "learning_rate": 9.976190476190477e-06, "loss": 24.0035, "step": 26950 }, { "epoch": 641.6925373134328, "grad_norm": 26.140356063842773, "learning_rate": 9.975829725829727e-06, "loss": 24.1716, "step": 26951 }, { "epoch": 641.7164179104477, "grad_norm": 30.735143661499023, "learning_rate": 9.975468975468975e-06, "loss": 23.7325, "step": 26952 }, { "epoch": 641.7402985074627, "grad_norm": 26.125144958496094, "learning_rate": 9.975108225108225e-06, "loss": 24.6709, "step": 26953 }, { "epoch": 641.7641791044776, "grad_norm": 22.824838638305664, "learning_rate": 9.974747474747476e-06, "loss": 24.4808, "step": 26954 }, { "epoch": 641.7880597014926, "grad_norm": 26.846179962158203, "learning_rate": 9.974386724386726e-06, "loss": 24.7847, "step": 26955 }, { "epoch": 641.8119402985075, "grad_norm": 26.2933349609375, "learning_rate": 9.974025974025974e-06, "loss": 24.2648, "step": 26956 }, { "epoch": 641.8358208955224, "grad_norm": 27.221210479736328, "learning_rate": 9.973665223665225e-06, "loss": 23.1785, "step": 26957 }, { "epoch": 641.8597014925373, "grad_norm": 34.12690734863281, "learning_rate": 9.973304473304473e-06, "loss": 25.9808, "step": 26958 }, { "epoch": 641.8835820895522, "grad_norm": 24.80228614807129, "learning_rate": 9.972943722943725e-06, "loss": 24.3018, "step": 26959 }, { "epoch": 641.9074626865672, "grad_norm": 24.938386917114258, "learning_rate": 9.972582972582973e-06, "loss": 24.1412, "step": 26960 }, { "epoch": 641.9313432835821, "grad_norm": 35.51830291748047, "learning_rate": 9.972222222222224e-06, "loss": 24.9425, "step": 26961 }, { "epoch": 641.955223880597, "grad_norm": 23.755596160888672, "learning_rate": 9.971861471861472e-06, "loss": 24.2509, "step": 26962 }, { "epoch": 641.9791044776119, "grad_norm": 23.43117332458496, "learning_rate": 9.971500721500722e-06, "loss": 23.7976, "step": 26963 }, { "epoch": 642.0, "grad_norm": 28.86804962158203, "learning_rate": 9.971139971139971e-06, "loss": 21.2822, "step": 26964 }, { "epoch": 642.0238805970149, "grad_norm": 28.50323486328125, "learning_rate": 9.970779220779223e-06, "loss": 25.5493, "step": 26965 }, { "epoch": 642.0477611940298, "grad_norm": 23.462697982788086, "learning_rate": 9.970418470418471e-06, "loss": 24.2509, "step": 26966 }, { "epoch": 642.0716417910447, "grad_norm": 35.47056198120117, "learning_rate": 9.970057720057721e-06, "loss": 24.1057, "step": 26967 }, { "epoch": 642.0955223880597, "grad_norm": 29.3469295501709, "learning_rate": 9.96969696969697e-06, "loss": 24.9299, "step": 26968 }, { "epoch": 642.1194029850747, "grad_norm": 26.334142684936523, "learning_rate": 9.96933621933622e-06, "loss": 24.5014, "step": 26969 }, { "epoch": 642.1432835820896, "grad_norm": 32.942405700683594, "learning_rate": 9.96897546897547e-06, "loss": 24.904, "step": 26970 }, { "epoch": 642.1671641791045, "grad_norm": 28.88990592956543, "learning_rate": 9.96861471861472e-06, "loss": 23.9487, "step": 26971 }, { "epoch": 642.1910447761194, "grad_norm": 28.035417556762695, "learning_rate": 9.968253968253969e-06, "loss": 23.5356, "step": 26972 }, { "epoch": 642.2149253731343, "grad_norm": 27.52299690246582, "learning_rate": 9.96789321789322e-06, "loss": 24.2028, "step": 26973 }, { "epoch": 642.2388059701492, "grad_norm": 32.34648132324219, "learning_rate": 9.967532467532468e-06, "loss": 24.842, "step": 26974 }, { "epoch": 642.2626865671642, "grad_norm": 29.258586883544922, "learning_rate": 9.967171717171718e-06, "loss": 24.0915, "step": 26975 }, { "epoch": 642.2865671641791, "grad_norm": 21.798316955566406, "learning_rate": 9.966810966810968e-06, "loss": 23.947, "step": 26976 }, { "epoch": 642.310447761194, "grad_norm": 32.55973815917969, "learning_rate": 9.966450216450217e-06, "loss": 23.3979, "step": 26977 }, { "epoch": 642.334328358209, "grad_norm": 29.374162673950195, "learning_rate": 9.966089466089467e-06, "loss": 23.8097, "step": 26978 }, { "epoch": 642.3582089552239, "grad_norm": 24.41139030456543, "learning_rate": 9.965728715728717e-06, "loss": 24.3212, "step": 26979 }, { "epoch": 642.3820895522388, "grad_norm": 26.753406524658203, "learning_rate": 9.965367965367966e-06, "loss": 24.0767, "step": 26980 }, { "epoch": 642.4059701492537, "grad_norm": 25.585206985473633, "learning_rate": 9.965007215007216e-06, "loss": 23.6149, "step": 26981 }, { "epoch": 642.4298507462687, "grad_norm": 25.38397979736328, "learning_rate": 9.964646464646466e-06, "loss": 23.7558, "step": 26982 }, { "epoch": 642.4537313432836, "grad_norm": 24.317235946655273, "learning_rate": 9.964285714285714e-06, "loss": 23.5889, "step": 26983 }, { "epoch": 642.4776119402985, "grad_norm": 22.1574764251709, "learning_rate": 9.963924963924965e-06, "loss": 24.5493, "step": 26984 }, { "epoch": 642.5014925373134, "grad_norm": 21.31608772277832, "learning_rate": 9.963564213564215e-06, "loss": 23.765, "step": 26985 }, { "epoch": 642.5253731343283, "grad_norm": 26.307231903076172, "learning_rate": 9.963203463203463e-06, "loss": 24.8784, "step": 26986 }, { "epoch": 642.5492537313432, "grad_norm": 26.193675994873047, "learning_rate": 9.962842712842714e-06, "loss": 24.411, "step": 26987 }, { "epoch": 642.5731343283583, "grad_norm": 23.551618576049805, "learning_rate": 9.962481962481964e-06, "loss": 24.5153, "step": 26988 }, { "epoch": 642.5970149253732, "grad_norm": 23.894058227539062, "learning_rate": 9.962121212121212e-06, "loss": 23.8639, "step": 26989 }, { "epoch": 642.6208955223881, "grad_norm": 23.796802520751953, "learning_rate": 9.961760461760462e-06, "loss": 25.4809, "step": 26990 }, { "epoch": 642.644776119403, "grad_norm": 25.540937423706055, "learning_rate": 9.961399711399713e-06, "loss": 23.2913, "step": 26991 }, { "epoch": 642.6686567164179, "grad_norm": 25.73032569885254, "learning_rate": 9.961038961038963e-06, "loss": 23.8494, "step": 26992 }, { "epoch": 642.6925373134328, "grad_norm": 32.15433120727539, "learning_rate": 9.960678210678211e-06, "loss": 24.5955, "step": 26993 }, { "epoch": 642.7164179104477, "grad_norm": 28.571977615356445, "learning_rate": 9.960317460317462e-06, "loss": 23.9919, "step": 26994 }, { "epoch": 642.7402985074627, "grad_norm": 21.99958610534668, "learning_rate": 9.95995670995671e-06, "loss": 24.3686, "step": 26995 }, { "epoch": 642.7641791044776, "grad_norm": 23.218584060668945, "learning_rate": 9.95959595959596e-06, "loss": 24.4923, "step": 26996 }, { "epoch": 642.7880597014926, "grad_norm": 23.38093376159668, "learning_rate": 9.959235209235209e-06, "loss": 24.4662, "step": 26997 }, { "epoch": 642.8119402985075, "grad_norm": 23.545005798339844, "learning_rate": 9.95887445887446e-06, "loss": 24.0981, "step": 26998 }, { "epoch": 642.8358208955224, "grad_norm": 29.945598602294922, "learning_rate": 9.95851370851371e-06, "loss": 24.2721, "step": 26999 }, { "epoch": 642.8597014925373, "grad_norm": 26.570999145507812, "learning_rate": 9.95815295815296e-06, "loss": 23.8703, "step": 27000 }, { "epoch": 642.8835820895522, "grad_norm": 22.574682235717773, "learning_rate": 9.957792207792208e-06, "loss": 24.4096, "step": 27001 }, { "epoch": 642.9074626865672, "grad_norm": 20.977323532104492, "learning_rate": 9.957431457431458e-06, "loss": 25.2687, "step": 27002 }, { "epoch": 642.9313432835821, "grad_norm": 22.120407104492188, "learning_rate": 9.957070707070707e-06, "loss": 24.4462, "step": 27003 }, { "epoch": 642.955223880597, "grad_norm": 20.599756240844727, "learning_rate": 9.956709956709958e-06, "loss": 24.8084, "step": 27004 }, { "epoch": 642.9791044776119, "grad_norm": 22.547243118286133, "learning_rate": 9.956349206349207e-06, "loss": 23.8487, "step": 27005 }, { "epoch": 643.0, "grad_norm": 21.79747200012207, "learning_rate": 9.955988455988457e-06, "loss": 22.5908, "step": 27006 }, { "epoch": 643.0238805970149, "grad_norm": 31.353321075439453, "learning_rate": 9.955627705627706e-06, "loss": 25.2107, "step": 27007 }, { "epoch": 643.0477611940298, "grad_norm": 28.017921447753906, "learning_rate": 9.955266955266956e-06, "loss": 23.7507, "step": 27008 }, { "epoch": 643.0716417910447, "grad_norm": 21.671388626098633, "learning_rate": 9.954906204906206e-06, "loss": 24.624, "step": 27009 }, { "epoch": 643.0955223880597, "grad_norm": 23.2735538482666, "learning_rate": 9.954545454545456e-06, "loss": 23.2092, "step": 27010 }, { "epoch": 643.1194029850747, "grad_norm": 26.323091506958008, "learning_rate": 9.954184704184705e-06, "loss": 24.5227, "step": 27011 }, { "epoch": 643.1432835820896, "grad_norm": 24.66031837463379, "learning_rate": 9.953823953823955e-06, "loss": 23.8464, "step": 27012 }, { "epoch": 643.1671641791045, "grad_norm": 24.41261100769043, "learning_rate": 9.953463203463203e-06, "loss": 24.8273, "step": 27013 }, { "epoch": 643.1910447761194, "grad_norm": 25.619028091430664, "learning_rate": 9.953102453102454e-06, "loss": 23.5357, "step": 27014 }, { "epoch": 643.2149253731343, "grad_norm": 26.503620147705078, "learning_rate": 9.952741702741704e-06, "loss": 24.2611, "step": 27015 }, { "epoch": 643.2388059701492, "grad_norm": 25.376270294189453, "learning_rate": 9.952380952380954e-06, "loss": 24.5349, "step": 27016 }, { "epoch": 643.2626865671642, "grad_norm": NaN, "learning_rate": 9.952020202020203e-06, "loss": 33.2592, "step": 27017 }, { "epoch": 643.2865671641791, "grad_norm": 23.97342300415039, "learning_rate": 9.952020202020203e-06, "loss": 24.2938, "step": 27018 }, { "epoch": 643.310447761194, "grad_norm": 27.8851318359375, "learning_rate": 9.951659451659453e-06, "loss": 24.0367, "step": 27019 }, { "epoch": 643.334328358209, "grad_norm": 30.175189971923828, "learning_rate": 9.951298701298701e-06, "loss": 24.3424, "step": 27020 }, { "epoch": 643.3582089552239, "grad_norm": 26.9473934173584, "learning_rate": 9.950937950937951e-06, "loss": 24.7845, "step": 27021 }, { "epoch": 643.3820895522388, "grad_norm": 24.232507705688477, "learning_rate": 9.950577200577202e-06, "loss": 23.9087, "step": 27022 }, { "epoch": 643.4059701492537, "grad_norm": 23.22726058959961, "learning_rate": 9.950216450216452e-06, "loss": 23.6278, "step": 27023 }, { "epoch": 643.4298507462687, "grad_norm": 23.56996726989746, "learning_rate": 9.9498556998557e-06, "loss": 24.9665, "step": 27024 }, { "epoch": 643.4537313432836, "grad_norm": 26.135562896728516, "learning_rate": 9.94949494949495e-06, "loss": 24.188, "step": 27025 }, { "epoch": 643.4776119402985, "grad_norm": 25.083261489868164, "learning_rate": 9.949134199134199e-06, "loss": 24.7316, "step": 27026 }, { "epoch": 643.5014925373134, "grad_norm": 20.08146858215332, "learning_rate": 9.94877344877345e-06, "loss": 23.0477, "step": 27027 }, { "epoch": 643.5253731343283, "grad_norm": 24.048660278320312, "learning_rate": 9.9484126984127e-06, "loss": 24.6283, "step": 27028 }, { "epoch": 643.5492537313432, "grad_norm": 20.601097106933594, "learning_rate": 9.94805194805195e-06, "loss": 23.6588, "step": 27029 }, { "epoch": 643.5731343283583, "grad_norm": 24.741374969482422, "learning_rate": 9.947691197691198e-06, "loss": 24.6616, "step": 27030 }, { "epoch": 643.5970149253732, "grad_norm": 27.059925079345703, "learning_rate": 9.947330447330448e-06, "loss": 24.6425, "step": 27031 }, { "epoch": 643.6208955223881, "grad_norm": 26.042823791503906, "learning_rate": 9.946969696969699e-06, "loss": 24.3378, "step": 27032 }, { "epoch": 643.644776119403, "grad_norm": 26.95840072631836, "learning_rate": 9.946608946608947e-06, "loss": 23.9961, "step": 27033 }, { "epoch": 643.6686567164179, "grad_norm": 28.61941909790039, "learning_rate": 9.946248196248197e-06, "loss": 24.2822, "step": 27034 }, { "epoch": 643.6925373134328, "grad_norm": 23.983318328857422, "learning_rate": 9.945887445887446e-06, "loss": 23.5466, "step": 27035 }, { "epoch": 643.7164179104477, "grad_norm": 24.777942657470703, "learning_rate": 9.945526695526696e-06, "loss": 23.6519, "step": 27036 }, { "epoch": 643.7402985074627, "grad_norm": 26.34248924255371, "learning_rate": 9.945165945165946e-06, "loss": 23.8064, "step": 27037 }, { "epoch": 643.7641791044776, "grad_norm": 34.97313690185547, "learning_rate": 9.944805194805196e-06, "loss": 24.9895, "step": 27038 }, { "epoch": 643.7880597014926, "grad_norm": 33.297325134277344, "learning_rate": 9.944444444444445e-06, "loss": 24.2733, "step": 27039 }, { "epoch": 643.8119402985075, "grad_norm": 22.950193405151367, "learning_rate": 9.944083694083695e-06, "loss": 23.9813, "step": 27040 }, { "epoch": 643.8358208955224, "grad_norm": 23.767053604125977, "learning_rate": 9.943722943722944e-06, "loss": 24.1925, "step": 27041 }, { "epoch": 643.8597014925373, "grad_norm": 26.666109085083008, "learning_rate": 9.943362193362194e-06, "loss": 24.2448, "step": 27042 }, { "epoch": 643.8835820895522, "grad_norm": 29.639509201049805, "learning_rate": 9.943001443001444e-06, "loss": 25.3297, "step": 27043 }, { "epoch": 643.9074626865672, "grad_norm": 28.37894058227539, "learning_rate": 9.942640692640694e-06, "loss": 24.7654, "step": 27044 }, { "epoch": 643.9313432835821, "grad_norm": 25.73722267150879, "learning_rate": 9.942279942279943e-06, "loss": 23.6584, "step": 27045 }, { "epoch": 643.955223880597, "grad_norm": 26.257123947143555, "learning_rate": 9.941919191919193e-06, "loss": 24.9067, "step": 27046 }, { "epoch": 643.9791044776119, "grad_norm": 23.997255325317383, "learning_rate": 9.941558441558441e-06, "loss": 25.178, "step": 27047 }, { "epoch": 644.0, "grad_norm": 22.65498161315918, "learning_rate": 9.941197691197692e-06, "loss": 20.9199, "step": 27048 }, { "epoch": 644.0238805970149, "grad_norm": 26.324689865112305, "learning_rate": 9.940836940836942e-06, "loss": 23.3249, "step": 27049 }, { "epoch": 644.0477611940298, "grad_norm": 33.438201904296875, "learning_rate": 9.940476190476192e-06, "loss": 24.1393, "step": 27050 }, { "epoch": 644.0716417910447, "grad_norm": 24.8535099029541, "learning_rate": 9.94011544011544e-06, "loss": 23.3927, "step": 27051 }, { "epoch": 644.0955223880597, "grad_norm": 30.771907806396484, "learning_rate": 9.93975468975469e-06, "loss": 25.047, "step": 27052 }, { "epoch": 644.1194029850747, "grad_norm": 23.443113327026367, "learning_rate": 9.939393939393939e-06, "loss": 24.6126, "step": 27053 }, { "epoch": 644.1432835820896, "grad_norm": 30.7844181060791, "learning_rate": 9.939033189033191e-06, "loss": 23.7091, "step": 27054 }, { "epoch": 644.1671641791045, "grad_norm": 25.343034744262695, "learning_rate": 9.93867243867244e-06, "loss": 23.5756, "step": 27055 }, { "epoch": 644.1910447761194, "grad_norm": 25.43994903564453, "learning_rate": 9.93831168831169e-06, "loss": 24.3203, "step": 27056 }, { "epoch": 644.2149253731343, "grad_norm": 24.358131408691406, "learning_rate": 9.937950937950938e-06, "loss": 24.1467, "step": 27057 }, { "epoch": 644.2388059701492, "grad_norm": 23.26030158996582, "learning_rate": 9.937590187590188e-06, "loss": 24.1033, "step": 27058 }, { "epoch": 644.2626865671642, "grad_norm": 27.461746215820312, "learning_rate": 9.937229437229437e-06, "loss": 25.065, "step": 27059 }, { "epoch": 644.2865671641791, "grad_norm": 28.310550689697266, "learning_rate": 9.936868686868689e-06, "loss": 24.2075, "step": 27060 }, { "epoch": 644.310447761194, "grad_norm": 30.918333053588867, "learning_rate": 9.936507936507937e-06, "loss": 24.0685, "step": 27061 }, { "epoch": 644.334328358209, "grad_norm": 28.394559860229492, "learning_rate": 9.936147186147188e-06, "loss": 23.9491, "step": 27062 }, { "epoch": 644.3582089552239, "grad_norm": 22.709871292114258, "learning_rate": 9.935786435786436e-06, "loss": 24.2259, "step": 27063 }, { "epoch": 644.3820895522388, "grad_norm": 22.721986770629883, "learning_rate": 9.935425685425686e-06, "loss": 24.1975, "step": 27064 }, { "epoch": 644.4059701492537, "grad_norm": 25.373287200927734, "learning_rate": 9.935064935064936e-06, "loss": 24.4408, "step": 27065 }, { "epoch": 644.4298507462687, "grad_norm": NaN, "learning_rate": 9.934704184704187e-06, "loss": 35.8408, "step": 27066 }, { "epoch": 644.4537313432836, "grad_norm": 24.729379653930664, "learning_rate": 9.934704184704187e-06, "loss": 24.3387, "step": 27067 }, { "epoch": 644.4776119402985, "grad_norm": 27.06106185913086, "learning_rate": 9.934343434343435e-06, "loss": 23.2522, "step": 27068 }, { "epoch": 644.5014925373134, "grad_norm": 33.9829216003418, "learning_rate": 9.933982683982685e-06, "loss": 24.6517, "step": 27069 }, { "epoch": 644.5253731343283, "grad_norm": 25.986812591552734, "learning_rate": 9.933621933621934e-06, "loss": 24.9025, "step": 27070 }, { "epoch": 644.5492537313432, "grad_norm": 22.90077781677246, "learning_rate": 9.933261183261184e-06, "loss": 23.8103, "step": 27071 }, { "epoch": 644.5731343283583, "grad_norm": 24.45450782775879, "learning_rate": 9.932900432900434e-06, "loss": 24.6019, "step": 27072 }, { "epoch": 644.5970149253732, "grad_norm": 25.982009887695312, "learning_rate": 9.932539682539684e-06, "loss": 24.8996, "step": 27073 }, { "epoch": 644.6208955223881, "grad_norm": 27.23410987854004, "learning_rate": 9.932178932178933e-06, "loss": 24.0014, "step": 27074 }, { "epoch": 644.644776119403, "grad_norm": 28.94352149963379, "learning_rate": 9.931818181818183e-06, "loss": 24.5426, "step": 27075 }, { "epoch": 644.6686567164179, "grad_norm": 26.646804809570312, "learning_rate": 9.931457431457432e-06, "loss": 24.8514, "step": 27076 }, { "epoch": 644.6925373134328, "grad_norm": 22.4049015045166, "learning_rate": 9.931096681096682e-06, "loss": 24.3386, "step": 27077 }, { "epoch": 644.7164179104477, "grad_norm": 24.41880989074707, "learning_rate": 9.930735930735932e-06, "loss": 24.2409, "step": 27078 }, { "epoch": 644.7402985074627, "grad_norm": 27.20975112915039, "learning_rate": 9.93037518037518e-06, "loss": 23.7407, "step": 27079 }, { "epoch": 644.7641791044776, "grad_norm": 30.132299423217773, "learning_rate": 9.93001443001443e-06, "loss": 24.455, "step": 27080 }, { "epoch": 644.7880597014926, "grad_norm": 27.081174850463867, "learning_rate": 9.929653679653681e-06, "loss": 23.9471, "step": 27081 }, { "epoch": 644.8119402985075, "grad_norm": 24.09163475036621, "learning_rate": 9.92929292929293e-06, "loss": 24.3254, "step": 27082 }, { "epoch": 644.8358208955224, "grad_norm": 26.43840980529785, "learning_rate": 9.92893217893218e-06, "loss": 23.4211, "step": 27083 }, { "epoch": 644.8597014925373, "grad_norm": 32.044288635253906, "learning_rate": 9.92857142857143e-06, "loss": 24.1663, "step": 27084 }, { "epoch": 644.8835820895522, "grad_norm": 24.982341766357422, "learning_rate": 9.928210678210678e-06, "loss": 25.3925, "step": 27085 }, { "epoch": 644.9074626865672, "grad_norm": 28.54620361328125, "learning_rate": 9.927849927849929e-06, "loss": 23.9134, "step": 27086 }, { "epoch": 644.9313432835821, "grad_norm": 42.198001861572266, "learning_rate": 9.927489177489179e-06, "loss": 24.4424, "step": 27087 }, { "epoch": 644.955223880597, "grad_norm": 24.1193904876709, "learning_rate": 9.927128427128427e-06, "loss": 24.1897, "step": 27088 }, { "epoch": 644.9791044776119, "grad_norm": 45.98109817504883, "learning_rate": 9.926767676767677e-06, "loss": 24.0082, "step": 27089 }, { "epoch": 645.0, "grad_norm": 27.32633399963379, "learning_rate": 9.926406926406928e-06, "loss": 21.5716, "step": 27090 }, { "epoch": 645.0238805970149, "grad_norm": 34.25537109375, "learning_rate": 9.926046176046176e-06, "loss": 24.473, "step": 27091 }, { "epoch": 645.0477611940298, "grad_norm": 34.721397399902344, "learning_rate": 9.925685425685426e-06, "loss": 23.7556, "step": 27092 }, { "epoch": 645.0716417910447, "grad_norm": 24.97245216369629, "learning_rate": 9.925324675324677e-06, "loss": 25.0688, "step": 27093 }, { "epoch": 645.0955223880597, "grad_norm": 30.28203010559082, "learning_rate": 9.924963924963927e-06, "loss": 24.1935, "step": 27094 }, { "epoch": 645.1194029850747, "grad_norm": 29.7774600982666, "learning_rate": 9.924603174603175e-06, "loss": 24.0507, "step": 27095 }, { "epoch": 645.1432835820896, "grad_norm": 24.69854164123535, "learning_rate": 9.924242424242425e-06, "loss": 23.998, "step": 27096 }, { "epoch": 645.1671641791045, "grad_norm": 29.424612045288086, "learning_rate": 9.923881673881674e-06, "loss": 24.0114, "step": 27097 }, { "epoch": 645.1910447761194, "grad_norm": 26.638010025024414, "learning_rate": 9.923520923520924e-06, "loss": 23.8792, "step": 27098 }, { "epoch": 645.2149253731343, "grad_norm": 30.32175064086914, "learning_rate": 9.923160173160173e-06, "loss": 24.4081, "step": 27099 }, { "epoch": 645.2388059701492, "grad_norm": 22.885793685913086, "learning_rate": 9.922799422799425e-06, "loss": 23.9419, "step": 27100 }, { "epoch": 645.2626865671642, "grad_norm": 23.558176040649414, "learning_rate": 9.922438672438673e-06, "loss": 23.4078, "step": 27101 }, { "epoch": 645.2865671641791, "grad_norm": 25.1423282623291, "learning_rate": 9.922077922077923e-06, "loss": 24.7027, "step": 27102 }, { "epoch": 645.310447761194, "grad_norm": 24.7944278717041, "learning_rate": 9.921717171717172e-06, "loss": 24.0504, "step": 27103 }, { "epoch": 645.334328358209, "grad_norm": 26.03127670288086, "learning_rate": 9.921356421356422e-06, "loss": 24.8514, "step": 27104 }, { "epoch": 645.3582089552239, "grad_norm": 26.075767517089844, "learning_rate": 9.920995670995672e-06, "loss": 23.7705, "step": 27105 }, { "epoch": 645.3820895522388, "grad_norm": NaN, "learning_rate": 9.920634920634922e-06, "loss": 38.5074, "step": 27106 }, { "epoch": 645.4059701492537, "grad_norm": 22.230548858642578, "learning_rate": 9.920634920634922e-06, "loss": 24.3961, "step": 27107 }, { "epoch": 645.4298507462687, "grad_norm": 24.587413787841797, "learning_rate": 9.92027417027417e-06, "loss": 23.9886, "step": 27108 }, { "epoch": 645.4537313432836, "grad_norm": 22.929100036621094, "learning_rate": 9.919913419913421e-06, "loss": 22.9778, "step": 27109 }, { "epoch": 645.4776119402985, "grad_norm": 22.752593994140625, "learning_rate": 9.91955266955267e-06, "loss": 24.5914, "step": 27110 }, { "epoch": 645.5014925373134, "grad_norm": 27.09078598022461, "learning_rate": 9.91919191919192e-06, "loss": 24.5481, "step": 27111 }, { "epoch": 645.5253731343283, "grad_norm": 24.9510555267334, "learning_rate": 9.91883116883117e-06, "loss": 24.1069, "step": 27112 }, { "epoch": 645.5492537313432, "grad_norm": 25.001245498657227, "learning_rate": 9.91847041847042e-06, "loss": 24.1391, "step": 27113 }, { "epoch": 645.5731343283583, "grad_norm": 26.81512451171875, "learning_rate": 9.918109668109669e-06, "loss": 24.4482, "step": 27114 }, { "epoch": 645.5970149253732, "grad_norm": 26.13603973388672, "learning_rate": 9.917748917748919e-06, "loss": 24.4931, "step": 27115 }, { "epoch": 645.6208955223881, "grad_norm": 27.639625549316406, "learning_rate": 9.917388167388167e-06, "loss": 24.2541, "step": 27116 }, { "epoch": 645.644776119403, "grad_norm": 25.221309661865234, "learning_rate": 9.917027417027418e-06, "loss": 24.0674, "step": 27117 }, { "epoch": 645.6686567164179, "grad_norm": 27.48367691040039, "learning_rate": 9.916666666666668e-06, "loss": 24.6593, "step": 27118 }, { "epoch": 645.6925373134328, "grad_norm": 29.89508628845215, "learning_rate": 9.916305916305918e-06, "loss": 24.9299, "step": 27119 }, { "epoch": 645.7164179104477, "grad_norm": 24.45992660522461, "learning_rate": 9.915945165945166e-06, "loss": 24.7081, "step": 27120 }, { "epoch": 645.7402985074627, "grad_norm": 23.715770721435547, "learning_rate": 9.915584415584417e-06, "loss": 23.8615, "step": 27121 }, { "epoch": 645.7641791044776, "grad_norm": 26.855283737182617, "learning_rate": 9.915223665223665e-06, "loss": 24.5022, "step": 27122 }, { "epoch": 645.7880597014926, "grad_norm": 31.556528091430664, "learning_rate": 9.914862914862915e-06, "loss": 24.5181, "step": 27123 }, { "epoch": 645.8119402985075, "grad_norm": 27.410524368286133, "learning_rate": 9.914502164502166e-06, "loss": 24.5454, "step": 27124 }, { "epoch": 645.8358208955224, "grad_norm": 24.454967498779297, "learning_rate": 9.914141414141416e-06, "loss": 23.7154, "step": 27125 }, { "epoch": 645.8597014925373, "grad_norm": 24.725727081298828, "learning_rate": 9.913780663780664e-06, "loss": 24.8029, "step": 27126 }, { "epoch": 645.8835820895522, "grad_norm": 22.492816925048828, "learning_rate": 9.913419913419914e-06, "loss": 23.0518, "step": 27127 }, { "epoch": 645.9074626865672, "grad_norm": 23.901926040649414, "learning_rate": 9.913059163059165e-06, "loss": 24.2505, "step": 27128 }, { "epoch": 645.9313432835821, "grad_norm": 23.533836364746094, "learning_rate": 9.912698412698413e-06, "loss": 24.2672, "step": 27129 }, { "epoch": 645.955223880597, "grad_norm": 27.136075973510742, "learning_rate": 9.912337662337663e-06, "loss": 23.7239, "step": 27130 }, { "epoch": 645.9791044776119, "grad_norm": 26.122900009155273, "learning_rate": 9.911976911976914e-06, "loss": 24.5387, "step": 27131 }, { "epoch": 646.0, "grad_norm": 28.028358459472656, "learning_rate": 9.911616161616162e-06, "loss": 21.1289, "step": 27132 }, { "epoch": 646.0238805970149, "grad_norm": 26.495149612426758, "learning_rate": 9.911255411255412e-06, "loss": 23.7073, "step": 27133 }, { "epoch": 646.0477611940298, "grad_norm": 27.194917678833008, "learning_rate": 9.910894660894662e-06, "loss": 23.7157, "step": 27134 }, { "epoch": 646.0716417910447, "grad_norm": 35.28769302368164, "learning_rate": 9.910533910533911e-06, "loss": 23.9955, "step": 27135 }, { "epoch": 646.0955223880597, "grad_norm": 31.397390365600586, "learning_rate": 9.910173160173161e-06, "loss": 25.0337, "step": 27136 }, { "epoch": 646.1194029850747, "grad_norm": 23.250524520874023, "learning_rate": 9.90981240981241e-06, "loss": 24.3093, "step": 27137 }, { "epoch": 646.1432835820896, "grad_norm": 29.116615295410156, "learning_rate": 9.90945165945166e-06, "loss": 23.6416, "step": 27138 }, { "epoch": 646.1671641791045, "grad_norm": 32.65553665161133, "learning_rate": 9.90909090909091e-06, "loss": 23.8777, "step": 27139 }, { "epoch": 646.1910447761194, "grad_norm": 24.90568733215332, "learning_rate": 9.90873015873016e-06, "loss": 24.2399, "step": 27140 }, { "epoch": 646.2149253731343, "grad_norm": 25.331951141357422, "learning_rate": 9.908369408369409e-06, "loss": 22.8874, "step": 27141 }, { "epoch": 646.2388059701492, "grad_norm": 26.57431411743164, "learning_rate": 9.908008658008659e-06, "loss": 23.6475, "step": 27142 }, { "epoch": 646.2626865671642, "grad_norm": 29.425270080566406, "learning_rate": 9.907647907647907e-06, "loss": 24.7151, "step": 27143 }, { "epoch": 646.2865671641791, "grad_norm": 27.371694564819336, "learning_rate": 9.907287157287158e-06, "loss": 23.9981, "step": 27144 }, { "epoch": 646.310447761194, "grad_norm": 22.14531707763672, "learning_rate": 9.906926406926408e-06, "loss": 24.8771, "step": 27145 }, { "epoch": 646.334328358209, "grad_norm": 23.433574676513672, "learning_rate": 9.906565656565658e-06, "loss": 24.3799, "step": 27146 }, { "epoch": 646.3582089552239, "grad_norm": 22.063169479370117, "learning_rate": 9.906204906204907e-06, "loss": 23.8801, "step": 27147 }, { "epoch": 646.3820895522388, "grad_norm": 25.451950073242188, "learning_rate": 9.905844155844157e-06, "loss": 23.3019, "step": 27148 }, { "epoch": 646.4059701492537, "grad_norm": 31.352100372314453, "learning_rate": 9.905483405483405e-06, "loss": 23.509, "step": 27149 }, { "epoch": 646.4298507462687, "grad_norm": 24.829927444458008, "learning_rate": 9.905122655122657e-06, "loss": 24.0299, "step": 27150 }, { "epoch": 646.4537313432836, "grad_norm": 24.975574493408203, "learning_rate": 9.904761904761906e-06, "loss": 23.4215, "step": 27151 }, { "epoch": 646.4776119402985, "grad_norm": 23.89220428466797, "learning_rate": 9.904401154401156e-06, "loss": 23.6384, "step": 27152 }, { "epoch": 646.5014925373134, "grad_norm": 24.285146713256836, "learning_rate": 9.904040404040404e-06, "loss": 24.0597, "step": 27153 }, { "epoch": 646.5253731343283, "grad_norm": 21.241527557373047, "learning_rate": 9.903679653679655e-06, "loss": 23.9553, "step": 27154 }, { "epoch": 646.5492537313432, "grad_norm": 28.96174430847168, "learning_rate": 9.903318903318903e-06, "loss": 24.7949, "step": 27155 }, { "epoch": 646.5731343283583, "grad_norm": 27.611190795898438, "learning_rate": 9.902958152958155e-06, "loss": 24.3536, "step": 27156 }, { "epoch": 646.5970149253732, "grad_norm": 22.232746124267578, "learning_rate": 9.902597402597403e-06, "loss": 24.2298, "step": 27157 }, { "epoch": 646.6208955223881, "grad_norm": 25.204694747924805, "learning_rate": 9.902236652236654e-06, "loss": 23.9711, "step": 27158 }, { "epoch": 646.644776119403, "grad_norm": 23.613628387451172, "learning_rate": 9.901875901875902e-06, "loss": 23.3118, "step": 27159 }, { "epoch": 646.6686567164179, "grad_norm": 26.986854553222656, "learning_rate": 9.901515151515152e-06, "loss": 24.0993, "step": 27160 }, { "epoch": 646.6925373134328, "grad_norm": 28.190168380737305, "learning_rate": 9.901154401154402e-06, "loss": 23.8378, "step": 27161 }, { "epoch": 646.7164179104477, "grad_norm": 27.2662296295166, "learning_rate": 9.900793650793653e-06, "loss": 24.6064, "step": 27162 }, { "epoch": 646.7402985074627, "grad_norm": 24.211074829101562, "learning_rate": 9.900432900432901e-06, "loss": 23.2107, "step": 27163 }, { "epoch": 646.7641791044776, "grad_norm": 27.76797866821289, "learning_rate": 9.900072150072151e-06, "loss": 24.3405, "step": 27164 }, { "epoch": 646.7880597014926, "grad_norm": 25.644994735717773, "learning_rate": 9.8997113997114e-06, "loss": 24.7769, "step": 27165 }, { "epoch": 646.8119402985075, "grad_norm": 28.5152645111084, "learning_rate": 9.89935064935065e-06, "loss": 24.4878, "step": 27166 }, { "epoch": 646.8358208955224, "grad_norm": 31.486610412597656, "learning_rate": 9.8989898989899e-06, "loss": 25.1658, "step": 27167 }, { "epoch": 646.8597014925373, "grad_norm": 24.903160095214844, "learning_rate": 9.89862914862915e-06, "loss": 24.253, "step": 27168 }, { "epoch": 646.8835820895522, "grad_norm": 26.27738380432129, "learning_rate": 9.898268398268399e-06, "loss": 24.8672, "step": 27169 }, { "epoch": 646.9074626865672, "grad_norm": 24.152833938598633, "learning_rate": 9.89790764790765e-06, "loss": 24.4003, "step": 27170 }, { "epoch": 646.9313432835821, "grad_norm": 27.882205963134766, "learning_rate": 9.897546897546898e-06, "loss": 24.5273, "step": 27171 }, { "epoch": 646.955223880597, "grad_norm": 27.816434860229492, "learning_rate": 9.897186147186148e-06, "loss": 25.1964, "step": 27172 }, { "epoch": 646.9791044776119, "grad_norm": 25.415409088134766, "learning_rate": 9.896825396825398e-06, "loss": 25.097, "step": 27173 }, { "epoch": 647.0, "grad_norm": 25.06436538696289, "learning_rate": 9.896464646464647e-06, "loss": 21.8391, "step": 27174 }, { "epoch": 647.0238805970149, "grad_norm": 23.271377563476562, "learning_rate": 9.896103896103897e-06, "loss": 22.0403, "step": 27175 }, { "epoch": 647.0477611940298, "grad_norm": 31.288394927978516, "learning_rate": 9.895743145743147e-06, "loss": 24.9248, "step": 27176 }, { "epoch": 647.0716417910447, "grad_norm": 24.70477867126465, "learning_rate": 9.895382395382395e-06, "loss": 23.0038, "step": 27177 }, { "epoch": 647.0955223880597, "grad_norm": 27.02154541015625, "learning_rate": 9.895021645021646e-06, "loss": 24.1796, "step": 27178 }, { "epoch": 647.1194029850747, "grad_norm": 24.183916091918945, "learning_rate": 9.894660894660896e-06, "loss": 23.0979, "step": 27179 }, { "epoch": 647.1432835820896, "grad_norm": 28.77643394470215, "learning_rate": 9.894300144300144e-06, "loss": 23.8295, "step": 27180 }, { "epoch": 647.1671641791045, "grad_norm": 37.692413330078125, "learning_rate": 9.893939393939395e-06, "loss": 24.693, "step": 27181 }, { "epoch": 647.1910447761194, "grad_norm": 24.464263916015625, "learning_rate": 9.893578643578645e-06, "loss": 23.6739, "step": 27182 }, { "epoch": 647.2149253731343, "grad_norm": 34.02976608276367, "learning_rate": 9.893217893217893e-06, "loss": 24.2154, "step": 27183 }, { "epoch": 647.2388059701492, "grad_norm": 38.38890075683594, "learning_rate": 9.892857142857143e-06, "loss": 24.0827, "step": 27184 }, { "epoch": 647.2626865671642, "grad_norm": 25.39635467529297, "learning_rate": 9.892496392496394e-06, "loss": 24.7179, "step": 27185 }, { "epoch": 647.2865671641791, "grad_norm": 40.95951843261719, "learning_rate": 9.892135642135642e-06, "loss": 24.4029, "step": 27186 }, { "epoch": 647.310447761194, "grad_norm": 30.839414596557617, "learning_rate": 9.891774891774892e-06, "loss": 25.7064, "step": 27187 }, { "epoch": 647.334328358209, "grad_norm": 29.018280029296875, "learning_rate": 9.891414141414143e-06, "loss": 24.4931, "step": 27188 }, { "epoch": 647.3582089552239, "grad_norm": 42.10007858276367, "learning_rate": 9.891053391053393e-06, "loss": 23.5473, "step": 27189 }, { "epoch": 647.3820895522388, "grad_norm": 25.12420082092285, "learning_rate": 9.890692640692641e-06, "loss": 23.8319, "step": 27190 }, { "epoch": 647.4059701492537, "grad_norm": 52.277828216552734, "learning_rate": 9.890331890331891e-06, "loss": 24.3461, "step": 27191 }, { "epoch": 647.4298507462687, "grad_norm": 32.327064514160156, "learning_rate": 9.88997113997114e-06, "loss": 24.0609, "step": 27192 }, { "epoch": 647.4537313432836, "grad_norm": 54.25950622558594, "learning_rate": 9.88961038961039e-06, "loss": 24.6748, "step": 27193 }, { "epoch": 647.4776119402985, "grad_norm": 35.99332046508789, "learning_rate": 9.88924963924964e-06, "loss": 24.6969, "step": 27194 }, { "epoch": 647.5014925373134, "grad_norm": 58.95316696166992, "learning_rate": 9.88888888888889e-06, "loss": 24.7479, "step": 27195 }, { "epoch": 647.5253731343283, "grad_norm": 44.301456451416016, "learning_rate": 9.888528138528139e-06, "loss": 24.2998, "step": 27196 }, { "epoch": 647.5492537313432, "grad_norm": 57.96403884887695, "learning_rate": 9.88816738816739e-06, "loss": 23.096, "step": 27197 }, { "epoch": 647.5731343283583, "grad_norm": 46.84333801269531, "learning_rate": 9.887806637806638e-06, "loss": 24.3784, "step": 27198 }, { "epoch": 647.5970149253732, "grad_norm": 54.647552490234375, "learning_rate": 9.887445887445888e-06, "loss": 24.7341, "step": 27199 }, { "epoch": 647.6208955223881, "grad_norm": 48.62948989868164, "learning_rate": 9.887085137085138e-06, "loss": 25.3524, "step": 27200 }, { "epoch": 647.644776119403, "grad_norm": 45.08952331542969, "learning_rate": 9.886724386724388e-06, "loss": 24.8538, "step": 27201 }, { "epoch": 647.6686567164179, "grad_norm": NaN, "learning_rate": 9.886363636363637e-06, "loss": 32.2914, "step": 27202 }, { "epoch": 647.6925373134328, "grad_norm": 42.02565002441406, "learning_rate": 9.886363636363637e-06, "loss": 23.6775, "step": 27203 }, { "epoch": 647.7164179104477, "grad_norm": 50.74455261230469, "learning_rate": 9.886002886002887e-06, "loss": 24.7554, "step": 27204 }, { "epoch": 647.7402985074627, "grad_norm": 43.85517120361328, "learning_rate": 9.885642135642136e-06, "loss": 24.6707, "step": 27205 }, { "epoch": 647.7641791044776, "grad_norm": 52.42988586425781, "learning_rate": 9.885281385281386e-06, "loss": 23.3978, "step": 27206 }, { "epoch": 647.7880597014926, "grad_norm": 45.69871139526367, "learning_rate": 9.884920634920636e-06, "loss": 23.7204, "step": 27207 }, { "epoch": 647.8119402985075, "grad_norm": 45.11188888549805, "learning_rate": 9.884559884559886e-06, "loss": 23.5168, "step": 27208 }, { "epoch": 647.8358208955224, "grad_norm": 40.73832321166992, "learning_rate": 9.884199134199135e-06, "loss": 24.1828, "step": 27209 }, { "epoch": 647.8597014925373, "grad_norm": 54.78335952758789, "learning_rate": 9.883838383838385e-06, "loss": 24.783, "step": 27210 }, { "epoch": 647.8835820895522, "grad_norm": 46.49887466430664, "learning_rate": 9.883477633477633e-06, "loss": 24.2626, "step": 27211 }, { "epoch": 647.9074626865672, "grad_norm": 47.690303802490234, "learning_rate": 9.883116883116885e-06, "loss": 23.6505, "step": 27212 }, { "epoch": 647.9313432835821, "grad_norm": 44.93275833129883, "learning_rate": 9.882756132756134e-06, "loss": 23.7297, "step": 27213 }, { "epoch": 647.955223880597, "grad_norm": 46.45100784301758, "learning_rate": 9.882395382395384e-06, "loss": 23.316, "step": 27214 }, { "epoch": 647.9791044776119, "grad_norm": 41.57920837402344, "learning_rate": 9.882034632034632e-06, "loss": 25.0547, "step": 27215 }, { "epoch": 648.0, "grad_norm": 42.720703125, "learning_rate": 9.881673881673883e-06, "loss": 21.1225, "step": 27216 }, { "epoch": 648.0238805970149, "grad_norm": 45.49483871459961, "learning_rate": 9.881313131313131e-06, "loss": 24.2776, "step": 27217 }, { "epoch": 648.0477611940298, "grad_norm": 45.18010711669922, "learning_rate": 9.880952380952381e-06, "loss": 24.001, "step": 27218 }, { "epoch": 648.0716417910447, "grad_norm": 40.589942932128906, "learning_rate": 9.880591630591632e-06, "loss": 24.6347, "step": 27219 }, { "epoch": 648.0955223880597, "grad_norm": 47.21287155151367, "learning_rate": 9.880230880230882e-06, "loss": 23.553, "step": 27220 }, { "epoch": 648.1194029850747, "grad_norm": 38.416656494140625, "learning_rate": 9.87987012987013e-06, "loss": 24.0404, "step": 27221 }, { "epoch": 648.1432835820896, "grad_norm": 54.25535202026367, "learning_rate": 9.87950937950938e-06, "loss": 25.0724, "step": 27222 }, { "epoch": 648.1671641791045, "grad_norm": 43.76709747314453, "learning_rate": 9.87914862914863e-06, "loss": 23.6562, "step": 27223 }, { "epoch": 648.1910447761194, "grad_norm": 51.23270034790039, "learning_rate": 9.87878787878788e-06, "loss": 24.3223, "step": 27224 }, { "epoch": 648.2149253731343, "grad_norm": 44.771907806396484, "learning_rate": 9.87842712842713e-06, "loss": 24.4908, "step": 27225 }, { "epoch": 648.2388059701492, "grad_norm": 43.20574951171875, "learning_rate": 9.87806637806638e-06, "loss": 24.4806, "step": 27226 }, { "epoch": 648.2626865671642, "grad_norm": 41.824581146240234, "learning_rate": 9.877705627705628e-06, "loss": 24.8316, "step": 27227 }, { "epoch": 648.2865671641791, "grad_norm": 46.47908401489258, "learning_rate": 9.877344877344878e-06, "loss": 24.1889, "step": 27228 }, { "epoch": 648.310447761194, "grad_norm": 43.085933685302734, "learning_rate": 9.876984126984128e-06, "loss": 24.0663, "step": 27229 }, { "epoch": 648.334328358209, "grad_norm": 48.88016128540039, "learning_rate": 9.876623376623377e-06, "loss": 23.2777, "step": 27230 }, { "epoch": 648.3582089552239, "grad_norm": 43.09103775024414, "learning_rate": 9.876262626262627e-06, "loss": 23.4922, "step": 27231 }, { "epoch": 648.3820895522388, "grad_norm": 39.23814392089844, "learning_rate": 9.875901875901877e-06, "loss": 24.7131, "step": 27232 }, { "epoch": 648.4059701492537, "grad_norm": 38.985511779785156, "learning_rate": 9.875541125541126e-06, "loss": 24.5028, "step": 27233 }, { "epoch": 648.4298507462687, "grad_norm": 46.83061981201172, "learning_rate": 9.875180375180376e-06, "loss": 24.4589, "step": 27234 }, { "epoch": 648.4537313432836, "grad_norm": 40.78582763671875, "learning_rate": 9.874819624819626e-06, "loss": 24.0226, "step": 27235 }, { "epoch": 648.4776119402985, "grad_norm": 48.59239959716797, "learning_rate": 9.874458874458875e-06, "loss": 22.9948, "step": 27236 }, { "epoch": 648.5014925373134, "grad_norm": 39.933292388916016, "learning_rate": 9.874098124098125e-06, "loss": 24.0036, "step": 27237 }, { "epoch": 648.5253731343283, "grad_norm": 44.46174621582031, "learning_rate": 9.873737373737373e-06, "loss": 24.3029, "step": 27238 }, { "epoch": 648.5492537313432, "grad_norm": 40.04295349121094, "learning_rate": 9.873376623376624e-06, "loss": 23.6139, "step": 27239 }, { "epoch": 648.5731343283583, "grad_norm": 48.94956588745117, "learning_rate": 9.873015873015874e-06, "loss": 24.7053, "step": 27240 }, { "epoch": 648.5970149253732, "grad_norm": 42.07666015625, "learning_rate": 9.872655122655124e-06, "loss": 24.1683, "step": 27241 }, { "epoch": 648.6208955223881, "grad_norm": 45.55680847167969, "learning_rate": 9.872294372294373e-06, "loss": 23.7577, "step": 27242 }, { "epoch": 648.644776119403, "grad_norm": 39.48947525024414, "learning_rate": 9.871933621933623e-06, "loss": 23.8501, "step": 27243 }, { "epoch": 648.6686567164179, "grad_norm": 46.74977493286133, "learning_rate": 9.871572871572871e-06, "loss": 24.3909, "step": 27244 }, { "epoch": 648.6925373134328, "grad_norm": 37.84947204589844, "learning_rate": 9.871212121212121e-06, "loss": 24.8161, "step": 27245 }, { "epoch": 648.7164179104477, "grad_norm": 45.52964782714844, "learning_rate": 9.870851370851372e-06, "loss": 24.0817, "step": 27246 }, { "epoch": 648.7402985074627, "grad_norm": 38.88505172729492, "learning_rate": 9.870490620490622e-06, "loss": 23.4657, "step": 27247 }, { "epoch": 648.7641791044776, "grad_norm": 46.9312629699707, "learning_rate": 9.87012987012987e-06, "loss": 24.2189, "step": 27248 }, { "epoch": 648.7880597014926, "grad_norm": 39.42325210571289, "learning_rate": 9.86976911976912e-06, "loss": 23.3666, "step": 27249 }, { "epoch": 648.8119402985075, "grad_norm": 46.94573974609375, "learning_rate": 9.869408369408369e-06, "loss": 24.2854, "step": 27250 }, { "epoch": 648.8358208955224, "grad_norm": 38.89752197265625, "learning_rate": 9.869047619047621e-06, "loss": 23.2537, "step": 27251 }, { "epoch": 648.8597014925373, "grad_norm": 45.1987419128418, "learning_rate": 9.86868686868687e-06, "loss": 24.8716, "step": 27252 }, { "epoch": 648.8835820895522, "grad_norm": 39.057716369628906, "learning_rate": 9.86832611832612e-06, "loss": 22.8291, "step": 27253 }, { "epoch": 648.9074626865672, "grad_norm": 45.83326721191406, "learning_rate": 9.867965367965368e-06, "loss": 24.4518, "step": 27254 }, { "epoch": 648.9313432835821, "grad_norm": 38.610755920410156, "learning_rate": 9.867604617604618e-06, "loss": 23.1973, "step": 27255 }, { "epoch": 648.955223880597, "grad_norm": 42.57149124145508, "learning_rate": 9.867243867243867e-06, "loss": 24.3589, "step": 27256 }, { "epoch": 648.9791044776119, "grad_norm": 39.83126449584961, "learning_rate": 9.866883116883119e-06, "loss": 23.9777, "step": 27257 }, { "epoch": 649.0, "grad_norm": 39.41853713989258, "learning_rate": 9.866522366522367e-06, "loss": 21.2676, "step": 27258 }, { "epoch": 649.0238805970149, "grad_norm": 37.62281799316406, "learning_rate": 9.866161616161617e-06, "loss": 24.4813, "step": 27259 }, { "epoch": 649.0477611940298, "grad_norm": 46.36490249633789, "learning_rate": 9.865800865800866e-06, "loss": 23.9227, "step": 27260 }, { "epoch": 649.0716417910447, "grad_norm": 37.501651763916016, "learning_rate": 9.865440115440116e-06, "loss": 24.5771, "step": 27261 }, { "epoch": 649.0955223880597, "grad_norm": 44.96711349487305, "learning_rate": 9.865079365079366e-06, "loss": 23.7774, "step": 27262 }, { "epoch": 649.1194029850747, "grad_norm": 33.76577377319336, "learning_rate": 9.864718614718617e-06, "loss": 23.5001, "step": 27263 }, { "epoch": 649.1432835820896, "grad_norm": 47.14030075073242, "learning_rate": 9.864357864357865e-06, "loss": 24.4625, "step": 27264 }, { "epoch": 649.1671641791045, "grad_norm": 39.20140838623047, "learning_rate": 9.863997113997115e-06, "loss": 22.7131, "step": 27265 }, { "epoch": 649.1910447761194, "grad_norm": 48.9221305847168, "learning_rate": 9.863636363636364e-06, "loss": 24.3219, "step": 27266 }, { "epoch": 649.2149253731343, "grad_norm": 40.667869567871094, "learning_rate": 9.863275613275614e-06, "loss": 24.0418, "step": 27267 }, { "epoch": 649.2388059701492, "grad_norm": 42.55061721801758, "learning_rate": 9.862914862914864e-06, "loss": 24.5975, "step": 27268 }, { "epoch": 649.2626865671642, "grad_norm": 39.40453338623047, "learning_rate": 9.862554112554114e-06, "loss": 23.6549, "step": 27269 }, { "epoch": 649.2865671641791, "grad_norm": 42.561588287353516, "learning_rate": 9.862193362193363e-06, "loss": 23.6424, "step": 27270 }, { "epoch": 649.310447761194, "grad_norm": 36.35006332397461, "learning_rate": 9.861832611832613e-06, "loss": 23.6752, "step": 27271 }, { "epoch": 649.334328358209, "grad_norm": 44.39028549194336, "learning_rate": 9.861471861471862e-06, "loss": 23.2584, "step": 27272 }, { "epoch": 649.3582089552239, "grad_norm": 37.81244659423828, "learning_rate": 9.861111111111112e-06, "loss": 23.4542, "step": 27273 }, { "epoch": 649.3820895522388, "grad_norm": 44.21824645996094, "learning_rate": 9.860750360750362e-06, "loss": 25.3499, "step": 27274 }, { "epoch": 649.4059701492537, "grad_norm": 39.695674896240234, "learning_rate": 9.86038961038961e-06, "loss": 24.6846, "step": 27275 }, { "epoch": 649.4298507462687, "grad_norm": 42.411590576171875, "learning_rate": 9.86002886002886e-06, "loss": 23.5009, "step": 27276 }, { "epoch": 649.4537313432836, "grad_norm": 36.447303771972656, "learning_rate": 9.859668109668111e-06, "loss": 23.5508, "step": 27277 }, { "epoch": 649.4776119402985, "grad_norm": 43.181034088134766, "learning_rate": 9.85930735930736e-06, "loss": 23.9583, "step": 27278 }, { "epoch": 649.5014925373134, "grad_norm": 35.98109817504883, "learning_rate": 9.85894660894661e-06, "loss": 24.5084, "step": 27279 }, { "epoch": 649.5253731343283, "grad_norm": 44.81536865234375, "learning_rate": 9.85858585858586e-06, "loss": 24.2832, "step": 27280 }, { "epoch": 649.5492537313432, "grad_norm": 31.807279586791992, "learning_rate": 9.858225108225108e-06, "loss": 24.4348, "step": 27281 }, { "epoch": 649.5731343283583, "grad_norm": 42.53800582885742, "learning_rate": 9.857864357864358e-06, "loss": 23.7741, "step": 27282 }, { "epoch": 649.5970149253732, "grad_norm": 34.998291015625, "learning_rate": 9.857503607503609e-06, "loss": 24.5042, "step": 27283 }, { "epoch": 649.6208955223881, "grad_norm": 46.033714294433594, "learning_rate": 9.857142857142859e-06, "loss": 24.4117, "step": 27284 }, { "epoch": 649.644776119403, "grad_norm": 36.66647720336914, "learning_rate": 9.856782106782107e-06, "loss": 23.7341, "step": 27285 }, { "epoch": 649.6686567164179, "grad_norm": 48.23325729370117, "learning_rate": 9.856421356421358e-06, "loss": 25.2997, "step": 27286 }, { "epoch": 649.6925373134328, "grad_norm": 40.268489837646484, "learning_rate": 9.856060606060606e-06, "loss": 24.0199, "step": 27287 }, { "epoch": 649.7164179104477, "grad_norm": 43.577796936035156, "learning_rate": 9.855699855699856e-06, "loss": 23.5125, "step": 27288 }, { "epoch": 649.7402985074627, "grad_norm": 38.81521987915039, "learning_rate": 9.855339105339106e-06, "loss": 24.6117, "step": 27289 }, { "epoch": 649.7641791044776, "grad_norm": 44.58305740356445, "learning_rate": 9.854978354978357e-06, "loss": 24.0451, "step": 27290 }, { "epoch": 649.7880597014926, "grad_norm": 39.84925079345703, "learning_rate": 9.854617604617605e-06, "loss": 24.2819, "step": 27291 }, { "epoch": 649.8119402985075, "grad_norm": 42.13063049316406, "learning_rate": 9.854256854256855e-06, "loss": 24.6918, "step": 27292 }, { "epoch": 649.8358208955224, "grad_norm": 37.185821533203125, "learning_rate": 9.853896103896104e-06, "loss": 24.9163, "step": 27293 }, { "epoch": 649.8597014925373, "grad_norm": 38.51730728149414, "learning_rate": 9.853535353535354e-06, "loss": 23.7674, "step": 27294 }, { "epoch": 649.8835820895522, "grad_norm": 32.59008026123047, "learning_rate": 9.853174603174604e-06, "loss": 23.6715, "step": 27295 }, { "epoch": 649.9074626865672, "grad_norm": 40.09797668457031, "learning_rate": 9.852813852813854e-06, "loss": 23.6556, "step": 27296 }, { "epoch": 649.9313432835821, "grad_norm": 33.74359893798828, "learning_rate": 9.852453102453103e-06, "loss": 23.9639, "step": 27297 }, { "epoch": 649.955223880597, "grad_norm": 39.667999267578125, "learning_rate": 9.852092352092353e-06, "loss": 23.9193, "step": 27298 }, { "epoch": 649.9791044776119, "grad_norm": 35.636932373046875, "learning_rate": 9.851731601731602e-06, "loss": 23.4158, "step": 27299 }, { "epoch": 650.0, "grad_norm": 35.028629302978516, "learning_rate": 9.851370851370852e-06, "loss": 21.4354, "step": 27300 }, { "epoch": 650.0238805970149, "grad_norm": 33.80897903442383, "learning_rate": 9.851010101010102e-06, "loss": 24.3372, "step": 27301 }, { "epoch": 650.0477611940298, "grad_norm": 36.22688674926758, "learning_rate": 9.850649350649352e-06, "loss": 24.662, "step": 27302 }, { "epoch": 650.0716417910447, "grad_norm": 31.593679428100586, "learning_rate": 9.8502886002886e-06, "loss": 23.3683, "step": 27303 }, { "epoch": 650.0955223880597, "grad_norm": 31.997047424316406, "learning_rate": 9.849927849927851e-06, "loss": 24.3617, "step": 27304 }, { "epoch": 650.1194029850747, "grad_norm": 32.71683120727539, "learning_rate": 9.8495670995671e-06, "loss": 23.3849, "step": 27305 }, { "epoch": 650.1432835820896, "grad_norm": 24.335834503173828, "learning_rate": 9.849206349206351e-06, "loss": 23.6967, "step": 27306 }, { "epoch": 650.1671641791045, "grad_norm": 33.453712463378906, "learning_rate": 9.8488455988456e-06, "loss": 24.4642, "step": 27307 }, { "epoch": 650.1910447761194, "grad_norm": 24.574378967285156, "learning_rate": 9.84848484848485e-06, "loss": 23.6643, "step": 27308 }, { "epoch": 650.2149253731343, "grad_norm": 29.898365020751953, "learning_rate": 9.848124098124099e-06, "loss": 23.8646, "step": 27309 }, { "epoch": 650.2388059701492, "grad_norm": 25.25676727294922, "learning_rate": 9.847763347763349e-06, "loss": 23.7496, "step": 27310 }, { "epoch": 650.2626865671642, "grad_norm": 34.72488021850586, "learning_rate": 9.847402597402597e-06, "loss": 24.4244, "step": 27311 }, { "epoch": 650.2865671641791, "grad_norm": 31.21709632873535, "learning_rate": 9.847041847041849e-06, "loss": 23.8206, "step": 27312 }, { "epoch": 650.310447761194, "grad_norm": 26.131511688232422, "learning_rate": 9.846681096681098e-06, "loss": 22.9485, "step": 27313 }, { "epoch": 650.334328358209, "grad_norm": 34.62797927856445, "learning_rate": 9.846320346320348e-06, "loss": 24.0957, "step": 27314 }, { "epoch": 650.3582089552239, "grad_norm": 27.03043556213379, "learning_rate": 9.845959595959596e-06, "loss": 24.7347, "step": 27315 }, { "epoch": 650.3820895522388, "grad_norm": 30.23405647277832, "learning_rate": 9.845598845598847e-06, "loss": 24.3021, "step": 27316 }, { "epoch": 650.4059701492537, "grad_norm": 28.20057487487793, "learning_rate": 9.845238095238097e-06, "loss": 24.2143, "step": 27317 }, { "epoch": 650.4298507462687, "grad_norm": 28.391061782836914, "learning_rate": 9.844877344877345e-06, "loss": 23.9356, "step": 27318 }, { "epoch": 650.4537313432836, "grad_norm": 24.47734832763672, "learning_rate": 9.844516594516595e-06, "loss": 23.8576, "step": 27319 }, { "epoch": 650.4776119402985, "grad_norm": 24.631837844848633, "learning_rate": 9.844155844155846e-06, "loss": 24.4096, "step": 27320 }, { "epoch": 650.5014925373134, "grad_norm": 27.107946395874023, "learning_rate": 9.843795093795094e-06, "loss": 23.835, "step": 27321 }, { "epoch": 650.5253731343283, "grad_norm": 28.295082092285156, "learning_rate": 9.843434343434344e-06, "loss": 23.7322, "step": 27322 }, { "epoch": 650.5492537313432, "grad_norm": 27.29584503173828, "learning_rate": 9.843073593073595e-06, "loss": 23.6709, "step": 27323 }, { "epoch": 650.5731343283583, "grad_norm": 26.31354331970215, "learning_rate": 9.842712842712843e-06, "loss": 24.793, "step": 27324 }, { "epoch": 650.5970149253732, "grad_norm": 25.665102005004883, "learning_rate": 9.842352092352093e-06, "loss": 23.9145, "step": 27325 }, { "epoch": 650.6208955223881, "grad_norm": 27.342769622802734, "learning_rate": 9.841991341991343e-06, "loss": 24.3338, "step": 27326 }, { "epoch": 650.644776119403, "grad_norm": 31.78343391418457, "learning_rate": 9.841630591630592e-06, "loss": 24.9636, "step": 27327 }, { "epoch": 650.6686567164179, "grad_norm": 26.621755599975586, "learning_rate": 9.841269841269842e-06, "loss": 24.4279, "step": 27328 }, { "epoch": 650.6925373134328, "grad_norm": 30.178508758544922, "learning_rate": 9.840909090909092e-06, "loss": 24.3668, "step": 27329 }, { "epoch": 650.7164179104477, "grad_norm": 26.888341903686523, "learning_rate": 9.84054834054834e-06, "loss": 23.7391, "step": 27330 }, { "epoch": 650.7402985074627, "grad_norm": 32.24462890625, "learning_rate": 9.840187590187591e-06, "loss": 24.4397, "step": 27331 }, { "epoch": 650.7641791044776, "grad_norm": 31.898698806762695, "learning_rate": 9.839826839826841e-06, "loss": 23.9316, "step": 27332 }, { "epoch": 650.7880597014926, "grad_norm": 26.96746063232422, "learning_rate": 9.83946608946609e-06, "loss": 24.2896, "step": 27333 }, { "epoch": 650.8119402985075, "grad_norm": 32.6287956237793, "learning_rate": 9.83910533910534e-06, "loss": 23.3001, "step": 27334 }, { "epoch": 650.8358208955224, "grad_norm": 30.406644821166992, "learning_rate": 9.83874458874459e-06, "loss": 24.1881, "step": 27335 }, { "epoch": 650.8597014925373, "grad_norm": 25.7602596282959, "learning_rate": 9.838383838383839e-06, "loss": 24.2176, "step": 27336 }, { "epoch": 650.8835820895522, "grad_norm": 29.300962448120117, "learning_rate": 9.838023088023089e-06, "loss": 23.4623, "step": 27337 }, { "epoch": 650.9074626865672, "grad_norm": 26.809667587280273, "learning_rate": 9.837662337662337e-06, "loss": 24.6932, "step": 27338 }, { "epoch": 650.9313432835821, "grad_norm": 30.422792434692383, "learning_rate": 9.837301587301588e-06, "loss": 24.5892, "step": 27339 }, { "epoch": 650.955223880597, "grad_norm": 32.40089416503906, "learning_rate": 9.836940836940838e-06, "loss": 24.3581, "step": 27340 }, { "epoch": 650.9791044776119, "grad_norm": 29.09444808959961, "learning_rate": 9.836580086580088e-06, "loss": 23.7299, "step": 27341 }, { "epoch": 651.0, "grad_norm": 26.181495666503906, "learning_rate": 9.836219336219336e-06, "loss": 21.1409, "step": 27342 }, { "epoch": 651.0238805970149, "grad_norm": 30.701189041137695, "learning_rate": 9.835858585858587e-06, "loss": 23.3619, "step": 27343 }, { "epoch": 651.0477611940298, "grad_norm": 25.779796600341797, "learning_rate": 9.835497835497835e-06, "loss": 23.8045, "step": 27344 }, { "epoch": 651.0716417910447, "grad_norm": 27.121299743652344, "learning_rate": 9.835137085137087e-06, "loss": 24.2042, "step": 27345 }, { "epoch": 651.0955223880597, "grad_norm": 30.463695526123047, "learning_rate": 9.834776334776336e-06, "loss": 24.1377, "step": 27346 }, { "epoch": 651.1194029850747, "grad_norm": 23.01169204711914, "learning_rate": 9.834415584415586e-06, "loss": 24.3099, "step": 27347 }, { "epoch": 651.1432835820896, "grad_norm": 28.844026565551758, "learning_rate": 9.834054834054834e-06, "loss": 23.8597, "step": 27348 }, { "epoch": 651.1671641791045, "grad_norm": 26.02249526977539, "learning_rate": 9.833694083694084e-06, "loss": 23.9746, "step": 27349 }, { "epoch": 651.1910447761194, "grad_norm": 26.84374237060547, "learning_rate": 9.833333333333333e-06, "loss": 24.2014, "step": 27350 }, { "epoch": 651.2149253731343, "grad_norm": 27.301645278930664, "learning_rate": 9.832972582972585e-06, "loss": 23.5088, "step": 27351 }, { "epoch": 651.2388059701492, "grad_norm": 28.71134376525879, "learning_rate": 9.832611832611833e-06, "loss": 23.8802, "step": 27352 }, { "epoch": 651.2626865671642, "grad_norm": 24.1010799407959, "learning_rate": 9.832251082251084e-06, "loss": 23.7683, "step": 27353 }, { "epoch": 651.2865671641791, "grad_norm": 29.610837936401367, "learning_rate": 9.831890331890332e-06, "loss": 24.4791, "step": 27354 }, { "epoch": 651.310447761194, "grad_norm": 27.421768188476562, "learning_rate": 9.831529581529582e-06, "loss": 23.873, "step": 27355 }, { "epoch": 651.334328358209, "grad_norm": 25.883419036865234, "learning_rate": 9.831168831168832e-06, "loss": 24.6673, "step": 27356 }, { "epoch": 651.3582089552239, "grad_norm": 25.900651931762695, "learning_rate": 9.830808080808083e-06, "loss": 24.2829, "step": 27357 }, { "epoch": 651.3820895522388, "grad_norm": 26.077159881591797, "learning_rate": 9.830447330447331e-06, "loss": 23.8508, "step": 27358 }, { "epoch": 651.4059701492537, "grad_norm": 21.465845108032227, "learning_rate": 9.830086580086581e-06, "loss": 23.6476, "step": 27359 }, { "epoch": 651.4298507462687, "grad_norm": 31.26907730102539, "learning_rate": 9.82972582972583e-06, "loss": 24.7103, "step": 27360 }, { "epoch": 651.4537313432836, "grad_norm": 24.6610107421875, "learning_rate": 9.82936507936508e-06, "loss": 23.8117, "step": 27361 }, { "epoch": 651.4776119402985, "grad_norm": 29.363353729248047, "learning_rate": 9.82900432900433e-06, "loss": 24.4314, "step": 27362 }, { "epoch": 651.5014925373134, "grad_norm": 29.492544174194336, "learning_rate": 9.82864357864358e-06, "loss": 24.5661, "step": 27363 }, { "epoch": 651.5253731343283, "grad_norm": 33.06040954589844, "learning_rate": 9.828282828282829e-06, "loss": 24.8437, "step": 27364 }, { "epoch": 651.5492537313432, "grad_norm": 25.678647994995117, "learning_rate": 9.827922077922079e-06, "loss": 23.4454, "step": 27365 }, { "epoch": 651.5731343283583, "grad_norm": 28.489849090576172, "learning_rate": 9.827561327561328e-06, "loss": 24.7408, "step": 27366 }, { "epoch": 651.5970149253732, "grad_norm": 24.84072494506836, "learning_rate": 9.827200577200578e-06, "loss": 23.8095, "step": 27367 }, { "epoch": 651.6208955223881, "grad_norm": 30.67249298095703, "learning_rate": 9.826839826839828e-06, "loss": 24.009, "step": 27368 }, { "epoch": 651.644776119403, "grad_norm": 25.106706619262695, "learning_rate": 9.826479076479078e-06, "loss": 24.2292, "step": 27369 }, { "epoch": 651.6686567164179, "grad_norm": 29.48039436340332, "learning_rate": 9.826118326118327e-06, "loss": 23.9244, "step": 27370 }, { "epoch": 651.6925373134328, "grad_norm": 25.54505157470703, "learning_rate": 9.825757575757577e-06, "loss": 23.4484, "step": 27371 }, { "epoch": 651.7164179104477, "grad_norm": 25.325286865234375, "learning_rate": 9.825396825396825e-06, "loss": 24.899, "step": 27372 }, { "epoch": 651.7402985074627, "grad_norm": 29.29640769958496, "learning_rate": 9.825036075036076e-06, "loss": 23.4775, "step": 27373 }, { "epoch": 651.7641791044776, "grad_norm": 26.821046829223633, "learning_rate": 9.824675324675326e-06, "loss": 23.4001, "step": 27374 }, { "epoch": 651.7880597014926, "grad_norm": 32.3176383972168, "learning_rate": 9.824314574314574e-06, "loss": 24.0136, "step": 27375 }, { "epoch": 651.8119402985075, "grad_norm": 24.706947326660156, "learning_rate": 9.823953823953825e-06, "loss": 23.5394, "step": 27376 }, { "epoch": 651.8358208955224, "grad_norm": 31.801403045654297, "learning_rate": 9.823593073593075e-06, "loss": 23.7513, "step": 27377 }, { "epoch": 651.8597014925373, "grad_norm": 23.605533599853516, "learning_rate": 9.823232323232325e-06, "loss": 23.7823, "step": 27378 }, { "epoch": 651.8835820895522, "grad_norm": 31.625946044921875, "learning_rate": 9.822871572871573e-06, "loss": 24.8336, "step": 27379 }, { "epoch": 651.9074626865672, "grad_norm": 28.936017990112305, "learning_rate": 9.822510822510824e-06, "loss": 25.5869, "step": 27380 }, { "epoch": 651.9313432835821, "grad_norm": 25.481950759887695, "learning_rate": 9.822150072150072e-06, "loss": 23.6017, "step": 27381 }, { "epoch": 651.955223880597, "grad_norm": 30.013370513916016, "learning_rate": 9.821789321789322e-06, "loss": 24.5919, "step": 27382 }, { "epoch": 651.9791044776119, "grad_norm": 25.76313591003418, "learning_rate": 9.821428571428573e-06, "loss": 23.5527, "step": 27383 }, { "epoch": 652.0, "grad_norm": 21.961063385009766, "learning_rate": 9.821067821067823e-06, "loss": 20.9613, "step": 27384 }, { "epoch": 652.0238805970149, "grad_norm": 23.64035987854004, "learning_rate": 9.820707070707071e-06, "loss": 23.2276, "step": 27385 }, { "epoch": 652.0477611940298, "grad_norm": 28.50796127319336, "learning_rate": 9.820346320346321e-06, "loss": 23.4429, "step": 27386 }, { "epoch": 652.0716417910447, "grad_norm": 26.364213943481445, "learning_rate": 9.81998556998557e-06, "loss": 24.3058, "step": 27387 }, { "epoch": 652.0955223880597, "grad_norm": 29.319286346435547, "learning_rate": 9.81962481962482e-06, "loss": 23.9848, "step": 27388 }, { "epoch": 652.1194029850747, "grad_norm": 23.73418426513672, "learning_rate": 9.81926406926407e-06, "loss": 23.7761, "step": 27389 }, { "epoch": 652.1432835820896, "grad_norm": 29.049306869506836, "learning_rate": 9.81890331890332e-06, "loss": 24.2243, "step": 27390 }, { "epoch": 652.1671641791045, "grad_norm": 25.216861724853516, "learning_rate": 9.818542568542569e-06, "loss": 22.8564, "step": 27391 }, { "epoch": 652.1910447761194, "grad_norm": 25.460735321044922, "learning_rate": 9.81818181818182e-06, "loss": 24.985, "step": 27392 }, { "epoch": 652.2149253731343, "grad_norm": 26.073078155517578, "learning_rate": 9.817821067821068e-06, "loss": 23.4819, "step": 27393 }, { "epoch": 652.2388059701492, "grad_norm": 24.181976318359375, "learning_rate": 9.817460317460318e-06, "loss": 23.9504, "step": 27394 }, { "epoch": 652.2626865671642, "grad_norm": 29.976829528808594, "learning_rate": 9.817099567099568e-06, "loss": 25.074, "step": 27395 }, { "epoch": 652.2865671641791, "grad_norm": 25.900859832763672, "learning_rate": 9.816738816738818e-06, "loss": 24.4757, "step": 27396 }, { "epoch": 652.310447761194, "grad_norm": 30.66126251220703, "learning_rate": 9.816378066378067e-06, "loss": 24.4039, "step": 27397 }, { "epoch": 652.334328358209, "grad_norm": 23.164514541625977, "learning_rate": 9.816017316017317e-06, "loss": 23.4783, "step": 27398 }, { "epoch": 652.3582089552239, "grad_norm": 30.119333267211914, "learning_rate": 9.815656565656566e-06, "loss": 23.2068, "step": 27399 }, { "epoch": 652.3820895522388, "grad_norm": 25.107463836669922, "learning_rate": 9.815295815295816e-06, "loss": 23.105, "step": 27400 }, { "epoch": 652.4059701492537, "grad_norm": 33.6695556640625, "learning_rate": 9.814935064935066e-06, "loss": 25.109, "step": 27401 }, { "epoch": 652.4298507462687, "grad_norm": 26.111156463623047, "learning_rate": 9.814574314574316e-06, "loss": 23.5221, "step": 27402 }, { "epoch": 652.4537313432836, "grad_norm": 29.26272201538086, "learning_rate": 9.814213564213565e-06, "loss": 24.5573, "step": 27403 }, { "epoch": 652.4776119402985, "grad_norm": 25.971342086791992, "learning_rate": 9.813852813852815e-06, "loss": 24.5114, "step": 27404 }, { "epoch": 652.5014925373134, "grad_norm": 28.93685531616211, "learning_rate": 9.813492063492063e-06, "loss": 23.3754, "step": 27405 }, { "epoch": 652.5253731343283, "grad_norm": 30.4278621673584, "learning_rate": 9.813131313131315e-06, "loss": 24.3274, "step": 27406 }, { "epoch": 652.5492537313432, "grad_norm": 26.53434944152832, "learning_rate": 9.812770562770564e-06, "loss": 22.5764, "step": 27407 }, { "epoch": 652.5731343283583, "grad_norm": 26.983217239379883, "learning_rate": 9.812409812409814e-06, "loss": 24.0362, "step": 27408 }, { "epoch": 652.5970149253732, "grad_norm": 26.081037521362305, "learning_rate": 9.812049062049062e-06, "loss": 24.732, "step": 27409 }, { "epoch": 652.6208955223881, "grad_norm": 24.127439498901367, "learning_rate": 9.811688311688313e-06, "loss": 24.6367, "step": 27410 }, { "epoch": 652.644776119403, "grad_norm": 25.958757400512695, "learning_rate": 9.811327561327561e-06, "loss": 24.6703, "step": 27411 }, { "epoch": 652.6686567164179, "grad_norm": 25.044544219970703, "learning_rate": 9.810966810966811e-06, "loss": 24.4597, "step": 27412 }, { "epoch": 652.6925373134328, "grad_norm": 34.65435028076172, "learning_rate": 9.810606060606061e-06, "loss": 24.3648, "step": 27413 }, { "epoch": 652.7164179104477, "grad_norm": 25.603534698486328, "learning_rate": 9.810245310245312e-06, "loss": 24.6385, "step": 27414 }, { "epoch": 652.7402985074627, "grad_norm": 24.399791717529297, "learning_rate": 9.80988455988456e-06, "loss": 24.1947, "step": 27415 }, { "epoch": 652.7641791044776, "grad_norm": 26.15210723876953, "learning_rate": 9.80952380952381e-06, "loss": 24.0488, "step": 27416 }, { "epoch": 652.7880597014926, "grad_norm": 28.377660751342773, "learning_rate": 9.80916305916306e-06, "loss": 24.0384, "step": 27417 }, { "epoch": 652.8119402985075, "grad_norm": 22.798725128173828, "learning_rate": 9.808802308802309e-06, "loss": 24.1343, "step": 27418 }, { "epoch": 652.8358208955224, "grad_norm": 25.3743896484375, "learning_rate": 9.80844155844156e-06, "loss": 23.6354, "step": 27419 }, { "epoch": 652.8597014925373, "grad_norm": 25.248132705688477, "learning_rate": 9.80808080808081e-06, "loss": 24.3194, "step": 27420 }, { "epoch": 652.8835820895522, "grad_norm": 31.32422637939453, "learning_rate": 9.807720057720058e-06, "loss": 23.8204, "step": 27421 }, { "epoch": 652.9074626865672, "grad_norm": 25.18345832824707, "learning_rate": 9.807359307359308e-06, "loss": 24.2553, "step": 27422 }, { "epoch": 652.9313432835821, "grad_norm": 27.226408004760742, "learning_rate": 9.806998556998558e-06, "loss": 24.0203, "step": 27423 }, { "epoch": 652.955223880597, "grad_norm": 28.699071884155273, "learning_rate": 9.806637806637807e-06, "loss": 24.0877, "step": 27424 }, { "epoch": 652.9791044776119, "grad_norm": 31.687910079956055, "learning_rate": 9.806277056277057e-06, "loss": 24.1655, "step": 27425 }, { "epoch": 653.0, "grad_norm": 22.696186065673828, "learning_rate": 9.805916305916307e-06, "loss": 19.5171, "step": 27426 }, { "epoch": 653.0238805970149, "grad_norm": 24.427879333496094, "learning_rate": 9.805555555555556e-06, "loss": 24.3555, "step": 27427 }, { "epoch": 653.0477611940298, "grad_norm": 26.88880157470703, "learning_rate": 9.805194805194806e-06, "loss": 23.5329, "step": 27428 }, { "epoch": 653.0716417910447, "grad_norm": 24.5339298248291, "learning_rate": 9.804834054834056e-06, "loss": 23.5938, "step": 27429 }, { "epoch": 653.0955223880597, "grad_norm": 26.455869674682617, "learning_rate": 9.804473304473305e-06, "loss": 23.5772, "step": 27430 }, { "epoch": 653.1194029850747, "grad_norm": 27.99302864074707, "learning_rate": 9.804112554112555e-06, "loss": 24.5499, "step": 27431 }, { "epoch": 653.1432835820896, "grad_norm": 26.0510196685791, "learning_rate": 9.803751803751805e-06, "loss": 24.5647, "step": 27432 }, { "epoch": 653.1671641791045, "grad_norm": 25.564727783203125, "learning_rate": 9.803391053391054e-06, "loss": 24.8325, "step": 27433 }, { "epoch": 653.1910447761194, "grad_norm": 31.95475959777832, "learning_rate": 9.803030303030304e-06, "loss": 23.3944, "step": 27434 }, { "epoch": 653.2149253731343, "grad_norm": 24.790605545043945, "learning_rate": 9.802669552669554e-06, "loss": 23.2333, "step": 27435 }, { "epoch": 653.2388059701492, "grad_norm": 24.484601974487305, "learning_rate": 9.802308802308802e-06, "loss": 23.8399, "step": 27436 }, { "epoch": 653.2626865671642, "grad_norm": 24.681623458862305, "learning_rate": 9.801948051948053e-06, "loss": 24.0008, "step": 27437 }, { "epoch": 653.2865671641791, "grad_norm": 24.93864631652832, "learning_rate": 9.801587301587301e-06, "loss": 23.8043, "step": 27438 }, { "epoch": 653.310447761194, "grad_norm": 34.50504684448242, "learning_rate": 9.801226551226553e-06, "loss": 24.0038, "step": 27439 }, { "epoch": 653.334328358209, "grad_norm": 26.337921142578125, "learning_rate": 9.800865800865802e-06, "loss": 24.1887, "step": 27440 }, { "epoch": 653.3582089552239, "grad_norm": 26.935935974121094, "learning_rate": 9.800505050505052e-06, "loss": 24.674, "step": 27441 }, { "epoch": 653.3820895522388, "grad_norm": 41.12553024291992, "learning_rate": 9.8001443001443e-06, "loss": 24.8611, "step": 27442 }, { "epoch": 653.4059701492537, "grad_norm": 24.315813064575195, "learning_rate": 9.79978354978355e-06, "loss": 24.4031, "step": 27443 }, { "epoch": 653.4298507462687, "grad_norm": 28.352760314941406, "learning_rate": 9.799422799422799e-06, "loss": 23.5484, "step": 27444 }, { "epoch": 653.4537313432836, "grad_norm": 34.37550735473633, "learning_rate": 9.799062049062051e-06, "loss": 23.8457, "step": 27445 }, { "epoch": 653.4776119402985, "grad_norm": 26.283235549926758, "learning_rate": 9.7987012987013e-06, "loss": 24.3078, "step": 27446 }, { "epoch": 653.5014925373134, "grad_norm": 22.546634674072266, "learning_rate": 9.79834054834055e-06, "loss": 23.7727, "step": 27447 }, { "epoch": 653.5253731343283, "grad_norm": 27.597900390625, "learning_rate": 9.797979797979798e-06, "loss": 23.8403, "step": 27448 }, { "epoch": 653.5492537313432, "grad_norm": 26.744291305541992, "learning_rate": 9.797619047619048e-06, "loss": 23.9557, "step": 27449 }, { "epoch": 653.5731343283583, "grad_norm": 23.557636260986328, "learning_rate": 9.797258297258298e-06, "loss": 23.5747, "step": 27450 }, { "epoch": 653.5970149253732, "grad_norm": 25.935649871826172, "learning_rate": 9.796897546897549e-06, "loss": 24.7019, "step": 27451 }, { "epoch": 653.6208955223881, "grad_norm": 23.152339935302734, "learning_rate": 9.796536796536797e-06, "loss": 22.6472, "step": 27452 }, { "epoch": 653.644776119403, "grad_norm": 30.70677375793457, "learning_rate": 9.796176046176047e-06, "loss": 22.9947, "step": 27453 }, { "epoch": 653.6686567164179, "grad_norm": 25.693382263183594, "learning_rate": 9.795815295815296e-06, "loss": 24.5457, "step": 27454 }, { "epoch": 653.6925373134328, "grad_norm": 24.64613914489746, "learning_rate": 9.795454545454546e-06, "loss": 24.0065, "step": 27455 }, { "epoch": 653.7164179104477, "grad_norm": 25.778106689453125, "learning_rate": 9.795093795093796e-06, "loss": 23.0923, "step": 27456 }, { "epoch": 653.7402985074627, "grad_norm": 30.71236801147461, "learning_rate": 9.794733044733046e-06, "loss": 23.4963, "step": 27457 }, { "epoch": 653.7641791044776, "grad_norm": 27.904550552368164, "learning_rate": 9.794372294372295e-06, "loss": 24.2012, "step": 27458 }, { "epoch": 653.7880597014926, "grad_norm": 24.91156768798828, "learning_rate": 9.794011544011545e-06, "loss": 23.9487, "step": 27459 }, { "epoch": 653.8119402985075, "grad_norm": 23.836719512939453, "learning_rate": 9.793650793650794e-06, "loss": 23.8511, "step": 27460 }, { "epoch": 653.8358208955224, "grad_norm": NaN, "learning_rate": 9.793290043290044e-06, "loss": 21.1984, "step": 27461 }, { "epoch": 653.8597014925373, "grad_norm": 25.523080825805664, "learning_rate": 9.793290043290044e-06, "loss": 24.761, "step": 27462 }, { "epoch": 653.8835820895522, "grad_norm": 33.86993408203125, "learning_rate": 9.792929292929294e-06, "loss": 24.5497, "step": 27463 }, { "epoch": 653.9074626865672, "grad_norm": 26.689245223999023, "learning_rate": 9.792568542568544e-06, "loss": 24.2881, "step": 27464 }, { "epoch": 653.9313432835821, "grad_norm": 24.34453773498535, "learning_rate": 9.792207792207793e-06, "loss": 23.4458, "step": 27465 }, { "epoch": 653.955223880597, "grad_norm": 33.94736862182617, "learning_rate": 9.791847041847043e-06, "loss": 24.3976, "step": 27466 }, { "epoch": 653.9791044776119, "grad_norm": 25.324932098388672, "learning_rate": 9.791486291486291e-06, "loss": 24.6035, "step": 27467 }, { "epoch": 654.0, "grad_norm": 25.145793914794922, "learning_rate": 9.791125541125542e-06, "loss": 20.4945, "step": 27468 }, { "epoch": 654.0238805970149, "grad_norm": 26.26644515991211, "learning_rate": 9.790764790764792e-06, "loss": 23.853, "step": 27469 }, { "epoch": 654.0477611940298, "grad_norm": 24.88412857055664, "learning_rate": 9.790404040404042e-06, "loss": 23.3236, "step": 27470 }, { "epoch": 654.0716417910447, "grad_norm": 32.15158462524414, "learning_rate": 9.79004329004329e-06, "loss": 24.9276, "step": 27471 }, { "epoch": 654.0955223880597, "grad_norm": 22.76410675048828, "learning_rate": 9.78968253968254e-06, "loss": 24.2483, "step": 27472 }, { "epoch": 654.1194029850747, "grad_norm": 25.887971878051758, "learning_rate": 9.789321789321791e-06, "loss": 24.4632, "step": 27473 }, { "epoch": 654.1432835820896, "grad_norm": 27.057981491088867, "learning_rate": 9.78896103896104e-06, "loss": 24.0464, "step": 27474 }, { "epoch": 654.1671641791045, "grad_norm": 23.626657485961914, "learning_rate": 9.78860028860029e-06, "loss": 22.7635, "step": 27475 }, { "epoch": 654.1910447761194, "grad_norm": 26.944059371948242, "learning_rate": 9.788239538239538e-06, "loss": 24.3915, "step": 27476 }, { "epoch": 654.2149253731343, "grad_norm": 27.69143295288086, "learning_rate": 9.787878787878788e-06, "loss": 23.6121, "step": 27477 }, { "epoch": 654.2388059701492, "grad_norm": 28.021526336669922, "learning_rate": 9.787518037518039e-06, "loss": 24.2412, "step": 27478 }, { "epoch": 654.2626865671642, "grad_norm": 22.615224838256836, "learning_rate": 9.787157287157289e-06, "loss": 23.0607, "step": 27479 }, { "epoch": 654.2865671641791, "grad_norm": 28.997983932495117, "learning_rate": 9.786796536796537e-06, "loss": 24.4869, "step": 27480 }, { "epoch": 654.310447761194, "grad_norm": 31.47622299194336, "learning_rate": 9.786435786435787e-06, "loss": 25.3479, "step": 27481 }, { "epoch": 654.334328358209, "grad_norm": 25.208593368530273, "learning_rate": 9.786075036075036e-06, "loss": 23.9387, "step": 27482 }, { "epoch": 654.3582089552239, "grad_norm": 35.23402786254883, "learning_rate": 9.785714285714286e-06, "loss": 22.8644, "step": 27483 }, { "epoch": 654.3820895522388, "grad_norm": 29.036388397216797, "learning_rate": 9.785353535353536e-06, "loss": 23.9824, "step": 27484 }, { "epoch": 654.4059701492537, "grad_norm": 28.079744338989258, "learning_rate": 9.784992784992787e-06, "loss": 23.3628, "step": 27485 }, { "epoch": 654.4298507462687, "grad_norm": 28.132553100585938, "learning_rate": 9.784632034632035e-06, "loss": 24.5346, "step": 27486 }, { "epoch": 654.4537313432836, "grad_norm": 27.058706283569336, "learning_rate": 9.784271284271285e-06, "loss": 24.3007, "step": 27487 }, { "epoch": 654.4776119402985, "grad_norm": 26.308183670043945, "learning_rate": 9.783910533910534e-06, "loss": 24.6109, "step": 27488 }, { "epoch": 654.5014925373134, "grad_norm": 23.693843841552734, "learning_rate": 9.783549783549784e-06, "loss": 23.5198, "step": 27489 }, { "epoch": 654.5253731343283, "grad_norm": 31.799598693847656, "learning_rate": 9.783189033189034e-06, "loss": 23.2052, "step": 27490 }, { "epoch": 654.5492537313432, "grad_norm": 35.788150787353516, "learning_rate": 9.782828282828284e-06, "loss": 24.1347, "step": 27491 }, { "epoch": 654.5731343283583, "grad_norm": 24.299524307250977, "learning_rate": 9.782467532467533e-06, "loss": 24.6338, "step": 27492 }, { "epoch": 654.5970149253732, "grad_norm": 23.247623443603516, "learning_rate": 9.782106782106783e-06, "loss": 24.8631, "step": 27493 }, { "epoch": 654.6208955223881, "grad_norm": 25.91057586669922, "learning_rate": 9.781746031746032e-06, "loss": 23.8508, "step": 27494 }, { "epoch": 654.644776119403, "grad_norm": 32.70425033569336, "learning_rate": 9.781385281385282e-06, "loss": 23.5963, "step": 27495 }, { "epoch": 654.6686567164179, "grad_norm": 27.491281509399414, "learning_rate": 9.781024531024532e-06, "loss": 25.3167, "step": 27496 }, { "epoch": 654.6925373134328, "grad_norm": 25.996700286865234, "learning_rate": 9.780663780663782e-06, "loss": 24.3222, "step": 27497 }, { "epoch": 654.7164179104477, "grad_norm": 41.5152473449707, "learning_rate": 9.78030303030303e-06, "loss": 23.8791, "step": 27498 }, { "epoch": 654.7402985074627, "grad_norm": 25.515962600708008, "learning_rate": 9.779942279942281e-06, "loss": 22.9469, "step": 27499 }, { "epoch": 654.7641791044776, "grad_norm": 34.283348083496094, "learning_rate": 9.77958152958153e-06, "loss": 23.4557, "step": 27500 }, { "epoch": 654.7880597014926, "grad_norm": 37.740230560302734, "learning_rate": 9.779220779220781e-06, "loss": 23.366, "step": 27501 }, { "epoch": 654.8119402985075, "grad_norm": 23.757917404174805, "learning_rate": 9.77886002886003e-06, "loss": 23.8736, "step": 27502 }, { "epoch": 654.8358208955224, "grad_norm": 46.85301208496094, "learning_rate": 9.77849927849928e-06, "loss": 23.2015, "step": 27503 }, { "epoch": 654.8597014925373, "grad_norm": 32.06129455566406, "learning_rate": 9.778138528138528e-06, "loss": 23.2739, "step": 27504 }, { "epoch": 654.8835820895522, "grad_norm": 37.644100189208984, "learning_rate": 9.777777777777779e-06, "loss": 23.1784, "step": 27505 }, { "epoch": 654.9074626865672, "grad_norm": 35.728694915771484, "learning_rate": 9.777417027417027e-06, "loss": 23.8205, "step": 27506 }, { "epoch": 654.9313432835821, "grad_norm": 28.489002227783203, "learning_rate": 9.777056277056279e-06, "loss": 24.3065, "step": 27507 }, { "epoch": 654.955223880597, "grad_norm": 36.47765350341797, "learning_rate": 9.776695526695528e-06, "loss": 24.0409, "step": 27508 }, { "epoch": 654.9791044776119, "grad_norm": 33.13026428222656, "learning_rate": 9.776334776334778e-06, "loss": 24.992, "step": 27509 }, { "epoch": 655.0, "grad_norm": 21.112485885620117, "learning_rate": 9.775974025974026e-06, "loss": 21.6478, "step": 27510 }, { "epoch": 655.0238805970149, "grad_norm": 32.53463363647461, "learning_rate": 9.775613275613276e-06, "loss": 23.6146, "step": 27511 }, { "epoch": 655.0477611940298, "grad_norm": 30.009319305419922, "learning_rate": 9.775252525252527e-06, "loss": 23.7329, "step": 27512 }, { "epoch": 655.0716417910447, "grad_norm": 22.066049575805664, "learning_rate": 9.774891774891775e-06, "loss": 24.2955, "step": 27513 }, { "epoch": 655.0955223880597, "grad_norm": 29.274311065673828, "learning_rate": 9.774531024531025e-06, "loss": 23.4073, "step": 27514 }, { "epoch": 655.1194029850747, "grad_norm": 31.58591651916504, "learning_rate": 9.774170274170276e-06, "loss": 24.133, "step": 27515 }, { "epoch": 655.1432835820896, "grad_norm": 24.635549545288086, "learning_rate": 9.773809523809524e-06, "loss": 23.402, "step": 27516 }, { "epoch": 655.1671641791045, "grad_norm": 25.053138732910156, "learning_rate": 9.773448773448774e-06, "loss": 24.2897, "step": 27517 }, { "epoch": 655.1910447761194, "grad_norm": 38.646888732910156, "learning_rate": 9.773088023088024e-06, "loss": 24.2069, "step": 27518 }, { "epoch": 655.2149253731343, "grad_norm": 26.619375228881836, "learning_rate": 9.772727272727273e-06, "loss": 24.412, "step": 27519 }, { "epoch": 655.2388059701492, "grad_norm": 23.682863235473633, "learning_rate": 9.772366522366523e-06, "loss": 23.5583, "step": 27520 }, { "epoch": 655.2626865671642, "grad_norm": 31.320621490478516, "learning_rate": 9.772005772005773e-06, "loss": 23.6769, "step": 27521 }, { "epoch": 655.2865671641791, "grad_norm": 27.828079223632812, "learning_rate": 9.771645021645022e-06, "loss": 24.24, "step": 27522 }, { "epoch": 655.310447761194, "grad_norm": 20.809232711791992, "learning_rate": 9.771284271284272e-06, "loss": 23.199, "step": 27523 }, { "epoch": 655.334328358209, "grad_norm": 30.442581176757812, "learning_rate": 9.770923520923522e-06, "loss": 24.0393, "step": 27524 }, { "epoch": 655.3582089552239, "grad_norm": 26.167743682861328, "learning_rate": 9.77056277056277e-06, "loss": 23.9283, "step": 27525 }, { "epoch": 655.3820895522388, "grad_norm": 22.713279724121094, "learning_rate": 9.770202020202021e-06, "loss": 24.2093, "step": 27526 }, { "epoch": 655.4059701492537, "grad_norm": 23.353647232055664, "learning_rate": 9.769841269841271e-06, "loss": 23.8555, "step": 27527 }, { "epoch": 655.4298507462687, "grad_norm": 22.9490909576416, "learning_rate": 9.76948051948052e-06, "loss": 24.4482, "step": 27528 }, { "epoch": 655.4537313432836, "grad_norm": 29.209775924682617, "learning_rate": 9.76911976911977e-06, "loss": 24.0591, "step": 27529 }, { "epoch": 655.4776119402985, "grad_norm": 25.942344665527344, "learning_rate": 9.76875901875902e-06, "loss": 23.2298, "step": 27530 }, { "epoch": 655.5014925373134, "grad_norm": 27.97564125061035, "learning_rate": 9.768398268398269e-06, "loss": 23.9363, "step": 27531 }, { "epoch": 655.5253731343283, "grad_norm": 26.12917709350586, "learning_rate": 9.768037518037519e-06, "loss": 24.6963, "step": 27532 }, { "epoch": 655.5492537313432, "grad_norm": 34.15718078613281, "learning_rate": 9.767676767676767e-06, "loss": 23.44, "step": 27533 }, { "epoch": 655.5731343283583, "grad_norm": 33.54568099975586, "learning_rate": 9.767316017316019e-06, "loss": 23.6825, "step": 27534 }, { "epoch": 655.5970149253732, "grad_norm": 24.43870735168457, "learning_rate": 9.766955266955268e-06, "loss": 24.2292, "step": 27535 }, { "epoch": 655.6208955223881, "grad_norm": 25.049991607666016, "learning_rate": 9.766594516594518e-06, "loss": 23.7888, "step": 27536 }, { "epoch": 655.644776119403, "grad_norm": 27.8150634765625, "learning_rate": 9.766233766233766e-06, "loss": 23.4545, "step": 27537 }, { "epoch": 655.6686567164179, "grad_norm": 29.715158462524414, "learning_rate": 9.765873015873017e-06, "loss": 23.9194, "step": 27538 }, { "epoch": 655.6925373134328, "grad_norm": 27.568763732910156, "learning_rate": 9.765512265512265e-06, "loss": 24.7393, "step": 27539 }, { "epoch": 655.7164179104477, "grad_norm": 22.757781982421875, "learning_rate": 9.765151515151517e-06, "loss": 24.8996, "step": 27540 }, { "epoch": 655.7402985074627, "grad_norm": 29.459028244018555, "learning_rate": 9.764790764790765e-06, "loss": 24.3467, "step": 27541 }, { "epoch": 655.7641791044776, "grad_norm": 23.562183380126953, "learning_rate": 9.764430014430016e-06, "loss": 24.2923, "step": 27542 }, { "epoch": 655.7880597014926, "grad_norm": 26.609025955200195, "learning_rate": 9.764069264069264e-06, "loss": 22.9921, "step": 27543 }, { "epoch": 655.8119402985075, "grad_norm": 26.995885848999023, "learning_rate": 9.763708513708514e-06, "loss": 24.3587, "step": 27544 }, { "epoch": 655.8358208955224, "grad_norm": 23.335559844970703, "learning_rate": 9.763347763347765e-06, "loss": 22.7372, "step": 27545 }, { "epoch": 655.8597014925373, "grad_norm": 25.975025177001953, "learning_rate": 9.762987012987015e-06, "loss": 23.6521, "step": 27546 }, { "epoch": 655.8835820895522, "grad_norm": 28.723472595214844, "learning_rate": 9.762626262626263e-06, "loss": 24.1509, "step": 27547 }, { "epoch": 655.9074626865672, "grad_norm": 27.101295471191406, "learning_rate": 9.762265512265513e-06, "loss": 23.7848, "step": 27548 }, { "epoch": 655.9313432835821, "grad_norm": 26.480274200439453, "learning_rate": 9.761904761904762e-06, "loss": 24.4607, "step": 27549 }, { "epoch": 655.955223880597, "grad_norm": 23.709148406982422, "learning_rate": 9.761544011544012e-06, "loss": 24.6744, "step": 27550 }, { "epoch": 655.9791044776119, "grad_norm": 25.716594696044922, "learning_rate": 9.761183261183262e-06, "loss": 23.3882, "step": 27551 }, { "epoch": 656.0, "grad_norm": 25.26085662841797, "learning_rate": 9.760822510822513e-06, "loss": 20.4842, "step": 27552 }, { "epoch": 656.0238805970149, "grad_norm": 27.066146850585938, "learning_rate": 9.760461760461761e-06, "loss": 23.9661, "step": 27553 }, { "epoch": 656.0477611940298, "grad_norm": 22.922536849975586, "learning_rate": 9.760101010101011e-06, "loss": 22.8151, "step": 27554 }, { "epoch": 656.0716417910447, "grad_norm": 28.41008758544922, "learning_rate": 9.75974025974026e-06, "loss": 23.7709, "step": 27555 }, { "epoch": 656.0955223880597, "grad_norm": 29.740224838256836, "learning_rate": 9.75937950937951e-06, "loss": 24.1076, "step": 27556 }, { "epoch": 656.1194029850747, "grad_norm": 23.29267120361328, "learning_rate": 9.75901875901876e-06, "loss": 23.4218, "step": 27557 }, { "epoch": 656.1432835820896, "grad_norm": 23.54802703857422, "learning_rate": 9.75865800865801e-06, "loss": 23.4678, "step": 27558 }, { "epoch": 656.1671641791045, "grad_norm": 24.881925582885742, "learning_rate": 9.758297258297259e-06, "loss": 24.3967, "step": 27559 }, { "epoch": 656.1910447761194, "grad_norm": 27.382787704467773, "learning_rate": 9.757936507936509e-06, "loss": 24.786, "step": 27560 }, { "epoch": 656.2149253731343, "grad_norm": 21.991167068481445, "learning_rate": 9.757575757575758e-06, "loss": 24.016, "step": 27561 }, { "epoch": 656.2388059701492, "grad_norm": 24.373294830322266, "learning_rate": 9.757215007215008e-06, "loss": 22.7162, "step": 27562 }, { "epoch": 656.2626865671642, "grad_norm": NaN, "learning_rate": 9.756854256854258e-06, "loss": 24.6886, "step": 27563 }, { "epoch": 656.2865671641791, "grad_norm": 30.067712783813477, "learning_rate": 9.756854256854258e-06, "loss": 24.4386, "step": 27564 }, { "epoch": 656.310447761194, "grad_norm": 32.20934295654297, "learning_rate": 9.756493506493508e-06, "loss": 23.7932, "step": 27565 }, { "epoch": 656.334328358209, "grad_norm": 21.62567901611328, "learning_rate": 9.756132756132757e-06, "loss": 24.0513, "step": 27566 }, { "epoch": 656.3582089552239, "grad_norm": 28.245765686035156, "learning_rate": 9.755772005772007e-06, "loss": 23.5883, "step": 27567 }, { "epoch": 656.3820895522388, "grad_norm": 29.45981788635254, "learning_rate": 9.755411255411255e-06, "loss": 23.5695, "step": 27568 }, { "epoch": 656.4059701492537, "grad_norm": 27.943010330200195, "learning_rate": 9.755050505050506e-06, "loss": 23.7585, "step": 27569 }, { "epoch": 656.4298507462687, "grad_norm": 24.719675064086914, "learning_rate": 9.754689754689756e-06, "loss": 24.1045, "step": 27570 }, { "epoch": 656.4537313432836, "grad_norm": 32.70699691772461, "learning_rate": 9.754329004329006e-06, "loss": 24.0677, "step": 27571 }, { "epoch": 656.4776119402985, "grad_norm": 29.000858306884766, "learning_rate": 9.753968253968254e-06, "loss": 25.2614, "step": 27572 }, { "epoch": 656.5014925373134, "grad_norm": 24.95917510986328, "learning_rate": 9.753607503607505e-06, "loss": 23.2512, "step": 27573 }, { "epoch": 656.5253731343283, "grad_norm": 32.82474899291992, "learning_rate": 9.753246753246755e-06, "loss": 23.245, "step": 27574 }, { "epoch": 656.5492537313432, "grad_norm": 27.516254425048828, "learning_rate": 9.752886002886003e-06, "loss": 23.2, "step": 27575 }, { "epoch": 656.5731343283583, "grad_norm": 26.659879684448242, "learning_rate": 9.752525252525254e-06, "loss": 24.1208, "step": 27576 }, { "epoch": 656.5970149253732, "grad_norm": 26.529314041137695, "learning_rate": 9.752164502164502e-06, "loss": 23.7481, "step": 27577 }, { "epoch": 656.6208955223881, "grad_norm": 28.729400634765625, "learning_rate": 9.751803751803752e-06, "loss": 24.4347, "step": 27578 }, { "epoch": 656.644776119403, "grad_norm": 27.600645065307617, "learning_rate": 9.751443001443002e-06, "loss": 24.5267, "step": 27579 }, { "epoch": 656.6686567164179, "grad_norm": 23.183305740356445, "learning_rate": 9.751082251082253e-06, "loss": 24.144, "step": 27580 }, { "epoch": 656.6925373134328, "grad_norm": 26.052120208740234, "learning_rate": 9.750721500721501e-06, "loss": 24.1179, "step": 27581 }, { "epoch": 656.7164179104477, "grad_norm": 25.713125228881836, "learning_rate": 9.750360750360751e-06, "loss": 23.8757, "step": 27582 }, { "epoch": 656.7402985074627, "grad_norm": 27.22038459777832, "learning_rate": 9.75e-06, "loss": 23.3819, "step": 27583 }, { "epoch": 656.7641791044776, "grad_norm": 22.613525390625, "learning_rate": 9.74963924963925e-06, "loss": 23.9437, "step": 27584 }, { "epoch": 656.7880597014926, "grad_norm": 23.730426788330078, "learning_rate": 9.7492784992785e-06, "loss": 23.7792, "step": 27585 }, { "epoch": 656.8119402985075, "grad_norm": 24.69949722290039, "learning_rate": 9.74891774891775e-06, "loss": 24.4247, "step": 27586 }, { "epoch": 656.8358208955224, "grad_norm": 25.61389923095703, "learning_rate": 9.748556998556999e-06, "loss": 24.2141, "step": 27587 }, { "epoch": 656.8597014925373, "grad_norm": 23.214338302612305, "learning_rate": 9.748196248196249e-06, "loss": 24.2183, "step": 27588 }, { "epoch": 656.8835820895522, "grad_norm": 25.438236236572266, "learning_rate": 9.747835497835498e-06, "loss": 24.1363, "step": 27589 }, { "epoch": 656.9074626865672, "grad_norm": 24.854110717773438, "learning_rate": 9.747474747474748e-06, "loss": 23.7389, "step": 27590 }, { "epoch": 656.9313432835821, "grad_norm": 22.029722213745117, "learning_rate": 9.747113997113998e-06, "loss": 23.5855, "step": 27591 }, { "epoch": 656.955223880597, "grad_norm": 23.37220573425293, "learning_rate": 9.746753246753248e-06, "loss": 23.4464, "step": 27592 }, { "epoch": 656.9791044776119, "grad_norm": 25.114337921142578, "learning_rate": 9.746392496392497e-06, "loss": 23.436, "step": 27593 }, { "epoch": 657.0, "grad_norm": 24.985904693603516, "learning_rate": 9.746031746031747e-06, "loss": 21.3956, "step": 27594 }, { "epoch": 657.0238805970149, "grad_norm": 28.733783721923828, "learning_rate": 9.745670995670995e-06, "loss": 22.8867, "step": 27595 }, { "epoch": 657.0477611940298, "grad_norm": 27.717098236083984, "learning_rate": 9.745310245310247e-06, "loss": 24.1963, "step": 27596 }, { "epoch": 657.0716417910447, "grad_norm": 27.08446502685547, "learning_rate": 9.744949494949496e-06, "loss": 23.8299, "step": 27597 }, { "epoch": 657.0955223880597, "grad_norm": 21.285696029663086, "learning_rate": 9.744588744588746e-06, "loss": 22.9181, "step": 27598 }, { "epoch": 657.1194029850747, "grad_norm": 26.91779327392578, "learning_rate": 9.744227994227995e-06, "loss": 23.8745, "step": 27599 }, { "epoch": 657.1432835820896, "grad_norm": 27.42451286315918, "learning_rate": 9.743867243867245e-06, "loss": 23.6704, "step": 27600 }, { "epoch": 657.1671641791045, "grad_norm": 30.6593074798584, "learning_rate": 9.743506493506493e-06, "loss": 24.3467, "step": 27601 }, { "epoch": 657.1910447761194, "grad_norm": 25.143653869628906, "learning_rate": 9.743145743145745e-06, "loss": 23.2543, "step": 27602 }, { "epoch": 657.2149253731343, "grad_norm": 22.328475952148438, "learning_rate": 9.742784992784994e-06, "loss": 23.4104, "step": 27603 }, { "epoch": 657.2388059701492, "grad_norm": 28.864736557006836, "learning_rate": 9.742424242424244e-06, "loss": 24.02, "step": 27604 }, { "epoch": 657.2626865671642, "grad_norm": 26.79606819152832, "learning_rate": 9.742063492063492e-06, "loss": 23.6925, "step": 27605 }, { "epoch": 657.2865671641791, "grad_norm": 26.673738479614258, "learning_rate": 9.741702741702743e-06, "loss": 23.4603, "step": 27606 }, { "epoch": 657.310447761194, "grad_norm": 26.091949462890625, "learning_rate": 9.741341991341993e-06, "loss": 23.8267, "step": 27607 }, { "epoch": 657.334328358209, "grad_norm": 33.82353210449219, "learning_rate": 9.740981240981243e-06, "loss": 23.5868, "step": 27608 }, { "epoch": 657.3582089552239, "grad_norm": 27.908884048461914, "learning_rate": 9.740620490620491e-06, "loss": 23.8103, "step": 27609 }, { "epoch": 657.3820895522388, "grad_norm": 23.25847816467285, "learning_rate": 9.740259740259742e-06, "loss": 23.9041, "step": 27610 }, { "epoch": 657.4059701492537, "grad_norm": 30.498170852661133, "learning_rate": 9.73989898989899e-06, "loss": 24.2655, "step": 27611 }, { "epoch": 657.4298507462687, "grad_norm": 32.752559661865234, "learning_rate": 9.73953823953824e-06, "loss": 24.4027, "step": 27612 }, { "epoch": 657.4537313432836, "grad_norm": 25.3778018951416, "learning_rate": 9.73917748917749e-06, "loss": 24.1441, "step": 27613 }, { "epoch": 657.4776119402985, "grad_norm": 24.620458602905273, "learning_rate": 9.738816738816739e-06, "loss": 23.9755, "step": 27614 }, { "epoch": 657.5014925373134, "grad_norm": 23.1533145904541, "learning_rate": 9.73845598845599e-06, "loss": 23.8856, "step": 27615 }, { "epoch": 657.5253731343283, "grad_norm": 27.94002914428711, "learning_rate": 9.73809523809524e-06, "loss": 23.5475, "step": 27616 }, { "epoch": 657.5492537313432, "grad_norm": 22.8441219329834, "learning_rate": 9.737734487734488e-06, "loss": 24.4686, "step": 27617 }, { "epoch": 657.5731343283583, "grad_norm": 28.91463851928711, "learning_rate": 9.737373737373738e-06, "loss": 24.6944, "step": 27618 }, { "epoch": 657.5970149253732, "grad_norm": 26.081262588500977, "learning_rate": 9.737012987012988e-06, "loss": 23.447, "step": 27619 }, { "epoch": 657.6208955223881, "grad_norm": 26.564781188964844, "learning_rate": 9.736652236652237e-06, "loss": 23.4137, "step": 27620 }, { "epoch": 657.644776119403, "grad_norm": 28.901702880859375, "learning_rate": 9.736291486291487e-06, "loss": 23.8788, "step": 27621 }, { "epoch": 657.6686567164179, "grad_norm": 24.17472267150879, "learning_rate": 9.735930735930737e-06, "loss": 24.1281, "step": 27622 }, { "epoch": 657.6925373134328, "grad_norm": 28.15230369567871, "learning_rate": 9.735569985569986e-06, "loss": 23.9115, "step": 27623 }, { "epoch": 657.7164179104477, "grad_norm": 32.21467208862305, "learning_rate": 9.735209235209236e-06, "loss": 24.3642, "step": 27624 }, { "epoch": 657.7402985074627, "grad_norm": 22.184114456176758, "learning_rate": 9.734848484848486e-06, "loss": 24.0919, "step": 27625 }, { "epoch": 657.7641791044776, "grad_norm": 27.285932540893555, "learning_rate": 9.734487734487735e-06, "loss": 23.8193, "step": 27626 }, { "epoch": 657.7880597014926, "grad_norm": 36.43394088745117, "learning_rate": 9.734126984126985e-06, "loss": 24.2936, "step": 27627 }, { "epoch": 657.8119402985075, "grad_norm": 30.89509391784668, "learning_rate": 9.733766233766235e-06, "loss": 23.2739, "step": 27628 }, { "epoch": 657.8358208955224, "grad_norm": 21.475086212158203, "learning_rate": 9.733405483405485e-06, "loss": 23.7646, "step": 27629 }, { "epoch": 657.8597014925373, "grad_norm": 25.119407653808594, "learning_rate": 9.733044733044734e-06, "loss": 23.0659, "step": 27630 }, { "epoch": 657.8835820895522, "grad_norm": 27.781204223632812, "learning_rate": 9.732683982683984e-06, "loss": 25.0211, "step": 27631 }, { "epoch": 657.9074626865672, "grad_norm": 25.046979904174805, "learning_rate": 9.732323232323232e-06, "loss": 23.0657, "step": 27632 }, { "epoch": 657.9313432835821, "grad_norm": 26.25678253173828, "learning_rate": 9.731962481962483e-06, "loss": 24.4553, "step": 27633 }, { "epoch": 657.955223880597, "grad_norm": 26.091449737548828, "learning_rate": 9.731601731601731e-06, "loss": 24.8162, "step": 27634 }, { "epoch": 657.9791044776119, "grad_norm": 24.11795997619629, "learning_rate": 9.731240981240983e-06, "loss": 23.6276, "step": 27635 }, { "epoch": 658.0, "grad_norm": 25.06505012512207, "learning_rate": 9.730880230880231e-06, "loss": 21.1747, "step": 27636 }, { "epoch": 658.0238805970149, "grad_norm": 22.089433670043945, "learning_rate": 9.730519480519482e-06, "loss": 23.582, "step": 27637 }, { "epoch": 658.0477611940298, "grad_norm": 21.646230697631836, "learning_rate": 9.73015873015873e-06, "loss": 23.8205, "step": 27638 }, { "epoch": 658.0716417910447, "grad_norm": 27.159700393676758, "learning_rate": 9.72979797979798e-06, "loss": 23.5663, "step": 27639 }, { "epoch": 658.0955223880597, "grad_norm": 34.188270568847656, "learning_rate": 9.729437229437229e-06, "loss": 24.0156, "step": 27640 }, { "epoch": 658.1194029850747, "grad_norm": 23.1765193939209, "learning_rate": 9.72907647907648e-06, "loss": 23.6503, "step": 27641 }, { "epoch": 658.1432835820896, "grad_norm": 28.290205001831055, "learning_rate": 9.72871572871573e-06, "loss": 24.6033, "step": 27642 }, { "epoch": 658.1671641791045, "grad_norm": 35.68321990966797, "learning_rate": 9.72835497835498e-06, "loss": 24.342, "step": 27643 }, { "epoch": 658.1910447761194, "grad_norm": 25.041505813598633, "learning_rate": 9.727994227994228e-06, "loss": 23.4641, "step": 27644 }, { "epoch": 658.2149253731343, "grad_norm": 26.054576873779297, "learning_rate": 9.727633477633478e-06, "loss": 24.7085, "step": 27645 }, { "epoch": 658.2388059701492, "grad_norm": 26.248207092285156, "learning_rate": 9.727272727272728e-06, "loss": 23.9004, "step": 27646 }, { "epoch": 658.2626865671642, "grad_norm": 31.055133819580078, "learning_rate": 9.726911976911979e-06, "loss": 24.3754, "step": 27647 }, { "epoch": 658.2865671641791, "grad_norm": 26.05176544189453, "learning_rate": 9.726551226551227e-06, "loss": 23.9579, "step": 27648 }, { "epoch": 658.310447761194, "grad_norm": 25.17340660095215, "learning_rate": 9.726190476190477e-06, "loss": 23.0571, "step": 27649 }, { "epoch": 658.334328358209, "grad_norm": 23.336244583129883, "learning_rate": 9.725829725829726e-06, "loss": 23.7044, "step": 27650 }, { "epoch": 658.3582089552239, "grad_norm": NaN, "learning_rate": 9.725468975468976e-06, "loss": 24.106, "step": 27651 }, { "epoch": 658.3820895522388, "grad_norm": 24.26068878173828, "learning_rate": 9.725468975468976e-06, "loss": 22.655, "step": 27652 }, { "epoch": 658.4059701492537, "grad_norm": 28.09440803527832, "learning_rate": 9.725108225108226e-06, "loss": 24.0364, "step": 27653 }, { "epoch": 658.4298507462687, "grad_norm": 23.456350326538086, "learning_rate": 9.724747474747476e-06, "loss": 23.5669, "step": 27654 }, { "epoch": 658.4537313432836, "grad_norm": 28.413000106811523, "learning_rate": 9.724386724386725e-06, "loss": 24.7387, "step": 27655 }, { "epoch": 658.4776119402985, "grad_norm": 28.1519832611084, "learning_rate": 9.724025974025975e-06, "loss": 23.6883, "step": 27656 }, { "epoch": 658.5014925373134, "grad_norm": 22.70875358581543, "learning_rate": 9.723665223665224e-06, "loss": 24.3903, "step": 27657 }, { "epoch": 658.5253731343283, "grad_norm": 25.87919807434082, "learning_rate": 9.723304473304474e-06, "loss": 23.6528, "step": 27658 }, { "epoch": 658.5492537313432, "grad_norm": 30.575149536132812, "learning_rate": 9.722943722943724e-06, "loss": 24.6145, "step": 27659 }, { "epoch": 658.5731343283583, "grad_norm": 32.41998291015625, "learning_rate": 9.722582972582974e-06, "loss": 23.077, "step": 27660 }, { "epoch": 658.5970149253732, "grad_norm": 23.585355758666992, "learning_rate": 9.722222222222223e-06, "loss": 23.9071, "step": 27661 }, { "epoch": 658.6208955223881, "grad_norm": 30.574277877807617, "learning_rate": 9.721861471861473e-06, "loss": 23.7756, "step": 27662 }, { "epoch": 658.644776119403, "grad_norm": 34.663536071777344, "learning_rate": 9.721500721500721e-06, "loss": 23.5731, "step": 27663 }, { "epoch": 658.6686567164179, "grad_norm": 29.61380386352539, "learning_rate": 9.721139971139972e-06, "loss": 25.0746, "step": 27664 }, { "epoch": 658.6925373134328, "grad_norm": 27.207136154174805, "learning_rate": 9.720779220779222e-06, "loss": 24.3891, "step": 27665 }, { "epoch": 658.7164179104477, "grad_norm": 27.434850692749023, "learning_rate": 9.720418470418472e-06, "loss": 23.1646, "step": 27666 }, { "epoch": 658.7402985074627, "grad_norm": 29.90032958984375, "learning_rate": 9.72005772005772e-06, "loss": 23.5687, "step": 27667 }, { "epoch": 658.7641791044776, "grad_norm": 21.83568572998047, "learning_rate": 9.71969696969697e-06, "loss": 23.5172, "step": 27668 }, { "epoch": 658.7880597014926, "grad_norm": 25.399295806884766, "learning_rate": 9.719336219336221e-06, "loss": 22.5325, "step": 27669 }, { "epoch": 658.8119402985075, "grad_norm": 23.210098266601562, "learning_rate": 9.71897546897547e-06, "loss": 23.7722, "step": 27670 }, { "epoch": 658.8358208955224, "grad_norm": 28.036169052124023, "learning_rate": 9.71861471861472e-06, "loss": 23.8952, "step": 27671 }, { "epoch": 658.8597014925373, "grad_norm": 28.052936553955078, "learning_rate": 9.71825396825397e-06, "loss": 23.7184, "step": 27672 }, { "epoch": 658.8835820895522, "grad_norm": 29.171266555786133, "learning_rate": 9.717893217893218e-06, "loss": 23.7774, "step": 27673 }, { "epoch": 658.9074626865672, "grad_norm": 24.12400245666504, "learning_rate": 9.717532467532468e-06, "loss": 24.1113, "step": 27674 }, { "epoch": 658.9313432835821, "grad_norm": 25.063232421875, "learning_rate": 9.717171717171719e-06, "loss": 24.2737, "step": 27675 }, { "epoch": 658.955223880597, "grad_norm": 26.407297134399414, "learning_rate": 9.716810966810967e-06, "loss": 23.6644, "step": 27676 }, { "epoch": 658.9791044776119, "grad_norm": 21.13384437561035, "learning_rate": 9.716450216450217e-06, "loss": 23.682, "step": 27677 }, { "epoch": 659.0, "grad_norm": 26.38224220275879, "learning_rate": 9.716089466089466e-06, "loss": 20.628, "step": 27678 }, { "epoch": 659.0238805970149, "grad_norm": 25.282442092895508, "learning_rate": 9.715728715728716e-06, "loss": 23.6078, "step": 27679 }, { "epoch": 659.0477611940298, "grad_norm": 27.4145565032959, "learning_rate": 9.715367965367966e-06, "loss": 24.119, "step": 27680 }, { "epoch": 659.0716417910447, "grad_norm": 26.1214599609375, "learning_rate": 9.715007215007216e-06, "loss": 24.6062, "step": 27681 }, { "epoch": 659.0955223880597, "grad_norm": 27.120166778564453, "learning_rate": 9.714646464646465e-06, "loss": 23.8259, "step": 27682 }, { "epoch": 659.1194029850747, "grad_norm": 26.79106903076172, "learning_rate": 9.714285714285715e-06, "loss": 23.9605, "step": 27683 }, { "epoch": 659.1432835820896, "grad_norm": 24.979106903076172, "learning_rate": 9.713924963924964e-06, "loss": 23.9368, "step": 27684 }, { "epoch": 659.1671641791045, "grad_norm": 28.482118606567383, "learning_rate": 9.713564213564214e-06, "loss": 23.8113, "step": 27685 }, { "epoch": 659.1910447761194, "grad_norm": 25.42946434020996, "learning_rate": 9.713203463203464e-06, "loss": 22.7682, "step": 27686 }, { "epoch": 659.2149253731343, "grad_norm": 22.504793167114258, "learning_rate": 9.712842712842714e-06, "loss": 23.4998, "step": 27687 }, { "epoch": 659.2388059701492, "grad_norm": 26.60252571105957, "learning_rate": 9.712481962481963e-06, "loss": 23.1266, "step": 27688 }, { "epoch": 659.2626865671642, "grad_norm": 30.946517944335938, "learning_rate": 9.712121212121213e-06, "loss": 23.3451, "step": 27689 }, { "epoch": 659.2865671641791, "grad_norm": 24.299036026000977, "learning_rate": 9.711760461760461e-06, "loss": 23.4949, "step": 27690 }, { "epoch": 659.310447761194, "grad_norm": 32.065731048583984, "learning_rate": 9.711399711399713e-06, "loss": 23.7526, "step": 27691 }, { "epoch": 659.334328358209, "grad_norm": 36.73197555541992, "learning_rate": 9.711038961038962e-06, "loss": 23.0583, "step": 27692 }, { "epoch": 659.3582089552239, "grad_norm": 24.187349319458008, "learning_rate": 9.710678210678212e-06, "loss": 23.5705, "step": 27693 }, { "epoch": 659.3820895522388, "grad_norm": 28.755393981933594, "learning_rate": 9.71031746031746e-06, "loss": 25.5738, "step": 27694 }, { "epoch": 659.4059701492537, "grad_norm": 28.449708938598633, "learning_rate": 9.70995670995671e-06, "loss": 24.0473, "step": 27695 }, { "epoch": 659.4298507462687, "grad_norm": 24.00153160095215, "learning_rate": 9.70959595959596e-06, "loss": 22.9974, "step": 27696 }, { "epoch": 659.4537313432836, "grad_norm": 22.969654083251953, "learning_rate": 9.709235209235211e-06, "loss": 24.3664, "step": 27697 }, { "epoch": 659.4776119402985, "grad_norm": 22.070667266845703, "learning_rate": 9.70887445887446e-06, "loss": 23.2599, "step": 27698 }, { "epoch": 659.5014925373134, "grad_norm": NaN, "learning_rate": 9.70851370851371e-06, "loss": 24.0131, "step": 27699 }, { "epoch": 659.5253731343283, "grad_norm": 24.776872634887695, "learning_rate": 9.70851370851371e-06, "loss": 23.8652, "step": 27700 }, { "epoch": 659.5492537313432, "grad_norm": 25.071243286132812, "learning_rate": 9.708152958152958e-06, "loss": 24.0931, "step": 27701 }, { "epoch": 659.5731343283583, "grad_norm": 32.05207824707031, "learning_rate": 9.707792207792209e-06, "loss": 24.7417, "step": 27702 }, { "epoch": 659.5970149253732, "grad_norm": 27.146015167236328, "learning_rate": 9.707431457431459e-06, "loss": 24.8758, "step": 27703 }, { "epoch": 659.6208955223881, "grad_norm": 23.785661697387695, "learning_rate": 9.707070707070709e-06, "loss": 23.3186, "step": 27704 }, { "epoch": 659.644776119403, "grad_norm": 30.773475646972656, "learning_rate": 9.706709956709957e-06, "loss": 24.2602, "step": 27705 }, { "epoch": 659.6686567164179, "grad_norm": 26.643104553222656, "learning_rate": 9.706349206349208e-06, "loss": 22.5525, "step": 27706 }, { "epoch": 659.6925373134328, "grad_norm": 24.995256423950195, "learning_rate": 9.705988455988456e-06, "loss": 23.4826, "step": 27707 }, { "epoch": 659.7164179104477, "grad_norm": 22.4803524017334, "learning_rate": 9.705627705627706e-06, "loss": 24.1827, "step": 27708 }, { "epoch": 659.7402985074627, "grad_norm": 23.07716941833496, "learning_rate": 9.705266955266957e-06, "loss": 23.6241, "step": 27709 }, { "epoch": 659.7641791044776, "grad_norm": 23.736827850341797, "learning_rate": 9.704906204906207e-06, "loss": 24.0936, "step": 27710 }, { "epoch": 659.7880597014926, "grad_norm": 32.252540588378906, "learning_rate": 9.704545454545455e-06, "loss": 23.4912, "step": 27711 }, { "epoch": 659.8119402985075, "grad_norm": 33.07818603515625, "learning_rate": 9.704184704184705e-06, "loss": 24.7745, "step": 27712 }, { "epoch": 659.8358208955224, "grad_norm": 21.026386260986328, "learning_rate": 9.703823953823954e-06, "loss": 23.2175, "step": 27713 }, { "epoch": 659.8597014925373, "grad_norm": 30.461545944213867, "learning_rate": 9.703463203463204e-06, "loss": 24.9265, "step": 27714 }, { "epoch": 659.8835820895522, "grad_norm": 30.730905532836914, "learning_rate": 9.703102453102454e-06, "loss": 23.7761, "step": 27715 }, { "epoch": 659.9074626865672, "grad_norm": 22.39615249633789, "learning_rate": 9.702741702741703e-06, "loss": 22.9459, "step": 27716 }, { "epoch": 659.9313432835821, "grad_norm": 24.178117752075195, "learning_rate": 9.702380952380953e-06, "loss": 23.8389, "step": 27717 }, { "epoch": 659.955223880597, "grad_norm": 28.098398208618164, "learning_rate": 9.702020202020203e-06, "loss": 23.8301, "step": 27718 }, { "epoch": 659.9791044776119, "grad_norm": 24.60340118408203, "learning_rate": 9.701659451659452e-06, "loss": 24.1226, "step": 27719 }, { "epoch": 660.0, "grad_norm": 22.02492332458496, "learning_rate": 9.701298701298702e-06, "loss": 20.6041, "step": 27720 }, { "epoch": 660.0, "step": 27720, "total_flos": 1.3626485305458755e+18, "train_loss": 0.7312985455318963, "train_runtime": 25638.5553, "train_samples_per_second": 137.774, "train_steps_per_second": 1.081 }, { "epoch": 660.0238805970149, "grad_norm": 31.043121337890625, "learning_rate": 1e-05, "loss": 23.5532, "step": 27721 }, { "epoch": 660.0477611940298, "grad_norm": Infinity, "learning_rate": 9.999649859943978e-06, "loss": 32.2539, "step": 27722 }, { "epoch": 660.0716417910447, "grad_norm": Infinity, "learning_rate": 9.999649859943978e-06, "loss": 32.0305, "step": 27723 }, { "epoch": 660.0955223880597, "grad_norm": 477.0511779785156, "learning_rate": 9.999649859943978e-06, "loss": 31.5339, "step": 27724 }, { "epoch": 660.1194029850747, "grad_norm": 236.59033203125, "learning_rate": 9.999299719887955e-06, "loss": 30.2326, "step": 27725 }, { "epoch": 660.1432835820896, "grad_norm": 142.07669067382812, "learning_rate": 9.998949579831934e-06, "loss": 27.9829, "step": 27726 }, { "epoch": 660.1671641791045, "grad_norm": 92.471435546875, "learning_rate": 9.998599439775911e-06, "loss": 25.5594, "step": 27727 }, { "epoch": 660.1910447761194, "grad_norm": 86.55365753173828, "learning_rate": 9.998249299719889e-06, "loss": 24.9088, "step": 27728 }, { "epoch": 660.2149253731343, "grad_norm": 69.09452056884766, "learning_rate": 9.997899159663866e-06, "loss": 25.4176, "step": 27729 }, { "epoch": 660.2388059701492, "grad_norm": 73.3984146118164, "learning_rate": 9.997549019607843e-06, "loss": 23.7294, "step": 27730 }, { "epoch": 660.2626865671642, "grad_norm": 56.46774673461914, "learning_rate": 9.997198879551822e-06, "loss": 24.597, "step": 27731 }, { "epoch": 660.2865671641791, "grad_norm": 49.679874420166016, "learning_rate": 9.9968487394958e-06, "loss": 24.0804, "step": 27732 }, { "epoch": 660.310447761194, "grad_norm": 40.21451187133789, "learning_rate": 9.996498599439777e-06, "loss": 24.3012, "step": 27733 }, { "epoch": 660.334328358209, "grad_norm": 44.74810028076172, "learning_rate": 9.996148459383754e-06, "loss": 23.817, "step": 27734 }, { "epoch": 660.3582089552239, "grad_norm": 42.58028793334961, "learning_rate": 9.995798319327733e-06, "loss": 24.066, "step": 27735 }, { "epoch": 660.3820895522388, "grad_norm": 36.37326431274414, "learning_rate": 9.99544817927171e-06, "loss": 23.673, "step": 27736 }, { "epoch": 660.4059701492537, "grad_norm": 34.82540512084961, "learning_rate": 9.995098039215687e-06, "loss": 23.2029, "step": 27737 }, { "epoch": 660.4298507462687, "grad_norm": 34.54738235473633, "learning_rate": 9.994747899159664e-06, "loss": 24.5047, "step": 27738 }, { "epoch": 660.4537313432836, "grad_norm": 36.539424896240234, "learning_rate": 9.994397759103642e-06, "loss": 24.6589, "step": 27739 }, { "epoch": 660.4776119402985, "grad_norm": 28.867277145385742, "learning_rate": 9.99404761904762e-06, "loss": 24.5449, "step": 27740 }, { "epoch": 660.5014925373134, "grad_norm": 25.364084243774414, "learning_rate": 9.993697478991598e-06, "loss": 23.6606, "step": 27741 }, { "epoch": 660.5253731343283, "grad_norm": 30.428136825561523, "learning_rate": 9.993347338935575e-06, "loss": 24.3966, "step": 27742 }, { "epoch": 660.5492537313432, "grad_norm": 27.482521057128906, "learning_rate": 9.992997198879552e-06, "loss": 24.0197, "step": 27743 }, { "epoch": 660.5731343283583, "grad_norm": 26.52524757385254, "learning_rate": 9.99264705882353e-06, "loss": 23.9563, "step": 27744 }, { "epoch": 660.5970149253732, "grad_norm": 29.601648330688477, "learning_rate": 9.992296918767508e-06, "loss": 24.0547, "step": 27745 }, { "epoch": 660.6208955223881, "grad_norm": 27.55223274230957, "learning_rate": 9.991946778711486e-06, "loss": 23.699, "step": 27746 }, { "epoch": 660.644776119403, "grad_norm": 25.585954666137695, "learning_rate": 9.991596638655463e-06, "loss": 24.0155, "step": 27747 }, { "epoch": 660.6686567164179, "grad_norm": 30.396692276000977, "learning_rate": 9.99124649859944e-06, "loss": 24.2243, "step": 27748 }, { "epoch": 660.6925373134328, "grad_norm": 23.69998550415039, "learning_rate": 9.990896358543417e-06, "loss": 24.4499, "step": 27749 }, { "epoch": 660.7164179104477, "grad_norm": 28.63499641418457, "learning_rate": 9.990546218487396e-06, "loss": 23.3047, "step": 27750 }, { "epoch": 660.7402985074627, "grad_norm": 30.027944564819336, "learning_rate": 9.990196078431374e-06, "loss": 23.7199, "step": 27751 }, { "epoch": 660.7641791044776, "grad_norm": 29.068984985351562, "learning_rate": 9.98984593837535e-06, "loss": 24.3442, "step": 27752 }, { "epoch": 660.7880597014926, "grad_norm": 26.217924118041992, "learning_rate": 9.989495798319328e-06, "loss": 24.5858, "step": 27753 }, { "epoch": 660.8119402985075, "grad_norm": 24.704265594482422, "learning_rate": 9.989145658263307e-06, "loss": 24.2896, "step": 27754 }, { "epoch": 660.8358208955224, "grad_norm": 25.60186004638672, "learning_rate": 9.988795518207284e-06, "loss": 23.2738, "step": 27755 }, { "epoch": 660.8597014925373, "grad_norm": 30.563819885253906, "learning_rate": 9.988445378151261e-06, "loss": 24.4849, "step": 27756 }, { "epoch": 660.8835820895522, "grad_norm": 27.925561904907227, "learning_rate": 9.988095238095239e-06, "loss": 24.2396, "step": 27757 }, { "epoch": 660.9074626865672, "grad_norm": 28.66200828552246, "learning_rate": 9.987745098039216e-06, "loss": 23.6696, "step": 27758 }, { "epoch": 660.9313432835821, "grad_norm": 26.383285522460938, "learning_rate": 9.987394957983195e-06, "loss": 24.3024, "step": 27759 }, { "epoch": 660.955223880597, "grad_norm": 27.90447998046875, "learning_rate": 9.987044817927172e-06, "loss": 23.9018, "step": 27760 }, { "epoch": 660.9791044776119, "grad_norm": 26.694393157958984, "learning_rate": 9.98669467787115e-06, "loss": 23.7268, "step": 27761 }, { "epoch": 661.0, "grad_norm": 25.111169815063477, "learning_rate": 9.986344537815127e-06, "loss": 20.3889, "step": 27762 }, { "epoch": 661.0238805970149, "grad_norm": 24.971755981445312, "learning_rate": 9.985994397759104e-06, "loss": 23.223, "step": 27763 }, { "epoch": 661.0477611940298, "grad_norm": 25.492292404174805, "learning_rate": 9.985644257703083e-06, "loss": 22.7773, "step": 27764 }, { "epoch": 661.0716417910447, "grad_norm": 25.47602081298828, "learning_rate": 9.98529411764706e-06, "loss": 23.395, "step": 27765 }, { "epoch": 661.0955223880597, "grad_norm": 32.2619743347168, "learning_rate": 9.984943977591037e-06, "loss": 23.0216, "step": 27766 }, { "epoch": 661.1194029850747, "grad_norm": 27.262752532958984, "learning_rate": 9.984593837535014e-06, "loss": 24.6718, "step": 27767 }, { "epoch": 661.1432835820896, "grad_norm": 22.871915817260742, "learning_rate": 9.984243697478992e-06, "loss": 23.5618, "step": 27768 }, { "epoch": 661.1671641791045, "grad_norm": 40.52000427246094, "learning_rate": 9.98389355742297e-06, "loss": 24.0631, "step": 27769 }, { "epoch": 661.1910447761194, "grad_norm": 33.103172302246094, "learning_rate": 9.983543417366948e-06, "loss": 23.7302, "step": 27770 }, { "epoch": 661.2149253731343, "grad_norm": 29.344127655029297, "learning_rate": 9.983193277310925e-06, "loss": 24.2917, "step": 27771 }, { "epoch": 661.2388059701492, "grad_norm": 24.431243896484375, "learning_rate": 9.982843137254902e-06, "loss": 23.7076, "step": 27772 }, { "epoch": 661.2626865671642, "grad_norm": 30.569934844970703, "learning_rate": 9.982492997198881e-06, "loss": 23.6234, "step": 27773 }, { "epoch": 661.2865671641791, "grad_norm": NaN, "learning_rate": 9.982142857142858e-06, "loss": 29.5269, "step": 27774 }, { "epoch": 661.310447761194, "grad_norm": 25.980012893676758, "learning_rate": 9.982142857142858e-06, "loss": 23.6819, "step": 27775 }, { "epoch": 661.334328358209, "grad_norm": 28.595386505126953, "learning_rate": 9.981792717086836e-06, "loss": 23.5962, "step": 27776 }, { "epoch": 661.3582089552239, "grad_norm": 26.90094757080078, "learning_rate": 9.981442577030813e-06, "loss": 24.1049, "step": 27777 }, { "epoch": 661.3820895522388, "grad_norm": 26.29422950744629, "learning_rate": 9.98109243697479e-06, "loss": 23.6279, "step": 27778 }, { "epoch": 661.4059701492537, "grad_norm": 26.020126342773438, "learning_rate": 9.980742296918769e-06, "loss": 23.1593, "step": 27779 }, { "epoch": 661.4298507462687, "grad_norm": 24.586742401123047, "learning_rate": 9.980392156862746e-06, "loss": 23.1089, "step": 27780 }, { "epoch": 661.4537313432836, "grad_norm": 28.277482986450195, "learning_rate": 9.980042016806724e-06, "loss": 24.6503, "step": 27781 }, { "epoch": 661.4776119402985, "grad_norm": 28.50227165222168, "learning_rate": 9.9796918767507e-06, "loss": 23.6698, "step": 27782 }, { "epoch": 661.5014925373134, "grad_norm": 29.14333724975586, "learning_rate": 9.979341736694678e-06, "loss": 23.7779, "step": 27783 }, { "epoch": 661.5253731343283, "grad_norm": 26.661670684814453, "learning_rate": 9.978991596638657e-06, "loss": 23.8505, "step": 27784 }, { "epoch": 661.5492537313432, "grad_norm": 22.219894409179688, "learning_rate": 9.978641456582634e-06, "loss": 23.4566, "step": 27785 }, { "epoch": 661.5731343283583, "grad_norm": 26.88197898864746, "learning_rate": 9.978291316526611e-06, "loss": 23.8678, "step": 27786 }, { "epoch": 661.5970149253732, "grad_norm": 25.149669647216797, "learning_rate": 9.977941176470589e-06, "loss": 24.1438, "step": 27787 }, { "epoch": 661.6208955223881, "grad_norm": 26.3154239654541, "learning_rate": 9.977591036414566e-06, "loss": 23.9046, "step": 27788 }, { "epoch": 661.644776119403, "grad_norm": 29.16592788696289, "learning_rate": 9.977240896358545e-06, "loss": 23.7997, "step": 27789 }, { "epoch": 661.6686567164179, "grad_norm": 33.2398681640625, "learning_rate": 9.976890756302522e-06, "loss": 22.5122, "step": 27790 }, { "epoch": 661.6925373134328, "grad_norm": 29.379756927490234, "learning_rate": 9.9765406162465e-06, "loss": 23.5553, "step": 27791 }, { "epoch": 661.7164179104477, "grad_norm": 27.6501522064209, "learning_rate": 9.976190476190477e-06, "loss": 23.8541, "step": 27792 }, { "epoch": 661.7402985074627, "grad_norm": 25.622169494628906, "learning_rate": 9.975840336134456e-06, "loss": 24.2605, "step": 27793 }, { "epoch": 661.7641791044776, "grad_norm": 30.703405380249023, "learning_rate": 9.975490196078433e-06, "loss": 23.5385, "step": 27794 }, { "epoch": 661.7880597014926, "grad_norm": 34.07145309448242, "learning_rate": 9.97514005602241e-06, "loss": 23.4004, "step": 27795 }, { "epoch": 661.8119402985075, "grad_norm": 24.851993560791016, "learning_rate": 9.974789915966387e-06, "loss": 23.5931, "step": 27796 }, { "epoch": 661.8358208955224, "grad_norm": 25.471948623657227, "learning_rate": 9.974439775910364e-06, "loss": 24.4141, "step": 27797 }, { "epoch": 661.8597014925373, "grad_norm": 33.07841491699219, "learning_rate": 9.974089635854343e-06, "loss": 23.6542, "step": 27798 }, { "epoch": 661.8835820895522, "grad_norm": 28.14122772216797, "learning_rate": 9.97373949579832e-06, "loss": 24.4802, "step": 27799 }, { "epoch": 661.9074626865672, "grad_norm": 21.794437408447266, "learning_rate": 9.973389355742298e-06, "loss": 24.1156, "step": 27800 }, { "epoch": 661.9313432835821, "grad_norm": 26.0432186126709, "learning_rate": 9.973039215686275e-06, "loss": 24.8789, "step": 27801 }, { "epoch": 661.955223880597, "grad_norm": NaN, "learning_rate": 9.972689075630252e-06, "loss": 25.6686, "step": 27802 }, { "epoch": 661.9791044776119, "grad_norm": NaN, "learning_rate": 9.972689075630252e-06, "loss": 21.016, "step": 27803 }, { "epoch": 662.0, "grad_norm": 21.405139923095703, "learning_rate": 9.972689075630252e-06, "loss": 20.3975, "step": 27804 }, { "epoch": 662.0238805970149, "grad_norm": 29.019128799438477, "learning_rate": 9.972338935574231e-06, "loss": 23.5915, "step": 27805 }, { "epoch": 662.0477611940298, "grad_norm": 24.119226455688477, "learning_rate": 9.971988795518209e-06, "loss": 24.8324, "step": 27806 }, { "epoch": 662.0716417910447, "grad_norm": 30.2606143951416, "learning_rate": 9.971638655462186e-06, "loss": 24.987, "step": 27807 }, { "epoch": 662.0955223880597, "grad_norm": 29.214004516601562, "learning_rate": 9.971288515406163e-06, "loss": 23.7869, "step": 27808 }, { "epoch": 662.1194029850747, "grad_norm": 23.756948471069336, "learning_rate": 9.97093837535014e-06, "loss": 22.6286, "step": 27809 }, { "epoch": 662.1432835820896, "grad_norm": 26.405725479125977, "learning_rate": 9.970588235294119e-06, "loss": 23.6894, "step": 27810 }, { "epoch": 662.1671641791045, "grad_norm": 27.360219955444336, "learning_rate": 9.970238095238096e-06, "loss": 23.766, "step": 27811 }, { "epoch": 662.1910447761194, "grad_norm": 35.345069885253906, "learning_rate": 9.969887955182074e-06, "loss": 23.7916, "step": 27812 }, { "epoch": 662.2149253731343, "grad_norm": 26.573898315429688, "learning_rate": 9.969537815126051e-06, "loss": 23.4086, "step": 27813 }, { "epoch": 662.2388059701492, "grad_norm": 21.715124130249023, "learning_rate": 9.969187675070028e-06, "loss": 24.3089, "step": 27814 }, { "epoch": 662.2626865671642, "grad_norm": 42.01942825317383, "learning_rate": 9.968837535014007e-06, "loss": 23.667, "step": 27815 }, { "epoch": 662.2865671641791, "grad_norm": 30.023157119750977, "learning_rate": 9.968487394957984e-06, "loss": 24.1217, "step": 27816 }, { "epoch": 662.310447761194, "grad_norm": 34.099342346191406, "learning_rate": 9.968137254901961e-06, "loss": 24.4005, "step": 27817 }, { "epoch": 662.334328358209, "grad_norm": 32.29900360107422, "learning_rate": 9.967787114845939e-06, "loss": 23.3879, "step": 27818 }, { "epoch": 662.3582089552239, "grad_norm": 30.734378814697266, "learning_rate": 9.967436974789918e-06, "loss": 23.8854, "step": 27819 }, { "epoch": 662.3820895522388, "grad_norm": 24.96274757385254, "learning_rate": 9.967086834733895e-06, "loss": 24.0067, "step": 27820 }, { "epoch": 662.4059701492537, "grad_norm": 28.50667381286621, "learning_rate": 9.966736694677872e-06, "loss": 23.5866, "step": 27821 }, { "epoch": 662.4298507462687, "grad_norm": 24.112167358398438, "learning_rate": 9.96638655462185e-06, "loss": 23.9909, "step": 27822 }, { "epoch": 662.4537313432836, "grad_norm": 33.15054702758789, "learning_rate": 9.966036414565827e-06, "loss": 25.1405, "step": 27823 }, { "epoch": 662.4776119402985, "grad_norm": 25.243532180786133, "learning_rate": 9.965686274509806e-06, "loss": 22.9736, "step": 27824 }, { "epoch": 662.5014925373134, "grad_norm": 30.245147705078125, "learning_rate": 9.965336134453783e-06, "loss": 23.973, "step": 27825 }, { "epoch": 662.5253731343283, "grad_norm": 25.174617767333984, "learning_rate": 9.96498599439776e-06, "loss": 22.9115, "step": 27826 }, { "epoch": 662.5492537313432, "grad_norm": 29.395139694213867, "learning_rate": 9.964635854341737e-06, "loss": 23.592, "step": 27827 }, { "epoch": 662.5731343283583, "grad_norm": 32.494571685791016, "learning_rate": 9.964285714285714e-06, "loss": 23.8368, "step": 27828 }, { "epoch": 662.5970149253732, "grad_norm": 28.71198081970215, "learning_rate": 9.963935574229693e-06, "loss": 23.4779, "step": 27829 }, { "epoch": 662.6208955223881, "grad_norm": 30.151912689208984, "learning_rate": 9.96358543417367e-06, "loss": 24.2947, "step": 27830 }, { "epoch": 662.644776119403, "grad_norm": 22.531126022338867, "learning_rate": 9.963235294117648e-06, "loss": 22.93, "step": 27831 }, { "epoch": 662.6686567164179, "grad_norm": 32.26754379272461, "learning_rate": 9.962885154061625e-06, "loss": 23.8611, "step": 27832 }, { "epoch": 662.6925373134328, "grad_norm": 29.994827270507812, "learning_rate": 9.962535014005602e-06, "loss": 23.1668, "step": 27833 }, { "epoch": 662.7164179104477, "grad_norm": 21.665407180786133, "learning_rate": 9.962184873949581e-06, "loss": 23.2178, "step": 27834 }, { "epoch": 662.7402985074627, "grad_norm": 27.696849822998047, "learning_rate": 9.961834733893559e-06, "loss": 23.4998, "step": 27835 }, { "epoch": 662.7641791044776, "grad_norm": 30.576454162597656, "learning_rate": 9.961484593837536e-06, "loss": 23.968, "step": 27836 }, { "epoch": 662.7880597014926, "grad_norm": 25.522886276245117, "learning_rate": 9.961134453781513e-06, "loss": 23.7748, "step": 27837 }, { "epoch": 662.8119402985075, "grad_norm": 31.32742691040039, "learning_rate": 9.960784313725492e-06, "loss": 24.0649, "step": 27838 }, { "epoch": 662.8358208955224, "grad_norm": 24.346332550048828, "learning_rate": 9.96043417366947e-06, "loss": 23.697, "step": 27839 }, { "epoch": 662.8597014925373, "grad_norm": 26.38673210144043, "learning_rate": 9.960084033613446e-06, "loss": 23.7818, "step": 27840 }, { "epoch": 662.8835820895522, "grad_norm": NaN, "learning_rate": 9.959733893557424e-06, "loss": 22.5067, "step": 27841 }, { "epoch": 662.9074626865672, "grad_norm": 28.948406219482422, "learning_rate": 9.959733893557424e-06, "loss": 22.8503, "step": 27842 }, { "epoch": 662.9313432835821, "grad_norm": 31.875516891479492, "learning_rate": 9.959383753501401e-06, "loss": 24.1398, "step": 27843 }, { "epoch": 662.955223880597, "grad_norm": 28.437366485595703, "learning_rate": 9.95903361344538e-06, "loss": 24.5373, "step": 27844 }, { "epoch": 662.9791044776119, "grad_norm": 24.29119300842285, "learning_rate": 9.958683473389357e-06, "loss": 23.9354, "step": 27845 }, { "epoch": 663.0, "grad_norm": 22.224313735961914, "learning_rate": 9.958333333333334e-06, "loss": 20.8756, "step": 27846 }, { "epoch": 663.0238805970149, "grad_norm": 25.46147346496582, "learning_rate": 9.957983193277312e-06, "loss": 23.8485, "step": 27847 }, { "epoch": 663.0477611940298, "grad_norm": 30.015182495117188, "learning_rate": 9.957633053221289e-06, "loss": 23.6518, "step": 27848 }, { "epoch": 663.0716417910447, "grad_norm": 31.626977920532227, "learning_rate": 9.957282913165268e-06, "loss": 23.2009, "step": 27849 }, { "epoch": 663.0955223880597, "grad_norm": 22.64250373840332, "learning_rate": 9.956932773109245e-06, "loss": 23.6227, "step": 27850 }, { "epoch": 663.1194029850747, "grad_norm": 26.47330665588379, "learning_rate": 9.956582633053222e-06, "loss": 23.2134, "step": 27851 }, { "epoch": 663.1432835820896, "grad_norm": 23.409517288208008, "learning_rate": 9.9562324929972e-06, "loss": 23.7767, "step": 27852 }, { "epoch": 663.1671641791045, "grad_norm": 25.988758087158203, "learning_rate": 9.955882352941177e-06, "loss": 23.0483, "step": 27853 }, { "epoch": 663.1910447761194, "grad_norm": 24.798179626464844, "learning_rate": 9.955532212885156e-06, "loss": 23.9384, "step": 27854 }, { "epoch": 663.2149253731343, "grad_norm": 31.156259536743164, "learning_rate": 9.955182072829133e-06, "loss": 23.7712, "step": 27855 }, { "epoch": 663.2388059701492, "grad_norm": 29.477062225341797, "learning_rate": 9.95483193277311e-06, "loss": 23.606, "step": 27856 }, { "epoch": 663.2626865671642, "grad_norm": 30.879825592041016, "learning_rate": 9.954481792717087e-06, "loss": 23.997, "step": 27857 }, { "epoch": 663.2865671641791, "grad_norm": 24.305809020996094, "learning_rate": 9.954131652661066e-06, "loss": 23.874, "step": 27858 }, { "epoch": 663.310447761194, "grad_norm": 29.31652069091797, "learning_rate": 9.953781512605043e-06, "loss": 23.3938, "step": 27859 }, { "epoch": 663.334328358209, "grad_norm": 30.7676944732666, "learning_rate": 9.95343137254902e-06, "loss": 24.7118, "step": 27860 }, { "epoch": 663.3582089552239, "grad_norm": 27.460371017456055, "learning_rate": 9.953081232492998e-06, "loss": 24.7194, "step": 27861 }, { "epoch": 663.3820895522388, "grad_norm": 27.6529541015625, "learning_rate": 9.952731092436975e-06, "loss": 23.9811, "step": 27862 }, { "epoch": 663.4059701492537, "grad_norm": 23.993913650512695, "learning_rate": 9.952380952380954e-06, "loss": 22.788, "step": 27863 }, { "epoch": 663.4298507462687, "grad_norm": 27.692190170288086, "learning_rate": 9.952030812324931e-06, "loss": 23.0796, "step": 27864 }, { "epoch": 663.4537313432836, "grad_norm": 27.72395133972168, "learning_rate": 9.951680672268909e-06, "loss": 23.2861, "step": 27865 }, { "epoch": 663.4776119402985, "grad_norm": 32.44499588012695, "learning_rate": 9.951330532212886e-06, "loss": 24.2573, "step": 27866 }, { "epoch": 663.5014925373134, "grad_norm": 27.62234115600586, "learning_rate": 9.950980392156863e-06, "loss": 24.2703, "step": 27867 }, { "epoch": 663.5253731343283, "grad_norm": 24.544164657592773, "learning_rate": 9.950630252100842e-06, "loss": 24.1872, "step": 27868 }, { "epoch": 663.5492537313432, "grad_norm": 23.36867332458496, "learning_rate": 9.95028011204482e-06, "loss": 23.719, "step": 27869 }, { "epoch": 663.5731343283583, "grad_norm": 26.852781295776367, "learning_rate": 9.949929971988796e-06, "loss": 23.9474, "step": 27870 }, { "epoch": 663.5970149253732, "grad_norm": 29.524154663085938, "learning_rate": 9.949579831932774e-06, "loss": 24.2673, "step": 27871 }, { "epoch": 663.6208955223881, "grad_norm": 27.42780876159668, "learning_rate": 9.949229691876751e-06, "loss": 23.8874, "step": 27872 }, { "epoch": 663.644776119403, "grad_norm": 26.328275680541992, "learning_rate": 9.94887955182073e-06, "loss": 24.0043, "step": 27873 }, { "epoch": 663.6686567164179, "grad_norm": 24.924297332763672, "learning_rate": 9.948529411764707e-06, "loss": 23.7572, "step": 27874 }, { "epoch": 663.6925373134328, "grad_norm": 24.594135284423828, "learning_rate": 9.948179271708684e-06, "loss": 23.5814, "step": 27875 }, { "epoch": 663.7164179104477, "grad_norm": 24.61986541748047, "learning_rate": 9.947829131652662e-06, "loss": 22.7382, "step": 27876 }, { "epoch": 663.7402985074627, "grad_norm": 23.868558883666992, "learning_rate": 9.94747899159664e-06, "loss": 22.7798, "step": 27877 }, { "epoch": 663.7641791044776, "grad_norm": 26.15067481994629, "learning_rate": 9.947128851540618e-06, "loss": 23.8903, "step": 27878 }, { "epoch": 663.7880597014926, "grad_norm": 27.647140502929688, "learning_rate": 9.946778711484595e-06, "loss": 22.9178, "step": 27879 }, { "epoch": 663.8119402985075, "grad_norm": 32.45333480834961, "learning_rate": 9.946428571428572e-06, "loss": 23.948, "step": 27880 }, { "epoch": 663.8358208955224, "grad_norm": 25.64783477783203, "learning_rate": 9.94607843137255e-06, "loss": 23.5729, "step": 27881 }, { "epoch": 663.8597014925373, "grad_norm": 24.679834365844727, "learning_rate": 9.945728291316528e-06, "loss": 23.1554, "step": 27882 }, { "epoch": 663.8835820895522, "grad_norm": 26.207914352416992, "learning_rate": 9.945378151260506e-06, "loss": 24.0881, "step": 27883 }, { "epoch": 663.9074626865672, "grad_norm": 26.96107292175293, "learning_rate": 9.945028011204483e-06, "loss": 23.5054, "step": 27884 }, { "epoch": 663.9313432835821, "grad_norm": 30.922147750854492, "learning_rate": 9.94467787114846e-06, "loss": 23.5104, "step": 27885 }, { "epoch": 663.955223880597, "grad_norm": 26.356897354125977, "learning_rate": 9.944327731092437e-06, "loss": 23.7775, "step": 27886 }, { "epoch": 663.9791044776119, "grad_norm": 28.768064498901367, "learning_rate": 9.943977591036416e-06, "loss": 24.4624, "step": 27887 }, { "epoch": 664.0, "grad_norm": 26.142288208007812, "learning_rate": 9.943627450980393e-06, "loss": 21.3609, "step": 27888 }, { "epoch": 664.0238805970149, "grad_norm": 36.84703063964844, "learning_rate": 9.94327731092437e-06, "loss": 23.5891, "step": 27889 }, { "epoch": 664.0477611940298, "grad_norm": 25.004491806030273, "learning_rate": 9.942927170868348e-06, "loss": 23.2716, "step": 27890 }, { "epoch": 664.0716417910447, "grad_norm": 25.40360450744629, "learning_rate": 9.942577030812325e-06, "loss": 23.7848, "step": 27891 }, { "epoch": 664.0955223880597, "grad_norm": 29.375789642333984, "learning_rate": 9.942226890756304e-06, "loss": 22.9277, "step": 27892 }, { "epoch": 664.1194029850747, "grad_norm": 34.5162239074707, "learning_rate": 9.941876750700281e-06, "loss": 23.8421, "step": 27893 }, { "epoch": 664.1432835820896, "grad_norm": 25.08316421508789, "learning_rate": 9.941526610644259e-06, "loss": 24.0127, "step": 27894 }, { "epoch": 664.1671641791045, "grad_norm": 28.712108612060547, "learning_rate": 9.941176470588236e-06, "loss": 23.5329, "step": 27895 }, { "epoch": 664.1910447761194, "grad_norm": 38.28528594970703, "learning_rate": 9.940826330532215e-06, "loss": 23.9455, "step": 27896 }, { "epoch": 664.2149253731343, "grad_norm": 24.946470260620117, "learning_rate": 9.940476190476192e-06, "loss": 23.7965, "step": 27897 }, { "epoch": 664.2388059701492, "grad_norm": 30.632293701171875, "learning_rate": 9.94012605042017e-06, "loss": 22.5503, "step": 27898 }, { "epoch": 664.2626865671642, "grad_norm": 36.43370819091797, "learning_rate": 9.939775910364146e-06, "loss": 22.994, "step": 27899 }, { "epoch": 664.2865671641791, "grad_norm": 24.365968704223633, "learning_rate": 9.939425770308124e-06, "loss": 24.274, "step": 27900 }, { "epoch": 664.310447761194, "grad_norm": 32.12224578857422, "learning_rate": 9.939075630252103e-06, "loss": 23.7088, "step": 27901 }, { "epoch": 664.334328358209, "grad_norm": 35.5211296081543, "learning_rate": 9.93872549019608e-06, "loss": 24.0825, "step": 27902 }, { "epoch": 664.3582089552239, "grad_norm": 24.052671432495117, "learning_rate": 9.938375350140057e-06, "loss": 23.8308, "step": 27903 }, { "epoch": 664.3820895522388, "grad_norm": 31.417722702026367, "learning_rate": 9.938025210084034e-06, "loss": 24.1691, "step": 27904 }, { "epoch": 664.4059701492537, "grad_norm": 33.81781005859375, "learning_rate": 9.937675070028012e-06, "loss": 23.5591, "step": 27905 }, { "epoch": 664.4298507462687, "grad_norm": 28.032344818115234, "learning_rate": 9.93732492997199e-06, "loss": 24.11, "step": 27906 }, { "epoch": 664.4537313432836, "grad_norm": 22.883941650390625, "learning_rate": 9.936974789915968e-06, "loss": 23.535, "step": 27907 }, { "epoch": 664.4776119402985, "grad_norm": 29.02946662902832, "learning_rate": 9.936624649859945e-06, "loss": 23.3264, "step": 27908 }, { "epoch": 664.5014925373134, "grad_norm": 28.946544647216797, "learning_rate": 9.936274509803922e-06, "loss": 24.3286, "step": 27909 }, { "epoch": 664.5253731343283, "grad_norm": 30.354461669921875, "learning_rate": 9.9359243697479e-06, "loss": 24.1835, "step": 27910 }, { "epoch": 664.5492537313432, "grad_norm": 23.58377456665039, "learning_rate": 9.935574229691878e-06, "loss": 23.7591, "step": 27911 }, { "epoch": 664.5731343283583, "grad_norm": 27.44983673095703, "learning_rate": 9.935224089635856e-06, "loss": 23.5251, "step": 27912 }, { "epoch": 664.5970149253732, "grad_norm": 34.30299377441406, "learning_rate": 9.934873949579833e-06, "loss": 23.4955, "step": 27913 }, { "epoch": 664.6208955223881, "grad_norm": 26.145023345947266, "learning_rate": 9.93452380952381e-06, "loss": 24.4323, "step": 27914 }, { "epoch": 664.644776119403, "grad_norm": 26.24932289123535, "learning_rate": 9.934173669467789e-06, "loss": 23.5371, "step": 27915 }, { "epoch": 664.6686567164179, "grad_norm": 33.9185791015625, "learning_rate": 9.933823529411766e-06, "loss": 23.1952, "step": 27916 }, { "epoch": 664.6925373134328, "grad_norm": 29.557518005371094, "learning_rate": 9.933473389355743e-06, "loss": 23.7027, "step": 27917 }, { "epoch": 664.7164179104477, "grad_norm": 24.070606231689453, "learning_rate": 9.93312324929972e-06, "loss": 24.0473, "step": 27918 }, { "epoch": 664.7402985074627, "grad_norm": 37.00263214111328, "learning_rate": 9.932773109243698e-06, "loss": 23.8094, "step": 27919 }, { "epoch": 664.7641791044776, "grad_norm": 28.35333824157715, "learning_rate": 9.932422969187677e-06, "loss": 24.0282, "step": 27920 }, { "epoch": 664.7880597014926, "grad_norm": 30.921873092651367, "learning_rate": 9.932072829131654e-06, "loss": 23.6745, "step": 27921 }, { "epoch": 664.8119402985075, "grad_norm": 31.997854232788086, "learning_rate": 9.931722689075631e-06, "loss": 23.5608, "step": 27922 }, { "epoch": 664.8358208955224, "grad_norm": 28.497461318969727, "learning_rate": 9.931372549019609e-06, "loss": 23.6963, "step": 27923 }, { "epoch": 664.8597014925373, "grad_norm": 25.528728485107422, "learning_rate": 9.931022408963586e-06, "loss": 23.3537, "step": 27924 }, { "epoch": 664.8835820895522, "grad_norm": 30.48784637451172, "learning_rate": 9.930672268907565e-06, "loss": 24.2071, "step": 27925 }, { "epoch": 664.9074626865672, "grad_norm": 32.28633117675781, "learning_rate": 9.930322128851542e-06, "loss": 23.5348, "step": 27926 }, { "epoch": 664.9313432835821, "grad_norm": 27.63132667541504, "learning_rate": 9.92997198879552e-06, "loss": 22.8975, "step": 27927 }, { "epoch": 664.955223880597, "grad_norm": 24.013532638549805, "learning_rate": 9.929621848739496e-06, "loss": 23.6205, "step": 27928 }, { "epoch": 664.9791044776119, "grad_norm": 32.82793045043945, "learning_rate": 9.929271708683474e-06, "loss": 23.3947, "step": 27929 }, { "epoch": 665.0, "grad_norm": 32.21843719482422, "learning_rate": 9.928921568627453e-06, "loss": 21.136, "step": 27930 }, { "epoch": 665.0238805970149, "grad_norm": 22.123016357421875, "learning_rate": 9.92857142857143e-06, "loss": 22.5888, "step": 27931 }, { "epoch": 665.0477611940298, "grad_norm": 24.667259216308594, "learning_rate": 9.928221288515407e-06, "loss": 24.3152, "step": 27932 }, { "epoch": 665.0716417910447, "grad_norm": 28.497156143188477, "learning_rate": 9.927871148459384e-06, "loss": 23.3464, "step": 27933 }, { "epoch": 665.0955223880597, "grad_norm": 29.39504623413086, "learning_rate": 9.927521008403363e-06, "loss": 23.3136, "step": 27934 }, { "epoch": 665.1194029850747, "grad_norm": 26.971254348754883, "learning_rate": 9.92717086834734e-06, "loss": 24.0311, "step": 27935 }, { "epoch": 665.1432835820896, "grad_norm": 27.261659622192383, "learning_rate": 9.926820728291318e-06, "loss": 23.1316, "step": 27936 }, { "epoch": 665.1671641791045, "grad_norm": 39.17470932006836, "learning_rate": 9.926470588235295e-06, "loss": 23.2365, "step": 27937 }, { "epoch": 665.1910447761194, "grad_norm": 30.38986587524414, "learning_rate": 9.926120448179272e-06, "loss": 23.8104, "step": 27938 }, { "epoch": 665.2149253731343, "grad_norm": 26.768701553344727, "learning_rate": 9.925770308123251e-06, "loss": 23.2984, "step": 27939 }, { "epoch": 665.2388059701492, "grad_norm": 43.10434341430664, "learning_rate": 9.925420168067228e-06, "loss": 24.0229, "step": 27940 }, { "epoch": 665.2626865671642, "grad_norm": 24.400781631469727, "learning_rate": 9.925070028011206e-06, "loss": 24.0787, "step": 27941 }, { "epoch": 665.2865671641791, "grad_norm": 45.64080810546875, "learning_rate": 9.924719887955183e-06, "loss": 23.9997, "step": 27942 }, { "epoch": 665.310447761194, "grad_norm": 29.548673629760742, "learning_rate": 9.92436974789916e-06, "loss": 24.2188, "step": 27943 }, { "epoch": 665.334328358209, "grad_norm": 45.74468231201172, "learning_rate": 9.924019607843139e-06, "loss": 23.4648, "step": 27944 }, { "epoch": 665.3582089552239, "grad_norm": 36.60218048095703, "learning_rate": 9.923669467787116e-06, "loss": 24.211, "step": 27945 }, { "epoch": 665.3820895522388, "grad_norm": 28.405288696289062, "learning_rate": 9.923319327731093e-06, "loss": 23.4693, "step": 27946 }, { "epoch": 665.4059701492537, "grad_norm": 49.586666107177734, "learning_rate": 9.92296918767507e-06, "loss": 23.0361, "step": 27947 }, { "epoch": 665.4298507462687, "grad_norm": 30.595346450805664, "learning_rate": 9.922619047619048e-06, "loss": 22.911, "step": 27948 }, { "epoch": 665.4537313432836, "grad_norm": 52.73930740356445, "learning_rate": 9.922268907563027e-06, "loss": 23.4978, "step": 27949 }, { "epoch": 665.4776119402985, "grad_norm": 39.91150665283203, "learning_rate": 9.921918767507004e-06, "loss": 24.0617, "step": 27950 }, { "epoch": 665.5014925373134, "grad_norm": 57.29331970214844, "learning_rate": 9.921568627450981e-06, "loss": 24.325, "step": 27951 }, { "epoch": 665.5253731343283, "grad_norm": 41.64246368408203, "learning_rate": 9.921218487394959e-06, "loss": 24.1873, "step": 27952 }, { "epoch": 665.5492537313432, "grad_norm": 56.38955307006836, "learning_rate": 9.920868347338937e-06, "loss": 24.5887, "step": 27953 }, { "epoch": 665.5731343283583, "grad_norm": 48.794212341308594, "learning_rate": 9.920518207282915e-06, "loss": 23.5384, "step": 27954 }, { "epoch": 665.5970149253732, "grad_norm": 55.477596282958984, "learning_rate": 9.920168067226892e-06, "loss": 24.4358, "step": 27955 }, { "epoch": 665.6208955223881, "grad_norm": 54.58844757080078, "learning_rate": 9.91981792717087e-06, "loss": 23.7011, "step": 27956 }, { "epoch": 665.644776119403, "grad_norm": 40.3865852355957, "learning_rate": 9.919467787114846e-06, "loss": 23.3387, "step": 27957 }, { "epoch": 665.6686567164179, "grad_norm": 43.249305725097656, "learning_rate": 9.919117647058825e-06, "loss": 23.7229, "step": 27958 }, { "epoch": 665.6925373134328, "grad_norm": 46.663429260253906, "learning_rate": 9.918767507002803e-06, "loss": 23.4771, "step": 27959 }, { "epoch": 665.7164179104477, "grad_norm": 36.12270736694336, "learning_rate": 9.91841736694678e-06, "loss": 24.0106, "step": 27960 }, { "epoch": 665.7402985074627, "grad_norm": 56.828758239746094, "learning_rate": 9.918067226890757e-06, "loss": 24.8478, "step": 27961 }, { "epoch": 665.7641791044776, "grad_norm": 45.077083587646484, "learning_rate": 9.917717086834734e-06, "loss": 23.835, "step": 27962 }, { "epoch": 665.7880597014926, "grad_norm": 52.47279739379883, "learning_rate": 9.917366946778713e-06, "loss": 22.833, "step": 27963 }, { "epoch": 665.8119402985075, "grad_norm": 47.243560791015625, "learning_rate": 9.91701680672269e-06, "loss": 24.3589, "step": 27964 }, { "epoch": 665.8358208955224, "grad_norm": 47.371620178222656, "learning_rate": 9.916666666666668e-06, "loss": 23.9981, "step": 27965 }, { "epoch": 665.8597014925373, "grad_norm": 39.6259880065918, "learning_rate": 9.916316526610645e-06, "loss": 23.7065, "step": 27966 }, { "epoch": 665.8835820895522, "grad_norm": 50.2418212890625, "learning_rate": 9.915966386554622e-06, "loss": 23.7275, "step": 27967 }, { "epoch": 665.9074626865672, "grad_norm": 42.290096282958984, "learning_rate": 9.915616246498601e-06, "loss": 22.0022, "step": 27968 }, { "epoch": 665.9313432835821, "grad_norm": 53.214263916015625, "learning_rate": 9.915266106442578e-06, "loss": 23.3683, "step": 27969 }, { "epoch": 665.955223880597, "grad_norm": 49.36977005004883, "learning_rate": 9.914915966386556e-06, "loss": 23.0505, "step": 27970 }, { "epoch": 665.9791044776119, "grad_norm": 44.63957214355469, "learning_rate": 9.914565826330533e-06, "loss": 23.1952, "step": 27971 }, { "epoch": 666.0, "grad_norm": 38.876895904541016, "learning_rate": 9.914215686274512e-06, "loss": 20.7382, "step": 27972 }, { "epoch": 666.0238805970149, "grad_norm": 47.05699157714844, "learning_rate": 9.913865546218489e-06, "loss": 23.4437, "step": 27973 }, { "epoch": 666.0477611940298, "grad_norm": 41.78810119628906, "learning_rate": 9.913515406162466e-06, "loss": 23.4993, "step": 27974 }, { "epoch": 666.0716417910447, "grad_norm": 49.530879974365234, "learning_rate": 9.913165266106443e-06, "loss": 23.2552, "step": 27975 }, { "epoch": 666.0955223880597, "grad_norm": 43.61246109008789, "learning_rate": 9.91281512605042e-06, "loss": 24.2852, "step": 27976 }, { "epoch": 666.1194029850747, "grad_norm": 44.68059158325195, "learning_rate": 9.9124649859944e-06, "loss": 23.5971, "step": 27977 }, { "epoch": 666.1432835820896, "grad_norm": 44.04079818725586, "learning_rate": 9.912114845938377e-06, "loss": 23.6857, "step": 27978 }, { "epoch": 666.1671641791045, "grad_norm": 47.92853927612305, "learning_rate": 9.911764705882354e-06, "loss": 23.3935, "step": 27979 }, { "epoch": 666.1910447761194, "grad_norm": 42.45918273925781, "learning_rate": 9.911414565826331e-06, "loss": 24.6001, "step": 27980 }, { "epoch": 666.2149253731343, "grad_norm": 49.71293258666992, "learning_rate": 9.911064425770309e-06, "loss": 23.5621, "step": 27981 }, { "epoch": 666.2388059701492, "grad_norm": 43.063297271728516, "learning_rate": 9.910714285714288e-06, "loss": 23.099, "step": 27982 }, { "epoch": 666.2626865671642, "grad_norm": 47.20065689086914, "learning_rate": 9.910364145658265e-06, "loss": 23.6537, "step": 27983 }, { "epoch": 666.2865671641791, "grad_norm": 41.25203323364258, "learning_rate": 9.910014005602242e-06, "loss": 23.7645, "step": 27984 }, { "epoch": 666.310447761194, "grad_norm": 47.099937438964844, "learning_rate": 9.90966386554622e-06, "loss": 24.3489, "step": 27985 }, { "epoch": 666.334328358209, "grad_norm": 42.63725280761719, "learning_rate": 9.909313725490196e-06, "loss": 24.0659, "step": 27986 }, { "epoch": 666.3582089552239, "grad_norm": 49.94467544555664, "learning_rate": 9.908963585434175e-06, "loss": 23.2569, "step": 27987 }, { "epoch": 666.3820895522388, "grad_norm": 46.246829986572266, "learning_rate": 9.908613445378153e-06, "loss": 23.4426, "step": 27988 }, { "epoch": 666.4059701492537, "grad_norm": NaN, "learning_rate": 9.90826330532213e-06, "loss": 29.8728, "step": 27989 }, { "epoch": 666.4298507462687, "grad_norm": 39.374366760253906, "learning_rate": 9.90826330532213e-06, "loss": 23.1853, "step": 27990 }, { "epoch": 666.4537313432836, "grad_norm": 42.494911193847656, "learning_rate": 9.907913165266107e-06, "loss": 22.526, "step": 27991 }, { "epoch": 666.4776119402985, "grad_norm": 42.09098434448242, "learning_rate": 9.907563025210084e-06, "loss": 23.0223, "step": 27992 }, { "epoch": 666.5014925373134, "grad_norm": 37.32732391357422, "learning_rate": 9.907212885154063e-06, "loss": 23.9271, "step": 27993 }, { "epoch": 666.5253731343283, "grad_norm": 54.65208053588867, "learning_rate": 9.90686274509804e-06, "loss": 24.9641, "step": 27994 }, { "epoch": 666.5492537313432, "grad_norm": 48.15755081176758, "learning_rate": 9.906512605042018e-06, "loss": 22.9597, "step": 27995 }, { "epoch": 666.5731343283583, "grad_norm": 48.2169189453125, "learning_rate": 9.906162464985995e-06, "loss": 23.73, "step": 27996 }, { "epoch": 666.5970149253732, "grad_norm": 46.0543098449707, "learning_rate": 9.905812324929974e-06, "loss": 24.1245, "step": 27997 }, { "epoch": 666.6208955223881, "grad_norm": 36.8085823059082, "learning_rate": 9.905462184873951e-06, "loss": 22.6318, "step": 27998 }, { "epoch": 666.644776119403, "grad_norm": 36.735782623291016, "learning_rate": 9.905112044817928e-06, "loss": 23.7083, "step": 27999 }, { "epoch": 666.6686567164179, "grad_norm": NaN, "learning_rate": 9.904761904761906e-06, "loss": 20.1884, "step": 28000 }, { "epoch": 666.6925373134328, "grad_norm": 45.80076217651367, "learning_rate": 9.904761904761906e-06, "loss": 24.1821, "step": 28001 }, { "epoch": 666.7164179104477, "grad_norm": NaN, "learning_rate": 9.904411764705883e-06, "loss": 27.6099, "step": 28002 }, { "epoch": 666.7402985074627, "grad_norm": 39.95090103149414, "learning_rate": 9.904411764705883e-06, "loss": 23.1922, "step": 28003 }, { "epoch": 666.7641791044776, "grad_norm": 50.89797592163086, "learning_rate": 9.904061624649862e-06, "loss": 23.7368, "step": 28004 }, { "epoch": 666.7880597014926, "grad_norm": 51.66702651977539, "learning_rate": 9.903711484593839e-06, "loss": 23.3581, "step": 28005 }, { "epoch": 666.8119402985075, "grad_norm": 39.952056884765625, "learning_rate": 9.903361344537816e-06, "loss": 23.2723, "step": 28006 }, { "epoch": 666.8358208955224, "grad_norm": 39.79604721069336, "learning_rate": 9.903011204481793e-06, "loss": 22.099, "step": 28007 }, { "epoch": 666.8597014925373, "grad_norm": 38.3823127746582, "learning_rate": 9.90266106442577e-06, "loss": 24.6118, "step": 28008 }, { "epoch": 666.8835820895522, "grad_norm": 33.34206771850586, "learning_rate": 9.90231092436975e-06, "loss": 23.8447, "step": 28009 }, { "epoch": 666.9074626865672, "grad_norm": 48.71775817871094, "learning_rate": 9.901960784313727e-06, "loss": 23.9476, "step": 28010 }, { "epoch": 666.9313432835821, "grad_norm": 38.46339416503906, "learning_rate": 9.901610644257704e-06, "loss": 23.9474, "step": 28011 }, { "epoch": 666.955223880597, "grad_norm": 48.46202087402344, "learning_rate": 9.901260504201681e-06, "loss": 23.3866, "step": 28012 }, { "epoch": 666.9791044776119, "grad_norm": 42.2346305847168, "learning_rate": 9.900910364145659e-06, "loss": 22.9538, "step": 28013 }, { "epoch": 667.0, "grad_norm": 43.945987701416016, "learning_rate": 9.900560224089638e-06, "loss": 20.2167, "step": 28014 }, { "epoch": 667.0238805970149, "grad_norm": 44.03017807006836, "learning_rate": 9.900210084033615e-06, "loss": 23.4052, "step": 28015 }, { "epoch": 667.0477611940298, "grad_norm": 41.96559524536133, "learning_rate": 9.899859943977592e-06, "loss": 23.4554, "step": 28016 }, { "epoch": 667.0716417910447, "grad_norm": 37.09965133666992, "learning_rate": 9.89950980392157e-06, "loss": 23.4526, "step": 28017 }, { "epoch": 667.0955223880597, "grad_norm": 43.515724182128906, "learning_rate": 9.899159663865548e-06, "loss": 23.403, "step": 28018 }, { "epoch": 667.1194029850747, "grad_norm": 35.646480560302734, "learning_rate": 9.898809523809525e-06, "loss": 23.9741, "step": 28019 }, { "epoch": 667.1432835820896, "grad_norm": 44.79692840576172, "learning_rate": 9.898459383753503e-06, "loss": 23.2348, "step": 28020 }, { "epoch": 667.1671641791045, "grad_norm": 33.32938003540039, "learning_rate": 9.89810924369748e-06, "loss": 23.4005, "step": 28021 }, { "epoch": 667.1910447761194, "grad_norm": 47.52588653564453, "learning_rate": 9.897759103641457e-06, "loss": 24.2864, "step": 28022 }, { "epoch": 667.2149253731343, "grad_norm": 38.16631317138672, "learning_rate": 9.897408963585436e-06, "loss": 23.1105, "step": 28023 }, { "epoch": 667.2388059701492, "grad_norm": 50.350467681884766, "learning_rate": 9.897058823529413e-06, "loss": 23.8676, "step": 28024 }, { "epoch": 667.2626865671642, "grad_norm": 42.41181564331055, "learning_rate": 9.89670868347339e-06, "loss": 22.921, "step": 28025 }, { "epoch": 667.2865671641791, "grad_norm": 42.280540466308594, "learning_rate": 9.896358543417368e-06, "loss": 23.7002, "step": 28026 }, { "epoch": 667.310447761194, "grad_norm": 42.50837326049805, "learning_rate": 9.896008403361345e-06, "loss": 24.3187, "step": 28027 }, { "epoch": 667.334328358209, "grad_norm": 44.4390754699707, "learning_rate": 9.895658263305324e-06, "loss": 23.8562, "step": 28028 }, { "epoch": 667.3582089552239, "grad_norm": 40.53400802612305, "learning_rate": 9.895308123249301e-06, "loss": 24.1544, "step": 28029 }, { "epoch": 667.3820895522388, "grad_norm": 47.9568977355957, "learning_rate": 9.894957983193278e-06, "loss": 24.3233, "step": 28030 }, { "epoch": 667.4059701492537, "grad_norm": 37.565406799316406, "learning_rate": 9.894607843137256e-06, "loss": 22.0957, "step": 28031 }, { "epoch": 667.4298507462687, "grad_norm": 48.66737747192383, "learning_rate": 9.894257703081233e-06, "loss": 24.4807, "step": 28032 }, { "epoch": 667.4537313432836, "grad_norm": 41.209388732910156, "learning_rate": 9.893907563025212e-06, "loss": 23.3864, "step": 28033 }, { "epoch": 667.4776119402985, "grad_norm": 46.97706985473633, "learning_rate": 9.893557422969189e-06, "loss": 23.343, "step": 28034 }, { "epoch": 667.5014925373134, "grad_norm": 39.939735412597656, "learning_rate": 9.893207282913166e-06, "loss": 23.8746, "step": 28035 }, { "epoch": 667.5253731343283, "grad_norm": 47.096893310546875, "learning_rate": 9.892857142857143e-06, "loss": 23.5296, "step": 28036 }, { "epoch": 667.5492537313432, "grad_norm": 39.80535125732422, "learning_rate": 9.892507002801122e-06, "loss": 23.8208, "step": 28037 }, { "epoch": 667.5731343283583, "grad_norm": 47.364559173583984, "learning_rate": 9.8921568627451e-06, "loss": 23.7344, "step": 28038 }, { "epoch": 667.5970149253732, "grad_norm": 39.62509536743164, "learning_rate": 9.891806722689077e-06, "loss": 23.5297, "step": 28039 }, { "epoch": 667.6208955223881, "grad_norm": 48.98966598510742, "learning_rate": 9.891456582633054e-06, "loss": 23.9367, "step": 28040 }, { "epoch": 667.644776119403, "grad_norm": 41.771759033203125, "learning_rate": 9.891106442577031e-06, "loss": 23.5323, "step": 28041 }, { "epoch": 667.6686567164179, "grad_norm": 42.40343475341797, "learning_rate": 9.89075630252101e-06, "loss": 22.1803, "step": 28042 }, { "epoch": 667.6925373134328, "grad_norm": 34.541717529296875, "learning_rate": 9.890406162464988e-06, "loss": 22.3669, "step": 28043 }, { "epoch": 667.7164179104477, "grad_norm": 45.25448226928711, "learning_rate": 9.890056022408965e-06, "loss": 23.7529, "step": 28044 }, { "epoch": 667.7402985074627, "grad_norm": 39.55406188964844, "learning_rate": 9.889705882352942e-06, "loss": 23.1241, "step": 28045 }, { "epoch": 667.7641791044776, "grad_norm": 42.861637115478516, "learning_rate": 9.88935574229692e-06, "loss": 22.7457, "step": 28046 }, { "epoch": 667.7880597014926, "grad_norm": 38.59557342529297, "learning_rate": 9.889005602240898e-06, "loss": 23.5227, "step": 28047 }, { "epoch": 667.8119402985075, "grad_norm": 45.15275192260742, "learning_rate": 9.888655462184875e-06, "loss": 23.7633, "step": 28048 }, { "epoch": 667.8358208955224, "grad_norm": 37.81317138671875, "learning_rate": 9.888305322128853e-06, "loss": 24.3267, "step": 28049 }, { "epoch": 667.8597014925373, "grad_norm": 46.50737762451172, "learning_rate": 9.88795518207283e-06, "loss": 24.4079, "step": 28050 }, { "epoch": 667.8835820895522, "grad_norm": NaN, "learning_rate": 9.887605042016807e-06, "loss": 30.4383, "step": 28051 }, { "epoch": 667.9074626865672, "grad_norm": 35.8526496887207, "learning_rate": 9.887605042016807e-06, "loss": 23.4164, "step": 28052 }, { "epoch": 667.9313432835821, "grad_norm": 46.1452751159668, "learning_rate": 9.887254901960786e-06, "loss": 24.5752, "step": 28053 }, { "epoch": 667.955223880597, "grad_norm": 35.013336181640625, "learning_rate": 9.886904761904763e-06, "loss": 23.7977, "step": 28054 }, { "epoch": 667.9791044776119, "grad_norm": 48.12261199951172, "learning_rate": 9.88655462184874e-06, "loss": 24.0722, "step": 28055 }, { "epoch": 668.0, "grad_norm": 35.512664794921875, "learning_rate": 9.886204481792718e-06, "loss": 19.7043, "step": 28056 }, { "epoch": 668.0238805970149, "grad_norm": 45.56311798095703, "learning_rate": 9.885854341736697e-06, "loss": 23.2436, "step": 28057 }, { "epoch": 668.0477611940298, "grad_norm": NaN, "learning_rate": 9.885504201680674e-06, "loss": 20.9599, "step": 28058 }, { "epoch": 668.0716417910447, "grad_norm": 38.41719436645508, "learning_rate": 9.885504201680674e-06, "loss": 22.9008, "step": 28059 }, { "epoch": 668.0955223880597, "grad_norm": 44.229469299316406, "learning_rate": 9.885154061624651e-06, "loss": 22.8808, "step": 28060 }, { "epoch": 668.1194029850747, "grad_norm": 37.9459114074707, "learning_rate": 9.884803921568628e-06, "loss": 23.9238, "step": 28061 }, { "epoch": 668.1432835820896, "grad_norm": 41.715145111083984, "learning_rate": 9.884453781512606e-06, "loss": 23.6435, "step": 28062 }, { "epoch": 668.1671641791045, "grad_norm": 38.170005798339844, "learning_rate": 9.884103641456585e-06, "loss": 24.4915, "step": 28063 }, { "epoch": 668.1910447761194, "grad_norm": 45.79888916015625, "learning_rate": 9.883753501400562e-06, "loss": 23.6117, "step": 28064 }, { "epoch": 668.2149253731343, "grad_norm": 38.538169860839844, "learning_rate": 9.883403361344539e-06, "loss": 23.6703, "step": 28065 }, { "epoch": 668.2388059701492, "grad_norm": 38.931400299072266, "learning_rate": 9.883053221288516e-06, "loss": 23.5221, "step": 28066 }, { "epoch": 668.2626865671642, "grad_norm": 37.04226303100586, "learning_rate": 9.882703081232494e-06, "loss": 23.7266, "step": 28067 }, { "epoch": 668.2865671641791, "grad_norm": 37.76079177856445, "learning_rate": 9.882352941176472e-06, "loss": 22.8946, "step": 28068 }, { "epoch": 668.310447761194, "grad_norm": 34.927757263183594, "learning_rate": 9.88200280112045e-06, "loss": 24.1619, "step": 28069 }, { "epoch": 668.334328358209, "grad_norm": 44.396854400634766, "learning_rate": 9.881652661064427e-06, "loss": 23.0666, "step": 28070 }, { "epoch": 668.3582089552239, "grad_norm": 33.40688705444336, "learning_rate": 9.881302521008404e-06, "loss": 23.2752, "step": 28071 }, { "epoch": 668.3820895522388, "grad_norm": 43.15229415893555, "learning_rate": 9.880952380952381e-06, "loss": 23.0351, "step": 28072 }, { "epoch": 668.4059701492537, "grad_norm": 36.886661529541016, "learning_rate": 9.88060224089636e-06, "loss": 23.9274, "step": 28073 }, { "epoch": 668.4298507462687, "grad_norm": 37.16306686401367, "learning_rate": 9.880252100840338e-06, "loss": 23.0655, "step": 28074 }, { "epoch": 668.4537313432836, "grad_norm": 34.91098403930664, "learning_rate": 9.879901960784315e-06, "loss": 23.2095, "step": 28075 }, { "epoch": 668.4776119402985, "grad_norm": 36.38564682006836, "learning_rate": 9.879551820728292e-06, "loss": 23.2587, "step": 28076 }, { "epoch": 668.5014925373134, "grad_norm": 30.035175323486328, "learning_rate": 9.879201680672271e-06, "loss": 23.4384, "step": 28077 }, { "epoch": 668.5253731343283, "grad_norm": 39.25247573852539, "learning_rate": 9.878851540616248e-06, "loss": 23.4267, "step": 28078 }, { "epoch": 668.5492537313432, "grad_norm": 30.60988426208496, "learning_rate": 9.878501400560225e-06, "loss": 23.8253, "step": 28079 }, { "epoch": 668.5731343283583, "grad_norm": 41.27000427246094, "learning_rate": 9.878151260504203e-06, "loss": 24.4192, "step": 28080 }, { "epoch": 668.5970149253732, "grad_norm": 31.75570297241211, "learning_rate": 9.87780112044818e-06, "loss": 23.4688, "step": 28081 }, { "epoch": 668.6208955223881, "grad_norm": 39.673404693603516, "learning_rate": 9.877450980392159e-06, "loss": 24.3349, "step": 28082 }, { "epoch": 668.644776119403, "grad_norm": 36.259132385253906, "learning_rate": 9.877100840336136e-06, "loss": 23.0615, "step": 28083 }, { "epoch": 668.6686567164179, "grad_norm": 37.831512451171875, "learning_rate": 9.876750700280113e-06, "loss": 24.1026, "step": 28084 }, { "epoch": 668.6925373134328, "grad_norm": 36.425376892089844, "learning_rate": 9.87640056022409e-06, "loss": 24.0185, "step": 28085 }, { "epoch": 668.7164179104477, "grad_norm": 33.232505798339844, "learning_rate": 9.876050420168068e-06, "loss": 23.5853, "step": 28086 }, { "epoch": 668.7402985074627, "grad_norm": 31.59798812866211, "learning_rate": 9.875700280112047e-06, "loss": 24.3595, "step": 28087 }, { "epoch": 668.7641791044776, "grad_norm": 31.68828773498535, "learning_rate": 9.875350140056024e-06, "loss": 23.0842, "step": 28088 }, { "epoch": 668.7880597014926, "grad_norm": 29.456623077392578, "learning_rate": 9.875000000000001e-06, "loss": 23.3299, "step": 28089 }, { "epoch": 668.8119402985075, "grad_norm": 30.199718475341797, "learning_rate": 9.874649859943978e-06, "loss": 23.5843, "step": 28090 }, { "epoch": 668.8358208955224, "grad_norm": 27.54193115234375, "learning_rate": 9.874299719887956e-06, "loss": 22.6743, "step": 28091 }, { "epoch": 668.8597014925373, "grad_norm": 33.96809005737305, "learning_rate": 9.873949579831935e-06, "loss": 24.2394, "step": 28092 }, { "epoch": 668.8835820895522, "grad_norm": 27.96333122253418, "learning_rate": 9.873599439775912e-06, "loss": 23.6538, "step": 28093 }, { "epoch": 668.9074626865672, "grad_norm": 34.814735412597656, "learning_rate": 9.873249299719889e-06, "loss": 23.7344, "step": 28094 }, { "epoch": 668.9313432835821, "grad_norm": 29.88099479675293, "learning_rate": 9.872899159663866e-06, "loss": 23.8446, "step": 28095 }, { "epoch": 668.955223880597, "grad_norm": 34.39855194091797, "learning_rate": 9.872549019607845e-06, "loss": 23.7177, "step": 28096 }, { "epoch": 668.9791044776119, "grad_norm": 29.95107650756836, "learning_rate": 9.872198879551822e-06, "loss": 24.1066, "step": 28097 }, { "epoch": 669.0, "grad_norm": 35.417274475097656, "learning_rate": 9.8718487394958e-06, "loss": 20.9551, "step": 28098 }, { "epoch": 669.0238805970149, "grad_norm": 31.010696411132812, "learning_rate": 9.871498599439777e-06, "loss": 23.608, "step": 28099 }, { "epoch": 669.0477611940298, "grad_norm": 32.63833236694336, "learning_rate": 9.871148459383754e-06, "loss": 23.2752, "step": 28100 }, { "epoch": 669.0716417910447, "grad_norm": 30.37897491455078, "learning_rate": 9.870798319327733e-06, "loss": 23.1625, "step": 28101 }, { "epoch": 669.0955223880597, "grad_norm": 28.040130615234375, "learning_rate": 9.87044817927171e-06, "loss": 24.6267, "step": 28102 }, { "epoch": 669.1194029850747, "grad_norm": 32.19098663330078, "learning_rate": 9.870098039215688e-06, "loss": 23.3281, "step": 28103 }, { "epoch": 669.1432835820896, "grad_norm": 27.428743362426758, "learning_rate": 9.869747899159665e-06, "loss": 23.8165, "step": 28104 }, { "epoch": 669.1671641791045, "grad_norm": 28.75040054321289, "learning_rate": 9.869397759103642e-06, "loss": 23.398, "step": 28105 }, { "epoch": 669.1910447761194, "grad_norm": 28.509489059448242, "learning_rate": 9.869047619047621e-06, "loss": 23.9742, "step": 28106 }, { "epoch": 669.2149253731343, "grad_norm": 26.727712631225586, "learning_rate": 9.868697478991598e-06, "loss": 23.0428, "step": 28107 }, { "epoch": 669.2388059701492, "grad_norm": 26.253154754638672, "learning_rate": 9.868347338935575e-06, "loss": 22.8633, "step": 28108 }, { "epoch": 669.2626865671642, "grad_norm": 34.739585876464844, "learning_rate": 9.867997198879553e-06, "loss": 23.6394, "step": 28109 }, { "epoch": 669.2865671641791, "grad_norm": 27.651391983032227, "learning_rate": 9.86764705882353e-06, "loss": 23.5465, "step": 28110 }, { "epoch": 669.310447761194, "grad_norm": 26.794286727905273, "learning_rate": 9.867296918767509e-06, "loss": 23.8979, "step": 28111 }, { "epoch": 669.334328358209, "grad_norm": 31.231369018554688, "learning_rate": 9.866946778711486e-06, "loss": 24.2302, "step": 28112 }, { "epoch": 669.3582089552239, "grad_norm": 28.38521385192871, "learning_rate": 9.866596638655463e-06, "loss": 23.2526, "step": 28113 }, { "epoch": 669.3820895522388, "grad_norm": 25.329326629638672, "learning_rate": 9.86624649859944e-06, "loss": 24.1797, "step": 28114 }, { "epoch": 669.4059701492537, "grad_norm": 26.59894371032715, "learning_rate": 9.86589635854342e-06, "loss": 23.2879, "step": 28115 }, { "epoch": 669.4298507462687, "grad_norm": 25.231687545776367, "learning_rate": 9.865546218487397e-06, "loss": 24.0888, "step": 28116 }, { "epoch": 669.4537313432836, "grad_norm": 23.633501052856445, "learning_rate": 9.865196078431374e-06, "loss": 23.0842, "step": 28117 }, { "epoch": 669.4776119402985, "grad_norm": 26.112926483154297, "learning_rate": 9.864845938375351e-06, "loss": 23.1782, "step": 28118 }, { "epoch": 669.5014925373134, "grad_norm": 25.10151481628418, "learning_rate": 9.864495798319328e-06, "loss": 23.5105, "step": 28119 }, { "epoch": 669.5253731343283, "grad_norm": 31.58018684387207, "learning_rate": 9.864145658263307e-06, "loss": 23.8353, "step": 28120 }, { "epoch": 669.5492537313432, "grad_norm": 23.767982482910156, "learning_rate": 9.863795518207285e-06, "loss": 22.908, "step": 28121 }, { "epoch": 669.5731343283583, "grad_norm": 29.545076370239258, "learning_rate": 9.863445378151262e-06, "loss": 22.5731, "step": 28122 }, { "epoch": 669.5970149253732, "grad_norm": 26.223838806152344, "learning_rate": 9.863095238095239e-06, "loss": 23.2289, "step": 28123 }, { "epoch": 669.6208955223881, "grad_norm": 28.27680778503418, "learning_rate": 9.862745098039216e-06, "loss": 23.8281, "step": 28124 }, { "epoch": 669.644776119403, "grad_norm": 25.09589385986328, "learning_rate": 9.862394957983195e-06, "loss": 24.9875, "step": 28125 }, { "epoch": 669.6686567164179, "grad_norm": 27.242555618286133, "learning_rate": 9.862044817927172e-06, "loss": 23.0644, "step": 28126 }, { "epoch": 669.6925373134328, "grad_norm": 25.314552307128906, "learning_rate": 9.86169467787115e-06, "loss": 23.4659, "step": 28127 }, { "epoch": 669.7164179104477, "grad_norm": 25.869443893432617, "learning_rate": 9.861344537815127e-06, "loss": 23.2532, "step": 28128 }, { "epoch": 669.7402985074627, "grad_norm": 26.701702117919922, "learning_rate": 9.860994397759104e-06, "loss": 23.2615, "step": 28129 }, { "epoch": 669.7641791044776, "grad_norm": 26.328962326049805, "learning_rate": 9.860644257703083e-06, "loss": 23.7363, "step": 28130 }, { "epoch": 669.7880597014926, "grad_norm": 24.876399993896484, "learning_rate": 9.86029411764706e-06, "loss": 22.9535, "step": 28131 }, { "epoch": 669.8119402985075, "grad_norm": 27.98390769958496, "learning_rate": 9.859943977591038e-06, "loss": 24.0122, "step": 28132 }, { "epoch": 669.8358208955224, "grad_norm": 26.41121482849121, "learning_rate": 9.859593837535015e-06, "loss": 24.322, "step": 28133 }, { "epoch": 669.8597014925373, "grad_norm": 25.587377548217773, "learning_rate": 9.859243697478994e-06, "loss": 23.6692, "step": 28134 }, { "epoch": 669.8835820895522, "grad_norm": 26.171039581298828, "learning_rate": 9.858893557422971e-06, "loss": 24.0605, "step": 28135 }, { "epoch": 669.9074626865672, "grad_norm": 24.981836318969727, "learning_rate": 9.858543417366948e-06, "loss": 23.6966, "step": 28136 }, { "epoch": 669.9313432835821, "grad_norm": 23.478927612304688, "learning_rate": 9.858193277310925e-06, "loss": 24.0991, "step": 28137 }, { "epoch": 669.955223880597, "grad_norm": 26.156719207763672, "learning_rate": 9.857843137254903e-06, "loss": 23.1814, "step": 28138 }, { "epoch": 669.9791044776119, "grad_norm": 28.466459274291992, "learning_rate": 9.857492997198882e-06, "loss": 24.0075, "step": 28139 }, { "epoch": 670.0, "grad_norm": 28.58958625793457, "learning_rate": 9.857142857142859e-06, "loss": 21.1078, "step": 28140 }, { "epoch": 670.0238805970149, "grad_norm": 24.957599639892578, "learning_rate": 9.856792717086836e-06, "loss": 23.4426, "step": 28141 }, { "epoch": 670.0477611940298, "grad_norm": 26.835893630981445, "learning_rate": 9.856442577030813e-06, "loss": 23.4288, "step": 28142 }, { "epoch": 670.0716417910447, "grad_norm": 26.3164119720459, "learning_rate": 9.85609243697479e-06, "loss": 23.9086, "step": 28143 }, { "epoch": 670.0955223880597, "grad_norm": 25.467477798461914, "learning_rate": 9.85574229691877e-06, "loss": 23.4593, "step": 28144 }, { "epoch": 670.1194029850747, "grad_norm": 26.333219528198242, "learning_rate": 9.855392156862747e-06, "loss": 23.7084, "step": 28145 }, { "epoch": 670.1432835820896, "grad_norm": 25.417259216308594, "learning_rate": 9.855042016806724e-06, "loss": 23.7757, "step": 28146 }, { "epoch": 670.1671641791045, "grad_norm": 26.2432918548584, "learning_rate": 9.854691876750701e-06, "loss": 23.3436, "step": 28147 }, { "epoch": 670.1910447761194, "grad_norm": 24.3697566986084, "learning_rate": 9.854341736694678e-06, "loss": 23.4924, "step": 28148 }, { "epoch": 670.2149253731343, "grad_norm": 23.245662689208984, "learning_rate": 9.853991596638657e-06, "loss": 23.7788, "step": 28149 }, { "epoch": 670.2388059701492, "grad_norm": 24.086837768554688, "learning_rate": 9.853641456582635e-06, "loss": 22.4378, "step": 28150 }, { "epoch": 670.2626865671642, "grad_norm": 27.905824661254883, "learning_rate": 9.853291316526612e-06, "loss": 22.7476, "step": 28151 }, { "epoch": 670.2865671641791, "grad_norm": 31.52743148803711, "learning_rate": 9.852941176470589e-06, "loss": 23.5309, "step": 28152 }, { "epoch": 670.310447761194, "grad_norm": NaN, "learning_rate": 9.852591036414568e-06, "loss": 27.0927, "step": 28153 }, { "epoch": 670.334328358209, "grad_norm": 27.73345375061035, "learning_rate": 9.852591036414568e-06, "loss": 24.2844, "step": 28154 }, { "epoch": 670.3582089552239, "grad_norm": 26.16486930847168, "learning_rate": 9.852240896358545e-06, "loss": 22.8915, "step": 28155 }, { "epoch": 670.3820895522388, "grad_norm": 29.36111068725586, "learning_rate": 9.851890756302522e-06, "loss": 23.3258, "step": 28156 }, { "epoch": 670.4059701492537, "grad_norm": 28.3204345703125, "learning_rate": 9.8515406162465e-06, "loss": 23.7467, "step": 28157 }, { "epoch": 670.4298507462687, "grad_norm": 23.155715942382812, "learning_rate": 9.851190476190477e-06, "loss": 24.2041, "step": 28158 }, { "epoch": 670.4537313432836, "grad_norm": 35.217315673828125, "learning_rate": 9.850840336134456e-06, "loss": 23.6386, "step": 28159 }, { "epoch": 670.4776119402985, "grad_norm": 28.091575622558594, "learning_rate": 9.850490196078433e-06, "loss": 22.9846, "step": 28160 }, { "epoch": 670.5014925373134, "grad_norm": 23.595659255981445, "learning_rate": 9.85014005602241e-06, "loss": 23.7067, "step": 28161 }, { "epoch": 670.5253731343283, "grad_norm": 22.929824829101562, "learning_rate": 9.849789915966388e-06, "loss": 23.6783, "step": 28162 }, { "epoch": 670.5492537313432, "grad_norm": 25.527082443237305, "learning_rate": 9.849439775910365e-06, "loss": 24.3728, "step": 28163 }, { "epoch": 670.5731343283583, "grad_norm": 21.49575424194336, "learning_rate": 9.849089635854344e-06, "loss": 23.2364, "step": 28164 }, { "epoch": 670.5970149253732, "grad_norm": 26.428831100463867, "learning_rate": 9.848739495798321e-06, "loss": 22.6462, "step": 28165 }, { "epoch": 670.6208955223881, "grad_norm": 28.54371452331543, "learning_rate": 9.848389355742298e-06, "loss": 24.0668, "step": 28166 }, { "epoch": 670.644776119403, "grad_norm": 28.092350006103516, "learning_rate": 9.848039215686275e-06, "loss": 23.7058, "step": 28167 }, { "epoch": 670.6686567164179, "grad_norm": 25.459184646606445, "learning_rate": 9.847689075630253e-06, "loss": 23.7649, "step": 28168 }, { "epoch": 670.6925373134328, "grad_norm": 25.958419799804688, "learning_rate": 9.847338935574232e-06, "loss": 23.9194, "step": 28169 }, { "epoch": 670.7164179104477, "grad_norm": 26.958599090576172, "learning_rate": 9.846988795518209e-06, "loss": 23.2648, "step": 28170 }, { "epoch": 670.7402985074627, "grad_norm": 32.716800689697266, "learning_rate": 9.846638655462186e-06, "loss": 24.1077, "step": 28171 }, { "epoch": 670.7641791044776, "grad_norm": 30.51858901977539, "learning_rate": 9.846288515406163e-06, "loss": 24.1117, "step": 28172 }, { "epoch": 670.7880597014926, "grad_norm": 24.62455940246582, "learning_rate": 9.84593837535014e-06, "loss": 24.0185, "step": 28173 }, { "epoch": 670.8119402985075, "grad_norm": 27.86040496826172, "learning_rate": 9.84558823529412e-06, "loss": 23.6737, "step": 28174 }, { "epoch": 670.8358208955224, "grad_norm": 33.13042068481445, "learning_rate": 9.845238095238097e-06, "loss": 23.3715, "step": 28175 }, { "epoch": 670.8597014925373, "grad_norm": 23.259071350097656, "learning_rate": 9.844887955182074e-06, "loss": 23.2558, "step": 28176 }, { "epoch": 670.8835820895522, "grad_norm": 31.83940315246582, "learning_rate": 9.844537815126051e-06, "loss": 23.8448, "step": 28177 }, { "epoch": 670.9074626865672, "grad_norm": 33.038612365722656, "learning_rate": 9.84418767507003e-06, "loss": 23.6354, "step": 28178 }, { "epoch": 670.9313432835821, "grad_norm": 23.59255027770996, "learning_rate": 9.843837535014007e-06, "loss": 23.901, "step": 28179 }, { "epoch": 670.955223880597, "grad_norm": 33.460968017578125, "learning_rate": 9.843487394957983e-06, "loss": 23.7846, "step": 28180 }, { "epoch": 670.9791044776119, "grad_norm": 29.213640213012695, "learning_rate": 9.843137254901962e-06, "loss": 22.8027, "step": 28181 }, { "epoch": 671.0, "grad_norm": 24.961408615112305, "learning_rate": 9.842787114845939e-06, "loss": 20.5984, "step": 28182 }, { "epoch": 671.0238805970149, "grad_norm": 22.645565032958984, "learning_rate": 9.842436974789916e-06, "loss": 23.0977, "step": 28183 }, { "epoch": 671.0477611940298, "grad_norm": NaN, "learning_rate": 9.842086834733894e-06, "loss": 24.0813, "step": 28184 }, { "epoch": 671.0716417910447, "grad_norm": 25.77701187133789, "learning_rate": 9.842086834733894e-06, "loss": 23.9306, "step": 28185 }, { "epoch": 671.0955223880597, "grad_norm": 22.86971664428711, "learning_rate": 9.84173669467787e-06, "loss": 23.776, "step": 28186 }, { "epoch": 671.1194029850747, "grad_norm": 23.343942642211914, "learning_rate": 9.84138655462185e-06, "loss": 23.8959, "step": 28187 }, { "epoch": 671.1432835820896, "grad_norm": 31.436973571777344, "learning_rate": 9.841036414565827e-06, "loss": 23.1511, "step": 28188 }, { "epoch": 671.1671641791045, "grad_norm": 23.08850860595703, "learning_rate": 9.840686274509804e-06, "loss": 23.7861, "step": 28189 }, { "epoch": 671.1910447761194, "grad_norm": 28.51725196838379, "learning_rate": 9.840336134453781e-06, "loss": 24.0263, "step": 28190 }, { "epoch": 671.2149253731343, "grad_norm": 28.18543815612793, "learning_rate": 9.839985994397759e-06, "loss": 23.0727, "step": 28191 }, { "epoch": 671.2388059701492, "grad_norm": 27.899097442626953, "learning_rate": 9.839635854341738e-06, "loss": 24.4904, "step": 28192 }, { "epoch": 671.2626865671642, "grad_norm": 27.784957885742188, "learning_rate": 9.839285714285715e-06, "loss": 24.0982, "step": 28193 }, { "epoch": 671.2865671641791, "grad_norm": 28.987464904785156, "learning_rate": 9.838935574229692e-06, "loss": 23.291, "step": 28194 }, { "epoch": 671.310447761194, "grad_norm": 24.283185958862305, "learning_rate": 9.83858543417367e-06, "loss": 23.4618, "step": 28195 }, { "epoch": 671.334328358209, "grad_norm": 25.439228057861328, "learning_rate": 9.838235294117647e-06, "loss": 24.6624, "step": 28196 }, { "epoch": 671.3582089552239, "grad_norm": 29.988819122314453, "learning_rate": 9.837885154061625e-06, "loss": 23.2782, "step": 28197 }, { "epoch": 671.3820895522388, "grad_norm": 27.20264434814453, "learning_rate": 9.837535014005603e-06, "loss": 24.7565, "step": 28198 }, { "epoch": 671.4059701492537, "grad_norm": 25.00308609008789, "learning_rate": 9.83718487394958e-06, "loss": 23.6374, "step": 28199 }, { "epoch": 671.4298507462687, "grad_norm": 28.131622314453125, "learning_rate": 9.836834733893557e-06, "loss": 22.6734, "step": 28200 }, { "epoch": 671.4537313432836, "grad_norm": 24.93061065673828, "learning_rate": 9.836484593837536e-06, "loss": 23.2387, "step": 28201 }, { "epoch": 671.4776119402985, "grad_norm": 27.46073341369629, "learning_rate": 9.836134453781513e-06, "loss": 23.7918, "step": 28202 }, { "epoch": 671.5014925373134, "grad_norm": 24.54970359802246, "learning_rate": 9.83578431372549e-06, "loss": 22.9397, "step": 28203 }, { "epoch": 671.5253731343283, "grad_norm": 29.673587799072266, "learning_rate": 9.835434173669468e-06, "loss": 24.3859, "step": 28204 }, { "epoch": 671.5492537313432, "grad_norm": 27.03093147277832, "learning_rate": 9.835084033613445e-06, "loss": 22.9938, "step": 28205 }, { "epoch": 671.5731343283583, "grad_norm": 25.578868865966797, "learning_rate": 9.834733893557424e-06, "loss": 23.4168, "step": 28206 }, { "epoch": 671.5970149253732, "grad_norm": 22.938739776611328, "learning_rate": 9.834383753501401e-06, "loss": 23.7403, "step": 28207 }, { "epoch": 671.6208955223881, "grad_norm": 27.574411392211914, "learning_rate": 9.834033613445378e-06, "loss": 24.0836, "step": 28208 }, { "epoch": 671.644776119403, "grad_norm": 27.87846565246582, "learning_rate": 9.833683473389356e-06, "loss": 24.3965, "step": 28209 }, { "epoch": 671.6686567164179, "grad_norm": 25.28472328186035, "learning_rate": 9.833333333333333e-06, "loss": 23.6707, "step": 28210 }, { "epoch": 671.6925373134328, "grad_norm": 22.039173126220703, "learning_rate": 9.832983193277312e-06, "loss": 23.3719, "step": 28211 }, { "epoch": 671.7164179104477, "grad_norm": 23.589345932006836, "learning_rate": 9.832633053221289e-06, "loss": 23.2974, "step": 28212 }, { "epoch": 671.7402985074627, "grad_norm": 24.745899200439453, "learning_rate": 9.832282913165266e-06, "loss": 23.2801, "step": 28213 }, { "epoch": 671.7641791044776, "grad_norm": 29.11342430114746, "learning_rate": 9.831932773109244e-06, "loss": 23.1048, "step": 28214 }, { "epoch": 671.7880597014926, "grad_norm": 26.03569984436035, "learning_rate": 9.83158263305322e-06, "loss": 23.6808, "step": 28215 }, { "epoch": 671.8119402985075, "grad_norm": 29.058746337890625, "learning_rate": 9.8312324929972e-06, "loss": 23.1937, "step": 28216 }, { "epoch": 671.8358208955224, "grad_norm": 21.68157196044922, "learning_rate": 9.830882352941177e-06, "loss": 23.7308, "step": 28217 }, { "epoch": 671.8597014925373, "grad_norm": 36.663536071777344, "learning_rate": 9.830532212885154e-06, "loss": 24.4435, "step": 28218 }, { "epoch": 671.8835820895522, "grad_norm": 26.63733673095703, "learning_rate": 9.830182072829131e-06, "loss": 23.5839, "step": 28219 }, { "epoch": 671.9074626865672, "grad_norm": 25.800003051757812, "learning_rate": 9.82983193277311e-06, "loss": 22.946, "step": 28220 }, { "epoch": 671.9313432835821, "grad_norm": 27.150007247924805, "learning_rate": 9.829481792717088e-06, "loss": 23.5361, "step": 28221 }, { "epoch": 671.955223880597, "grad_norm": 32.53767776489258, "learning_rate": 9.829131652661065e-06, "loss": 23.4123, "step": 28222 }, { "epoch": 671.9791044776119, "grad_norm": 25.02525520324707, "learning_rate": 9.828781512605042e-06, "loss": 23.3394, "step": 28223 }, { "epoch": 672.0, "grad_norm": 24.405431747436523, "learning_rate": 9.82843137254902e-06, "loss": 21.5512, "step": 28224 }, { "epoch": 672.0238805970149, "grad_norm": 38.46488952636719, "learning_rate": 9.828081232492998e-06, "loss": 23.5006, "step": 28225 }, { "epoch": 672.0477611940298, "grad_norm": 26.314544677734375, "learning_rate": 9.827731092436975e-06, "loss": 23.5665, "step": 28226 }, { "epoch": 672.0716417910447, "grad_norm": 24.74464988708496, "learning_rate": 9.827380952380953e-06, "loss": 23.6959, "step": 28227 }, { "epoch": 672.0955223880597, "grad_norm": 37.11141586303711, "learning_rate": 9.82703081232493e-06, "loss": 23.8306, "step": 28228 }, { "epoch": 672.1194029850747, "grad_norm": 27.98441505432129, "learning_rate": 9.826680672268907e-06, "loss": 23.81, "step": 28229 }, { "epoch": 672.1432835820896, "grad_norm": 23.763595581054688, "learning_rate": 9.826330532212886e-06, "loss": 23.4203, "step": 28230 }, { "epoch": 672.1671641791045, "grad_norm": 26.265222549438477, "learning_rate": 9.825980392156863e-06, "loss": 24.0111, "step": 28231 }, { "epoch": 672.1910447761194, "grad_norm": 28.96470832824707, "learning_rate": 9.82563025210084e-06, "loss": 22.8486, "step": 28232 }, { "epoch": 672.2149253731343, "grad_norm": 29.15851402282715, "learning_rate": 9.825280112044818e-06, "loss": 23.7948, "step": 28233 }, { "epoch": 672.2388059701492, "grad_norm": 22.175342559814453, "learning_rate": 9.824929971988795e-06, "loss": 22.6925, "step": 28234 }, { "epoch": 672.2626865671642, "grad_norm": 26.797090530395508, "learning_rate": 9.824579831932774e-06, "loss": 23.7818, "step": 28235 }, { "epoch": 672.2865671641791, "grad_norm": 35.47073745727539, "learning_rate": 9.824229691876751e-06, "loss": 23.3269, "step": 28236 }, { "epoch": 672.310447761194, "grad_norm": 25.062612533569336, "learning_rate": 9.823879551820728e-06, "loss": 23.435, "step": 28237 }, { "epoch": 672.334328358209, "grad_norm": 25.780431747436523, "learning_rate": 9.823529411764706e-06, "loss": 23.9154, "step": 28238 }, { "epoch": 672.3582089552239, "grad_norm": 41.84453201293945, "learning_rate": 9.823179271708685e-06, "loss": 24.2205, "step": 28239 }, { "epoch": 672.3820895522388, "grad_norm": 24.127408981323242, "learning_rate": 9.822829131652662e-06, "loss": 23.1638, "step": 28240 }, { "epoch": 672.4059701492537, "grad_norm": 34.68413162231445, "learning_rate": 9.822478991596639e-06, "loss": 23.4761, "step": 28241 }, { "epoch": 672.4298507462687, "grad_norm": 36.38454818725586, "learning_rate": 9.822128851540616e-06, "loss": 23.705, "step": 28242 }, { "epoch": 672.4537313432836, "grad_norm": 25.756484985351562, "learning_rate": 9.821778711484594e-06, "loss": 22.9129, "step": 28243 }, { "epoch": 672.4776119402985, "grad_norm": 45.65080261230469, "learning_rate": 9.821428571428573e-06, "loss": 22.7747, "step": 28244 }, { "epoch": 672.5014925373134, "grad_norm": 31.318588256835938, "learning_rate": 9.82107843137255e-06, "loss": 24.1531, "step": 28245 }, { "epoch": 672.5253731343283, "grad_norm": 44.13901901245117, "learning_rate": 9.820728291316527e-06, "loss": 23.2494, "step": 28246 }, { "epoch": 672.5492537313432, "grad_norm": NaN, "learning_rate": 9.820378151260504e-06, "loss": 31.8642, "step": 28247 }, { "epoch": 672.5731343283583, "grad_norm": 33.72365951538086, "learning_rate": 9.820378151260504e-06, "loss": 25.2321, "step": 28248 }, { "epoch": 672.5970149253732, "grad_norm": 41.211456298828125, "learning_rate": 9.820028011204481e-06, "loss": 22.8644, "step": 28249 }, { "epoch": 672.6208955223881, "grad_norm": 32.62614059448242, "learning_rate": 9.81967787114846e-06, "loss": 22.7424, "step": 28250 }, { "epoch": 672.644776119403, "grad_norm": 33.225242614746094, "learning_rate": 9.819327731092438e-06, "loss": 22.6827, "step": 28251 }, { "epoch": 672.6686567164179, "grad_norm": 36.19495391845703, "learning_rate": 9.818977591036415e-06, "loss": 23.4541, "step": 28252 }, { "epoch": 672.6925373134328, "grad_norm": 26.384521484375, "learning_rate": 9.818627450980392e-06, "loss": 23.7656, "step": 28253 }, { "epoch": 672.7164179104477, "grad_norm": 26.89274024963379, "learning_rate": 9.81827731092437e-06, "loss": 23.9962, "step": 28254 }, { "epoch": 672.7402985074627, "grad_norm": 31.026199340820312, "learning_rate": 9.817927170868348e-06, "loss": 23.2256, "step": 28255 }, { "epoch": 672.7641791044776, "grad_norm": 26.364185333251953, "learning_rate": 9.817577030812325e-06, "loss": 24.2327, "step": 28256 }, { "epoch": 672.7880597014926, "grad_norm": 27.590343475341797, "learning_rate": 9.817226890756303e-06, "loss": 24.6753, "step": 28257 }, { "epoch": 672.8119402985075, "grad_norm": 25.88616371154785, "learning_rate": 9.81687675070028e-06, "loss": 23.9883, "step": 28258 }, { "epoch": 672.8358208955224, "grad_norm": 26.31214141845703, "learning_rate": 9.816526610644259e-06, "loss": 24.1224, "step": 28259 }, { "epoch": 672.8597014925373, "grad_norm": 27.247215270996094, "learning_rate": 9.816176470588236e-06, "loss": 24.6691, "step": 28260 }, { "epoch": 672.8835820895522, "grad_norm": 24.730010986328125, "learning_rate": 9.815826330532213e-06, "loss": 23.5322, "step": 28261 }, { "epoch": 672.9074626865672, "grad_norm": 27.212120056152344, "learning_rate": 9.81547619047619e-06, "loss": 23.8765, "step": 28262 }, { "epoch": 672.9313432835821, "grad_norm": 26.32611656188965, "learning_rate": 9.815126050420168e-06, "loss": 23.4083, "step": 28263 }, { "epoch": 672.955223880597, "grad_norm": 26.108142852783203, "learning_rate": 9.814775910364147e-06, "loss": 25.3582, "step": 28264 }, { "epoch": 672.9791044776119, "grad_norm": 26.041637420654297, "learning_rate": 9.814425770308124e-06, "loss": 23.8244, "step": 28265 }, { "epoch": 673.0, "grad_norm": 22.697729110717773, "learning_rate": 9.814075630252101e-06, "loss": 21.5094, "step": 28266 }, { "epoch": 673.0238805970149, "grad_norm": 22.640453338623047, "learning_rate": 9.813725490196078e-06, "loss": 23.7384, "step": 28267 }, { "epoch": 673.0477611940298, "grad_norm": 26.58555793762207, "learning_rate": 9.813375350140056e-06, "loss": 22.1461, "step": 28268 }, { "epoch": 673.0716417910447, "grad_norm": 23.971187591552734, "learning_rate": 9.813025210084035e-06, "loss": 22.8363, "step": 28269 }, { "epoch": 673.0955223880597, "grad_norm": 24.221845626831055, "learning_rate": 9.812675070028012e-06, "loss": 22.8363, "step": 28270 }, { "epoch": 673.1194029850747, "grad_norm": 31.31696319580078, "learning_rate": 9.812324929971989e-06, "loss": 24.4964, "step": 28271 }, { "epoch": 673.1432835820896, "grad_norm": 28.171499252319336, "learning_rate": 9.811974789915966e-06, "loss": 22.8339, "step": 28272 }, { "epoch": 673.1671641791045, "grad_norm": 24.465356826782227, "learning_rate": 9.811624649859944e-06, "loss": 24.2118, "step": 28273 }, { "epoch": 673.1910447761194, "grad_norm": 28.239917755126953, "learning_rate": 9.811274509803923e-06, "loss": 24.3511, "step": 28274 }, { "epoch": 673.2149253731343, "grad_norm": 22.200870513916016, "learning_rate": 9.8109243697479e-06, "loss": 22.7441, "step": 28275 }, { "epoch": 673.2388059701492, "grad_norm": 25.05191421508789, "learning_rate": 9.810574229691877e-06, "loss": 24.3107, "step": 28276 }, { "epoch": 673.2626865671642, "grad_norm": 23.75380516052246, "learning_rate": 9.810224089635854e-06, "loss": 23.6673, "step": 28277 }, { "epoch": 673.2865671641791, "grad_norm": 27.288101196289062, "learning_rate": 9.809873949579831e-06, "loss": 24.104, "step": 28278 }, { "epoch": 673.310447761194, "grad_norm": 33.03350067138672, "learning_rate": 9.80952380952381e-06, "loss": 24.1233, "step": 28279 }, { "epoch": 673.334328358209, "grad_norm": 30.141902923583984, "learning_rate": 9.809173669467788e-06, "loss": 23.4132, "step": 28280 }, { "epoch": 673.3582089552239, "grad_norm": 23.05860710144043, "learning_rate": 9.808823529411765e-06, "loss": 23.5456, "step": 28281 }, { "epoch": 673.3820895522388, "grad_norm": 27.307323455810547, "learning_rate": 9.808473389355742e-06, "loss": 23.8576, "step": 28282 }, { "epoch": 673.4059701492537, "grad_norm": 29.893875122070312, "learning_rate": 9.808123249299721e-06, "loss": 23.4767, "step": 28283 }, { "epoch": 673.4298507462687, "grad_norm": 25.267135620117188, "learning_rate": 9.807773109243698e-06, "loss": 23.3751, "step": 28284 }, { "epoch": 673.4537313432836, "grad_norm": 27.61143684387207, "learning_rate": 9.807422969187676e-06, "loss": 24.563, "step": 28285 }, { "epoch": 673.4776119402985, "grad_norm": 27.193950653076172, "learning_rate": 9.807072829131653e-06, "loss": 24.3713, "step": 28286 }, { "epoch": 673.5014925373134, "grad_norm": 26.4012393951416, "learning_rate": 9.80672268907563e-06, "loss": 24.3088, "step": 28287 }, { "epoch": 673.5253731343283, "grad_norm": 32.82551956176758, "learning_rate": 9.806372549019609e-06, "loss": 24.7286, "step": 28288 }, { "epoch": 673.5492537313432, "grad_norm": 25.366779327392578, "learning_rate": 9.806022408963586e-06, "loss": 23.9591, "step": 28289 }, { "epoch": 673.5731343283583, "grad_norm": 29.329328536987305, "learning_rate": 9.805672268907563e-06, "loss": 24.1018, "step": 28290 }, { "epoch": 673.5970149253732, "grad_norm": 24.721790313720703, "learning_rate": 9.80532212885154e-06, "loss": 23.4196, "step": 28291 }, { "epoch": 673.6208955223881, "grad_norm": 25.70654296875, "learning_rate": 9.804971988795518e-06, "loss": 24.2579, "step": 28292 }, { "epoch": 673.644776119403, "grad_norm": 32.71250534057617, "learning_rate": 9.804621848739497e-06, "loss": 24.5773, "step": 28293 }, { "epoch": 673.6686567164179, "grad_norm": 23.183422088623047, "learning_rate": 9.804271708683474e-06, "loss": 23.9072, "step": 28294 }, { "epoch": 673.6925373134328, "grad_norm": 30.141254425048828, "learning_rate": 9.803921568627451e-06, "loss": 24.6941, "step": 28295 }, { "epoch": 673.7164179104477, "grad_norm": 30.068408966064453, "learning_rate": 9.803571428571428e-06, "loss": 23.7601, "step": 28296 }, { "epoch": 673.7402985074627, "grad_norm": 32.19398880004883, "learning_rate": 9.803221288515406e-06, "loss": 23.7139, "step": 28297 }, { "epoch": 673.7641791044776, "grad_norm": 25.857927322387695, "learning_rate": 9.802871148459385e-06, "loss": 23.0419, "step": 28298 }, { "epoch": 673.7880597014926, "grad_norm": NaN, "learning_rate": 9.802521008403362e-06, "loss": 27.1631, "step": 28299 }, { "epoch": 673.8119402985075, "grad_norm": 27.06586456298828, "learning_rate": 9.802521008403362e-06, "loss": 23.8079, "step": 28300 }, { "epoch": 673.8358208955224, "grad_norm": 27.27884292602539, "learning_rate": 9.802170868347339e-06, "loss": 23.8879, "step": 28301 }, { "epoch": 673.8597014925373, "grad_norm": 23.987571716308594, "learning_rate": 9.801820728291316e-06, "loss": 24.5217, "step": 28302 }, { "epoch": 673.8835820895522, "grad_norm": 27.61414909362793, "learning_rate": 9.801470588235295e-06, "loss": 22.8994, "step": 28303 }, { "epoch": 673.9074626865672, "grad_norm": 22.769411087036133, "learning_rate": 9.801120448179273e-06, "loss": 24.7857, "step": 28304 }, { "epoch": 673.9313432835821, "grad_norm": 26.217153549194336, "learning_rate": 9.80077030812325e-06, "loss": 24.0498, "step": 28305 }, { "epoch": 673.955223880597, "grad_norm": 25.30057144165039, "learning_rate": 9.800420168067227e-06, "loss": 24.4399, "step": 28306 }, { "epoch": 673.9791044776119, "grad_norm": 36.258331298828125, "learning_rate": 9.800070028011204e-06, "loss": 25.1132, "step": 28307 }, { "epoch": 674.0, "grad_norm": 18.917421340942383, "learning_rate": 9.799719887955183e-06, "loss": 19.8972, "step": 28308 }, { "epoch": 674.0238805970149, "grad_norm": 30.027742385864258, "learning_rate": 9.79936974789916e-06, "loss": 24.8486, "step": 28309 }, { "epoch": 674.0477611940298, "grad_norm": 37.494014739990234, "learning_rate": 9.799019607843138e-06, "loss": 24.6336, "step": 28310 }, { "epoch": 674.0716417910447, "grad_norm": 23.54927635192871, "learning_rate": 9.798669467787115e-06, "loss": 23.8542, "step": 28311 }, { "epoch": 674.0955223880597, "grad_norm": 28.02633285522461, "learning_rate": 9.798319327731092e-06, "loss": 24.9139, "step": 28312 }, { "epoch": 674.1194029850747, "grad_norm": 35.34666061401367, "learning_rate": 9.797969187675071e-06, "loss": 23.7673, "step": 28313 }, { "epoch": 674.1432835820896, "grad_norm": 24.0801944732666, "learning_rate": 9.797619047619048e-06, "loss": 24.5785, "step": 28314 }, { "epoch": 674.1671641791045, "grad_norm": 28.13842010498047, "learning_rate": 9.797268907563026e-06, "loss": 24.3454, "step": 28315 }, { "epoch": 674.1910447761194, "grad_norm": 33.35695266723633, "learning_rate": 9.796918767507003e-06, "loss": 23.6178, "step": 28316 }, { "epoch": 674.2149253731343, "grad_norm": 25.948400497436523, "learning_rate": 9.79656862745098e-06, "loss": 23.5983, "step": 28317 }, { "epoch": 674.2388059701492, "grad_norm": 21.568740844726562, "learning_rate": 9.796218487394959e-06, "loss": 24.0239, "step": 28318 }, { "epoch": 674.2626865671642, "grad_norm": 32.7653923034668, "learning_rate": 9.795868347338936e-06, "loss": 23.788, "step": 28319 }, { "epoch": 674.2865671641791, "grad_norm": 27.78307342529297, "learning_rate": 9.795518207282913e-06, "loss": 24.8773, "step": 28320 }, { "epoch": 674.310447761194, "grad_norm": 23.010631561279297, "learning_rate": 9.79516806722689e-06, "loss": 24.6142, "step": 28321 }, { "epoch": 674.334328358209, "grad_norm": 25.264530181884766, "learning_rate": 9.79481792717087e-06, "loss": 24.8413, "step": 28322 }, { "epoch": 674.3582089552239, "grad_norm": 28.22577667236328, "learning_rate": 9.794467787114847e-06, "loss": 24.06, "step": 28323 }, { "epoch": 674.3820895522388, "grad_norm": 26.051956176757812, "learning_rate": 9.794117647058824e-06, "loss": 23.8528, "step": 28324 }, { "epoch": 674.4059701492537, "grad_norm": 21.528079986572266, "learning_rate": 9.793767507002801e-06, "loss": 25.035, "step": 28325 }, { "epoch": 674.4298507462687, "grad_norm": 21.419574737548828, "learning_rate": 9.793417366946778e-06, "loss": 24.075, "step": 28326 }, { "epoch": 674.4537313432836, "grad_norm": 24.96001625061035, "learning_rate": 9.793067226890757e-06, "loss": 24.1955, "step": 28327 }, { "epoch": 674.4776119402985, "grad_norm": 22.3768310546875, "learning_rate": 9.792717086834735e-06, "loss": 23.336, "step": 28328 }, { "epoch": 674.5014925373134, "grad_norm": 24.09269142150879, "learning_rate": 9.792366946778712e-06, "loss": 23.1351, "step": 28329 }, { "epoch": 674.5253731343283, "grad_norm": 26.000404357910156, "learning_rate": 9.792016806722689e-06, "loss": 24.0043, "step": 28330 }, { "epoch": 674.5492537313432, "grad_norm": 24.611543655395508, "learning_rate": 9.791666666666666e-06, "loss": 24.8161, "step": 28331 }, { "epoch": 674.5731343283583, "grad_norm": 23.63307762145996, "learning_rate": 9.791316526610645e-06, "loss": 24.2792, "step": 28332 }, { "epoch": 674.5970149253732, "grad_norm": 28.078998565673828, "learning_rate": 9.790966386554623e-06, "loss": 24.1126, "step": 28333 }, { "epoch": 674.6208955223881, "grad_norm": 23.291767120361328, "learning_rate": 9.7906162464986e-06, "loss": 25.2121, "step": 28334 }, { "epoch": 674.644776119403, "grad_norm": 23.16015625, "learning_rate": 9.790266106442577e-06, "loss": 24.2875, "step": 28335 }, { "epoch": 674.6686567164179, "grad_norm": 23.394784927368164, "learning_rate": 9.789915966386554e-06, "loss": 23.8474, "step": 28336 }, { "epoch": 674.6925373134328, "grad_norm": 23.141990661621094, "learning_rate": 9.789565826330533e-06, "loss": 24.2265, "step": 28337 }, { "epoch": 674.7164179104477, "grad_norm": 20.143539428710938, "learning_rate": 9.78921568627451e-06, "loss": 24.5747, "step": 28338 }, { "epoch": 674.7402985074627, "grad_norm": 25.436532974243164, "learning_rate": 9.788865546218488e-06, "loss": 24.9307, "step": 28339 }, { "epoch": 674.7641791044776, "grad_norm": 23.35222816467285, "learning_rate": 9.788515406162465e-06, "loss": 24.7511, "step": 28340 }, { "epoch": 674.7880597014926, "grad_norm": 32.07123565673828, "learning_rate": 9.788165266106444e-06, "loss": 24.1713, "step": 28341 }, { "epoch": 674.8119402985075, "grad_norm": 28.102930068969727, "learning_rate": 9.787815126050421e-06, "loss": 24.8966, "step": 28342 }, { "epoch": 674.8358208955224, "grad_norm": 20.97711181640625, "learning_rate": 9.787464985994398e-06, "loss": 23.4668, "step": 28343 }, { "epoch": 674.8597014925373, "grad_norm": 24.397151947021484, "learning_rate": 9.787114845938376e-06, "loss": 24.2197, "step": 28344 }, { "epoch": 674.8835820895522, "grad_norm": 27.100086212158203, "learning_rate": 9.786764705882353e-06, "loss": 24.056, "step": 28345 }, { "epoch": 674.9074626865672, "grad_norm": 28.953519821166992, "learning_rate": 9.786414565826332e-06, "loss": 23.7346, "step": 28346 }, { "epoch": 674.9313432835821, "grad_norm": 21.787353515625, "learning_rate": 9.786064425770309e-06, "loss": 23.3539, "step": 28347 }, { "epoch": 674.955223880597, "grad_norm": 23.390085220336914, "learning_rate": 9.785714285714286e-06, "loss": 24.6478, "step": 28348 }, { "epoch": 674.9791044776119, "grad_norm": 20.409473419189453, "learning_rate": 9.785364145658263e-06, "loss": 23.6956, "step": 28349 }, { "epoch": 675.0, "grad_norm": 18.879438400268555, "learning_rate": 9.78501400560224e-06, "loss": 19.9644, "step": 28350 }, { "epoch": 675.0238805970149, "grad_norm": 22.220060348510742, "learning_rate": 9.78466386554622e-06, "loss": 24.4203, "step": 28351 }, { "epoch": 675.0477611940298, "grad_norm": 29.648405075073242, "learning_rate": 9.784313725490197e-06, "loss": 23.7487, "step": 28352 }, { "epoch": 675.0716417910447, "grad_norm": 39.54617691040039, "learning_rate": 9.783963585434174e-06, "loss": 24.4827, "step": 28353 }, { "epoch": 675.0955223880597, "grad_norm": 22.07240867614746, "learning_rate": 9.783613445378151e-06, "loss": 25.1183, "step": 28354 }, { "epoch": 675.1194029850747, "grad_norm": 34.5073127746582, "learning_rate": 9.783263305322129e-06, "loss": 24.5235, "step": 28355 }, { "epoch": 675.1432835820896, "grad_norm": 33.18600082397461, "learning_rate": 9.782913165266107e-06, "loss": 23.8403, "step": 28356 }, { "epoch": 675.1671641791045, "grad_norm": 22.083593368530273, "learning_rate": 9.782563025210085e-06, "loss": 24.0577, "step": 28357 }, { "epoch": 675.1910447761194, "grad_norm": 24.715713500976562, "learning_rate": 9.782212885154062e-06, "loss": 24.8087, "step": 28358 }, { "epoch": 675.2149253731343, "grad_norm": 28.13483238220215, "learning_rate": 9.781862745098039e-06, "loss": 23.3633, "step": 28359 }, { "epoch": 675.2388059701492, "grad_norm": 31.56966781616211, "learning_rate": 9.781512605042018e-06, "loss": 23.7821, "step": 28360 }, { "epoch": 675.2626865671642, "grad_norm": 26.181209564208984, "learning_rate": 9.781162464985995e-06, "loss": 23.8749, "step": 28361 }, { "epoch": 675.2865671641791, "grad_norm": 22.584510803222656, "learning_rate": 9.780812324929973e-06, "loss": 24.6195, "step": 28362 }, { "epoch": 675.310447761194, "grad_norm": 29.104854583740234, "learning_rate": 9.78046218487395e-06, "loss": 23.96, "step": 28363 }, { "epoch": 675.334328358209, "grad_norm": 34.88409423828125, "learning_rate": 9.780112044817927e-06, "loss": 24.0361, "step": 28364 }, { "epoch": 675.3582089552239, "grad_norm": 22.4432315826416, "learning_rate": 9.779761904761906e-06, "loss": 24.3744, "step": 28365 }, { "epoch": 675.3820895522388, "grad_norm": 24.852943420410156, "learning_rate": 9.779411764705883e-06, "loss": 23.6984, "step": 28366 }, { "epoch": 675.4059701492537, "grad_norm": 32.755157470703125, "learning_rate": 9.77906162464986e-06, "loss": 23.6734, "step": 28367 }, { "epoch": 675.4298507462687, "grad_norm": 28.13591194152832, "learning_rate": 9.778711484593838e-06, "loss": 24.2046, "step": 28368 }, { "epoch": 675.4537313432836, "grad_norm": 26.57293128967285, "learning_rate": 9.778361344537815e-06, "loss": 24.1002, "step": 28369 }, { "epoch": 675.4776119402985, "grad_norm": 22.18393325805664, "learning_rate": 9.778011204481794e-06, "loss": 24.3341, "step": 28370 }, { "epoch": 675.5014925373134, "grad_norm": 36.54170227050781, "learning_rate": 9.777661064425771e-06, "loss": 24.6731, "step": 28371 }, { "epoch": 675.5253731343283, "grad_norm": 26.669252395629883, "learning_rate": 9.777310924369748e-06, "loss": 24.1361, "step": 28372 }, { "epoch": 675.5492537313432, "grad_norm": 25.564579010009766, "learning_rate": 9.776960784313726e-06, "loss": 23.5321, "step": 28373 }, { "epoch": 675.5731343283583, "grad_norm": 26.70139503479004, "learning_rate": 9.776610644257703e-06, "loss": 23.5781, "step": 28374 }, { "epoch": 675.5970149253732, "grad_norm": 39.812992095947266, "learning_rate": 9.776260504201682e-06, "loss": 24.1693, "step": 28375 }, { "epoch": 675.6208955223881, "grad_norm": 26.151081085205078, "learning_rate": 9.775910364145659e-06, "loss": 24.2871, "step": 28376 }, { "epoch": 675.644776119403, "grad_norm": 45.95269012451172, "learning_rate": 9.775560224089636e-06, "loss": 24.2997, "step": 28377 }, { "epoch": 675.6686567164179, "grad_norm": 31.296398162841797, "learning_rate": 9.775210084033613e-06, "loss": 23.8866, "step": 28378 }, { "epoch": 675.6925373134328, "grad_norm": 41.50266647338867, "learning_rate": 9.774859943977592e-06, "loss": 24.4063, "step": 28379 }, { "epoch": 675.7164179104477, "grad_norm": 31.85862922668457, "learning_rate": 9.77450980392157e-06, "loss": 23.6651, "step": 28380 }, { "epoch": 675.7402985074627, "grad_norm": 30.804162979125977, "learning_rate": 9.774159663865547e-06, "loss": 24.3112, "step": 28381 }, { "epoch": 675.7641791044776, "grad_norm": 41.860443115234375, "learning_rate": 9.773809523809524e-06, "loss": 24.5793, "step": 28382 }, { "epoch": 675.7880597014926, "grad_norm": 25.741579055786133, "learning_rate": 9.773459383753501e-06, "loss": 24.4333, "step": 28383 }, { "epoch": 675.8119402985075, "grad_norm": 38.76439666748047, "learning_rate": 9.77310924369748e-06, "loss": 23.7972, "step": 28384 }, { "epoch": 675.8358208955224, "grad_norm": 31.41914176940918, "learning_rate": 9.772759103641457e-06, "loss": 24.5369, "step": 28385 }, { "epoch": 675.8597014925373, "grad_norm": 33.311946868896484, "learning_rate": 9.772408963585435e-06, "loss": 23.7547, "step": 28386 }, { "epoch": 675.8835820895522, "grad_norm": 37.6534538269043, "learning_rate": 9.772058823529412e-06, "loss": 23.9112, "step": 28387 }, { "epoch": 675.9074626865672, "grad_norm": 29.303983688354492, "learning_rate": 9.77170868347339e-06, "loss": 25.7703, "step": 28388 }, { "epoch": 675.9313432835821, "grad_norm": 29.713428497314453, "learning_rate": 9.771358543417368e-06, "loss": 23.4813, "step": 28389 }, { "epoch": 675.955223880597, "grad_norm": 41.13805389404297, "learning_rate": 9.771008403361345e-06, "loss": 24.8416, "step": 28390 }, { "epoch": 675.9791044776119, "grad_norm": 24.55109977722168, "learning_rate": 9.770658263305323e-06, "loss": 24.1412, "step": 28391 }, { "epoch": 676.0, "grad_norm": 44.923095703125, "learning_rate": 9.7703081232493e-06, "loss": 22.0062, "step": 28392 }, { "epoch": 676.0238805970149, "grad_norm": 32.64291763305664, "learning_rate": 9.769957983193277e-06, "loss": 25.1426, "step": 28393 }, { "epoch": 676.0477611940298, "grad_norm": 48.520599365234375, "learning_rate": 9.769607843137256e-06, "loss": 24.744, "step": 28394 }, { "epoch": 676.0716417910447, "grad_norm": 37.770450592041016, "learning_rate": 9.769257703081233e-06, "loss": 24.9309, "step": 28395 }, { "epoch": 676.0955223880597, "grad_norm": 57.21852111816406, "learning_rate": 9.76890756302521e-06, "loss": 24.5767, "step": 28396 }, { "epoch": 676.1194029850747, "grad_norm": 40.606563568115234, "learning_rate": 9.768557422969188e-06, "loss": 24.4088, "step": 28397 }, { "epoch": 676.1432835820896, "grad_norm": 61.287559509277344, "learning_rate": 9.768207282913167e-06, "loss": 24.9941, "step": 28398 }, { "epoch": 676.1671641791045, "grad_norm": 51.546730041503906, "learning_rate": 9.767857142857144e-06, "loss": 24.3252, "step": 28399 }, { "epoch": 676.1910447761194, "grad_norm": 50.03056335449219, "learning_rate": 9.767507002801121e-06, "loss": 24.1035, "step": 28400 }, { "epoch": 676.2149253731343, "grad_norm": 53.97758483886719, "learning_rate": 9.767156862745098e-06, "loss": 24.83, "step": 28401 }, { "epoch": 676.2388059701492, "grad_norm": 40.89584732055664, "learning_rate": 9.766806722689076e-06, "loss": 23.8439, "step": 28402 }, { "epoch": 676.2626865671642, "grad_norm": 41.035945892333984, "learning_rate": 9.766456582633054e-06, "loss": 24.2145, "step": 28403 }, { "epoch": 676.2865671641791, "grad_norm": 51.07011413574219, "learning_rate": 9.766106442577032e-06, "loss": 24.4396, "step": 28404 }, { "epoch": 676.310447761194, "grad_norm": 39.405540466308594, "learning_rate": 9.765756302521009e-06, "loss": 24.3618, "step": 28405 }, { "epoch": 676.334328358209, "grad_norm": 52.54141616821289, "learning_rate": 9.765406162464986e-06, "loss": 23.8247, "step": 28406 }, { "epoch": 676.3582089552239, "grad_norm": 45.63829803466797, "learning_rate": 9.765056022408963e-06, "loss": 23.9749, "step": 28407 }, { "epoch": 676.3820895522388, "grad_norm": 51.59364700317383, "learning_rate": 9.764705882352942e-06, "loss": 24.2659, "step": 28408 }, { "epoch": 676.4059701492537, "grad_norm": 44.24338150024414, "learning_rate": 9.76435574229692e-06, "loss": 23.2941, "step": 28409 }, { "epoch": 676.4298507462687, "grad_norm": 45.5228271484375, "learning_rate": 9.764005602240897e-06, "loss": 23.9636, "step": 28410 }, { "epoch": 676.4537313432836, "grad_norm": 39.87224578857422, "learning_rate": 9.763655462184874e-06, "loss": 23.8539, "step": 28411 }, { "epoch": 676.4776119402985, "grad_norm": 42.25913619995117, "learning_rate": 9.763305322128851e-06, "loss": 23.3289, "step": 28412 }, { "epoch": 676.5014925373134, "grad_norm": 37.304630279541016, "learning_rate": 9.76295518207283e-06, "loss": 24.257, "step": 28413 }, { "epoch": 676.5253731343283, "grad_norm": 48.69265365600586, "learning_rate": 9.762605042016807e-06, "loss": 22.7033, "step": 28414 }, { "epoch": 676.5492537313432, "grad_norm": 44.31386184692383, "learning_rate": 9.762254901960785e-06, "loss": 24.2443, "step": 28415 }, { "epoch": 676.5731343283583, "grad_norm": 44.67386245727539, "learning_rate": 9.761904761904762e-06, "loss": 24.8551, "step": 28416 }, { "epoch": 676.5970149253732, "grad_norm": 45.94630813598633, "learning_rate": 9.761554621848741e-06, "loss": 24.7467, "step": 28417 }, { "epoch": 676.6208955223881, "grad_norm": 44.59523010253906, "learning_rate": 9.761204481792718e-06, "loss": 24.0735, "step": 28418 }, { "epoch": 676.644776119403, "grad_norm": 39.844085693359375, "learning_rate": 9.760854341736695e-06, "loss": 24.358, "step": 28419 }, { "epoch": 676.6686567164179, "grad_norm": 48.74398422241211, "learning_rate": 9.760504201680673e-06, "loss": 23.6283, "step": 28420 }, { "epoch": 676.6925373134328, "grad_norm": 40.46718215942383, "learning_rate": 9.76015406162465e-06, "loss": 24.3259, "step": 28421 }, { "epoch": 676.7164179104477, "grad_norm": NaN, "learning_rate": 9.759803921568629e-06, "loss": 29.9286, "step": 28422 }, { "epoch": 676.7402985074627, "grad_norm": 56.491127014160156, "learning_rate": 9.759803921568629e-06, "loss": 23.9119, "step": 28423 }, { "epoch": 676.7641791044776, "grad_norm": 55.08971405029297, "learning_rate": 9.759453781512606e-06, "loss": 23.4104, "step": 28424 }, { "epoch": 676.7880597014926, "grad_norm": 42.598716735839844, "learning_rate": 9.759103641456583e-06, "loss": 23.6785, "step": 28425 }, { "epoch": 676.8119402985075, "grad_norm": 36.51124954223633, "learning_rate": 9.75875350140056e-06, "loss": 24.3286, "step": 28426 }, { "epoch": 676.8358208955224, "grad_norm": 40.8990478515625, "learning_rate": 9.758403361344538e-06, "loss": 24.5424, "step": 28427 }, { "epoch": 676.8597014925373, "grad_norm": 36.00132751464844, "learning_rate": 9.758053221288517e-06, "loss": 25.3569, "step": 28428 }, { "epoch": 676.8835820895522, "grad_norm": 52.73878479003906, "learning_rate": 9.757703081232494e-06, "loss": 24.2085, "step": 28429 }, { "epoch": 676.9074626865672, "grad_norm": 45.86772918701172, "learning_rate": 9.757352941176471e-06, "loss": 24.3144, "step": 28430 }, { "epoch": 676.9313432835821, "grad_norm": 49.9136962890625, "learning_rate": 9.757002801120448e-06, "loss": 23.8861, "step": 28431 }, { "epoch": 676.955223880597, "grad_norm": 43.43037796020508, "learning_rate": 9.756652661064426e-06, "loss": 25.067, "step": 28432 }, { "epoch": 676.9791044776119, "grad_norm": 51.324989318847656, "learning_rate": 9.756302521008404e-06, "loss": 25.0838, "step": 28433 }, { "epoch": 677.0, "grad_norm": 40.42143249511719, "learning_rate": 9.755952380952382e-06, "loss": 22.6757, "step": 28434 }, { "epoch": 677.0238805970149, "grad_norm": 43.5252571105957, "learning_rate": 9.755602240896359e-06, "loss": 23.9598, "step": 28435 }, { "epoch": 677.0477611940298, "grad_norm": 37.86907958984375, "learning_rate": 9.755252100840336e-06, "loss": 24.4964, "step": 28436 }, { "epoch": 677.0716417910447, "grad_norm": 47.04248046875, "learning_rate": 9.754901960784315e-06, "loss": 25.5088, "step": 28437 }, { "epoch": 677.0955223880597, "grad_norm": 39.31808853149414, "learning_rate": 9.754551820728292e-06, "loss": 23.7963, "step": 28438 }, { "epoch": 677.1194029850747, "grad_norm": 52.718448638916016, "learning_rate": 9.75420168067227e-06, "loss": 25.3628, "step": 28439 }, { "epoch": 677.1432835820896, "grad_norm": 47.90808868408203, "learning_rate": 9.753851540616247e-06, "loss": 24.5083, "step": 28440 }, { "epoch": 677.1671641791045, "grad_norm": 43.605838775634766, "learning_rate": 9.753501400560224e-06, "loss": 24.4626, "step": 28441 }, { "epoch": 677.1910447761194, "grad_norm": 42.77262878417969, "learning_rate": 9.753151260504203e-06, "loss": 24.0338, "step": 28442 }, { "epoch": 677.2149253731343, "grad_norm": 47.083099365234375, "learning_rate": 9.75280112044818e-06, "loss": 24.9058, "step": 28443 }, { "epoch": 677.2388059701492, "grad_norm": 37.35990905761719, "learning_rate": 9.752450980392157e-06, "loss": 24.7475, "step": 28444 }, { "epoch": 677.2626865671642, "grad_norm": 49.00994873046875, "learning_rate": 9.752100840336135e-06, "loss": 25.8086, "step": 28445 }, { "epoch": 677.2865671641791, "grad_norm": 48.19622802734375, "learning_rate": 9.751750700280112e-06, "loss": 25.0579, "step": 28446 }, { "epoch": 677.310447761194, "grad_norm": 42.074195861816406, "learning_rate": 9.751400560224091e-06, "loss": 24.971, "step": 28447 }, { "epoch": 677.334328358209, "grad_norm": 39.55786895751953, "learning_rate": 9.751050420168068e-06, "loss": 24.51, "step": 28448 }, { "epoch": 677.3582089552239, "grad_norm": 45.00484848022461, "learning_rate": 9.750700280112045e-06, "loss": 24.826, "step": 28449 }, { "epoch": 677.3820895522388, "grad_norm": 39.21566390991211, "learning_rate": 9.750350140056023e-06, "loss": 24.8378, "step": 28450 }, { "epoch": 677.4059701492537, "grad_norm": 46.10448455810547, "learning_rate": 9.75e-06, "loss": 24.8089, "step": 28451 }, { "epoch": 677.4298507462687, "grad_norm": 39.43421173095703, "learning_rate": 9.749649859943979e-06, "loss": 24.2331, "step": 28452 }, { "epoch": 677.4537313432836, "grad_norm": 50.73710632324219, "learning_rate": 9.749299719887956e-06, "loss": 25.7691, "step": 28453 }, { "epoch": 677.4776119402985, "grad_norm": 43.65921401977539, "learning_rate": 9.748949579831933e-06, "loss": 25.5132, "step": 28454 }, { "epoch": 677.5014925373134, "grad_norm": 43.80561065673828, "learning_rate": 9.74859943977591e-06, "loss": 24.506, "step": 28455 }, { "epoch": 677.5253731343283, "grad_norm": 42.193565368652344, "learning_rate": 9.748249299719888e-06, "loss": 25.2331, "step": 28456 }, { "epoch": 677.5492537313432, "grad_norm": NaN, "learning_rate": 9.747899159663867e-06, "loss": 24.9356, "step": 28457 }, { "epoch": 677.5731343283583, "grad_norm": 38.84444046020508, "learning_rate": 9.747899159663867e-06, "loss": 24.4996, "step": 28458 }, { "epoch": 677.5970149253732, "grad_norm": 39.62822341918945, "learning_rate": 9.747549019607844e-06, "loss": 24.8309, "step": 28459 }, { "epoch": 677.6208955223881, "grad_norm": 36.04973220825195, "learning_rate": 9.747198879551821e-06, "loss": 26.0292, "step": 28460 }, { "epoch": 677.644776119403, "grad_norm": 27.25406265258789, "learning_rate": 9.746848739495798e-06, "loss": 25.7023, "step": 28461 }, { "epoch": 677.6686567164179, "grad_norm": 47.098140716552734, "learning_rate": 9.746498599439777e-06, "loss": 26.1586, "step": 28462 }, { "epoch": 677.6925373134328, "grad_norm": 30.754005432128906, "learning_rate": 9.746148459383755e-06, "loss": 26.0331, "step": 28463 }, { "epoch": 677.7164179104477, "grad_norm": 55.54644775390625, "learning_rate": 9.745798319327732e-06, "loss": 25.9971, "step": 28464 }, { "epoch": 677.7402985074627, "grad_norm": 46.47118377685547, "learning_rate": 9.745448179271709e-06, "loss": 26.2589, "step": 28465 }, { "epoch": 677.7641791044776, "grad_norm": 43.573944091796875, "learning_rate": 9.745098039215686e-06, "loss": 26.0113, "step": 28466 }, { "epoch": 677.7880597014926, "grad_norm": 41.6529426574707, "learning_rate": 9.744747899159665e-06, "loss": 26.9343, "step": 28467 }, { "epoch": 677.8119402985075, "grad_norm": 48.42827606201172, "learning_rate": 9.744397759103642e-06, "loss": 26.288, "step": 28468 }, { "epoch": 677.8358208955224, "grad_norm": 35.742000579833984, "learning_rate": 9.74404761904762e-06, "loss": 26.3073, "step": 28469 }, { "epoch": 677.8597014925373, "grad_norm": 50.31163024902344, "learning_rate": 9.743697478991597e-06, "loss": 25.8236, "step": 28470 }, { "epoch": 677.8835820895522, "grad_norm": 37.03639221191406, "learning_rate": 9.743347338935574e-06, "loss": 24.8394, "step": 28471 }, { "epoch": 677.9074626865672, "grad_norm": 52.99988555908203, "learning_rate": 9.742997198879553e-06, "loss": 24.6915, "step": 28472 }, { "epoch": 677.9313432835821, "grad_norm": 42.52507400512695, "learning_rate": 9.74264705882353e-06, "loss": 24.5775, "step": 28473 }, { "epoch": 677.955223880597, "grad_norm": 55.55000305175781, "learning_rate": 9.742296918767507e-06, "loss": 24.5726, "step": 28474 }, { "epoch": 677.9791044776119, "grad_norm": 46.04210662841797, "learning_rate": 9.741946778711485e-06, "loss": 26.8571, "step": 28475 }, { "epoch": 678.0, "grad_norm": 35.14178466796875, "learning_rate": 9.741596638655462e-06, "loss": 22.4404, "step": 28476 }, { "epoch": 678.0238805970149, "grad_norm": 42.73740005493164, "learning_rate": 9.741246498599441e-06, "loss": 25.1118, "step": 28477 }, { "epoch": 678.0477611940298, "grad_norm": 41.48872756958008, "learning_rate": 9.740896358543418e-06, "loss": 25.1775, "step": 28478 }, { "epoch": 678.0716417910447, "grad_norm": 34.68490219116211, "learning_rate": 9.740546218487395e-06, "loss": 26.41, "step": 28479 }, { "epoch": 678.0955223880597, "grad_norm": 54.41889953613281, "learning_rate": 9.740196078431373e-06, "loss": 25.0872, "step": 28480 }, { "epoch": 678.1194029850747, "grad_norm": 42.669490814208984, "learning_rate": 9.739845938375352e-06, "loss": 26.2398, "step": 28481 }, { "epoch": 678.1432835820896, "grad_norm": 49.76118850708008, "learning_rate": 9.739495798319329e-06, "loss": 26.0059, "step": 28482 }, { "epoch": 678.1671641791045, "grad_norm": 42.93458938598633, "learning_rate": 9.739145658263306e-06, "loss": 24.9575, "step": 28483 }, { "epoch": 678.1910447761194, "grad_norm": 44.209651947021484, "learning_rate": 9.738795518207283e-06, "loss": 25.8121, "step": 28484 }, { "epoch": 678.2149253731343, "grad_norm": 33.849159240722656, "learning_rate": 9.73844537815126e-06, "loss": 24.5543, "step": 28485 }, { "epoch": 678.2388059701492, "grad_norm": 54.430572509765625, "learning_rate": 9.73809523809524e-06, "loss": 25.7405, "step": 28486 }, { "epoch": 678.2626865671642, "grad_norm": 46.821292877197266, "learning_rate": 9.737745098039217e-06, "loss": 25.1865, "step": 28487 }, { "epoch": 678.2865671641791, "grad_norm": 46.43588638305664, "learning_rate": 9.737394957983194e-06, "loss": 26.3932, "step": 28488 }, { "epoch": 678.310447761194, "grad_norm": 39.801631927490234, "learning_rate": 9.737044817927171e-06, "loss": 25.6416, "step": 28489 }, { "epoch": 678.334328358209, "grad_norm": 46.59560775756836, "learning_rate": 9.736694677871148e-06, "loss": 25.0184, "step": 28490 }, { "epoch": 678.3582089552239, "grad_norm": 35.331809997558594, "learning_rate": 9.736344537815127e-06, "loss": 25.7827, "step": 28491 }, { "epoch": 678.3820895522388, "grad_norm": NaN, "learning_rate": 9.735994397759105e-06, "loss": 23.5183, "step": 28492 }, { "epoch": 678.4059701492537, "grad_norm": 73.0634765625, "learning_rate": 9.735994397759105e-06, "loss": 25.6454, "step": 28493 }, { "epoch": 678.4298507462687, "grad_norm": 67.00900268554688, "learning_rate": 9.735644257703082e-06, "loss": 27.1655, "step": 28494 }, { "epoch": 678.4537313432836, "grad_norm": 33.46862030029297, "learning_rate": 9.735294117647059e-06, "loss": 27.068, "step": 28495 }, { "epoch": 678.4776119402985, "grad_norm": 46.64191436767578, "learning_rate": 9.734943977591036e-06, "loss": 27.6555, "step": 28496 }, { "epoch": 678.5014925373134, "grad_norm": 29.740318298339844, "learning_rate": 9.734593837535015e-06, "loss": 27.2362, "step": 28497 }, { "epoch": 678.5253731343283, "grad_norm": 34.19194412231445, "learning_rate": 9.734243697478992e-06, "loss": 25.9847, "step": 28498 }, { "epoch": 678.5492537313432, "grad_norm": 41.04691696166992, "learning_rate": 9.73389355742297e-06, "loss": 27.3855, "step": 28499 }, { "epoch": 678.5731343283583, "grad_norm": 21.853052139282227, "learning_rate": 9.733543417366947e-06, "loss": 26.6311, "step": 28500 }, { "epoch": 678.5970149253732, "grad_norm": 47.24453353881836, "learning_rate": 9.733193277310926e-06, "loss": 26.6141, "step": 28501 }, { "epoch": 678.6208955223881, "grad_norm": 26.65744972229004, "learning_rate": 9.732843137254903e-06, "loss": 25.9028, "step": 28502 }, { "epoch": 678.644776119403, "grad_norm": 41.11772155761719, "learning_rate": 9.73249299719888e-06, "loss": 27.7216, "step": 28503 }, { "epoch": 678.6686567164179, "grad_norm": 29.159067153930664, "learning_rate": 9.732142857142858e-06, "loss": 27.6677, "step": 28504 }, { "epoch": 678.6925373134328, "grad_norm": 37.240875244140625, "learning_rate": 9.731792717086835e-06, "loss": 27.8009, "step": 28505 }, { "epoch": 678.7164179104477, "grad_norm": 28.934986114501953, "learning_rate": 9.731442577030814e-06, "loss": 27.4776, "step": 28506 }, { "epoch": 678.7402985074627, "grad_norm": 35.624507904052734, "learning_rate": 9.731092436974791e-06, "loss": 27.7336, "step": 28507 }, { "epoch": 678.7641791044776, "grad_norm": 28.00650978088379, "learning_rate": 9.730742296918768e-06, "loss": 25.9923, "step": 28508 }, { "epoch": 678.7880597014926, "grad_norm": 40.11753463745117, "learning_rate": 9.730392156862745e-06, "loss": 27.1863, "step": 28509 }, { "epoch": 678.8119402985075, "grad_norm": 35.49860382080078, "learning_rate": 9.730042016806723e-06, "loss": 26.9167, "step": 28510 }, { "epoch": 678.8358208955224, "grad_norm": 35.60037612915039, "learning_rate": 9.729691876750702e-06, "loss": 26.4098, "step": 28511 }, { "epoch": 678.8597014925373, "grad_norm": 30.431344985961914, "learning_rate": 9.729341736694679e-06, "loss": 27.0116, "step": 28512 }, { "epoch": 678.8835820895522, "grad_norm": 35.540367126464844, "learning_rate": 9.728991596638656e-06, "loss": 26.8558, "step": 28513 }, { "epoch": 678.9074626865672, "grad_norm": 27.98164176940918, "learning_rate": 9.728641456582633e-06, "loss": 27.9529, "step": 28514 }, { "epoch": 678.9313432835821, "grad_norm": 33.11812973022461, "learning_rate": 9.72829131652661e-06, "loss": 27.23, "step": 28515 }, { "epoch": 678.955223880597, "grad_norm": 28.967010498046875, "learning_rate": 9.72794117647059e-06, "loss": 26.8563, "step": 28516 }, { "epoch": 678.9791044776119, "grad_norm": 30.896198272705078, "learning_rate": 9.727591036414567e-06, "loss": 27.1751, "step": 28517 }, { "epoch": 679.0, "grad_norm": 23.41072654724121, "learning_rate": 9.727240896358544e-06, "loss": 23.8542, "step": 28518 }, { "epoch": 679.0238805970149, "grad_norm": NaN, "learning_rate": 9.726890756302521e-06, "loss": 46.1154, "step": 28519 }, { "epoch": 679.0477611940298, "grad_norm": 36.03982925415039, "learning_rate": 9.726890756302521e-06, "loss": 27.8789, "step": 28520 }, { "epoch": 679.0716417910447, "grad_norm": 56.71711730957031, "learning_rate": 9.7265406162465e-06, "loss": 28.6568, "step": 28521 }, { "epoch": 679.0955223880597, "grad_norm": 45.4666862487793, "learning_rate": 9.726190476190477e-06, "loss": 28.2803, "step": 28522 }, { "epoch": 679.1194029850747, "grad_norm": 44.701114654541016, "learning_rate": 9.725840336134455e-06, "loss": 27.7593, "step": 28523 }, { "epoch": 679.1432835820896, "grad_norm": 36.97593688964844, "learning_rate": 9.725490196078432e-06, "loss": 27.3984, "step": 28524 }, { "epoch": 679.1671641791045, "grad_norm": 58.6756477355957, "learning_rate": 9.725140056022409e-06, "loss": 27.9457, "step": 28525 }, { "epoch": 679.1910447761194, "grad_norm": 41.32433319091797, "learning_rate": 9.724789915966388e-06, "loss": 28.8127, "step": 28526 }, { "epoch": 679.2149253731343, "grad_norm": 71.30577087402344, "learning_rate": 9.724439775910365e-06, "loss": 28.7487, "step": 28527 }, { "epoch": 679.2388059701492, "grad_norm": 65.57572937011719, "learning_rate": 9.724089635854342e-06, "loss": 28.1174, "step": 28528 }, { "epoch": 679.2626865671642, "grad_norm": 46.74290084838867, "learning_rate": 9.72373949579832e-06, "loss": 27.822, "step": 28529 }, { "epoch": 679.2865671641791, "grad_norm": 44.51557159423828, "learning_rate": 9.723389355742297e-06, "loss": 27.3696, "step": 28530 }, { "epoch": 679.310447761194, "grad_norm": 59.60905456542969, "learning_rate": 9.723039215686276e-06, "loss": 27.9224, "step": 28531 }, { "epoch": 679.334328358209, "grad_norm": 43.48095703125, "learning_rate": 9.722689075630253e-06, "loss": 27.9182, "step": 28532 }, { "epoch": 679.3582089552239, "grad_norm": 60.71643829345703, "learning_rate": 9.72233893557423e-06, "loss": 28.9646, "step": 28533 }, { "epoch": 679.3820895522388, "grad_norm": 59.57084655761719, "learning_rate": 9.721988795518208e-06, "loss": 28.38, "step": 28534 }, { "epoch": 679.4059701492537, "grad_norm": 50.71586608886719, "learning_rate": 9.721638655462185e-06, "loss": 28.2359, "step": 28535 }, { "epoch": 679.4298507462687, "grad_norm": 42.589393615722656, "learning_rate": 9.721288515406164e-06, "loss": 26.8758, "step": 28536 }, { "epoch": 679.4537313432836, "grad_norm": 52.43650817871094, "learning_rate": 9.720938375350141e-06, "loss": 26.8315, "step": 28537 }, { "epoch": 679.4776119402985, "grad_norm": 50.3517951965332, "learning_rate": 9.720588235294118e-06, "loss": 28.4556, "step": 28538 }, { "epoch": 679.5014925373134, "grad_norm": 58.947914123535156, "learning_rate": 9.720238095238095e-06, "loss": 27.8361, "step": 28539 }, { "epoch": 679.5253731343283, "grad_norm": 60.92778778076172, "learning_rate": 9.719887955182074e-06, "loss": 28.5258, "step": 28540 }, { "epoch": 679.5492537313432, "grad_norm": 56.74370193481445, "learning_rate": 9.719537815126052e-06, "loss": 27.8257, "step": 28541 }, { "epoch": 679.5731343283583, "grad_norm": 51.7234992980957, "learning_rate": 9.719187675070029e-06, "loss": 27.6561, "step": 28542 }, { "epoch": 679.5970149253732, "grad_norm": 58.82917785644531, "learning_rate": 9.718837535014006e-06, "loss": 29.7257, "step": 28543 }, { "epoch": 679.6208955223881, "grad_norm": 46.784358978271484, "learning_rate": 9.718487394957983e-06, "loss": 28.1122, "step": 28544 }, { "epoch": 679.644776119403, "grad_norm": 65.30654907226562, "learning_rate": 9.718137254901962e-06, "loss": 27.5789, "step": 28545 }, { "epoch": 679.6686567164179, "grad_norm": 54.21125793457031, "learning_rate": 9.71778711484594e-06, "loss": 29.5674, "step": 28546 }, { "epoch": 679.6925373134328, "grad_norm": 54.26139831542969, "learning_rate": 9.717436974789917e-06, "loss": 27.7989, "step": 28547 }, { "epoch": 679.7164179104477, "grad_norm": 46.51760482788086, "learning_rate": 9.717086834733894e-06, "loss": 28.0889, "step": 28548 }, { "epoch": 679.7402985074627, "grad_norm": 58.02479553222656, "learning_rate": 9.716736694677871e-06, "loss": 29.6671, "step": 28549 }, { "epoch": 679.7641791044776, "grad_norm": 44.72018051147461, "learning_rate": 9.71638655462185e-06, "loss": 29.8742, "step": 28550 }, { "epoch": 679.7880597014926, "grad_norm": 59.23502731323242, "learning_rate": 9.716036414565827e-06, "loss": 28.1377, "step": 28551 }, { "epoch": 679.8119402985075, "grad_norm": 47.997802734375, "learning_rate": 9.715686274509805e-06, "loss": 28.0793, "step": 28552 }, { "epoch": 679.8358208955224, "grad_norm": 54.80280685424805, "learning_rate": 9.715336134453782e-06, "loss": 27.9564, "step": 28553 }, { "epoch": 679.8597014925373, "grad_norm": 56.01045227050781, "learning_rate": 9.714985994397759e-06, "loss": 29.4609, "step": 28554 }, { "epoch": 679.8835820895522, "grad_norm": 49.956024169921875, "learning_rate": 9.714635854341738e-06, "loss": 27.7067, "step": 28555 }, { "epoch": 679.9074626865672, "grad_norm": 48.206668853759766, "learning_rate": 9.714285714285715e-06, "loss": 29.1716, "step": 28556 }, { "epoch": 679.9313432835821, "grad_norm": 49.9427375793457, "learning_rate": 9.713935574229692e-06, "loss": 27.6385, "step": 28557 }, { "epoch": 679.955223880597, "grad_norm": 49.640621185302734, "learning_rate": 9.71358543417367e-06, "loss": 27.9321, "step": 28558 }, { "epoch": 679.9791044776119, "grad_norm": 57.95891571044922, "learning_rate": 9.713235294117649e-06, "loss": 29.005, "step": 28559 }, { "epoch": 680.0, "grad_norm": 41.629615783691406, "learning_rate": 9.712885154061626e-06, "loss": 23.9829, "step": 28560 }, { "epoch": 680.0, "step": 28560, "total_flos": 1.4039097966031004e+18, "train_loss": 0.7132285186222621, "train_runtime": 25609.4998, "train_samples_per_second": 142.11, "train_steps_per_second": 1.115 }, { "epoch": 680.0238805970149, "grad_norm": NaN, "learning_rate": 1e-05, "loss": 39.0296, "step": 28561 }, { "epoch": 680.0477611940298, "grad_norm": 172.53317260742188, "learning_rate": 1e-05, "loss": 28.8144, "step": 28562 }, { "epoch": 680.0716417910447, "grad_norm": Infinity, "learning_rate": 9.99965986394558e-06, "loss": 38.7396, "step": 28563 }, { "epoch": 680.0955223880597, "grad_norm": 711.681640625, "learning_rate": 9.99965986394558e-06, "loss": 38.662, "step": 28564 }, { "epoch": 680.1194029850747, "grad_norm": 393.78155517578125, "learning_rate": 9.999319727891158e-06, "loss": 32.6619, "step": 28565 }, { "epoch": 680.1432835820896, "grad_norm": 124.72893524169922, "learning_rate": 9.998979591836736e-06, "loss": 29.1967, "step": 28566 }, { "epoch": 680.1671641791045, "grad_norm": 166.9690399169922, "learning_rate": 9.998639455782314e-06, "loss": 27.362, "step": 28567 }, { "epoch": 680.1910447761194, "grad_norm": 133.78500366210938, "learning_rate": 9.998299319727893e-06, "loss": 28.0709, "step": 28568 }, { "epoch": 680.2149253731343, "grad_norm": 81.23767852783203, "learning_rate": 9.99795918367347e-06, "loss": 26.3403, "step": 28569 }, { "epoch": 680.2388059701492, "grad_norm": 64.27835083007812, "learning_rate": 9.997619047619048e-06, "loss": 26.2768, "step": 28570 }, { "epoch": 680.2626865671642, "grad_norm": 59.35773849487305, "learning_rate": 9.997278911564626e-06, "loss": 25.7791, "step": 28571 }, { "epoch": 680.2865671641791, "grad_norm": 48.699302673339844, "learning_rate": 9.996938775510205e-06, "loss": 25.8547, "step": 28572 }, { "epoch": 680.310447761194, "grad_norm": 50.62760925292969, "learning_rate": 9.996598639455783e-06, "loss": 26.2303, "step": 28573 }, { "epoch": 680.334328358209, "grad_norm": 45.13276290893555, "learning_rate": 9.996258503401362e-06, "loss": 26.5602, "step": 28574 }, { "epoch": 680.3582089552239, "grad_norm": 36.801856994628906, "learning_rate": 9.99591836734694e-06, "loss": 25.7953, "step": 28575 }, { "epoch": 680.3820895522388, "grad_norm": 34.50274658203125, "learning_rate": 9.995578231292517e-06, "loss": 24.7594, "step": 28576 }, { "epoch": 680.4059701492537, "grad_norm": 35.470394134521484, "learning_rate": 9.995238095238095e-06, "loss": 25.3314, "step": 28577 }, { "epoch": 680.4298507462687, "grad_norm": 35.67211151123047, "learning_rate": 9.994897959183675e-06, "loss": 25.9802, "step": 28578 }, { "epoch": 680.4537313432836, "grad_norm": 30.44239616394043, "learning_rate": 9.994557823129252e-06, "loss": 25.2293, "step": 28579 }, { "epoch": 680.4776119402985, "grad_norm": 38.773170471191406, "learning_rate": 9.99421768707483e-06, "loss": 25.2444, "step": 28580 }, { "epoch": 680.5014925373134, "grad_norm": 37.478050231933594, "learning_rate": 9.993877551020409e-06, "loss": 23.6805, "step": 28581 }, { "epoch": 680.5253731343283, "grad_norm": 28.686569213867188, "learning_rate": 9.993537414965987e-06, "loss": 25.0272, "step": 28582 }, { "epoch": 680.5492537313432, "grad_norm": 34.217464447021484, "learning_rate": 9.993197278911566e-06, "loss": 23.9591, "step": 28583 }, { "epoch": 680.5731343283583, "grad_norm": 31.413742065429688, "learning_rate": 9.992857142857144e-06, "loss": 24.9712, "step": 28584 }, { "epoch": 680.5970149253732, "grad_norm": 26.54638671875, "learning_rate": 9.992517006802723e-06, "loss": 24.7573, "step": 28585 }, { "epoch": 680.6208955223881, "grad_norm": 25.328115463256836, "learning_rate": 9.992176870748301e-06, "loss": 24.1041, "step": 28586 }, { "epoch": 680.644776119403, "grad_norm": 25.97628402709961, "learning_rate": 9.991836734693878e-06, "loss": 23.6751, "step": 28587 }, { "epoch": 680.6686567164179, "grad_norm": 26.06534194946289, "learning_rate": 9.991496598639456e-06, "loss": 23.5802, "step": 28588 }, { "epoch": 680.6925373134328, "grad_norm": 24.20393943786621, "learning_rate": 9.991156462585035e-06, "loss": 23.8208, "step": 28589 }, { "epoch": 680.7164179104477, "grad_norm": NaN, "learning_rate": 9.990816326530613e-06, "loss": 39.1017, "step": 28590 }, { "epoch": 680.7402985074627, "grad_norm": 29.364845275878906, "learning_rate": 9.990816326530613e-06, "loss": 23.7206, "step": 28591 }, { "epoch": 680.7641791044776, "grad_norm": 27.06854820251465, "learning_rate": 9.990476190476191e-06, "loss": 23.8936, "step": 28592 }, { "epoch": 680.7880597014926, "grad_norm": 25.268413543701172, "learning_rate": 9.99013605442177e-06, "loss": 23.9245, "step": 28593 }, { "epoch": 680.8119402985075, "grad_norm": 25.14745330810547, "learning_rate": 9.989795918367348e-06, "loss": 23.9592, "step": 28594 }, { "epoch": 680.8358208955224, "grad_norm": 29.528573989868164, "learning_rate": 9.989455782312925e-06, "loss": 24.6224, "step": 28595 }, { "epoch": 680.8597014925373, "grad_norm": 25.012426376342773, "learning_rate": 9.989115646258503e-06, "loss": 23.0661, "step": 28596 }, { "epoch": 680.8835820895522, "grad_norm": 22.714475631713867, "learning_rate": 9.988775510204084e-06, "loss": 22.9349, "step": 28597 }, { "epoch": 680.9074626865672, "grad_norm": 33.528812408447266, "learning_rate": 9.98843537414966e-06, "loss": 24.8574, "step": 28598 }, { "epoch": 680.9313432835821, "grad_norm": 26.331222534179688, "learning_rate": 9.988095238095239e-06, "loss": 23.2101, "step": 28599 }, { "epoch": 680.955223880597, "grad_norm": 29.997474670410156, "learning_rate": 9.987755102040817e-06, "loss": 24.8248, "step": 28600 }, { "epoch": 680.9791044776119, "grad_norm": 36.36250305175781, "learning_rate": 9.987414965986396e-06, "loss": 24.2421, "step": 28601 }, { "epoch": 681.0, "grad_norm": 27.042272567749023, "learning_rate": 9.987074829931972e-06, "loss": 21.3873, "step": 28602 }, { "epoch": 681.0238805970149, "grad_norm": 29.709016799926758, "learning_rate": 9.986734693877552e-06, "loss": 23.6949, "step": 28603 }, { "epoch": 681.0477611940298, "grad_norm": 39.276695251464844, "learning_rate": 9.98639455782313e-06, "loss": 24.2106, "step": 28604 }, { "epoch": 681.0716417910447, "grad_norm": 24.6978816986084, "learning_rate": 9.986054421768708e-06, "loss": 24.0097, "step": 28605 }, { "epoch": 681.0955223880597, "grad_norm": 34.929283142089844, "learning_rate": 9.985714285714286e-06, "loss": 22.8073, "step": 28606 }, { "epoch": 681.1194029850747, "grad_norm": 29.170747756958008, "learning_rate": 9.985374149659864e-06, "loss": 23.956, "step": 28607 }, { "epoch": 681.1432835820896, "grad_norm": 32.939334869384766, "learning_rate": 9.985034013605443e-06, "loss": 23.9158, "step": 28608 }, { "epoch": 681.1671641791045, "grad_norm": 29.28949737548828, "learning_rate": 9.984693877551021e-06, "loss": 22.4841, "step": 28609 }, { "epoch": 681.1910447761194, "grad_norm": 31.042869567871094, "learning_rate": 9.9843537414966e-06, "loss": 23.7701, "step": 28610 }, { "epoch": 681.2149253731343, "grad_norm": 27.796606063842773, "learning_rate": 9.984013605442178e-06, "loss": 22.5193, "step": 28611 }, { "epoch": 681.2388059701492, "grad_norm": 33.601348876953125, "learning_rate": 9.983673469387756e-06, "loss": 22.94, "step": 28612 }, { "epoch": 681.2626865671642, "grad_norm": 26.408971786499023, "learning_rate": 9.983333333333333e-06, "loss": 23.7933, "step": 28613 }, { "epoch": 681.2865671641791, "grad_norm": 26.81049919128418, "learning_rate": 9.982993197278913e-06, "loss": 23.4816, "step": 28614 }, { "epoch": 681.310447761194, "grad_norm": 33.103851318359375, "learning_rate": 9.982653061224492e-06, "loss": 24.272, "step": 28615 }, { "epoch": 681.334328358209, "grad_norm": 26.05010223388672, "learning_rate": 9.982312925170068e-06, "loss": 24.4315, "step": 28616 }, { "epoch": 681.3582089552239, "grad_norm": 27.762937545776367, "learning_rate": 9.981972789115647e-06, "loss": 24.8738, "step": 28617 }, { "epoch": 681.3820895522388, "grad_norm": 32.97795486450195, "learning_rate": 9.981632653061225e-06, "loss": 23.7692, "step": 28618 }, { "epoch": 681.4059701492537, "grad_norm": 24.92902183532715, "learning_rate": 9.981292517006804e-06, "loss": 23.9053, "step": 28619 }, { "epoch": 681.4298507462687, "grad_norm": 25.67403793334961, "learning_rate": 9.980952380952382e-06, "loss": 23.0696, "step": 28620 }, { "epoch": 681.4537313432836, "grad_norm": 26.5574893951416, "learning_rate": 9.98061224489796e-06, "loss": 22.9882, "step": 28621 }, { "epoch": 681.4776119402985, "grad_norm": 25.831443786621094, "learning_rate": 9.980272108843539e-06, "loss": 23.9185, "step": 28622 }, { "epoch": 681.5014925373134, "grad_norm": 25.212665557861328, "learning_rate": 9.979931972789116e-06, "loss": 23.5281, "step": 28623 }, { "epoch": 681.5253731343283, "grad_norm": 36.33821105957031, "learning_rate": 9.979591836734694e-06, "loss": 23.0672, "step": 28624 }, { "epoch": 681.5492537313432, "grad_norm": 30.03854751586914, "learning_rate": 9.979251700680273e-06, "loss": 23.3173, "step": 28625 }, { "epoch": 681.5731343283583, "grad_norm": 28.92542839050293, "learning_rate": 9.978911564625851e-06, "loss": 24.0376, "step": 28626 }, { "epoch": 681.5970149253732, "grad_norm": 30.687612533569336, "learning_rate": 9.97857142857143e-06, "loss": 23.8491, "step": 28627 }, { "epoch": 681.6208955223881, "grad_norm": 27.21717071533203, "learning_rate": 9.978231292517008e-06, "loss": 22.9388, "step": 28628 }, { "epoch": 681.644776119403, "grad_norm": 29.117448806762695, "learning_rate": 9.977891156462586e-06, "loss": 24.2257, "step": 28629 }, { "epoch": 681.6686567164179, "grad_norm": 30.735639572143555, "learning_rate": 9.977551020408165e-06, "loss": 24.524, "step": 28630 }, { "epoch": 681.6925373134328, "grad_norm": 27.4285831451416, "learning_rate": 9.977210884353741e-06, "loss": 23.5696, "step": 28631 }, { "epoch": 681.7164179104477, "grad_norm": 28.390178680419922, "learning_rate": 9.976870748299321e-06, "loss": 23.519, "step": 28632 }, { "epoch": 681.7402985074627, "grad_norm": 29.70440101623535, "learning_rate": 9.9765306122449e-06, "loss": 23.1855, "step": 28633 }, { "epoch": 681.7641791044776, "grad_norm": 28.802183151245117, "learning_rate": 9.976190476190477e-06, "loss": 23.618, "step": 28634 }, { "epoch": 681.7880597014926, "grad_norm": 26.63904571533203, "learning_rate": 9.975850340136055e-06, "loss": 23.1811, "step": 28635 }, { "epoch": 681.8119402985075, "grad_norm": 34.16448974609375, "learning_rate": 9.975510204081633e-06, "loss": 24.0845, "step": 28636 }, { "epoch": 681.8358208955224, "grad_norm": 27.568811416625977, "learning_rate": 9.975170068027212e-06, "loss": 23.6952, "step": 28637 }, { "epoch": 681.8597014925373, "grad_norm": 32.234832763671875, "learning_rate": 9.97482993197279e-06, "loss": 23.5129, "step": 28638 }, { "epoch": 681.8835820895522, "grad_norm": 35.83145523071289, "learning_rate": 9.974489795918369e-06, "loss": 22.5409, "step": 28639 }, { "epoch": 681.9074626865672, "grad_norm": 24.999113082885742, "learning_rate": 9.974149659863947e-06, "loss": 22.8622, "step": 28640 }, { "epoch": 681.9313432835821, "grad_norm": 36.047019958496094, "learning_rate": 9.973809523809524e-06, "loss": 23.8977, "step": 28641 }, { "epoch": 681.955223880597, "grad_norm": 27.003183364868164, "learning_rate": 9.973469387755102e-06, "loss": 23.2654, "step": 28642 }, { "epoch": 681.9791044776119, "grad_norm": 35.153011322021484, "learning_rate": 9.97312925170068e-06, "loss": 22.9293, "step": 28643 }, { "epoch": 682.0, "grad_norm": 27.591840744018555, "learning_rate": 9.972789115646259e-06, "loss": 19.7723, "step": 28644 }, { "epoch": 682.0238805970149, "grad_norm": 32.015342712402344, "learning_rate": 9.972448979591838e-06, "loss": 22.7244, "step": 28645 }, { "epoch": 682.0477611940298, "grad_norm": 32.36658477783203, "learning_rate": 9.972108843537416e-06, "loss": 22.9997, "step": 28646 }, { "epoch": 682.0716417910447, "grad_norm": 33.985923767089844, "learning_rate": 9.971768707482994e-06, "loss": 22.6487, "step": 28647 }, { "epoch": 682.0955223880597, "grad_norm": 24.986404418945312, "learning_rate": 9.971428571428571e-06, "loss": 23.8197, "step": 28648 }, { "epoch": 682.1194029850747, "grad_norm": 33.179683685302734, "learning_rate": 9.97108843537415e-06, "loss": 22.8802, "step": 28649 }, { "epoch": 682.1432835820896, "grad_norm": 27.976146697998047, "learning_rate": 9.97074829931973e-06, "loss": 23.0087, "step": 28650 }, { "epoch": 682.1671641791045, "grad_norm": 31.114912033081055, "learning_rate": 9.970408163265306e-06, "loss": 23.701, "step": 28651 }, { "epoch": 682.1910447761194, "grad_norm": 28.03066062927246, "learning_rate": 9.970068027210885e-06, "loss": 22.5306, "step": 28652 }, { "epoch": 682.2149253731343, "grad_norm": 28.372594833374023, "learning_rate": 9.969727891156463e-06, "loss": 22.785, "step": 28653 }, { "epoch": 682.2388059701492, "grad_norm": 29.594907760620117, "learning_rate": 9.969387755102042e-06, "loss": 23.6997, "step": 28654 }, { "epoch": 682.2626865671642, "grad_norm": 36.37104797363281, "learning_rate": 9.96904761904762e-06, "loss": 23.6487, "step": 28655 }, { "epoch": 682.2865671641791, "grad_norm": 28.99105453491211, "learning_rate": 9.968707482993198e-06, "loss": 23.8201, "step": 28656 }, { "epoch": 682.310447761194, "grad_norm": 30.01519775390625, "learning_rate": 9.968367346938777e-06, "loss": 24.0585, "step": 28657 }, { "epoch": 682.334328358209, "grad_norm": 35.49665451049805, "learning_rate": 9.968027210884355e-06, "loss": 22.5232, "step": 28658 }, { "epoch": 682.3582089552239, "grad_norm": 28.810476303100586, "learning_rate": 9.967687074829932e-06, "loss": 24.4217, "step": 28659 }, { "epoch": 682.3820895522388, "grad_norm": 26.94877815246582, "learning_rate": 9.96734693877551e-06, "loss": 23.0297, "step": 28660 }, { "epoch": 682.4059701492537, "grad_norm": 30.61544418334961, "learning_rate": 9.96700680272109e-06, "loss": 23.0749, "step": 28661 }, { "epoch": 682.4298507462687, "grad_norm": 26.074003219604492, "learning_rate": 9.966666666666667e-06, "loss": 23.3517, "step": 28662 }, { "epoch": 682.4537313432836, "grad_norm": 28.907644271850586, "learning_rate": 9.966326530612246e-06, "loss": 23.0058, "step": 28663 }, { "epoch": 682.4776119402985, "grad_norm": 31.645612716674805, "learning_rate": 9.965986394557824e-06, "loss": 23.0384, "step": 28664 }, { "epoch": 682.5014925373134, "grad_norm": 29.023523330688477, "learning_rate": 9.965646258503402e-06, "loss": 24.2648, "step": 28665 }, { "epoch": 682.5253731343283, "grad_norm": 25.84768295288086, "learning_rate": 9.96530612244898e-06, "loss": 23.7811, "step": 28666 }, { "epoch": 682.5492537313432, "grad_norm": 42.02558517456055, "learning_rate": 9.96496598639456e-06, "loss": 24.0436, "step": 28667 }, { "epoch": 682.5731343283583, "grad_norm": 25.439035415649414, "learning_rate": 9.964625850340138e-06, "loss": 22.7655, "step": 28668 }, { "epoch": 682.5970149253732, "grad_norm": 31.331520080566406, "learning_rate": 9.964285714285714e-06, "loss": 22.6179, "step": 28669 }, { "epoch": 682.6208955223881, "grad_norm": 37.08583450317383, "learning_rate": 9.963945578231293e-06, "loss": 23.6825, "step": 28670 }, { "epoch": 682.644776119403, "grad_norm": 24.663379669189453, "learning_rate": 9.963605442176871e-06, "loss": 23.1765, "step": 28671 }, { "epoch": 682.6686567164179, "grad_norm": 41.285919189453125, "learning_rate": 9.96326530612245e-06, "loss": 24.3286, "step": 28672 }, { "epoch": 682.6925373134328, "grad_norm": 27.82160758972168, "learning_rate": 9.962925170068028e-06, "loss": 23.3625, "step": 28673 }, { "epoch": 682.7164179104477, "grad_norm": 34.35285568237305, "learning_rate": 9.962585034013607e-06, "loss": 23.5875, "step": 28674 }, { "epoch": 682.7402985074627, "grad_norm": 31.889612197875977, "learning_rate": 9.962244897959185e-06, "loss": 23.5446, "step": 28675 }, { "epoch": 682.7641791044776, "grad_norm": 29.035192489624023, "learning_rate": 9.961904761904763e-06, "loss": 24.1974, "step": 28676 }, { "epoch": 682.7880597014926, "grad_norm": 38.834327697753906, "learning_rate": 9.96156462585034e-06, "loss": 23.589, "step": 28677 }, { "epoch": 682.8119402985075, "grad_norm": 30.822376251220703, "learning_rate": 9.961224489795919e-06, "loss": 23.4541, "step": 28678 }, { "epoch": 682.8358208955224, "grad_norm": 26.9703369140625, "learning_rate": 9.960884353741499e-06, "loss": 23.744, "step": 28679 }, { "epoch": 682.8597014925373, "grad_norm": 31.2972354888916, "learning_rate": 9.960544217687075e-06, "loss": 22.2382, "step": 28680 }, { "epoch": 682.8835820895522, "grad_norm": 25.8646183013916, "learning_rate": 9.960204081632654e-06, "loss": 23.8466, "step": 28681 }, { "epoch": 682.9074626865672, "grad_norm": 25.43581199645996, "learning_rate": 9.959863945578232e-06, "loss": 23.8355, "step": 28682 }, { "epoch": 682.9313432835821, "grad_norm": 31.458663940429688, "learning_rate": 9.95952380952381e-06, "loss": 22.8999, "step": 28683 }, { "epoch": 682.955223880597, "grad_norm": 26.04758644104004, "learning_rate": 9.959183673469387e-06, "loss": 23.2045, "step": 28684 }, { "epoch": 682.9791044776119, "grad_norm": 26.496578216552734, "learning_rate": 9.958843537414967e-06, "loss": 23.323, "step": 28685 }, { "epoch": 683.0, "grad_norm": 21.900089263916016, "learning_rate": 9.958503401360546e-06, "loss": 20.1, "step": 28686 }, { "epoch": 683.0238805970149, "grad_norm": 28.29529571533203, "learning_rate": 9.958163265306123e-06, "loss": 23.1785, "step": 28687 }, { "epoch": 683.0477611940298, "grad_norm": 24.744876861572266, "learning_rate": 9.957823129251701e-06, "loss": 23.8924, "step": 28688 }, { "epoch": 683.0716417910447, "grad_norm": 25.35099220275879, "learning_rate": 9.95748299319728e-06, "loss": 22.8532, "step": 28689 }, { "epoch": 683.0955223880597, "grad_norm": 27.33506202697754, "learning_rate": 9.957142857142858e-06, "loss": 22.9698, "step": 28690 }, { "epoch": 683.1194029850747, "grad_norm": 25.572179794311523, "learning_rate": 9.956802721088436e-06, "loss": 23.6855, "step": 28691 }, { "epoch": 683.1432835820896, "grad_norm": 25.683059692382812, "learning_rate": 9.956462585034015e-06, "loss": 23.1586, "step": 28692 }, { "epoch": 683.1671641791045, "grad_norm": 23.8266658782959, "learning_rate": 9.956122448979593e-06, "loss": 22.4555, "step": 28693 }, { "epoch": 683.1910447761194, "grad_norm": 26.256507873535156, "learning_rate": 9.955782312925172e-06, "loss": 23.0755, "step": 28694 }, { "epoch": 683.2149253731343, "grad_norm": 27.831928253173828, "learning_rate": 9.955442176870748e-06, "loss": 22.5466, "step": 28695 }, { "epoch": 683.2388059701492, "grad_norm": 27.55316734313965, "learning_rate": 9.955102040816327e-06, "loss": 22.4636, "step": 28696 }, { "epoch": 683.2626865671642, "grad_norm": 28.31735610961914, "learning_rate": 9.954761904761905e-06, "loss": 23.3146, "step": 28697 }, { "epoch": 683.2865671641791, "grad_norm": 20.70884895324707, "learning_rate": 9.954421768707484e-06, "loss": 22.7737, "step": 28698 }, { "epoch": 683.310447761194, "grad_norm": 31.67475700378418, "learning_rate": 9.954081632653062e-06, "loss": 23.84, "step": 28699 }, { "epoch": 683.334328358209, "grad_norm": 27.653221130371094, "learning_rate": 9.95374149659864e-06, "loss": 22.6162, "step": 28700 }, { "epoch": 683.3582089552239, "grad_norm": 28.82754898071289, "learning_rate": 9.953401360544219e-06, "loss": 22.8774, "step": 28701 }, { "epoch": 683.3820895522388, "grad_norm": 26.857934951782227, "learning_rate": 9.953061224489797e-06, "loss": 23.7098, "step": 28702 }, { "epoch": 683.4059701492537, "grad_norm": 28.10578155517578, "learning_rate": 9.952721088435376e-06, "loss": 23.8887, "step": 28703 }, { "epoch": 683.4298507462687, "grad_norm": 25.764310836791992, "learning_rate": 9.952380952380954e-06, "loss": 23.7634, "step": 28704 }, { "epoch": 683.4537313432836, "grad_norm": 27.81568145751953, "learning_rate": 9.95204081632653e-06, "loss": 23.5668, "step": 28705 }, { "epoch": 683.4776119402985, "grad_norm": 26.667264938354492, "learning_rate": 9.95170068027211e-06, "loss": 22.4885, "step": 28706 }, { "epoch": 683.5014925373134, "grad_norm": 24.212448120117188, "learning_rate": 9.951360544217688e-06, "loss": 23.8496, "step": 28707 }, { "epoch": 683.5253731343283, "grad_norm": 26.162187576293945, "learning_rate": 9.951020408163266e-06, "loss": 23.2629, "step": 28708 }, { "epoch": 683.5492537313432, "grad_norm": 28.075977325439453, "learning_rate": 9.950680272108844e-06, "loss": 22.8792, "step": 28709 }, { "epoch": 683.5731343283583, "grad_norm": 27.461660385131836, "learning_rate": 9.950340136054423e-06, "loss": 22.2256, "step": 28710 }, { "epoch": 683.5970149253732, "grad_norm": 24.468643188476562, "learning_rate": 9.950000000000001e-06, "loss": 24.0171, "step": 28711 }, { "epoch": 683.6208955223881, "grad_norm": 22.618274688720703, "learning_rate": 9.949659863945578e-06, "loss": 23.1233, "step": 28712 }, { "epoch": 683.644776119403, "grad_norm": NaN, "learning_rate": 9.949319727891156e-06, "loss": 24.3183, "step": 28713 }, { "epoch": 683.6686567164179, "grad_norm": NaN, "learning_rate": 9.949319727891156e-06, "loss": 42.838, "step": 28714 }, { "epoch": 683.6925373134328, "grad_norm": 30.37313461303711, "learning_rate": 9.949319727891156e-06, "loss": 23.7622, "step": 28715 }, { "epoch": 683.7164179104477, "grad_norm": 25.207759857177734, "learning_rate": 9.948979591836737e-06, "loss": 23.4847, "step": 28716 }, { "epoch": 683.7402985074627, "grad_norm": 24.13909149169922, "learning_rate": 9.948639455782313e-06, "loss": 23.3051, "step": 28717 }, { "epoch": 683.7641791044776, "grad_norm": 24.4110164642334, "learning_rate": 9.948299319727892e-06, "loss": 23.4064, "step": 28718 }, { "epoch": 683.7880597014926, "grad_norm": 25.75653076171875, "learning_rate": 9.94795918367347e-06, "loss": 23.6169, "step": 28719 }, { "epoch": 683.8119402985075, "grad_norm": 24.966590881347656, "learning_rate": 9.947619047619049e-06, "loss": 22.6557, "step": 28720 }, { "epoch": 683.8358208955224, "grad_norm": 27.204526901245117, "learning_rate": 9.947278911564627e-06, "loss": 23.8824, "step": 28721 }, { "epoch": 683.8597014925373, "grad_norm": 24.869504928588867, "learning_rate": 9.946938775510205e-06, "loss": 24.2817, "step": 28722 }, { "epoch": 683.8835820895522, "grad_norm": 27.119169235229492, "learning_rate": 9.946598639455784e-06, "loss": 23.7883, "step": 28723 }, { "epoch": 683.9074626865672, "grad_norm": 30.158279418945312, "learning_rate": 9.946258503401362e-06, "loss": 23.0226, "step": 28724 }, { "epoch": 683.9313432835821, "grad_norm": 30.30036735534668, "learning_rate": 9.945918367346939e-06, "loss": 23.297, "step": 28725 }, { "epoch": 683.955223880597, "grad_norm": 24.33742904663086, "learning_rate": 9.945578231292517e-06, "loss": 23.4821, "step": 28726 }, { "epoch": 683.9791044776119, "grad_norm": 25.11284637451172, "learning_rate": 9.945238095238096e-06, "loss": 23.2559, "step": 28727 }, { "epoch": 684.0, "grad_norm": 20.665430068969727, "learning_rate": 9.944897959183674e-06, "loss": 19.8381, "step": 28728 }, { "epoch": 684.0238805970149, "grad_norm": 30.4020938873291, "learning_rate": 9.944557823129253e-06, "loss": 24.518, "step": 28729 }, { "epoch": 684.0477611940298, "grad_norm": 24.68416404724121, "learning_rate": 9.944217687074831e-06, "loss": 23.4207, "step": 28730 }, { "epoch": 684.0716417910447, "grad_norm": 25.771860122680664, "learning_rate": 9.94387755102041e-06, "loss": 23.978, "step": 28731 }, { "epoch": 684.0955223880597, "grad_norm": 25.71906852722168, "learning_rate": 9.943537414965986e-06, "loss": 23.1232, "step": 28732 }, { "epoch": 684.1194029850747, "grad_norm": 24.168882369995117, "learning_rate": 9.943197278911565e-06, "loss": 22.8144, "step": 28733 }, { "epoch": 684.1432835820896, "grad_norm": 26.539854049682617, "learning_rate": 9.942857142857145e-06, "loss": 24.3642, "step": 28734 }, { "epoch": 684.1671641791045, "grad_norm": 25.80190658569336, "learning_rate": 9.942517006802721e-06, "loss": 21.9825, "step": 28735 }, { "epoch": 684.1910447761194, "grad_norm": 28.940114974975586, "learning_rate": 9.9421768707483e-06, "loss": 22.9736, "step": 28736 }, { "epoch": 684.2149253731343, "grad_norm": 30.07118797302246, "learning_rate": 9.941836734693878e-06, "loss": 23.4172, "step": 28737 }, { "epoch": 684.2388059701492, "grad_norm": 25.273120880126953, "learning_rate": 9.941496598639457e-06, "loss": 23.4308, "step": 28738 }, { "epoch": 684.2626865671642, "grad_norm": 26.093006134033203, "learning_rate": 9.941156462585035e-06, "loss": 22.9549, "step": 28739 }, { "epoch": 684.2865671641791, "grad_norm": 22.327163696289062, "learning_rate": 9.940816326530614e-06, "loss": 22.6506, "step": 28740 }, { "epoch": 684.310447761194, "grad_norm": 24.768526077270508, "learning_rate": 9.940476190476192e-06, "loss": 22.6888, "step": 28741 }, { "epoch": 684.334328358209, "grad_norm": 24.266672134399414, "learning_rate": 9.94013605442177e-06, "loss": 23.4242, "step": 28742 }, { "epoch": 684.3582089552239, "grad_norm": 23.01766014099121, "learning_rate": 9.939795918367347e-06, "loss": 22.6364, "step": 28743 }, { "epoch": 684.3820895522388, "grad_norm": 29.425064086914062, "learning_rate": 9.939455782312926e-06, "loss": 22.879, "step": 28744 }, { "epoch": 684.4059701492537, "grad_norm": 30.92513656616211, "learning_rate": 9.939115646258504e-06, "loss": 23.7366, "step": 28745 }, { "epoch": 684.4298507462687, "grad_norm": 28.251447677612305, "learning_rate": 9.938775510204082e-06, "loss": 24.0677, "step": 28746 }, { "epoch": 684.4537313432836, "grad_norm": 24.068260192871094, "learning_rate": 9.93843537414966e-06, "loss": 23.192, "step": 28747 }, { "epoch": 684.4776119402985, "grad_norm": 31.064836502075195, "learning_rate": 9.93809523809524e-06, "loss": 24.0375, "step": 28748 }, { "epoch": 684.5014925373134, "grad_norm": 29.491209030151367, "learning_rate": 9.937755102040818e-06, "loss": 23.908, "step": 28749 }, { "epoch": 684.5253731343283, "grad_norm": 27.14201545715332, "learning_rate": 9.937414965986394e-06, "loss": 23.1585, "step": 28750 }, { "epoch": 684.5492537313432, "grad_norm": 26.262081146240234, "learning_rate": 9.937074829931974e-06, "loss": 22.8844, "step": 28751 }, { "epoch": 684.5731343283583, "grad_norm": 29.731300354003906, "learning_rate": 9.936734693877553e-06, "loss": 22.3932, "step": 28752 }, { "epoch": 684.5970149253732, "grad_norm": 28.33931541442871, "learning_rate": 9.93639455782313e-06, "loss": 23.5816, "step": 28753 }, { "epoch": 684.6208955223881, "grad_norm": 26.286136627197266, "learning_rate": 9.936054421768708e-06, "loss": 23.3368, "step": 28754 }, { "epoch": 684.644776119403, "grad_norm": 21.797298431396484, "learning_rate": 9.935714285714286e-06, "loss": 23.8838, "step": 28755 }, { "epoch": 684.6686567164179, "grad_norm": 27.336891174316406, "learning_rate": 9.935374149659865e-06, "loss": 23.1492, "step": 28756 }, { "epoch": 684.6925373134328, "grad_norm": 28.629894256591797, "learning_rate": 9.935034013605443e-06, "loss": 22.2315, "step": 28757 }, { "epoch": 684.7164179104477, "grad_norm": 29.65131378173828, "learning_rate": 9.934693877551022e-06, "loss": 23.1842, "step": 28758 }, { "epoch": 684.7402985074627, "grad_norm": 24.937726974487305, "learning_rate": 9.9343537414966e-06, "loss": 23.393, "step": 28759 }, { "epoch": 684.7641791044776, "grad_norm": 24.396793365478516, "learning_rate": 9.934013605442177e-06, "loss": 23.5922, "step": 28760 }, { "epoch": 684.7880597014926, "grad_norm": 26.87015724182129, "learning_rate": 9.933673469387755e-06, "loss": 23.8652, "step": 28761 }, { "epoch": 684.8119402985075, "grad_norm": 24.105772018432617, "learning_rate": 9.933333333333334e-06, "loss": 23.4697, "step": 28762 }, { "epoch": 684.8358208955224, "grad_norm": 31.23802947998047, "learning_rate": 9.932993197278912e-06, "loss": 23.4667, "step": 28763 }, { "epoch": 684.8597014925373, "grad_norm": 24.488920211791992, "learning_rate": 9.93265306122449e-06, "loss": 22.6508, "step": 28764 }, { "epoch": 684.8835820895522, "grad_norm": 28.940305709838867, "learning_rate": 9.932312925170069e-06, "loss": 23.4062, "step": 28765 }, { "epoch": 684.9074626865672, "grad_norm": 26.663545608520508, "learning_rate": 9.931972789115647e-06, "loss": 22.2976, "step": 28766 }, { "epoch": 684.9313432835821, "grad_norm": 24.401721954345703, "learning_rate": 9.931632653061226e-06, "loss": 22.4473, "step": 28767 }, { "epoch": 684.955223880597, "grad_norm": 27.40601921081543, "learning_rate": 9.931292517006802e-06, "loss": 23.9298, "step": 28768 }, { "epoch": 684.9791044776119, "grad_norm": 26.645111083984375, "learning_rate": 9.930952380952383e-06, "loss": 23.7914, "step": 28769 }, { "epoch": 685.0, "grad_norm": 21.49996566772461, "learning_rate": 9.930612244897961e-06, "loss": 19.4809, "step": 28770 }, { "epoch": 685.0238805970149, "grad_norm": 29.562030792236328, "learning_rate": 9.930272108843538e-06, "loss": 23.5923, "step": 28771 }, { "epoch": 685.0477611940298, "grad_norm": 29.500667572021484, "learning_rate": 9.929931972789116e-06, "loss": 23.3171, "step": 28772 }, { "epoch": 685.0716417910447, "grad_norm": 30.290380477905273, "learning_rate": 9.929591836734695e-06, "loss": 23.4356, "step": 28773 }, { "epoch": 685.0955223880597, "grad_norm": 24.640661239624023, "learning_rate": 9.929251700680273e-06, "loss": 22.5731, "step": 28774 }, { "epoch": 685.1194029850747, "grad_norm": 28.916093826293945, "learning_rate": 9.928911564625851e-06, "loss": 23.5558, "step": 28775 }, { "epoch": 685.1432835820896, "grad_norm": 28.796525955200195, "learning_rate": 9.92857142857143e-06, "loss": 23.2629, "step": 28776 }, { "epoch": 685.1671641791045, "grad_norm": 31.637691497802734, "learning_rate": 9.928231292517008e-06, "loss": 23.1197, "step": 28777 }, { "epoch": 685.1910447761194, "grad_norm": 30.811044692993164, "learning_rate": 9.927891156462585e-06, "loss": 23.5851, "step": 28778 }, { "epoch": 685.2149253731343, "grad_norm": 25.80913734436035, "learning_rate": 9.927551020408163e-06, "loss": 22.5394, "step": 28779 }, { "epoch": 685.2388059701492, "grad_norm": 26.387985229492188, "learning_rate": 9.927210884353742e-06, "loss": 22.4761, "step": 28780 }, { "epoch": 685.2626865671642, "grad_norm": 28.122982025146484, "learning_rate": 9.92687074829932e-06, "loss": 24.253, "step": 28781 }, { "epoch": 685.2865671641791, "grad_norm": 28.844573974609375, "learning_rate": 9.926530612244899e-06, "loss": 23.5378, "step": 28782 }, { "epoch": 685.310447761194, "grad_norm": 24.574443817138672, "learning_rate": 9.926190476190477e-06, "loss": 23.6045, "step": 28783 }, { "epoch": 685.334328358209, "grad_norm": 25.73996353149414, "learning_rate": 9.925850340136055e-06, "loss": 23.7075, "step": 28784 }, { "epoch": 685.3582089552239, "grad_norm": 23.816362380981445, "learning_rate": 9.925510204081634e-06, "loss": 23.8688, "step": 28785 }, { "epoch": 685.3820895522388, "grad_norm": 25.368633270263672, "learning_rate": 9.92517006802721e-06, "loss": 23.1095, "step": 28786 }, { "epoch": 685.4059701492537, "grad_norm": 27.22933578491211, "learning_rate": 9.92482993197279e-06, "loss": 23.3755, "step": 28787 }, { "epoch": 685.4298507462687, "grad_norm": 31.462961196899414, "learning_rate": 9.92448979591837e-06, "loss": 23.3018, "step": 28788 }, { "epoch": 685.4537313432836, "grad_norm": 24.459257125854492, "learning_rate": 9.924149659863946e-06, "loss": 22.943, "step": 28789 }, { "epoch": 685.4776119402985, "grad_norm": 27.440326690673828, "learning_rate": 9.923809523809524e-06, "loss": 23.176, "step": 28790 }, { "epoch": 685.5014925373134, "grad_norm": 30.36067771911621, "learning_rate": 9.923469387755103e-06, "loss": 23.8437, "step": 28791 }, { "epoch": 685.5253731343283, "grad_norm": 24.2813777923584, "learning_rate": 9.923129251700681e-06, "loss": 22.8816, "step": 28792 }, { "epoch": 685.5492537313432, "grad_norm": 24.744647979736328, "learning_rate": 9.92278911564626e-06, "loss": 23.0723, "step": 28793 }, { "epoch": 685.5731343283583, "grad_norm": 27.15027618408203, "learning_rate": 9.922448979591838e-06, "loss": 23.025, "step": 28794 }, { "epoch": 685.5970149253732, "grad_norm": 29.21021270751953, "learning_rate": 9.922108843537416e-06, "loss": 23.1885, "step": 28795 }, { "epoch": 685.6208955223881, "grad_norm": 26.9857177734375, "learning_rate": 9.921768707482993e-06, "loss": 22.6929, "step": 28796 }, { "epoch": 685.644776119403, "grad_norm": 25.64822769165039, "learning_rate": 9.921428571428572e-06, "loss": 22.9512, "step": 28797 }, { "epoch": 685.6686567164179, "grad_norm": 31.038721084594727, "learning_rate": 9.921088435374152e-06, "loss": 22.7925, "step": 28798 }, { "epoch": 685.6925373134328, "grad_norm": 28.00916862487793, "learning_rate": 9.920748299319728e-06, "loss": 22.9397, "step": 28799 }, { "epoch": 685.7164179104477, "grad_norm": 28.846481323242188, "learning_rate": 9.920408163265307e-06, "loss": 23.7409, "step": 28800 }, { "epoch": 685.7402985074627, "grad_norm": 23.885231018066406, "learning_rate": 9.920068027210885e-06, "loss": 23.1479, "step": 28801 }, { "epoch": 685.7641791044776, "grad_norm": 23.801767349243164, "learning_rate": 9.919727891156464e-06, "loss": 22.6635, "step": 28802 }, { "epoch": 685.7880597014926, "grad_norm": 32.74446487426758, "learning_rate": 9.919387755102042e-06, "loss": 23.5127, "step": 28803 }, { "epoch": 685.8119402985075, "grad_norm": 25.371471405029297, "learning_rate": 9.91904761904762e-06, "loss": 22.7315, "step": 28804 }, { "epoch": 685.8358208955224, "grad_norm": 23.759296417236328, "learning_rate": 9.918707482993199e-06, "loss": 22.8868, "step": 28805 }, { "epoch": 685.8597014925373, "grad_norm": 23.264963150024414, "learning_rate": 9.918367346938776e-06, "loss": 22.7009, "step": 28806 }, { "epoch": 685.8835820895522, "grad_norm": 23.589994430541992, "learning_rate": 9.918027210884354e-06, "loss": 22.3895, "step": 28807 }, { "epoch": 685.9074626865672, "grad_norm": 24.74083709716797, "learning_rate": 9.917687074829932e-06, "loss": 23.7852, "step": 28808 }, { "epoch": 685.9313432835821, "grad_norm": 23.349824905395508, "learning_rate": 9.917346938775511e-06, "loss": 23.6401, "step": 28809 }, { "epoch": 685.955223880597, "grad_norm": 25.572917938232422, "learning_rate": 9.91700680272109e-06, "loss": 23.2104, "step": 28810 }, { "epoch": 685.9791044776119, "grad_norm": 30.841026306152344, "learning_rate": 9.916666666666668e-06, "loss": 23.5563, "step": 28811 }, { "epoch": 686.0, "grad_norm": 27.876468658447266, "learning_rate": 9.916326530612246e-06, "loss": 20.2959, "step": 28812 }, { "epoch": 686.0238805970149, "grad_norm": 22.77229118347168, "learning_rate": 9.915986394557825e-06, "loss": 22.246, "step": 28813 }, { "epoch": 686.0477611940298, "grad_norm": 26.25530242919922, "learning_rate": 9.915646258503401e-06, "loss": 23.4649, "step": 28814 }, { "epoch": 686.0716417910447, "grad_norm": 36.250099182128906, "learning_rate": 9.91530612244898e-06, "loss": 23.2591, "step": 28815 }, { "epoch": 686.0955223880597, "grad_norm": 23.405630111694336, "learning_rate": 9.91496598639456e-06, "loss": 22.0091, "step": 28816 }, { "epoch": 686.1194029850747, "grad_norm": 26.634613037109375, "learning_rate": 9.914625850340137e-06, "loss": 23.5181, "step": 28817 }, { "epoch": 686.1432835820896, "grad_norm": 29.797441482543945, "learning_rate": 9.914285714285715e-06, "loss": 23.207, "step": 28818 }, { "epoch": 686.1671641791045, "grad_norm": 26.05437469482422, "learning_rate": 9.913945578231293e-06, "loss": 23.8268, "step": 28819 }, { "epoch": 686.1910447761194, "grad_norm": 24.92290687561035, "learning_rate": 9.913605442176872e-06, "loss": 23.627, "step": 28820 }, { "epoch": 686.2149253731343, "grad_norm": 30.880828857421875, "learning_rate": 9.913265306122449e-06, "loss": 22.9683, "step": 28821 }, { "epoch": 686.2388059701492, "grad_norm": 32.45827865600586, "learning_rate": 9.912925170068029e-06, "loss": 22.918, "step": 28822 }, { "epoch": 686.2626865671642, "grad_norm": 23.680747985839844, "learning_rate": 9.912585034013607e-06, "loss": 22.2248, "step": 28823 }, { "epoch": 686.2865671641791, "grad_norm": 26.06313705444336, "learning_rate": 9.912244897959184e-06, "loss": 22.8087, "step": 28824 }, { "epoch": 686.310447761194, "grad_norm": 35.05406188964844, "learning_rate": 9.911904761904762e-06, "loss": 23.2322, "step": 28825 }, { "epoch": 686.334328358209, "grad_norm": 28.284828186035156, "learning_rate": 9.91156462585034e-06, "loss": 22.9868, "step": 28826 }, { "epoch": 686.3582089552239, "grad_norm": 28.030807495117188, "learning_rate": 9.911224489795919e-06, "loss": 23.7434, "step": 28827 }, { "epoch": 686.3820895522388, "grad_norm": 27.72010612487793, "learning_rate": 9.910884353741497e-06, "loss": 22.7408, "step": 28828 }, { "epoch": 686.4059701492537, "grad_norm": 28.86128807067871, "learning_rate": 9.910544217687076e-06, "loss": 23.4424, "step": 28829 }, { "epoch": 686.4298507462687, "grad_norm": 30.099760055541992, "learning_rate": 9.910204081632654e-06, "loss": 23.581, "step": 28830 }, { "epoch": 686.4537313432836, "grad_norm": 24.7676944732666, "learning_rate": 9.909863945578233e-06, "loss": 23.9937, "step": 28831 }, { "epoch": 686.4776119402985, "grad_norm": 25.585084915161133, "learning_rate": 9.90952380952381e-06, "loss": 22.9356, "step": 28832 }, { "epoch": 686.5014925373134, "grad_norm": 28.512418746948242, "learning_rate": 9.909183673469388e-06, "loss": 22.9384, "step": 28833 }, { "epoch": 686.5253731343283, "grad_norm": 27.87833023071289, "learning_rate": 9.908843537414968e-06, "loss": 22.9082, "step": 28834 }, { "epoch": 686.5492537313432, "grad_norm": 28.42380142211914, "learning_rate": 9.908503401360545e-06, "loss": 23.2848, "step": 28835 }, { "epoch": 686.5731343283583, "grad_norm": 26.1298828125, "learning_rate": 9.908163265306123e-06, "loss": 22.3599, "step": 28836 }, { "epoch": 686.5970149253732, "grad_norm": 25.36875343322754, "learning_rate": 9.907823129251702e-06, "loss": 22.2438, "step": 28837 }, { "epoch": 686.6208955223881, "grad_norm": 27.140165328979492, "learning_rate": 9.90748299319728e-06, "loss": 23.774, "step": 28838 }, { "epoch": 686.644776119403, "grad_norm": 29.018739700317383, "learning_rate": 9.907142857142858e-06, "loss": 22.866, "step": 28839 }, { "epoch": 686.6686567164179, "grad_norm": 29.604476928710938, "learning_rate": 9.906802721088437e-06, "loss": 24.3945, "step": 28840 }, { "epoch": 686.6925373134328, "grad_norm": 25.06964874267578, "learning_rate": 9.906462585034015e-06, "loss": 23.9424, "step": 28841 }, { "epoch": 686.7164179104477, "grad_norm": 26.736122131347656, "learning_rate": 9.906122448979592e-06, "loss": 23.1434, "step": 28842 }, { "epoch": 686.7402985074627, "grad_norm": 29.59410285949707, "learning_rate": 9.90578231292517e-06, "loss": 22.6056, "step": 28843 }, { "epoch": 686.7641791044776, "grad_norm": 24.572179794311523, "learning_rate": 9.905442176870749e-06, "loss": 23.943, "step": 28844 }, { "epoch": 686.7880597014926, "grad_norm": 26.8084659576416, "learning_rate": 9.905102040816327e-06, "loss": 23.1726, "step": 28845 }, { "epoch": 686.8119402985075, "grad_norm": 27.232385635375977, "learning_rate": 9.904761904761906e-06, "loss": 22.1196, "step": 28846 }, { "epoch": 686.8358208955224, "grad_norm": 27.035249710083008, "learning_rate": 9.904421768707484e-06, "loss": 24.261, "step": 28847 }, { "epoch": 686.8597014925373, "grad_norm": 26.005847930908203, "learning_rate": 9.904081632653062e-06, "loss": 22.7791, "step": 28848 }, { "epoch": 686.8835820895522, "grad_norm": 26.06692123413086, "learning_rate": 9.903741496598641e-06, "loss": 23.2271, "step": 28849 }, { "epoch": 686.9074626865672, "grad_norm": 26.624095916748047, "learning_rate": 9.903401360544218e-06, "loss": 23.7091, "step": 28850 }, { "epoch": 686.9313432835821, "grad_norm": 25.480167388916016, "learning_rate": 9.903061224489798e-06, "loss": 23.48, "step": 28851 }, { "epoch": 686.955223880597, "grad_norm": 26.415802001953125, "learning_rate": 9.902721088435376e-06, "loss": 23.1488, "step": 28852 }, { "epoch": 686.9791044776119, "grad_norm": 24.356475830078125, "learning_rate": 9.902380952380953e-06, "loss": 23.3488, "step": 28853 }, { "epoch": 687.0, "grad_norm": 22.838760375976562, "learning_rate": 9.902040816326531e-06, "loss": 20.189, "step": 28854 }, { "epoch": 687.0238805970149, "grad_norm": 24.810197830200195, "learning_rate": 9.90170068027211e-06, "loss": 22.8619, "step": 28855 }, { "epoch": 687.0477611940298, "grad_norm": 27.97254180908203, "learning_rate": 9.901360544217688e-06, "loss": 23.3379, "step": 28856 }, { "epoch": 687.0716417910447, "grad_norm": 25.604049682617188, "learning_rate": 9.901020408163267e-06, "loss": 22.8965, "step": 28857 }, { "epoch": 687.0955223880597, "grad_norm": 27.18520736694336, "learning_rate": 9.900680272108845e-06, "loss": 22.7325, "step": 28858 }, { "epoch": 687.1194029850747, "grad_norm": 26.473562240600586, "learning_rate": 9.900340136054423e-06, "loss": 23.4545, "step": 28859 }, { "epoch": 687.1432835820896, "grad_norm": 26.695951461791992, "learning_rate": 9.9e-06, "loss": 23.5587, "step": 28860 }, { "epoch": 687.1671641791045, "grad_norm": 25.537256240844727, "learning_rate": 9.899659863945579e-06, "loss": 23.0695, "step": 28861 }, { "epoch": 687.1910447761194, "grad_norm": 23.302127838134766, "learning_rate": 9.899319727891157e-06, "loss": 23.3435, "step": 28862 }, { "epoch": 687.2149253731343, "grad_norm": 27.60203742980957, "learning_rate": 9.898979591836735e-06, "loss": 23.3969, "step": 28863 }, { "epoch": 687.2388059701492, "grad_norm": 25.023101806640625, "learning_rate": 9.898639455782314e-06, "loss": 22.6599, "step": 28864 }, { "epoch": 687.2626865671642, "grad_norm": 30.366722106933594, "learning_rate": 9.898299319727892e-06, "loss": 22.9154, "step": 28865 }, { "epoch": 687.2865671641791, "grad_norm": 25.2574520111084, "learning_rate": 9.89795918367347e-06, "loss": 23.6589, "step": 28866 }, { "epoch": 687.310447761194, "grad_norm": 27.092363357543945, "learning_rate": 9.897619047619047e-06, "loss": 23.694, "step": 28867 }, { "epoch": 687.334328358209, "grad_norm": 27.951282501220703, "learning_rate": 9.897278911564626e-06, "loss": 23.0887, "step": 28868 }, { "epoch": 687.3582089552239, "grad_norm": 24.252199172973633, "learning_rate": 9.896938775510206e-06, "loss": 23.4461, "step": 28869 }, { "epoch": 687.3820895522388, "grad_norm": 26.392581939697266, "learning_rate": 9.896598639455783e-06, "loss": 23.5011, "step": 28870 }, { "epoch": 687.4059701492537, "grad_norm": 31.211240768432617, "learning_rate": 9.896258503401361e-06, "loss": 22.4158, "step": 28871 }, { "epoch": 687.4298507462687, "grad_norm": 31.083969116210938, "learning_rate": 9.89591836734694e-06, "loss": 22.504, "step": 28872 }, { "epoch": 687.4537313432836, "grad_norm": 26.423606872558594, "learning_rate": 9.895578231292518e-06, "loss": 23.9465, "step": 28873 }, { "epoch": 687.4776119402985, "grad_norm": 21.112506866455078, "learning_rate": 9.895238095238096e-06, "loss": 22.6494, "step": 28874 }, { "epoch": 687.5014925373134, "grad_norm": 23.864421844482422, "learning_rate": 9.894897959183675e-06, "loss": 22.3325, "step": 28875 }, { "epoch": 687.5253731343283, "grad_norm": 31.012041091918945, "learning_rate": 9.894557823129253e-06, "loss": 24.0339, "step": 28876 }, { "epoch": 687.5492537313432, "grad_norm": 37.63333511352539, "learning_rate": 9.894217687074832e-06, "loss": 22.9623, "step": 28877 }, { "epoch": 687.5731343283583, "grad_norm": 23.094240188598633, "learning_rate": 9.893877551020408e-06, "loss": 22.9662, "step": 28878 }, { "epoch": 687.5970149253732, "grad_norm": 32.98414993286133, "learning_rate": 9.893537414965987e-06, "loss": 23.109, "step": 28879 }, { "epoch": 687.6208955223881, "grad_norm": 35.3714714050293, "learning_rate": 9.893197278911565e-06, "loss": 22.7755, "step": 28880 }, { "epoch": 687.644776119403, "grad_norm": 26.490732192993164, "learning_rate": 9.892857142857143e-06, "loss": 24.0304, "step": 28881 }, { "epoch": 687.6686567164179, "grad_norm": 26.922780990600586, "learning_rate": 9.892517006802722e-06, "loss": 22.5034, "step": 28882 }, { "epoch": 687.6925373134328, "grad_norm": 41.384429931640625, "learning_rate": 9.8921768707483e-06, "loss": 23.3795, "step": 28883 }, { "epoch": 687.7164179104477, "grad_norm": 26.710248947143555, "learning_rate": 9.891836734693879e-06, "loss": 23.9561, "step": 28884 }, { "epoch": 687.7402985074627, "grad_norm": 42.41851806640625, "learning_rate": 9.891496598639455e-06, "loss": 23.9405, "step": 28885 }, { "epoch": 687.7641791044776, "grad_norm": 30.764408111572266, "learning_rate": 9.891156462585036e-06, "loss": 23.4339, "step": 28886 }, { "epoch": 687.7880597014926, "grad_norm": 26.78148078918457, "learning_rate": 9.890816326530614e-06, "loss": 22.7834, "step": 28887 }, { "epoch": 687.8119402985075, "grad_norm": 34.45462417602539, "learning_rate": 9.89047619047619e-06, "loss": 23.7533, "step": 28888 }, { "epoch": 687.8358208955224, "grad_norm": 27.12651824951172, "learning_rate": 9.890136054421769e-06, "loss": 23.3809, "step": 28889 }, { "epoch": 687.8597014925373, "grad_norm": 31.421613693237305, "learning_rate": 9.889795918367348e-06, "loss": 22.516, "step": 28890 }, { "epoch": 687.8835820895522, "grad_norm": 30.87417221069336, "learning_rate": 9.889455782312926e-06, "loss": 22.631, "step": 28891 }, { "epoch": 687.9074626865672, "grad_norm": 36.37142562866211, "learning_rate": 9.889115646258504e-06, "loss": 22.5421, "step": 28892 }, { "epoch": 687.9313432835821, "grad_norm": 25.912477493286133, "learning_rate": 9.888775510204083e-06, "loss": 24.0706, "step": 28893 }, { "epoch": 687.955223880597, "grad_norm": 34.77316665649414, "learning_rate": 9.888435374149661e-06, "loss": 22.1416, "step": 28894 }, { "epoch": 687.9791044776119, "grad_norm": 32.956302642822266, "learning_rate": 9.88809523809524e-06, "loss": 22.9681, "step": 28895 }, { "epoch": 688.0, "grad_norm": 22.164953231811523, "learning_rate": 9.887755102040816e-06, "loss": 19.9123, "step": 28896 }, { "epoch": 688.0238805970149, "grad_norm": 32.61861801147461, "learning_rate": 9.887414965986395e-06, "loss": 22.9394, "step": 28897 }, { "epoch": 688.0477611940298, "grad_norm": 26.46332359313965, "learning_rate": 9.887074829931975e-06, "loss": 22.5777, "step": 28898 }, { "epoch": 688.0716417910447, "grad_norm": 31.066171646118164, "learning_rate": 9.886734693877552e-06, "loss": 22.4979, "step": 28899 }, { "epoch": 688.0955223880597, "grad_norm": 26.725845336914062, "learning_rate": 9.88639455782313e-06, "loss": 21.509, "step": 28900 }, { "epoch": 688.1194029850747, "grad_norm": 30.234336853027344, "learning_rate": 9.886054421768708e-06, "loss": 23.0859, "step": 28901 }, { "epoch": 688.1432835820896, "grad_norm": 29.275188446044922, "learning_rate": 9.885714285714287e-06, "loss": 21.5962, "step": 28902 }, { "epoch": 688.1671641791045, "grad_norm": 23.843042373657227, "learning_rate": 9.885374149659864e-06, "loss": 22.582, "step": 28903 }, { "epoch": 688.1910447761194, "grad_norm": 32.28490447998047, "learning_rate": 9.885034013605444e-06, "loss": 22.7079, "step": 28904 }, { "epoch": 688.2149253731343, "grad_norm": 27.68662452697754, "learning_rate": 9.884693877551022e-06, "loss": 23.4748, "step": 28905 }, { "epoch": 688.2388059701492, "grad_norm": 27.82786750793457, "learning_rate": 9.884353741496599e-06, "loss": 22.9315, "step": 28906 }, { "epoch": 688.2626865671642, "grad_norm": 34.91178894042969, "learning_rate": 9.884013605442177e-06, "loss": 22.7014, "step": 28907 }, { "epoch": 688.2865671641791, "grad_norm": 30.041515350341797, "learning_rate": 9.883673469387756e-06, "loss": 22.9856, "step": 28908 }, { "epoch": 688.310447761194, "grad_norm": 27.348047256469727, "learning_rate": 9.883333333333334e-06, "loss": 23.6135, "step": 28909 }, { "epoch": 688.334328358209, "grad_norm": 24.873472213745117, "learning_rate": 9.882993197278913e-06, "loss": 23.5402, "step": 28910 }, { "epoch": 688.3582089552239, "grad_norm": 23.912010192871094, "learning_rate": 9.882653061224491e-06, "loss": 22.542, "step": 28911 }, { "epoch": 688.3820895522388, "grad_norm": 27.79771614074707, "learning_rate": 9.88231292517007e-06, "loss": 23.6575, "step": 28912 }, { "epoch": 688.4059701492537, "grad_norm": 27.898860931396484, "learning_rate": 9.881972789115646e-06, "loss": 22.5972, "step": 28913 }, { "epoch": 688.4298507462687, "grad_norm": 27.52629280090332, "learning_rate": 9.881632653061225e-06, "loss": 23.1782, "step": 28914 }, { "epoch": 688.4537313432836, "grad_norm": 31.232309341430664, "learning_rate": 9.881292517006803e-06, "loss": 23.9984, "step": 28915 }, { "epoch": 688.4776119402985, "grad_norm": 25.357776641845703, "learning_rate": 9.880952380952381e-06, "loss": 24.6526, "step": 28916 }, { "epoch": 688.5014925373134, "grad_norm": 29.37425422668457, "learning_rate": 9.88061224489796e-06, "loss": 22.6982, "step": 28917 }, { "epoch": 688.5253731343283, "grad_norm": 28.398591995239258, "learning_rate": 9.880272108843538e-06, "loss": 23.4872, "step": 28918 }, { "epoch": 688.5492537313432, "grad_norm": 31.7432804107666, "learning_rate": 9.879931972789117e-06, "loss": 23.6569, "step": 28919 }, { "epoch": 688.5731343283583, "grad_norm": 26.8442325592041, "learning_rate": 9.879591836734695e-06, "loss": 22.7383, "step": 28920 }, { "epoch": 688.5970149253732, "grad_norm": 27.072032928466797, "learning_rate": 9.879251700680272e-06, "loss": 23.2194, "step": 28921 }, { "epoch": 688.6208955223881, "grad_norm": 29.089855194091797, "learning_rate": 9.878911564625852e-06, "loss": 23.5252, "step": 28922 }, { "epoch": 688.644776119403, "grad_norm": 31.277355194091797, "learning_rate": 9.87857142857143e-06, "loss": 23.7091, "step": 28923 }, { "epoch": 688.6686567164179, "grad_norm": 26.532943725585938, "learning_rate": 9.878231292517007e-06, "loss": 23.0428, "step": 28924 }, { "epoch": 688.6925373134328, "grad_norm": 24.338502883911133, "learning_rate": 9.877891156462585e-06, "loss": 22.288, "step": 28925 }, { "epoch": 688.7164179104477, "grad_norm": 35.106502532958984, "learning_rate": 9.877551020408164e-06, "loss": 22.7022, "step": 28926 }, { "epoch": 688.7402985074627, "grad_norm": 35.76342010498047, "learning_rate": 9.877210884353742e-06, "loss": 23.3921, "step": 28927 }, { "epoch": 688.7641791044776, "grad_norm": 24.922805786132812, "learning_rate": 9.87687074829932e-06, "loss": 22.8772, "step": 28928 }, { "epoch": 688.7880597014926, "grad_norm": 28.502717971801758, "learning_rate": 9.876530612244899e-06, "loss": 22.9715, "step": 28929 }, { "epoch": 688.8119402985075, "grad_norm": 37.47354507446289, "learning_rate": 9.876190476190478e-06, "loss": 24.3749, "step": 28930 }, { "epoch": 688.8358208955224, "grad_norm": 23.58818817138672, "learning_rate": 9.875850340136054e-06, "loss": 24.1475, "step": 28931 }, { "epoch": 688.8597014925373, "grad_norm": 30.67217254638672, "learning_rate": 9.875510204081633e-06, "loss": 23.3603, "step": 28932 }, { "epoch": 688.8835820895522, "grad_norm": 30.635009765625, "learning_rate": 9.875170068027213e-06, "loss": 23.2627, "step": 28933 }, { "epoch": 688.9074626865672, "grad_norm": 29.260255813598633, "learning_rate": 9.87482993197279e-06, "loss": 22.9557, "step": 28934 }, { "epoch": 688.9313432835821, "grad_norm": 25.72157096862793, "learning_rate": 9.874489795918368e-06, "loss": 23.5305, "step": 28935 }, { "epoch": 688.955223880597, "grad_norm": 34.340362548828125, "learning_rate": 9.874149659863946e-06, "loss": 23.5494, "step": 28936 }, { "epoch": 688.9791044776119, "grad_norm": 25.02202606201172, "learning_rate": 9.873809523809525e-06, "loss": 22.9645, "step": 28937 }, { "epoch": 689.0, "grad_norm": 27.892730712890625, "learning_rate": 9.873469387755103e-06, "loss": 19.7209, "step": 28938 }, { "epoch": 689.0238805970149, "grad_norm": 33.253639221191406, "learning_rate": 9.873129251700682e-06, "loss": 22.9597, "step": 28939 }, { "epoch": 689.0477611940298, "grad_norm": 29.935827255249023, "learning_rate": 9.87278911564626e-06, "loss": 22.3695, "step": 28940 }, { "epoch": 689.0716417910447, "grad_norm": 25.819259643554688, "learning_rate": 9.872448979591838e-06, "loss": 23.5427, "step": 28941 }, { "epoch": 689.0955223880597, "grad_norm": 36.522586822509766, "learning_rate": 9.872108843537415e-06, "loss": 23.3385, "step": 28942 }, { "epoch": 689.1194029850747, "grad_norm": 30.656999588012695, "learning_rate": 9.871768707482994e-06, "loss": 22.8659, "step": 28943 }, { "epoch": 689.1432835820896, "grad_norm": 28.914897918701172, "learning_rate": 9.871428571428572e-06, "loss": 22.8465, "step": 28944 }, { "epoch": 689.1671641791045, "grad_norm": 29.789451599121094, "learning_rate": 9.87108843537415e-06, "loss": 22.6264, "step": 28945 }, { "epoch": 689.1910447761194, "grad_norm": 38.738197326660156, "learning_rate": 9.870748299319729e-06, "loss": 21.8491, "step": 28946 }, { "epoch": 689.2149253731343, "grad_norm": 25.987951278686523, "learning_rate": 9.870408163265307e-06, "loss": 23.0103, "step": 28947 }, { "epoch": 689.2388059701492, "grad_norm": 33.628143310546875, "learning_rate": 9.870068027210886e-06, "loss": 22.5708, "step": 28948 }, { "epoch": 689.2626865671642, "grad_norm": 38.73167037963867, "learning_rate": 9.869727891156462e-06, "loss": 23.2414, "step": 28949 }, { "epoch": 689.2865671641791, "grad_norm": 25.939842224121094, "learning_rate": 9.86938775510204e-06, "loss": 23.2707, "step": 28950 }, { "epoch": 689.310447761194, "grad_norm": 32.4922981262207, "learning_rate": 9.869047619047621e-06, "loss": 23.4697, "step": 28951 }, { "epoch": 689.334328358209, "grad_norm": 29.43461799621582, "learning_rate": 9.868707482993198e-06, "loss": 23.1489, "step": 28952 }, { "epoch": 689.3582089552239, "grad_norm": 30.25980567932129, "learning_rate": 9.868367346938776e-06, "loss": 22.4689, "step": 28953 }, { "epoch": 689.3820895522388, "grad_norm": 25.85740089416504, "learning_rate": 9.868027210884355e-06, "loss": 24.1344, "step": 28954 }, { "epoch": 689.4059701492537, "grad_norm": 31.998262405395508, "learning_rate": 9.867687074829933e-06, "loss": 22.4945, "step": 28955 }, { "epoch": 689.4298507462687, "grad_norm": 28.98504066467285, "learning_rate": 9.867346938775511e-06, "loss": 22.5171, "step": 28956 }, { "epoch": 689.4537313432836, "grad_norm": 28.92347526550293, "learning_rate": 9.86700680272109e-06, "loss": 23.2397, "step": 28957 }, { "epoch": 689.4776119402985, "grad_norm": 31.11577796936035, "learning_rate": 9.866666666666668e-06, "loss": 22.8989, "step": 28958 }, { "epoch": 689.5014925373134, "grad_norm": 27.217498779296875, "learning_rate": 9.866326530612245e-06, "loss": 23.6952, "step": 28959 }, { "epoch": 689.5253731343283, "grad_norm": 34.148231506347656, "learning_rate": 9.865986394557823e-06, "loss": 23.961, "step": 28960 }, { "epoch": 689.5492537313432, "grad_norm": 28.969409942626953, "learning_rate": 9.865646258503402e-06, "loss": 22.7088, "step": 28961 }, { "epoch": 689.5731343283583, "grad_norm": 34.04248046875, "learning_rate": 9.86530612244898e-06, "loss": 23.2493, "step": 28962 }, { "epoch": 689.5970149253732, "grad_norm": 28.480653762817383, "learning_rate": 9.864965986394559e-06, "loss": 24.0328, "step": 28963 }, { "epoch": 689.6208955223881, "grad_norm": 32.251338958740234, "learning_rate": 9.864625850340137e-06, "loss": 23.156, "step": 28964 }, { "epoch": 689.644776119403, "grad_norm": 29.21762466430664, "learning_rate": 9.864285714285715e-06, "loss": 22.6299, "step": 28965 }, { "epoch": 689.6686567164179, "grad_norm": 27.77020835876465, "learning_rate": 9.863945578231294e-06, "loss": 22.5398, "step": 28966 }, { "epoch": 689.6925373134328, "grad_norm": 25.736173629760742, "learning_rate": 9.86360544217687e-06, "loss": 23.2321, "step": 28967 }, { "epoch": 689.7164179104477, "grad_norm": 23.38526725769043, "learning_rate": 9.863265306122449e-06, "loss": 22.1059, "step": 28968 }, { "epoch": 689.7402985074627, "grad_norm": 28.603273391723633, "learning_rate": 9.862925170068029e-06, "loss": 24.2388, "step": 28969 }, { "epoch": 689.7641791044776, "grad_norm": 27.438810348510742, "learning_rate": 9.862585034013606e-06, "loss": 24.0304, "step": 28970 }, { "epoch": 689.7880597014926, "grad_norm": 30.305326461791992, "learning_rate": 9.862244897959184e-06, "loss": 23.0955, "step": 28971 }, { "epoch": 689.8119402985075, "grad_norm": 27.8486385345459, "learning_rate": 9.861904761904763e-06, "loss": 23.0524, "step": 28972 }, { "epoch": 689.8358208955224, "grad_norm": 24.42852783203125, "learning_rate": 9.861564625850341e-06, "loss": 23.937, "step": 28973 }, { "epoch": 689.8597014925373, "grad_norm": 35.22304153442383, "learning_rate": 9.861224489795918e-06, "loss": 23.3946, "step": 28974 }, { "epoch": 689.8835820895522, "grad_norm": 35.04738998413086, "learning_rate": 9.860884353741498e-06, "loss": 23.2271, "step": 28975 }, { "epoch": 689.9074626865672, "grad_norm": 25.32864761352539, "learning_rate": 9.860544217687076e-06, "loss": 22.6905, "step": 28976 }, { "epoch": 689.9313432835821, "grad_norm": 25.277917861938477, "learning_rate": 9.860204081632653e-06, "loss": 23.1296, "step": 28977 }, { "epoch": 689.955223880597, "grad_norm": 24.13591957092285, "learning_rate": 9.859863945578231e-06, "loss": 22.718, "step": 28978 }, { "epoch": 689.9791044776119, "grad_norm": 36.66907501220703, "learning_rate": 9.85952380952381e-06, "loss": 23.2346, "step": 28979 }, { "epoch": 690.0, "grad_norm": 25.918222427368164, "learning_rate": 9.859183673469388e-06, "loss": 19.856, "step": 28980 }, { "epoch": 690.0238805970149, "grad_norm": 24.302431106567383, "learning_rate": 9.858843537414967e-06, "loss": 22.4524, "step": 28981 }, { "epoch": 690.0477611940298, "grad_norm": 33.30189514160156, "learning_rate": 9.858503401360545e-06, "loss": 23.243, "step": 28982 }, { "epoch": 690.0716417910447, "grad_norm": 28.79104232788086, "learning_rate": 9.858163265306124e-06, "loss": 22.6426, "step": 28983 }, { "epoch": 690.0955223880597, "grad_norm": 30.24950408935547, "learning_rate": 9.857823129251702e-06, "loss": 23.1891, "step": 28984 }, { "epoch": 690.1194029850747, "grad_norm": 25.572519302368164, "learning_rate": 9.857482993197279e-06, "loss": 23.7485, "step": 28985 }, { "epoch": 690.1432835820896, "grad_norm": 25.07358741760254, "learning_rate": 9.857142857142859e-06, "loss": 22.3786, "step": 28986 }, { "epoch": 690.1671641791045, "grad_norm": 25.86833953857422, "learning_rate": 9.856802721088437e-06, "loss": 22.6679, "step": 28987 }, { "epoch": 690.1910447761194, "grad_norm": 25.767452239990234, "learning_rate": 9.856462585034014e-06, "loss": 23.1665, "step": 28988 }, { "epoch": 690.2149253731343, "grad_norm": 30.727441787719727, "learning_rate": 9.856122448979592e-06, "loss": 23.3192, "step": 28989 }, { "epoch": 690.2388059701492, "grad_norm": 26.831958770751953, "learning_rate": 9.85578231292517e-06, "loss": 23.6013, "step": 28990 }, { "epoch": 690.2626865671642, "grad_norm": 25.934106826782227, "learning_rate": 9.85544217687075e-06, "loss": 22.3414, "step": 28991 }, { "epoch": 690.2865671641791, "grad_norm": 32.13352584838867, "learning_rate": 9.855102040816328e-06, "loss": 23.6375, "step": 28992 }, { "epoch": 690.310447761194, "grad_norm": 25.453126907348633, "learning_rate": 9.854761904761906e-06, "loss": 23.0109, "step": 28993 }, { "epoch": 690.334328358209, "grad_norm": 37.48974609375, "learning_rate": 9.854421768707485e-06, "loss": 22.9555, "step": 28994 }, { "epoch": 690.3582089552239, "grad_norm": 24.449573516845703, "learning_rate": 9.854081632653061e-06, "loss": 23.1419, "step": 28995 }, { "epoch": 690.3820895522388, "grad_norm": 31.053930282592773, "learning_rate": 9.85374149659864e-06, "loss": 23.3198, "step": 28996 }, { "epoch": 690.4059701492537, "grad_norm": 34.311031341552734, "learning_rate": 9.853401360544218e-06, "loss": 23.572, "step": 28997 }, { "epoch": 690.4298507462687, "grad_norm": 33.56404113769531, "learning_rate": 9.853061224489796e-06, "loss": 23.166, "step": 28998 }, { "epoch": 690.4537313432836, "grad_norm": 29.798994064331055, "learning_rate": 9.852721088435375e-06, "loss": 23.8474, "step": 28999 }, { "epoch": 690.4776119402985, "grad_norm": 28.029386520385742, "learning_rate": 9.852380952380953e-06, "loss": 22.8826, "step": 29000 }, { "epoch": 690.5014925373134, "grad_norm": 31.343652725219727, "learning_rate": 9.852040816326532e-06, "loss": 23.8695, "step": 29001 }, { "epoch": 690.5253731343283, "grad_norm": 29.462364196777344, "learning_rate": 9.85170068027211e-06, "loss": 22.7764, "step": 29002 }, { "epoch": 690.5492537313432, "grad_norm": 25.90390396118164, "learning_rate": 9.851360544217687e-06, "loss": 23.7613, "step": 29003 }, { "epoch": 690.5731343283583, "grad_norm": 28.43182945251465, "learning_rate": 9.851020408163267e-06, "loss": 23.2238, "step": 29004 }, { "epoch": 690.5970149253732, "grad_norm": 27.05205535888672, "learning_rate": 9.850680272108845e-06, "loss": 22.3495, "step": 29005 }, { "epoch": 690.6208955223881, "grad_norm": 25.06647491455078, "learning_rate": 9.850340136054422e-06, "loss": 23.1713, "step": 29006 }, { "epoch": 690.644776119403, "grad_norm": 24.733919143676758, "learning_rate": 9.85e-06, "loss": 22.5105, "step": 29007 }, { "epoch": 690.6686567164179, "grad_norm": 23.670482635498047, "learning_rate": 9.849659863945579e-06, "loss": 21.4381, "step": 29008 }, { "epoch": 690.6925373134328, "grad_norm": 26.67989730834961, "learning_rate": 9.849319727891157e-06, "loss": 23.2182, "step": 29009 }, { "epoch": 690.7164179104477, "grad_norm": 26.428424835205078, "learning_rate": 9.848979591836736e-06, "loss": 23.3282, "step": 29010 }, { "epoch": 690.7402985074627, "grad_norm": 29.374666213989258, "learning_rate": 9.848639455782314e-06, "loss": 22.4196, "step": 29011 }, { "epoch": 690.7641791044776, "grad_norm": 24.23834800720215, "learning_rate": 9.848299319727893e-06, "loss": 23.1251, "step": 29012 }, { "epoch": 690.7880597014926, "grad_norm": 25.69297218322754, "learning_rate": 9.84795918367347e-06, "loss": 22.474, "step": 29013 }, { "epoch": 690.8119402985075, "grad_norm": 25.750696182250977, "learning_rate": 9.847619047619048e-06, "loss": 23.8737, "step": 29014 }, { "epoch": 690.8358208955224, "grad_norm": 30.92823600769043, "learning_rate": 9.847278911564626e-06, "loss": 22.8746, "step": 29015 }, { "epoch": 690.8597014925373, "grad_norm": 24.291704177856445, "learning_rate": 9.846938775510205e-06, "loss": 23.6487, "step": 29016 }, { "epoch": 690.8835820895522, "grad_norm": 30.04545021057129, "learning_rate": 9.846598639455783e-06, "loss": 22.5854, "step": 29017 }, { "epoch": 690.9074626865672, "grad_norm": 24.7747745513916, "learning_rate": 9.846258503401361e-06, "loss": 21.6446, "step": 29018 }, { "epoch": 690.9313432835821, "grad_norm": 27.659006118774414, "learning_rate": 9.84591836734694e-06, "loss": 23.4182, "step": 29019 }, { "epoch": 690.955223880597, "grad_norm": 29.325889587402344, "learning_rate": 9.845578231292517e-06, "loss": 22.7593, "step": 29020 }, { "epoch": 690.9791044776119, "grad_norm": 29.272531509399414, "learning_rate": 9.845238095238097e-06, "loss": 23.9488, "step": 29021 }, { "epoch": 691.0, "grad_norm": 26.603160858154297, "learning_rate": 9.844897959183675e-06, "loss": 21.0188, "step": 29022 }, { "epoch": 691.0238805970149, "grad_norm": 30.043771743774414, "learning_rate": 9.844557823129252e-06, "loss": 22.7496, "step": 29023 }, { "epoch": 691.0477611940298, "grad_norm": 25.64238929748535, "learning_rate": 9.84421768707483e-06, "loss": 23.0674, "step": 29024 }, { "epoch": 691.0716417910447, "grad_norm": 26.876386642456055, "learning_rate": 9.843877551020409e-06, "loss": 22.9185, "step": 29025 }, { "epoch": 691.0955223880597, "grad_norm": 26.804931640625, "learning_rate": 9.843537414965987e-06, "loss": 23.5772, "step": 29026 }, { "epoch": 691.1194029850747, "grad_norm": 30.990528106689453, "learning_rate": 9.843197278911566e-06, "loss": 23.7032, "step": 29027 }, { "epoch": 691.1432835820896, "grad_norm": 32.39432907104492, "learning_rate": 9.842857142857144e-06, "loss": 23.3552, "step": 29028 }, { "epoch": 691.1671641791045, "grad_norm": 26.992929458618164, "learning_rate": 9.842517006802722e-06, "loss": 23.6702, "step": 29029 }, { "epoch": 691.1910447761194, "grad_norm": 29.645200729370117, "learning_rate": 9.8421768707483e-06, "loss": 22.0186, "step": 29030 }, { "epoch": 691.2149253731343, "grad_norm": 26.77189826965332, "learning_rate": 9.841836734693878e-06, "loss": 23.3456, "step": 29031 }, { "epoch": 691.2388059701492, "grad_norm": NaN, "learning_rate": 9.841496598639456e-06, "loss": 19.9385, "step": 29032 }, { "epoch": 691.2626865671642, "grad_norm": 24.629682540893555, "learning_rate": 9.841496598639456e-06, "loss": 22.7305, "step": 29033 }, { "epoch": 691.2865671641791, "grad_norm": 32.3411750793457, "learning_rate": 9.841156462585036e-06, "loss": 22.6102, "step": 29034 }, { "epoch": 691.310447761194, "grad_norm": 33.01729202270508, "learning_rate": 9.840816326530613e-06, "loss": 22.3868, "step": 29035 }, { "epoch": 691.334328358209, "grad_norm": 25.382875442504883, "learning_rate": 9.840476190476191e-06, "loss": 23.1594, "step": 29036 }, { "epoch": 691.3582089552239, "grad_norm": 25.199073791503906, "learning_rate": 9.84013605442177e-06, "loss": 23.2038, "step": 29037 }, { "epoch": 691.3820895522388, "grad_norm": 23.08277130126953, "learning_rate": 9.839795918367348e-06, "loss": 22.6891, "step": 29038 }, { "epoch": 691.4059701492537, "grad_norm": 25.794776916503906, "learning_rate": 9.839455782312925e-06, "loss": 22.4705, "step": 29039 }, { "epoch": 691.4298507462687, "grad_norm": 34.633567810058594, "learning_rate": 9.839115646258505e-06, "loss": 22.6002, "step": 29040 }, { "epoch": 691.4537313432836, "grad_norm": 29.849531173706055, "learning_rate": 9.838775510204083e-06, "loss": 22.6807, "step": 29041 }, { "epoch": 691.4776119402985, "grad_norm": 19.6944522857666, "learning_rate": 9.83843537414966e-06, "loss": 22.7769, "step": 29042 }, { "epoch": 691.5014925373134, "grad_norm": 29.851972579956055, "learning_rate": 9.838095238095238e-06, "loss": 22.4473, "step": 29043 }, { "epoch": 691.5253731343283, "grad_norm": 31.219406127929688, "learning_rate": 9.837755102040817e-06, "loss": 23.4209, "step": 29044 }, { "epoch": 691.5492537313432, "grad_norm": 24.65085792541504, "learning_rate": 9.837414965986395e-06, "loss": 22.9904, "step": 29045 }, { "epoch": 691.5731343283583, "grad_norm": 28.588193893432617, "learning_rate": 9.837074829931974e-06, "loss": 22.3062, "step": 29046 }, { "epoch": 691.5970149253732, "grad_norm": 32.87985610961914, "learning_rate": 9.836734693877552e-06, "loss": 22.4825, "step": 29047 }, { "epoch": 691.6208955223881, "grad_norm": 29.944528579711914, "learning_rate": 9.83639455782313e-06, "loss": 23.2554, "step": 29048 }, { "epoch": 691.644776119403, "grad_norm": 23.048152923583984, "learning_rate": 9.836054421768709e-06, "loss": 23.9785, "step": 29049 }, { "epoch": 691.6686567164179, "grad_norm": 28.646730422973633, "learning_rate": 9.835714285714286e-06, "loss": 23.2486, "step": 29050 }, { "epoch": 691.6925373134328, "grad_norm": 31.539297103881836, "learning_rate": 9.835374149659864e-06, "loss": 23.4091, "step": 29051 }, { "epoch": 691.7164179104477, "grad_norm": 29.27550506591797, "learning_rate": 9.835034013605444e-06, "loss": 23.3797, "step": 29052 }, { "epoch": 691.7402985074627, "grad_norm": 27.32806968688965, "learning_rate": 9.834693877551021e-06, "loss": 22.686, "step": 29053 }, { "epoch": 691.7641791044776, "grad_norm": 26.984790802001953, "learning_rate": 9.8343537414966e-06, "loss": 24.0842, "step": 29054 }, { "epoch": 691.7880597014926, "grad_norm": 35.06270980834961, "learning_rate": 9.834013605442178e-06, "loss": 21.4805, "step": 29055 }, { "epoch": 691.8119402985075, "grad_norm": 25.301509857177734, "learning_rate": 9.833673469387756e-06, "loss": 23.331, "step": 29056 }, { "epoch": 691.8358208955224, "grad_norm": 29.28321075439453, "learning_rate": 9.833333333333333e-06, "loss": 23.1592, "step": 29057 }, { "epoch": 691.8597014925373, "grad_norm": 22.912851333618164, "learning_rate": 9.832993197278913e-06, "loss": 23.3183, "step": 29058 }, { "epoch": 691.8835820895522, "grad_norm": 30.76997184753418, "learning_rate": 9.832653061224491e-06, "loss": 24.1038, "step": 29059 }, { "epoch": 691.9074626865672, "grad_norm": 25.88775062561035, "learning_rate": 9.832312925170068e-06, "loss": 22.9446, "step": 29060 }, { "epoch": 691.9313432835821, "grad_norm": 34.52903366088867, "learning_rate": 9.831972789115647e-06, "loss": 22.8903, "step": 29061 }, { "epoch": 691.955223880597, "grad_norm": 27.08885383605957, "learning_rate": 9.831632653061225e-06, "loss": 22.922, "step": 29062 }, { "epoch": 691.9791044776119, "grad_norm": 26.1579647064209, "learning_rate": 9.831292517006803e-06, "loss": 23.2589, "step": 29063 }, { "epoch": 692.0, "grad_norm": 22.91669273376465, "learning_rate": 9.830952380952382e-06, "loss": 20.0808, "step": 29064 }, { "epoch": 692.0238805970149, "grad_norm": 29.428237915039062, "learning_rate": 9.83061224489796e-06, "loss": 23.3042, "step": 29065 }, { "epoch": 692.0477611940298, "grad_norm": 24.286712646484375, "learning_rate": 9.830272108843539e-06, "loss": 23.0395, "step": 29066 }, { "epoch": 692.0716417910447, "grad_norm": 27.216754913330078, "learning_rate": 9.829931972789115e-06, "loss": 23.2414, "step": 29067 }, { "epoch": 692.0955223880597, "grad_norm": 29.05799102783203, "learning_rate": 9.829591836734694e-06, "loss": 22.2103, "step": 29068 }, { "epoch": 692.1194029850747, "grad_norm": 30.550050735473633, "learning_rate": 9.829251700680274e-06, "loss": 22.47, "step": 29069 }, { "epoch": 692.1432835820896, "grad_norm": 21.48000717163086, "learning_rate": 9.82891156462585e-06, "loss": 22.3153, "step": 29070 }, { "epoch": 692.1671641791045, "grad_norm": 30.830429077148438, "learning_rate": 9.828571428571429e-06, "loss": 23.2145, "step": 29071 }, { "epoch": 692.1910447761194, "grad_norm": 30.271018981933594, "learning_rate": 9.828231292517008e-06, "loss": 22.4132, "step": 29072 }, { "epoch": 692.2149253731343, "grad_norm": 29.221284866333008, "learning_rate": 9.827891156462586e-06, "loss": 23.1211, "step": 29073 }, { "epoch": 692.2388059701492, "grad_norm": 25.26254653930664, "learning_rate": 9.827551020408164e-06, "loss": 22.8108, "step": 29074 }, { "epoch": 692.2626865671642, "grad_norm": 26.74179458618164, "learning_rate": 9.827210884353743e-06, "loss": 23.6223, "step": 29075 }, { "epoch": 692.2865671641791, "grad_norm": 30.64057159423828, "learning_rate": 9.826870748299321e-06, "loss": 23.5086, "step": 29076 }, { "epoch": 692.310447761194, "grad_norm": 27.549074172973633, "learning_rate": 9.8265306122449e-06, "loss": 22.8167, "step": 29077 }, { "epoch": 692.334328358209, "grad_norm": 25.781248092651367, "learning_rate": 9.826190476190476e-06, "loss": 23.0551, "step": 29078 }, { "epoch": 692.3582089552239, "grad_norm": 25.550214767456055, "learning_rate": 9.825850340136055e-06, "loss": 23.3203, "step": 29079 }, { "epoch": 692.3820895522388, "grad_norm": 36.7717399597168, "learning_rate": 9.825510204081633e-06, "loss": 22.9655, "step": 29080 }, { "epoch": 692.4059701492537, "grad_norm": 25.831647872924805, "learning_rate": 9.825170068027212e-06, "loss": 22.9586, "step": 29081 }, { "epoch": 692.4298507462687, "grad_norm": 27.81351089477539, "learning_rate": 9.82482993197279e-06, "loss": 23.0229, "step": 29082 }, { "epoch": 692.4537313432836, "grad_norm": 30.31983757019043, "learning_rate": 9.824489795918368e-06, "loss": 22.5594, "step": 29083 }, { "epoch": 692.4776119402985, "grad_norm": 34.13228988647461, "learning_rate": 9.824149659863947e-06, "loss": 23.6619, "step": 29084 }, { "epoch": 692.5014925373134, "grad_norm": 22.66373062133789, "learning_rate": 9.823809523809524e-06, "loss": 22.3083, "step": 29085 }, { "epoch": 692.5253731343283, "grad_norm": 40.745521545410156, "learning_rate": 9.823469387755102e-06, "loss": 23.3565, "step": 29086 }, { "epoch": 692.5492537313432, "grad_norm": 28.95842170715332, "learning_rate": 9.823129251700682e-06, "loss": 23.0381, "step": 29087 }, { "epoch": 692.5731343283583, "grad_norm": 29.9035587310791, "learning_rate": 9.822789115646259e-06, "loss": 24.0172, "step": 29088 }, { "epoch": 692.5970149253732, "grad_norm": 36.50214385986328, "learning_rate": 9.822448979591837e-06, "loss": 23.2257, "step": 29089 }, { "epoch": 692.6208955223881, "grad_norm": 28.188453674316406, "learning_rate": 9.822108843537416e-06, "loss": 22.5281, "step": 29090 }, { "epoch": 692.644776119403, "grad_norm": 30.609302520751953, "learning_rate": 9.821768707482994e-06, "loss": 22.6516, "step": 29091 }, { "epoch": 692.6686567164179, "grad_norm": 38.306358337402344, "learning_rate": 9.821428571428573e-06, "loss": 22.7882, "step": 29092 }, { "epoch": 692.6925373134328, "grad_norm": 26.31178855895996, "learning_rate": 9.821088435374151e-06, "loss": 22.8671, "step": 29093 }, { "epoch": 692.7164179104477, "grad_norm": 33.32093048095703, "learning_rate": 9.82074829931973e-06, "loss": 24.4963, "step": 29094 }, { "epoch": 692.7402985074627, "grad_norm": 31.224489212036133, "learning_rate": 9.820408163265308e-06, "loss": 22.7215, "step": 29095 }, { "epoch": 692.7641791044776, "grad_norm": 25.81822967529297, "learning_rate": 9.820068027210884e-06, "loss": 23.3316, "step": 29096 }, { "epoch": 692.7880597014926, "grad_norm": 35.09387969970703, "learning_rate": 9.819727891156463e-06, "loss": 22.6954, "step": 29097 }, { "epoch": 692.8119402985075, "grad_norm": 32.71377182006836, "learning_rate": 9.819387755102041e-06, "loss": 23.7672, "step": 29098 }, { "epoch": 692.8358208955224, "grad_norm": 23.70945167541504, "learning_rate": 9.81904761904762e-06, "loss": 22.9033, "step": 29099 }, { "epoch": 692.8597014925373, "grad_norm": 29.369037628173828, "learning_rate": 9.818707482993198e-06, "loss": 23.0372, "step": 29100 }, { "epoch": 692.8835820895522, "grad_norm": 36.50493240356445, "learning_rate": 9.818367346938777e-06, "loss": 22.0062, "step": 29101 }, { "epoch": 692.9074626865672, "grad_norm": 24.410993576049805, "learning_rate": 9.818027210884355e-06, "loss": 23.3713, "step": 29102 }, { "epoch": 692.9313432835821, "grad_norm": 30.583446502685547, "learning_rate": 9.817687074829932e-06, "loss": 22.1425, "step": 29103 }, { "epoch": 692.955223880597, "grad_norm": 37.92950439453125, "learning_rate": 9.81734693877551e-06, "loss": 23.0355, "step": 29104 }, { "epoch": 692.9791044776119, "grad_norm": 24.969684600830078, "learning_rate": 9.81700680272109e-06, "loss": 22.5352, "step": 29105 }, { "epoch": 693.0, "grad_norm": 33.54865646362305, "learning_rate": 9.816666666666667e-06, "loss": 20.4615, "step": 29106 }, { "epoch": 693.0238805970149, "grad_norm": 32.85574722290039, "learning_rate": 9.816326530612245e-06, "loss": 22.5144, "step": 29107 }, { "epoch": 693.0477611940298, "grad_norm": 27.211519241333008, "learning_rate": 9.815986394557824e-06, "loss": 22.317, "step": 29108 }, { "epoch": 693.0716417910447, "grad_norm": 44.192630767822266, "learning_rate": 9.815646258503402e-06, "loss": 22.5866, "step": 29109 }, { "epoch": 693.0955223880597, "grad_norm": 29.298667907714844, "learning_rate": 9.81530612244898e-06, "loss": 22.9474, "step": 29110 }, { "epoch": 693.1194029850747, "grad_norm": 47.820159912109375, "learning_rate": 9.814965986394559e-06, "loss": 23.2071, "step": 29111 }, { "epoch": 693.1432835820896, "grad_norm": 33.10763931274414, "learning_rate": 9.814625850340137e-06, "loss": 23.2774, "step": 29112 }, { "epoch": 693.1671641791045, "grad_norm": 53.21034622192383, "learning_rate": 9.814285714285716e-06, "loss": 23.1432, "step": 29113 }, { "epoch": 693.1910447761194, "grad_norm": 36.5284423828125, "learning_rate": 9.813945578231293e-06, "loss": 23.1313, "step": 29114 }, { "epoch": 693.2149253731343, "grad_norm": 55.2823486328125, "learning_rate": 9.813605442176871e-06, "loss": 22.9838, "step": 29115 }, { "epoch": 693.2388059701492, "grad_norm": 42.07659149169922, "learning_rate": 9.81326530612245e-06, "loss": 23.0124, "step": 29116 }, { "epoch": 693.2626865671642, "grad_norm": 56.57536697387695, "learning_rate": 9.812925170068028e-06, "loss": 22.5684, "step": 29117 }, { "epoch": 693.2865671641791, "grad_norm": 45.83025360107422, "learning_rate": 9.812585034013606e-06, "loss": 22.5821, "step": 29118 }, { "epoch": 693.310447761194, "grad_norm": 55.283241271972656, "learning_rate": 9.812244897959185e-06, "loss": 22.8112, "step": 29119 }, { "epoch": 693.334328358209, "grad_norm": 53.340553283691406, "learning_rate": 9.811904761904763e-06, "loss": 22.4972, "step": 29120 }, { "epoch": 693.3582089552239, "grad_norm": 49.77436828613281, "learning_rate": 9.81156462585034e-06, "loss": 23.5039, "step": 29121 }, { "epoch": 693.3820895522388, "grad_norm": 48.948726654052734, "learning_rate": 9.81122448979592e-06, "loss": 22.8128, "step": 29122 }, { "epoch": 693.4059701492537, "grad_norm": 42.77883529663086, "learning_rate": 9.810884353741498e-06, "loss": 22.48, "step": 29123 }, { "epoch": 693.4298507462687, "grad_norm": 36.69826889038086, "learning_rate": 9.810544217687075e-06, "loss": 23.3724, "step": 29124 }, { "epoch": 693.4537313432836, "grad_norm": 49.922027587890625, "learning_rate": 9.810204081632654e-06, "loss": 23.3083, "step": 29125 }, { "epoch": 693.4776119402985, "grad_norm": 35.7457275390625, "learning_rate": 9.809863945578232e-06, "loss": 22.5874, "step": 29126 }, { "epoch": 693.5014925373134, "grad_norm": 54.978729248046875, "learning_rate": 9.80952380952381e-06, "loss": 22.5083, "step": 29127 }, { "epoch": 693.5253731343283, "grad_norm": 44.69696807861328, "learning_rate": 9.809183673469389e-06, "loss": 23.1379, "step": 29128 }, { "epoch": 693.5492537313432, "grad_norm": 50.6873893737793, "learning_rate": 9.808843537414967e-06, "loss": 22.3352, "step": 29129 }, { "epoch": 693.5731343283583, "grad_norm": 48.67112350463867, "learning_rate": 9.808503401360546e-06, "loss": 23.898, "step": 29130 }, { "epoch": 693.5970149253732, "grad_norm": 50.83612060546875, "learning_rate": 9.808163265306122e-06, "loss": 22.8918, "step": 29131 }, { "epoch": 693.6208955223881, "grad_norm": 45.25410842895508, "learning_rate": 9.8078231292517e-06, "loss": 22.9339, "step": 29132 }, { "epoch": 693.644776119403, "grad_norm": 53.926414489746094, "learning_rate": 9.80748299319728e-06, "loss": 23.3475, "step": 29133 }, { "epoch": 693.6686567164179, "grad_norm": 51.02803039550781, "learning_rate": 9.807142857142858e-06, "loss": 22.912, "step": 29134 }, { "epoch": 693.6925373134328, "grad_norm": 43.22723388671875, "learning_rate": 9.806802721088436e-06, "loss": 22.9497, "step": 29135 }, { "epoch": 693.7164179104477, "grad_norm": 43.770626068115234, "learning_rate": 9.806462585034014e-06, "loss": 23.6188, "step": 29136 }, { "epoch": 693.7402985074627, "grad_norm": 46.61272430419922, "learning_rate": 9.806122448979593e-06, "loss": 22.9863, "step": 29137 }, { "epoch": 693.7641791044776, "grad_norm": 38.38888931274414, "learning_rate": 9.805782312925171e-06, "loss": 23.8419, "step": 29138 }, { "epoch": 693.7880597014926, "grad_norm": 51.617156982421875, "learning_rate": 9.805442176870748e-06, "loss": 22.6563, "step": 29139 }, { "epoch": 693.8119402985075, "grad_norm": 48.4527587890625, "learning_rate": 9.805102040816328e-06, "loss": 23.814, "step": 29140 }, { "epoch": 693.8358208955224, "grad_norm": 42.700233459472656, "learning_rate": 9.804761904761907e-06, "loss": 21.7158, "step": 29141 }, { "epoch": 693.8597014925373, "grad_norm": 41.52580261230469, "learning_rate": 9.804421768707483e-06, "loss": 22.7918, "step": 29142 }, { "epoch": 693.8835820895522, "grad_norm": 48.55147933959961, "learning_rate": 9.804081632653062e-06, "loss": 22.9832, "step": 29143 }, { "epoch": 693.9074626865672, "grad_norm": 38.981712341308594, "learning_rate": 9.80374149659864e-06, "loss": 22.6733, "step": 29144 }, { "epoch": 693.9313432835821, "grad_norm": 56.950599670410156, "learning_rate": 9.803401360544219e-06, "loss": 22.9848, "step": 29145 }, { "epoch": 693.955223880597, "grad_norm": 50.76698684692383, "learning_rate": 9.803061224489797e-06, "loss": 23.2646, "step": 29146 }, { "epoch": 693.9791044776119, "grad_norm": 42.293514251708984, "learning_rate": 9.802721088435375e-06, "loss": 23.8157, "step": 29147 }, { "epoch": 694.0, "grad_norm": 33.96970748901367, "learning_rate": 9.802380952380954e-06, "loss": 19.7281, "step": 29148 }, { "epoch": 694.0238805970149, "grad_norm": 45.68949890136719, "learning_rate": 9.80204081632653e-06, "loss": 23.0148, "step": 29149 }, { "epoch": 694.0477611940298, "grad_norm": 34.72135925292969, "learning_rate": 9.801700680272109e-06, "loss": 23.4391, "step": 29150 }, { "epoch": 694.0716417910447, "grad_norm": 54.89358901977539, "learning_rate": 9.801360544217687e-06, "loss": 23.6948, "step": 29151 }, { "epoch": 694.0955223880597, "grad_norm": 50.48374557495117, "learning_rate": 9.801020408163266e-06, "loss": 23.6358, "step": 29152 }, { "epoch": 694.1194029850747, "grad_norm": 42.48683547973633, "learning_rate": 9.800680272108844e-06, "loss": 22.2329, "step": 29153 }, { "epoch": 694.1432835820896, "grad_norm": 42.115089416503906, "learning_rate": 9.800340136054423e-06, "loss": 23.1352, "step": 29154 }, { "epoch": 694.1671641791045, "grad_norm": 44.595237731933594, "learning_rate": 9.800000000000001e-06, "loss": 23.0098, "step": 29155 }, { "epoch": 694.1910447761194, "grad_norm": 39.47611618041992, "learning_rate": 9.79965986394558e-06, "loss": 21.9889, "step": 29156 }, { "epoch": 694.2149253731343, "grad_norm": 50.10950469970703, "learning_rate": 9.799319727891158e-06, "loss": 22.7376, "step": 29157 }, { "epoch": 694.2388059701492, "grad_norm": 40.65927505493164, "learning_rate": 9.798979591836736e-06, "loss": 23.0918, "step": 29158 }, { "epoch": 694.2626865671642, "grad_norm": 47.1022834777832, "learning_rate": 9.798639455782315e-06, "loss": 23.0711, "step": 29159 }, { "epoch": 694.2865671641791, "grad_norm": 38.0605583190918, "learning_rate": 9.798299319727891e-06, "loss": 22.9091, "step": 29160 }, { "epoch": 694.310447761194, "grad_norm": 50.511295318603516, "learning_rate": 9.79795918367347e-06, "loss": 22.9668, "step": 29161 }, { "epoch": 694.334328358209, "grad_norm": 42.0439453125, "learning_rate": 9.797619047619048e-06, "loss": 23.0894, "step": 29162 }, { "epoch": 694.3582089552239, "grad_norm": 48.42329788208008, "learning_rate": 9.797278911564627e-06, "loss": 22.2022, "step": 29163 }, { "epoch": 694.3820895522388, "grad_norm": 43.07917785644531, "learning_rate": 9.796938775510205e-06, "loss": 23.1048, "step": 29164 }, { "epoch": 694.4059701492537, "grad_norm": 46.0787239074707, "learning_rate": 9.796598639455784e-06, "loss": 22.2524, "step": 29165 }, { "epoch": 694.4298507462687, "grad_norm": 39.69488525390625, "learning_rate": 9.796258503401362e-06, "loss": 23.1487, "step": 29166 }, { "epoch": 694.4537313432836, "grad_norm": 47.07962417602539, "learning_rate": 9.795918367346939e-06, "loss": 22.9049, "step": 29167 }, { "epoch": 694.4776119402985, "grad_norm": 43.161869049072266, "learning_rate": 9.795578231292517e-06, "loss": 22.4811, "step": 29168 }, { "epoch": 694.5014925373134, "grad_norm": 46.963592529296875, "learning_rate": 9.795238095238097e-06, "loss": 23.106, "step": 29169 }, { "epoch": 694.5253731343283, "grad_norm": 41.72783279418945, "learning_rate": 9.794897959183674e-06, "loss": 22.4495, "step": 29170 }, { "epoch": 694.5492537313432, "grad_norm": 46.52138137817383, "learning_rate": 9.794557823129252e-06, "loss": 23.025, "step": 29171 }, { "epoch": 694.5731343283583, "grad_norm": 43.17749786376953, "learning_rate": 9.79421768707483e-06, "loss": 23.1422, "step": 29172 }, { "epoch": 694.5970149253732, "grad_norm": 45.0753059387207, "learning_rate": 9.79387755102041e-06, "loss": 22.9483, "step": 29173 }, { "epoch": 694.6208955223881, "grad_norm": 41.87132263183594, "learning_rate": 9.793537414965986e-06, "loss": 22.5668, "step": 29174 }, { "epoch": 694.644776119403, "grad_norm": 46.31102752685547, "learning_rate": 9.793197278911566e-06, "loss": 22.4298, "step": 29175 }, { "epoch": 694.6686567164179, "grad_norm": 40.05924987792969, "learning_rate": 9.792857142857144e-06, "loss": 23.5913, "step": 29176 }, { "epoch": 694.6925373134328, "grad_norm": 46.547157287597656, "learning_rate": 9.792517006802721e-06, "loss": 22.655, "step": 29177 }, { "epoch": 694.7164179104477, "grad_norm": 42.961002349853516, "learning_rate": 9.7921768707483e-06, "loss": 23.0081, "step": 29178 }, { "epoch": 694.7402985074627, "grad_norm": 48.21125030517578, "learning_rate": 9.791836734693878e-06, "loss": 22.4425, "step": 29179 }, { "epoch": 694.7641791044776, "grad_norm": 39.509220123291016, "learning_rate": 9.791496598639456e-06, "loss": 23.0719, "step": 29180 }, { "epoch": 694.7880597014926, "grad_norm": 45.594520568847656, "learning_rate": 9.791156462585035e-06, "loss": 22.9465, "step": 29181 }, { "epoch": 694.8119402985075, "grad_norm": 41.25763702392578, "learning_rate": 9.790816326530613e-06, "loss": 22.7167, "step": 29182 }, { "epoch": 694.8358208955224, "grad_norm": 46.65616989135742, "learning_rate": 9.790476190476192e-06, "loss": 23.5161, "step": 29183 }, { "epoch": 694.8597014925373, "grad_norm": 43.469268798828125, "learning_rate": 9.79013605442177e-06, "loss": 23.5943, "step": 29184 }, { "epoch": 694.8835820895522, "grad_norm": 43.48239517211914, "learning_rate": 9.789795918367347e-06, "loss": 22.3451, "step": 29185 }, { "epoch": 694.9074626865672, "grad_norm": 40.82613754272461, "learning_rate": 9.789455782312925e-06, "loss": 23.2232, "step": 29186 }, { "epoch": 694.9313432835821, "grad_norm": 49.004112243652344, "learning_rate": 9.789115646258505e-06, "loss": 23.2477, "step": 29187 }, { "epoch": 694.955223880597, "grad_norm": 41.37343215942383, "learning_rate": 9.788775510204082e-06, "loss": 22.6128, "step": 29188 }, { "epoch": 694.9791044776119, "grad_norm": 41.602142333984375, "learning_rate": 9.78843537414966e-06, "loss": 22.3049, "step": 29189 }, { "epoch": 695.0, "grad_norm": 36.460235595703125, "learning_rate": 9.788095238095239e-06, "loss": 19.9536, "step": 29190 }, { "epoch": 695.0238805970149, "grad_norm": 39.70573806762695, "learning_rate": 9.787755102040817e-06, "loss": 23.192, "step": 29191 }, { "epoch": 695.0477611940298, "grad_norm": 33.30185317993164, "learning_rate": 9.787414965986394e-06, "loss": 23.0282, "step": 29192 }, { "epoch": 695.0716417910447, "grad_norm": 43.31639862060547, "learning_rate": 9.787074829931974e-06, "loss": 22.3109, "step": 29193 }, { "epoch": 695.0955223880597, "grad_norm": 38.757171630859375, "learning_rate": 9.786734693877553e-06, "loss": 22.6346, "step": 29194 }, { "epoch": 695.1194029850747, "grad_norm": 50.39634323120117, "learning_rate": 9.78639455782313e-06, "loss": 22.8437, "step": 29195 }, { "epoch": 695.1432835820896, "grad_norm": 40.38139724731445, "learning_rate": 9.786054421768708e-06, "loss": 22.9246, "step": 29196 }, { "epoch": 695.1671641791045, "grad_norm": 42.07522201538086, "learning_rate": 9.785714285714286e-06, "loss": 22.8353, "step": 29197 }, { "epoch": 695.1910447761194, "grad_norm": 41.092918395996094, "learning_rate": 9.785374149659865e-06, "loss": 23.066, "step": 29198 }, { "epoch": 695.2149253731343, "grad_norm": 40.56991958618164, "learning_rate": 9.785034013605443e-06, "loss": 23.4319, "step": 29199 }, { "epoch": 695.2388059701492, "grad_norm": 33.77806854248047, "learning_rate": 9.784693877551021e-06, "loss": 22.1911, "step": 29200 }, { "epoch": 695.2626865671642, "grad_norm": 42.342079162597656, "learning_rate": 9.7843537414966e-06, "loss": 22.3416, "step": 29201 }, { "epoch": 695.2865671641791, "grad_norm": 32.74491500854492, "learning_rate": 9.784013605442178e-06, "loss": 22.4391, "step": 29202 }, { "epoch": 695.310447761194, "grad_norm": 44.48106002807617, "learning_rate": 9.783673469387755e-06, "loss": 22.8024, "step": 29203 }, { "epoch": 695.334328358209, "grad_norm": 33.895511627197266, "learning_rate": 9.783333333333335e-06, "loss": 22.3265, "step": 29204 }, { "epoch": 695.3582089552239, "grad_norm": 44.87157440185547, "learning_rate": 9.782993197278914e-06, "loss": 23.3132, "step": 29205 }, { "epoch": 695.3820895522388, "grad_norm": 36.71040344238281, "learning_rate": 9.78265306122449e-06, "loss": 22.6187, "step": 29206 }, { "epoch": 695.4059701492537, "grad_norm": 48.816410064697266, "learning_rate": 9.782312925170069e-06, "loss": 22.4906, "step": 29207 }, { "epoch": 695.4298507462687, "grad_norm": 38.5993537902832, "learning_rate": 9.781972789115647e-06, "loss": 22.6582, "step": 29208 }, { "epoch": 695.4537313432836, "grad_norm": 42.74947738647461, "learning_rate": 9.781632653061225e-06, "loss": 23.0346, "step": 29209 }, { "epoch": 695.4776119402985, "grad_norm": 35.37027359008789, "learning_rate": 9.781292517006804e-06, "loss": 23.0214, "step": 29210 }, { "epoch": 695.5014925373134, "grad_norm": 47.346221923828125, "learning_rate": 9.780952380952382e-06, "loss": 22.9138, "step": 29211 }, { "epoch": 695.5253731343283, "grad_norm": NaN, "learning_rate": 9.78061224489796e-06, "loss": 34.8075, "step": 29212 }, { "epoch": 695.5492537313432, "grad_norm": 37.8542594909668, "learning_rate": 9.78061224489796e-06, "loss": 23.3525, "step": 29213 }, { "epoch": 695.5731343283583, "grad_norm": 49.316314697265625, "learning_rate": 9.780272108843537e-06, "loss": 22.6447, "step": 29214 }, { "epoch": 695.5970149253732, "grad_norm": 39.95734786987305, "learning_rate": 9.779931972789116e-06, "loss": 22.6713, "step": 29215 }, { "epoch": 695.6208955223881, "grad_norm": 45.175628662109375, "learning_rate": 9.779591836734694e-06, "loss": 23.9838, "step": 29216 }, { "epoch": 695.644776119403, "grad_norm": 39.788856506347656, "learning_rate": 9.779251700680273e-06, "loss": 22.2439, "step": 29217 }, { "epoch": 695.6686567164179, "grad_norm": 42.292564392089844, "learning_rate": 9.778911564625851e-06, "loss": 23.2486, "step": 29218 }, { "epoch": 695.6925373134328, "grad_norm": 36.41720199584961, "learning_rate": 9.77857142857143e-06, "loss": 22.5198, "step": 29219 }, { "epoch": 695.7164179104477, "grad_norm": 41.9666748046875, "learning_rate": 9.778231292517008e-06, "loss": 23.0741, "step": 29220 }, { "epoch": 695.7402985074627, "grad_norm": 34.84041213989258, "learning_rate": 9.777891156462586e-06, "loss": 23.1064, "step": 29221 }, { "epoch": 695.7641791044776, "grad_norm": 45.9777946472168, "learning_rate": 9.777551020408163e-06, "loss": 23.6415, "step": 29222 }, { "epoch": 695.7880597014926, "grad_norm": 38.385719299316406, "learning_rate": 9.777210884353743e-06, "loss": 23.4668, "step": 29223 }, { "epoch": 695.8119402985075, "grad_norm": 42.717430114746094, "learning_rate": 9.77687074829932e-06, "loss": 22.1389, "step": 29224 }, { "epoch": 695.8358208955224, "grad_norm": 39.49242401123047, "learning_rate": 9.776530612244898e-06, "loss": 22.1925, "step": 29225 }, { "epoch": 695.8597014925373, "grad_norm": 44.830474853515625, "learning_rate": 9.776190476190477e-06, "loss": 24.2487, "step": 29226 }, { "epoch": 695.8835820895522, "grad_norm": 39.20085525512695, "learning_rate": 9.775850340136055e-06, "loss": 23.5398, "step": 29227 }, { "epoch": 695.9074626865672, "grad_norm": 43.6811408996582, "learning_rate": 9.775510204081634e-06, "loss": 22.8036, "step": 29228 }, { "epoch": 695.9313432835821, "grad_norm": 36.44725036621094, "learning_rate": 9.775170068027212e-06, "loss": 23.2414, "step": 29229 }, { "epoch": 695.955223880597, "grad_norm": 46.64715576171875, "learning_rate": 9.77482993197279e-06, "loss": 22.2382, "step": 29230 }, { "epoch": 695.9791044776119, "grad_norm": 36.252864837646484, "learning_rate": 9.774489795918369e-06, "loss": 22.9804, "step": 29231 }, { "epoch": 696.0, "grad_norm": 35.001644134521484, "learning_rate": 9.774149659863946e-06, "loss": 20.3234, "step": 29232 }, { "epoch": 696.0238805970149, "grad_norm": 35.89237976074219, "learning_rate": 9.773809523809524e-06, "loss": 22.3264, "step": 29233 }, { "epoch": 696.0477611940298, "grad_norm": 38.17445373535156, "learning_rate": 9.773469387755102e-06, "loss": 22.7237, "step": 29234 }, { "epoch": 696.0716417910447, "grad_norm": 34.36155319213867, "learning_rate": 9.773129251700681e-06, "loss": 22.5519, "step": 29235 }, { "epoch": 696.0955223880597, "grad_norm": 29.85586166381836, "learning_rate": 9.77278911564626e-06, "loss": 21.7849, "step": 29236 }, { "epoch": 696.1194029850747, "grad_norm": 31.526840209960938, "learning_rate": 9.772448979591838e-06, "loss": 23.0259, "step": 29237 }, { "epoch": 696.1432835820896, "grad_norm": 35.00136947631836, "learning_rate": 9.772108843537416e-06, "loss": 23.054, "step": 29238 }, { "epoch": 696.1671641791045, "grad_norm": 31.977441787719727, "learning_rate": 9.771768707482993e-06, "loss": 22.9483, "step": 29239 }, { "epoch": 696.1910447761194, "grad_norm": 28.92215347290039, "learning_rate": 9.771428571428571e-06, "loss": 22.2151, "step": 29240 }, { "epoch": 696.2149253731343, "grad_norm": 29.50923728942871, "learning_rate": 9.771088435374151e-06, "loss": 22.9256, "step": 29241 }, { "epoch": 696.2388059701492, "grad_norm": 33.56807327270508, "learning_rate": 9.770748299319728e-06, "loss": 22.8388, "step": 29242 }, { "epoch": 696.2626865671642, "grad_norm": 28.304264068603516, "learning_rate": 9.770408163265307e-06, "loss": 23.1995, "step": 29243 }, { "epoch": 696.2865671641791, "grad_norm": 37.42516326904297, "learning_rate": 9.770068027210885e-06, "loss": 23.0, "step": 29244 }, { "epoch": 696.310447761194, "grad_norm": 29.442203521728516, "learning_rate": 9.769727891156463e-06, "loss": 23.784, "step": 29245 }, { "epoch": 696.334328358209, "grad_norm": 36.60150909423828, "learning_rate": 9.769387755102042e-06, "loss": 23.4861, "step": 29246 }, { "epoch": 696.3582089552239, "grad_norm": 30.4171199798584, "learning_rate": 9.76904761904762e-06, "loss": 22.4425, "step": 29247 }, { "epoch": 696.3820895522388, "grad_norm": 30.61048698425293, "learning_rate": 9.768707482993199e-06, "loss": 22.502, "step": 29248 }, { "epoch": 696.4059701492537, "grad_norm": 30.885726928710938, "learning_rate": 9.768367346938777e-06, "loss": 23.7751, "step": 29249 }, { "epoch": 696.4298507462687, "grad_norm": 31.200374603271484, "learning_rate": 9.768027210884354e-06, "loss": 23.2586, "step": 29250 }, { "epoch": 696.4537313432836, "grad_norm": 28.277647018432617, "learning_rate": 9.767687074829932e-06, "loss": 23.121, "step": 29251 }, { "epoch": 696.4776119402985, "grad_norm": 27.891340255737305, "learning_rate": 9.767346938775512e-06, "loss": 21.9771, "step": 29252 }, { "epoch": 696.5014925373134, "grad_norm": 27.59401512145996, "learning_rate": 9.767006802721089e-06, "loss": 22.6373, "step": 29253 }, { "epoch": 696.5253731343283, "grad_norm": 27.61656379699707, "learning_rate": 9.766666666666667e-06, "loss": 23.5602, "step": 29254 }, { "epoch": 696.5492537313432, "grad_norm": 30.036243438720703, "learning_rate": 9.766326530612246e-06, "loss": 22.2125, "step": 29255 }, { "epoch": 696.5731343283583, "grad_norm": 29.699831008911133, "learning_rate": 9.765986394557824e-06, "loss": 23.1017, "step": 29256 }, { "epoch": 696.5970149253732, "grad_norm": 26.369413375854492, "learning_rate": 9.765646258503401e-06, "loss": 22.9251, "step": 29257 }, { "epoch": 696.6208955223881, "grad_norm": 26.142826080322266, "learning_rate": 9.765306122448981e-06, "loss": 23.0205, "step": 29258 }, { "epoch": 696.644776119403, "grad_norm": 28.22520637512207, "learning_rate": 9.76496598639456e-06, "loss": 23.5891, "step": 29259 }, { "epoch": 696.6686567164179, "grad_norm": 28.244569778442383, "learning_rate": 9.764625850340136e-06, "loss": 22.8619, "step": 29260 }, { "epoch": 696.6925373134328, "grad_norm": 30.083724975585938, "learning_rate": 9.764285714285715e-06, "loss": 23.4779, "step": 29261 }, { "epoch": 696.7164179104477, "grad_norm": 24.93596839904785, "learning_rate": 9.763945578231293e-06, "loss": 22.4585, "step": 29262 }, { "epoch": 696.7402985074627, "grad_norm": 27.18376922607422, "learning_rate": 9.763605442176872e-06, "loss": 22.9053, "step": 29263 }, { "epoch": 696.7641791044776, "grad_norm": 25.178682327270508, "learning_rate": 9.76326530612245e-06, "loss": 22.5647, "step": 29264 }, { "epoch": 696.7880597014926, "grad_norm": 25.59387969970703, "learning_rate": 9.762925170068028e-06, "loss": 22.6505, "step": 29265 }, { "epoch": 696.8119402985075, "grad_norm": 27.902055740356445, "learning_rate": 9.762585034013607e-06, "loss": 23.1219, "step": 29266 }, { "epoch": 696.8358208955224, "grad_norm": 27.51023292541504, "learning_rate": 9.762244897959185e-06, "loss": 23.1946, "step": 29267 }, { "epoch": 696.8597014925373, "grad_norm": 27.384279251098633, "learning_rate": 9.761904761904762e-06, "loss": 22.5112, "step": 29268 }, { "epoch": 696.8835820895522, "grad_norm": 25.64081573486328, "learning_rate": 9.76156462585034e-06, "loss": 22.9608, "step": 29269 }, { "epoch": 696.9074626865672, "grad_norm": 24.78345489501953, "learning_rate": 9.76122448979592e-06, "loss": 23.231, "step": 29270 }, { "epoch": 696.9313432835821, "grad_norm": 27.714008331298828, "learning_rate": 9.760884353741497e-06, "loss": 22.6677, "step": 29271 }, { "epoch": 696.955223880597, "grad_norm": 27.814226150512695, "learning_rate": 9.760544217687076e-06, "loss": 23.1937, "step": 29272 }, { "epoch": 696.9791044776119, "grad_norm": 29.36530303955078, "learning_rate": 9.760204081632654e-06, "loss": 23.687, "step": 29273 }, { "epoch": 697.0, "grad_norm": 23.62784194946289, "learning_rate": 9.759863945578232e-06, "loss": 19.6463, "step": 29274 }, { "epoch": 697.0238805970149, "grad_norm": 28.395694732666016, "learning_rate": 9.75952380952381e-06, "loss": 22.6501, "step": 29275 }, { "epoch": 697.0477611940298, "grad_norm": 27.082197189331055, "learning_rate": 9.75918367346939e-06, "loss": 22.9414, "step": 29276 }, { "epoch": 697.0716417910447, "grad_norm": 26.627424240112305, "learning_rate": 9.758843537414968e-06, "loss": 23.0479, "step": 29277 }, { "epoch": 697.0955223880597, "grad_norm": 28.58576202392578, "learning_rate": 9.758503401360544e-06, "loss": 22.4269, "step": 29278 }, { "epoch": 697.1194029850747, "grad_norm": 28.73088264465332, "learning_rate": 9.758163265306123e-06, "loss": 23.241, "step": 29279 }, { "epoch": 697.1432835820896, "grad_norm": 31.179410934448242, "learning_rate": 9.757823129251701e-06, "loss": 21.4744, "step": 29280 }, { "epoch": 697.1671641791045, "grad_norm": 26.667085647583008, "learning_rate": 9.75748299319728e-06, "loss": 23.3482, "step": 29281 }, { "epoch": 697.1910447761194, "grad_norm": 31.696393966674805, "learning_rate": 9.757142857142858e-06, "loss": 23.0976, "step": 29282 }, { "epoch": 697.2149253731343, "grad_norm": 28.84362030029297, "learning_rate": 9.756802721088437e-06, "loss": 23.6862, "step": 29283 }, { "epoch": 697.2388059701492, "grad_norm": 25.152624130249023, "learning_rate": 9.756462585034015e-06, "loss": 22.3527, "step": 29284 }, { "epoch": 697.2626865671642, "grad_norm": 30.517969131469727, "learning_rate": 9.756122448979592e-06, "loss": 22.8016, "step": 29285 }, { "epoch": 697.2865671641791, "grad_norm": NaN, "learning_rate": 9.75578231292517e-06, "loss": 25.0975, "step": 29286 }, { "epoch": 697.310447761194, "grad_norm": 27.61777114868164, "learning_rate": 9.75578231292517e-06, "loss": 22.6783, "step": 29287 }, { "epoch": 697.334328358209, "grad_norm": 29.78957748413086, "learning_rate": 9.755442176870749e-06, "loss": 22.4589, "step": 29288 }, { "epoch": 697.3582089552239, "grad_norm": 24.590017318725586, "learning_rate": 9.755102040816327e-06, "loss": 22.5892, "step": 29289 }, { "epoch": 697.3820895522388, "grad_norm": 29.439428329467773, "learning_rate": 9.754761904761905e-06, "loss": 22.206, "step": 29290 }, { "epoch": 697.4059701492537, "grad_norm": 26.491119384765625, "learning_rate": 9.754421768707484e-06, "loss": 22.9459, "step": 29291 }, { "epoch": 697.4298507462687, "grad_norm": NaN, "learning_rate": 9.754081632653062e-06, "loss": 19.8971, "step": 29292 }, { "epoch": 697.4537313432836, "grad_norm": 25.267934799194336, "learning_rate": 9.754081632653062e-06, "loss": 23.3567, "step": 29293 }, { "epoch": 697.4776119402985, "grad_norm": 25.872356414794922, "learning_rate": 9.75374149659864e-06, "loss": 22.5923, "step": 29294 }, { "epoch": 697.5014925373134, "grad_norm": 29.871435165405273, "learning_rate": 9.753401360544217e-06, "loss": 23.0769, "step": 29295 }, { "epoch": 697.5253731343283, "grad_norm": 26.038774490356445, "learning_rate": 9.753061224489797e-06, "loss": 23.4076, "step": 29296 }, { "epoch": 697.5492537313432, "grad_norm": 25.70086097717285, "learning_rate": 9.752721088435376e-06, "loss": 21.829, "step": 29297 }, { "epoch": 697.5731343283583, "grad_norm": 26.78814125061035, "learning_rate": 9.752380952380953e-06, "loss": 22.9032, "step": 29298 }, { "epoch": 697.5970149253732, "grad_norm": 27.870344161987305, "learning_rate": 9.752040816326531e-06, "loss": 22.8345, "step": 29299 }, { "epoch": 697.6208955223881, "grad_norm": 24.584186553955078, "learning_rate": 9.75170068027211e-06, "loss": 22.4049, "step": 29300 }, { "epoch": 697.644776119403, "grad_norm": 26.46942138671875, "learning_rate": 9.751360544217688e-06, "loss": 22.2912, "step": 29301 }, { "epoch": 697.6686567164179, "grad_norm": 24.772159576416016, "learning_rate": 9.751020408163266e-06, "loss": 23.1661, "step": 29302 }, { "epoch": 697.6925373134328, "grad_norm": 30.69268226623535, "learning_rate": 9.750680272108845e-06, "loss": 22.5131, "step": 29303 }, { "epoch": 697.7164179104477, "grad_norm": 26.675861358642578, "learning_rate": 9.750340136054423e-06, "loss": 23.4672, "step": 29304 }, { "epoch": 697.7402985074627, "grad_norm": 26.80335235595703, "learning_rate": 9.75e-06, "loss": 23.0333, "step": 29305 }, { "epoch": 697.7641791044776, "grad_norm": 28.27373695373535, "learning_rate": 9.749659863945578e-06, "loss": 23.3556, "step": 29306 }, { "epoch": 697.7880597014926, "grad_norm": 28.029329299926758, "learning_rate": 9.749319727891158e-06, "loss": 23.2451, "step": 29307 }, { "epoch": 697.8119402985075, "grad_norm": 24.822925567626953, "learning_rate": 9.748979591836735e-06, "loss": 22.969, "step": 29308 }, { "epoch": 697.8358208955224, "grad_norm": 32.82056427001953, "learning_rate": 9.748639455782313e-06, "loss": 23.8989, "step": 29309 }, { "epoch": 697.8597014925373, "grad_norm": 27.247583389282227, "learning_rate": 9.748299319727892e-06, "loss": 22.965, "step": 29310 }, { "epoch": 697.8835820895522, "grad_norm": 27.50613021850586, "learning_rate": 9.74795918367347e-06, "loss": 23.3772, "step": 29311 }, { "epoch": 697.9074626865672, "grad_norm": 30.044292449951172, "learning_rate": 9.747619047619049e-06, "loss": 22.9519, "step": 29312 }, { "epoch": 697.9313432835821, "grad_norm": 29.542579650878906, "learning_rate": 9.747278911564627e-06, "loss": 22.9322, "step": 29313 }, { "epoch": 697.955223880597, "grad_norm": 23.576618194580078, "learning_rate": 9.746938775510206e-06, "loss": 23.2914, "step": 29314 }, { "epoch": 697.9791044776119, "grad_norm": 30.561403274536133, "learning_rate": 9.746598639455784e-06, "loss": 22.8041, "step": 29315 }, { "epoch": 698.0, "grad_norm": 30.4215087890625, "learning_rate": 9.74625850340136e-06, "loss": 20.7953, "step": 29316 }, { "epoch": 698.0238805970149, "grad_norm": 25.07583999633789, "learning_rate": 9.74591836734694e-06, "loss": 23.1698, "step": 29317 }, { "epoch": 698.0477611940298, "grad_norm": 31.756155014038086, "learning_rate": 9.745578231292518e-06, "loss": 22.6835, "step": 29318 }, { "epoch": 698.0716417910447, "grad_norm": 27.672531127929688, "learning_rate": 9.745238095238096e-06, "loss": 23.1252, "step": 29319 }, { "epoch": 698.0955223880597, "grad_norm": 24.19230079650879, "learning_rate": 9.744897959183674e-06, "loss": 22.8527, "step": 29320 }, { "epoch": 698.1194029850747, "grad_norm": 32.214359283447266, "learning_rate": 9.744557823129253e-06, "loss": 23.1977, "step": 29321 }, { "epoch": 698.1432835820896, "grad_norm": 26.116039276123047, "learning_rate": 9.744217687074831e-06, "loss": 22.5609, "step": 29322 }, { "epoch": 698.1671641791045, "grad_norm": 26.4638729095459, "learning_rate": 9.743877551020408e-06, "loss": 22.8625, "step": 29323 }, { "epoch": 698.1910447761194, "grad_norm": 27.01224136352539, "learning_rate": 9.743537414965986e-06, "loss": 22.7533, "step": 29324 }, { "epoch": 698.2149253731343, "grad_norm": 23.803991317749023, "learning_rate": 9.743197278911567e-06, "loss": 22.8453, "step": 29325 }, { "epoch": 698.2388059701492, "grad_norm": 27.0411319732666, "learning_rate": 9.742857142857143e-06, "loss": 22.3546, "step": 29326 }, { "epoch": 698.2626865671642, "grad_norm": 30.936119079589844, "learning_rate": 9.742517006802722e-06, "loss": 22.4813, "step": 29327 }, { "epoch": 698.2865671641791, "grad_norm": 26.67909049987793, "learning_rate": 9.7421768707483e-06, "loss": 22.4222, "step": 29328 }, { "epoch": 698.310447761194, "grad_norm": 25.868793487548828, "learning_rate": 9.741836734693878e-06, "loss": 23.1975, "step": 29329 }, { "epoch": 698.334328358209, "grad_norm": 27.028865814208984, "learning_rate": 9.741496598639457e-06, "loss": 22.6223, "step": 29330 }, { "epoch": 698.3582089552239, "grad_norm": 27.640270233154297, "learning_rate": 9.741156462585035e-06, "loss": 23.2214, "step": 29331 }, { "epoch": 698.3820895522388, "grad_norm": 25.11690902709961, "learning_rate": 9.740816326530614e-06, "loss": 23.6502, "step": 29332 }, { "epoch": 698.4059701492537, "grad_norm": 24.512575149536133, "learning_rate": 9.74047619047619e-06, "loss": 22.5654, "step": 29333 }, { "epoch": 698.4298507462687, "grad_norm": 28.181232452392578, "learning_rate": 9.740136054421769e-06, "loss": 22.7479, "step": 29334 }, { "epoch": 698.4537313432836, "grad_norm": 24.24068260192871, "learning_rate": 9.739795918367347e-06, "loss": 21.9526, "step": 29335 }, { "epoch": 698.4776119402985, "grad_norm": 28.386451721191406, "learning_rate": 9.739455782312926e-06, "loss": 23.3331, "step": 29336 }, { "epoch": 698.5014925373134, "grad_norm": 28.12354850769043, "learning_rate": 9.739115646258504e-06, "loss": 22.39, "step": 29337 }, { "epoch": 698.5253731343283, "grad_norm": 26.079349517822266, "learning_rate": 9.738775510204083e-06, "loss": 22.6566, "step": 29338 }, { "epoch": 698.5492537313432, "grad_norm": 29.260616302490234, "learning_rate": 9.738435374149661e-06, "loss": 23.3024, "step": 29339 }, { "epoch": 698.5731343283583, "grad_norm": 25.560104370117188, "learning_rate": 9.73809523809524e-06, "loss": 22.6366, "step": 29340 }, { "epoch": 698.5970149253732, "grad_norm": 26.498516082763672, "learning_rate": 9.737755102040816e-06, "loss": 23.2943, "step": 29341 }, { "epoch": 698.6208955223881, "grad_norm": 28.043243408203125, "learning_rate": 9.737414965986396e-06, "loss": 23.2813, "step": 29342 }, { "epoch": 698.644776119403, "grad_norm": 26.581113815307617, "learning_rate": 9.737074829931975e-06, "loss": 22.0846, "step": 29343 }, { "epoch": 698.6686567164179, "grad_norm": 30.330726623535156, "learning_rate": 9.736734693877551e-06, "loss": 23.4779, "step": 29344 }, { "epoch": 698.6925373134328, "grad_norm": 28.195756912231445, "learning_rate": 9.73639455782313e-06, "loss": 23.2951, "step": 29345 }, { "epoch": 698.7164179104477, "grad_norm": 27.560625076293945, "learning_rate": 9.736054421768708e-06, "loss": 22.0091, "step": 29346 }, { "epoch": 698.7402985074627, "grad_norm": 30.28885269165039, "learning_rate": 9.735714285714287e-06, "loss": 22.3689, "step": 29347 }, { "epoch": 698.7641791044776, "grad_norm": 26.213623046875, "learning_rate": 9.735374149659865e-06, "loss": 23.4083, "step": 29348 }, { "epoch": 698.7880597014926, "grad_norm": 29.396352767944336, "learning_rate": 9.735034013605443e-06, "loss": 22.7059, "step": 29349 }, { "epoch": 698.8119402985075, "grad_norm": 26.357837677001953, "learning_rate": 9.734693877551022e-06, "loss": 23.0758, "step": 29350 }, { "epoch": 698.8358208955224, "grad_norm": 34.634368896484375, "learning_rate": 9.734353741496599e-06, "loss": 23.5547, "step": 29351 }, { "epoch": 698.8597014925373, "grad_norm": 26.412471771240234, "learning_rate": 9.734013605442177e-06, "loss": 22.8147, "step": 29352 }, { "epoch": 698.8835820895522, "grad_norm": 28.587688446044922, "learning_rate": 9.733673469387755e-06, "loss": 22.8682, "step": 29353 }, { "epoch": 698.9074626865672, "grad_norm": 33.51738357543945, "learning_rate": 9.733333333333334e-06, "loss": 23.6727, "step": 29354 }, { "epoch": 698.9313432835821, "grad_norm": 28.543777465820312, "learning_rate": 9.732993197278912e-06, "loss": 23.0745, "step": 29355 }, { "epoch": 698.955223880597, "grad_norm": 25.314722061157227, "learning_rate": 9.73265306122449e-06, "loss": 23.1649, "step": 29356 }, { "epoch": 698.9791044776119, "grad_norm": 25.241098403930664, "learning_rate": 9.732312925170069e-06, "loss": 22.7374, "step": 29357 }, { "epoch": 699.0, "grad_norm": 28.18282127380371, "learning_rate": 9.731972789115648e-06, "loss": 18.6827, "step": 29358 }, { "epoch": 699.0238805970149, "grad_norm": 32.58746337890625, "learning_rate": 9.731632653061224e-06, "loss": 22.3174, "step": 29359 }, { "epoch": 699.0477611940298, "grad_norm": 25.27964210510254, "learning_rate": 9.731292517006804e-06, "loss": 22.5721, "step": 29360 }, { "epoch": 699.0716417910447, "grad_norm": 29.562828063964844, "learning_rate": 9.730952380952383e-06, "loss": 23.0797, "step": 29361 }, { "epoch": 699.0955223880597, "grad_norm": 31.803754806518555, "learning_rate": 9.73061224489796e-06, "loss": 23.1851, "step": 29362 }, { "epoch": 699.1194029850747, "grad_norm": 26.787132263183594, "learning_rate": 9.730272108843538e-06, "loss": 23.1699, "step": 29363 }, { "epoch": 699.1432835820896, "grad_norm": 30.665771484375, "learning_rate": 9.729931972789116e-06, "loss": 23.335, "step": 29364 }, { "epoch": 699.1671641791045, "grad_norm": 33.33851623535156, "learning_rate": 9.729591836734695e-06, "loss": 22.8272, "step": 29365 }, { "epoch": 699.1910447761194, "grad_norm": 28.588857650756836, "learning_rate": 9.729251700680273e-06, "loss": 22.6189, "step": 29366 }, { "epoch": 699.2149253731343, "grad_norm": 25.152971267700195, "learning_rate": 9.728911564625852e-06, "loss": 23.5969, "step": 29367 }, { "epoch": 699.2388059701492, "grad_norm": 39.7860107421875, "learning_rate": 9.72857142857143e-06, "loss": 23.3989, "step": 29368 }, { "epoch": 699.2626865671642, "grad_norm": 24.22866439819336, "learning_rate": 9.728231292517007e-06, "loss": 22.648, "step": 29369 }, { "epoch": 699.2865671641791, "grad_norm": 30.935699462890625, "learning_rate": 9.727891156462585e-06, "loss": 22.5243, "step": 29370 }, { "epoch": 699.310447761194, "grad_norm": 30.024433135986328, "learning_rate": 9.727551020408164e-06, "loss": 22.4375, "step": 29371 }, { "epoch": 699.334328358209, "grad_norm": 26.959867477416992, "learning_rate": 9.727210884353742e-06, "loss": 23.1759, "step": 29372 }, { "epoch": 699.3582089552239, "grad_norm": 25.905902862548828, "learning_rate": 9.72687074829932e-06, "loss": 22.0788, "step": 29373 }, { "epoch": 699.3820895522388, "grad_norm": 26.047189712524414, "learning_rate": 9.726530612244899e-06, "loss": 23.2317, "step": 29374 }, { "epoch": 699.4059701492537, "grad_norm": 31.40427589416504, "learning_rate": 9.726190476190477e-06, "loss": 22.8558, "step": 29375 }, { "epoch": 699.4298507462687, "grad_norm": 28.055252075195312, "learning_rate": 9.725850340136056e-06, "loss": 23.4971, "step": 29376 }, { "epoch": 699.4537313432836, "grad_norm": 26.925195693969727, "learning_rate": 9.725510204081632e-06, "loss": 22.4544, "step": 29377 }, { "epoch": 699.4776119402985, "grad_norm": 26.050403594970703, "learning_rate": 9.725170068027213e-06, "loss": 22.3693, "step": 29378 }, { "epoch": 699.5014925373134, "grad_norm": 29.94669532775879, "learning_rate": 9.724829931972791e-06, "loss": 22.5308, "step": 29379 }, { "epoch": 699.5253731343283, "grad_norm": 27.47682762145996, "learning_rate": 9.724489795918368e-06, "loss": 22.4007, "step": 29380 }, { "epoch": 699.5492537313432, "grad_norm": 28.568456649780273, "learning_rate": 9.724149659863946e-06, "loss": 23.0855, "step": 29381 }, { "epoch": 699.5731343283583, "grad_norm": 25.72081756591797, "learning_rate": 9.723809523809525e-06, "loss": 23.286, "step": 29382 }, { "epoch": 699.5970149253732, "grad_norm": 26.446826934814453, "learning_rate": 9.723469387755103e-06, "loss": 22.9208, "step": 29383 }, { "epoch": 699.6208955223881, "grad_norm": 25.68882179260254, "learning_rate": 9.723129251700681e-06, "loss": 23.0222, "step": 29384 }, { "epoch": 699.644776119403, "grad_norm": 27.6403865814209, "learning_rate": 9.72278911564626e-06, "loss": 22.1463, "step": 29385 }, { "epoch": 699.6686567164179, "grad_norm": 24.82706069946289, "learning_rate": 9.722448979591838e-06, "loss": 22.1847, "step": 29386 }, { "epoch": 699.6925373134328, "grad_norm": 28.009056091308594, "learning_rate": 9.722108843537415e-06, "loss": 22.2817, "step": 29387 }, { "epoch": 699.7164179104477, "grad_norm": 24.320589065551758, "learning_rate": 9.721768707482993e-06, "loss": 22.0101, "step": 29388 }, { "epoch": 699.7402985074627, "grad_norm": 25.654821395874023, "learning_rate": 9.721428571428573e-06, "loss": 23.3165, "step": 29389 }, { "epoch": 699.7641791044776, "grad_norm": 23.292320251464844, "learning_rate": 9.72108843537415e-06, "loss": 22.3006, "step": 29390 }, { "epoch": 699.7880597014926, "grad_norm": 25.010940551757812, "learning_rate": 9.720748299319729e-06, "loss": 22.9609, "step": 29391 }, { "epoch": 699.8119402985075, "grad_norm": 25.53882598876953, "learning_rate": 9.720408163265307e-06, "loss": 22.743, "step": 29392 }, { "epoch": 699.8358208955224, "grad_norm": 29.943445205688477, "learning_rate": 9.720068027210885e-06, "loss": 23.3667, "step": 29393 }, { "epoch": 699.8597014925373, "grad_norm": 27.57686424255371, "learning_rate": 9.719727891156462e-06, "loss": 23.6942, "step": 29394 }, { "epoch": 699.8835820895522, "grad_norm": 25.609756469726562, "learning_rate": 9.719387755102042e-06, "loss": 22.0326, "step": 29395 }, { "epoch": 699.9074626865672, "grad_norm": 25.62625503540039, "learning_rate": 9.71904761904762e-06, "loss": 22.7246, "step": 29396 }, { "epoch": 699.9313432835821, "grad_norm": 25.822063446044922, "learning_rate": 9.718707482993197e-06, "loss": 22.9991, "step": 29397 }, { "epoch": 699.955223880597, "grad_norm": 27.03577423095703, "learning_rate": 9.718367346938776e-06, "loss": 22.9818, "step": 29398 }, { "epoch": 699.9791044776119, "grad_norm": 27.4606990814209, "learning_rate": 9.718027210884354e-06, "loss": 22.6026, "step": 29399 }, { "epoch": 700.0, "grad_norm": 23.015674591064453, "learning_rate": 9.717687074829933e-06, "loss": 20.5101, "step": 29400 } ], "logging_steps": 1.0, "max_steps": 29400, "num_input_tokens_seen": 0, "num_train_epochs": 700, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4452359523236163e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }