{"current_steps": 5, "total_steps": 3150, "loss": 5.2246, "lr": 6.34920634920635e-07, "epoch": 0.015873015873015872, "percentage": 0.16, "elapsed_time": "0:00:01", "remaining_time": "0:11:34", "throughput": 956.4, "total_tokens": 1056} {"current_steps": 10, "total_steps": 3150, "loss": 5.1795, "lr": 1.4285714285714286e-06, "epoch": 0.031746031746031744, "percentage": 0.32, "elapsed_time": "0:00:01", "remaining_time": "0:08:03", "throughput": 1411.82, "total_tokens": 2176} {"current_steps": 15, "total_steps": 3150, "loss": 4.8763, "lr": 2.2222222222222225e-06, "epoch": 0.047619047619047616, "percentage": 0.48, "elapsed_time": "0:00:01", "remaining_time": "0:06:53", "throughput": 1656.06, "total_tokens": 3280} {"current_steps": 20, "total_steps": 3150, "loss": 4.1228, "lr": 3.015873015873016e-06, "epoch": 0.06349206349206349, "percentage": 0.63, "elapsed_time": "0:00:02", "remaining_time": "0:06:17", "throughput": 1817.01, "total_tokens": 4384} {"current_steps": 25, "total_steps": 3150, "loss": 3.6725, "lr": 3.8095238095238102e-06, "epoch": 0.07936507936507936, "percentage": 0.79, "elapsed_time": "0:00:02", "remaining_time": "0:05:54", "throughput": 1936.03, "total_tokens": 5488} {"current_steps": 30, "total_steps": 3150, "loss": 3.4242, "lr": 4.603174603174604e-06, "epoch": 0.09523809523809523, "percentage": 0.95, "elapsed_time": "0:00:03", "remaining_time": "0:05:38", "throughput": 2008.25, "total_tokens": 6528} {"current_steps": 35, "total_steps": 3150, "loss": 2.7371, "lr": 5.396825396825397e-06, "epoch": 0.1111111111111111, "percentage": 1.11, "elapsed_time": "0:00:03", "remaining_time": "0:05:27", "throughput": 2077.18, "total_tokens": 7632} {"current_steps": 40, "total_steps": 3150, "loss": 2.1231, "lr": 6.190476190476191e-06, "epoch": 0.12698412698412698, "percentage": 1.27, "elapsed_time": "0:00:04", "remaining_time": "0:05:17", "throughput": 2117.15, "total_tokens": 8656} {"current_steps": 45, "total_steps": 3150, "loss": 1.8849, "lr": 6.984126984126985e-06, "epoch": 0.14285714285714285, "percentage": 1.43, "elapsed_time": "0:00:04", "remaining_time": "0:05:10", "throughput": 2156.6, "total_tokens": 9712} {"current_steps": 50, "total_steps": 3150, "loss": 1.6089, "lr": 7.777777777777777e-06, "epoch": 0.15873015873015872, "percentage": 1.59, "elapsed_time": "0:00:04", "remaining_time": "0:05:05", "throughput": 2194.25, "total_tokens": 10800} {"current_steps": 55, "total_steps": 3150, "loss": 1.2047, "lr": 8.571428571428573e-06, "epoch": 0.1746031746031746, "percentage": 1.75, "elapsed_time": "0:00:05", "remaining_time": "0:05:00", "throughput": 2214.81, "total_tokens": 11840} {"current_steps": 60, "total_steps": 3150, "loss": 1.3018, "lr": 9.365079365079366e-06, "epoch": 0.19047619047619047, "percentage": 1.9, "elapsed_time": "0:00:05", "remaining_time": "0:04:58", "throughput": 2239.67, "total_tokens": 12960} {"current_steps": 65, "total_steps": 3150, "loss": 1.2098, "lr": 1.015873015873016e-05, "epoch": 0.20634920634920634, "percentage": 2.06, "elapsed_time": "0:00:06", "remaining_time": "0:04:54", "throughput": 2259.09, "total_tokens": 14032} {"current_steps": 70, "total_steps": 3150, "loss": 1.002, "lr": 1.0952380952380953e-05, "epoch": 0.2222222222222222, "percentage": 2.22, "elapsed_time": "0:00:06", "remaining_time": "0:04:51", "throughput": 2281.15, "total_tokens": 15120} {"current_steps": 75, "total_steps": 3150, "loss": 0.8685, "lr": 1.1746031746031746e-05, "epoch": 0.23809523809523808, "percentage": 2.38, "elapsed_time": "0:00:07", "remaining_time": "0:04:48", "throughput": 2304.54, "total_tokens": 16240} {"current_steps": 80, "total_steps": 3150, "loss": 1.0762, "lr": 1.253968253968254e-05, "epoch": 0.25396825396825395, "percentage": 2.54, "elapsed_time": "0:00:07", "remaining_time": "0:04:46", "throughput": 2327.86, "total_tokens": 17376} {"current_steps": 85, "total_steps": 3150, "loss": 0.8077, "lr": 1.3333333333333333e-05, "epoch": 0.2698412698412698, "percentage": 2.7, "elapsed_time": "0:00:07", "remaining_time": "0:04:44", "throughput": 2345.62, "total_tokens": 18480} {"current_steps": 90, "total_steps": 3150, "loss": 0.7154, "lr": 1.4126984126984127e-05, "epoch": 0.2857142857142857, "percentage": 2.86, "elapsed_time": "0:00:08", "remaining_time": "0:04:41", "throughput": 2358.13, "total_tokens": 19552} {"current_steps": 95, "total_steps": 3150, "loss": 0.9305, "lr": 1.4920634920634922e-05, "epoch": 0.30158730158730157, "percentage": 3.02, "elapsed_time": "0:00:08", "remaining_time": "0:04:39", "throughput": 2363.52, "total_tokens": 20576} {"current_steps": 100, "total_steps": 3150, "loss": 0.7443, "lr": 1.5714285714285715e-05, "epoch": 0.31746031746031744, "percentage": 3.17, "elapsed_time": "0:00:09", "remaining_time": "0:04:38", "throughput": 2369.44, "total_tokens": 21616} {"current_steps": 105, "total_steps": 3150, "loss": 0.7175, "lr": 1.6507936507936507e-05, "epoch": 0.3333333333333333, "percentage": 3.33, "elapsed_time": "0:00:09", "remaining_time": "0:04:36", "throughput": 2383.93, "total_tokens": 22736} {"current_steps": 110, "total_steps": 3150, "loss": 0.5574, "lr": 1.7301587301587302e-05, "epoch": 0.3492063492063492, "percentage": 3.49, "elapsed_time": "0:00:09", "remaining_time": "0:04:35", "throughput": 2395.61, "total_tokens": 23840} {"current_steps": 115, "total_steps": 3150, "loss": 0.8255, "lr": 1.8095238095238094e-05, "epoch": 0.36507936507936506, "percentage": 3.65, "elapsed_time": "0:00:10", "remaining_time": "0:04:33", "throughput": 2407.48, "total_tokens": 24960} {"current_steps": 120, "total_steps": 3150, "loss": 0.6832, "lr": 1.888888888888889e-05, "epoch": 0.38095238095238093, "percentage": 3.81, "elapsed_time": "0:00:10", "remaining_time": "0:04:32", "throughput": 2417.14, "total_tokens": 26064} {"current_steps": 125, "total_steps": 3150, "loss": 0.7522, "lr": 1.9682539682539684e-05, "epoch": 0.3968253968253968, "percentage": 3.97, "elapsed_time": "0:00:11", "remaining_time": "0:04:31", "throughput": 2423.16, "total_tokens": 27136} {"current_steps": 130, "total_steps": 3150, "loss": 0.8572, "lr": 2.0476190476190476e-05, "epoch": 0.4126984126984127, "percentage": 4.13, "elapsed_time": "0:00:11", "remaining_time": "0:04:29", "throughput": 2424.81, "total_tokens": 28160} {"current_steps": 135, "total_steps": 3150, "loss": 0.935, "lr": 2.126984126984127e-05, "epoch": 0.42857142857142855, "percentage": 4.29, "elapsed_time": "0:00:12", "remaining_time": "0:04:28", "throughput": 2434.03, "total_tokens": 29280} {"current_steps": 140, "total_steps": 3150, "loss": 1.0101, "lr": 2.2063492063492063e-05, "epoch": 0.4444444444444444, "percentage": 4.44, "elapsed_time": "0:00:12", "remaining_time": "0:04:27", "throughput": 2437.88, "total_tokens": 30336} {"current_steps": 145, "total_steps": 3150, "loss": 0.7488, "lr": 2.2857142857142858e-05, "epoch": 0.4603174603174603, "percentage": 4.6, "elapsed_time": "0:00:12", "remaining_time": "0:04:26", "throughput": 2444.63, "total_tokens": 31440} {"current_steps": 150, "total_steps": 3150, "loss": 0.5824, "lr": 2.365079365079365e-05, "epoch": 0.47619047619047616, "percentage": 4.76, "elapsed_time": "0:00:13", "remaining_time": "0:04:25", "throughput": 2446.82, "total_tokens": 32480} {"current_steps": 155, "total_steps": 3150, "loss": 0.5526, "lr": 2.4444444444444445e-05, "epoch": 0.49206349206349204, "percentage": 4.92, "elapsed_time": "0:00:13", "remaining_time": "0:04:24", "throughput": 2446.8, "total_tokens": 33488} {"current_steps": 158, "total_steps": 3150, "eval_loss": 0.7046324610710144, "epoch": 0.5015873015873016, "percentage": 5.02, "elapsed_time": "0:00:15", "remaining_time": "0:04:52", "throughput": 2209.6, "total_tokens": 34176} {"current_steps": 160, "total_steps": 3150, "loss": 0.7868, "lr": 2.523809523809524e-05, "epoch": 0.5079365079365079, "percentage": 5.08, "elapsed_time": "0:00:17", "remaining_time": "0:05:21", "throughput": 2014.53, "total_tokens": 34608} {"current_steps": 165, "total_steps": 3150, "loss": 0.5539, "lr": 2.6031746031746035e-05, "epoch": 0.5238095238095238, "percentage": 5.24, "elapsed_time": "0:00:17", "remaining_time": "0:05:18", "throughput": 2023.68, "total_tokens": 35664} {"current_steps": 170, "total_steps": 3150, "loss": 0.7127, "lr": 2.6825396825396827e-05, "epoch": 0.5396825396825397, "percentage": 5.4, "elapsed_time": "0:00:18", "remaining_time": "0:05:16", "throughput": 2034.47, "total_tokens": 36704} {"current_steps": 175, "total_steps": 3150, "loss": 0.8412, "lr": 2.7619047619047622e-05, "epoch": 0.5555555555555556, "percentage": 5.56, "elapsed_time": "0:00:18", "remaining_time": "0:05:13", "throughput": 2049.57, "total_tokens": 37824} {"current_steps": 180, "total_steps": 3150, "loss": 0.5793, "lr": 2.8412698412698414e-05, "epoch": 0.5714285714285714, "percentage": 5.71, "elapsed_time": "0:00:18", "remaining_time": "0:05:11", "throughput": 2060.47, "total_tokens": 38880} {"current_steps": 185, "total_steps": 3150, "loss": 0.5911, "lr": 2.920634920634921e-05, "epoch": 0.5873015873015873, "percentage": 5.87, "elapsed_time": "0:00:19", "remaining_time": "0:05:09", "throughput": 2069.96, "total_tokens": 39920} {"current_steps": 190, "total_steps": 3150, "loss": 0.4416, "lr": 3e-05, "epoch": 0.6031746031746031, "percentage": 6.03, "elapsed_time": "0:00:19", "remaining_time": "0:05:06", "throughput": 2079.74, "total_tokens": 40976} {"current_steps": 195, "total_steps": 3150, "loss": 0.8699, "lr": 3.0793650793650796e-05, "epoch": 0.6190476190476191, "percentage": 6.19, "elapsed_time": "0:00:20", "remaining_time": "0:05:04", "throughput": 2088.41, "total_tokens": 42016} {"current_steps": 200, "total_steps": 3150, "loss": 0.545, "lr": 3.158730158730159e-05, "epoch": 0.6349206349206349, "percentage": 6.35, "elapsed_time": "0:00:20", "remaining_time": "0:05:03", "throughput": 2095.17, "total_tokens": 43088} {"current_steps": 205, "total_steps": 3150, "loss": 0.6905, "lr": 3.2380952380952386e-05, "epoch": 0.6507936507936508, "percentage": 6.51, "elapsed_time": "0:00:20", "remaining_time": "0:05:01", "throughput": 2103.28, "total_tokens": 44144} {"current_steps": 210, "total_steps": 3150, "loss": 0.6162, "lr": 3.317460317460318e-05, "epoch": 0.6666666666666666, "percentage": 6.67, "elapsed_time": "0:00:21", "remaining_time": "0:04:59", "throughput": 2110.78, "total_tokens": 45216} {"current_steps": 215, "total_steps": 3150, "loss": 0.6326, "lr": 3.396825396825397e-05, "epoch": 0.6825396825396826, "percentage": 6.83, "elapsed_time": "0:00:21", "remaining_time": "0:04:58", "throughput": 2119.83, "total_tokens": 46320} {"current_steps": 220, "total_steps": 3150, "loss": 0.5431, "lr": 3.476190476190476e-05, "epoch": 0.6984126984126984, "percentage": 6.98, "elapsed_time": "0:00:22", "remaining_time": "0:04:56", "throughput": 2126.4, "total_tokens": 47376} {"current_steps": 225, "total_steps": 3150, "loss": 0.5803, "lr": 3.555555555555556e-05, "epoch": 0.7142857142857143, "percentage": 7.14, "elapsed_time": "0:00:22", "remaining_time": "0:04:55", "throughput": 2133.75, "total_tokens": 48448} {"current_steps": 230, "total_steps": 3150, "loss": 0.7395, "lr": 3.634920634920635e-05, "epoch": 0.7301587301587301, "percentage": 7.3, "elapsed_time": "0:00:23", "remaining_time": "0:04:53", "throughput": 2141.29, "total_tokens": 49520} {"current_steps": 235, "total_steps": 3150, "loss": 0.5249, "lr": 3.7142857142857143e-05, "epoch": 0.746031746031746, "percentage": 7.46, "elapsed_time": "0:00:23", "remaining_time": "0:04:52", "throughput": 2147.74, "total_tokens": 50560} {"current_steps": 240, "total_steps": 3150, "loss": 0.5832, "lr": 3.7936507936507935e-05, "epoch": 0.7619047619047619, "percentage": 7.62, "elapsed_time": "0:00:23", "remaining_time": "0:04:50", "throughput": 2153.41, "total_tokens": 51584} {"current_steps": 245, "total_steps": 3150, "loss": 0.8221, "lr": 3.8730158730158734e-05, "epoch": 0.7777777777777778, "percentage": 7.78, "elapsed_time": "0:00:24", "remaining_time": "0:04:48", "throughput": 2160.57, "total_tokens": 52656} {"current_steps": 250, "total_steps": 3150, "loss": 0.7483, "lr": 3.9523809523809526e-05, "epoch": 0.7936507936507936, "percentage": 7.94, "elapsed_time": "0:00:24", "remaining_time": "0:04:47", "throughput": 2166.92, "total_tokens": 53712} {"current_steps": 255, "total_steps": 3150, "loss": 0.5607, "lr": 4.031746031746032e-05, "epoch": 0.8095238095238095, "percentage": 8.1, "elapsed_time": "0:00:25", "remaining_time": "0:04:46", "throughput": 2173.72, "total_tokens": 54784} {"current_steps": 260, "total_steps": 3150, "loss": 0.4334, "lr": 4.111111111111111e-05, "epoch": 0.8253968253968254, "percentage": 8.25, "elapsed_time": "0:00:25", "remaining_time": "0:04:44", "throughput": 2181.48, "total_tokens": 55888} {"current_steps": 265, "total_steps": 3150, "loss": 0.6269, "lr": 4.190476190476191e-05, "epoch": 0.8412698412698413, "percentage": 8.41, "elapsed_time": "0:00:26", "remaining_time": "0:04:43", "throughput": 2188.99, "total_tokens": 56992} {"current_steps": 270, "total_steps": 3150, "loss": 0.7236, "lr": 4.26984126984127e-05, "epoch": 0.8571428571428571, "percentage": 8.57, "elapsed_time": "0:00:26", "remaining_time": "0:04:42", "throughput": 2195.19, "total_tokens": 58064} {"current_steps": 275, "total_steps": 3150, "loss": 0.541, "lr": 4.34920634920635e-05, "epoch": 0.873015873015873, "percentage": 8.73, "elapsed_time": "0:00:26", "remaining_time": "0:04:40", "throughput": 2201.92, "total_tokens": 59152} {"current_steps": 280, "total_steps": 3150, "loss": 0.3819, "lr": 4.428571428571428e-05, "epoch": 0.8888888888888888, "percentage": 8.89, "elapsed_time": "0:00:27", "remaining_time": "0:04:39", "throughput": 2208.39, "total_tokens": 60240} {"current_steps": 285, "total_steps": 3150, "loss": 0.5826, "lr": 4.507936507936508e-05, "epoch": 0.9047619047619048, "percentage": 9.05, "elapsed_time": "0:00:27", "remaining_time": "0:04:38", "throughput": 2214.16, "total_tokens": 61312} {"current_steps": 290, "total_steps": 3150, "loss": 0.3932, "lr": 4.587301587301587e-05, "epoch": 0.9206349206349206, "percentage": 9.21, "elapsed_time": "0:00:28", "remaining_time": "0:04:37", "throughput": 2220.11, "total_tokens": 62400} {"current_steps": 295, "total_steps": 3150, "loss": 0.3105, "lr": 4.666666666666667e-05, "epoch": 0.9365079365079365, "percentage": 9.37, "elapsed_time": "0:00:28", "remaining_time": "0:04:36", "throughput": 2223.28, "total_tokens": 63408} {"current_steps": 300, "total_steps": 3150, "loss": 0.3389, "lr": 4.746031746031746e-05, "epoch": 0.9523809523809523, "percentage": 9.52, "elapsed_time": "0:00:28", "remaining_time": "0:04:34", "throughput": 2227.99, "total_tokens": 64464} {"current_steps": 305, "total_steps": 3150, "loss": 0.4306, "lr": 4.8253968253968255e-05, "epoch": 0.9682539682539683, "percentage": 9.68, "elapsed_time": "0:00:29", "remaining_time": "0:04:33", "throughput": 2233.08, "total_tokens": 65536} {"current_steps": 310, "total_steps": 3150, "loss": 0.2159, "lr": 4.904761904761905e-05, "epoch": 0.9841269841269841, "percentage": 9.84, "elapsed_time": "0:00:29", "remaining_time": "0:04:32", "throughput": 2240.03, "total_tokens": 66656} {"current_steps": 315, "total_steps": 3150, "loss": 0.2434, "lr": 4.9841269841269845e-05, "epoch": 1.0, "percentage": 10.0, "elapsed_time": "0:00:30", "remaining_time": "0:04:31", "throughput": 2240.14, "total_tokens": 67680} {"current_steps": 316, "total_steps": 3150, "eval_loss": 0.2998451292514801, "epoch": 1.0031746031746032, "percentage": 10.03, "elapsed_time": "0:00:31", "remaining_time": "0:04:46", "throughput": 2122.15, "total_tokens": 67872} {"current_steps": 320, "total_steps": 3150, "loss": 0.2994, "lr": 4.99997544030871e-05, "epoch": 1.0158730158730158, "percentage": 10.16, "elapsed_time": "0:00:35", "remaining_time": "0:05:13", "throughput": 1938.04, "total_tokens": 68736} {"current_steps": 325, "total_steps": 3150, "loss": 0.27, "lr": 4.999875667389858e-05, "epoch": 1.0317460317460316, "percentage": 10.32, "elapsed_time": "0:00:35", "remaining_time": "0:05:11", "throughput": 1945.19, "total_tokens": 69808} {"current_steps": 330, "total_steps": 3150, "loss": 0.3093, "lr": 4.9996991493233693e-05, "epoch": 1.0476190476190477, "percentage": 10.48, "elapsed_time": "0:00:36", "remaining_time": "0:05:10", "throughput": 1951.04, "total_tokens": 70848} {"current_steps": 335, "total_steps": 3150, "loss": 0.3382, "lr": 4.9994458915282744e-05, "epoch": 1.0634920634920635, "percentage": 10.63, "elapsed_time": "0:00:36", "remaining_time": "0:05:08", "throughput": 1957.56, "total_tokens": 71904} {"current_steps": 340, "total_steps": 3150, "loss": 0.2543, "lr": 4.999115901779484e-05, "epoch": 1.0793650793650793, "percentage": 10.79, "elapsed_time": "0:00:37", "remaining_time": "0:05:07", "throughput": 1963.43, "total_tokens": 72944} {"current_steps": 345, "total_steps": 3150, "loss": 0.395, "lr": 4.998709190207547e-05, "epoch": 1.0952380952380953, "percentage": 10.95, "elapsed_time": "0:00:37", "remaining_time": "0:05:05", "throughput": 1973.12, "total_tokens": 74144} {"current_steps": 350, "total_steps": 3150, "loss": 0.2672, "lr": 4.998225769298339e-05, "epoch": 1.1111111111111112, "percentage": 11.11, "elapsed_time": "0:00:37", "remaining_time": "0:05:03", "throughput": 1979.59, "total_tokens": 75216} {"current_steps": 355, "total_steps": 3150, "loss": 0.191, "lr": 4.9976656538926825e-05, "epoch": 1.126984126984127, "percentage": 11.27, "elapsed_time": "0:00:38", "remaining_time": "0:05:02", "throughput": 1985.74, "total_tokens": 76272} {"current_steps": 360, "total_steps": 3150, "loss": 0.2025, "lr": 4.997028861185888e-05, "epoch": 1.1428571428571428, "percentage": 11.43, "elapsed_time": "0:00:38", "remaining_time": "0:05:00", "throughput": 1992.19, "total_tokens": 77344} {"current_steps": 365, "total_steps": 3150, "loss": 0.1881, "lr": 4.9963154107272295e-05, "epoch": 1.1587301587301586, "percentage": 11.59, "elapsed_time": "0:00:39", "remaining_time": "0:04:59", "throughput": 1999.31, "total_tokens": 78448} {"current_steps": 370, "total_steps": 3150, "loss": 0.1361, "lr": 4.9955253244193375e-05, "epoch": 1.1746031746031746, "percentage": 11.75, "elapsed_time": "0:00:39", "remaining_time": "0:04:57", "throughput": 2006.2, "total_tokens": 79552} {"current_steps": 375, "total_steps": 3150, "loss": 0.0219, "lr": 4.9946586265175376e-05, "epoch": 1.1904761904761905, "percentage": 11.9, "elapsed_time": "0:00:40", "remaining_time": "0:04:56", "throughput": 2011.42, "total_tokens": 80592} {"current_steps": 380, "total_steps": 3150, "loss": 0.1482, "lr": 4.993715343629095e-05, "epoch": 1.2063492063492063, "percentage": 12.06, "elapsed_time": "0:00:40", "remaining_time": "0:04:55", "throughput": 2018.81, "total_tokens": 81728} {"current_steps": 385, "total_steps": 3150, "loss": 0.2375, "lr": 4.992695504712402e-05, "epoch": 1.2222222222222223, "percentage": 12.22, "elapsed_time": "0:00:40", "remaining_time": "0:04:53", "throughput": 2024.45, "total_tokens": 82800} {"current_steps": 390, "total_steps": 3150, "loss": 0.1005, "lr": 4.991599141076094e-05, "epoch": 1.2380952380952381, "percentage": 12.38, "elapsed_time": "0:00:41", "remaining_time": "0:04:52", "throughput": 2030.07, "total_tokens": 83872} {"current_steps": 395, "total_steps": 3150, "loss": 0.193, "lr": 4.990426286378079e-05, "epoch": 1.253968253968254, "percentage": 12.54, "elapsed_time": "0:00:41", "remaining_time": "0:04:51", "throughput": 2035.95, "total_tokens": 84960} {"current_steps": 400, "total_steps": 3150, "loss": 0.1855, "lr": 4.989176976624511e-05, "epoch": 1.2698412698412698, "percentage": 12.7, "elapsed_time": "0:00:42", "remaining_time": "0:04:49", "throughput": 2041.44, "total_tokens": 86032} {"current_steps": 405, "total_steps": 3150, "loss": 0.11, "lr": 4.987851250168682e-05, "epoch": 1.2857142857142856, "percentage": 12.86, "elapsed_time": "0:00:42", "remaining_time": "0:04:48", "throughput": 2046.38, "total_tokens": 87088} {"current_steps": 410, "total_steps": 3150, "loss": 0.1098, "lr": 4.986449147709844e-05, "epoch": 1.3015873015873016, "percentage": 13.02, "elapsed_time": "0:00:42", "remaining_time": "0:04:47", "throughput": 2051.88, "total_tokens": 88176} {"current_steps": 415, "total_steps": 3150, "loss": 0.2938, "lr": 4.984970712291963e-05, "epoch": 1.3174603174603174, "percentage": 13.17, "elapsed_time": "0:00:43", "remaining_time": "0:04:45", "throughput": 2056.63, "total_tokens": 89232} {"current_steps": 420, "total_steps": 3150, "loss": 0.1689, "lr": 4.983415989302394e-05, "epoch": 1.3333333333333333, "percentage": 13.33, "elapsed_time": "0:00:43", "remaining_time": "0:04:44", "throughput": 2060.94, "total_tokens": 90272} {"current_steps": 425, "total_steps": 3150, "loss": 0.1951, "lr": 4.9817850264704883e-05, "epoch": 1.3492063492063493, "percentage": 13.49, "elapsed_time": "0:00:44", "remaining_time": "0:04:43", "throughput": 2065.46, "total_tokens": 91328} {"current_steps": 430, "total_steps": 3150, "loss": 0.2203, "lr": 4.9800778738661295e-05, "epoch": 1.3650793650793651, "percentage": 13.65, "elapsed_time": "0:00:44", "remaining_time": "0:04:42", "throughput": 2069.74, "total_tokens": 92384} {"current_steps": 435, "total_steps": 3150, "loss": 0.1728, "lr": 4.978294583898196e-05, "epoch": 1.380952380952381, "percentage": 13.81, "elapsed_time": "0:00:45", "remaining_time": "0:04:41", "throughput": 2074.12, "total_tokens": 93440} {"current_steps": 440, "total_steps": 3150, "loss": 0.1872, "lr": 4.9764352113129506e-05, "epoch": 1.3968253968253967, "percentage": 13.97, "elapsed_time": "0:00:45", "remaining_time": "0:04:40", "throughput": 2078.97, "total_tokens": 94528} {"current_steps": 445, "total_steps": 3150, "loss": 0.1592, "lr": 4.974499813192362e-05, "epoch": 1.4126984126984126, "percentage": 14.13, "elapsed_time": "0:00:45", "remaining_time": "0:04:38", "throughput": 2082.56, "total_tokens": 95552} {"current_steps": 450, "total_steps": 3150, "loss": 0.2428, "lr": 4.972488448952351e-05, "epoch": 1.4285714285714286, "percentage": 14.29, "elapsed_time": "0:00:46", "remaining_time": "0:04:37", "throughput": 2086.72, "total_tokens": 96608} {"current_steps": 455, "total_steps": 3150, "loss": 0.2154, "lr": 4.970401180340965e-05, "epoch": 1.4444444444444444, "percentage": 14.44, "elapsed_time": "0:00:46", "remaining_time": "0:04:36", "throughput": 2090.58, "total_tokens": 97664} {"current_steps": 460, "total_steps": 3150, "loss": 0.1337, "lr": 4.9682380714364897e-05, "epoch": 1.4603174603174602, "percentage": 14.6, "elapsed_time": "0:00:47", "remaining_time": "0:04:35", "throughput": 2094.88, "total_tokens": 98736} {"current_steps": 465, "total_steps": 3150, "loss": 0.0631, "lr": 4.965999188645469e-05, "epoch": 1.4761904761904763, "percentage": 14.76, "elapsed_time": "0:00:47", "remaining_time": "0:04:34", "throughput": 2098.49, "total_tokens": 99776} {"current_steps": 470, "total_steps": 3150, "loss": 0.0913, "lr": 4.963684600700679e-05, "epoch": 1.492063492063492, "percentage": 14.92, "elapsed_time": "0:00:47", "remaining_time": "0:04:33", "throughput": 2101.44, "total_tokens": 100784} {"current_steps": 474, "total_steps": 3150, "eval_loss": 0.1424291729927063, "epoch": 1.5047619047619047, "percentage": 15.05, "elapsed_time": "0:00:49", "remaining_time": "0:04:41", "throughput": 2041.13, "total_tokens": 101696} {"current_steps": 475, "total_steps": 3150, "loss": 0.1079, "lr": 4.9612943786590104e-05, "epoch": 1.507936507936508, "percentage": 15.08, "elapsed_time": "0:00:52", "remaining_time": "0:04:56", "throughput": 1938.53, "total_tokens": 101920} {"current_steps": 480, "total_steps": 3150, "loss": 0.0542, "lr": 4.958828595899291e-05, "epoch": 1.5238095238095237, "percentage": 15.24, "elapsed_time": "0:00:53", "remaining_time": "0:04:54", "throughput": 1942.16, "total_tokens": 102960} {"current_steps": 485, "total_steps": 3150, "loss": 0.0581, "lr": 4.956287328120029e-05, "epoch": 1.5396825396825395, "percentage": 15.4, "elapsed_time": "0:00:53", "remaining_time": "0:04:53", "throughput": 1946.0, "total_tokens": 103984} {"current_steps": 490, "total_steps": 3150, "loss": 0.0125, "lr": 4.953670653337093e-05, "epoch": 1.5555555555555556, "percentage": 15.56, "elapsed_time": "0:00:53", "remaining_time": "0:04:52", "throughput": 1949.07, "total_tokens": 104976} {"current_steps": 495, "total_steps": 3150, "loss": 0.1718, "lr": 4.950978651881315e-05, "epoch": 1.5714285714285714, "percentage": 15.71, "elapsed_time": "0:00:54", "remaining_time": "0:04:51", "throughput": 1953.78, "total_tokens": 106048} {"current_steps": 500, "total_steps": 3150, "loss": 0.1954, "lr": 4.9482114063960254e-05, "epoch": 1.5873015873015874, "percentage": 15.87, "elapsed_time": "0:00:54", "remaining_time": "0:04:49", "throughput": 1958.54, "total_tokens": 107152} {"current_steps": 505, "total_steps": 3150, "loss": 0.2002, "lr": 4.9453690018345144e-05, "epoch": 1.6031746031746033, "percentage": 16.03, "elapsed_time": "0:00:55", "remaining_time": "0:04:48", "throughput": 1962.85, "total_tokens": 108224} {"current_steps": 510, "total_steps": 3150, "loss": 0.205, "lr": 4.942451525457424e-05, "epoch": 1.619047619047619, "percentage": 16.19, "elapsed_time": "0:00:55", "remaining_time": "0:04:47", "throughput": 1968.6, "total_tokens": 109376} {"current_steps": 515, "total_steps": 3150, "loss": 0.0587, "lr": 4.9394590668300724e-05, "epoch": 1.6349206349206349, "percentage": 16.35, "elapsed_time": "0:00:55", "remaining_time": "0:04:46", "throughput": 1973.61, "total_tokens": 110480} {"current_steps": 520, "total_steps": 3150, "loss": 0.2771, "lr": 4.9363917178196986e-05, "epoch": 1.6507936507936507, "percentage": 16.51, "elapsed_time": "0:00:56", "remaining_time": "0:04:45", "throughput": 1978.88, "total_tokens": 111600} {"current_steps": 525, "total_steps": 3150, "loss": 0.1043, "lr": 4.933249572592646e-05, "epoch": 1.6666666666666665, "percentage": 16.67, "elapsed_time": "0:00:56", "remaining_time": "0:04:44", "throughput": 1983.81, "total_tokens": 112704} {"current_steps": 530, "total_steps": 3150, "loss": 0.0808, "lr": 4.930032727611474e-05, "epoch": 1.6825396825396826, "percentage": 16.83, "elapsed_time": "0:00:57", "remaining_time": "0:04:42", "throughput": 1989.18, "total_tokens": 113840} {"current_steps": 535, "total_steps": 3150, "loss": 0.0951, "lr": 4.926741281631991e-05, "epoch": 1.6984126984126984, "percentage": 16.98, "elapsed_time": "0:00:57", "remaining_time": "0:04:41", "throughput": 1993.04, "total_tokens": 114896} {"current_steps": 540, "total_steps": 3150, "loss": 0.1518, "lr": 4.923375335700223e-05, "epoch": 1.7142857142857144, "percentage": 17.14, "elapsed_time": "0:00:58", "remaining_time": "0:04:40", "throughput": 1996.58, "total_tokens": 115936} {"current_steps": 545, "total_steps": 3150, "loss": 0.11, "lr": 4.919934993149319e-05, "epoch": 1.7301587301587302, "percentage": 17.3, "elapsed_time": "0:00:58", "remaining_time": "0:04:39", "throughput": 2000.18, "total_tokens": 116976} {"current_steps": 550, "total_steps": 3150, "loss": 0.1034, "lr": 4.916420359596368e-05, "epoch": 1.746031746031746, "percentage": 17.46, "elapsed_time": "0:00:58", "remaining_time": "0:04:38", "throughput": 2004.02, "total_tokens": 118032} {"current_steps": 555, "total_steps": 3150, "loss": 0.0583, "lr": 4.912831542939166e-05, "epoch": 1.7619047619047619, "percentage": 17.62, "elapsed_time": "0:00:59", "remaining_time": "0:04:37", "throughput": 2008.15, "total_tokens": 119120} {"current_steps": 560, "total_steps": 3150, "loss": 0.0428, "lr": 4.909168653352898e-05, "epoch": 1.7777777777777777, "percentage": 17.78, "elapsed_time": "0:00:59", "remaining_time": "0:04:36", "throughput": 2012.83, "total_tokens": 120240} {"current_steps": 565, "total_steps": 3150, "loss": 0.471, "lr": 4.905431803286757e-05, "epoch": 1.7936507936507935, "percentage": 17.94, "elapsed_time": "0:01:00", "remaining_time": "0:04:35", "throughput": 2016.45, "total_tokens": 121296} {"current_steps": 570, "total_steps": 3150, "loss": 0.1294, "lr": 4.9016211074604904e-05, "epoch": 1.8095238095238095, "percentage": 18.1, "elapsed_time": "0:01:00", "remaining_time": "0:04:34", "throughput": 2021.51, "total_tokens": 122448} {"current_steps": 575, "total_steps": 3150, "loss": 0.0806, "lr": 4.897736682860885e-05, "epoch": 1.8253968253968254, "percentage": 18.25, "elapsed_time": "0:01:00", "remaining_time": "0:04:33", "throughput": 2025.83, "total_tokens": 123552} {"current_steps": 580, "total_steps": 3150, "loss": 0.089, "lr": 4.893778648738165e-05, "epoch": 1.8412698412698414, "percentage": 18.41, "elapsed_time": "0:01:01", "remaining_time": "0:04:32", "throughput": 2029.34, "total_tokens": 124608} {"current_steps": 585, "total_steps": 3150, "loss": 0.3091, "lr": 4.88974712660234e-05, "epoch": 1.8571428571428572, "percentage": 18.57, "elapsed_time": "0:01:01", "remaining_time": "0:04:31", "throughput": 2033.24, "total_tokens": 125696} {"current_steps": 590, "total_steps": 3150, "loss": 0.1272, "lr": 4.88564224021947e-05, "epoch": 1.873015873015873, "percentage": 18.73, "elapsed_time": "0:01:02", "remaining_time": "0:04:30", "throughput": 2036.87, "total_tokens": 126768} {"current_steps": 595, "total_steps": 3150, "loss": 0.0921, "lr": 4.881464115607865e-05, "epoch": 1.8888888888888888, "percentage": 18.89, "elapsed_time": "0:01:02", "remaining_time": "0:04:29", "throughput": 2040.23, "total_tokens": 127824} {"current_steps": 600, "total_steps": 3150, "loss": 0.2582, "lr": 4.8772128810342235e-05, "epoch": 1.9047619047619047, "percentage": 19.05, "elapsed_time": "0:01:03", "remaining_time": "0:04:28", "throughput": 2043.76, "total_tokens": 128896} {"current_steps": 605, "total_steps": 3150, "loss": 0.1102, "lr": 4.8728886670096826e-05, "epoch": 1.9206349206349205, "percentage": 19.21, "elapsed_time": "0:01:03", "remaining_time": "0:04:27", "throughput": 2046.57, "total_tokens": 129920} {"current_steps": 610, "total_steps": 3150, "loss": 0.1334, "lr": 4.868491606285823e-05, "epoch": 1.9365079365079365, "percentage": 19.37, "elapsed_time": "0:01:03", "remaining_time": "0:04:26", "throughput": 2051.03, "total_tokens": 131056} {"current_steps": 615, "total_steps": 3150, "loss": 0.0593, "lr": 4.8640218338505894e-05, "epoch": 1.9523809523809523, "percentage": 19.52, "elapsed_time": "0:01:04", "remaining_time": "0:04:25", "throughput": 2054.9, "total_tokens": 132160} {"current_steps": 620, "total_steps": 3150, "loss": 0.0552, "lr": 4.8594794869241434e-05, "epoch": 1.9682539682539684, "percentage": 19.68, "elapsed_time": "0:01:04", "remaining_time": "0:04:24", "throughput": 2058.97, "total_tokens": 133280} {"current_steps": 625, "total_steps": 3150, "loss": 0.1558, "lr": 4.8548647049546535e-05, "epoch": 1.9841269841269842, "percentage": 19.84, "elapsed_time": "0:01:05", "remaining_time": "0:04:23", "throughput": 2062.55, "total_tokens": 134352} {"current_steps": 630, "total_steps": 3150, "loss": 0.0227, "lr": 4.850177629614016e-05, "epoch": 2.0, "percentage": 20.0, "elapsed_time": "0:01:05", "remaining_time": "0:04:22", "throughput": 2062.07, "total_tokens": 135360} {"current_steps": 632, "total_steps": 3150, "eval_loss": 0.14102911949157715, "epoch": 2.0063492063492063, "percentage": 20.06, "elapsed_time": "0:01:07", "remaining_time": "0:04:28", "throughput": 2011.78, "total_tokens": 135776} {"current_steps": 635, "total_steps": 3150, "loss": 0.1162, "lr": 4.845418404793503e-05, "epoch": 2.015873015873016, "percentage": 20.16, "elapsed_time": "0:01:09", "remaining_time": "0:04:34", "throughput": 1967.94, "total_tokens": 136432} {"current_steps": 640, "total_steps": 3150, "loss": 0.0571, "lr": 4.8405871765993433e-05, "epoch": 2.0317460317460316, "percentage": 20.32, "elapsed_time": "0:01:09", "remaining_time": "0:04:33", "throughput": 1971.44, "total_tokens": 137504} {"current_steps": 645, "total_steps": 3150, "loss": 0.053, "lr": 4.835684093348244e-05, "epoch": 2.0476190476190474, "percentage": 20.48, "elapsed_time": "0:01:10", "remaining_time": "0:04:32", "throughput": 1975.22, "total_tokens": 138608} {"current_steps": 650, "total_steps": 3150, "loss": 0.1767, "lr": 4.8307093055628284e-05, "epoch": 2.0634920634920633, "percentage": 20.63, "elapsed_time": "0:01:10", "remaining_time": "0:04:31", "throughput": 1978.71, "total_tokens": 139680} {"current_steps": 655, "total_steps": 3150, "loss": 0.0545, "lr": 4.825662965967023e-05, "epoch": 2.0793650793650795, "percentage": 20.79, "elapsed_time": "0:01:11", "remaining_time": "0:04:30", "throughput": 1982.33, "total_tokens": 140768} {"current_steps": 660, "total_steps": 3150, "loss": 0.0394, "lr": 4.820545229481361e-05, "epoch": 2.0952380952380953, "percentage": 20.95, "elapsed_time": "0:01:11", "remaining_time": "0:04:29", "throughput": 1985.54, "total_tokens": 141824} {"current_steps": 665, "total_steps": 3150, "loss": 0.0014, "lr": 4.815356253218235e-05, "epoch": 2.111111111111111, "percentage": 21.11, "elapsed_time": "0:01:11", "remaining_time": "0:04:28", "throughput": 1987.79, "total_tokens": 142816} {"current_steps": 670, "total_steps": 3150, "loss": 0.0977, "lr": 4.810096196477066e-05, "epoch": 2.126984126984127, "percentage": 21.27, "elapsed_time": "0:01:12", "remaining_time": "0:04:27", "throughput": 1991.38, "total_tokens": 143904} {"current_steps": 675, "total_steps": 3150, "loss": 0.1443, "lr": 4.804765220739419e-05, "epoch": 2.142857142857143, "percentage": 21.43, "elapsed_time": "0:01:12", "remaining_time": "0:04:26", "throughput": 1994.56, "total_tokens": 144960} {"current_steps": 680, "total_steps": 3150, "loss": 0.057, "lr": 4.7993634896640394e-05, "epoch": 2.1587301587301586, "percentage": 21.59, "elapsed_time": "0:01:13", "remaining_time": "0:04:25", "throughput": 1998.33, "total_tokens": 146064} {"current_steps": 685, "total_steps": 3150, "loss": 0.0177, "lr": 4.7938911690818347e-05, "epoch": 2.1746031746031744, "percentage": 21.75, "elapsed_time": "0:01:13", "remaining_time": "0:04:24", "throughput": 2001.45, "total_tokens": 147120} {"current_steps": 690, "total_steps": 3150, "loss": 0.0516, "lr": 4.788348426990781e-05, "epoch": 2.1904761904761907, "percentage": 21.9, "elapsed_time": "0:01:13", "remaining_time": "0:04:23", "throughput": 2004.96, "total_tokens": 148208} {"current_steps": 695, "total_steps": 3150, "loss": 0.0517, "lr": 4.782735433550763e-05, "epoch": 2.2063492063492065, "percentage": 22.06, "elapsed_time": "0:01:14", "remaining_time": "0:04:22", "throughput": 2007.57, "total_tokens": 149232} {"current_steps": 700, "total_steps": 3150, "loss": 0.1118, "lr": 4.777052361078358e-05, "epoch": 2.2222222222222223, "percentage": 22.22, "elapsed_time": "0:01:14", "remaining_time": "0:04:21", "throughput": 2010.32, "total_tokens": 150272} {"current_steps": 705, "total_steps": 3150, "loss": 0.2711, "lr": 4.7712993840415346e-05, "epoch": 2.238095238095238, "percentage": 22.38, "elapsed_time": "0:01:15", "remaining_time": "0:04:20", "throughput": 2012.67, "total_tokens": 151280} {"current_steps": 710, "total_steps": 3150, "loss": 0.1314, "lr": 4.765476679054305e-05, "epoch": 2.253968253968254, "percentage": 22.54, "elapsed_time": "0:01:15", "remaining_time": "0:04:19", "throughput": 2015.7, "total_tokens": 152352} {"current_steps": 715, "total_steps": 3150, "loss": 0.0468, "lr": 4.759584424871302e-05, "epoch": 2.2698412698412698, "percentage": 22.7, "elapsed_time": "0:01:15", "remaining_time": "0:04:18", "throughput": 2019.38, "total_tokens": 153472} {"current_steps": 720, "total_steps": 3150, "loss": 0.0786, "lr": 4.7536228023822873e-05, "epoch": 2.2857142857142856, "percentage": 22.86, "elapsed_time": "0:01:16", "remaining_time": "0:04:17", "throughput": 2023.05, "total_tokens": 154592} {"current_steps": 725, "total_steps": 3150, "loss": 0.1631, "lr": 4.7475919946066036e-05, "epoch": 2.3015873015873014, "percentage": 23.02, "elapsed_time": "0:01:16", "remaining_time": "0:04:16", "throughput": 2026.47, "total_tokens": 155696} {"current_steps": 730, "total_steps": 3150, "loss": 0.142, "lr": 4.7414921866875524e-05, "epoch": 2.317460317460317, "percentage": 23.17, "elapsed_time": "0:01:17", "remaining_time": "0:04:16", "throughput": 2030.11, "total_tokens": 156816} {"current_steps": 735, "total_steps": 3150, "loss": 0.0906, "lr": 4.735323565886709e-05, "epoch": 2.3333333333333335, "percentage": 23.33, "elapsed_time": "0:01:17", "remaining_time": "0:04:15", "throughput": 2033.84, "total_tokens": 157952} {"current_steps": 740, "total_steps": 3150, "loss": 0.0354, "lr": 4.729086321578177e-05, "epoch": 2.3492063492063493, "percentage": 23.49, "elapsed_time": "0:01:18", "remaining_time": "0:04:14", "throughput": 2036.4, "total_tokens": 158992} {"current_steps": 745, "total_steps": 3150, "loss": 0.025, "lr": 4.7227806452427746e-05, "epoch": 2.365079365079365, "percentage": 23.65, "elapsed_time": "0:01:18", "remaining_time": "0:04:13", "throughput": 2039.71, "total_tokens": 160096} {"current_steps": 750, "total_steps": 3150, "loss": 0.0779, "lr": 4.7164067304621536e-05, "epoch": 2.380952380952381, "percentage": 23.81, "elapsed_time": "0:01:18", "remaining_time": "0:04:12", "throughput": 2042.2, "total_tokens": 161136} {"current_steps": 755, "total_steps": 3150, "loss": 0.0971, "lr": 4.709964772912857e-05, "epoch": 2.3968253968253967, "percentage": 23.97, "elapsed_time": "0:01:19", "remaining_time": "0:04:11", "throughput": 2044.84, "total_tokens": 162192} {"current_steps": 760, "total_steps": 3150, "loss": 0.0874, "lr": 4.703454970360315e-05, "epoch": 2.4126984126984126, "percentage": 24.13, "elapsed_time": "0:01:19", "remaining_time": "0:04:10", "throughput": 2046.83, "total_tokens": 163200} {"current_steps": 765, "total_steps": 3150, "loss": 0.0481, "lr": 4.696877522652768e-05, "epoch": 2.4285714285714284, "percentage": 24.29, "elapsed_time": "0:01:20", "remaining_time": "0:04:09", "throughput": 2049.24, "total_tokens": 164240} {"current_steps": 770, "total_steps": 3150, "loss": 0.0458, "lr": 4.690232631715138e-05, "epoch": 2.4444444444444446, "percentage": 24.44, "elapsed_time": "0:01:20", "remaining_time": "0:04:09", "throughput": 2052.33, "total_tokens": 165344} {"current_steps": 775, "total_steps": 3150, "loss": 0.0097, "lr": 4.6835205015428246e-05, "epoch": 2.4603174603174605, "percentage": 24.6, "elapsed_time": "0:01:20", "remaining_time": "0:04:08", "throughput": 2054.84, "total_tokens": 166400} {"current_steps": 780, "total_steps": 3150, "loss": 0.0432, "lr": 4.6767413381954435e-05, "epoch": 2.4761904761904763, "percentage": 24.76, "elapsed_time": "0:01:21", "remaining_time": "0:04:07", "throughput": 2058.26, "total_tokens": 167536} {"current_steps": 785, "total_steps": 3150, "loss": 0.1327, "lr": 4.669895349790502e-05, "epoch": 2.492063492063492, "percentage": 24.92, "elapsed_time": "0:01:21", "remaining_time": "0:04:06", "throughput": 2061.27, "total_tokens": 168640} {"current_steps": 790, "total_steps": 3150, "loss": 0.0576, "lr": 4.662982746497009e-05, "epoch": 2.507936507936508, "percentage": 25.08, "elapsed_time": "0:01:22", "remaining_time": "0:04:05", "throughput": 2063.92, "total_tokens": 169712} {"current_steps": 790, "total_steps": 3150, "eval_loss": 0.14468249678611755, "epoch": 2.507936507936508, "percentage": 25.08, "elapsed_time": "0:01:23", "remaining_time": "0:04:10", "throughput": 2026.26, "total_tokens": 169712} {"current_steps": 795, "total_steps": 3150, "loss": 0.133, "lr": 4.6560037405290225e-05, "epoch": 2.5238095238095237, "percentage": 25.24, "elapsed_time": "0:01:25", "remaining_time": "0:04:14", "throughput": 1987.88, "total_tokens": 170720} {"current_steps": 800, "total_steps": 3150, "loss": 0.0177, "lr": 4.648958546139136e-05, "epoch": 2.5396825396825395, "percentage": 25.4, "elapsed_time": "0:01:26", "remaining_time": "0:04:13", "throughput": 1990.14, "total_tokens": 171744} {"current_steps": 805, "total_steps": 3150, "loss": 0.0331, "lr": 4.641847379611898e-05, "epoch": 2.5555555555555554, "percentage": 25.56, "elapsed_time": "0:01:26", "remaining_time": "0:04:12", "throughput": 1992.93, "total_tokens": 172832} {"current_steps": 810, "total_steps": 3150, "loss": 0.111, "lr": 4.634670459257177e-05, "epoch": 2.571428571428571, "percentage": 25.71, "elapsed_time": "0:01:27", "remaining_time": "0:04:11", "throughput": 1995.92, "total_tokens": 173936} {"current_steps": 815, "total_steps": 3150, "loss": 0.0354, "lr": 4.6274280054034546e-05, "epoch": 2.5873015873015874, "percentage": 25.87, "elapsed_time": "0:01:27", "remaining_time": "0:04:10", "throughput": 1998.76, "total_tokens": 175024} {"current_steps": 820, "total_steps": 3150, "loss": 0.1533, "lr": 4.620120240391065e-05, "epoch": 2.6031746031746033, "percentage": 26.03, "elapsed_time": "0:01:27", "remaining_time": "0:04:09", "throughput": 2002.21, "total_tokens": 176160} {"current_steps": 825, "total_steps": 3150, "loss": 0.084, "lr": 4.612747388565368e-05, "epoch": 2.619047619047619, "percentage": 26.19, "elapsed_time": "0:01:28", "remaining_time": "0:04:09", "throughput": 2005.25, "total_tokens": 177264} {"current_steps": 830, "total_steps": 3150, "loss": 0.0219, "lr": 4.60530967626986e-05, "epoch": 2.634920634920635, "percentage": 26.35, "elapsed_time": "0:01:28", "remaining_time": "0:04:08", "throughput": 2007.63, "total_tokens": 178304} {"current_steps": 835, "total_steps": 3150, "loss": 0.1529, "lr": 4.597807331839229e-05, "epoch": 2.6507936507936507, "percentage": 26.51, "elapsed_time": "0:01:29", "remaining_time": "0:04:07", "throughput": 2010.53, "total_tokens": 179408} {"current_steps": 840, "total_steps": 3150, "loss": 0.0234, "lr": 4.59024058559234e-05, "epoch": 2.6666666666666665, "percentage": 26.67, "elapsed_time": "0:01:29", "remaining_time": "0:04:06", "throughput": 2013.18, "total_tokens": 180480} {"current_steps": 845, "total_steps": 3150, "loss": 0.0691, "lr": 4.582609669825172e-05, "epoch": 2.682539682539683, "percentage": 26.83, "elapsed_time": "0:01:30", "remaining_time": "0:04:05", "throughput": 2016.48, "total_tokens": 181616} {"current_steps": 850, "total_steps": 3150, "loss": 0.0557, "lr": 4.5749148188036764e-05, "epoch": 2.6984126984126986, "percentage": 26.98, "elapsed_time": "0:01:30", "remaining_time": "0:04:04", "throughput": 2019.04, "total_tokens": 182688} {"current_steps": 855, "total_steps": 3150, "loss": 0.1149, "lr": 4.567156268756594e-05, "epoch": 2.7142857142857144, "percentage": 27.14, "elapsed_time": "0:01:30", "remaining_time": "0:04:03", "throughput": 2021.5, "total_tokens": 183744} {"current_steps": 860, "total_steps": 3150, "loss": 0.0569, "lr": 4.5593342578681963e-05, "epoch": 2.7301587301587302, "percentage": 27.3, "elapsed_time": "0:01:31", "remaining_time": "0:04:03", "throughput": 2023.58, "total_tokens": 184768} {"current_steps": 865, "total_steps": 3150, "loss": 0.02, "lr": 4.551449026270979e-05, "epoch": 2.746031746031746, "percentage": 27.46, "elapsed_time": "0:01:31", "remaining_time": "0:04:02", "throughput": 2025.98, "total_tokens": 185824} {"current_steps": 870, "total_steps": 3150, "loss": 0.0456, "lr": 4.5435008160382866e-05, "epoch": 2.761904761904762, "percentage": 27.62, "elapsed_time": "0:01:32", "remaining_time": "0:04:01", "throughput": 2028.36, "total_tokens": 186880} {"current_steps": 875, "total_steps": 3150, "loss": 0.0986, "lr": 4.535489871176881e-05, "epoch": 2.7777777777777777, "percentage": 27.78, "elapsed_time": "0:01:32", "remaining_time": "0:04:00", "throughput": 2031.04, "total_tokens": 187968} {"current_steps": 880, "total_steps": 3150, "loss": 0.1785, "lr": 4.5274164376194514e-05, "epoch": 2.7936507936507935, "percentage": 27.94, "elapsed_time": "0:01:32", "remaining_time": "0:03:59", "throughput": 2033.84, "total_tokens": 189072} {"current_steps": 885, "total_steps": 3150, "loss": 0.0949, "lr": 4.5192807632170644e-05, "epoch": 2.8095238095238093, "percentage": 28.1, "elapsed_time": "0:01:33", "remaining_time": "0:03:58", "throughput": 2036.92, "total_tokens": 190208} {"current_steps": 890, "total_steps": 3150, "loss": 0.0363, "lr": 4.5110830977315556e-05, "epoch": 2.825396825396825, "percentage": 28.25, "elapsed_time": "0:01:33", "remaining_time": "0:03:58", "throughput": 2040.16, "total_tokens": 191360} {"current_steps": 895, "total_steps": 3150, "loss": 0.1222, "lr": 4.502823692827859e-05, "epoch": 2.8412698412698414, "percentage": 28.41, "elapsed_time": "0:01:34", "remaining_time": "0:03:57", "throughput": 2042.71, "total_tokens": 192448} {"current_steps": 900, "total_steps": 3150, "loss": 0.0356, "lr": 4.494502802066285e-05, "epoch": 2.857142857142857, "percentage": 28.57, "elapsed_time": "0:01:34", "remaining_time": "0:03:56", "throughput": 2044.91, "total_tokens": 193504} {"current_steps": 905, "total_steps": 3150, "loss": 0.1125, "lr": 4.486120680894733e-05, "epoch": 2.873015873015873, "percentage": 28.73, "elapsed_time": "0:01:35", "remaining_time": "0:03:55", "throughput": 2046.77, "total_tokens": 194528} {"current_steps": 910, "total_steps": 3150, "loss": 0.0827, "lr": 4.477677586640854e-05, "epoch": 2.888888888888889, "percentage": 28.89, "elapsed_time": "0:01:35", "remaining_time": "0:03:54", "throughput": 2049.08, "total_tokens": 195600} {"current_steps": 915, "total_steps": 3150, "loss": 0.0519, "lr": 4.469173778504141e-05, "epoch": 2.9047619047619047, "percentage": 29.05, "elapsed_time": "0:01:35", "remaining_time": "0:03:54", "throughput": 2051.55, "total_tokens": 196688} {"current_steps": 920, "total_steps": 3150, "loss": 0.1161, "lr": 4.460609517547984e-05, "epoch": 2.9206349206349205, "percentage": 29.21, "elapsed_time": "0:01:36", "remaining_time": "0:03:53", "throughput": 2054.18, "total_tokens": 197792} {"current_steps": 925, "total_steps": 3150, "loss": 0.195, "lr": 4.4519850666916484e-05, "epoch": 2.9365079365079367, "percentage": 29.37, "elapsed_time": "0:01:36", "remaining_time": "0:03:52", "throughput": 2056.63, "total_tokens": 198880} {"current_steps": 930, "total_steps": 3150, "loss": 0.0761, "lr": 4.443300690702201e-05, "epoch": 2.9523809523809526, "percentage": 29.52, "elapsed_time": "0:01:37", "remaining_time": "0:03:51", "throughput": 2059.2, "total_tokens": 199984} {"current_steps": 935, "total_steps": 3150, "loss": 0.0174, "lr": 4.434556656186391e-05, "epoch": 2.9682539682539684, "percentage": 29.68, "elapsed_time": "0:01:37", "remaining_time": "0:03:51", "throughput": 2061.15, "total_tokens": 201024} {"current_steps": 940, "total_steps": 3150, "loss": 0.0537, "lr": 4.425753231582454e-05, "epoch": 2.984126984126984, "percentage": 29.84, "elapsed_time": "0:01:37", "remaining_time": "0:03:50", "throughput": 2063.42, "total_tokens": 202080} {"current_steps": 945, "total_steps": 3150, "loss": 0.0193, "lr": 4.416890687151879e-05, "epoch": 3.0, "percentage": 30.0, "elapsed_time": "0:01:38", "remaining_time": "0:03:49", "throughput": 2062.69, "total_tokens": 203040} {"current_steps": 948, "total_steps": 3150, "eval_loss": 0.10855190455913544, "epoch": 3.0095238095238095, "percentage": 30.1, "elapsed_time": "0:01:40", "remaining_time": "0:03:53", "throughput": 2029.22, "total_tokens": 203712} {"current_steps": 950, "total_steps": 3150, "loss": 0.1139, "lr": 4.407969294971112e-05, "epoch": 3.015873015873016, "percentage": 30.16, "elapsed_time": "0:01:41", "remaining_time": "0:03:56", "throughput": 2002.2, "total_tokens": 204096} {"current_steps": 955, "total_steps": 3150, "loss": 0.0319, "lr": 4.3989893289231954e-05, "epoch": 3.0317460317460316, "percentage": 30.32, "elapsed_time": "0:01:42", "remaining_time": "0:03:55", "throughput": 2004.43, "total_tokens": 205168} {"current_steps": 960, "total_steps": 3150, "loss": 0.0375, "lr": 4.38995106468937e-05, "epoch": 3.0476190476190474, "percentage": 30.48, "elapsed_time": "0:01:42", "remaining_time": "0:03:54", "throughput": 2006.57, "total_tokens": 206256} {"current_steps": 965, "total_steps": 3150, "loss": 0.0531, "lr": 4.3808547797406027e-05, "epoch": 3.0634920634920633, "percentage": 30.63, "elapsed_time": "0:01:43", "remaining_time": "0:03:53", "throughput": 2009.23, "total_tokens": 207376} {"current_steps": 970, "total_steps": 3150, "loss": 0.0573, "lr": 4.371700753329075e-05, "epoch": 3.0793650793650795, "percentage": 30.79, "elapsed_time": "0:01:43", "remaining_time": "0:03:52", "throughput": 2011.54, "total_tokens": 208464} {"current_steps": 975, "total_steps": 3150, "loss": 0.0605, "lr": 4.362489266479608e-05, "epoch": 3.0952380952380953, "percentage": 30.95, "elapsed_time": "0:01:44", "remaining_time": "0:03:52", "throughput": 2013.89, "total_tokens": 209552} {"current_steps": 980, "total_steps": 3150, "loss": 0.0237, "lr": 4.35322060198103e-05, "epoch": 3.111111111111111, "percentage": 31.11, "elapsed_time": "0:01:44", "remaining_time": "0:03:51", "throughput": 2015.84, "total_tokens": 210624} {"current_steps": 985, "total_steps": 3150, "loss": 0.0687, "lr": 4.343895044377504e-05, "epoch": 3.126984126984127, "percentage": 31.27, "elapsed_time": "0:01:44", "remaining_time": "0:03:50", "throughput": 2017.84, "total_tokens": 211696} {"current_steps": 990, "total_steps": 3150, "loss": 0.0415, "lr": 4.334512879959786e-05, "epoch": 3.142857142857143, "percentage": 31.43, "elapsed_time": "0:01:45", "remaining_time": "0:03:49", "throughput": 2020.14, "total_tokens": 212800} {"current_steps": 995, "total_steps": 3150, "loss": 0.0949, "lr": 4.325074396756437e-05, "epoch": 3.1587301587301586, "percentage": 31.59, "elapsed_time": "0:01:45", "remaining_time": "0:03:49", "throughput": 2022.47, "total_tokens": 213888} {"current_steps": 1000, "total_steps": 3150, "loss": 0.1535, "lr": 4.3155798845249827e-05, "epoch": 3.1746031746031744, "percentage": 31.75, "elapsed_time": "0:01:46", "remaining_time": "0:03:48", "throughput": 2025.2, "total_tokens": 215024} {"current_steps": 1005, "total_steps": 3150, "loss": 0.0329, "lr": 4.3060296347430175e-05, "epoch": 3.1904761904761907, "percentage": 31.9, "elapsed_time": "0:01:46", "remaining_time": "0:03:47", "throughput": 2027.77, "total_tokens": 216144} {"current_steps": 1010, "total_steps": 3150, "loss": 0.0232, "lr": 4.296423940599253e-05, "epoch": 3.2063492063492065, "percentage": 32.06, "elapsed_time": "0:01:47", "remaining_time": "0:03:46", "throughput": 2029.92, "total_tokens": 217216} {"current_steps": 1015, "total_steps": 3150, "loss": 0.015, "lr": 4.286763096984523e-05, "epoch": 3.2222222222222223, "percentage": 32.22, "elapsed_time": "0:01:47", "remaining_time": "0:03:45", "throughput": 2031.91, "total_tokens": 218272} {"current_steps": 1020, "total_steps": 3150, "loss": 0.0152, "lr": 4.277047400482723e-05, "epoch": 3.238095238095238, "percentage": 32.38, "elapsed_time": "0:01:47", "remaining_time": "0:03:45", "throughput": 2034.15, "total_tokens": 219360} {"current_steps": 1025, "total_steps": 3150, "loss": 0.0009, "lr": 4.267277149361713e-05, "epoch": 3.253968253968254, "percentage": 32.54, "elapsed_time": "0:01:48", "remaining_time": "0:03:44", "throughput": 2035.64, "total_tokens": 220368} {"current_steps": 1030, "total_steps": 3150, "loss": 0.0463, "lr": 4.257452643564155e-05, "epoch": 3.2698412698412698, "percentage": 32.7, "elapsed_time": "0:01:48", "remaining_time": "0:03:43", "throughput": 2038.0, "total_tokens": 221472} {"current_steps": 1035, "total_steps": 3150, "loss": 0.0069, "lr": 4.2475741846983084e-05, "epoch": 3.2857142857142856, "percentage": 32.86, "elapsed_time": "0:01:49", "remaining_time": "0:03:42", "throughput": 2040.61, "total_tokens": 222608} {"current_steps": 1040, "total_steps": 3150, "loss": 0.0183, "lr": 4.2376420760287705e-05, "epoch": 3.3015873015873014, "percentage": 33.02, "elapsed_time": "0:01:49", "remaining_time": "0:03:42", "throughput": 2043.05, "total_tokens": 223728} {"current_steps": 1045, "total_steps": 3150, "loss": 0.0124, "lr": 4.227656622467162e-05, "epoch": 3.317460317460317, "percentage": 33.17, "elapsed_time": "0:01:49", "remaining_time": "0:03:41", "throughput": 2044.75, "total_tokens": 224768} {"current_steps": 1050, "total_steps": 3150, "loss": 0.006, "lr": 4.217618130562773e-05, "epoch": 3.3333333333333335, "percentage": 33.33, "elapsed_time": "0:01:50", "remaining_time": "0:03:40", "throughput": 2046.46, "total_tokens": 225808} {"current_steps": 1055, "total_steps": 3150, "loss": 0.0451, "lr": 4.207526908493147e-05, "epoch": 3.3492063492063493, "percentage": 33.49, "elapsed_time": "0:01:50", "remaining_time": "0:03:39", "throughput": 2048.33, "total_tokens": 226864} {"current_steps": 1060, "total_steps": 3150, "loss": 0.0463, "lr": 4.197383266054621e-05, "epoch": 3.365079365079365, "percentage": 33.65, "elapsed_time": "0:01:51", "remaining_time": "0:03:39", "throughput": 2050.38, "total_tokens": 227952} {"current_steps": 1065, "total_steps": 3150, "loss": 0.0557, "lr": 4.1871875146528195e-05, "epoch": 3.380952380952381, "percentage": 33.81, "elapsed_time": "0:01:51", "remaining_time": "0:03:38", "throughput": 2052.22, "total_tokens": 229008} {"current_steps": 1070, "total_steps": 3150, "loss": 0.0246, "lr": 4.176939967293085e-05, "epoch": 3.3968253968253967, "percentage": 33.97, "elapsed_time": "0:01:52", "remaining_time": "0:03:37", "throughput": 2054.03, "total_tokens": 230064} {"current_steps": 1075, "total_steps": 3150, "loss": 0.0288, "lr": 4.166640938570879e-05, "epoch": 3.4126984126984126, "percentage": 34.13, "elapsed_time": "0:01:52", "remaining_time": "0:03:37", "throughput": 2055.82, "total_tokens": 231120} {"current_steps": 1080, "total_steps": 3150, "loss": 0.0355, "lr": 4.156290744662117e-05, "epoch": 3.4285714285714284, "percentage": 34.29, "elapsed_time": "0:01:52", "remaining_time": "0:03:36", "throughput": 2058.23, "total_tokens": 232256} {"current_steps": 1085, "total_steps": 3150, "loss": 0.0557, "lr": 4.145889703313466e-05, "epoch": 3.4444444444444446, "percentage": 34.44, "elapsed_time": "0:01:53", "remaining_time": "0:03:35", "throughput": 2060.01, "total_tokens": 233312} {"current_steps": 1090, "total_steps": 3150, "loss": 0.0306, "lr": 4.1354381338325864e-05, "epoch": 3.4603174603174605, "percentage": 34.6, "elapsed_time": "0:01:53", "remaining_time": "0:03:34", "throughput": 2061.23, "total_tokens": 234304} {"current_steps": 1095, "total_steps": 3150, "loss": 0.0223, "lr": 4.124936357078334e-05, "epoch": 3.4761904761904763, "percentage": 34.76, "elapsed_time": "0:01:54", "remaining_time": "0:03:34", "throughput": 2062.97, "total_tokens": 235360} {"current_steps": 1100, "total_steps": 3150, "loss": 0.0373, "lr": 4.114384695450906e-05, "epoch": 3.492063492063492, "percentage": 34.92, "elapsed_time": "0:01:54", "remaining_time": "0:03:33", "throughput": 2064.54, "total_tokens": 236400} {"current_steps": 1105, "total_steps": 3150, "loss": 0.1033, "lr": 4.1037834728819425e-05, "epoch": 3.507936507936508, "percentage": 35.08, "elapsed_time": "0:01:54", "remaining_time": "0:03:32", "throughput": 2066.27, "total_tokens": 237456} {"current_steps": 1106, "total_steps": 3150, "eval_loss": 0.1209958866238594, "epoch": 3.511111111111111, "percentage": 35.11, "elapsed_time": "0:01:56", "remaining_time": "0:03:35", "throughput": 2039.37, "total_tokens": 237664} {"current_steps": 1110, "total_steps": 3150, "loss": 0.0008, "lr": 4.093133014824587e-05, "epoch": 3.5238095238095237, "percentage": 35.24, "elapsed_time": "0:01:58", "remaining_time": "0:03:37", "throughput": 2015.83, "total_tokens": 238496} {"current_steps": 1115, "total_steps": 3150, "loss": 0.044, "lr": 4.082433648243491e-05, "epoch": 3.5396825396825395, "percentage": 35.4, "elapsed_time": "0:01:58", "remaining_time": "0:03:36", "throughput": 2017.41, "total_tokens": 239536} {"current_steps": 1120, "total_steps": 3150, "loss": 0.0069, "lr": 4.071685701604777e-05, "epoch": 3.5555555555555554, "percentage": 35.56, "elapsed_time": "0:01:59", "remaining_time": "0:03:35", "throughput": 2018.54, "total_tokens": 240528} {"current_steps": 1125, "total_steps": 3150, "loss": 0.0004, "lr": 4.060889504865956e-05, "epoch": 3.571428571428571, "percentage": 35.71, "elapsed_time": "0:01:59", "remaining_time": "0:03:35", "throughput": 2020.14, "total_tokens": 241568} {"current_steps": 1130, "total_steps": 3150, "loss": 0.071, "lr": 4.050045389465794e-05, "epoch": 3.5873015873015874, "percentage": 35.87, "elapsed_time": "0:01:59", "remaining_time": "0:03:34", "throughput": 2022.43, "total_tokens": 242688} {"current_steps": 1135, "total_steps": 3150, "loss": 0.1045, "lr": 4.039153688314145e-05, "epoch": 3.6031746031746033, "percentage": 36.03, "elapsed_time": "0:02:00", "remaining_time": "0:03:33", "throughput": 2024.17, "total_tokens": 243744} {"current_steps": 1140, "total_steps": 3150, "loss": 0.012, "lr": 4.0282147357817244e-05, "epoch": 3.619047619047619, "percentage": 36.19, "elapsed_time": "0:02:00", "remaining_time": "0:03:33", "throughput": 2025.73, "total_tokens": 244768} {"current_steps": 1145, "total_steps": 3150, "loss": 0.0025, "lr": 4.017228867689842e-05, "epoch": 3.634920634920635, "percentage": 36.35, "elapsed_time": "0:02:01", "remaining_time": "0:03:32", "throughput": 2027.39, "total_tokens": 245808} {"current_steps": 1150, "total_steps": 3150, "loss": 0.0165, "lr": 4.006196421300099e-05, "epoch": 3.6507936507936507, "percentage": 36.51, "elapsed_time": "0:02:01", "remaining_time": "0:03:31", "throughput": 2029.15, "total_tokens": 246864} {"current_steps": 1155, "total_steps": 3150, "loss": 0.0486, "lr": 3.9951177353040305e-05, "epoch": 3.6666666666666665, "percentage": 36.67, "elapsed_time": "0:02:02", "remaining_time": "0:03:30", "throughput": 2031.38, "total_tokens": 247984} {"current_steps": 1160, "total_steps": 3150, "loss": 0.061, "lr": 3.983993149812709e-05, "epoch": 3.682539682539683, "percentage": 36.83, "elapsed_time": "0:02:02", "remaining_time": "0:03:30", "throughput": 2033.77, "total_tokens": 249120} {"current_steps": 1165, "total_steps": 3150, "loss": 0.0395, "lr": 3.9728230063463e-05, "epoch": 3.6984126984126986, "percentage": 36.98, "elapsed_time": "0:02:02", "remaining_time": "0:03:29", "throughput": 2035.59, "total_tokens": 250192} {"current_steps": 1170, "total_steps": 3150, "loss": 0.0713, "lr": 3.961607647823583e-05, "epoch": 3.7142857142857144, "percentage": 37.14, "elapsed_time": "0:02:03", "remaining_time": "0:03:28", "throughput": 2037.43, "total_tokens": 251264} {"current_steps": 1175, "total_steps": 3150, "loss": 0.0113, "lr": 3.950347418551419e-05, "epoch": 3.7301587301587302, "percentage": 37.3, "elapsed_time": "0:02:03", "remaining_time": "0:03:27", "throughput": 2039.37, "total_tokens": 252352} {"current_steps": 1180, "total_steps": 3150, "loss": 0.0656, "lr": 3.939042664214184e-05, "epoch": 3.746031746031746, "percentage": 37.46, "elapsed_time": "0:02:04", "remaining_time": "0:03:27", "throughput": 2041.05, "total_tokens": 253408} {"current_steps": 1185, "total_steps": 3150, "loss": 0.021, "lr": 3.927693731863153e-05, "epoch": 3.761904761904762, "percentage": 37.62, "elapsed_time": "0:02:04", "remaining_time": "0:03:26", "throughput": 2042.86, "total_tokens": 254480} {"current_steps": 1190, "total_steps": 3150, "loss": 0.0327, "lr": 3.91630096990585e-05, "epoch": 3.7777777777777777, "percentage": 37.78, "elapsed_time": "0:02:04", "remaining_time": "0:03:25", "throughput": 2044.54, "total_tokens": 255536} {"current_steps": 1195, "total_steps": 3150, "loss": 0.0762, "lr": 3.904864728095349e-05, "epoch": 3.7936507936507935, "percentage": 37.94, "elapsed_time": "0:02:05", "remaining_time": "0:03:25", "throughput": 2046.42, "total_tokens": 256624} {"current_steps": 1200, "total_steps": 3150, "loss": 0.0245, "lr": 3.893385357519534e-05, "epoch": 3.8095238095238093, "percentage": 38.1, "elapsed_time": "0:02:05", "remaining_time": "0:03:24", "throughput": 2047.81, "total_tokens": 257648} {"current_steps": 1205, "total_steps": 3150, "loss": 0.0355, "lr": 3.881863210590332e-05, "epoch": 3.825396825396825, "percentage": 38.25, "elapsed_time": "0:02:06", "remaining_time": "0:03:23", "throughput": 2049.47, "total_tokens": 258704} {"current_steps": 1210, "total_steps": 3150, "loss": 0.0365, "lr": 3.870298641032878e-05, "epoch": 3.8412698412698414, "percentage": 38.41, "elapsed_time": "0:02:06", "remaining_time": "0:03:23", "throughput": 2050.9, "total_tokens": 259728} {"current_steps": 1215, "total_steps": 3150, "loss": 0.102, "lr": 3.85869200387467e-05, "epoch": 3.857142857142857, "percentage": 38.57, "elapsed_time": "0:02:07", "remaining_time": "0:03:22", "throughput": 2053.01, "total_tokens": 260848} {"current_steps": 1220, "total_steps": 3150, "loss": 0.0322, "lr": 3.84704365543466e-05, "epoch": 3.873015873015873, "percentage": 38.73, "elapsed_time": "0:02:07", "remaining_time": "0:03:21", "throughput": 2054.96, "total_tokens": 261952} {"current_steps": 1225, "total_steps": 3150, "loss": 0.0985, "lr": 3.835353953312322e-05, "epoch": 3.888888888888889, "percentage": 38.89, "elapsed_time": "0:02:07", "remaining_time": "0:03:20", "throughput": 2057.42, "total_tokens": 263136} {"current_steps": 1230, "total_steps": 3150, "loss": 0.0469, "lr": 3.82362325637667e-05, "epoch": 3.9047619047619047, "percentage": 39.05, "elapsed_time": "0:02:08", "remaining_time": "0:03:20", "throughput": 2058.65, "total_tokens": 264144} {"current_steps": 1235, "total_steps": 3150, "loss": 0.1767, "lr": 3.8118519247552395e-05, "epoch": 3.9206349206349205, "percentage": 39.21, "elapsed_time": "0:02:08", "remaining_time": "0:03:19", "throughput": 2060.08, "total_tokens": 265184} {"current_steps": 1240, "total_steps": 3150, "loss": 0.0521, "lr": 3.8000403198230387e-05, "epoch": 3.9365079365079367, "percentage": 39.37, "elapsed_time": "0:02:09", "remaining_time": "0:03:18", "throughput": 2061.64, "total_tokens": 266240} {"current_steps": 1245, "total_steps": 3150, "loss": 0.0234, "lr": 3.788188804191446e-05, "epoch": 3.9523809523809526, "percentage": 39.52, "elapsed_time": "0:02:09", "remaining_time": "0:03:18", "throughput": 2063.53, "total_tokens": 267344} {"current_steps": 1250, "total_steps": 3150, "loss": 0.0472, "lr": 3.776297741697082e-05, "epoch": 3.9682539682539684, "percentage": 39.68, "elapsed_time": "0:02:09", "remaining_time": "0:03:17", "throughput": 2065.06, "total_tokens": 268400} {"current_steps": 1255, "total_steps": 3150, "loss": 0.138, "lr": 3.7643674973906417e-05, "epoch": 3.984126984126984, "percentage": 39.84, "elapsed_time": "0:02:10", "remaining_time": "0:03:16", "throughput": 2067.5, "total_tokens": 269568} {"current_steps": 1260, "total_steps": 3150, "loss": 0.0019, "lr": 3.752398437525684e-05, "epoch": 4.0, "percentage": 40.0, "elapsed_time": "0:02:10", "remaining_time": "0:03:16", "throughput": 2067.21, "total_tokens": 270576} {"current_steps": 1264, "total_steps": 3150, "eval_loss": 0.10666527599096298, "epoch": 4.012698412698413, "percentage": 40.13, "elapsed_time": "0:02:12", "remaining_time": "0:03:18", "throughput": 2042.49, "total_tokens": 271472} {"current_steps": 1265, "total_steps": 3150, "loss": 0.0364, "lr": 3.7403909295473885e-05, "epoch": 4.015873015873016, "percentage": 40.16, "elapsed_time": "0:02:14", "remaining_time": "0:03:20", "throughput": 2021.25, "total_tokens": 271680} {"current_steps": 1270, "total_steps": 3150, "loss": 0.0224, "lr": 3.7283453420812786e-05, "epoch": 4.031746031746032, "percentage": 40.32, "elapsed_time": "0:02:14", "remaining_time": "0:03:19", "throughput": 2022.72, "total_tokens": 272736} {"current_steps": 1275, "total_steps": 3150, "loss": 0.0063, "lr": 3.7162620449219e-05, "epoch": 4.0476190476190474, "percentage": 40.48, "elapsed_time": "0:02:15", "remaining_time": "0:03:18", "throughput": 2023.79, "total_tokens": 273744} {"current_steps": 1280, "total_steps": 3150, "loss": 0.0302, "lr": 3.704141409021471e-05, "epoch": 4.063492063492063, "percentage": 40.63, "elapsed_time": "0:02:15", "remaining_time": "0:03:18", "throughput": 2025.27, "total_tokens": 274800} {"current_steps": 1285, "total_steps": 3150, "loss": 0.0208, "lr": 3.691983806478494e-05, "epoch": 4.079365079365079, "percentage": 40.79, "elapsed_time": "0:02:16", "remaining_time": "0:03:17", "throughput": 2026.92, "total_tokens": 275872} {"current_steps": 1290, "total_steps": 3150, "loss": 0.0775, "lr": 3.679789610526332e-05, "epoch": 4.095238095238095, "percentage": 40.95, "elapsed_time": "0:02:16", "remaining_time": "0:03:16", "throughput": 2028.5, "total_tokens": 276928} {"current_steps": 1295, "total_steps": 3150, "loss": 0.0057, "lr": 3.66755919552175e-05, "epoch": 4.111111111111111, "percentage": 41.11, "elapsed_time": "0:02:16", "remaining_time": "0:03:16", "throughput": 2030.22, "total_tokens": 278016} {"current_steps": 1300, "total_steps": 3150, "loss": 0.0056, "lr": 3.655292936933424e-05, "epoch": 4.1269841269841265, "percentage": 41.27, "elapsed_time": "0:02:17", "remaining_time": "0:03:15", "throughput": 2031.7, "total_tokens": 279072} {"current_steps": 1305, "total_steps": 3150, "loss": 0.0114, "lr": 3.6429912113304115e-05, "epoch": 4.142857142857143, "percentage": 41.43, "elapsed_time": "0:02:17", "remaining_time": "0:03:14", "throughput": 2033.33, "total_tokens": 280144} {"current_steps": 1310, "total_steps": 3150, "loss": 0.0408, "lr": 3.630654396370594e-05, "epoch": 4.158730158730159, "percentage": 41.59, "elapsed_time": "0:02:18", "remaining_time": "0:03:14", "throughput": 2034.97, "total_tokens": 281216} {"current_steps": 1315, "total_steps": 3150, "loss": 0.0019, "lr": 3.6182828707890816e-05, "epoch": 4.174603174603175, "percentage": 41.75, "elapsed_time": "0:02:18", "remaining_time": "0:03:13", "throughput": 2036.71, "total_tokens": 282304} {"current_steps": 1320, "total_steps": 3150, "loss": 0.037, "lr": 3.6058770143865855e-05, "epoch": 4.190476190476191, "percentage": 41.9, "elapsed_time": "0:02:19", "remaining_time": "0:03:12", "throughput": 2038.68, "total_tokens": 283424} {"current_steps": 1325, "total_steps": 3150, "loss": 0.0197, "lr": 3.5934372080177594e-05, "epoch": 4.2063492063492065, "percentage": 42.06, "elapsed_time": "0:02:19", "remaining_time": "0:03:12", "throughput": 2040.07, "total_tokens": 284464} {"current_steps": 1330, "total_steps": 3150, "loss": 0.0091, "lr": 3.5809638335795066e-05, "epoch": 4.222222222222222, "percentage": 42.22, "elapsed_time": "0:02:19", "remaining_time": "0:03:11", "throughput": 2041.88, "total_tokens": 285568} {"current_steps": 1335, "total_steps": 3150, "loss": 0.0033, "lr": 3.5684572739992564e-05, "epoch": 4.238095238095238, "percentage": 42.38, "elapsed_time": "0:02:20", "remaining_time": "0:03:10", "throughput": 2043.68, "total_tokens": 286672} {"current_steps": 1340, "total_steps": 3150, "loss": 0.044, "lr": 3.555917913223208e-05, "epoch": 4.253968253968254, "percentage": 42.54, "elapsed_time": "0:02:20", "remaining_time": "0:03:10", "throughput": 2045.16, "total_tokens": 287728} {"current_steps": 1345, "total_steps": 3150, "loss": 0.0108, "lr": 3.543346136204545e-05, "epoch": 4.26984126984127, "percentage": 42.7, "elapsed_time": "0:02:21", "remaining_time": "0:03:09", "throughput": 2046.42, "total_tokens": 288752} {"current_steps": 1350, "total_steps": 3150, "loss": 0.0173, "lr": 3.530742328891614e-05, "epoch": 4.285714285714286, "percentage": 42.86, "elapsed_time": "0:02:21", "remaining_time": "0:03:08", "throughput": 2048.3, "total_tokens": 289872} {"current_steps": 1355, "total_steps": 3150, "loss": 0.0038, "lr": 3.518106878216079e-05, "epoch": 4.301587301587301, "percentage": 43.02, "elapsed_time": "0:02:21", "remaining_time": "0:03:08", "throughput": 2049.56, "total_tokens": 290896} {"current_steps": 1360, "total_steps": 3150, "loss": 0.0035, "lr": 3.505440172081044e-05, "epoch": 4.317460317460317, "percentage": 43.17, "elapsed_time": "0:02:22", "remaining_time": "0:03:07", "throughput": 2051.55, "total_tokens": 292032} {"current_steps": 1365, "total_steps": 3150, "loss": 0.1602, "lr": 3.4927425993491404e-05, "epoch": 4.333333333333333, "percentage": 43.33, "elapsed_time": "0:02:22", "remaining_time": "0:03:06", "throughput": 2052.79, "total_tokens": 293056} {"current_steps": 1370, "total_steps": 3150, "loss": 0.0003, "lr": 3.480014549830593e-05, "epoch": 4.349206349206349, "percentage": 43.49, "elapsed_time": "0:02:23", "remaining_time": "0:03:06", "throughput": 2054.41, "total_tokens": 294144} {"current_steps": 1375, "total_steps": 3150, "loss": 0.0021, "lr": 3.467256414271249e-05, "epoch": 4.365079365079365, "percentage": 43.65, "elapsed_time": "0:02:23", "remaining_time": "0:03:05", "throughput": 2055.61, "total_tokens": 295168} {"current_steps": 1380, "total_steps": 3150, "loss": 0.0031, "lr": 3.454468584340588e-05, "epoch": 4.380952380952381, "percentage": 43.81, "elapsed_time": "0:02:24", "remaining_time": "0:03:04", "throughput": 2056.9, "total_tokens": 296208} {"current_steps": 1385, "total_steps": 3150, "loss": 0.0185, "lr": 3.4416514526196914e-05, "epoch": 4.396825396825397, "percentage": 43.97, "elapsed_time": "0:02:24", "remaining_time": "0:03:04", "throughput": 2058.65, "total_tokens": 297328} {"current_steps": 1390, "total_steps": 3150, "loss": 0.0025, "lr": 3.428805412589195e-05, "epoch": 4.412698412698413, "percentage": 44.13, "elapsed_time": "0:02:24", "remaining_time": "0:03:03", "throughput": 2059.94, "total_tokens": 298368} {"current_steps": 1395, "total_steps": 3150, "loss": 0.0313, "lr": 3.415930858617208e-05, "epoch": 4.428571428571429, "percentage": 44.29, "elapsed_time": "0:02:25", "remaining_time": "0:03:02", "throughput": 2061.11, "total_tokens": 299392} {"current_steps": 1400, "total_steps": 3150, "loss": 0.0583, "lr": 3.4030281859472046e-05, "epoch": 4.444444444444445, "percentage": 44.44, "elapsed_time": "0:02:25", "remaining_time": "0:03:02", "throughput": 2062.81, "total_tokens": 300496} {"current_steps": 1405, "total_steps": 3150, "loss": 0.0293, "lr": 3.390097790685892e-05, "epoch": 4.4603174603174605, "percentage": 44.6, "elapsed_time": "0:02:26", "remaining_time": "0:03:01", "throughput": 2064.06, "total_tokens": 301536} {"current_steps": 1410, "total_steps": 3150, "loss": 0.0382, "lr": 3.377140069791049e-05, "epoch": 4.476190476190476, "percentage": 44.76, "elapsed_time": "0:02:26", "remaining_time": "0:03:00", "throughput": 2065.22, "total_tokens": 302560} {"current_steps": 1415, "total_steps": 3150, "loss": 0.0402, "lr": 3.364155421059342e-05, "epoch": 4.492063492063492, "percentage": 44.92, "elapsed_time": "0:02:26", "remaining_time": "0:03:00", "throughput": 2066.47, "total_tokens": 303600} {"current_steps": 1420, "total_steps": 3150, "loss": 0.079, "lr": 3.351144243114108e-05, "epoch": 4.507936507936508, "percentage": 45.08, "elapsed_time": "0:02:27", "remaining_time": "0:02:59", "throughput": 2067.92, "total_tokens": 304672} {"current_steps": 1422, "total_steps": 3150, "eval_loss": 0.13925819098949432, "epoch": 4.514285714285714, "percentage": 45.14, "elapsed_time": "0:02:29", "remaining_time": "0:03:01", "throughput": 2047.14, "total_tokens": 305088} {"current_steps": 1425, "total_steps": 3150, "loss": 0.0364, "lr": 3.338106935393121e-05, "epoch": 4.523809523809524, "percentage": 45.24, "elapsed_time": "0:02:30", "remaining_time": "0:03:02", "throughput": 2028.81, "total_tokens": 305712} {"current_steps": 1430, "total_steps": 3150, "loss": 0.0408, "lr": 3.32504389813633e-05, "epoch": 4.5396825396825395, "percentage": 45.4, "elapsed_time": "0:02:31", "remaining_time": "0:03:01", "throughput": 2030.47, "total_tokens": 306816} {"current_steps": 1435, "total_steps": 3150, "loss": 0.0418, "lr": 3.3119555323735665e-05, "epoch": 4.555555555555555, "percentage": 45.56, "elapsed_time": "0:02:31", "remaining_time": "0:03:01", "throughput": 2032.32, "total_tokens": 307968} {"current_steps": 1440, "total_steps": 3150, "loss": 0.0753, "lr": 3.2988422399122403e-05, "epoch": 4.571428571428571, "percentage": 45.71, "elapsed_time": "0:02:31", "remaining_time": "0:03:00", "throughput": 2033.68, "total_tokens": 309024} {"current_steps": 1445, "total_steps": 3150, "loss": 0.014, "lr": 3.285704423324998e-05, "epoch": 4.587301587301587, "percentage": 45.87, "elapsed_time": "0:02:32", "remaining_time": "0:02:59", "throughput": 2035.25, "total_tokens": 310112} {"current_steps": 1450, "total_steps": 3150, "loss": 0.09, "lr": 3.272542485937369e-05, "epoch": 4.603174603174603, "percentage": 46.03, "elapsed_time": "0:02:32", "remaining_time": "0:02:59", "throughput": 2036.83, "total_tokens": 311200} {"current_steps": 1455, "total_steps": 3150, "loss": 0.0023, "lr": 3.259356831815378e-05, "epoch": 4.619047619047619, "percentage": 46.19, "elapsed_time": "0:02:33", "remaining_time": "0:02:58", "throughput": 2038.63, "total_tokens": 312336} {"current_steps": 1460, "total_steps": 3150, "loss": 0.0409, "lr": 3.246147865753148e-05, "epoch": 4.634920634920634, "percentage": 46.35, "elapsed_time": "0:02:33", "remaining_time": "0:02:57", "throughput": 2040.39, "total_tokens": 313472} {"current_steps": 1465, "total_steps": 3150, "loss": 0.0194, "lr": 3.232915993260464e-05, "epoch": 4.650793650793651, "percentage": 46.51, "elapsed_time": "0:02:34", "remaining_time": "0:02:57", "throughput": 2041.49, "total_tokens": 314512} {"current_steps": 1470, "total_steps": 3150, "loss": 0.0441, "lr": 3.219661620550332e-05, "epoch": 4.666666666666667, "percentage": 46.67, "elapsed_time": "0:02:34", "remaining_time": "0:02:56", "throughput": 2042.42, "total_tokens": 315520} {"current_steps": 1475, "total_steps": 3150, "loss": 0.0256, "lr": 3.2063851545265055e-05, "epoch": 4.682539682539683, "percentage": 46.83, "elapsed_time": "0:02:34", "remaining_time": "0:02:55", "throughput": 2043.87, "total_tokens": 316592} {"current_steps": 1480, "total_steps": 3150, "loss": 0.0156, "lr": 3.193087002770993e-05, "epoch": 4.698412698412699, "percentage": 46.98, "elapsed_time": "0:02:35", "remaining_time": "0:02:55", "throughput": 2045.39, "total_tokens": 317680} {"current_steps": 1485, "total_steps": 3150, "loss": 0.0036, "lr": 3.1797675735315455e-05, "epoch": 4.714285714285714, "percentage": 47.14, "elapsed_time": "0:02:35", "remaining_time": "0:02:54", "throughput": 2046.92, "total_tokens": 318768} {"current_steps": 1490, "total_steps": 3150, "loss": 0.0287, "lr": 3.166427275709123e-05, "epoch": 4.73015873015873, "percentage": 47.3, "elapsed_time": "0:02:36", "remaining_time": "0:02:53", "throughput": 2048.33, "total_tokens": 319840} {"current_steps": 1495, "total_steps": 3150, "loss": 0.0838, "lr": 3.1530665188453464e-05, "epoch": 4.746031746031746, "percentage": 47.46, "elapsed_time": "0:02:36", "remaining_time": "0:02:53", "throughput": 2049.33, "total_tokens": 320848} {"current_steps": 1500, "total_steps": 3150, "loss": 0.004, "lr": 3.139685713109915e-05, "epoch": 4.761904761904762, "percentage": 47.62, "elapsed_time": "0:02:36", "remaining_time": "0:02:52", "throughput": 2051.12, "total_tokens": 321984} {"current_steps": 1505, "total_steps": 3150, "loss": 0.003, "lr": 3.126285269288024e-05, "epoch": 4.777777777777778, "percentage": 47.78, "elapsed_time": "0:02:37", "remaining_time": "0:02:52", "throughput": 2052.44, "total_tokens": 323040} {"current_steps": 1510, "total_steps": 3150, "loss": 0.0001, "lr": 3.11286559876775e-05, "epoch": 4.7936507936507935, "percentage": 47.94, "elapsed_time": "0:02:37", "remaining_time": "0:02:51", "throughput": 2053.84, "total_tokens": 324112} {"current_steps": 1515, "total_steps": 3150, "loss": 0.0562, "lr": 3.099427113527419e-05, "epoch": 4.809523809523809, "percentage": 48.1, "elapsed_time": "0:02:38", "remaining_time": "0:02:50", "throughput": 2055.42, "total_tokens": 325216} {"current_steps": 1520, "total_steps": 3150, "loss": 0.0032, "lr": 3.085970226122962e-05, "epoch": 4.825396825396825, "percentage": 48.25, "elapsed_time": "0:02:38", "remaining_time": "0:02:50", "throughput": 2056.61, "total_tokens": 326256} {"current_steps": 1525, "total_steps": 3150, "loss": 0.0059, "lr": 3.072495349675249e-05, "epoch": 4.841269841269841, "percentage": 48.41, "elapsed_time": "0:02:39", "remaining_time": "0:02:49", "throughput": 2058.16, "total_tokens": 327360} {"current_steps": 1530, "total_steps": 3150, "loss": 0.0112, "lr": 3.059002897857407e-05, "epoch": 4.857142857142857, "percentage": 48.57, "elapsed_time": "0:02:39", "remaining_time": "0:02:48", "throughput": 2059.8, "total_tokens": 328480} {"current_steps": 1535, "total_steps": 3150, "loss": 0.0053, "lr": 3.0454932848821182e-05, "epoch": 4.8730158730158735, "percentage": 48.73, "elapsed_time": "0:02:39", "remaining_time": "0:02:48", "throughput": 2061.54, "total_tokens": 329616} {"current_steps": 1540, "total_steps": 3150, "loss": 0.0048, "lr": 3.0319669254889055e-05, "epoch": 4.888888888888889, "percentage": 48.89, "elapsed_time": "0:02:40", "remaining_time": "0:02:47", "throughput": 2062.98, "total_tokens": 330704} {"current_steps": 1545, "total_steps": 3150, "loss": 0.0107, "lr": 3.018424234931401e-05, "epoch": 4.904761904761905, "percentage": 49.05, "elapsed_time": "0:02:40", "remaining_time": "0:02:46", "throughput": 2064.49, "total_tokens": 331808} {"current_steps": 1550, "total_steps": 3150, "loss": 0.0263, "lr": 3.0048656289645944e-05, "epoch": 4.920634920634921, "percentage": 49.21, "elapsed_time": "0:02:41", "remaining_time": "0:02:46", "throughput": 2065.93, "total_tokens": 332896} {"current_steps": 1555, "total_steps": 3150, "loss": 0.0723, "lr": 2.9912915238320754e-05, "epoch": 4.936507936507937, "percentage": 49.37, "elapsed_time": "0:02:41", "remaining_time": "0:02:45", "throughput": 2067.26, "total_tokens": 333968} {"current_steps": 1560, "total_steps": 3150, "loss": 0.0137, "lr": 2.9777023362532486e-05, "epoch": 4.9523809523809526, "percentage": 49.52, "elapsed_time": "0:02:41", "remaining_time": "0:02:45", "throughput": 2068.2, "total_tokens": 334976} {"current_steps": 1565, "total_steps": 3150, "loss": 0.0015, "lr": 2.9640984834105445e-05, "epoch": 4.968253968253968, "percentage": 49.68, "elapsed_time": "0:02:42", "remaining_time": "0:02:44", "throughput": 2069.51, "total_tokens": 336048} {"current_steps": 1570, "total_steps": 3150, "loss": 0.0268, "lr": 2.950480382936611e-05, "epoch": 4.984126984126984, "percentage": 49.84, "elapsed_time": "0:02:42", "remaining_time": "0:02:43", "throughput": 2071.02, "total_tokens": 337136} {"current_steps": 1575, "total_steps": 3150, "loss": 0.0128, "lr": 2.936848452901494e-05, "epoch": 5.0, "percentage": 50.0, "elapsed_time": "0:02:43", "remaining_time": "0:02:43", "throughput": 2071.12, "total_tokens": 338192} {"current_steps": 1580, "total_steps": 3150, "loss": 0.0025, "lr": 2.9232031117997983e-05, "epoch": 5.015873015873016, "percentage": 50.16, "elapsed_time": "0:02:43", "remaining_time": "0:02:42", "throughput": 2070.41, "total_tokens": 339264} {"current_steps": 1580, "total_steps": 3150, "eval_loss": 0.14511053264141083, "epoch": 5.015873015873016, "percentage": 50.16, "elapsed_time": "0:02:45", "remaining_time": "0:02:44", "throughput": 2051.09, "total_tokens": 339264} {"current_steps": 1585, "total_steps": 3150, "loss": 0.0003, "lr": 2.9095447785378443e-05, "epoch": 5.031746031746032, "percentage": 50.32, "elapsed_time": "0:02:47", "remaining_time": "0:02:45", "throughput": 2033.09, "total_tokens": 340288} {"current_steps": 1590, "total_steps": 3150, "loss": 0.0014, "lr": 2.8958738724208072e-05, "epoch": 5.0476190476190474, "percentage": 50.48, "elapsed_time": "0:02:47", "remaining_time": "0:02:44", "throughput": 2034.53, "total_tokens": 341392} {"current_steps": 1595, "total_steps": 3150, "loss": 0.0002, "lr": 2.8821908131398423e-05, "epoch": 5.063492063492063, "percentage": 50.63, "elapsed_time": "0:02:48", "remaining_time": "0:02:44", "throughput": 2035.85, "total_tokens": 342480} {"current_steps": 1600, "total_steps": 3150, "loss": 0.0016, "lr": 2.8684960207592032e-05, "epoch": 5.079365079365079, "percentage": 50.79, "elapsed_time": "0:02:48", "remaining_time": "0:02:43", "throughput": 2037.31, "total_tokens": 343584} {"current_steps": 1605, "total_steps": 3150, "loss": 0.0075, "lr": 2.8547899157033432e-05, "epoch": 5.095238095238095, "percentage": 50.95, "elapsed_time": "0:02:49", "remaining_time": "0:02:42", "throughput": 2038.68, "total_tokens": 344672} {"current_steps": 1610, "total_steps": 3150, "loss": 0.0058, "lr": 2.8410729187440116e-05, "epoch": 5.111111111111111, "percentage": 51.11, "elapsed_time": "0:02:49", "remaining_time": "0:02:42", "throughput": 2040.2, "total_tokens": 345792} {"current_steps": 1615, "total_steps": 3150, "loss": 0.0002, "lr": 2.8273454509873333e-05, "epoch": 5.1269841269841265, "percentage": 51.27, "elapsed_time": "0:02:49", "remaining_time": "0:02:41", "throughput": 2041.67, "total_tokens": 346896} {"current_steps": 1620, "total_steps": 3150, "loss": 0.0821, "lr": 2.8136079338608833e-05, "epoch": 5.142857142857143, "percentage": 51.43, "elapsed_time": "0:02:50", "remaining_time": "0:02:40", "throughput": 2043.08, "total_tokens": 347984} {"current_steps": 1625, "total_steps": 3150, "loss": 0.0008, "lr": 2.7998607891007495e-05, "epoch": 5.158730158730159, "percentage": 51.59, "elapsed_time": "0:02:50", "remaining_time": "0:02:40", "throughput": 2044.22, "total_tokens": 349024} {"current_steps": 1630, "total_steps": 3150, "loss": 0.0226, "lr": 2.78610443873858e-05, "epoch": 5.174603174603175, "percentage": 51.75, "elapsed_time": "0:02:51", "remaining_time": "0:02:39", "throughput": 2045.75, "total_tokens": 350144} {"current_steps": 1635, "total_steps": 3150, "loss": 0.0122, "lr": 2.7723393050886355e-05, "epoch": 5.190476190476191, "percentage": 51.9, "elapsed_time": "0:02:51", "remaining_time": "0:02:38", "throughput": 2046.85, "total_tokens": 351184} {"current_steps": 1640, "total_steps": 3150, "loss": 0.0044, "lr": 2.7585658107348162e-05, "epoch": 5.2063492063492065, "percentage": 52.06, "elapsed_time": "0:02:51", "remaining_time": "0:02:38", "throughput": 2047.98, "total_tokens": 352224} {"current_steps": 1645, "total_steps": 3150, "loss": 0.0128, "lr": 2.7447843785176956e-05, "epoch": 5.222222222222222, "percentage": 52.22, "elapsed_time": "0:02:52", "remaining_time": "0:02:37", "throughput": 2049.51, "total_tokens": 353344} {"current_steps": 1650, "total_steps": 3150, "loss": 0.0079, "lr": 2.730995431521535e-05, "epoch": 5.238095238095238, "percentage": 52.38, "elapsed_time": "0:02:52", "remaining_time": "0:02:37", "throughput": 2050.89, "total_tokens": 354432} {"current_steps": 1655, "total_steps": 3150, "loss": 0.0201, "lr": 2.717199393061296e-05, "epoch": 5.253968253968254, "percentage": 52.54, "elapsed_time": "0:02:53", "remaining_time": "0:02:36", "throughput": 2052.34, "total_tokens": 355536} {"current_steps": 1660, "total_steps": 3150, "loss": 0.0163, "lr": 2.7033966866696457e-05, "epoch": 5.26984126984127, "percentage": 52.7, "elapsed_time": "0:02:53", "remaining_time": "0:02:35", "throughput": 2053.81, "total_tokens": 356640} {"current_steps": 1665, "total_steps": 3150, "loss": 0.0001, "lr": 2.6895877360839537e-05, "epoch": 5.285714285714286, "percentage": 52.86, "elapsed_time": "0:02:54", "remaining_time": "0:02:35", "throughput": 2054.99, "total_tokens": 357696} {"current_steps": 1670, "total_steps": 3150, "loss": 0.0024, "lr": 2.675772965233284e-05, "epoch": 5.301587301587301, "percentage": 53.02, "elapsed_time": "0:02:54", "remaining_time": "0:02:34", "throughput": 2056.48, "total_tokens": 358816} {"current_steps": 1675, "total_steps": 3150, "loss": 0.0246, "lr": 2.6619527982253794e-05, "epoch": 5.317460317460317, "percentage": 53.17, "elapsed_time": "0:02:54", "remaining_time": "0:02:34", "throughput": 2057.66, "total_tokens": 359872} {"current_steps": 1680, "total_steps": 3150, "loss": 0.0074, "lr": 2.648127659333645e-05, "epoch": 5.333333333333333, "percentage": 53.33, "elapsed_time": "0:02:55", "remaining_time": "0:02:33", "throughput": 2059.06, "total_tokens": 360976} {"current_steps": 1685, "total_steps": 3150, "loss": 0.0033, "lr": 2.6342979729841166e-05, "epoch": 5.349206349206349, "percentage": 53.49, "elapsed_time": "0:02:55", "remaining_time": "0:02:32", "throughput": 2060.22, "total_tokens": 362032} {"current_steps": 1690, "total_steps": 3150, "loss": 0.0375, "lr": 2.6204641637424394e-05, "epoch": 5.365079365079365, "percentage": 53.65, "elapsed_time": "0:02:56", "remaining_time": "0:02:32", "throughput": 2061.62, "total_tokens": 363136} {"current_steps": 1695, "total_steps": 3150, "loss": 0.0512, "lr": 2.6066266563008267e-05, "epoch": 5.380952380952381, "percentage": 53.81, "elapsed_time": "0:02:56", "remaining_time": "0:02:31", "throughput": 2062.68, "total_tokens": 364176} {"current_steps": 1700, "total_steps": 3150, "loss": 0.0013, "lr": 2.5927858754650257e-05, "epoch": 5.396825396825397, "percentage": 53.97, "elapsed_time": "0:02:56", "remaining_time": "0:02:30", "throughput": 2063.83, "total_tokens": 365232} {"current_steps": 1705, "total_steps": 3150, "loss": 0.0191, "lr": 2.5789422461412776e-05, "epoch": 5.412698412698413, "percentage": 54.13, "elapsed_time": "0:02:57", "remaining_time": "0:02:30", "throughput": 2065.04, "total_tokens": 366304} {"current_steps": 1710, "total_steps": 3150, "loss": 0.0001, "lr": 2.5650961933232663e-05, "epoch": 5.428571428571429, "percentage": 54.29, "elapsed_time": "0:02:57", "remaining_time": "0:02:29", "throughput": 2066.18, "total_tokens": 367360} {"current_steps": 1715, "total_steps": 3150, "loss": 0.0003, "lr": 2.551248142079081e-05, "epoch": 5.444444444444445, "percentage": 54.44, "elapsed_time": "0:02:58", "remaining_time": "0:02:29", "throughput": 2067.65, "total_tokens": 368480} {"current_steps": 1720, "total_steps": 3150, "loss": 0.0394, "lr": 2.5373985175381594e-05, "epoch": 5.4603174603174605, "percentage": 54.6, "elapsed_time": "0:02:58", "remaining_time": "0:02:28", "throughput": 2069.1, "total_tokens": 369600} {"current_steps": 1725, "total_steps": 3150, "loss": 0.0049, "lr": 2.523547744878238e-05, "epoch": 5.476190476190476, "percentage": 54.76, "elapsed_time": "0:02:59", "remaining_time": "0:02:27", "throughput": 2070.22, "total_tokens": 370656} {"current_steps": 1730, "total_steps": 3150, "loss": 0.0008, "lr": 2.5096962493123012e-05, "epoch": 5.492063492063492, "percentage": 54.92, "elapsed_time": "0:02:59", "remaining_time": "0:02:27", "throughput": 2071.4, "total_tokens": 371728} {"current_steps": 1735, "total_steps": 3150, "loss": 0.0008, "lr": 2.4958444560755264e-05, "epoch": 5.507936507936508, "percentage": 55.08, "elapsed_time": "0:02:59", "remaining_time": "0:02:26", "throughput": 2072.4, "total_tokens": 372768} {"current_steps": 1738, "total_steps": 3150, "eval_loss": 0.16769738495349884, "epoch": 5.517460317460317, "percentage": 55.17, "elapsed_time": "0:03:01", "remaining_time": "0:02:27", "throughput": 2055.89, "total_tokens": 373488} {"current_steps": 1740, "total_steps": 3150, "loss": 0.0009, "lr": 2.4819927904122287e-05, "epoch": 5.523809523809524, "percentage": 55.24, "elapsed_time": "0:03:03", "remaining_time": "0:02:28", "throughput": 2040.54, "total_tokens": 373872} {"current_steps": 1745, "total_steps": 3150, "loss": 0.0005, "lr": 2.468141677562808e-05, "epoch": 5.5396825396825395, "percentage": 55.4, "elapsed_time": "0:03:03", "remaining_time": "0:02:27", "throughput": 2041.42, "total_tokens": 374896} {"current_steps": 1750, "total_steps": 3150, "loss": 0.0061, "lr": 2.4542915427506913e-05, "epoch": 5.555555555555555, "percentage": 55.56, "elapsed_time": "0:03:04", "remaining_time": "0:02:27", "throughput": 2042.59, "total_tokens": 375984} {"current_steps": 1755, "total_steps": 3150, "loss": 0.0018, "lr": 2.4404428111692817e-05, "epoch": 5.571428571428571, "percentage": 55.71, "elapsed_time": "0:03:04", "remaining_time": "0:02:26", "throughput": 2043.94, "total_tokens": 377088} {"current_steps": 1760, "total_steps": 3150, "loss": 0.01, "lr": 2.4265959079689028e-05, "epoch": 5.587301587301587, "percentage": 55.87, "elapsed_time": "0:03:04", "remaining_time": "0:02:26", "throughput": 2045.43, "total_tokens": 378224} {"current_steps": 1765, "total_steps": 3150, "loss": 0.0194, "lr": 2.4127512582437485e-05, "epoch": 5.603174603174603, "percentage": 56.03, "elapsed_time": "0:03:05", "remaining_time": "0:02:25", "throughput": 2046.55, "total_tokens": 379280} {"current_steps": 1770, "total_steps": 3150, "loss": 0.0005, "lr": 2.3989092870188296e-05, "epoch": 5.619047619047619, "percentage": 56.19, "elapsed_time": "0:03:05", "remaining_time": "0:02:24", "throughput": 2048.14, "total_tokens": 380432} {"current_steps": 1775, "total_steps": 3150, "loss": 0.0018, "lr": 2.3850704192369334e-05, "epoch": 5.634920634920634, "percentage": 56.35, "elapsed_time": "0:03:06", "remaining_time": "0:02:24", "throughput": 2049.33, "total_tokens": 381504} {"current_steps": 1780, "total_steps": 3150, "loss": 0.0228, "lr": 2.371235079745565e-05, "epoch": 5.650793650793651, "percentage": 56.51, "elapsed_time": "0:03:06", "remaining_time": "0:02:23", "throughput": 2050.7, "total_tokens": 382608} {"current_steps": 1785, "total_steps": 3150, "loss": 0.0, "lr": 2.3574036932839214e-05, "epoch": 5.666666666666667, "percentage": 56.67, "elapsed_time": "0:03:06", "remaining_time": "0:02:22", "throughput": 2051.73, "total_tokens": 383648} {"current_steps": 1790, "total_steps": 3150, "loss": 0.0123, "lr": 2.343576684469834e-05, "epoch": 5.682539682539683, "percentage": 56.83, "elapsed_time": "0:03:07", "remaining_time": "0:02:22", "throughput": 2052.98, "total_tokens": 384736} {"current_steps": 1795, "total_steps": 3150, "loss": 0.005, "lr": 2.32975447778675e-05, "epoch": 5.698412698412699, "percentage": 56.98, "elapsed_time": "0:03:07", "remaining_time": "0:02:21", "throughput": 2053.99, "total_tokens": 385776} {"current_steps": 1800, "total_steps": 3150, "loss": 0.0, "lr": 2.3159374975706884e-05, "epoch": 5.714285714285714, "percentage": 57.14, "elapsed_time": "0:03:08", "remaining_time": "0:02:21", "throughput": 2054.99, "total_tokens": 386816} {"current_steps": 1805, "total_steps": 3150, "loss": 0.0001, "lr": 2.30212616799722e-05, "epoch": 5.73015873015873, "percentage": 57.3, "elapsed_time": "0:03:08", "remaining_time": "0:02:20", "throughput": 2055.92, "total_tokens": 387840} {"current_steps": 1810, "total_steps": 3150, "loss": 0.0003, "lr": 2.288320913068442e-05, "epoch": 5.746031746031746, "percentage": 57.46, "elapsed_time": "0:03:09", "remaining_time": "0:02:19", "throughput": 2056.59, "total_tokens": 388816} {"current_steps": 1815, "total_steps": 3150, "loss": 0.0, "lr": 2.274522156599964e-05, "epoch": 5.761904761904762, "percentage": 57.62, "elapsed_time": "0:03:09", "remaining_time": "0:02:19", "throughput": 2057.82, "total_tokens": 389904} {"current_steps": 1820, "total_steps": 3150, "loss": 0.0119, "lr": 2.260730322207894e-05, "epoch": 5.777777777777778, "percentage": 57.78, "elapsed_time": "0:03:09", "remaining_time": "0:02:18", "throughput": 2058.96, "total_tokens": 390976} {"current_steps": 1825, "total_steps": 3150, "loss": 0.0019, "lr": 2.246945833295836e-05, "epoch": 5.7936507936507935, "percentage": 57.94, "elapsed_time": "0:03:10", "remaining_time": "0:02:18", "throughput": 2060.17, "total_tokens": 392064} {"current_steps": 1830, "total_steps": 3150, "loss": 0.0003, "lr": 2.2331691130418903e-05, "epoch": 5.809523809523809, "percentage": 58.1, "elapsed_time": "0:03:10", "remaining_time": "0:02:17", "throughput": 2061.54, "total_tokens": 393184} {"current_steps": 1835, "total_steps": 3150, "loss": 0.0351, "lr": 2.2194005843856636e-05, "epoch": 5.825396825396825, "percentage": 58.25, "elapsed_time": "0:03:11", "remaining_time": "0:02:16", "throughput": 2062.97, "total_tokens": 394320} {"current_steps": 1840, "total_steps": 3150, "loss": 0.0111, "lr": 2.2056406700152814e-05, "epoch": 5.841269841269841, "percentage": 58.41, "elapsed_time": "0:03:11", "remaining_time": "0:02:16", "throughput": 2063.86, "total_tokens": 395344} {"current_steps": 1845, "total_steps": 3150, "loss": 0.0063, "lr": 2.1918897923544196e-05, "epoch": 5.857142857142857, "percentage": 58.57, "elapsed_time": "0:03:11", "remaining_time": "0:02:15", "throughput": 2064.83, "total_tokens": 396384} {"current_steps": 1850, "total_steps": 3150, "loss": 0.0, "lr": 2.178148373549323e-05, "epoch": 5.8730158730158735, "percentage": 58.73, "elapsed_time": "0:03:12", "remaining_time": "0:02:15", "throughput": 2065.7, "total_tokens": 397408} {"current_steps": 1855, "total_steps": 3150, "loss": 0.0143, "lr": 2.164416835455862e-05, "epoch": 5.888888888888889, "percentage": 58.89, "elapsed_time": "0:03:12", "remaining_time": "0:02:14", "throughput": 2066.9, "total_tokens": 398496} {"current_steps": 1860, "total_steps": 3150, "loss": 0.0015, "lr": 2.150695599626565e-05, "epoch": 5.904761904761905, "percentage": 59.05, "elapsed_time": "0:03:13", "remaining_time": "0:02:14", "throughput": 2068.22, "total_tokens": 399616} {"current_steps": 1865, "total_steps": 3150, "loss": 0.0108, "lr": 2.1369850872976923e-05, "epoch": 5.920634920634921, "percentage": 59.21, "elapsed_time": "0:03:13", "remaining_time": "0:02:13", "throughput": 2069.09, "total_tokens": 400640} {"current_steps": 1870, "total_steps": 3150, "loss": 0.0001, "lr": 2.1232857193762924e-05, "epoch": 5.936507936507937, "percentage": 59.37, "elapsed_time": "0:03:14", "remaining_time": "0:02:12", "throughput": 2070.2, "total_tokens": 401712} {"current_steps": 1875, "total_steps": 3150, "loss": 0.0126, "lr": 2.1095979164272874e-05, "epoch": 5.9523809523809526, "percentage": 59.52, "elapsed_time": "0:03:14", "remaining_time": "0:02:12", "throughput": 2071.13, "total_tokens": 402752} {"current_steps": 1880, "total_steps": 3150, "loss": 0.0171, "lr": 2.0959220986605578e-05, "epoch": 5.968253968253968, "percentage": 59.68, "elapsed_time": "0:03:14", "remaining_time": "0:02:11", "throughput": 2072.19, "total_tokens": 403824} {"current_steps": 1885, "total_steps": 3150, "loss": 0.0181, "lr": 2.0822586859180468e-05, "epoch": 5.984126984126984, "percentage": 59.84, "elapsed_time": "0:03:15", "remaining_time": "0:02:11", "throughput": 2073.28, "total_tokens": 404880} {"current_steps": 1890, "total_steps": 3150, "loss": 0.001, "lr": 2.0686080976608653e-05, "epoch": 6.0, "percentage": 60.0, "elapsed_time": "0:03:15", "remaining_time": "0:02:10", "throughput": 2073.06, "total_tokens": 405888} {"current_steps": 1895, "total_steps": 3150, "loss": 0.0053, "lr": 2.05497075295642e-05, "epoch": 6.015873015873016, "percentage": 60.16, "elapsed_time": "0:03:16", "remaining_time": "0:02:10", "throughput": 2072.85, "total_tokens": 407040} {"current_steps": 1896, "total_steps": 3150, "eval_loss": 0.1907743513584137, "epoch": 6.019047619047619, "percentage": 60.19, "elapsed_time": "0:03:17", "remaining_time": "0:02:10", "throughput": 2057.15, "total_tokens": 407264} {"current_steps": 1900, "total_steps": 3150, "loss": 0.0014, "lr": 2.0413470704655445e-05, "epoch": 6.031746031746032, "percentage": 60.32, "elapsed_time": "0:03:20", "remaining_time": "0:02:11", "throughput": 2035.52, "total_tokens": 408112} {"current_steps": 1905, "total_steps": 3150, "loss": 0.0005, "lr": 2.02773746842965e-05, "epoch": 6.0476190476190474, "percentage": 60.48, "elapsed_time": "0:03:20", "remaining_time": "0:02:11", "throughput": 2036.55, "total_tokens": 409168} {"current_steps": 1910, "total_steps": 3150, "loss": 0.001, "lr": 2.0141423646578812e-05, "epoch": 6.063492063492063, "percentage": 60.63, "elapsed_time": "0:03:21", "remaining_time": "0:02:10", "throughput": 2037.45, "total_tokens": 410192} {"current_steps": 1915, "total_steps": 3150, "loss": 0.0192, "lr": 2.000562176514294e-05, "epoch": 6.079365079365079, "percentage": 60.79, "elapsed_time": "0:03:21", "remaining_time": "0:02:10", "throughput": 2038.55, "total_tokens": 411280} {"current_steps": 1920, "total_steps": 3150, "loss": 0.0, "lr": 1.9869973209050395e-05, "epoch": 6.095238095238095, "percentage": 60.95, "elapsed_time": "0:03:22", "remaining_time": "0:02:09", "throughput": 2039.56, "total_tokens": 412336} {"current_steps": 1925, "total_steps": 3150, "loss": 0.0153, "lr": 1.9734482142655686e-05, "epoch": 6.111111111111111, "percentage": 61.11, "elapsed_time": "0:03:22", "remaining_time": "0:02:08", "throughput": 2040.41, "total_tokens": 413376} {"current_steps": 1930, "total_steps": 3150, "loss": 0.0021, "lr": 1.9599152725478406e-05, "epoch": 6.1269841269841265, "percentage": 61.27, "elapsed_time": "0:03:23", "remaining_time": "0:02:08", "throughput": 2041.59, "total_tokens": 414464} {"current_steps": 1935, "total_steps": 3150, "loss": 0.0035, "lr": 1.9463989112075647e-05, "epoch": 6.142857142857143, "percentage": 61.43, "elapsed_time": "0:03:23", "remaining_time": "0:02:07", "throughput": 2042.5, "total_tokens": 415488} {"current_steps": 1940, "total_steps": 3150, "loss": 0.0016, "lr": 1.932899545191433e-05, "epoch": 6.158730158730159, "percentage": 61.59, "elapsed_time": "0:03:23", "remaining_time": "0:02:07", "throughput": 2043.24, "total_tokens": 416512} {"current_steps": 1945, "total_steps": 3150, "loss": 0.0786, "lr": 1.919417588924394e-05, "epoch": 6.174603174603175, "percentage": 61.75, "elapsed_time": "0:03:24", "remaining_time": "0:02:06", "throughput": 2044.37, "total_tokens": 417600} {"current_steps": 1950, "total_steps": 3150, "loss": 0.0001, "lr": 1.9059534562969195e-05, "epoch": 6.190476190476191, "percentage": 61.9, "elapsed_time": "0:03:24", "remaining_time": "0:02:05", "throughput": 2045.76, "total_tokens": 418752} {"current_steps": 1955, "total_steps": 3150, "loss": 0.0002, "lr": 1.892507560652306e-05, "epoch": 6.2063492063492065, "percentage": 62.06, "elapsed_time": "0:03:25", "remaining_time": "0:02:05", "throughput": 2046.91, "total_tokens": 419840} {"current_steps": 1960, "total_steps": 3150, "loss": 0.0, "lr": 1.8790803147739803e-05, "epoch": 6.222222222222222, "percentage": 62.22, "elapsed_time": "0:03:25", "remaining_time": "0:02:04", "throughput": 2047.68, "total_tokens": 420848} {"current_steps": 1965, "total_steps": 3150, "loss": 0.0022, "lr": 1.8656721308728305e-05, "epoch": 6.238095238095238, "percentage": 62.38, "elapsed_time": "0:03:25", "remaining_time": "0:02:04", "throughput": 2048.74, "total_tokens": 421920} {"current_steps": 1970, "total_steps": 3150, "loss": 0.0011, "lr": 1.8522834205745476e-05, "epoch": 6.253968253968254, "percentage": 62.54, "elapsed_time": "0:03:26", "remaining_time": "0:02:03", "throughput": 2049.6, "total_tokens": 422944} {"current_steps": 1975, "total_steps": 3150, "loss": 0.0071, "lr": 1.838914594906995e-05, "epoch": 6.26984126984127, "percentage": 62.7, "elapsed_time": "0:03:26", "remaining_time": "0:02:03", "throughput": 2050.89, "total_tokens": 424064} {"current_steps": 1980, "total_steps": 3150, "loss": 0.0125, "lr": 1.825566064287582e-05, "epoch": 6.285714285714286, "percentage": 62.86, "elapsed_time": "0:03:27", "remaining_time": "0:02:02", "throughput": 2051.96, "total_tokens": 425136} {"current_steps": 1985, "total_steps": 3150, "loss": 0.0, "lr": 1.8122382385106713e-05, "epoch": 6.301587301587301, "percentage": 63.02, "elapsed_time": "0:03:27", "remaining_time": "0:02:01", "throughput": 2053.09, "total_tokens": 426224} {"current_steps": 1990, "total_steps": 3150, "loss": 0.0006, "lr": 1.7989315267349936e-05, "epoch": 6.317460317460317, "percentage": 63.17, "elapsed_time": "0:03:28", "remaining_time": "0:02:01", "throughput": 2054.22, "total_tokens": 427312} {"current_steps": 1995, "total_steps": 3150, "loss": 0.0025, "lr": 1.78564633747109e-05, "epoch": 6.333333333333333, "percentage": 63.33, "elapsed_time": "0:03:28", "remaining_time": "0:02:00", "throughput": 2055.55, "total_tokens": 428448} {"current_steps": 2000, "total_steps": 3150, "loss": 0.02, "lr": 1.7723830785687674e-05, "epoch": 6.349206349206349, "percentage": 63.49, "elapsed_time": "0:03:28", "remaining_time": "0:02:00", "throughput": 2056.38, "total_tokens": 429472} {"current_steps": 2005, "total_steps": 3150, "loss": 0.0012, "lr": 1.759142157204583e-05, "epoch": 6.365079365079365, "percentage": 63.65, "elapsed_time": "0:03:29", "remaining_time": "0:01:59", "throughput": 2057.41, "total_tokens": 430544} {"current_steps": 2010, "total_steps": 3150, "loss": 0.001, "lr": 1.7459239798693364e-05, "epoch": 6.380952380952381, "percentage": 63.81, "elapsed_time": "0:03:29", "remaining_time": "0:01:58", "throughput": 2058.95, "total_tokens": 431728} {"current_steps": 2015, "total_steps": 3150, "loss": 0.0001, "lr": 1.7327289523555994e-05, "epoch": 6.396825396825397, "percentage": 63.97, "elapsed_time": "0:03:30", "remaining_time": "0:01:58", "throughput": 2059.98, "total_tokens": 432800} {"current_steps": 2020, "total_steps": 3150, "loss": 0.0047, "lr": 1.7195574797452508e-05, "epoch": 6.412698412698413, "percentage": 64.13, "elapsed_time": "0:03:30", "remaining_time": "0:01:57", "throughput": 2061.25, "total_tokens": 433936} {"current_steps": 2025, "total_steps": 3150, "loss": 0.0001, "lr": 1.7064099663970454e-05, "epoch": 6.428571428571429, "percentage": 64.29, "elapsed_time": "0:03:30", "remaining_time": "0:01:57", "throughput": 2062.11, "total_tokens": 434976} {"current_steps": 2030, "total_steps": 3150, "loss": 0.0002, "lr": 1.6932868159341962e-05, "epoch": 6.444444444444445, "percentage": 64.44, "elapsed_time": "0:03:31", "remaining_time": "0:01:56", "throughput": 2063.28, "total_tokens": 436080} {"current_steps": 2035, "total_steps": 3150, "loss": 0.0004, "lr": 1.6801884312319895e-05, "epoch": 6.4603174603174605, "percentage": 64.6, "elapsed_time": "0:03:31", "remaining_time": "0:01:56", "throughput": 2064.18, "total_tokens": 437136} {"current_steps": 2040, "total_steps": 3150, "loss": 0.0652, "lr": 1.6671152144054086e-05, "epoch": 6.476190476190476, "percentage": 64.76, "elapsed_time": "0:03:32", "remaining_time": "0:01:55", "throughput": 2065.28, "total_tokens": 438224} {"current_steps": 2045, "total_steps": 3150, "loss": 0.0, "lr": 1.6540675667967974e-05, "epoch": 6.492063492063492, "percentage": 64.92, "elapsed_time": "0:03:32", "remaining_time": "0:01:54", "throughput": 2066.3, "total_tokens": 439296} {"current_steps": 2050, "total_steps": 3150, "loss": 0.0004, "lr": 1.6410458889635326e-05, "epoch": 6.507936507936508, "percentage": 65.08, "elapsed_time": "0:03:33", "remaining_time": "0:01:54", "throughput": 2067.17, "total_tokens": 440336} {"current_steps": 2054, "total_steps": 3150, "eval_loss": 0.16089297831058502, "epoch": 6.520634920634921, "percentage": 65.21, "elapsed_time": "0:03:34", "remaining_time": "0:01:54", "throughput": 2053.2, "total_tokens": 441200} {"current_steps": 2055, "total_steps": 3150, "loss": 0.0001, "lr": 1.6280505806657314e-05, "epoch": 6.523809523809524, "percentage": 65.24, "elapsed_time": "0:03:36", "remaining_time": "0:01:55", "throughput": 2039.9, "total_tokens": 441408} {"current_steps": 2060, "total_steps": 3150, "loss": 0.0002, "lr": 1.615082040853975e-05, "epoch": 6.5396825396825395, "percentage": 65.4, "elapsed_time": "0:03:36", "remaining_time": "0:01:54", "throughput": 2040.93, "total_tokens": 442496} {"current_steps": 2065, "total_steps": 3150, "loss": 0.008, "lr": 1.6021406676570665e-05, "epoch": 6.555555555555555, "percentage": 65.56, "elapsed_time": "0:03:37", "remaining_time": "0:01:54", "throughput": 2041.79, "total_tokens": 443552} {"current_steps": 2070, "total_steps": 3150, "loss": 0.0062, "lr": 1.589226858369801e-05, "epoch": 6.571428571428571, "percentage": 65.71, "elapsed_time": "0:03:37", "remaining_time": "0:01:53", "throughput": 2042.53, "total_tokens": 444560} {"current_steps": 2075, "total_steps": 3150, "loss": 0.0023, "lr": 1.576341009440778e-05, "epoch": 6.587301587301587, "percentage": 65.87, "elapsed_time": "0:03:38", "remaining_time": "0:01:52", "throughput": 2043.53, "total_tokens": 445632} {"current_steps": 2080, "total_steps": 3150, "loss": 0.0004, "lr": 1.56348351646022e-05, "epoch": 6.603174603174603, "percentage": 66.03, "elapsed_time": "0:03:38", "remaining_time": "0:01:52", "throughput": 2044.57, "total_tokens": 446720} {"current_steps": 2085, "total_steps": 3150, "loss": 0.0002, "lr": 1.5506547741478388e-05, "epoch": 6.619047619047619, "percentage": 66.19, "elapsed_time": "0:03:38", "remaining_time": "0:01:51", "throughput": 2045.25, "total_tokens": 447712} {"current_steps": 2090, "total_steps": 3150, "loss": 0.0003, "lr": 1.537855176340708e-05, "epoch": 6.634920634920634, "percentage": 66.35, "elapsed_time": "0:03:39", "remaining_time": "0:01:51", "throughput": 2046.35, "total_tokens": 448800} {"current_steps": 2095, "total_steps": 3150, "loss": 0.0003, "lr": 1.5250851159811808e-05, "epoch": 6.650793650793651, "percentage": 66.51, "elapsed_time": "0:03:39", "remaining_time": "0:01:50", "throughput": 2047.17, "total_tokens": 449824} {"current_steps": 2100, "total_steps": 3150, "loss": 0.0004, "lr": 1.5123449851048203e-05, "epoch": 6.666666666666667, "percentage": 66.67, "elapsed_time": "0:03:40", "remaining_time": "0:01:50", "throughput": 2048.48, "total_tokens": 450960} {"current_steps": 2105, "total_steps": 3150, "loss": 0.0, "lr": 1.4996351748283688e-05, "epoch": 6.682539682539683, "percentage": 66.83, "elapsed_time": "0:03:40", "remaining_time": "0:01:49", "throughput": 2049.55, "total_tokens": 452048} {"current_steps": 2110, "total_steps": 3150, "loss": 0.0, "lr": 1.4869560753377376e-05, "epoch": 6.698412698412699, "percentage": 66.98, "elapsed_time": "0:03:40", "remaining_time": "0:01:48", "throughput": 2050.49, "total_tokens": 453104} {"current_steps": 2115, "total_steps": 3150, "loss": 0.0212, "lr": 1.47430807587603e-05, "epoch": 6.714285714285714, "percentage": 67.14, "elapsed_time": "0:03:41", "remaining_time": "0:01:48", "throughput": 2051.63, "total_tokens": 454208} {"current_steps": 2120, "total_steps": 3150, "loss": 0.0002, "lr": 1.4616915647315904e-05, "epoch": 6.73015873015873, "percentage": 67.3, "elapsed_time": "0:03:41", "remaining_time": "0:01:47", "throughput": 2052.5, "total_tokens": 455248} {"current_steps": 2125, "total_steps": 3150, "loss": 0.0024, "lr": 1.4491069292260868e-05, "epoch": 6.746031746031746, "percentage": 67.46, "elapsed_time": "0:03:42", "remaining_time": "0:01:47", "throughput": 2053.75, "total_tokens": 456384} {"current_steps": 2130, "total_steps": 3150, "loss": 0.0015, "lr": 1.4365545557026155e-05, "epoch": 6.761904761904762, "percentage": 67.62, "elapsed_time": "0:03:42", "remaining_time": "0:01:46", "throughput": 2055.01, "total_tokens": 457520} {"current_steps": 2135, "total_steps": 3150, "loss": 0.0, "lr": 1.4240348295138472e-05, "epoch": 6.777777777777778, "percentage": 67.78, "elapsed_time": "0:03:43", "remaining_time": "0:01:46", "throughput": 2055.93, "total_tokens": 458576} {"current_steps": 2140, "total_steps": 3150, "loss": 0.0001, "lr": 1.4115481350101894e-05, "epoch": 6.7936507936507935, "percentage": 67.94, "elapsed_time": "0:03:43", "remaining_time": "0:01:45", "throughput": 2056.92, "total_tokens": 459648} {"current_steps": 2145, "total_steps": 3150, "loss": 0.0001, "lr": 1.3990948555279921e-05, "epoch": 6.809523809523809, "percentage": 68.1, "elapsed_time": "0:03:43", "remaining_time": "0:01:44", "throughput": 2058.08, "total_tokens": 460768} {"current_steps": 2150, "total_steps": 3150, "loss": 0.0001, "lr": 1.3866753733777765e-05, "epoch": 6.825396825396825, "percentage": 68.25, "elapsed_time": "0:03:44", "remaining_time": "0:01:44", "throughput": 2058.98, "total_tokens": 461824} {"current_steps": 2155, "total_steps": 3150, "loss": 0.0003, "lr": 1.3742900698325034e-05, "epoch": 6.841269841269841, "percentage": 68.41, "elapsed_time": "0:03:44", "remaining_time": "0:01:43", "throughput": 2059.79, "total_tokens": 462864} {"current_steps": 2160, "total_steps": 3150, "loss": 0.0544, "lr": 1.3619393251158604e-05, "epoch": 6.857142857142857, "percentage": 68.57, "elapsed_time": "0:03:45", "remaining_time": "0:01:43", "throughput": 2060.49, "total_tokens": 463872} {"current_steps": 2165, "total_steps": 3150, "loss": 0.0001, "lr": 1.349623518390596e-05, "epoch": 6.8730158730158735, "percentage": 68.73, "elapsed_time": "0:03:45", "remaining_time": "0:01:42", "throughput": 2061.64, "total_tokens": 464992} {"current_steps": 2170, "total_steps": 3150, "loss": 0.0001, "lr": 1.337343027746874e-05, "epoch": 6.888888888888889, "percentage": 68.89, "elapsed_time": "0:03:45", "remaining_time": "0:01:42", "throughput": 2062.27, "total_tokens": 466000} {"current_steps": 2175, "total_steps": 3150, "loss": 0.0001, "lr": 1.3250982301906733e-05, "epoch": 6.904761904761905, "percentage": 69.05, "elapsed_time": "0:03:46", "remaining_time": "0:01:41", "throughput": 2063.15, "total_tokens": 467056} {"current_steps": 2180, "total_steps": 3150, "loss": 0.0001, "lr": 1.3128895016322063e-05, "epoch": 6.920634920634921, "percentage": 69.21, "elapsed_time": "0:03:46", "remaining_time": "0:01:40", "throughput": 2063.9, "total_tokens": 468080} {"current_steps": 2185, "total_steps": 3150, "loss": 0.0008, "lr": 1.3007172168743854e-05, "epoch": 6.936507936507937, "percentage": 69.37, "elapsed_time": "0:03:47", "remaining_time": "0:01:40", "throughput": 2064.92, "total_tokens": 469168} {"current_steps": 2190, "total_steps": 3150, "loss": 0.0081, "lr": 1.2885817496013081e-05, "epoch": 6.9523809523809526, "percentage": 69.52, "elapsed_time": "0:03:47", "remaining_time": "0:01:39", "throughput": 2065.79, "total_tokens": 470224} {"current_steps": 2195, "total_steps": 3150, "loss": 0.0017, "lr": 1.276483472366796e-05, "epoch": 6.968253968253968, "percentage": 69.68, "elapsed_time": "0:03:48", "remaining_time": "0:01:39", "throughput": 2066.95, "total_tokens": 471360} {"current_steps": 2200, "total_steps": 3150, "loss": 0.0018, "lr": 1.264422756582948e-05, "epoch": 6.984126984126984, "percentage": 69.84, "elapsed_time": "0:03:48", "remaining_time": "0:01:38", "throughput": 2067.88, "total_tokens": 472416} {"current_steps": 2205, "total_steps": 3150, "loss": 0.0003, "lr": 1.2523999725087455e-05, "epoch": 7.0, "percentage": 70.0, "elapsed_time": "0:03:48", "remaining_time": "0:01:38", "throughput": 2068.03, "total_tokens": 473488} {"current_steps": 2210, "total_steps": 3150, "loss": 0.0001, "lr": 1.240415489238679e-05, "epoch": 7.015873015873016, "percentage": 70.16, "elapsed_time": "0:03:49", "remaining_time": "0:01:37", "throughput": 2067.45, "total_tokens": 474544} {"current_steps": 2212, "total_steps": 3150, "eval_loss": 0.14928248524665833, "epoch": 7.022222222222222, "percentage": 70.22, "elapsed_time": "0:03:51", "remaining_time": "0:01:38", "throughput": 2054.27, "total_tokens": 475008} {"current_steps": 2215, "total_steps": 3150, "loss": 0.0001, "lr": 1.2284696746914216e-05, "epoch": 7.031746031746032, "percentage": 70.32, "elapsed_time": "0:03:54", "remaining_time": "0:01:38", "throughput": 2031.12, "total_tokens": 475664} {"current_steps": 2220, "total_steps": 3150, "loss": 0.0007, "lr": 1.2165628955985314e-05, "epoch": 7.0476190476190474, "percentage": 70.48, "elapsed_time": "0:03:54", "remaining_time": "0:01:38", "throughput": 2031.97, "total_tokens": 476720} {"current_steps": 2225, "total_steps": 3150, "loss": 0.0005, "lr": 1.204695517493197e-05, "epoch": 7.063492063492063, "percentage": 70.63, "elapsed_time": "0:03:55", "remaining_time": "0:01:37", "throughput": 2033.22, "total_tokens": 477888} {"current_steps": 2230, "total_steps": 3150, "loss": 0.0031, "lr": 1.1928679046990107e-05, "epoch": 7.079365079365079, "percentage": 70.79, "elapsed_time": "0:03:55", "remaining_time": "0:01:37", "throughput": 2034.04, "total_tokens": 478928} {"current_steps": 2235, "total_steps": 3150, "loss": 0.0003, "lr": 1.181080420318786e-05, "epoch": 7.095238095238095, "percentage": 70.95, "elapsed_time": "0:03:55", "remaining_time": "0:01:36", "throughput": 2034.76, "total_tokens": 479952} {"current_steps": 2240, "total_steps": 3150, "loss": 0.0002, "lr": 1.1693334262234116e-05, "epoch": 7.111111111111111, "percentage": 71.11, "elapsed_time": "0:03:56", "remaining_time": "0:01:35", "throughput": 2035.59, "total_tokens": 480992} {"current_steps": 2245, "total_steps": 3150, "loss": 0.0005, "lr": 1.1576272830407416e-05, "epoch": 7.1269841269841265, "percentage": 71.27, "elapsed_time": "0:03:56", "remaining_time": "0:01:35", "throughput": 2036.47, "total_tokens": 482048} {"current_steps": 2250, "total_steps": 3150, "loss": 0.0005, "lr": 1.1459623501445235e-05, "epoch": 7.142857142857143, "percentage": 71.43, "elapsed_time": "0:03:57", "remaining_time": "0:01:34", "throughput": 2037.17, "total_tokens": 483056} {"current_steps": 2255, "total_steps": 3150, "loss": 0.0011, "lr": 1.1343389856433658e-05, "epoch": 7.158730158730159, "percentage": 71.59, "elapsed_time": "0:03:57", "remaining_time": "0:01:34", "throughput": 2038.24, "total_tokens": 484160} {"current_steps": 2260, "total_steps": 3150, "loss": 0.0165, "lr": 1.122757546369744e-05, "epoch": 7.174603174603175, "percentage": 71.75, "elapsed_time": "0:03:57", "remaining_time": "0:01:33", "throughput": 2039.4, "total_tokens": 485280} {"current_steps": 2265, "total_steps": 3150, "loss": 0.0, "lr": 1.1112183878690485e-05, "epoch": 7.190476190476191, "percentage": 71.9, "elapsed_time": "0:03:58", "remaining_time": "0:01:33", "throughput": 2040.15, "total_tokens": 486304} {"current_steps": 2270, "total_steps": 3150, "loss": 0.0001, "lr": 1.099721864388665e-05, "epoch": 7.2063492063492065, "percentage": 72.06, "elapsed_time": "0:03:58", "remaining_time": "0:01:32", "throughput": 2040.83, "total_tokens": 487312} {"current_steps": 2275, "total_steps": 3150, "loss": 0.0035, "lr": 1.0882683288671041e-05, "epoch": 7.222222222222222, "percentage": 72.22, "elapsed_time": "0:03:59", "remaining_time": "0:01:31", "throughput": 2041.96, "total_tokens": 488432} {"current_steps": 2280, "total_steps": 3150, "loss": 0.0001, "lr": 1.0768581329231625e-05, "epoch": 7.238095238095238, "percentage": 72.38, "elapsed_time": "0:03:59", "remaining_time": "0:01:31", "throughput": 2042.89, "total_tokens": 489504} {"current_steps": 2285, "total_steps": 3150, "loss": 0.0, "lr": 1.0654916268451298e-05, "epoch": 7.253968253968254, "percentage": 72.54, "elapsed_time": "0:04:00", "remaining_time": "0:01:30", "throughput": 2044.01, "total_tokens": 490624} {"current_steps": 2290, "total_steps": 3150, "loss": 0.0012, "lr": 1.0541691595800337e-05, "epoch": 7.26984126984127, "percentage": 72.7, "elapsed_time": "0:04:00", "remaining_time": "0:01:30", "throughput": 2044.78, "total_tokens": 491664} {"current_steps": 2295, "total_steps": 3150, "loss": 0.0001, "lr": 1.0428910787229321e-05, "epoch": 7.285714285714286, "percentage": 72.86, "elapsed_time": "0:04:00", "remaining_time": "0:01:29", "throughput": 2045.75, "total_tokens": 492752} {"current_steps": 2300, "total_steps": 3150, "loss": 0.0084, "lr": 1.0316577305062352e-05, "epoch": 7.301587301587301, "percentage": 73.02, "elapsed_time": "0:04:01", "remaining_time": "0:01:29", "throughput": 2046.88, "total_tokens": 493888} {"current_steps": 2305, "total_steps": 3150, "loss": 0.0001, "lr": 1.0204694597890812e-05, "epoch": 7.317460317460317, "percentage": 73.17, "elapsed_time": "0:04:01", "remaining_time": "0:01:28", "throughput": 2047.79, "total_tokens": 494960} {"current_steps": 2310, "total_steps": 3150, "loss": 0.0, "lr": 1.0093266100467463e-05, "epoch": 7.333333333333333, "percentage": 73.33, "elapsed_time": "0:04:02", "remaining_time": "0:01:28", "throughput": 2048.76, "total_tokens": 496048} {"current_steps": 2315, "total_steps": 3150, "loss": 0.0032, "lr": 9.982295233601044e-06, "epoch": 7.349206349206349, "percentage": 73.49, "elapsed_time": "0:04:02", "remaining_time": "0:01:27", "throughput": 2049.67, "total_tokens": 497136} {"current_steps": 2320, "total_steps": 3150, "loss": 0.0001, "lr": 9.8717854040512e-06, "epoch": 7.365079365079365, "percentage": 73.65, "elapsed_time": "0:04:02", "remaining_time": "0:01:26", "throughput": 2050.33, "total_tokens": 498144} {"current_steps": 2325, "total_steps": 3150, "loss": 0.0001, "lr": 9.761740004423927e-06, "epoch": 7.380952380952381, "percentage": 73.81, "elapsed_time": "0:04:03", "remaining_time": "0:01:26", "throughput": 2051.23, "total_tokens": 499216} {"current_steps": 2330, "total_steps": 3150, "loss": 0.0001, "lr": 9.65216241306741e-06, "epoch": 7.396825396825397, "percentage": 73.97, "elapsed_time": "0:04:03", "remaining_time": "0:01:25", "throughput": 2051.81, "total_tokens": 500208} {"current_steps": 2335, "total_steps": 3150, "loss": 0.0002, "lr": 9.54305599396834e-06, "epoch": 7.412698412698413, "percentage": 74.13, "elapsed_time": "0:04:04", "remaining_time": "0:01:25", "throughput": 2052.71, "total_tokens": 501280} {"current_steps": 2340, "total_steps": 3150, "loss": 0.0002, "lr": 9.434424096648575e-06, "epoch": 7.428571428571429, "percentage": 74.29, "elapsed_time": "0:04:04", "remaining_time": "0:01:24", "throughput": 2053.66, "total_tokens": 502368} {"current_steps": 2345, "total_steps": 3150, "loss": 0.0, "lr": 9.326270056062397e-06, "epoch": 7.444444444444445, "percentage": 74.44, "elapsed_time": "0:04:05", "remaining_time": "0:01:24", "throughput": 2054.37, "total_tokens": 503392} {"current_steps": 2350, "total_steps": 3150, "loss": 0.0001, "lr": 9.21859719249403e-06, "epoch": 7.4603174603174605, "percentage": 74.6, "elapsed_time": "0:04:05", "remaining_time": "0:01:23", "throughput": 2055.26, "total_tokens": 504464} {"current_steps": 2355, "total_steps": 3150, "loss": 0.0, "lr": 9.11140881145581e-06, "epoch": 7.476190476190476, "percentage": 74.76, "elapsed_time": "0:04:05", "remaining_time": "0:01:22", "throughput": 2056.29, "total_tokens": 505568} {"current_steps": 2360, "total_steps": 3150, "loss": 0.0001, "lr": 9.00470820358663e-06, "epoch": 7.492063492063492, "percentage": 74.92, "elapsed_time": "0:04:06", "remaining_time": "0:01:22", "throughput": 2057.3, "total_tokens": 506672} {"current_steps": 2365, "total_steps": 3150, "loss": 0.0005, "lr": 8.898498644550974e-06, "epoch": 7.507936507936508, "percentage": 75.08, "elapsed_time": "0:04:06", "remaining_time": "0:01:21", "throughput": 2058.24, "total_tokens": 507760} {"current_steps": 2370, "total_steps": 3150, "loss": 0.0001, "lr": 8.792783394938312e-06, "epoch": 7.523809523809524, "percentage": 75.24, "elapsed_time": "0:04:07", "remaining_time": "0:01:21", "throughput": 2059.13, "total_tokens": 508832} {"current_steps": 2370, "total_steps": 3150, "eval_loss": 0.17293857038021088, "epoch": 7.523809523809524, "percentage": 75.24, "elapsed_time": "0:04:08", "remaining_time": "0:01:21", "throughput": 2046.43, "total_tokens": 508832} {"current_steps": 2375, "total_steps": 3150, "loss": 0.0001, "lr": 8.687565700163017e-06, "epoch": 7.5396825396825395, "percentage": 75.4, "elapsed_time": "0:04:11", "remaining_time": "0:01:21", "throughput": 2030.68, "total_tokens": 509888} {"current_steps": 2380, "total_steps": 3150, "loss": 0.0001, "lr": 8.582848790364739e-06, "epoch": 7.555555555555555, "percentage": 75.56, "elapsed_time": "0:04:11", "remaining_time": "0:01:21", "throughput": 2031.7, "total_tokens": 511008} {"current_steps": 2385, "total_steps": 3150, "loss": 0.0001, "lr": 8.478635880309254e-06, "epoch": 7.571428571428571, "percentage": 75.71, "elapsed_time": "0:04:11", "remaining_time": "0:01:20", "throughput": 2032.47, "total_tokens": 512064} {"current_steps": 2390, "total_steps": 3150, "loss": 0.0, "lr": 8.374930169289735e-06, "epoch": 7.587301587301587, "percentage": 75.87, "elapsed_time": "0:04:12", "remaining_time": "0:01:20", "throughput": 2033.39, "total_tokens": 513152} {"current_steps": 2395, "total_steps": 3150, "loss": 0.0004, "lr": 8.271734841028553e-06, "epoch": 7.603174603174603, "percentage": 76.03, "elapsed_time": "0:04:12", "remaining_time": "0:01:19", "throughput": 2034.59, "total_tokens": 514320} {"current_steps": 2400, "total_steps": 3150, "loss": 0.0003, "lr": 8.16905306357954e-06, "epoch": 7.619047619047619, "percentage": 76.19, "elapsed_time": "0:04:13", "remaining_time": "0:01:19", "throughput": 2035.36, "total_tokens": 515376} {"current_steps": 2405, "total_steps": 3150, "loss": 0.0066, "lr": 8.066887989230757e-06, "epoch": 7.634920634920634, "percentage": 76.35, "elapsed_time": "0:04:13", "remaining_time": "0:01:18", "throughput": 2036.09, "total_tokens": 516432} {"current_steps": 2410, "total_steps": 3150, "loss": 0.0, "lr": 7.965242754407651e-06, "epoch": 7.650793650793651, "percentage": 76.51, "elapsed_time": "0:04:14", "remaining_time": "0:01:18", "throughput": 2036.91, "total_tokens": 517504} {"current_steps": 2415, "total_steps": 3150, "loss": 0.0002, "lr": 7.864120479576864e-06, "epoch": 7.666666666666667, "percentage": 76.67, "elapsed_time": "0:04:14", "remaining_time": "0:01:17", "throughput": 2037.73, "total_tokens": 518560} {"current_steps": 2420, "total_steps": 3150, "loss": 0.0001, "lr": 7.763524269150316e-06, "epoch": 7.682539682539683, "percentage": 76.83, "elapsed_time": "0:04:14", "remaining_time": "0:01:16", "throughput": 2038.59, "total_tokens": 519632} {"current_steps": 2425, "total_steps": 3150, "loss": 0.0, "lr": 7.66345721139003e-06, "epoch": 7.698412698412699, "percentage": 76.98, "elapsed_time": "0:04:15", "remaining_time": "0:01:16", "throughput": 2039.5, "total_tokens": 520720} {"current_steps": 2430, "total_steps": 3150, "loss": 0.0011, "lr": 7.563922378313218e-06, "epoch": 7.714285714285714, "percentage": 77.14, "elapsed_time": "0:04:15", "remaining_time": "0:01:15", "throughput": 2040.32, "total_tokens": 521776} {"current_steps": 2435, "total_steps": 3150, "loss": 0.0001, "lr": 7.4649228255980506e-06, "epoch": 7.73015873015873, "percentage": 77.3, "elapsed_time": "0:04:16", "remaining_time": "0:01:15", "throughput": 2041.08, "total_tokens": 522816} {"current_steps": 2440, "total_steps": 3150, "loss": 0.0028, "lr": 7.366461592489782e-06, "epoch": 7.746031746031746, "percentage": 77.46, "elapsed_time": "0:04:16", "remaining_time": "0:01:14", "throughput": 2041.85, "total_tokens": 523872} {"current_steps": 2445, "total_steps": 3150, "loss": 0.0, "lr": 7.268541701707493e-06, "epoch": 7.761904761904762, "percentage": 77.62, "elapsed_time": "0:04:16", "remaining_time": "0:01:14", "throughput": 2043.06, "total_tokens": 525040} {"current_steps": 2450, "total_steps": 3150, "loss": 0.0, "lr": 7.1711661593512694e-06, "epoch": 7.777777777777778, "percentage": 77.78, "elapsed_time": "0:04:17", "remaining_time": "0:01:13", "throughput": 2043.63, "total_tokens": 526032} {"current_steps": 2455, "total_steps": 3150, "loss": 0.0001, "lr": 7.074337954809945e-06, "epoch": 7.7936507936507935, "percentage": 77.94, "elapsed_time": "0:04:17", "remaining_time": "0:01:12", "throughput": 2044.49, "total_tokens": 527104} {"current_steps": 2460, "total_steps": 3150, "loss": 0.0, "lr": 6.9780600606692896e-06, "epoch": 7.809523809523809, "percentage": 78.1, "elapsed_time": "0:04:18", "remaining_time": "0:01:12", "throughput": 2045.46, "total_tokens": 528208} {"current_steps": 2465, "total_steps": 3150, "loss": 0.0001, "lr": 6.882335432620779e-06, "epoch": 7.825396825396825, "percentage": 78.25, "elapsed_time": "0:04:18", "remaining_time": "0:01:11", "throughput": 2046.56, "total_tokens": 529344} {"current_steps": 2470, "total_steps": 3150, "loss": 0.0, "lr": 6.787167009370843e-06, "epoch": 7.841269841269841, "percentage": 78.41, "elapsed_time": "0:04:19", "remaining_time": "0:01:11", "throughput": 2047.19, "total_tokens": 530352} {"current_steps": 2475, "total_steps": 3150, "loss": 0.0, "lr": 6.6925577125506705e-06, "epoch": 7.857142857142857, "percentage": 78.57, "elapsed_time": "0:04:19", "remaining_time": "0:01:10", "throughput": 2047.87, "total_tokens": 531376} {"current_steps": 2480, "total_steps": 3150, "loss": 0.0017, "lr": 6.598510446626482e-06, "epoch": 7.8730158730158735, "percentage": 78.73, "elapsed_time": "0:04:19", "remaining_time": "0:01:10", "throughput": 2048.74, "total_tokens": 532464} {"current_steps": 2485, "total_steps": 3150, "loss": 0.0005, "lr": 6.505028098810406e-06, "epoch": 7.888888888888889, "percentage": 78.89, "elapsed_time": "0:04:20", "remaining_time": "0:01:09", "throughput": 2049.47, "total_tokens": 533504} {"current_steps": 2490, "total_steps": 3150, "loss": 0.0, "lr": 6.412113538971781e-06, "epoch": 7.904761904761905, "percentage": 79.05, "elapsed_time": "0:04:20", "remaining_time": "0:01:09", "throughput": 2050.18, "total_tokens": 534544} {"current_steps": 2495, "total_steps": 3150, "loss": 0.0001, "lr": 6.319769619549129e-06, "epoch": 7.920634920634921, "percentage": 79.21, "elapsed_time": "0:04:21", "remaining_time": "0:01:08", "throughput": 2050.98, "total_tokens": 535600} {"current_steps": 2500, "total_steps": 3150, "loss": 0.0, "lr": 6.22799917546252e-06, "epoch": 7.936507936507937, "percentage": 79.37, "elapsed_time": "0:04:21", "remaining_time": "0:01:08", "throughput": 2051.99, "total_tokens": 536720} {"current_steps": 2505, "total_steps": 3150, "loss": 0.0, "lr": 6.1368050240265925e-06, "epoch": 7.9523809523809526, "percentage": 79.52, "elapsed_time": "0:04:21", "remaining_time": "0:01:07", "throughput": 2052.72, "total_tokens": 537760} {"current_steps": 2510, "total_steps": 3150, "loss": 0.0001, "lr": 6.046189964864019e-06, "epoch": 7.968253968253968, "percentage": 79.68, "elapsed_time": "0:04:22", "remaining_time": "0:01:06", "throughput": 2053.67, "total_tokens": 538864} {"current_steps": 2515, "total_steps": 3150, "loss": 0.0, "lr": 5.9561567798195855e-06, "epoch": 7.984126984126984, "percentage": 79.84, "elapsed_time": "0:04:22", "remaining_time": "0:01:06", "throughput": 2054.64, "total_tokens": 539952} {"current_steps": 2520, "total_steps": 3150, "loss": 0.0005, "lr": 5.8667082328747795e-06, "epoch": 8.0, "percentage": 80.0, "elapsed_time": "0:04:23", "remaining_time": "0:01:05", "throughput": 2054.73, "total_tokens": 541008} {"current_steps": 2525, "total_steps": 3150, "loss": 0.0001, "lr": 5.7778470700629615e-06, "epoch": 8.015873015873016, "percentage": 80.16, "elapsed_time": "0:04:23", "remaining_time": "0:01:05", "throughput": 2054.35, "total_tokens": 542096} {"current_steps": 2528, "total_steps": 3150, "eval_loss": 0.1764838993549347, "epoch": 8.025396825396825, "percentage": 80.25, "elapsed_time": "0:04:25", "remaining_time": "0:01:05", "throughput": 2042.96, "total_tokens": 542720} {"current_steps": 2530, "total_steps": 3150, "loss": 0.0, "lr": 5.689576019385015e-06, "epoch": 8.031746031746032, "percentage": 80.32, "elapsed_time": "0:04:27", "remaining_time": "0:01:05", "throughput": 2032.32, "total_tokens": 543104} {"current_steps": 2535, "total_steps": 3150, "loss": 0.0, "lr": 5.601897790725643e-06, "epoch": 8.047619047619047, "percentage": 80.48, "elapsed_time": "0:04:27", "remaining_time": "0:01:04", "throughput": 2033.12, "total_tokens": 544176} {"current_steps": 2540, "total_steps": 3150, "loss": 0.0001, "lr": 5.514815075770144e-06, "epoch": 8.063492063492063, "percentage": 80.63, "elapsed_time": "0:04:28", "remaining_time": "0:01:04", "throughput": 2034.02, "total_tokens": 545280} {"current_steps": 2545, "total_steps": 3150, "loss": 0.0001, "lr": 5.428330547921809e-06, "epoch": 8.079365079365079, "percentage": 80.79, "elapsed_time": "0:04:28", "remaining_time": "0:01:03", "throughput": 2034.86, "total_tokens": 546368} {"current_steps": 2550, "total_steps": 3150, "loss": 0.0001, "lr": 5.342446862219827e-06, "epoch": 8.095238095238095, "percentage": 80.95, "elapsed_time": "0:04:28", "remaining_time": "0:01:03", "throughput": 2035.54, "total_tokens": 547392} {"current_steps": 2555, "total_steps": 3150, "loss": 0.0, "lr": 5.25716665525777e-06, "epoch": 8.11111111111111, "percentage": 81.11, "elapsed_time": "0:04:29", "remaining_time": "0:01:02", "throughput": 2036.47, "total_tokens": 548496} {"current_steps": 2560, "total_steps": 3150, "loss": 0.0, "lr": 5.172492545102673e-06, "epoch": 8.126984126984127, "percentage": 81.27, "elapsed_time": "0:04:29", "remaining_time": "0:01:02", "throughput": 2037.13, "total_tokens": 549520} {"current_steps": 2565, "total_steps": 3150, "loss": 0.0, "lr": 5.088427131214657e-06, "epoch": 8.142857142857142, "percentage": 81.43, "elapsed_time": "0:04:30", "remaining_time": "0:01:01", "throughput": 2037.8, "total_tokens": 550544} {"current_steps": 2570, "total_steps": 3150, "loss": 0.0, "lr": 5.004972994367102e-06, "epoch": 8.158730158730158, "percentage": 81.59, "elapsed_time": "0:04:30", "remaining_time": "0:01:01", "throughput": 2038.57, "total_tokens": 551616} {"current_steps": 2575, "total_steps": 3150, "loss": 0.0, "lr": 4.922132696567464e-06, "epoch": 8.174603174603174, "percentage": 81.75, "elapsed_time": "0:04:31", "remaining_time": "0:01:00", "throughput": 2039.51, "total_tokens": 552720} {"current_steps": 2580, "total_steps": 3150, "loss": 0.0, "lr": 4.839908780978547e-06, "epoch": 8.19047619047619, "percentage": 81.9, "elapsed_time": "0:04:31", "remaining_time": "0:00:59", "throughput": 2040.23, "total_tokens": 553776} {"current_steps": 2585, "total_steps": 3150, "loss": 0.0, "lr": 4.758303771840525e-06, "epoch": 8.206349206349206, "percentage": 82.06, "elapsed_time": "0:04:31", "remaining_time": "0:00:59", "throughput": 2041.32, "total_tokens": 554928} {"current_steps": 2590, "total_steps": 3150, "loss": 0.0, "lr": 4.677320174393382e-06, "epoch": 8.222222222222221, "percentage": 82.22, "elapsed_time": "0:04:32", "remaining_time": "0:00:58", "throughput": 2042.02, "total_tokens": 555968} {"current_steps": 2595, "total_steps": 3150, "loss": 0.0001, "lr": 4.596960474800013e-06, "epoch": 8.238095238095237, "percentage": 82.38, "elapsed_time": "0:04:32", "remaining_time": "0:00:58", "throughput": 2042.68, "total_tokens": 556992} {"current_steps": 2600, "total_steps": 3150, "loss": 0.0, "lr": 4.517227140069919e-06, "epoch": 8.253968253968253, "percentage": 82.54, "elapsed_time": "0:04:33", "remaining_time": "0:00:57", "throughput": 2043.44, "total_tokens": 558048} {"current_steps": 2605, "total_steps": 3150, "loss": 0.0038, "lr": 4.438122617983443e-06, "epoch": 8.26984126984127, "percentage": 82.7, "elapsed_time": "0:04:33", "remaining_time": "0:00:57", "throughput": 2044.29, "total_tokens": 559136} {"current_steps": 2610, "total_steps": 3150, "loss": 0.0001, "lr": 4.359649337016664e-06, "epoch": 8.285714285714286, "percentage": 82.86, "elapsed_time": "0:04:33", "remaining_time": "0:00:56", "throughput": 2045.18, "total_tokens": 560256} {"current_steps": 2615, "total_steps": 3150, "loss": 0.0, "lr": 4.2818097062667956e-06, "epoch": 8.301587301587302, "percentage": 83.02, "elapsed_time": "0:04:34", "remaining_time": "0:00:56", "throughput": 2046.05, "total_tokens": 561344} {"current_steps": 2620, "total_steps": 3150, "loss": 0.0001, "lr": 4.204606115378282e-06, "epoch": 8.317460317460318, "percentage": 83.17, "elapsed_time": "0:04:34", "remaining_time": "0:00:55", "throughput": 2046.91, "total_tokens": 562432} {"current_steps": 2625, "total_steps": 3150, "loss": 0.0, "lr": 4.128040934469363e-06, "epoch": 8.333333333333334, "percentage": 83.33, "elapsed_time": "0:04:35", "remaining_time": "0:00:55", "throughput": 2047.89, "total_tokens": 563552} {"current_steps": 2630, "total_steps": 3150, "loss": 0.0001, "lr": 4.052116514059401e-06, "epoch": 8.34920634920635, "percentage": 83.49, "elapsed_time": "0:04:35", "remaining_time": "0:00:54", "throughput": 2048.69, "total_tokens": 564624} {"current_steps": 2635, "total_steps": 3150, "loss": 0.0001, "lr": 3.976835184996644e-06, "epoch": 8.365079365079366, "percentage": 83.65, "elapsed_time": "0:04:36", "remaining_time": "0:00:53", "throughput": 2049.66, "total_tokens": 565744} {"current_steps": 2640, "total_steps": 3150, "loss": 0.0, "lr": 3.9021992583867325e-06, "epoch": 8.380952380952381, "percentage": 83.81, "elapsed_time": "0:04:36", "remaining_time": "0:00:53", "throughput": 2050.41, "total_tokens": 566800} {"current_steps": 2645, "total_steps": 3150, "loss": 0.0, "lr": 3.8282110255216915e-06, "epoch": 8.396825396825397, "percentage": 83.97, "elapsed_time": "0:04:36", "remaining_time": "0:00:52", "throughput": 2050.93, "total_tokens": 567792} {"current_steps": 2650, "total_steps": 3150, "loss": 0.0016, "lr": 3.75487275780963e-06, "epoch": 8.412698412698413, "percentage": 84.13, "elapsed_time": "0:04:37", "remaining_time": "0:00:52", "throughput": 2051.62, "total_tokens": 568832} {"current_steps": 2655, "total_steps": 3150, "loss": 0.0, "lr": 3.682186706704982e-06, "epoch": 8.428571428571429, "percentage": 84.29, "elapsed_time": "0:04:37", "remaining_time": "0:00:51", "throughput": 2052.41, "total_tokens": 569904} {"current_steps": 2660, "total_steps": 3150, "loss": 0.0, "lr": 3.6101551036394316e-06, "epoch": 8.444444444444445, "percentage": 84.44, "elapsed_time": "0:04:38", "remaining_time": "0:00:51", "throughput": 2053.1, "total_tokens": 570944} {"current_steps": 2665, "total_steps": 3150, "loss": 0.0, "lr": 3.5387801599533475e-06, "epoch": 8.46031746031746, "percentage": 84.6, "elapsed_time": "0:04:38", "remaining_time": "0:00:50", "throughput": 2053.89, "total_tokens": 572016} {"current_steps": 2670, "total_steps": 3150, "loss": 0.0001, "lr": 3.4680640668279444e-06, "epoch": 8.476190476190476, "percentage": 84.76, "elapsed_time": "0:04:38", "remaining_time": "0:00:50", "throughput": 2054.79, "total_tokens": 573120} {"current_steps": 2675, "total_steps": 3150, "loss": 0.0, "lr": 3.398008995217988e-06, "epoch": 8.492063492063492, "percentage": 84.92, "elapsed_time": "0:04:39", "remaining_time": "0:00:49", "throughput": 2055.52, "total_tokens": 574176} {"current_steps": 2680, "total_steps": 3150, "loss": 0.0, "lr": 3.328617095785172e-06, "epoch": 8.507936507936508, "percentage": 85.08, "elapsed_time": "0:04:39", "remaining_time": "0:00:49", "throughput": 2056.19, "total_tokens": 575216} {"current_steps": 2685, "total_steps": 3150, "loss": 0.0, "lr": 3.2598904988320578e-06, "epoch": 8.523809523809524, "percentage": 85.24, "elapsed_time": "0:04:40", "remaining_time": "0:00:48", "throughput": 2057.02, "total_tokens": 576304} {"current_steps": 2686, "total_steps": 3150, "eval_loss": 0.17982663214206696, "epoch": 8.526984126984127, "percentage": 85.27, "elapsed_time": "0:04:41", "remaining_time": "0:00:48", "throughput": 2045.99, "total_tokens": 576512} {"current_steps": 2690, "total_steps": 3150, "loss": 0.0, "lr": 3.1918313142367232e-06, "epoch": 8.53968253968254, "percentage": 85.4, "elapsed_time": "0:04:43", "remaining_time": "0:00:48", "throughput": 2035.16, "total_tokens": 577360} {"current_steps": 2695, "total_steps": 3150, "loss": 0.0001, "lr": 3.124441631387931e-06, "epoch": 8.555555555555555, "percentage": 85.56, "elapsed_time": "0:04:44", "remaining_time": "0:00:47", "throughput": 2036.09, "total_tokens": 578480} {"current_steps": 2700, "total_steps": 3150, "loss": 0.0004, "lr": 3.057723519121047e-06, "epoch": 8.571428571428571, "percentage": 85.71, "elapsed_time": "0:04:44", "remaining_time": "0:00:47", "throughput": 2036.87, "total_tokens": 579568} {"current_steps": 2705, "total_steps": 3150, "loss": 0.0004, "lr": 2.991679025654476e-06, "epoch": 8.587301587301587, "percentage": 85.87, "elapsed_time": "0:04:44", "remaining_time": "0:00:46", "throughput": 2037.67, "total_tokens": 580656} {"current_steps": 2710, "total_steps": 3150, "loss": 0.0, "lr": 2.9263101785268254e-06, "epoch": 8.603174603174603, "percentage": 86.03, "elapsed_time": "0:04:45", "remaining_time": "0:00:46", "throughput": 2038.48, "total_tokens": 581744} {"current_steps": 2715, "total_steps": 3150, "loss": 0.0, "lr": 2.8616189845346223e-06, "epoch": 8.619047619047619, "percentage": 86.19, "elapsed_time": "0:04:45", "remaining_time": "0:00:45", "throughput": 2039.21, "total_tokens": 582800} {"current_steps": 2720, "total_steps": 3150, "loss": 0.0, "lr": 2.7976074296707318e-06, "epoch": 8.634920634920634, "percentage": 86.35, "elapsed_time": "0:04:46", "remaining_time": "0:00:45", "throughput": 2039.84, "total_tokens": 583840} {"current_steps": 2725, "total_steps": 3150, "loss": 0.0001, "lr": 2.7342774790633686e-06, "epoch": 8.65079365079365, "percentage": 86.51, "elapsed_time": "0:04:46", "remaining_time": "0:00:44", "throughput": 2040.57, "total_tokens": 584896} {"current_steps": 2730, "total_steps": 3150, "loss": 0.0, "lr": 2.671631076915798e-06, "epoch": 8.666666666666666, "percentage": 86.67, "elapsed_time": "0:04:47", "remaining_time": "0:00:44", "throughput": 2041.45, "total_tokens": 586000} {"current_steps": 2735, "total_steps": 3150, "loss": 0.0001, "lr": 2.609670146446619e-06, "epoch": 8.682539682539682, "percentage": 86.83, "elapsed_time": "0:04:47", "remaining_time": "0:00:43", "throughput": 2042.24, "total_tokens": 587072} {"current_steps": 2740, "total_steps": 3150, "loss": 0.0, "lr": 2.5483965898307316e-06, "epoch": 8.698412698412698, "percentage": 86.98, "elapsed_time": "0:04:47", "remaining_time": "0:00:43", "throughput": 2043.12, "total_tokens": 588176} {"current_steps": 2745, "total_steps": 3150, "loss": 0.0, "lr": 2.487812288140945e-06, "epoch": 8.714285714285714, "percentage": 87.14, "elapsed_time": "0:04:48", "remaining_time": "0:00:42", "throughput": 2043.84, "total_tokens": 589232} {"current_steps": 2750, "total_steps": 3150, "loss": 0.0, "lr": 2.4279191012902352e-06, "epoch": 8.73015873015873, "percentage": 87.3, "elapsed_time": "0:04:48", "remaining_time": "0:00:41", "throughput": 2044.4, "total_tokens": 590240} {"current_steps": 2755, "total_steps": 3150, "loss": 0.0, "lr": 2.3687188679746315e-06, "epoch": 8.746031746031747, "percentage": 87.46, "elapsed_time": "0:04:49", "remaining_time": "0:00:41", "throughput": 2045.17, "total_tokens": 591312} {"current_steps": 2760, "total_steps": 3150, "loss": 0.0, "lr": 2.310213405616779e-06, "epoch": 8.761904761904763, "percentage": 87.62, "elapsed_time": "0:04:49", "remaining_time": "0:00:40", "throughput": 2045.95, "total_tokens": 592384} {"current_steps": 2765, "total_steps": 3150, "loss": 0.0, "lr": 2.2524045103101325e-06, "epoch": 8.777777777777779, "percentage": 87.78, "elapsed_time": "0:04:49", "remaining_time": "0:00:40", "throughput": 2046.71, "total_tokens": 593456} {"current_steps": 2770, "total_steps": 3150, "loss": 0.0, "lr": 2.195293956763847e-06, "epoch": 8.793650793650794, "percentage": 87.94, "elapsed_time": "0:04:50", "remaining_time": "0:00:39", "throughput": 2047.52, "total_tokens": 594544} {"current_steps": 2775, "total_steps": 3150, "loss": 0.0001, "lr": 2.1388834982482535e-06, "epoch": 8.80952380952381, "percentage": 88.1, "elapsed_time": "0:04:50", "remaining_time": "0:00:39", "throughput": 2048.33, "total_tokens": 595632} {"current_steps": 2780, "total_steps": 3150, "loss": 0.0, "lr": 2.0831748665410765e-06, "epoch": 8.825396825396826, "percentage": 88.25, "elapsed_time": "0:04:51", "remaining_time": "0:00:38", "throughput": 2049.19, "total_tokens": 596736} {"current_steps": 2785, "total_steps": 3150, "loss": 0.0, "lr": 2.0281697718742333e-06, "epoch": 8.841269841269842, "percentage": 88.41, "elapsed_time": "0:04:51", "remaining_time": "0:00:38", "throughput": 2050.0, "total_tokens": 597824} {"current_steps": 2790, "total_steps": 3150, "loss": 0.0001, "lr": 1.973869902881345e-06, "epoch": 8.857142857142858, "percentage": 88.57, "elapsed_time": "0:04:52", "remaining_time": "0:00:37", "throughput": 2050.76, "total_tokens": 598896} {"current_steps": 2795, "total_steps": 3150, "loss": 0.0001, "lr": 1.9202769265458996e-06, "epoch": 8.873015873015873, "percentage": 88.73, "elapsed_time": "0:04:52", "remaining_time": "0:00:37", "throughput": 2051.56, "total_tokens": 599984} {"current_steps": 2800, "total_steps": 3150, "loss": 0.0001, "lr": 1.8673924881500826e-06, "epoch": 8.88888888888889, "percentage": 88.89, "elapsed_time": "0:04:52", "remaining_time": "0:00:36", "throughput": 2052.57, "total_tokens": 601136} {"current_steps": 2805, "total_steps": 3150, "loss": 0.0, "lr": 1.8152182112242428e-06, "epoch": 8.904761904761905, "percentage": 89.05, "elapsed_time": "0:04:53", "remaining_time": "0:00:36", "throughput": 2053.32, "total_tokens": 602208} {"current_steps": 2810, "total_steps": 3150, "loss": 0.0, "lr": 1.7637556974970799e-06, "epoch": 8.920634920634921, "percentage": 89.21, "elapsed_time": "0:04:53", "remaining_time": "0:00:35", "throughput": 2053.86, "total_tokens": 603216} {"current_steps": 2815, "total_steps": 3150, "loss": 0.0, "lr": 1.713006526846439e-06, "epoch": 8.936507936507937, "percentage": 89.37, "elapsed_time": "0:04:54", "remaining_time": "0:00:35", "throughput": 2054.71, "total_tokens": 604320} {"current_steps": 2820, "total_steps": 3150, "loss": 0.0, "lr": 1.6629722572508554e-06, "epoch": 8.952380952380953, "percentage": 89.52, "elapsed_time": "0:04:54", "remaining_time": "0:00:34", "throughput": 2055.4, "total_tokens": 605376} {"current_steps": 2825, "total_steps": 3150, "loss": 0.0001, "lr": 1.6136544247416785e-06, "epoch": 8.968253968253968, "percentage": 89.68, "elapsed_time": "0:04:54", "remaining_time": "0:00:33", "throughput": 2056.08, "total_tokens": 606432} {"current_steps": 2830, "total_steps": 3150, "loss": 0.0001, "lr": 1.5650545433559372e-06, "epoch": 8.984126984126984, "percentage": 89.84, "elapsed_time": "0:04:55", "remaining_time": "0:00:33", "throughput": 2056.77, "total_tokens": 607472} {"current_steps": 2835, "total_steps": 3150, "loss": 0.0, "lr": 1.5171741050898637e-06, "epoch": 9.0, "percentage": 90.0, "elapsed_time": "0:04:55", "remaining_time": "0:00:32", "throughput": 2057.15, "total_tokens": 608624} {"current_steps": 2840, "total_steps": 3150, "loss": 0.0, "lr": 1.4700145798530824e-06, "epoch": 9.015873015873016, "percentage": 90.16, "elapsed_time": "0:04:56", "remaining_time": "0:00:32", "throughput": 2057.02, "total_tokens": 609776} {"current_steps": 2844, "total_steps": 3150, "eval_loss": 0.17914976179599762, "epoch": 9.028571428571428, "percentage": 90.29, "elapsed_time": "0:04:58", "remaining_time": "0:00:32", "throughput": 2047.21, "total_tokens": 610688} {"current_steps": 2845, "total_steps": 3150, "loss": 0.0001, "lr": 1.4235774154234855e-06, "epoch": 9.031746031746032, "percentage": 90.32, "elapsed_time": "0:04:59", "remaining_time": "0:00:32", "throughput": 2037.79, "total_tokens": 610912} {"current_steps": 2850, "total_steps": 3150, "loss": 0.0, "lr": 1.3778640374027985e-06, "epoch": 9.047619047619047, "percentage": 90.48, "elapsed_time": "0:05:00", "remaining_time": "0:00:31", "throughput": 2038.38, "total_tokens": 611952} {"current_steps": 2855, "total_steps": 3150, "loss": 0.0, "lr": 1.3328758491727767e-06, "epoch": 9.063492063492063, "percentage": 90.63, "elapsed_time": "0:05:00", "remaining_time": "0:00:31", "throughput": 2039.12, "total_tokens": 613056} {"current_steps": 2860, "total_steps": 3150, "loss": 0.0, "lr": 1.2886142318521781e-06, "epoch": 9.079365079365079, "percentage": 90.79, "elapsed_time": "0:05:01", "remaining_time": "0:00:30", "throughput": 2039.71, "total_tokens": 614096} {"current_steps": 2865, "total_steps": 3150, "loss": 0.0001, "lr": 1.245080544254318e-06, "epoch": 9.095238095238095, "percentage": 90.95, "elapsed_time": "0:05:01", "remaining_time": "0:00:29", "throughput": 2040.74, "total_tokens": 615264} {"current_steps": 2870, "total_steps": 3150, "loss": 0.0001, "lr": 1.2022761228453805e-06, "epoch": 9.11111111111111, "percentage": 91.11, "elapsed_time": "0:05:01", "remaining_time": "0:00:29", "throughput": 2041.58, "total_tokens": 616368} {"current_steps": 2875, "total_steps": 3150, "loss": 0.0, "lr": 1.160202281703371e-06, "epoch": 9.126984126984127, "percentage": 91.27, "elapsed_time": "0:05:02", "remaining_time": "0:00:28", "throughput": 2042.23, "total_tokens": 617424} {"current_steps": 2880, "total_steps": 3150, "loss": 0.0, "lr": 1.1188603124777901e-06, "epoch": 9.142857142857142, "percentage": 91.43, "elapsed_time": "0:05:02", "remaining_time": "0:00:28", "throughput": 2042.98, "total_tokens": 618512} {"current_steps": 2885, "total_steps": 3150, "loss": 0.0, "lr": 1.0782514843499653e-06, "epoch": 9.158730158730158, "percentage": 91.59, "elapsed_time": "0:05:03", "remaining_time": "0:00:27", "throughput": 2043.57, "total_tokens": 619568} {"current_steps": 2890, "total_steps": 3150, "loss": 0.0001, "lr": 1.038377043994107e-06, "epoch": 9.174603174603174, "percentage": 91.75, "elapsed_time": "0:05:03", "remaining_time": "0:00:27", "throughput": 2044.14, "total_tokens": 620608} {"current_steps": 2895, "total_steps": 3150, "loss": 0.0001, "lr": 9.992382155390195e-07, "epoch": 9.19047619047619, "percentage": 91.9, "elapsed_time": "0:05:04", "remaining_time": "0:00:26", "throughput": 2044.92, "total_tokens": 621696} {"current_steps": 2900, "total_steps": 3150, "loss": 0.0, "lr": 9.608362005305238e-07, "epoch": 9.206349206349206, "percentage": 92.06, "elapsed_time": "0:05:04", "remaining_time": "0:00:26", "throughput": 2045.45, "total_tokens": 622704} {"current_steps": 2905, "total_steps": 3150, "loss": 0.0, "lr": 9.23172177894574e-07, "epoch": 9.222222222222221, "percentage": 92.22, "elapsed_time": "0:05:04", "remaining_time": "0:00:25", "throughput": 2046.17, "total_tokens": 623776} {"current_steps": 2910, "total_steps": 3150, "loss": 0.0, "lr": 8.862473039010749e-07, "epoch": 9.238095238095237, "percentage": 92.38, "elapsed_time": "0:05:05", "remaining_time": "0:00:25", "throughput": 2046.89, "total_tokens": 624848} {"current_steps": 2915, "total_steps": 3150, "loss": 0.0001, "lr": 8.500627121283522e-07, "epoch": 9.253968253968253, "percentage": 92.54, "elapsed_time": "0:05:05", "remaining_time": "0:00:24", "throughput": 2047.86, "total_tokens": 626000} {"current_steps": 2920, "total_steps": 3150, "loss": 0.0, "lr": 8.146195134284052e-07, "epoch": 9.26984126984127, "percentage": 92.7, "elapsed_time": "0:05:06", "remaining_time": "0:00:24", "throughput": 2048.78, "total_tokens": 627136} {"current_steps": 2925, "total_steps": 3150, "loss": 0.0, "lr": 7.79918795892734e-07, "epoch": 9.285714285714286, "percentage": 92.86, "elapsed_time": "0:05:06", "remaining_time": "0:00:23", "throughput": 2049.39, "total_tokens": 628176} {"current_steps": 2930, "total_steps": 3150, "loss": 0.0001, "lr": 7.45961624819011e-07, "epoch": 9.301587301587302, "percentage": 93.02, "elapsed_time": "0:05:06", "remaining_time": "0:00:23", "throughput": 2050.11, "total_tokens": 629248} {"current_steps": 2935, "total_steps": 3150, "loss": 0.0, "lr": 7.127490426783123e-07, "epoch": 9.317460317460318, "percentage": 93.17, "elapsed_time": "0:05:07", "remaining_time": "0:00:22", "throughput": 2050.63, "total_tokens": 630256} {"current_steps": 2940, "total_steps": 3150, "loss": 0.0002, "lr": 6.802820690831658e-07, "epoch": 9.333333333333334, "percentage": 93.33, "elapsed_time": "0:05:07", "remaining_time": "0:00:21", "throughput": 2051.24, "total_tokens": 631296} {"current_steps": 2945, "total_steps": 3150, "loss": 0.0, "lr": 6.485617007562089e-07, "epoch": 9.34920634920635, "percentage": 93.49, "elapsed_time": "0:05:08", "remaining_time": "0:00:21", "throughput": 2051.86, "total_tokens": 632336} {"current_steps": 2950, "total_steps": 3150, "loss": 0.0, "lr": 6.175889114996114e-07, "epoch": 9.365079365079366, "percentage": 93.65, "elapsed_time": "0:05:08", "remaining_time": "0:00:20", "throughput": 2052.75, "total_tokens": 633472} {"current_steps": 2955, "total_steps": 3150, "loss": 0.0001, "lr": 5.87364652165176e-07, "epoch": 9.380952380952381, "percentage": 93.81, "elapsed_time": "0:05:09", "remaining_time": "0:00:20", "throughput": 2053.61, "total_tokens": 634592} {"current_steps": 2960, "total_steps": 3150, "loss": 0.0, "lr": 5.578898506251457e-07, "epoch": 9.396825396825397, "percentage": 93.97, "elapsed_time": "0:05:09", "remaining_time": "0:00:19", "throughput": 2054.36, "total_tokens": 635680} {"current_steps": 2965, "total_steps": 3150, "loss": 0.0, "lr": 5.291654117437261e-07, "epoch": 9.412698412698413, "percentage": 94.13, "elapsed_time": "0:05:09", "remaining_time": "0:00:19", "throughput": 2055.06, "total_tokens": 636752} {"current_steps": 2970, "total_steps": 3150, "loss": 0.0, "lr": 5.011922173492861e-07, "epoch": 9.428571428571429, "percentage": 94.29, "elapsed_time": "0:05:10", "remaining_time": "0:00:18", "throughput": 2055.72, "total_tokens": 637808} {"current_steps": 2975, "total_steps": 3150, "loss": 0.0, "lr": 4.739711262073121e-07, "epoch": 9.444444444444445, "percentage": 94.44, "elapsed_time": "0:05:10", "remaining_time": "0:00:18", "throughput": 2056.37, "total_tokens": 638864} {"current_steps": 2980, "total_steps": 3150, "loss": 0.0, "lr": 4.475029739940295e-07, "epoch": 9.46031746031746, "percentage": 94.6, "elapsed_time": "0:05:11", "remaining_time": "0:00:17", "throughput": 2057.12, "total_tokens": 639952} {"current_steps": 2985, "total_steps": 3150, "loss": 0.0005, "lr": 4.217885732707455e-07, "epoch": 9.476190476190476, "percentage": 94.76, "elapsed_time": "0:05:11", "remaining_time": "0:00:17", "throughput": 2057.72, "total_tokens": 640992} {"current_steps": 2990, "total_steps": 3150, "loss": 0.0001, "lr": 3.9682871345891883e-07, "epoch": 9.492063492063492, "percentage": 94.92, "elapsed_time": "0:05:11", "remaining_time": "0:00:16", "throughput": 2058.65, "total_tokens": 642144} {"current_steps": 2995, "total_steps": 3150, "loss": 0.0, "lr": 3.726241608158987e-07, "epoch": 9.507936507936508, "percentage": 95.08, "elapsed_time": "0:05:12", "remaining_time": "0:00:16", "throughput": 2059.59, "total_tokens": 643312} {"current_steps": 3000, "total_steps": 3150, "loss": 0.0, "lr": 3.491756584114381e-07, "epoch": 9.523809523809524, "percentage": 95.24, "elapsed_time": "0:05:12", "remaining_time": "0:00:15", "throughput": 2060.34, "total_tokens": 644400} {"current_steps": 3002, "total_steps": 3150, "eval_loss": 0.17812933027744293, "epoch": 9.53015873015873, "percentage": 95.3, "elapsed_time": "0:05:14", "remaining_time": "0:00:15", "throughput": 2050.65, "total_tokens": 644848} {"current_steps": 3005, "total_steps": 3150, "loss": 0.0, "lr": 3.264839261048369e-07, "epoch": 9.53968253968254, "percentage": 95.4, "elapsed_time": "0:05:16", "remaining_time": "0:00:15", "throughput": 2041.58, "total_tokens": 645456} {"current_steps": 3010, "total_steps": 3150, "loss": 0.0001, "lr": 3.045496605228904e-07, "epoch": 9.555555555555555, "percentage": 95.56, "elapsed_time": "0:05:16", "remaining_time": "0:00:14", "throughput": 2042.15, "total_tokens": 646496} {"current_steps": 3015, "total_steps": 3150, "loss": 0.0, "lr": 2.833735350384614e-07, "epoch": 9.571428571428571, "percentage": 95.71, "elapsed_time": "0:05:17", "remaining_time": "0:00:14", "throughput": 2042.93, "total_tokens": 647616} {"current_steps": 3020, "total_steps": 3150, "loss": 0.0, "lr": 2.6295619974983364e-07, "epoch": 9.587301587301587, "percentage": 95.87, "elapsed_time": "0:05:17", "remaining_time": "0:00:13", "throughput": 2043.38, "total_tokens": 648608} {"current_steps": 3025, "total_steps": 3150, "loss": 0.0, "lr": 2.4329828146074095e-07, "epoch": 9.603174603174603, "percentage": 96.03, "elapsed_time": "0:05:17", "remaining_time": "0:00:13", "throughput": 2044.0, "total_tokens": 649664} {"current_steps": 3030, "total_steps": 3150, "loss": 0.0001, "lr": 2.2440038366113858e-07, "epoch": 9.619047619047619, "percentage": 96.19, "elapsed_time": "0:05:18", "remaining_time": "0:00:12", "throughput": 2044.64, "total_tokens": 650720} {"current_steps": 3035, "total_steps": 3150, "loss": 0.0, "lr": 2.0626308650866234e-07, "epoch": 9.634920634920634, "percentage": 96.35, "elapsed_time": "0:05:18", "remaining_time": "0:00:12", "throughput": 2045.34, "total_tokens": 651792} {"current_steps": 3040, "total_steps": 3150, "loss": 0.0, "lr": 1.8888694681081787e-07, "epoch": 9.65079365079365, "percentage": 96.51, "elapsed_time": "0:05:19", "remaining_time": "0:00:11", "throughput": 2045.98, "total_tokens": 652848} {"current_steps": 3045, "total_steps": 3150, "loss": 0.0, "lr": 1.7227249800789713e-07, "epoch": 9.666666666666666, "percentage": 96.67, "elapsed_time": "0:05:19", "remaining_time": "0:00:11", "throughput": 2046.87, "total_tokens": 653984} {"current_steps": 3050, "total_steps": 3150, "loss": 0.0001, "lr": 1.5642025015660533e-07, "epoch": 9.682539682539682, "percentage": 96.83, "elapsed_time": "0:05:19", "remaining_time": "0:00:10", "throughput": 2047.54, "total_tokens": 655056} {"current_steps": 3055, "total_steps": 3150, "loss": 0.0, "lr": 1.4133068991437902e-07, "epoch": 9.698412698412698, "percentage": 96.98, "elapsed_time": "0:05:20", "remaining_time": "0:00:09", "throughput": 2048.03, "total_tokens": 656064} {"current_steps": 3060, "total_steps": 3150, "loss": 0.0, "lr": 1.2700428052447033e-07, "epoch": 9.714285714285714, "percentage": 97.14, "elapsed_time": "0:05:20", "remaining_time": "0:00:09", "throughput": 2048.76, "total_tokens": 657152} {"current_steps": 3065, "total_steps": 3150, "loss": 0.0, "lr": 1.13441461801711e-07, "epoch": 9.73015873015873, "percentage": 97.3, "elapsed_time": "0:05:21", "remaining_time": "0:00:08", "throughput": 2049.56, "total_tokens": 658256} {"current_steps": 3070, "total_steps": 3150, "loss": 0.0, "lr": 1.006426501190233e-07, "epoch": 9.746031746031747, "percentage": 97.46, "elapsed_time": "0:05:21", "remaining_time": "0:00:08", "throughput": 2050.1, "total_tokens": 659280} {"current_steps": 3075, "total_steps": 3150, "loss": 0.0, "lr": 8.860823839462461e-08, "epoch": 9.761904761904763, "percentage": 97.62, "elapsed_time": "0:05:21", "remaining_time": "0:00:07", "throughput": 2050.74, "total_tokens": 660336} {"current_steps": 3080, "total_steps": 3150, "loss": 0.0, "lr": 7.733859607997052e-08, "epoch": 9.777777777777779, "percentage": 97.78, "elapsed_time": "0:05:22", "remaining_time": "0:00:07", "throughput": 2051.29, "total_tokens": 661360} {"current_steps": 3085, "total_steps": 3150, "loss": 0.0, "lr": 6.683406914840818e-08, "epoch": 9.793650793650794, "percentage": 97.94, "elapsed_time": "0:05:22", "remaining_time": "0:00:06", "throughput": 2051.97, "total_tokens": 662432} {"current_steps": 3090, "total_steps": 3150, "loss": 0.0, "lr": 5.709498008456826e-08, "epoch": 9.80952380952381, "percentage": 98.1, "elapsed_time": "0:05:23", "remaining_time": "0:00:06", "throughput": 2052.46, "total_tokens": 663440} {"current_steps": 3095, "total_steps": 3150, "loss": 0.0, "lr": 4.8121627874450625e-08, "epoch": 9.825396825396826, "percentage": 98.25, "elapsed_time": "0:05:23", "remaining_time": "0:00:05", "throughput": 2053.0, "total_tokens": 664464} {"current_steps": 3100, "total_steps": 3150, "loss": 0.0, "lr": 3.9914287996251074e-08, "epoch": 9.841269841269842, "percentage": 98.41, "elapsed_time": "0:05:24", "remaining_time": "0:00:05", "throughput": 2053.63, "total_tokens": 665520} {"current_steps": 3105, "total_steps": 3150, "loss": 0.0, "lr": 3.2473212411904264e-08, "epoch": 9.857142857142858, "percentage": 98.57, "elapsed_time": "0:05:24", "remaining_time": "0:00:04", "throughput": 2054.35, "total_tokens": 666608} {"current_steps": 3110, "total_steps": 3150, "loss": 0.0, "lr": 2.5798629559350974e-08, "epoch": 9.873015873015873, "percentage": 98.73, "elapsed_time": "0:05:24", "remaining_time": "0:00:04", "throughput": 2054.97, "total_tokens": 667664} {"current_steps": 3115, "total_steps": 3150, "loss": 0.0, "lr": 1.9890744345518742e-08, "epoch": 9.88888888888889, "percentage": 98.89, "elapsed_time": "0:05:25", "remaining_time": "0:00:03", "throughput": 2055.69, "total_tokens": 668752} {"current_steps": 3120, "total_steps": 3150, "loss": 0.0, "lr": 1.4749738140037994e-08, "epoch": 9.904761904761905, "percentage": 99.05, "elapsed_time": "0:05:25", "remaining_time": "0:00:03", "throughput": 2056.45, "total_tokens": 669856} {"current_steps": 3125, "total_steps": 3150, "loss": 0.0, "lr": 1.0375768769668725e-08, "epoch": 9.920634920634921, "percentage": 99.21, "elapsed_time": "0:05:26", "remaining_time": "0:00:02", "throughput": 2057.12, "total_tokens": 670928} {"current_steps": 3130, "total_steps": 3150, "loss": 0.0, "lr": 6.768970513457151e-09, "epoch": 9.936507936507937, "percentage": 99.37, "elapsed_time": "0:05:26", "remaining_time": "0:00:02", "throughput": 2057.79, "total_tokens": 672000} {"current_steps": 3135, "total_steps": 3150, "loss": 0.0, "lr": 3.929454098619556e-09, "epoch": 9.952380952380953, "percentage": 99.52, "elapsed_time": "0:05:26", "remaining_time": "0:00:01", "throughput": 2058.5, "total_tokens": 673088} {"current_steps": 3140, "total_steps": 3150, "loss": 0.0003, "lr": 1.8573066971339092e-09, "epoch": 9.968253968253968, "percentage": 99.68, "elapsed_time": "0:05:27", "remaining_time": "0:00:01", "throughput": 2059.1, "total_tokens": 674144} {"current_steps": 3145, "total_steps": 3150, "loss": 0.0, "lr": 5.525919230670029e-10, "epoch": 9.984126984126984, "percentage": 99.84, "elapsed_time": "0:05:27", "remaining_time": "0:00:00", "throughput": 2059.82, "total_tokens": 675216} {"current_steps": 3150, "total_steps": 3150, "loss": 0.0001, "lr": 1.534983061768358e-11, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:05:28", "remaining_time": "0:00:00", "throughput": 2060.04, "total_tokens": 676320} {"current_steps": 3150, "total_steps": 3150, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:05:29", "remaining_time": "0:00:00", "throughput": 2051.15, "total_tokens": 676320}