| {"current_steps": 5, "total_steps": 3600, "loss": 8.8223, "lr": 5.555555555555556e-07, "epoch": 0.027777777777777776, "percentage": 0.14, "elapsed_time": "0:00:02", "remaining_time": "0:28:01", "throughput": 321.51, "total_tokens": 752} | |
| {"current_steps": 10, "total_steps": 3600, "loss": 8.0585, "lr": 1.25e-06, "epoch": 0.05555555555555555, "percentage": 0.28, "elapsed_time": "0:00:02", "remaining_time": "0:16:27", "throughput": 552.57, "total_tokens": 1520} | |
| {"current_steps": 15, "total_steps": 3600, "loss": 6.6675, "lr": 1.9444444444444444e-06, "epoch": 0.08333333333333333, "percentage": 0.42, "elapsed_time": "0:00:03", "remaining_time": "0:12:35", "throughput": 733.45, "total_tokens": 2320} | |
| {"current_steps": 20, "total_steps": 3600, "loss": 5.3834, "lr": 2.638888888888889e-06, "epoch": 0.1111111111111111, "percentage": 0.56, "elapsed_time": "0:00:03", "remaining_time": "0:10:39", "throughput": 859.66, "total_tokens": 3072} | |
| {"current_steps": 25, "total_steps": 3600, "loss": 3.9719, "lr": 3.3333333333333333e-06, "epoch": 0.1388888888888889, "percentage": 0.69, "elapsed_time": "0:00:03", "remaining_time": "0:09:29", "throughput": 963.76, "total_tokens": 3840} | |
| {"current_steps": 30, "total_steps": 3600, "loss": 2.8677, "lr": 4.027777777777779e-06, "epoch": 0.16666666666666666, "percentage": 0.83, "elapsed_time": "0:00:04", "remaining_time": "0:08:42", "throughput": 1041.43, "total_tokens": 4576} | |
| {"current_steps": 35, "total_steps": 3600, "loss": 1.9241, "lr": 4.722222222222222e-06, "epoch": 0.19444444444444445, "percentage": 0.97, "elapsed_time": "0:00:04", "remaining_time": "0:08:09", "throughput": 1109.04, "total_tokens": 5328} | |
| {"current_steps": 40, "total_steps": 3600, "loss": 1.0376, "lr": 5.416666666666667e-06, "epoch": 0.2222222222222222, "percentage": 1.11, "elapsed_time": "0:00:05", "remaining_time": "0:07:43", "throughput": 1172.35, "total_tokens": 6112} | |
| {"current_steps": 45, "total_steps": 3600, "loss": 0.52, "lr": 6.111111111111111e-06, "epoch": 0.25, "percentage": 1.25, "elapsed_time": "0:00:05", "remaining_time": "0:07:24", "throughput": 1217.89, "total_tokens": 6848} | |
| {"current_steps": 50, "total_steps": 3600, "loss": 0.4276, "lr": 6.805555555555556e-06, "epoch": 0.2777777777777778, "percentage": 1.39, "elapsed_time": "0:00:06", "remaining_time": "0:07:08", "throughput": 1258.56, "total_tokens": 7600} | |
| {"current_steps": 55, "total_steps": 3600, "loss": 0.2801, "lr": 7.5e-06, "epoch": 0.3055555555555556, "percentage": 1.53, "elapsed_time": "0:00:06", "remaining_time": "0:06:55", "throughput": 1297.97, "total_tokens": 8368} | |
| {"current_steps": 60, "total_steps": 3600, "loss": 0.1757, "lr": 8.194444444444445e-06, "epoch": 0.3333333333333333, "percentage": 1.67, "elapsed_time": "0:00:06", "remaining_time": "0:06:44", "throughput": 1334.82, "total_tokens": 9152} | |
| {"current_steps": 65, "total_steps": 3600, "loss": 0.4626, "lr": 8.88888888888889e-06, "epoch": 0.3611111111111111, "percentage": 1.81, "elapsed_time": "0:00:07", "remaining_time": "0:06:35", "throughput": 1360.86, "total_tokens": 9888} | |
| {"current_steps": 70, "total_steps": 3600, "loss": 0.5824, "lr": 9.583333333333334e-06, "epoch": 0.3888888888888889, "percentage": 1.94, "elapsed_time": "0:00:07", "remaining_time": "0:06:27", "throughput": 1388.35, "total_tokens": 10656} | |
| {"current_steps": 75, "total_steps": 3600, "loss": 0.3283, "lr": 1.0277777777777777e-05, "epoch": 0.4166666666666667, "percentage": 2.08, "elapsed_time": "0:00:08", "remaining_time": "0:06:20", "throughput": 1410.83, "total_tokens": 11408} | |
| {"current_steps": 80, "total_steps": 3600, "loss": 0.3072, "lr": 1.0972222222222223e-05, "epoch": 0.4444444444444444, "percentage": 2.22, "elapsed_time": "0:00:08", "remaining_time": "0:06:13", "throughput": 1429.4, "total_tokens": 12144} | |
| {"current_steps": 85, "total_steps": 3600, "loss": 0.2421, "lr": 1.1666666666666668e-05, "epoch": 0.4722222222222222, "percentage": 2.36, "elapsed_time": "0:00:08", "remaining_time": "0:06:08", "throughput": 1446.07, "total_tokens": 12880} | |
| {"current_steps": 90, "total_steps": 3600, "loss": 0.2412, "lr": 1.2361111111111112e-05, "epoch": 0.5, "percentage": 2.5, "elapsed_time": "0:00:09", "remaining_time": "0:06:03", "throughput": 1465.95, "total_tokens": 13664} | |
| {"current_steps": 95, "total_steps": 3600, "loss": 0.4789, "lr": 1.3055555555555557e-05, "epoch": 0.5277777777777778, "percentage": 2.64, "elapsed_time": "0:00:09", "remaining_time": "0:05:59", "throughput": 1485.8, "total_tokens": 14464} | |
| {"current_steps": 100, "total_steps": 3600, "loss": 0.2502, "lr": 1.3750000000000002e-05, "epoch": 0.5555555555555556, "percentage": 2.78, "elapsed_time": "0:00:10", "remaining_time": "0:05:55", "throughput": 1499.25, "total_tokens": 15216} | |
| {"current_steps": 105, "total_steps": 3600, "loss": 0.2588, "lr": 1.4444444444444444e-05, "epoch": 0.5833333333333334, "percentage": 2.92, "elapsed_time": "0:00:10", "remaining_time": "0:05:51", "throughput": 1511.94, "total_tokens": 15984} | |
| {"current_steps": 110, "total_steps": 3600, "loss": 0.3088, "lr": 1.5138888888888888e-05, "epoch": 0.6111111111111112, "percentage": 3.06, "elapsed_time": "0:00:10", "remaining_time": "0:05:48", "throughput": 1526.59, "total_tokens": 16768} | |
| {"current_steps": 115, "total_steps": 3600, "loss": 0.2734, "lr": 1.5833333333333333e-05, "epoch": 0.6388888888888888, "percentage": 3.19, "elapsed_time": "0:00:11", "remaining_time": "0:05:45", "throughput": 1538.23, "total_tokens": 17552} | |
| {"current_steps": 120, "total_steps": 3600, "loss": 0.2656, "lr": 1.6527777777777777e-05, "epoch": 0.6666666666666666, "percentage": 3.33, "elapsed_time": "0:00:11", "remaining_time": "0:05:42", "throughput": 1547.76, "total_tokens": 18304} | |
| {"current_steps": 125, "total_steps": 3600, "loss": 0.23, "lr": 1.7222222222222224e-05, "epoch": 0.6944444444444444, "percentage": 3.47, "elapsed_time": "0:00:12", "remaining_time": "0:05:40", "throughput": 1558.52, "total_tokens": 19072} | |
| {"current_steps": 130, "total_steps": 3600, "loss": 0.2338, "lr": 1.7916666666666667e-05, "epoch": 0.7222222222222222, "percentage": 3.61, "elapsed_time": "0:00:12", "remaining_time": "0:05:37", "throughput": 1567.99, "total_tokens": 19840} | |
| {"current_steps": 135, "total_steps": 3600, "loss": 0.3357, "lr": 1.861111111111111e-05, "epoch": 0.75, "percentage": 3.75, "elapsed_time": "0:00:13", "remaining_time": "0:05:35", "throughput": 1579.97, "total_tokens": 20640} | |
| {"current_steps": 140, "total_steps": 3600, "loss": 0.3484, "lr": 1.9305555555555558e-05, "epoch": 0.7777777777777778, "percentage": 3.89, "elapsed_time": "0:00:13", "remaining_time": "0:05:33", "throughput": 1588.84, "total_tokens": 21408} | |
| {"current_steps": 145, "total_steps": 3600, "loss": 0.2401, "lr": 2e-05, "epoch": 0.8055555555555556, "percentage": 4.03, "elapsed_time": "0:00:13", "remaining_time": "0:05:30", "throughput": 1593.42, "total_tokens": 22128} | |
| {"current_steps": 150, "total_steps": 3600, "loss": 0.2147, "lr": 2.0694444444444445e-05, "epoch": 0.8333333333333334, "percentage": 4.17, "elapsed_time": "0:00:14", "remaining_time": "0:05:28", "throughput": 1600.25, "total_tokens": 22880} | |
| {"current_steps": 155, "total_steps": 3600, "loss": 0.2482, "lr": 2.138888888888889e-05, "epoch": 0.8611111111111112, "percentage": 4.31, "elapsed_time": "0:00:14", "remaining_time": "0:05:26", "throughput": 1608.85, "total_tokens": 23664} | |
| {"current_steps": 160, "total_steps": 3600, "loss": 0.2706, "lr": 2.2083333333333333e-05, "epoch": 0.8888888888888888, "percentage": 4.44, "elapsed_time": "0:00:15", "remaining_time": "0:05:25", "throughput": 1615.94, "total_tokens": 24432} | |
| {"current_steps": 165, "total_steps": 3600, "loss": 0.2638, "lr": 2.277777777777778e-05, "epoch": 0.9166666666666666, "percentage": 4.58, "elapsed_time": "0:00:15", "remaining_time": "0:05:23", "throughput": 1621.02, "total_tokens": 25184} | |
| {"current_steps": 170, "total_steps": 3600, "loss": 0.2324, "lr": 2.3472222222222223e-05, "epoch": 0.9444444444444444, "percentage": 4.72, "elapsed_time": "0:00:15", "remaining_time": "0:05:21", "throughput": 1625.36, "total_tokens": 25920} | |
| {"current_steps": 175, "total_steps": 3600, "loss": 0.2318, "lr": 2.4166666666666667e-05, "epoch": 0.9722222222222222, "percentage": 4.86, "elapsed_time": "0:00:16", "remaining_time": "0:05:19", "throughput": 1631.59, "total_tokens": 26672} | |
| {"current_steps": 180, "total_steps": 3600, "loss": 0.2307, "lr": 2.4861111111111114e-05, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:00:16", "remaining_time": "0:05:19", "throughput": 1630.72, "total_tokens": 27408} | |
| {"current_steps": 180, "total_steps": 3600, "eval_loss": 0.24429556727409363, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:00:17", "remaining_time": "0:05:35", "throughput": 1552.91, "total_tokens": 27408} | |
| {"current_steps": 185, "total_steps": 3600, "loss": 0.2288, "lr": 2.5555555555555554e-05, "epoch": 1.0277777777777777, "percentage": 5.14, "elapsed_time": "0:00:19", "remaining_time": "0:05:58", "throughput": 1448.97, "total_tokens": 28176} | |
| {"current_steps": 190, "total_steps": 3600, "loss": 0.2356, "lr": 2.625e-05, "epoch": 1.0555555555555556, "percentage": 5.28, "elapsed_time": "0:00:19", "remaining_time": "0:05:57", "throughput": 1452.15, "total_tokens": 28928} | |
| {"current_steps": 195, "total_steps": 3600, "loss": 0.2577, "lr": 2.6944444444444445e-05, "epoch": 1.0833333333333333, "percentage": 5.42, "elapsed_time": "0:00:20", "remaining_time": "0:05:55", "throughput": 1457.06, "total_tokens": 29696} | |
| {"current_steps": 200, "total_steps": 3600, "loss": 0.2403, "lr": 2.7638888888888892e-05, "epoch": 1.1111111111111112, "percentage": 5.56, "elapsed_time": "0:00:20", "remaining_time": "0:05:54", "throughput": 1462.11, "total_tokens": 30448} | |
| {"current_steps": 205, "total_steps": 3600, "loss": 0.257, "lr": 2.8333333333333335e-05, "epoch": 1.1388888888888888, "percentage": 5.69, "elapsed_time": "0:00:21", "remaining_time": "0:05:51", "throughput": 1468.82, "total_tokens": 31200} | |
| {"current_steps": 210, "total_steps": 3600, "loss": 0.2328, "lr": 2.9027777777777782e-05, "epoch": 1.1666666666666667, "percentage": 5.83, "elapsed_time": "0:00:21", "remaining_time": "0:05:50", "throughput": 1472.52, "total_tokens": 31936} | |
| {"current_steps": 215, "total_steps": 3600, "loss": 0.2362, "lr": 2.9722222222222223e-05, "epoch": 1.1944444444444444, "percentage": 5.97, "elapsed_time": "0:00:22", "remaining_time": "0:05:48", "throughput": 1479.06, "total_tokens": 32704} | |
| {"current_steps": 220, "total_steps": 3600, "loss": 0.2375, "lr": 3.0416666666666666e-05, "epoch": 1.2222222222222223, "percentage": 6.11, "elapsed_time": "0:00:22", "remaining_time": "0:05:46", "throughput": 1482.46, "total_tokens": 33440} | |
| {"current_steps": 225, "total_steps": 3600, "loss": 0.2384, "lr": 3.111111111111111e-05, "epoch": 1.25, "percentage": 6.25, "elapsed_time": "0:00:22", "remaining_time": "0:05:44", "throughput": 1488.41, "total_tokens": 34192} | |
| {"current_steps": 230, "total_steps": 3600, "loss": 0.2388, "lr": 3.180555555555556e-05, "epoch": 1.2777777777777777, "percentage": 6.39, "elapsed_time": "0:00:23", "remaining_time": "0:05:42", "throughput": 1495.1, "total_tokens": 34960} | |
| {"current_steps": 235, "total_steps": 3600, "loss": 0.2736, "lr": 3.2500000000000004e-05, "epoch": 1.3055555555555556, "percentage": 6.53, "elapsed_time": "0:00:23", "remaining_time": "0:05:40", "throughput": 1501.55, "total_tokens": 35728} | |
| {"current_steps": 240, "total_steps": 3600, "loss": 0.2352, "lr": 3.3194444444444444e-05, "epoch": 1.3333333333333333, "percentage": 6.67, "elapsed_time": "0:00:24", "remaining_time": "0:05:38", "throughput": 1506.51, "total_tokens": 36464} | |
| {"current_steps": 245, "total_steps": 3600, "loss": 0.2393, "lr": 3.388888888888889e-05, "epoch": 1.3611111111111112, "percentage": 6.81, "elapsed_time": "0:00:24", "remaining_time": "0:05:37", "throughput": 1511.97, "total_tokens": 37216} | |
| {"current_steps": 250, "total_steps": 3600, "loss": 0.2299, "lr": 3.458333333333333e-05, "epoch": 1.3888888888888888, "percentage": 6.94, "elapsed_time": "0:00:25", "remaining_time": "0:05:35", "throughput": 1518.49, "total_tokens": 38000} | |
| {"current_steps": 255, "total_steps": 3600, "loss": 0.275, "lr": 3.527777777777778e-05, "epoch": 1.4166666666666667, "percentage": 7.08, "elapsed_time": "0:00:25", "remaining_time": "0:05:33", "throughput": 1525.49, "total_tokens": 38800} | |
| {"current_steps": 260, "total_steps": 3600, "loss": 0.2884, "lr": 3.5972222222222225e-05, "epoch": 1.4444444444444444, "percentage": 7.22, "elapsed_time": "0:00:25", "remaining_time": "0:05:32", "throughput": 1531.62, "total_tokens": 39584} | |
| {"current_steps": 265, "total_steps": 3600, "loss": 0.2382, "lr": 3.6666666666666666e-05, "epoch": 1.4722222222222223, "percentage": 7.36, "elapsed_time": "0:00:26", "remaining_time": "0:05:30", "throughput": 1536.96, "total_tokens": 40352} | |
| {"current_steps": 270, "total_steps": 3600, "loss": 0.2257, "lr": 3.736111111111111e-05, "epoch": 1.5, "percentage": 7.5, "elapsed_time": "0:00:26", "remaining_time": "0:05:28", "throughput": 1542.14, "total_tokens": 41120} | |
| {"current_steps": 275, "total_steps": 3600, "loss": 0.2836, "lr": 3.805555555555555e-05, "epoch": 1.5277777777777777, "percentage": 7.64, "elapsed_time": "0:00:27", "remaining_time": "0:05:27", "throughput": 1545.64, "total_tokens": 41856} | |
| {"current_steps": 280, "total_steps": 3600, "loss": 0.8554, "lr": 3.875e-05, "epoch": 1.5555555555555556, "percentage": 7.78, "elapsed_time": "0:00:27", "remaining_time": "0:05:25", "throughput": 1551.01, "total_tokens": 42640} | |
| {"current_steps": 285, "total_steps": 3600, "loss": 3.143, "lr": 3.944444444444445e-05, "epoch": 1.5833333333333335, "percentage": 7.92, "elapsed_time": "0:00:27", "remaining_time": "0:05:24", "throughput": 1555.02, "total_tokens": 43392} | |
| {"current_steps": 290, "total_steps": 3600, "loss": 0.3615, "lr": 4.0138888888888894e-05, "epoch": 1.6111111111111112, "percentage": 8.06, "elapsed_time": "0:00:28", "remaining_time": "0:05:23", "throughput": 1559.98, "total_tokens": 44176} | |
| {"current_steps": 295, "total_steps": 3600, "loss": 0.2289, "lr": 4.0833333333333334e-05, "epoch": 1.6388888888888888, "percentage": 8.19, "elapsed_time": "0:00:28", "remaining_time": "0:05:21", "throughput": 1563.49, "total_tokens": 44928} | |
| {"current_steps": 300, "total_steps": 3600, "loss": 0.2271, "lr": 4.152777777777778e-05, "epoch": 1.6666666666666665, "percentage": 8.33, "elapsed_time": "0:00:29", "remaining_time": "0:05:20", "throughput": 1567.68, "total_tokens": 45696} | |
| {"current_steps": 305, "total_steps": 3600, "loss": 0.2477, "lr": 4.222222222222222e-05, "epoch": 1.6944444444444444, "percentage": 8.47, "elapsed_time": "0:00:29", "remaining_time": "0:05:19", "throughput": 1571.93, "total_tokens": 46464} | |
| {"current_steps": 310, "total_steps": 3600, "loss": 0.2669, "lr": 4.291666666666667e-05, "epoch": 1.7222222222222223, "percentage": 8.61, "elapsed_time": "0:00:29", "remaining_time": "0:05:18", "throughput": 1575.51, "total_tokens": 47216} | |
| {"current_steps": 315, "total_steps": 3600, "loss": 0.2423, "lr": 4.3611111111111116e-05, "epoch": 1.75, "percentage": 8.75, "elapsed_time": "0:00:30", "remaining_time": "0:05:16", "throughput": 1579.09, "total_tokens": 47984} | |
| {"current_steps": 320, "total_steps": 3600, "loss": 0.2228, "lr": 4.4305555555555556e-05, "epoch": 1.7777777777777777, "percentage": 8.89, "elapsed_time": "0:00:30", "remaining_time": "0:05:15", "throughput": 1581.73, "total_tokens": 48720} | |
| {"current_steps": 325, "total_steps": 3600, "loss": 0.2698, "lr": 4.5e-05, "epoch": 1.8055555555555556, "percentage": 9.03, "elapsed_time": "0:00:31", "remaining_time": "0:05:14", "throughput": 1584.3, "total_tokens": 49472} | |
| {"current_steps": 330, "total_steps": 3600, "loss": 0.2317, "lr": 4.569444444444444e-05, "epoch": 1.8333333333333335, "percentage": 9.17, "elapsed_time": "0:00:31", "remaining_time": "0:05:13", "throughput": 1587.28, "total_tokens": 50224} | |
| {"current_steps": 335, "total_steps": 3600, "loss": 0.2387, "lr": 4.638888888888889e-05, "epoch": 1.8611111111111112, "percentage": 9.31, "elapsed_time": "0:00:32", "remaining_time": "0:05:12", "throughput": 1588.75, "total_tokens": 50944} | |
| {"current_steps": 340, "total_steps": 3600, "loss": 0.264, "lr": 4.708333333333334e-05, "epoch": 1.8888888888888888, "percentage": 9.44, "elapsed_time": "0:00:32", "remaining_time": "0:05:11", "throughput": 1591.54, "total_tokens": 51696} | |
| {"current_steps": 345, "total_steps": 3600, "loss": 0.3239, "lr": 4.7777777777777784e-05, "epoch": 1.9166666666666665, "percentage": 9.58, "elapsed_time": "0:00:32", "remaining_time": "0:05:10", "throughput": 1595.1, "total_tokens": 52480} | |
| {"current_steps": 350, "total_steps": 3600, "loss": 0.2497, "lr": 4.8472222222222224e-05, "epoch": 1.9444444444444444, "percentage": 9.72, "elapsed_time": "0:00:33", "remaining_time": "0:05:09", "throughput": 1598.1, "total_tokens": 53232} | |
| {"current_steps": 355, "total_steps": 3600, "loss": 0.2366, "lr": 4.9166666666666665e-05, "epoch": 1.9722222222222223, "percentage": 9.86, "elapsed_time": "0:00:33", "remaining_time": "0:05:08", "throughput": 1602.19, "total_tokens": 54016} | |
| {"current_steps": 360, "total_steps": 3600, "loss": 0.2399, "lr": 4.986111111111111e-05, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:00:34", "remaining_time": "0:05:07", "throughput": 1601.88, "total_tokens": 54752} | |
| {"current_steps": 360, "total_steps": 3600, "eval_loss": 0.23347768187522888, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:00:35", "remaining_time": "0:05:16", "throughput": 1556.58, "total_tokens": 54752} | |
| {"current_steps": 365, "total_steps": 3600, "loss": 0.2378, "lr": 4.99998119647914e-05, "epoch": 2.0277777777777777, "percentage": 10.14, "elapsed_time": "0:00:37", "remaining_time": "0:05:29", "throughput": 1493.61, "total_tokens": 55520} | |
| {"current_steps": 370, "total_steps": 3600, "loss": 0.2401, "lr": 4.999904807660428e-05, "epoch": 2.0555555555555554, "percentage": 10.28, "elapsed_time": "0:00:37", "remaining_time": "0:05:28", "throughput": 1497.28, "total_tokens": 56288} | |
| {"current_steps": 375, "total_steps": 3600, "loss": 0.2284, "lr": 4.999769660117901e-05, "epoch": 2.0833333333333335, "percentage": 10.42, "elapsed_time": "0:00:38", "remaining_time": "0:05:27", "throughput": 1499.5, "total_tokens": 57040} | |
| {"current_steps": 380, "total_steps": 3600, "loss": 0.2262, "lr": 4.999575757028119e-05, "epoch": 2.111111111111111, "percentage": 10.56, "elapsed_time": "0:00:38", "remaining_time": "0:05:25", "throughput": 1502.81, "total_tokens": 57792} | |
| {"current_steps": 385, "total_steps": 3600, "loss": 0.2531, "lr": 4.9993231029486544e-05, "epoch": 2.138888888888889, "percentage": 10.69, "elapsed_time": "0:00:38", "remaining_time": "0:05:24", "throughput": 1507.09, "total_tokens": 58576} | |
| {"current_steps": 390, "total_steps": 3600, "loss": 0.2609, "lr": 4.999011703817986e-05, "epoch": 2.1666666666666665, "percentage": 10.83, "elapsed_time": "0:00:39", "remaining_time": "0:05:23", "throughput": 1510.7, "total_tokens": 59344} | |
| {"current_steps": 395, "total_steps": 3600, "loss": 0.2575, "lr": 4.9986415669553586e-05, "epoch": 2.1944444444444446, "percentage": 10.97, "elapsed_time": "0:00:39", "remaining_time": "0:05:22", "throughput": 1514.33, "total_tokens": 60112} | |
| {"current_steps": 400, "total_steps": 3600, "loss": 0.215, "lr": 4.998212701060612e-05, "epoch": 2.2222222222222223, "percentage": 11.11, "elapsed_time": "0:00:40", "remaining_time": "0:05:20", "throughput": 1518.24, "total_tokens": 60896} | |
| {"current_steps": 405, "total_steps": 3600, "loss": 0.2506, "lr": 4.997725116213973e-05, "epoch": 2.25, "percentage": 11.25, "elapsed_time": "0:00:40", "remaining_time": "0:05:19", "throughput": 1521.07, "total_tokens": 61648} | |
| {"current_steps": 410, "total_steps": 3600, "loss": 0.241, "lr": 4.997178823875826e-05, "epoch": 2.2777777777777777, "percentage": 11.39, "elapsed_time": "0:00:40", "remaining_time": "0:05:18", "throughput": 1524.1, "total_tokens": 62400} | |
| {"current_steps": 415, "total_steps": 3600, "loss": 0.2412, "lr": 4.996573836886435e-05, "epoch": 2.3055555555555554, "percentage": 11.53, "elapsed_time": "0:00:41", "remaining_time": "0:05:17", "throughput": 1526.5, "total_tokens": 63136} | |
| {"current_steps": 420, "total_steps": 3600, "loss": 0.23, "lr": 4.995910169465646e-05, "epoch": 2.3333333333333335, "percentage": 11.67, "elapsed_time": "0:00:41", "remaining_time": "0:05:16", "throughput": 1529.31, "total_tokens": 63888} | |
| {"current_steps": 425, "total_steps": 3600, "loss": 0.2064, "lr": 4.9951878372125547e-05, "epoch": 2.361111111111111, "percentage": 11.81, "elapsed_time": "0:00:42", "remaining_time": "0:05:15", "throughput": 1531.37, "total_tokens": 64624} | |
| {"current_steps": 430, "total_steps": 3600, "loss": 0.2645, "lr": 4.994406857105136e-05, "epoch": 2.388888888888889, "percentage": 11.94, "elapsed_time": "0:00:42", "remaining_time": "0:05:14", "throughput": 1533.97, "total_tokens": 65376} | |
| {"current_steps": 435, "total_steps": 3600, "loss": 0.2689, "lr": 4.993567247499845e-05, "epoch": 2.4166666666666665, "percentage": 12.08, "elapsed_time": "0:00:43", "remaining_time": "0:05:13", "throughput": 1536.18, "total_tokens": 66112} | |
| {"current_steps": 440, "total_steps": 3600, "loss": 0.2355, "lr": 4.9926690281311904e-05, "epoch": 2.4444444444444446, "percentage": 12.22, "elapsed_time": "0:00:43", "remaining_time": "0:05:12", "throughput": 1539.5, "total_tokens": 66896} | |
| {"current_steps": 445, "total_steps": 3600, "loss": 0.2409, "lr": 4.9917122201112656e-05, "epoch": 2.4722222222222223, "percentage": 12.36, "elapsed_time": "0:00:43", "remaining_time": "0:05:10", "throughput": 1542.6, "total_tokens": 67664} | |
| {"current_steps": 450, "total_steps": 3600, "loss": 0.204, "lr": 4.9906968459292524e-05, "epoch": 2.5, "percentage": 12.5, "elapsed_time": "0:00:44", "remaining_time": "0:05:09", "throughput": 1545.63, "total_tokens": 68432} | |
| {"current_steps": 455, "total_steps": 3600, "loss": 0.2514, "lr": 4.9896229294508976e-05, "epoch": 2.5277777777777777, "percentage": 12.64, "elapsed_time": "0:00:44", "remaining_time": "0:05:08", "throughput": 1547.32, "total_tokens": 69152} | |
| {"current_steps": 460, "total_steps": 3600, "loss": 0.2615, "lr": 4.988490495917947e-05, "epoch": 2.5555555555555554, "percentage": 12.78, "elapsed_time": "0:00:45", "remaining_time": "0:05:07", "throughput": 1550.58, "total_tokens": 69936} | |
| {"current_steps": 465, "total_steps": 3600, "loss": 0.2344, "lr": 4.987299571947553e-05, "epoch": 2.5833333333333335, "percentage": 12.92, "elapsed_time": "0:00:45", "remaining_time": "0:05:06", "throughput": 1553.2, "total_tokens": 70704} | |
| {"current_steps": 470, "total_steps": 3600, "loss": 0.2223, "lr": 4.9860501855316514e-05, "epoch": 2.611111111111111, "percentage": 13.06, "elapsed_time": "0:00:45", "remaining_time": "0:05:05", "throughput": 1556.16, "total_tokens": 71488} | |
| {"current_steps": 475, "total_steps": 3600, "loss": 0.2689, "lr": 4.9847423660363e-05, "epoch": 2.638888888888889, "percentage": 13.19, "elapsed_time": "0:00:46", "remaining_time": "0:05:04", "throughput": 1558.07, "total_tokens": 72224} | |
| {"current_steps": 480, "total_steps": 3600, "loss": 0.242, "lr": 4.983376144200992e-05, "epoch": 2.6666666666666665, "percentage": 13.33, "elapsed_time": "0:00:46", "remaining_time": "0:05:03", "throughput": 1560.44, "total_tokens": 72976} | |
| {"current_steps": 485, "total_steps": 3600, "loss": 0.233, "lr": 4.981951552137929e-05, "epoch": 2.6944444444444446, "percentage": 13.47, "elapsed_time": "0:00:47", "remaining_time": "0:05:03", "throughput": 1564.14, "total_tokens": 73792} | |
| {"current_steps": 490, "total_steps": 3600, "loss": 0.2351, "lr": 4.980468623331273e-05, "epoch": 2.7222222222222223, "percentage": 13.61, "elapsed_time": "0:00:47", "remaining_time": "0:05:02", "throughput": 1566.62, "total_tokens": 74560} | |
| {"current_steps": 495, "total_steps": 3600, "loss": 0.2396, "lr": 4.978927392636351e-05, "epoch": 2.75, "percentage": 13.75, "elapsed_time": "0:00:48", "remaining_time": "0:05:01", "throughput": 1569.16, "total_tokens": 75328} | |
| {"current_steps": 500, "total_steps": 3600, "loss": 0.2568, "lr": 4.9773278962788436e-05, "epoch": 2.7777777777777777, "percentage": 13.89, "elapsed_time": "0:00:48", "remaining_time": "0:05:00", "throughput": 1571.39, "total_tokens": 76080} | |
| {"current_steps": 505, "total_steps": 3600, "loss": 0.2442, "lr": 4.975670171853926e-05, "epoch": 2.8055555555555554, "percentage": 14.03, "elapsed_time": "0:00:48", "remaining_time": "0:04:59", "throughput": 1573.76, "total_tokens": 76848} | |
| {"current_steps": 510, "total_steps": 3600, "loss": 0.2304, "lr": 4.973954258325392e-05, "epoch": 2.8333333333333335, "percentage": 14.17, "elapsed_time": "0:00:49", "remaining_time": "0:04:58", "throughput": 1576.03, "total_tokens": 77616} | |
| {"current_steps": 515, "total_steps": 3600, "loss": 0.222, "lr": 4.972180196024733e-05, "epoch": 2.861111111111111, "percentage": 14.31, "elapsed_time": "0:00:49", "remaining_time": "0:04:57", "throughput": 1578.27, "total_tokens": 78384} | |
| {"current_steps": 520, "total_steps": 3600, "loss": 0.2374, "lr": 4.97034802665019e-05, "epoch": 2.888888888888889, "percentage": 14.44, "elapsed_time": "0:00:50", "remaining_time": "0:04:56", "throughput": 1580.31, "total_tokens": 79136} | |
| {"current_steps": 525, "total_steps": 3600, "loss": 0.2284, "lr": 4.9684577932657786e-05, "epoch": 2.9166666666666665, "percentage": 14.58, "elapsed_time": "0:00:50", "remaining_time": "0:04:55", "throughput": 1582.98, "total_tokens": 79920} | |
| {"current_steps": 530, "total_steps": 3600, "loss": 0.2185, "lr": 4.966509540300269e-05, "epoch": 2.9444444444444446, "percentage": 14.72, "elapsed_time": "0:00:50", "remaining_time": "0:04:54", "throughput": 1584.91, "total_tokens": 80672} | |
| {"current_steps": 535, "total_steps": 3600, "loss": 0.2737, "lr": 4.9645033135461494e-05, "epoch": 2.9722222222222223, "percentage": 14.86, "elapsed_time": "0:00:51", "remaining_time": "0:04:53", "throughput": 1587.46, "total_tokens": 81440} | |
| {"current_steps": 540, "total_steps": 3600, "loss": 0.2437, "lr": 4.962439160158544e-05, "epoch": 3.0, "percentage": 15.0, "elapsed_time": "0:00:51", "remaining_time": "0:04:53", "throughput": 1587.45, "total_tokens": 82176} | |
| {"current_steps": 540, "total_steps": 3600, "eval_loss": 0.23567190766334534, "epoch": 3.0, "percentage": 15.0, "elapsed_time": "0:00:52", "remaining_time": "0:04:58", "throughput": 1561.72, "total_tokens": 82176} | |
| {"current_steps": 545, "total_steps": 3600, "loss": 0.2396, "lr": 4.960317128654108e-05, "epoch": 3.0277777777777777, "percentage": 15.14, "elapsed_time": "0:00:54", "remaining_time": "0:05:04", "throughput": 1525.18, "total_tokens": 82944} | |
| {"current_steps": 550, "total_steps": 3600, "loss": 0.2521, "lr": 4.958137268909887e-05, "epoch": 3.0555555555555554, "percentage": 15.28, "elapsed_time": "0:00:54", "remaining_time": "0:05:03", "throughput": 1527.12, "total_tokens": 83712} | |
| {"current_steps": 555, "total_steps": 3600, "loss": 0.2297, "lr": 4.9558996321621405e-05, "epoch": 3.0833333333333335, "percentage": 15.42, "elapsed_time": "0:00:55", "remaining_time": "0:05:03", "throughput": 1529.68, "total_tokens": 84496} | |
| {"current_steps": 560, "total_steps": 3600, "loss": 0.2335, "lr": 4.953604271005144e-05, "epoch": 3.111111111111111, "percentage": 15.56, "elapsed_time": "0:00:55", "remaining_time": "0:05:02", "throughput": 1532.45, "total_tokens": 85280} | |
| {"current_steps": 565, "total_steps": 3600, "loss": 0.2434, "lr": 4.951251239389948e-05, "epoch": 3.138888888888889, "percentage": 15.69, "elapsed_time": "0:00:56", "remaining_time": "0:05:01", "throughput": 1534.3, "total_tokens": 86016} | |
| {"current_steps": 570, "total_steps": 3600, "loss": 0.2308, "lr": 4.9488405926231144e-05, "epoch": 3.1666666666666665, "percentage": 15.83, "elapsed_time": "0:00:56", "remaining_time": "0:05:00", "throughput": 1536.93, "total_tokens": 86800} | |
| {"current_steps": 575, "total_steps": 3600, "loss": 0.2427, "lr": 4.946372387365409e-05, "epoch": 3.1944444444444446, "percentage": 15.97, "elapsed_time": "0:00:56", "remaining_time": "0:04:59", "throughput": 1539.33, "total_tokens": 87568} | |
| {"current_steps": 580, "total_steps": 3600, "loss": 0.2317, "lr": 4.943846681630479e-05, "epoch": 3.2222222222222223, "percentage": 16.11, "elapsed_time": "0:00:57", "remaining_time": "0:04:58", "throughput": 1541.32, "total_tokens": 88320} | |
| {"current_steps": 585, "total_steps": 3600, "loss": 0.2285, "lr": 4.941263534783482e-05, "epoch": 3.25, "percentage": 16.25, "elapsed_time": "0:00:57", "remaining_time": "0:04:57", "throughput": 1543.38, "total_tokens": 89072} | |
| {"current_steps": 590, "total_steps": 3600, "loss": 0.2165, "lr": 4.9386230075396964e-05, "epoch": 3.2777777777777777, "percentage": 16.39, "elapsed_time": "0:00:58", "remaining_time": "0:04:56", "throughput": 1544.86, "total_tokens": 89792} | |
| {"current_steps": 595, "total_steps": 3600, "loss": 0.2417, "lr": 4.9359251619630886e-05, "epoch": 3.3055555555555554, "percentage": 16.53, "elapsed_time": "0:00:58", "remaining_time": "0:04:55", "throughput": 1547.43, "total_tokens": 90576} | |
| {"current_steps": 600, "total_steps": 3600, "loss": 0.2425, "lr": 4.933170061464858e-05, "epoch": 3.3333333333333335, "percentage": 16.67, "elapsed_time": "0:00:58", "remaining_time": "0:04:54", "throughput": 1549.97, "total_tokens": 91360} | |
| {"current_steps": 605, "total_steps": 3600, "loss": 0.1958, "lr": 4.930357770801947e-05, "epoch": 3.361111111111111, "percentage": 16.81, "elapsed_time": "0:00:59", "remaining_time": "0:04:53", "throughput": 1552.19, "total_tokens": 92128} | |
| {"current_steps": 610, "total_steps": 3600, "loss": 0.2538, "lr": 4.9274883560755156e-05, "epoch": 3.388888888888889, "percentage": 16.94, "elapsed_time": "0:00:59", "remaining_time": "0:04:52", "throughput": 1554.66, "total_tokens": 92912} | |
| {"current_steps": 615, "total_steps": 3600, "loss": 0.2223, "lr": 4.924561884729391e-05, "epoch": 3.4166666666666665, "percentage": 17.08, "elapsed_time": "0:01:00", "remaining_time": "0:04:52", "throughput": 1556.15, "total_tokens": 93648} | |
| {"current_steps": 620, "total_steps": 3600, "loss": 0.2504, "lr": 4.921578425548482e-05, "epoch": 3.4444444444444446, "percentage": 17.22, "elapsed_time": "0:01:00", "remaining_time": "0:04:51", "throughput": 1558.06, "total_tokens": 94416} | |
| {"current_steps": 625, "total_steps": 3600, "loss": 0.2311, "lr": 4.9185380486571595e-05, "epoch": 3.4722222222222223, "percentage": 17.36, "elapsed_time": "0:01:01", "remaining_time": "0:04:50", "throughput": 1559.95, "total_tokens": 95168} | |
| {"current_steps": 630, "total_steps": 3600, "loss": 0.2216, "lr": 4.915440825517612e-05, "epoch": 3.5, "percentage": 17.5, "elapsed_time": "0:01:01", "remaining_time": "0:04:50", "throughput": 1555.93, "total_tokens": 95936} | |
| {"current_steps": 635, "total_steps": 3600, "loss": 0.2187, "lr": 4.912286828928162e-05, "epoch": 3.5277777777777777, "percentage": 17.64, "elapsed_time": "0:01:02", "remaining_time": "0:04:49", "throughput": 1557.71, "total_tokens": 96688} | |
| {"current_steps": 640, "total_steps": 3600, "loss": 0.2402, "lr": 4.909076133021557e-05, "epoch": 3.5555555555555554, "percentage": 17.78, "elapsed_time": "0:01:02", "remaining_time": "0:04:49", "throughput": 1559.35, "total_tokens": 97456} | |
| {"current_steps": 645, "total_steps": 3600, "loss": 0.2395, "lr": 4.9058088132632306e-05, "epoch": 3.5833333333333335, "percentage": 17.92, "elapsed_time": "0:01:02", "remaining_time": "0:04:48", "throughput": 1561.02, "total_tokens": 98208} | |
| {"current_steps": 650, "total_steps": 3600, "loss": 0.2456, "lr": 4.9024849464495215e-05, "epoch": 3.611111111111111, "percentage": 18.06, "elapsed_time": "0:01:03", "remaining_time": "0:04:47", "throughput": 1562.23, "total_tokens": 98944} | |
| {"current_steps": 655, "total_steps": 3600, "loss": 0.2295, "lr": 4.8991046107058735e-05, "epoch": 3.638888888888889, "percentage": 18.19, "elapsed_time": "0:01:03", "remaining_time": "0:04:46", "throughput": 1564.26, "total_tokens": 99728} | |
| {"current_steps": 660, "total_steps": 3600, "loss": 0.2403, "lr": 4.895667885484997e-05, "epoch": 3.6666666666666665, "percentage": 18.33, "elapsed_time": "0:01:04", "remaining_time": "0:04:45", "throughput": 1566.04, "total_tokens": 100496} | |
| {"current_steps": 665, "total_steps": 3600, "loss": 0.2246, "lr": 4.892174851565004e-05, "epoch": 3.6944444444444446, "percentage": 18.47, "elapsed_time": "0:01:04", "remaining_time": "0:04:45", "throughput": 1567.88, "total_tokens": 101264} | |
| {"current_steps": 670, "total_steps": 3600, "loss": 0.2396, "lr": 4.8886255910475054e-05, "epoch": 3.7222222222222223, "percentage": 18.61, "elapsed_time": "0:01:05", "remaining_time": "0:04:44", "throughput": 1569.41, "total_tokens": 102016} | |
| {"current_steps": 675, "total_steps": 3600, "loss": 0.2517, "lr": 4.885020187355687e-05, "epoch": 3.75, "percentage": 18.75, "elapsed_time": "0:01:05", "remaining_time": "0:04:43", "throughput": 1570.83, "total_tokens": 102768} | |
| {"current_steps": 680, "total_steps": 3600, "loss": 0.2297, "lr": 4.881358725232342e-05, "epoch": 3.7777777777777777, "percentage": 18.89, "elapsed_time": "0:01:05", "remaining_time": "0:04:42", "throughput": 1572.41, "total_tokens": 103520} | |
| {"current_steps": 685, "total_steps": 3600, "loss": 0.2379, "lr": 4.877641290737884e-05, "epoch": 3.8055555555555554, "percentage": 19.03, "elapsed_time": "0:01:06", "remaining_time": "0:04:41", "throughput": 1574.18, "total_tokens": 104288} | |
| {"current_steps": 690, "total_steps": 3600, "loss": 0.2403, "lr": 4.873867971248324e-05, "epoch": 3.8333333333333335, "percentage": 19.17, "elapsed_time": "0:01:06", "remaining_time": "0:04:41", "throughput": 1575.91, "total_tokens": 105056} | |
| {"current_steps": 695, "total_steps": 3600, "loss": 0.2258, "lr": 4.870038855453213e-05, "epoch": 3.861111111111111, "percentage": 19.31, "elapsed_time": "0:01:07", "remaining_time": "0:04:40", "throughput": 1577.23, "total_tokens": 105792} | |
| {"current_steps": 700, "total_steps": 3600, "loss": 0.23, "lr": 4.866154033353561e-05, "epoch": 3.888888888888889, "percentage": 19.44, "elapsed_time": "0:01:07", "remaining_time": "0:04:39", "throughput": 1578.8, "total_tokens": 106544} | |
| {"current_steps": 705, "total_steps": 3600, "loss": 0.23, "lr": 4.86221359625972e-05, "epoch": 3.9166666666666665, "percentage": 19.58, "elapsed_time": "0:01:07", "remaining_time": "0:04:38", "throughput": 1580.62, "total_tokens": 107312} | |
| {"current_steps": 710, "total_steps": 3600, "loss": 0.2158, "lr": 4.858217636789241e-05, "epoch": 3.9444444444444446, "percentage": 19.72, "elapsed_time": "0:01:08", "remaining_time": "0:04:38", "throughput": 1582.15, "total_tokens": 108064} | |
| {"current_steps": 715, "total_steps": 3600, "loss": 0.2468, "lr": 4.854166248864689e-05, "epoch": 3.9722222222222223, "percentage": 19.86, "elapsed_time": "0:01:08", "remaining_time": "0:04:37", "throughput": 1584.35, "total_tokens": 108848} | |
| {"current_steps": 720, "total_steps": 3600, "loss": 0.2275, "lr": 4.850059527711444e-05, "epoch": 4.0, "percentage": 20.0, "elapsed_time": "0:01:09", "remaining_time": "0:04:36", "throughput": 1584.44, "total_tokens": 109584} | |
| {"current_steps": 720, "total_steps": 3600, "eval_loss": 0.23304803669452667, "epoch": 4.0, "percentage": 20.0, "elapsed_time": "0:01:10", "remaining_time": "0:04:40", "throughput": 1565.43, "total_tokens": 109584} | |
| {"current_steps": 725, "total_steps": 3600, "loss": 0.2294, "lr": 4.84589756985546e-05, "epoch": 4.027777777777778, "percentage": 20.14, "elapsed_time": "0:01:11", "remaining_time": "0:04:45", "throughput": 1533.03, "total_tokens": 110336} | |
| {"current_steps": 730, "total_steps": 3600, "loss": 0.2314, "lr": 4.8416804731209945e-05, "epoch": 4.055555555555555, "percentage": 20.28, "elapsed_time": "0:01:12", "remaining_time": "0:04:44", "throughput": 1534.55, "total_tokens": 111104} | |
| {"current_steps": 735, "total_steps": 3600, "loss": 0.2337, "lr": 4.8374083366283096e-05, "epoch": 4.083333333333333, "percentage": 20.42, "elapsed_time": "0:01:12", "remaining_time": "0:04:43", "throughput": 1535.83, "total_tokens": 111856} | |
| {"current_steps": 740, "total_steps": 3600, "loss": 0.2443, "lr": 4.833081260791345e-05, "epoch": 4.111111111111111, "percentage": 20.56, "elapsed_time": "0:01:13", "remaining_time": "0:04:43", "throughput": 1537.48, "total_tokens": 112624} | |
| {"current_steps": 745, "total_steps": 3600, "loss": 0.2314, "lr": 4.828699347315356e-05, "epoch": 4.138888888888889, "percentage": 20.69, "elapsed_time": "0:01:13", "remaining_time": "0:04:42", "throughput": 1539.02, "total_tokens": 113376} | |
| {"current_steps": 750, "total_steps": 3600, "loss": 0.2316, "lr": 4.82426269919452e-05, "epoch": 4.166666666666667, "percentage": 20.83, "elapsed_time": "0:01:14", "remaining_time": "0:04:41", "throughput": 1540.13, "total_tokens": 114096} | |
| {"current_steps": 755, "total_steps": 3600, "loss": 0.2339, "lr": 4.8197714207095205e-05, "epoch": 4.194444444444445, "percentage": 20.97, "elapsed_time": "0:01:14", "remaining_time": "0:04:40", "throughput": 1541.55, "total_tokens": 114832} | |
| {"current_steps": 760, "total_steps": 3600, "loss": 0.2317, "lr": 4.815225617425095e-05, "epoch": 4.222222222222222, "percentage": 21.11, "elapsed_time": "0:01:14", "remaining_time": "0:04:39", "throughput": 1543.77, "total_tokens": 115632} | |
| {"current_steps": 765, "total_steps": 3600, "loss": 0.2277, "lr": 4.8106253961875506e-05, "epoch": 4.25, "percentage": 21.25, "elapsed_time": "0:01:15", "remaining_time": "0:04:39", "throughput": 1544.82, "total_tokens": 116352} | |
| {"current_steps": 770, "total_steps": 3600, "loss": 0.2379, "lr": 4.805970865122257e-05, "epoch": 4.277777777777778, "percentage": 21.39, "elapsed_time": "0:01:15", "remaining_time": "0:04:38", "throughput": 1547.0, "total_tokens": 117152} | |
| {"current_steps": 775, "total_steps": 3600, "loss": 0.2341, "lr": 4.8012621336311016e-05, "epoch": 4.305555555555555, "percentage": 21.53, "elapsed_time": "0:01:16", "remaining_time": "0:04:37", "throughput": 1548.54, "total_tokens": 117904} | |
| {"current_steps": 780, "total_steps": 3600, "loss": 0.2498, "lr": 4.7964993123899195e-05, "epoch": 4.333333333333333, "percentage": 21.67, "elapsed_time": "0:01:16", "remaining_time": "0:04:36", "throughput": 1550.27, "total_tokens": 118672} | |
| {"current_steps": 785, "total_steps": 3600, "loss": 0.2342, "lr": 4.791682513345892e-05, "epoch": 4.361111111111111, "percentage": 21.81, "elapsed_time": "0:01:16", "remaining_time": "0:04:35", "throughput": 1551.76, "total_tokens": 119424} | |
| {"current_steps": 790, "total_steps": 3600, "loss": 0.2317, "lr": 4.786811849714918e-05, "epoch": 4.388888888888889, "percentage": 21.94, "elapsed_time": "0:01:17", "remaining_time": "0:04:35", "throughput": 1553.25, "total_tokens": 120176} | |
| {"current_steps": 795, "total_steps": 3600, "loss": 0.2275, "lr": 4.781887435978947e-05, "epoch": 4.416666666666667, "percentage": 22.08, "elapsed_time": "0:01:17", "remaining_time": "0:04:34", "throughput": 1555.12, "total_tokens": 120960} | |
| {"current_steps": 800, "total_steps": 3600, "loss": 0.2274, "lr": 4.776909387883292e-05, "epoch": 4.444444444444445, "percentage": 22.22, "elapsed_time": "0:01:18", "remaining_time": "0:04:33", "throughput": 1556.57, "total_tokens": 121712} | |
| {"current_steps": 805, "total_steps": 3600, "loss": 0.227, "lr": 4.771877822433911e-05, "epoch": 4.472222222222222, "percentage": 22.36, "elapsed_time": "0:01:18", "remaining_time": "0:04:32", "throughput": 1558.0, "total_tokens": 122464} | |
| {"current_steps": 810, "total_steps": 3600, "loss": 0.2314, "lr": 4.766792857894652e-05, "epoch": 4.5, "percentage": 22.5, "elapsed_time": "0:01:19", "remaining_time": "0:04:32", "throughput": 1559.55, "total_tokens": 123232} | |
| {"current_steps": 815, "total_steps": 3600, "loss": 0.2583, "lr": 4.761654613784477e-05, "epoch": 4.527777777777778, "percentage": 22.64, "elapsed_time": "0:01:19", "remaining_time": "0:04:31", "throughput": 1561.12, "total_tokens": 124000} | |
| {"current_steps": 820, "total_steps": 3600, "loss": 0.2518, "lr": 4.756463210874652e-05, "epoch": 4.555555555555555, "percentage": 22.78, "elapsed_time": "0:01:19", "remaining_time": "0:04:30", "throughput": 1562.75, "total_tokens": 124768} | |
| {"current_steps": 825, "total_steps": 3600, "loss": 0.2418, "lr": 4.751218771185906e-05, "epoch": 4.583333333333333, "percentage": 22.92, "elapsed_time": "0:01:20", "remaining_time": "0:04:29", "throughput": 1563.99, "total_tokens": 125520} | |
| {"current_steps": 830, "total_steps": 3600, "loss": 0.2181, "lr": 4.745921417985566e-05, "epoch": 4.611111111111111, "percentage": 23.06, "elapsed_time": "0:01:20", "remaining_time": "0:04:29", "throughput": 1565.03, "total_tokens": 126256} | |
| {"current_steps": 835, "total_steps": 3600, "loss": 0.232, "lr": 4.740571275784659e-05, "epoch": 4.638888888888889, "percentage": 23.19, "elapsed_time": "0:01:21", "remaining_time": "0:04:28", "throughput": 1566.39, "total_tokens": 127024} | |
| {"current_steps": 840, "total_steps": 3600, "loss": 0.2327, "lr": 4.735168470334984e-05, "epoch": 4.666666666666667, "percentage": 23.33, "elapsed_time": "0:01:21", "remaining_time": "0:04:27", "throughput": 1567.8, "total_tokens": 127792} | |
| {"current_steps": 845, "total_steps": 3600, "loss": 0.2405, "lr": 4.729713128626158e-05, "epoch": 4.694444444444445, "percentage": 23.47, "elapsed_time": "0:01:21", "remaining_time": "0:04:27", "throughput": 1568.93, "total_tokens": 128544} | |
| {"current_steps": 850, "total_steps": 3600, "loss": 0.234, "lr": 4.72420537888263e-05, "epoch": 4.722222222222222, "percentage": 23.61, "elapsed_time": "0:01:22", "remaining_time": "0:04:26", "throughput": 1570.38, "total_tokens": 129312} | |
| {"current_steps": 855, "total_steps": 3600, "loss": 0.2316, "lr": 4.7186453505606676e-05, "epoch": 4.75, "percentage": 23.75, "elapsed_time": "0:01:22", "remaining_time": "0:04:25", "throughput": 1572.0, "total_tokens": 130112} | |
| {"current_steps": 860, "total_steps": 3600, "loss": 0.236, "lr": 4.713033174345314e-05, "epoch": 4.777777777777778, "percentage": 23.89, "elapsed_time": "0:01:23", "remaining_time": "0:04:25", "throughput": 1573.18, "total_tokens": 130864} | |
| {"current_steps": 865, "total_steps": 3600, "loss": 0.2155, "lr": 4.707368982147318e-05, "epoch": 4.805555555555555, "percentage": 24.03, "elapsed_time": "0:01:23", "remaining_time": "0:04:24", "throughput": 1574.53, "total_tokens": 131632} | |
| {"current_steps": 870, "total_steps": 3600, "loss": 0.2335, "lr": 4.701652907100029e-05, "epoch": 4.833333333333333, "percentage": 24.17, "elapsed_time": "0:01:24", "remaining_time": "0:04:23", "throughput": 1575.97, "total_tokens": 132400} | |
| {"current_steps": 875, "total_steps": 3600, "loss": 0.2363, "lr": 4.695885083556275e-05, "epoch": 4.861111111111111, "percentage": 24.31, "elapsed_time": "0:01:24", "remaining_time": "0:04:22", "throughput": 1577.18, "total_tokens": 133152} | |
| {"current_steps": 880, "total_steps": 3600, "loss": 0.2614, "lr": 4.6900656470851964e-05, "epoch": 4.888888888888889, "percentage": 24.44, "elapsed_time": "0:01:24", "remaining_time": "0:04:22", "throughput": 1578.6, "total_tokens": 133920} | |
| {"current_steps": 885, "total_steps": 3600, "loss": 0.2221, "lr": 4.684194734469067e-05, "epoch": 4.916666666666667, "percentage": 24.58, "elapsed_time": "0:01:25", "remaining_time": "0:04:21", "throughput": 1580.04, "total_tokens": 134688} | |
| {"current_steps": 890, "total_steps": 3600, "loss": 0.2317, "lr": 4.678272483700074e-05, "epoch": 4.944444444444445, "percentage": 24.72, "elapsed_time": "0:01:25", "remaining_time": "0:04:20", "throughput": 1581.46, "total_tokens": 135456} | |
| {"current_steps": 895, "total_steps": 3600, "loss": 0.2498, "lr": 4.672299033977076e-05, "epoch": 4.972222222222222, "percentage": 24.86, "elapsed_time": "0:01:26", "remaining_time": "0:04:20", "throughput": 1583.21, "total_tokens": 136240} | |
| {"current_steps": 900, "total_steps": 3600, "loss": 0.2341, "lr": 4.6662745257023325e-05, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:01:26", "remaining_time": "0:04:19", "throughput": 1583.67, "total_tokens": 137008} | |
| {"current_steps": 900, "total_steps": 3600, "eval_loss": 0.23569175601005554, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:01:27", "remaining_time": "0:04:22", "throughput": 1568.39, "total_tokens": 137008} | |
| {"current_steps": 905, "total_steps": 3600, "loss": 0.2336, "lr": 4.660199100478202e-05, "epoch": 5.027777777777778, "percentage": 25.14, "elapsed_time": "0:01:29", "remaining_time": "0:04:25", "throughput": 1546.18, "total_tokens": 137776} | |
| {"current_steps": 910, "total_steps": 3600, "loss": 0.2295, "lr": 4.6540729011038146e-05, "epoch": 5.055555555555555, "percentage": 25.28, "elapsed_time": "0:01:29", "remaining_time": "0:04:24", "throughput": 1546.9, "total_tokens": 138496} | |
| {"current_steps": 915, "total_steps": 3600, "loss": 0.2253, "lr": 4.6478960715717176e-05, "epoch": 5.083333333333333, "percentage": 25.42, "elapsed_time": "0:01:29", "remaining_time": "0:04:23", "throughput": 1548.48, "total_tokens": 139280} | |
| {"current_steps": 920, "total_steps": 3600, "loss": 0.2338, "lr": 4.641668757064486e-05, "epoch": 5.111111111111111, "percentage": 25.56, "elapsed_time": "0:01:30", "remaining_time": "0:04:23", "throughput": 1550.09, "total_tokens": 140080} | |
| {"current_steps": 925, "total_steps": 3600, "loss": 0.2493, "lr": 4.6353911039513145e-05, "epoch": 5.138888888888889, "percentage": 25.69, "elapsed_time": "0:01:30", "remaining_time": "0:04:22", "throughput": 1551.58, "total_tokens": 140848} | |
| {"current_steps": 930, "total_steps": 3600, "loss": 0.2273, "lr": 4.6290632597845755e-05, "epoch": 5.166666666666667, "percentage": 25.83, "elapsed_time": "0:01:31", "remaining_time": "0:04:21", "throughput": 1553.07, "total_tokens": 141632} | |
| {"current_steps": 935, "total_steps": 3600, "loss": 0.2319, "lr": 4.622685373296353e-05, "epoch": 5.194444444444445, "percentage": 25.97, "elapsed_time": "0:01:31", "remaining_time": "0:04:21", "throughput": 1554.06, "total_tokens": 142368} | |
| {"current_steps": 940, "total_steps": 3600, "loss": 0.2338, "lr": 4.61625759439494e-05, "epoch": 5.222222222222222, "percentage": 26.11, "elapsed_time": "0:01:32", "remaining_time": "0:04:20", "throughput": 1555.23, "total_tokens": 143120} | |
| {"current_steps": 945, "total_steps": 3600, "loss": 0.2296, "lr": 4.609780074161327e-05, "epoch": 5.25, "percentage": 26.25, "elapsed_time": "0:01:32", "remaining_time": "0:04:19", "throughput": 1556.81, "total_tokens": 143904} | |
| {"current_steps": 950, "total_steps": 3600, "loss": 0.2296, "lr": 4.603252964845638e-05, "epoch": 5.277777777777778, "percentage": 26.39, "elapsed_time": "0:01:32", "remaining_time": "0:04:19", "throughput": 1557.75, "total_tokens": 144640} | |
| {"current_steps": 955, "total_steps": 3600, "loss": 0.2294, "lr": 4.5966764198635606e-05, "epoch": 5.305555555555555, "percentage": 26.53, "elapsed_time": "0:01:33", "remaining_time": "0:04:18", "throughput": 1558.58, "total_tokens": 145376} | |
| {"current_steps": 960, "total_steps": 3600, "loss": 0.2235, "lr": 4.590050593792736e-05, "epoch": 5.333333333333333, "percentage": 26.67, "elapsed_time": "0:01:33", "remaining_time": "0:04:17", "throughput": 1559.88, "total_tokens": 146144} | |
| {"current_steps": 965, "total_steps": 3600, "loss": 0.2386, "lr": 4.583375642369129e-05, "epoch": 5.361111111111111, "percentage": 26.81, "elapsed_time": "0:01:34", "remaining_time": "0:04:16", "throughput": 1561.22, "total_tokens": 146912} | |
| {"current_steps": 970, "total_steps": 3600, "loss": 0.2377, "lr": 4.5766517224833637e-05, "epoch": 5.388888888888889, "percentage": 26.94, "elapsed_time": "0:01:34", "remaining_time": "0:04:16", "throughput": 1562.27, "total_tokens": 147664} | |
| {"current_steps": 975, "total_steps": 3600, "loss": 0.2251, "lr": 4.569878992177039e-05, "epoch": 5.416666666666667, "percentage": 27.08, "elapsed_time": "0:01:34", "remaining_time": "0:04:15", "throughput": 1563.7, "total_tokens": 148448} | |
| {"current_steps": 980, "total_steps": 3600, "loss": 0.2462, "lr": 4.5630576106390114e-05, "epoch": 5.444444444444445, "percentage": 27.22, "elapsed_time": "0:01:35", "remaining_time": "0:04:14", "throughput": 1564.83, "total_tokens": 149200} | |
| {"current_steps": 985, "total_steps": 3600, "loss": 0.2139, "lr": 4.556187738201656e-05, "epoch": 5.472222222222222, "percentage": 27.36, "elapsed_time": "0:01:35", "remaining_time": "0:04:14", "throughput": 1565.73, "total_tokens": 149936} | |
| {"current_steps": 990, "total_steps": 3600, "loss": 0.2209, "lr": 4.549269536337095e-05, "epoch": 5.5, "percentage": 27.5, "elapsed_time": "0:01:36", "remaining_time": "0:04:13", "throughput": 1566.58, "total_tokens": 150672} | |
| {"current_steps": 995, "total_steps": 3600, "loss": 0.2216, "lr": 4.5423031676534065e-05, "epoch": 5.527777777777778, "percentage": 27.64, "elapsed_time": "0:01:36", "remaining_time": "0:04:12", "throughput": 1567.73, "total_tokens": 151440} | |
| {"current_steps": 1000, "total_steps": 3600, "loss": 0.24, "lr": 4.535288795890798e-05, "epoch": 5.555555555555555, "percentage": 27.78, "elapsed_time": "0:01:37", "remaining_time": "0:04:12", "throughput": 1569.16, "total_tokens": 152224} | |
| {"current_steps": 1005, "total_steps": 3600, "loss": 0.2357, "lr": 4.528226585917761e-05, "epoch": 5.583333333333333, "percentage": 27.92, "elapsed_time": "0:01:37", "remaining_time": "0:04:11", "throughput": 1570.26, "total_tokens": 152976} | |
| {"current_steps": 1010, "total_steps": 3600, "loss": 0.2383, "lr": 4.521116703727193e-05, "epoch": 5.611111111111111, "percentage": 28.06, "elapsed_time": "0:01:37", "remaining_time": "0:04:10", "throughput": 1571.56, "total_tokens": 153744} | |
| {"current_steps": 1015, "total_steps": 3600, "loss": 0.2352, "lr": 4.5139593164324986e-05, "epoch": 5.638888888888889, "percentage": 28.19, "elapsed_time": "0:01:38", "remaining_time": "0:04:10", "throughput": 1572.76, "total_tokens": 154512} | |
| {"current_steps": 1020, "total_steps": 3600, "loss": 0.2343, "lr": 4.506754592263662e-05, "epoch": 5.666666666666667, "percentage": 28.33, "elapsed_time": "0:01:38", "remaining_time": "0:04:09", "throughput": 1573.95, "total_tokens": 155280} | |
| {"current_steps": 1025, "total_steps": 3600, "loss": 0.2422, "lr": 4.49950270056329e-05, "epoch": 5.694444444444445, "percentage": 28.47, "elapsed_time": "0:01:39", "remaining_time": "0:04:08", "throughput": 1575.05, "total_tokens": 156032} | |
| {"current_steps": 1030, "total_steps": 3600, "loss": 0.2336, "lr": 4.4922038117826334e-05, "epoch": 5.722222222222222, "percentage": 28.61, "elapsed_time": "0:01:39", "remaining_time": "0:04:08", "throughput": 1576.72, "total_tokens": 156848} | |
| {"current_steps": 1035, "total_steps": 3600, "loss": 0.2347, "lr": 4.48485809747758e-05, "epoch": 5.75, "percentage": 28.75, "elapsed_time": "0:01:39", "remaining_time": "0:04:07", "throughput": 1577.61, "total_tokens": 157600} | |
| {"current_steps": 1040, "total_steps": 3600, "loss": 0.2333, "lr": 4.477465730304624e-05, "epoch": 5.777777777777778, "percentage": 28.89, "elapsed_time": "0:01:40", "remaining_time": "0:04:06", "throughput": 1578.59, "total_tokens": 158352} | |
| {"current_steps": 1045, "total_steps": 3600, "loss": 0.2372, "lr": 4.4700268840168045e-05, "epoch": 5.805555555555555, "percentage": 29.03, "elapsed_time": "0:01:40", "remaining_time": "0:04:06", "throughput": 1579.64, "total_tokens": 159104} | |
| {"current_steps": 1050, "total_steps": 3600, "loss": 0.2318, "lr": 4.462541733459628e-05, "epoch": 5.833333333333333, "percentage": 29.17, "elapsed_time": "0:01:41", "remaining_time": "0:04:05", "throughput": 1580.62, "total_tokens": 159856} | |
| {"current_steps": 1055, "total_steps": 3600, "loss": 0.2337, "lr": 4.455010454566947e-05, "epoch": 5.861111111111111, "percentage": 29.31, "elapsed_time": "0:01:41", "remaining_time": "0:04:04", "throughput": 1581.78, "total_tokens": 160624} | |
| {"current_steps": 1060, "total_steps": 3600, "loss": 0.2385, "lr": 4.447433224356839e-05, "epoch": 5.888888888888889, "percentage": 29.44, "elapsed_time": "0:01:41", "remaining_time": "0:04:04", "throughput": 1582.58, "total_tokens": 161360} | |
| {"current_steps": 1065, "total_steps": 3600, "loss": 0.2285, "lr": 4.439810220927436e-05, "epoch": 5.916666666666667, "percentage": 29.58, "elapsed_time": "0:01:42", "remaining_time": "0:04:03", "throughput": 1583.05, "total_tokens": 162080} | |
| {"current_steps": 1070, "total_steps": 3600, "loss": 0.2425, "lr": 4.432141623452743e-05, "epoch": 5.944444444444445, "percentage": 29.72, "elapsed_time": "0:01:42", "remaining_time": "0:04:03", "throughput": 1584.05, "total_tokens": 162832} | |
| {"current_steps": 1075, "total_steps": 3600, "loss": 0.2258, "lr": 4.4244276121784195e-05, "epoch": 5.972222222222222, "percentage": 29.86, "elapsed_time": "0:01:43", "remaining_time": "0:04:02", "throughput": 1584.98, "total_tokens": 163568} | |
| {"current_steps": 1080, "total_steps": 3600, "loss": 0.236, "lr": 4.416668368417556e-05, "epoch": 6.0, "percentage": 30.0, "elapsed_time": "0:01:43", "remaining_time": "0:04:01", "throughput": 1585.31, "total_tokens": 164336} | |
| {"current_steps": 1080, "total_steps": 3600, "eval_loss": 0.2312491238117218, "epoch": 6.0, "percentage": 30.0, "elapsed_time": "0:01:44", "remaining_time": "0:04:03", "throughput": 1572.48, "total_tokens": 164336} | |
| {"current_steps": 1085, "total_steps": 3600, "loss": 0.2276, "lr": 4.408864074546401e-05, "epoch": 6.027777777777778, "percentage": 30.14, "elapsed_time": "0:01:46", "remaining_time": "0:04:06", "throughput": 1551.48, "total_tokens": 165072} | |
| {"current_steps": 1090, "total_steps": 3600, "loss": 0.2238, "lr": 4.401014914000078e-05, "epoch": 6.055555555555555, "percentage": 30.28, "elapsed_time": "0:01:46", "remaining_time": "0:04:05", "throughput": 1552.37, "total_tokens": 165824} | |
| {"current_steps": 1095, "total_steps": 3600, "loss": 0.2221, "lr": 4.393121071268274e-05, "epoch": 6.083333333333333, "percentage": 30.42, "elapsed_time": "0:01:47", "remaining_time": "0:04:05", "throughput": 1553.62, "total_tokens": 166608} | |
| {"current_steps": 1100, "total_steps": 3600, "loss": 0.2433, "lr": 4.3851827318909036e-05, "epoch": 6.111111111111111, "percentage": 30.56, "elapsed_time": "0:01:47", "remaining_time": "0:04:04", "throughput": 1554.65, "total_tokens": 167360} | |
| {"current_steps": 1105, "total_steps": 3600, "loss": 0.2346, "lr": 4.377200082453749e-05, "epoch": 6.138888888888889, "percentage": 30.69, "elapsed_time": "0:01:48", "remaining_time": "0:04:04", "throughput": 1555.76, "total_tokens": 168128} | |
| {"current_steps": 1110, "total_steps": 3600, "loss": 0.2325, "lr": 4.36917331058407e-05, "epoch": 6.166666666666667, "percentage": 30.83, "elapsed_time": "0:01:48", "remaining_time": "0:04:03", "throughput": 1556.8, "total_tokens": 168896} | |
| {"current_steps": 1115, "total_steps": 3600, "loss": 0.2294, "lr": 4.361102604946201e-05, "epoch": 6.194444444444445, "percentage": 30.97, "elapsed_time": "0:01:48", "remaining_time": "0:04:02", "throughput": 1557.64, "total_tokens": 169648} | |
| {"current_steps": 1120, "total_steps": 3600, "loss": 0.2294, "lr": 4.3529881552371096e-05, "epoch": 6.222222222222222, "percentage": 31.11, "elapsed_time": "0:01:49", "remaining_time": "0:04:02", "throughput": 1558.79, "total_tokens": 170416} | |
| {"current_steps": 1125, "total_steps": 3600, "loss": 0.2337, "lr": 4.344830152181941e-05, "epoch": 6.25, "percentage": 31.25, "elapsed_time": "0:01:49", "remaining_time": "0:04:01", "throughput": 1559.94, "total_tokens": 171184} | |
| {"current_steps": 1130, "total_steps": 3600, "loss": 0.2317, "lr": 4.336628787529538e-05, "epoch": 6.277777777777778, "percentage": 31.39, "elapsed_time": "0:01:50", "remaining_time": "0:04:00", "throughput": 1561.29, "total_tokens": 171984} | |
| {"current_steps": 1135, "total_steps": 3600, "loss": 0.2233, "lr": 4.3283842540479264e-05, "epoch": 6.305555555555555, "percentage": 31.53, "elapsed_time": "0:01:50", "remaining_time": "0:04:00", "throughput": 1562.54, "total_tokens": 172768} | |
| {"current_steps": 1140, "total_steps": 3600, "loss": 0.2317, "lr": 4.320096745519793e-05, "epoch": 6.333333333333333, "percentage": 31.67, "elapsed_time": "0:01:50", "remaining_time": "0:03:59", "throughput": 1563.5, "total_tokens": 173520} | |
| {"current_steps": 1145, "total_steps": 3600, "loss": 0.2278, "lr": 4.3117664567379237e-05, "epoch": 6.361111111111111, "percentage": 31.81, "elapsed_time": "0:01:51", "remaining_time": "0:03:58", "throughput": 1564.71, "total_tokens": 174304} | |
| {"current_steps": 1150, "total_steps": 3600, "loss": 0.2358, "lr": 4.303393583500628e-05, "epoch": 6.388888888888889, "percentage": 31.94, "elapsed_time": "0:01:51", "remaining_time": "0:03:58", "throughput": 1565.39, "total_tokens": 175040} | |
| {"current_steps": 1155, "total_steps": 3600, "loss": 0.2325, "lr": 4.2949783226071406e-05, "epoch": 6.416666666666667, "percentage": 32.08, "elapsed_time": "0:01:52", "remaining_time": "0:03:57", "throughput": 1566.18, "total_tokens": 175776} | |
| {"current_steps": 1160, "total_steps": 3600, "loss": 0.2291, "lr": 4.286520871852987e-05, "epoch": 6.444444444444445, "percentage": 32.22, "elapsed_time": "0:01:52", "remaining_time": "0:03:56", "throughput": 1566.94, "total_tokens": 176512} | |
| {"current_steps": 1165, "total_steps": 3600, "loss": 0.2456, "lr": 4.278021430025343e-05, "epoch": 6.472222222222222, "percentage": 32.36, "elapsed_time": "0:01:53", "remaining_time": "0:03:56", "throughput": 1568.03, "total_tokens": 177280} | |
| {"current_steps": 1170, "total_steps": 3600, "loss": 0.2344, "lr": 4.2694801968983566e-05, "epoch": 6.5, "percentage": 32.5, "elapsed_time": "0:01:53", "remaining_time": "0:03:55", "throughput": 1568.89, "total_tokens": 178032} | |
| {"current_steps": 1175, "total_steps": 3600, "loss": 0.2234, "lr": 4.260897373228456e-05, "epoch": 6.527777777777778, "percentage": 32.64, "elapsed_time": "0:01:53", "remaining_time": "0:03:55", "throughput": 1569.83, "total_tokens": 178784} | |
| {"current_steps": 1180, "total_steps": 3600, "loss": 0.2243, "lr": 4.2522731607496275e-05, "epoch": 6.555555555555555, "percentage": 32.78, "elapsed_time": "0:01:54", "remaining_time": "0:03:54", "throughput": 1570.87, "total_tokens": 179568} | |
| {"current_steps": 1185, "total_steps": 3600, "loss": 0.236, "lr": 4.2436077621686786e-05, "epoch": 6.583333333333333, "percentage": 32.92, "elapsed_time": "0:01:54", "remaining_time": "0:03:53", "throughput": 1571.9, "total_tokens": 180336} | |
| {"current_steps": 1190, "total_steps": 3600, "loss": 0.2335, "lr": 4.234901381160469e-05, "epoch": 6.611111111111111, "percentage": 33.06, "elapsed_time": "0:01:55", "remaining_time": "0:03:53", "throughput": 1572.48, "total_tokens": 181056} | |
| {"current_steps": 1195, "total_steps": 3600, "loss": 0.2302, "lr": 4.226154222363124e-05, "epoch": 6.638888888888889, "percentage": 33.19, "elapsed_time": "0:01:55", "remaining_time": "0:03:52", "throughput": 1573.65, "total_tokens": 181840} | |
| {"current_steps": 1200, "total_steps": 3600, "loss": 0.2272, "lr": 4.21736649137323e-05, "epoch": 6.666666666666667, "percentage": 33.33, "elapsed_time": "0:01:55", "remaining_time": "0:03:51", "throughput": 1574.52, "total_tokens": 182592} | |
| {"current_steps": 1205, "total_steps": 3600, "loss": 0.2556, "lr": 4.208538394740993e-05, "epoch": 6.694444444444445, "percentage": 33.47, "elapsed_time": "0:01:56", "remaining_time": "0:03:51", "throughput": 1575.4, "total_tokens": 183344} | |
| {"current_steps": 1210, "total_steps": 3600, "loss": 0.2201, "lr": 4.199670139965393e-05, "epoch": 6.722222222222222, "percentage": 33.61, "elapsed_time": "0:01:56", "remaining_time": "0:03:50", "throughput": 1576.37, "total_tokens": 184112} | |
| {"current_steps": 1215, "total_steps": 3600, "loss": 0.2304, "lr": 4.1907619354892965e-05, "epoch": 6.75, "percentage": 33.75, "elapsed_time": "0:01:57", "remaining_time": "0:03:50", "throughput": 1577.19, "total_tokens": 184864} | |
| {"current_steps": 1220, "total_steps": 3600, "loss": 0.2436, "lr": 4.1818139906945694e-05, "epoch": 6.777777777777778, "percentage": 33.89, "elapsed_time": "0:01:57", "remaining_time": "0:03:49", "throughput": 1578.07, "total_tokens": 185632} | |
| {"current_steps": 1225, "total_steps": 3600, "loss": 0.2305, "lr": 4.172826515897146e-05, "epoch": 6.805555555555555, "percentage": 34.03, "elapsed_time": "0:01:58", "remaining_time": "0:03:48", "throughput": 1578.79, "total_tokens": 186368} | |
| {"current_steps": 1230, "total_steps": 3600, "loss": 0.2342, "lr": 4.163799722342089e-05, "epoch": 6.833333333333333, "percentage": 34.17, "elapsed_time": "0:01:58", "remaining_time": "0:03:48", "throughput": 1579.65, "total_tokens": 187120} | |
| {"current_steps": 1235, "total_steps": 3600, "loss": 0.2402, "lr": 4.1547338221986266e-05, "epoch": 6.861111111111111, "percentage": 34.31, "elapsed_time": "0:01:58", "remaining_time": "0:03:47", "throughput": 1580.65, "total_tokens": 187888} | |
| {"current_steps": 1240, "total_steps": 3600, "loss": 0.2297, "lr": 4.1456290285551596e-05, "epoch": 6.888888888888889, "percentage": 34.44, "elapsed_time": "0:01:59", "remaining_time": "0:03:47", "throughput": 1581.76, "total_tokens": 188672} | |
| {"current_steps": 1245, "total_steps": 3600, "loss": 0.2256, "lr": 4.13648555541426e-05, "epoch": 6.916666666666667, "percentage": 34.58, "elapsed_time": "0:01:59", "remaining_time": "0:03:46", "throughput": 1582.62, "total_tokens": 189424} | |
| {"current_steps": 1250, "total_steps": 3600, "loss": 0.232, "lr": 4.127303617687636e-05, "epoch": 6.944444444444445, "percentage": 34.72, "elapsed_time": "0:02:00", "remaining_time": "0:03:45", "throughput": 1583.47, "total_tokens": 190176} | |
| {"current_steps": 1255, "total_steps": 3600, "loss": 0.2197, "lr": 4.118083431191081e-05, "epoch": 6.972222222222222, "percentage": 34.86, "elapsed_time": "0:02:00", "remaining_time": "0:03:45", "throughput": 1584.39, "total_tokens": 190928} | |
| {"current_steps": 1260, "total_steps": 3600, "loss": 0.2103, "lr": 4.108825212639405e-05, "epoch": 7.0, "percentage": 35.0, "elapsed_time": "0:02:00", "remaining_time": "0:03:44", "throughput": 1584.84, "total_tokens": 191712} | |
| {"current_steps": 1260, "total_steps": 3600, "eval_loss": 0.23792143166065216, "epoch": 7.0, "percentage": 35.0, "elapsed_time": "0:02:01", "remaining_time": "0:03:46", "throughput": 1573.88, "total_tokens": 191712} | |
| {"current_steps": 1265, "total_steps": 3600, "loss": 0.2294, "lr": 4.099529179641337e-05, "epoch": 7.027777777777778, "percentage": 35.14, "elapsed_time": "0:02:04", "remaining_time": "0:03:48", "throughput": 1551.49, "total_tokens": 192480} | |
| {"current_steps": 1270, "total_steps": 3600, "loss": 0.2162, "lr": 4.09019555069441e-05, "epoch": 7.055555555555555, "percentage": 35.28, "elapsed_time": "0:02:04", "remaining_time": "0:03:48", "throughput": 1552.23, "total_tokens": 193248} | |
| {"current_steps": 1275, "total_steps": 3600, "loss": 0.2273, "lr": 4.080824545179828e-05, "epoch": 7.083333333333333, "percentage": 35.42, "elapsed_time": "0:02:04", "remaining_time": "0:03:47", "throughput": 1552.95, "total_tokens": 194000} | |
| {"current_steps": 1280, "total_steps": 3600, "loss": 0.2495, "lr": 4.071416383357307e-05, "epoch": 7.111111111111111, "percentage": 35.56, "elapsed_time": "0:02:05", "remaining_time": "0:03:47", "throughput": 1553.69, "total_tokens": 194752} | |
| {"current_steps": 1285, "total_steps": 3600, "loss": 0.2551, "lr": 4.0619712863599e-05, "epoch": 7.138888888888889, "percentage": 35.69, "elapsed_time": "0:02:05", "remaining_time": "0:03:46", "throughput": 1554.52, "total_tokens": 195504} | |
| {"current_steps": 1290, "total_steps": 3600, "loss": 0.2465, "lr": 4.0524894761888e-05, "epoch": 7.166666666666667, "percentage": 35.83, "elapsed_time": "0:02:06", "remaining_time": "0:03:45", "throughput": 1555.02, "total_tokens": 196224} | |
| {"current_steps": 1295, "total_steps": 3600, "loss": 0.2236, "lr": 4.042971175708118e-05, "epoch": 7.194444444444445, "percentage": 35.97, "elapsed_time": "0:02:06", "remaining_time": "0:03:45", "throughput": 1555.8, "total_tokens": 196976} | |
| {"current_steps": 1300, "total_steps": 3600, "loss": 0.234, "lr": 4.0334166086396484e-05, "epoch": 7.222222222222222, "percentage": 36.11, "elapsed_time": "0:02:07", "remaining_time": "0:03:44", "throughput": 1557.05, "total_tokens": 197776} | |
| {"current_steps": 1305, "total_steps": 3600, "loss": 0.234, "lr": 4.0238259995576084e-05, "epoch": 7.25, "percentage": 36.25, "elapsed_time": "0:02:07", "remaining_time": "0:03:44", "throughput": 1557.74, "total_tokens": 198512} | |
| {"current_steps": 1310, "total_steps": 3600, "loss": 0.2295, "lr": 4.0141995738833625e-05, "epoch": 7.277777777777778, "percentage": 36.39, "elapsed_time": "0:02:07", "remaining_time": "0:03:43", "throughput": 1558.62, "total_tokens": 199280} | |
| {"current_steps": 1315, "total_steps": 3600, "loss": 0.238, "lr": 4.0045375578801214e-05, "epoch": 7.305555555555555, "percentage": 36.53, "elapsed_time": "0:02:08", "remaining_time": "0:03:42", "throughput": 1559.49, "total_tokens": 200032} | |
| {"current_steps": 1320, "total_steps": 3600, "loss": 0.2259, "lr": 3.994840178647623e-05, "epoch": 7.333333333333333, "percentage": 36.67, "elapsed_time": "0:02:08", "remaining_time": "0:03:42", "throughput": 1560.23, "total_tokens": 200768} | |
| {"current_steps": 1325, "total_steps": 3600, "loss": 0.2207, "lr": 3.985107664116798e-05, "epoch": 7.361111111111111, "percentage": 36.81, "elapsed_time": "0:02:09", "remaining_time": "0:03:41", "throughput": 1561.34, "total_tokens": 201552} | |
| {"current_steps": 1330, "total_steps": 3600, "loss": 0.224, "lr": 3.9753402430444116e-05, "epoch": 7.388888888888889, "percentage": 36.94, "elapsed_time": "0:02:09", "remaining_time": "0:03:41", "throughput": 1562.32, "total_tokens": 202320} | |
| {"current_steps": 1335, "total_steps": 3600, "loss": 0.2026, "lr": 3.9655381450076826e-05, "epoch": 7.416666666666667, "percentage": 37.08, "elapsed_time": "0:02:09", "remaining_time": "0:03:40", "throughput": 1563.3, "total_tokens": 203088} | |
| {"current_steps": 1340, "total_steps": 3600, "loss": 0.2945, "lr": 3.955701600398892e-05, "epoch": 7.444444444444445, "percentage": 37.22, "elapsed_time": "0:02:10", "remaining_time": "0:03:39", "throughput": 1563.96, "total_tokens": 203824} | |
| {"current_steps": 1345, "total_steps": 3600, "loss": 0.2271, "lr": 3.945830840419966e-05, "epoch": 7.472222222222222, "percentage": 37.36, "elapsed_time": "0:02:10", "remaining_time": "0:03:39", "throughput": 1564.8, "total_tokens": 204576} | |
| {"current_steps": 1350, "total_steps": 3600, "loss": 0.2358, "lr": 3.935926097077045e-05, "epoch": 7.5, "percentage": 37.5, "elapsed_time": "0:02:11", "remaining_time": "0:03:38", "throughput": 1565.48, "total_tokens": 205312} | |
| {"current_steps": 1355, "total_steps": 3600, "loss": 0.2252, "lr": 3.925987603175023e-05, "epoch": 7.527777777777778, "percentage": 37.64, "elapsed_time": "0:02:11", "remaining_time": "0:03:37", "throughput": 1566.43, "total_tokens": 206080} | |
| {"current_steps": 1360, "total_steps": 3600, "loss": 0.2275, "lr": 3.916015592312082e-05, "epoch": 7.555555555555555, "percentage": 37.78, "elapsed_time": "0:02:11", "remaining_time": "0:03:37", "throughput": 1567.37, "total_tokens": 206848} | |
| {"current_steps": 1365, "total_steps": 3600, "loss": 0.2218, "lr": 3.9060102988742e-05, "epoch": 7.583333333333333, "percentage": 37.92, "elapsed_time": "0:02:12", "remaining_time": "0:03:36", "throughput": 1568.28, "total_tokens": 207616} | |
| {"current_steps": 1370, "total_steps": 3600, "loss": 0.2411, "lr": 3.8959719580296415e-05, "epoch": 7.611111111111111, "percentage": 38.06, "elapsed_time": "0:02:12", "remaining_time": "0:03:36", "throughput": 1568.96, "total_tokens": 208352} | |
| {"current_steps": 1375, "total_steps": 3600, "loss": 0.2321, "lr": 3.885900805723429e-05, "epoch": 7.638888888888889, "percentage": 38.19, "elapsed_time": "0:02:13", "remaining_time": "0:03:35", "throughput": 1569.71, "total_tokens": 209104} | |
| {"current_steps": 1380, "total_steps": 3600, "loss": 0.2197, "lr": 3.875797078671798e-05, "epoch": 7.666666666666667, "percentage": 38.33, "elapsed_time": "0:02:13", "remaining_time": "0:03:34", "throughput": 1570.19, "total_tokens": 209824} | |
| {"current_steps": 1385, "total_steps": 3600, "loss": 0.2272, "lr": 3.865661014356635e-05, "epoch": 7.694444444444445, "percentage": 38.47, "elapsed_time": "0:02:14", "remaining_time": "0:03:34", "throughput": 1570.96, "total_tokens": 210576} | |
| {"current_steps": 1390, "total_steps": 3600, "loss": 0.2434, "lr": 3.855492851019893e-05, "epoch": 7.722222222222222, "percentage": 38.61, "elapsed_time": "0:02:14", "remaining_time": "0:03:33", "throughput": 1571.91, "total_tokens": 211376} | |
| {"current_steps": 1395, "total_steps": 3600, "loss": 0.2328, "lr": 3.8452928276579916e-05, "epoch": 7.75, "percentage": 38.75, "elapsed_time": "0:02:14", "remaining_time": "0:03:33", "throughput": 1572.58, "total_tokens": 212128} | |
| {"current_steps": 1400, "total_steps": 3600, "loss": 0.236, "lr": 3.835061184016203e-05, "epoch": 7.777777777777778, "percentage": 38.89, "elapsed_time": "0:02:15", "remaining_time": "0:03:32", "throughput": 1573.57, "total_tokens": 212928} | |
| {"current_steps": 1405, "total_steps": 3600, "loss": 0.2321, "lr": 3.824798160583012e-05, "epoch": 7.805555555555555, "percentage": 39.03, "elapsed_time": "0:02:15", "remaining_time": "0:03:32", "throughput": 1574.46, "total_tokens": 213696} | |
| {"current_steps": 1410, "total_steps": 3600, "loss": 0.2256, "lr": 3.814503998584471e-05, "epoch": 7.833333333333333, "percentage": 39.17, "elapsed_time": "0:02:16", "remaining_time": "0:03:31", "throughput": 1575.35, "total_tokens": 214464} | |
| {"current_steps": 1415, "total_steps": 3600, "loss": 0.2198, "lr": 3.804178939978517e-05, "epoch": 7.861111111111111, "percentage": 39.31, "elapsed_time": "0:02:16", "remaining_time": "0:03:30", "throughput": 1576.37, "total_tokens": 215248} | |
| {"current_steps": 1420, "total_steps": 3600, "loss": 0.2279, "lr": 3.7938232274493e-05, "epoch": 7.888888888888889, "percentage": 39.44, "elapsed_time": "0:02:16", "remaining_time": "0:03:30", "throughput": 1577.26, "total_tokens": 216016} | |
| {"current_steps": 1425, "total_steps": 3600, "loss": 0.2001, "lr": 3.783437104401469e-05, "epoch": 7.916666666666667, "percentage": 39.58, "elapsed_time": "0:02:17", "remaining_time": "0:03:29", "throughput": 1578.15, "total_tokens": 216784} | |
| {"current_steps": 1430, "total_steps": 3600, "loss": 0.2461, "lr": 3.773020814954453e-05, "epoch": 7.944444444444445, "percentage": 39.72, "elapsed_time": "0:02:17", "remaining_time": "0:03:29", "throughput": 1579.03, "total_tokens": 217552} | |
| {"current_steps": 1435, "total_steps": 3600, "loss": 0.2246, "lr": 3.762574603936725e-05, "epoch": 7.972222222222222, "percentage": 39.86, "elapsed_time": "0:02:18", "remaining_time": "0:03:28", "throughput": 1580.01, "total_tokens": 218320} | |
| {"current_steps": 1440, "total_steps": 3600, "loss": 0.24, "lr": 3.752098716880045e-05, "epoch": 8.0, "percentage": 40.0, "elapsed_time": "0:02:18", "remaining_time": "0:03:27", "throughput": 1580.16, "total_tokens": 219072} | |
| {"current_steps": 1440, "total_steps": 3600, "eval_loss": 0.24177499115467072, "epoch": 8.0, "percentage": 40.0, "elapsed_time": "0:02:19", "remaining_time": "0:03:29", "throughput": 1570.6, "total_tokens": 219072} | |
| {"current_steps": 1445, "total_steps": 3600, "loss": 0.2449, "lr": 3.74159340001369e-05, "epoch": 8.027777777777779, "percentage": 40.14, "elapsed_time": "0:02:21", "remaining_time": "0:03:30", "throughput": 1553.82, "total_tokens": 219824} | |
| {"current_steps": 1450, "total_steps": 3600, "loss": 0.2346, "lr": 3.731058900258668e-05, "epoch": 8.055555555555555, "percentage": 40.28, "elapsed_time": "0:02:21", "remaining_time": "0:03:30", "throughput": 1554.7, "total_tokens": 220608} | |
| {"current_steps": 1455, "total_steps": 3600, "loss": 0.2317, "lr": 3.7204954652219104e-05, "epoch": 8.083333333333334, "percentage": 40.42, "elapsed_time": "0:02:22", "remaining_time": "0:03:29", "throughput": 1555.25, "total_tokens": 221344} | |
| {"current_steps": 1460, "total_steps": 3600, "loss": 0.2173, "lr": 3.7099033431904575e-05, "epoch": 8.11111111111111, "percentage": 40.56, "elapsed_time": "0:02:22", "remaining_time": "0:03:29", "throughput": 1555.93, "total_tokens": 222096} | |
| {"current_steps": 1465, "total_steps": 3600, "loss": 0.2158, "lr": 3.699282783125616e-05, "epoch": 8.13888888888889, "percentage": 40.69, "elapsed_time": "0:02:23", "remaining_time": "0:03:28", "throughput": 1556.66, "total_tokens": 222864} | |
| {"current_steps": 1470, "total_steps": 3600, "loss": 0.2098, "lr": 3.688634034657115e-05, "epoch": 8.166666666666666, "percentage": 40.83, "elapsed_time": "0:02:23", "remaining_time": "0:03:28", "throughput": 1557.58, "total_tokens": 223648} | |
| {"current_steps": 1475, "total_steps": 3600, "loss": 0.2215, "lr": 3.6779573480772325e-05, "epoch": 8.194444444444445, "percentage": 40.97, "elapsed_time": "0:02:24", "remaining_time": "0:03:27", "throughput": 1558.56, "total_tokens": 224448} | |
| {"current_steps": 1480, "total_steps": 3600, "loss": 0.255, "lr": 3.6672529743349146e-05, "epoch": 8.222222222222221, "percentage": 41.11, "elapsed_time": "0:02:24", "remaining_time": "0:03:26", "throughput": 1559.08, "total_tokens": 225184} | |
| {"current_steps": 1485, "total_steps": 3600, "loss": 0.2673, "lr": 3.656521165029879e-05, "epoch": 8.25, "percentage": 41.25, "elapsed_time": "0:02:24", "remaining_time": "0:03:26", "throughput": 1559.78, "total_tokens": 225936} | |
| {"current_steps": 1490, "total_steps": 3600, "loss": 0.2224, "lr": 3.6457621724066964e-05, "epoch": 8.277777777777779, "percentage": 41.39, "elapsed_time": "0:02:25", "remaining_time": "0:03:25", "throughput": 1560.31, "total_tokens": 226672} | |
| {"current_steps": 1495, "total_steps": 3600, "loss": 0.2324, "lr": 3.634976249348867e-05, "epoch": 8.305555555555555, "percentage": 41.53, "elapsed_time": "0:02:25", "remaining_time": "0:03:25", "throughput": 1561.06, "total_tokens": 227424} | |
| {"current_steps": 1500, "total_steps": 3600, "loss": 0.2379, "lr": 3.6241636493728736e-05, "epoch": 8.333333333333334, "percentage": 41.67, "elapsed_time": "0:02:26", "remaining_time": "0:03:24", "throughput": 1562.02, "total_tokens": 228208} | |
| {"current_steps": 1505, "total_steps": 3600, "loss": 0.2496, "lr": 3.613324626622224e-05, "epoch": 8.36111111111111, "percentage": 41.81, "elapsed_time": "0:02:26", "remaining_time": "0:03:23", "throughput": 1562.94, "total_tokens": 228992} | |
| {"current_steps": 1510, "total_steps": 3600, "loss": 0.2361, "lr": 3.602459435861475e-05, "epoch": 8.38888888888889, "percentage": 41.94, "elapsed_time": "0:02:26", "remaining_time": "0:03:23", "throughput": 1563.67, "total_tokens": 229744} | |
| {"current_steps": 1515, "total_steps": 3600, "loss": 0.2283, "lr": 3.591568332470249e-05, "epoch": 8.416666666666666, "percentage": 42.08, "elapsed_time": "0:02:27", "remaining_time": "0:03:22", "throughput": 1564.38, "total_tokens": 230496} | |
| {"current_steps": 1520, "total_steps": 3600, "loss": 0.2484, "lr": 3.5806515724372274e-05, "epoch": 8.444444444444445, "percentage": 42.22, "elapsed_time": "0:02:27", "remaining_time": "0:03:22", "throughput": 1565.12, "total_tokens": 231248} | |
| {"current_steps": 1525, "total_steps": 3600, "loss": 0.2262, "lr": 3.569709412354136e-05, "epoch": 8.472222222222221, "percentage": 42.36, "elapsed_time": "0:02:28", "remaining_time": "0:03:21", "throughput": 1565.86, "total_tokens": 232000} | |
| {"current_steps": 1530, "total_steps": 3600, "loss": 0.2362, "lr": 3.5587421094097115e-05, "epoch": 8.5, "percentage": 42.5, "elapsed_time": "0:02:28", "remaining_time": "0:03:21", "throughput": 1566.69, "total_tokens": 232768} | |
| {"current_steps": 1535, "total_steps": 3600, "loss": 0.2216, "lr": 3.5477499213836616e-05, "epoch": 8.527777777777779, "percentage": 42.64, "elapsed_time": "0:02:28", "remaining_time": "0:03:20", "throughput": 1567.75, "total_tokens": 233568} | |
| {"current_steps": 1540, "total_steps": 3600, "loss": 0.2283, "lr": 3.536733106640598e-05, "epoch": 8.555555555555555, "percentage": 42.78, "elapsed_time": "0:02:29", "remaining_time": "0:03:19", "throughput": 1568.49, "total_tokens": 234320} | |
| {"current_steps": 1545, "total_steps": 3600, "loss": 0.2336, "lr": 3.525691924123971e-05, "epoch": 8.583333333333334, "percentage": 42.92, "elapsed_time": "0:02:29", "remaining_time": "0:03:19", "throughput": 1569.0, "total_tokens": 235040} | |
| {"current_steps": 1550, "total_steps": 3600, "loss": 0.2289, "lr": 3.5146266333499795e-05, "epoch": 8.61111111111111, "percentage": 43.06, "elapsed_time": "0:02:30", "remaining_time": "0:03:18", "throughput": 1569.91, "total_tokens": 235824} | |
| {"current_steps": 1555, "total_steps": 3600, "loss": 0.248, "lr": 3.503537494401473e-05, "epoch": 8.63888888888889, "percentage": 43.19, "elapsed_time": "0:02:30", "remaining_time": "0:03:18", "throughput": 1570.73, "total_tokens": 236592} | |
| {"current_steps": 1560, "total_steps": 3600, "loss": 0.2297, "lr": 3.4924247679218375e-05, "epoch": 8.666666666666666, "percentage": 43.33, "elapsed_time": "0:02:31", "remaining_time": "0:03:17", "throughput": 1571.43, "total_tokens": 237344} | |
| {"current_steps": 1565, "total_steps": 3600, "loss": 0.2316, "lr": 3.481288715108868e-05, "epoch": 8.694444444444445, "percentage": 43.47, "elapsed_time": "0:02:31", "remaining_time": "0:03:16", "throughput": 1571.99, "total_tokens": 238080} | |
| {"current_steps": 1570, "total_steps": 3600, "loss": 0.2211, "lr": 3.4701295977086324e-05, "epoch": 8.722222222222221, "percentage": 43.61, "elapsed_time": "0:02:31", "remaining_time": "0:03:16", "throughput": 1572.55, "total_tokens": 238816} | |
| {"current_steps": 1575, "total_steps": 3600, "loss": 0.2264, "lr": 3.4589476780093166e-05, "epoch": 8.75, "percentage": 43.75, "elapsed_time": "0:02:32", "remaining_time": "0:03:15", "throughput": 1573.25, "total_tokens": 239568} | |
| {"current_steps": 1580, "total_steps": 3600, "loss": 0.2365, "lr": 3.44774321883506e-05, "epoch": 8.777777777777779, "percentage": 43.89, "elapsed_time": "0:02:32", "remaining_time": "0:03:15", "throughput": 1574.02, "total_tokens": 240352} | |
| {"current_steps": 1585, "total_steps": 3600, "loss": 0.2298, "lr": 3.436516483539781e-05, "epoch": 8.805555555555555, "percentage": 44.03, "elapsed_time": "0:02:33", "remaining_time": "0:03:14", "throughput": 1574.75, "total_tokens": 241120} | |
| {"current_steps": 1590, "total_steps": 3600, "loss": 0.2276, "lr": 3.42526773600098e-05, "epoch": 8.833333333333334, "percentage": 44.17, "elapsed_time": "0:02:33", "remaining_time": "0:03:14", "throughput": 1575.3, "total_tokens": 241856} | |
| {"current_steps": 1595, "total_steps": 3600, "loss": 0.2219, "lr": 3.4139972406135464e-05, "epoch": 8.86111111111111, "percentage": 44.31, "elapsed_time": "0:02:33", "remaining_time": "0:03:13", "throughput": 1575.93, "total_tokens": 242608} | |
| {"current_steps": 1600, "total_steps": 3600, "loss": 0.2305, "lr": 3.402705262283537e-05, "epoch": 8.88888888888889, "percentage": 44.44, "elapsed_time": "0:02:34", "remaining_time": "0:03:12", "throughput": 1576.53, "total_tokens": 243360} | |
| {"current_steps": 1605, "total_steps": 3600, "loss": 0.2253, "lr": 3.39139206642195e-05, "epoch": 8.916666666666666, "percentage": 44.58, "elapsed_time": "0:02:34", "remaining_time": "0:03:12", "throughput": 1577.2, "total_tokens": 244128} | |
| {"current_steps": 1610, "total_steps": 3600, "loss": 0.2334, "lr": 3.3800579189384944e-05, "epoch": 8.944444444444445, "percentage": 44.72, "elapsed_time": "0:02:35", "remaining_time": "0:03:11", "throughput": 1577.92, "total_tokens": 244896} | |
| {"current_steps": 1615, "total_steps": 3600, "loss": 0.232, "lr": 3.3687030862353286e-05, "epoch": 8.972222222222221, "percentage": 44.86, "elapsed_time": "0:02:35", "remaining_time": "0:03:11", "throughput": 1578.78, "total_tokens": 245664} | |
| {"current_steps": 1620, "total_steps": 3600, "loss": 0.231, "lr": 3.357327835200807e-05, "epoch": 9.0, "percentage": 45.0, "elapsed_time": "0:02:36", "remaining_time": "0:03:10", "throughput": 1578.91, "total_tokens": 246416} | |
| {"current_steps": 1620, "total_steps": 3600, "eval_loss": 0.2354724407196045, "epoch": 9.0, "percentage": 45.0, "elapsed_time": "0:02:36", "remaining_time": "0:03:11", "throughput": 1570.34, "total_tokens": 246416} | |
| {"current_steps": 1625, "total_steps": 3600, "loss": 0.2321, "lr": 3.3459324332032035e-05, "epoch": 9.027777777777779, "percentage": 45.14, "elapsed_time": "0:02:38", "remaining_time": "0:03:12", "throughput": 1557.09, "total_tokens": 247184} | |
| {"current_steps": 1630, "total_steps": 3600, "loss": 0.212, "lr": 3.3345171480844275e-05, "epoch": 9.055555555555555, "percentage": 45.28, "elapsed_time": "0:02:39", "remaining_time": "0:03:12", "throughput": 1557.61, "total_tokens": 247936} | |
| {"current_steps": 1635, "total_steps": 3600, "loss": 0.2297, "lr": 3.32308224815373e-05, "epoch": 9.083333333333334, "percentage": 45.42, "elapsed_time": "0:02:39", "remaining_time": "0:03:11", "throughput": 1558.27, "total_tokens": 248688} | |
| {"current_steps": 1640, "total_steps": 3600, "loss": 0.2371, "lr": 3.311628002181398e-05, "epoch": 9.11111111111111, "percentage": 45.56, "elapsed_time": "0:02:40", "remaining_time": "0:03:11", "throughput": 1559.04, "total_tokens": 249472} | |
| {"current_steps": 1645, "total_steps": 3600, "loss": 0.2321, "lr": 3.3001546793924285e-05, "epoch": 9.13888888888889, "percentage": 45.69, "elapsed_time": "0:02:40", "remaining_time": "0:03:10", "throughput": 1559.72, "total_tokens": 250224} | |
| {"current_steps": 1650, "total_steps": 3600, "loss": 0.2218, "lr": 3.288662549460216e-05, "epoch": 9.166666666666666, "percentage": 45.83, "elapsed_time": "0:02:40", "remaining_time": "0:03:10", "throughput": 1560.27, "total_tokens": 250960} | |
| {"current_steps": 1655, "total_steps": 3600, "loss": 0.2258, "lr": 3.277151882500199e-05, "epoch": 9.194444444444445, "percentage": 45.97, "elapsed_time": "0:02:41", "remaining_time": "0:03:09", "throughput": 1560.96, "total_tokens": 251728} | |
| {"current_steps": 1660, "total_steps": 3600, "loss": 0.2476, "lr": 3.26562294906352e-05, "epoch": 9.222222222222221, "percentage": 46.11, "elapsed_time": "0:02:41", "remaining_time": "0:03:08", "throughput": 1561.7, "total_tokens": 252512} | |
| {"current_steps": 1665, "total_steps": 3600, "loss": 0.2142, "lr": 3.254076020130664e-05, "epoch": 9.25, "percentage": 46.25, "elapsed_time": "0:02:42", "remaining_time": "0:03:08", "throughput": 1562.48, "total_tokens": 253280} | |
| {"current_steps": 1670, "total_steps": 3600, "loss": 0.2091, "lr": 3.242511367105087e-05, "epoch": 9.277777777777779, "percentage": 46.39, "elapsed_time": "0:02:42", "remaining_time": "0:03:07", "throughput": 1563.16, "total_tokens": 254032} | |
| {"current_steps": 1675, "total_steps": 3600, "loss": 0.1549, "lr": 3.230929261806842e-05, "epoch": 9.305555555555555, "percentage": 46.53, "elapsed_time": "0:02:42", "remaining_time": "0:03:07", "throughput": 1563.9, "total_tokens": 254800} | |
| {"current_steps": 1680, "total_steps": 3600, "loss": 0.2614, "lr": 3.2193299764661845e-05, "epoch": 9.333333333333334, "percentage": 46.67, "elapsed_time": "0:02:43", "remaining_time": "0:03:06", "throughput": 1564.74, "total_tokens": 255584} | |
| {"current_steps": 1685, "total_steps": 3600, "loss": 0.3004, "lr": 3.207713783717176e-05, "epoch": 9.36111111111111, "percentage": 46.81, "elapsed_time": "0:02:43", "remaining_time": "0:03:06", "throughput": 1565.57, "total_tokens": 256368} | |
| {"current_steps": 1690, "total_steps": 3600, "loss": 0.2912, "lr": 3.1960809565912794e-05, "epoch": 9.38888888888889, "percentage": 46.94, "elapsed_time": "0:02:44", "remaining_time": "0:03:05", "throughput": 1566.11, "total_tokens": 257104} | |
| {"current_steps": 1695, "total_steps": 3600, "loss": 0.4081, "lr": 3.1844317685109354e-05, "epoch": 9.416666666666666, "percentage": 47.08, "elapsed_time": "0:02:44", "remaining_time": "0:03:04", "throughput": 1566.66, "total_tokens": 257856} | |
| {"current_steps": 1700, "total_steps": 3600, "loss": 0.3059, "lr": 3.1727664932831394e-05, "epoch": 9.444444444444445, "percentage": 47.22, "elapsed_time": "0:02:45", "remaining_time": "0:03:04", "throughput": 1567.26, "total_tokens": 258608} | |
| {"current_steps": 1705, "total_steps": 3600, "loss": 0.2625, "lr": 3.161085405093006e-05, "epoch": 9.472222222222221, "percentage": 47.36, "elapsed_time": "0:02:45", "remaining_time": "0:03:03", "throughput": 1567.72, "total_tokens": 259344} | |
| {"current_steps": 1710, "total_steps": 3600, "loss": 0.2651, "lr": 3.149388778497323e-05, "epoch": 9.5, "percentage": 47.5, "elapsed_time": "0:02:45", "remaining_time": "0:03:03", "throughput": 1568.44, "total_tokens": 260112} | |
| {"current_steps": 1715, "total_steps": 3600, "loss": 0.2377, "lr": 3.137676888418099e-05, "epoch": 9.527777777777779, "percentage": 47.64, "elapsed_time": "0:02:46", "remaining_time": "0:03:02", "throughput": 1569.07, "total_tokens": 260864} | |
| {"current_steps": 1720, "total_steps": 3600, "loss": 0.2376, "lr": 3.125950010136104e-05, "epoch": 9.555555555555555, "percentage": 47.78, "elapsed_time": "0:02:46", "remaining_time": "0:03:02", "throughput": 1569.81, "total_tokens": 261632} | |
| {"current_steps": 1725, "total_steps": 3600, "loss": 0.2311, "lr": 3.114208419284391e-05, "epoch": 9.583333333333334, "percentage": 47.92, "elapsed_time": "0:02:47", "remaining_time": "0:03:01", "throughput": 1570.35, "total_tokens": 262368} | |
| {"current_steps": 1730, "total_steps": 3600, "loss": 0.2083, "lr": 3.102452391841828e-05, "epoch": 9.61111111111111, "percentage": 48.06, "elapsed_time": "0:02:47", "remaining_time": "0:03:01", "throughput": 1571.09, "total_tokens": 263136} | |
| {"current_steps": 1735, "total_steps": 3600, "loss": 0.2502, "lr": 3.090682204126604e-05, "epoch": 9.63888888888889, "percentage": 48.19, "elapsed_time": "0:02:47", "remaining_time": "0:03:00", "throughput": 1571.61, "total_tokens": 263872} | |
| {"current_steps": 1740, "total_steps": 3600, "loss": 0.2408, "lr": 3.078898132789735e-05, "epoch": 9.666666666666666, "percentage": 48.33, "elapsed_time": "0:02:48", "remaining_time": "0:02:59", "throughput": 1572.16, "total_tokens": 264608} | |
| {"current_steps": 1745, "total_steps": 3600, "loss": 0.2263, "lr": 3.0671004548085675e-05, "epoch": 9.694444444444445, "percentage": 48.47, "elapsed_time": "0:02:48", "remaining_time": "0:02:59", "throughput": 1572.54, "total_tokens": 265328} | |
| {"current_steps": 1750, "total_steps": 3600, "loss": 0.2311, "lr": 3.0552894474802584e-05, "epoch": 9.722222222222221, "percentage": 48.61, "elapsed_time": "0:02:49", "remaining_time": "0:02:58", "throughput": 1573.36, "total_tokens": 266112} | |
| {"current_steps": 1755, "total_steps": 3600, "loss": 0.2469, "lr": 3.043465388415267e-05, "epoch": 9.75, "percentage": 48.75, "elapsed_time": "0:02:49", "remaining_time": "0:02:58", "throughput": 1573.95, "total_tokens": 266864} | |
| {"current_steps": 1760, "total_steps": 3600, "loss": 0.2179, "lr": 3.0316285555308233e-05, "epoch": 9.777777777777779, "percentage": 48.89, "elapsed_time": "0:02:49", "remaining_time": "0:02:57", "throughput": 1574.43, "total_tokens": 267600} | |
| {"current_steps": 1765, "total_steps": 3600, "loss": 0.2245, "lr": 3.0197792270443982e-05, "epoch": 9.805555555555555, "percentage": 49.03, "elapsed_time": "0:02:50", "remaining_time": "0:02:57", "throughput": 1575.16, "total_tokens": 268384} | |
| {"current_steps": 1770, "total_steps": 3600, "loss": 0.2253, "lr": 3.0079176814671656e-05, "epoch": 9.833333333333334, "percentage": 49.17, "elapsed_time": "0:02:50", "remaining_time": "0:02:56", "throughput": 1575.95, "total_tokens": 269168} | |
| {"current_steps": 1775, "total_steps": 3600, "loss": 0.2276, "lr": 2.9960441975974534e-05, "epoch": 9.86111111111111, "percentage": 49.31, "elapsed_time": "0:02:51", "remaining_time": "0:02:56", "throughput": 1576.46, "total_tokens": 269904} | |
| {"current_steps": 1780, "total_steps": 3600, "loss": 0.2348, "lr": 2.9841590545141906e-05, "epoch": 9.88888888888889, "percentage": 49.44, "elapsed_time": "0:02:51", "remaining_time": "0:02:55", "throughput": 1577.06, "total_tokens": 270656} | |
| {"current_steps": 1785, "total_steps": 3600, "loss": 0.2349, "lr": 2.9722625315703512e-05, "epoch": 9.916666666666666, "percentage": 49.58, "elapsed_time": "0:02:52", "remaining_time": "0:02:54", "throughput": 1577.67, "total_tokens": 271408} | |
| {"current_steps": 1790, "total_steps": 3600, "loss": 0.2326, "lr": 2.9603549083863847e-05, "epoch": 9.944444444444445, "percentage": 49.72, "elapsed_time": "0:02:52", "remaining_time": "0:02:54", "throughput": 1578.47, "total_tokens": 272192} | |
| {"current_steps": 1795, "total_steps": 3600, "loss": 0.2394, "lr": 2.9484364648436437e-05, "epoch": 9.972222222222221, "percentage": 49.86, "elapsed_time": "0:02:52", "remaining_time": "0:02:53", "throughput": 1579.21, "total_tokens": 272960} | |
| {"current_steps": 1800, "total_steps": 3600, "loss": 0.2325, "lr": 2.9365074810778094e-05, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:02:53", "remaining_time": "0:02:53", "throughput": 1579.3, "total_tokens": 273712} | |
| {"current_steps": 1800, "total_steps": 3600, "eval_loss": 0.23395749926567078, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:02:54", "remaining_time": "0:02:54", "throughput": 1571.64, "total_tokens": 273712} | |
| {"current_steps": 1805, "total_steps": 3600, "loss": 0.2183, "lr": 2.9245682374723016e-05, "epoch": 10.027777777777779, "percentage": 50.14, "elapsed_time": "0:02:56", "remaining_time": "0:02:55", "throughput": 1559.05, "total_tokens": 274480} | |
| {"current_steps": 1810, "total_steps": 3600, "loss": 0.2281, "lr": 2.9126190146516942e-05, "epoch": 10.055555555555555, "percentage": 50.28, "elapsed_time": "0:02:56", "remaining_time": "0:02:54", "throughput": 1559.76, "total_tokens": 275264} | |
| {"current_steps": 1815, "total_steps": 3600, "loss": 0.2193, "lr": 2.9006600934751145e-05, "epoch": 10.083333333333334, "percentage": 50.42, "elapsed_time": "0:02:56", "remaining_time": "0:02:53", "throughput": 1560.35, "total_tokens": 276016} | |
| {"current_steps": 1820, "total_steps": 3600, "loss": 0.2351, "lr": 2.888691755029642e-05, "epoch": 10.11111111111111, "percentage": 50.56, "elapsed_time": "0:02:57", "remaining_time": "0:02:53", "throughput": 1560.74, "total_tokens": 276752} | |
| {"current_steps": 1825, "total_steps": 3600, "loss": 0.2296, "lr": 2.876714280623708e-05, "epoch": 10.13888888888889, "percentage": 50.69, "elapsed_time": "0:02:57", "remaining_time": "0:02:52", "throughput": 1561.4, "total_tokens": 277520} | |
| {"current_steps": 1830, "total_steps": 3600, "loss": 0.2205, "lr": 2.8647279517804754e-05, "epoch": 10.166666666666666, "percentage": 50.83, "elapsed_time": "0:02:58", "remaining_time": "0:02:52", "throughput": 1561.98, "total_tokens": 278272} | |
| {"current_steps": 1835, "total_steps": 3600, "loss": 0.2259, "lr": 2.8527330502312248e-05, "epoch": 10.194444444444445, "percentage": 50.97, "elapsed_time": "0:02:58", "remaining_time": "0:02:51", "throughput": 1562.67, "total_tokens": 279040} | |
| {"current_steps": 1840, "total_steps": 3600, "loss": 0.2343, "lr": 2.8407298579087365e-05, "epoch": 10.222222222222221, "percentage": 51.11, "elapsed_time": "0:02:58", "remaining_time": "0:02:51", "throughput": 1563.17, "total_tokens": 279792} | |
| {"current_steps": 1845, "total_steps": 3600, "loss": 0.2281, "lr": 2.8287186569406566e-05, "epoch": 10.25, "percentage": 51.25, "elapsed_time": "0:02:59", "remaining_time": "0:02:50", "throughput": 1563.87, "total_tokens": 280560} | |
| {"current_steps": 1850, "total_steps": 3600, "loss": 0.2175, "lr": 2.816699729642871e-05, "epoch": 10.277777777777779, "percentage": 51.39, "elapsed_time": "0:02:59", "remaining_time": "0:02:50", "throughput": 1564.56, "total_tokens": 281328} | |
| {"current_steps": 1855, "total_steps": 3600, "loss": 0.2199, "lr": 2.8046733585128687e-05, "epoch": 10.305555555555555, "percentage": 51.53, "elapsed_time": "0:03:00", "remaining_time": "0:02:49", "throughput": 1565.32, "total_tokens": 282112} | |
| {"current_steps": 1860, "total_steps": 3600, "loss": 0.223, "lr": 2.792639826223101e-05, "epoch": 10.333333333333334, "percentage": 51.67, "elapsed_time": "0:03:00", "remaining_time": "0:02:48", "throughput": 1566.01, "total_tokens": 282880} | |
| {"current_steps": 1865, "total_steps": 3600, "loss": 0.2083, "lr": 2.7805994156143376e-05, "epoch": 10.36111111111111, "percentage": 51.81, "elapsed_time": "0:03:01", "remaining_time": "0:02:48", "throughput": 1566.7, "total_tokens": 283648} | |
| {"current_steps": 1870, "total_steps": 3600, "loss": 0.2172, "lr": 2.7685524096890185e-05, "epoch": 10.38888888888889, "percentage": 51.94, "elapsed_time": "0:03:01", "remaining_time": "0:02:47", "throughput": 1567.59, "total_tokens": 284464} | |
| {"current_steps": 1875, "total_steps": 3600, "loss": 0.2526, "lr": 2.756499091604603e-05, "epoch": 10.416666666666666, "percentage": 52.08, "elapsed_time": "0:03:01", "remaining_time": "0:02:47", "throughput": 1568.22, "total_tokens": 285232} | |
| {"current_steps": 1880, "total_steps": 3600, "loss": 0.2165, "lr": 2.744439744666915e-05, "epoch": 10.444444444444445, "percentage": 52.22, "elapsed_time": "0:03:02", "remaining_time": "0:02:46", "throughput": 1568.8, "total_tokens": 285984} | |
| {"current_steps": 1885, "total_steps": 3600, "loss": 0.2424, "lr": 2.732374652323481e-05, "epoch": 10.472222222222221, "percentage": 52.36, "elapsed_time": "0:03:02", "remaining_time": "0:02:46", "throughput": 1569.48, "total_tokens": 286752} | |
| {"current_steps": 1890, "total_steps": 3600, "loss": 0.2334, "lr": 2.72030409815687e-05, "epoch": 10.5, "percentage": 52.5, "elapsed_time": "0:03:03", "remaining_time": "0:02:45", "throughput": 1570.13, "total_tokens": 287520} | |
| {"current_steps": 1895, "total_steps": 3600, "loss": 0.2073, "lr": 2.7082283658780288e-05, "epoch": 10.527777777777779, "percentage": 52.64, "elapsed_time": "0:03:03", "remaining_time": "0:02:45", "throughput": 1570.49, "total_tokens": 288240} | |
| {"current_steps": 1900, "total_steps": 3600, "loss": 0.2345, "lr": 2.6961477393196126e-05, "epoch": 10.555555555555555, "percentage": 52.78, "elapsed_time": "0:03:03", "remaining_time": "0:02:44", "throughput": 1571.1, "total_tokens": 289008} | |
| {"current_steps": 1905, "total_steps": 3600, "loss": 0.2182, "lr": 2.684062502429312e-05, "epoch": 10.583333333333334, "percentage": 52.92, "elapsed_time": "0:03:04", "remaining_time": "0:02:44", "throughput": 1571.74, "total_tokens": 289776} | |
| {"current_steps": 1910, "total_steps": 3600, "loss": 0.2547, "lr": 2.6719729392631826e-05, "epoch": 10.61111111111111, "percentage": 53.06, "elapsed_time": "0:03:04", "remaining_time": "0:02:43", "throughput": 1572.38, "total_tokens": 290560} | |
| {"current_steps": 1915, "total_steps": 3600, "loss": 0.217, "lr": 2.659879333978964e-05, "epoch": 10.63888888888889, "percentage": 53.19, "elapsed_time": "0:03:05", "remaining_time": "0:02:42", "throughput": 1572.79, "total_tokens": 291296} | |
| {"current_steps": 1920, "total_steps": 3600, "loss": 0.2522, "lr": 2.6477819708294064e-05, "epoch": 10.666666666666666, "percentage": 53.33, "elapsed_time": "0:03:05", "remaining_time": "0:02:42", "throughput": 1573.17, "total_tokens": 292032} | |
| {"current_steps": 1925, "total_steps": 3600, "loss": 0.2324, "lr": 2.635681134155585e-05, "epoch": 10.694444444444445, "percentage": 53.47, "elapsed_time": "0:03:06", "remaining_time": "0:02:41", "throughput": 1573.72, "total_tokens": 292784} | |
| {"current_steps": 1930, "total_steps": 3600, "loss": 0.2305, "lr": 2.623577108380215e-05, "epoch": 10.722222222222221, "percentage": 53.61, "elapsed_time": "0:03:06", "remaining_time": "0:02:41", "throughput": 1574.19, "total_tokens": 293520} | |
| {"current_steps": 1935, "total_steps": 3600, "loss": 0.2424, "lr": 2.6114701780009753e-05, "epoch": 10.75, "percentage": 53.75, "elapsed_time": "0:03:06", "remaining_time": "0:02:40", "throughput": 1574.76, "total_tokens": 294272} | |
| {"current_steps": 1940, "total_steps": 3600, "loss": 0.2427, "lr": 2.5993606275838117e-05, "epoch": 10.777777777777779, "percentage": 53.89, "elapsed_time": "0:03:07", "remaining_time": "0:02:40", "throughput": 1575.23, "total_tokens": 295008} | |
| {"current_steps": 1945, "total_steps": 3600, "loss": 0.2342, "lr": 2.587248741756253e-05, "epoch": 10.805555555555555, "percentage": 54.03, "elapsed_time": "0:03:07", "remaining_time": "0:02:39", "throughput": 1575.86, "total_tokens": 295776} | |
| {"current_steps": 1950, "total_steps": 3600, "loss": 0.2315, "lr": 2.5751348052007206e-05, "epoch": 10.833333333333334, "percentage": 54.17, "elapsed_time": "0:03:08", "remaining_time": "0:02:39", "throughput": 1576.3, "total_tokens": 296512} | |
| {"current_steps": 1955, "total_steps": 3600, "loss": 0.2293, "lr": 2.5630191026478368e-05, "epoch": 10.86111111111111, "percentage": 54.31, "elapsed_time": "0:03:08", "remaining_time": "0:02:38", "throughput": 1576.65, "total_tokens": 297248} | |
| {"current_steps": 1960, "total_steps": 3600, "loss": 0.2214, "lr": 2.5509019188697343e-05, "epoch": 10.88888888888889, "percentage": 54.44, "elapsed_time": "0:03:08", "remaining_time": "0:02:38", "throughput": 1577.23, "total_tokens": 298032} | |
| {"current_steps": 1965, "total_steps": 3600, "loss": 0.2314, "lr": 2.5387835386733584e-05, "epoch": 10.916666666666666, "percentage": 54.58, "elapsed_time": "0:03:09", "remaining_time": "0:02:37", "throughput": 1577.69, "total_tokens": 298800} | |
| {"current_steps": 1970, "total_steps": 3600, "loss": 0.2428, "lr": 2.5266642468937766e-05, "epoch": 10.944444444444445, "percentage": 54.72, "elapsed_time": "0:03:09", "remaining_time": "0:02:37", "throughput": 1578.21, "total_tokens": 299552} | |
| {"current_steps": 1975, "total_steps": 3600, "loss": 0.2221, "lr": 2.5145443283874848e-05, "epoch": 10.972222222222221, "percentage": 54.86, "elapsed_time": "0:03:10", "remaining_time": "0:02:36", "throughput": 1578.82, "total_tokens": 300320} | |
| {"current_steps": 1980, "total_steps": 3600, "loss": 0.2153, "lr": 2.5024240680257055e-05, "epoch": 11.0, "percentage": 55.0, "elapsed_time": "0:03:10", "remaining_time": "0:02:36", "throughput": 1578.5, "total_tokens": 301088} | |
| {"current_steps": 1980, "total_steps": 3600, "eval_loss": 0.23569095134735107, "epoch": 11.0, "percentage": 55.0, "elapsed_time": "0:03:11", "remaining_time": "0:02:36", "throughput": 1571.53, "total_tokens": 301088} | |
| {"current_steps": 1985, "total_steps": 3600, "loss": 0.2065, "lr": 2.4903037506876997e-05, "epoch": 11.027777777777779, "percentage": 55.14, "elapsed_time": "0:03:13", "remaining_time": "0:02:37", "throughput": 1559.93, "total_tokens": 301856} | |
| {"current_steps": 1990, "total_steps": 3600, "loss": 0.239, "lr": 2.4781836612540657e-05, "epoch": 11.055555555555555, "percentage": 55.28, "elapsed_time": "0:03:13", "remaining_time": "0:02:36", "throughput": 1560.28, "total_tokens": 302592} | |
| {"current_steps": 1995, "total_steps": 3600, "loss": 0.2296, "lr": 2.4660640846000453e-05, "epoch": 11.083333333333334, "percentage": 55.42, "elapsed_time": "0:03:14", "remaining_time": "0:02:36", "throughput": 1560.87, "total_tokens": 303360} | |
| {"current_steps": 2000, "total_steps": 3600, "loss": 0.2151, "lr": 2.4539453055888297e-05, "epoch": 11.11111111111111, "percentage": 55.56, "elapsed_time": "0:03:14", "remaining_time": "0:02:35", "throughput": 1561.28, "total_tokens": 304096} | |
| {"current_steps": 2005, "total_steps": 3600, "loss": 0.2218, "lr": 2.4418276090648596e-05, "epoch": 11.13888888888889, "percentage": 55.69, "elapsed_time": "0:03:15", "remaining_time": "0:02:35", "throughput": 1561.87, "total_tokens": 304880} | |
| {"current_steps": 2010, "total_steps": 3600, "loss": 0.2321, "lr": 2.4297112798471326e-05, "epoch": 11.166666666666666, "percentage": 55.83, "elapsed_time": "0:03:15", "remaining_time": "0:02:34", "throughput": 1562.53, "total_tokens": 305664} | |
| {"current_steps": 2015, "total_steps": 3600, "loss": 0.2202, "lr": 2.4175966027225107e-05, "epoch": 11.194444444444445, "percentage": 55.97, "elapsed_time": "0:03:16", "remaining_time": "0:02:34", "throughput": 1563.2, "total_tokens": 306448} | |
| {"current_steps": 2020, "total_steps": 3600, "loss": 0.2241, "lr": 2.405483862439023e-05, "epoch": 11.222222222222221, "percentage": 56.11, "elapsed_time": "0:03:16", "remaining_time": "0:02:33", "throughput": 1563.82, "total_tokens": 307216} | |
| {"current_steps": 2025, "total_steps": 3600, "loss": 0.2306, "lr": 2.3933733436991732e-05, "epoch": 11.25, "percentage": 56.25, "elapsed_time": "0:03:16", "remaining_time": "0:02:33", "throughput": 1564.35, "total_tokens": 307968} | |
| {"current_steps": 2030, "total_steps": 3600, "loss": 0.2391, "lr": 2.381265331153252e-05, "epoch": 11.277777777777779, "percentage": 56.39, "elapsed_time": "0:03:17", "remaining_time": "0:02:32", "throughput": 1564.85, "total_tokens": 308720} | |
| {"current_steps": 2035, "total_steps": 3600, "loss": 0.2059, "lr": 2.3691601093926404e-05, "epoch": 11.305555555555555, "percentage": 56.53, "elapsed_time": "0:03:17", "remaining_time": "0:02:32", "throughput": 1565.38, "total_tokens": 309472} | |
| {"current_steps": 2040, "total_steps": 3600, "loss": 0.2382, "lr": 2.3570579629431267e-05, "epoch": 11.333333333333334, "percentage": 56.67, "elapsed_time": "0:03:18", "remaining_time": "0:02:31", "throughput": 1565.91, "total_tokens": 310224} | |
| {"current_steps": 2045, "total_steps": 3600, "loss": 0.222, "lr": 2.344959176258212e-05, "epoch": 11.36111111111111, "percentage": 56.81, "elapsed_time": "0:03:18", "remaining_time": "0:02:30", "throughput": 1566.62, "total_tokens": 311008} | |
| {"current_steps": 2050, "total_steps": 3600, "loss": 0.211, "lr": 2.3328640337124326e-05, "epoch": 11.38888888888889, "percentage": 56.94, "elapsed_time": "0:03:18", "remaining_time": "0:02:30", "throughput": 1566.99, "total_tokens": 311744} | |
| {"current_steps": 2055, "total_steps": 3600, "loss": 0.2242, "lr": 2.3207728195946688e-05, "epoch": 11.416666666666666, "percentage": 57.08, "elapsed_time": "0:03:19", "remaining_time": "0:02:29", "throughput": 1567.6, "total_tokens": 312512} | |
| {"current_steps": 2060, "total_steps": 3600, "loss": 0.2537, "lr": 2.3086858181014653e-05, "epoch": 11.444444444444445, "percentage": 57.22, "elapsed_time": "0:03:19", "remaining_time": "0:02:29", "throughput": 1568.06, "total_tokens": 313248} | |
| {"current_steps": 2065, "total_steps": 3600, "loss": 0.2294, "lr": 2.2966033133303545e-05, "epoch": 11.472222222222221, "percentage": 57.36, "elapsed_time": "0:03:20", "remaining_time": "0:02:28", "throughput": 1568.76, "total_tokens": 314032} | |
| {"current_steps": 2070, "total_steps": 3600, "loss": 0.2345, "lr": 2.2845255892731733e-05, "epoch": 11.5, "percentage": 57.5, "elapsed_time": "0:03:20", "remaining_time": "0:02:28", "throughput": 1569.24, "total_tokens": 314784} | |
| {"current_steps": 2075, "total_steps": 3600, "loss": 0.2169, "lr": 2.2724529298093915e-05, "epoch": 11.527777777777779, "percentage": 57.64, "elapsed_time": "0:03:21", "remaining_time": "0:02:27", "throughput": 1569.69, "total_tokens": 315520} | |
| {"current_steps": 2080, "total_steps": 3600, "loss": 0.2138, "lr": 2.26038561869944e-05, "epoch": 11.555555555555555, "percentage": 57.78, "elapsed_time": "0:03:21", "remaining_time": "0:02:27", "throughput": 1570.29, "total_tokens": 316288} | |
| {"current_steps": 2085, "total_steps": 3600, "loss": 0.2234, "lr": 2.248323939578039e-05, "epoch": 11.583333333333334, "percentage": 57.92, "elapsed_time": "0:03:21", "remaining_time": "0:02:26", "throughput": 1570.81, "total_tokens": 317040} | |
| {"current_steps": 2090, "total_steps": 3600, "loss": 0.2273, "lr": 2.2362681759475307e-05, "epoch": 11.61111111111111, "percentage": 58.06, "elapsed_time": "0:03:22", "remaining_time": "0:02:26", "throughput": 1571.25, "total_tokens": 317776} | |
| {"current_steps": 2095, "total_steps": 3600, "loss": 0.2244, "lr": 2.2242186111712208e-05, "epoch": 11.63888888888889, "percentage": 58.19, "elapsed_time": "0:03:22", "remaining_time": "0:02:25", "throughput": 1571.93, "total_tokens": 318560} | |
| {"current_steps": 2100, "total_steps": 3600, "loss": 0.2486, "lr": 2.212175528466712e-05, "epoch": 11.666666666666666, "percentage": 58.33, "elapsed_time": "0:03:23", "remaining_time": "0:02:25", "throughput": 1572.37, "total_tokens": 319296} | |
| {"current_steps": 2105, "total_steps": 3600, "loss": 0.1969, "lr": 2.2001392108992504e-05, "epoch": 11.694444444444445, "percentage": 58.47, "elapsed_time": "0:03:23", "remaining_time": "0:02:24", "throughput": 1572.91, "total_tokens": 320064} | |
| {"current_steps": 2110, "total_steps": 3600, "loss": 0.2327, "lr": 2.1881099413750733e-05, "epoch": 11.722222222222221, "percentage": 58.61, "elapsed_time": "0:03:23", "remaining_time": "0:02:23", "throughput": 1573.28, "total_tokens": 320800} | |
| {"current_steps": 2115, "total_steps": 3600, "loss": 0.2108, "lr": 2.1760880026347562e-05, "epoch": 11.75, "percentage": 58.75, "elapsed_time": "0:03:24", "remaining_time": "0:02:23", "throughput": 1573.66, "total_tokens": 321536} | |
| {"current_steps": 2120, "total_steps": 3600, "loss": 0.2259, "lr": 2.16407367724657e-05, "epoch": 11.777777777777779, "percentage": 58.89, "elapsed_time": "0:03:24", "remaining_time": "0:02:22", "throughput": 1574.33, "total_tokens": 322320} | |
| {"current_steps": 2125, "total_steps": 3600, "loss": 0.2064, "lr": 2.1520672475998373e-05, "epoch": 11.805555555555555, "percentage": 59.03, "elapsed_time": "0:03:25", "remaining_time": "0:02:22", "throughput": 1574.64, "total_tokens": 323056} | |
| {"current_steps": 2130, "total_steps": 3600, "loss": 0.2108, "lr": 2.140068995898297e-05, "epoch": 11.833333333333334, "percentage": 59.17, "elapsed_time": "0:03:25", "remaining_time": "0:02:21", "throughput": 1575.17, "total_tokens": 323824} | |
| {"current_steps": 2135, "total_steps": 3600, "loss": 0.2186, "lr": 2.1280792041534714e-05, "epoch": 11.86111111111111, "percentage": 59.31, "elapsed_time": "0:03:26", "remaining_time": "0:02:21", "throughput": 1575.82, "total_tokens": 324624} | |
| {"current_steps": 2140, "total_steps": 3600, "loss": 0.2016, "lr": 2.116098154178035e-05, "epoch": 11.88888888888889, "percentage": 59.44, "elapsed_time": "0:03:26", "remaining_time": "0:02:20", "throughput": 1576.4, "total_tokens": 325392} | |
| {"current_steps": 2145, "total_steps": 3600, "loss": 0.248, "lr": 2.1041261275791933e-05, "epoch": 11.916666666666666, "percentage": 59.58, "elapsed_time": "0:03:26", "remaining_time": "0:02:20", "throughput": 1576.85, "total_tokens": 326144} | |
| {"current_steps": 2150, "total_steps": 3600, "loss": 0.201, "lr": 2.092163405752063e-05, "epoch": 11.944444444444445, "percentage": 59.72, "elapsed_time": "0:03:27", "remaining_time": "0:02:19", "throughput": 1577.15, "total_tokens": 326880} | |
| {"current_steps": 2155, "total_steps": 3600, "loss": 0.2192, "lr": 2.0802102698730574e-05, "epoch": 11.972222222222221, "percentage": 59.86, "elapsed_time": "0:03:27", "remaining_time": "0:02:19", "throughput": 1577.68, "total_tokens": 327648} | |
| {"current_steps": 2160, "total_steps": 3600, "loss": 0.3359, "lr": 2.0682670008932785e-05, "epoch": 12.0, "percentage": 60.0, "elapsed_time": "0:03:28", "remaining_time": "0:02:18", "throughput": 1577.67, "total_tokens": 328384} | |
| {"current_steps": 2160, "total_steps": 3600, "eval_loss": 0.3373684883117676, "epoch": 12.0, "percentage": 60.0, "elapsed_time": "0:03:28", "remaining_time": "0:02:19", "throughput": 1571.23, "total_tokens": 328384} | |
| {"current_steps": 2165, "total_steps": 3600, "loss": 0.2025, "lr": 2.0563338795319123e-05, "epoch": 12.027777777777779, "percentage": 60.14, "elapsed_time": "0:03:30", "remaining_time": "0:02:19", "throughput": 1560.45, "total_tokens": 329136} | |
| {"current_steps": 2170, "total_steps": 3600, "loss": 0.2236, "lr": 2.0444111862696314e-05, "epoch": 12.055555555555555, "percentage": 60.28, "elapsed_time": "0:03:31", "remaining_time": "0:02:19", "throughput": 1561.04, "total_tokens": 329904} | |
| {"current_steps": 2175, "total_steps": 3600, "loss": 0.222, "lr": 2.032499201342003e-05, "epoch": 12.083333333333334, "percentage": 60.42, "elapsed_time": "0:03:31", "remaining_time": "0:02:18", "throughput": 1561.39, "total_tokens": 330640} | |
| {"current_steps": 2180, "total_steps": 3600, "loss": 0.2336, "lr": 2.020598204732901e-05, "epoch": 12.11111111111111, "percentage": 60.56, "elapsed_time": "0:03:32", "remaining_time": "0:02:18", "throughput": 1561.68, "total_tokens": 331360} | |
| {"current_steps": 2185, "total_steps": 3600, "loss": 0.2341, "lr": 2.0087084761679245e-05, "epoch": 12.13888888888889, "percentage": 60.69, "elapsed_time": "0:03:32", "remaining_time": "0:02:17", "throughput": 1562.15, "total_tokens": 332112} | |
| {"current_steps": 2190, "total_steps": 3600, "loss": 0.2747, "lr": 1.996830295107827e-05, "epoch": 12.166666666666666, "percentage": 60.83, "elapsed_time": "0:03:33", "remaining_time": "0:02:17", "throughput": 1562.64, "total_tokens": 332864} | |
| {"current_steps": 2195, "total_steps": 3600, "loss": 0.2155, "lr": 1.9849639407419423e-05, "epoch": 12.194444444444445, "percentage": 60.97, "elapsed_time": "0:03:33", "remaining_time": "0:02:16", "throughput": 1563.29, "total_tokens": 333648} | |
| {"current_steps": 2200, "total_steps": 3600, "loss": 0.2432, "lr": 1.973109691981627e-05, "epoch": 12.222222222222221, "percentage": 61.11, "elapsed_time": "0:03:33", "remaining_time": "0:02:16", "throughput": 1563.82, "total_tokens": 334416} | |
| {"current_steps": 2205, "total_steps": 3600, "loss": 0.2355, "lr": 1.9612678274537005e-05, "epoch": 12.25, "percentage": 61.25, "elapsed_time": "0:03:34", "remaining_time": "0:02:15", "throughput": 1564.37, "total_tokens": 335184} | |
| {"current_steps": 2210, "total_steps": 3600, "loss": 0.197, "lr": 1.9494386254939e-05, "epoch": 12.277777777777779, "percentage": 61.39, "elapsed_time": "0:03:34", "remaining_time": "0:02:15", "throughput": 1565.03, "total_tokens": 335968} | |
| {"current_steps": 2215, "total_steps": 3600, "loss": 0.2029, "lr": 1.937622364140338e-05, "epoch": 12.305555555555555, "percentage": 61.53, "elapsed_time": "0:03:35", "remaining_time": "0:02:14", "throughput": 1565.55, "total_tokens": 336736} | |
| {"current_steps": 2220, "total_steps": 3600, "loss": 0.2067, "lr": 1.925819321126964e-05, "epoch": 12.333333333333334, "percentage": 61.67, "elapsed_time": "0:03:35", "remaining_time": "0:02:13", "throughput": 1566.04, "total_tokens": 337488} | |
| {"current_steps": 2225, "total_steps": 3600, "loss": 0.2677, "lr": 1.9140297738770385e-05, "epoch": 12.36111111111111, "percentage": 61.81, "elapsed_time": "0:03:35", "remaining_time": "0:02:13", "throughput": 1566.48, "total_tokens": 338240} | |
| {"current_steps": 2230, "total_steps": 3600, "loss": 0.2074, "lr": 1.9022539994966147e-05, "epoch": 12.38888888888889, "percentage": 61.94, "elapsed_time": "0:03:36", "remaining_time": "0:02:12", "throughput": 1566.76, "total_tokens": 338976} | |
| {"current_steps": 2235, "total_steps": 3600, "loss": 0.2153, "lr": 1.8904922747680204e-05, "epoch": 12.416666666666666, "percentage": 62.08, "elapsed_time": "0:03:36", "remaining_time": "0:02:12", "throughput": 1567.24, "total_tokens": 339760} | |
| {"current_steps": 2240, "total_steps": 3600, "loss": 0.2247, "lr": 1.8787448761433556e-05, "epoch": 12.444444444444445, "percentage": 62.22, "elapsed_time": "0:03:37", "remaining_time": "0:02:11", "throughput": 1567.69, "total_tokens": 340528} | |
| {"current_steps": 2245, "total_steps": 3600, "loss": 0.2214, "lr": 1.8670120797379958e-05, "epoch": 12.472222222222221, "percentage": 62.36, "elapsed_time": "0:03:37", "remaining_time": "0:02:11", "throughput": 1567.88, "total_tokens": 341232} | |
| {"current_steps": 2250, "total_steps": 3600, "loss": 0.2235, "lr": 1.8552941613240983e-05, "epoch": 12.5, "percentage": 62.5, "elapsed_time": "0:03:38", "remaining_time": "0:02:10", "throughput": 1568.41, "total_tokens": 342000} | |
| {"current_steps": 2255, "total_steps": 3600, "loss": 0.197, "lr": 1.8435913963241226e-05, "epoch": 12.527777777777779, "percentage": 62.64, "elapsed_time": "0:03:38", "remaining_time": "0:02:10", "throughput": 1568.93, "total_tokens": 342768} | |
| {"current_steps": 2260, "total_steps": 3600, "loss": 0.2268, "lr": 1.831904059804358e-05, "epoch": 12.555555555555555, "percentage": 62.78, "elapsed_time": "0:03:38", "remaining_time": "0:02:09", "throughput": 1569.61, "total_tokens": 343568} | |
| {"current_steps": 2265, "total_steps": 3600, "loss": 0.2185, "lr": 1.8202324264684544e-05, "epoch": 12.583333333333334, "percentage": 62.92, "elapsed_time": "0:03:39", "remaining_time": "0:02:09", "throughput": 1569.98, "total_tokens": 344304} | |
| {"current_steps": 2270, "total_steps": 3600, "loss": 0.1767, "lr": 1.8085767706509712e-05, "epoch": 12.61111111111111, "percentage": 63.06, "elapsed_time": "0:03:39", "remaining_time": "0:02:08", "throughput": 1570.54, "total_tokens": 345088} | |
| {"current_steps": 2275, "total_steps": 3600, "loss": 0.2127, "lr": 1.7969373663109234e-05, "epoch": 12.63888888888889, "percentage": 63.19, "elapsed_time": "0:03:40", "remaining_time": "0:02:08", "throughput": 1571.09, "total_tokens": 345856} | |
| {"current_steps": 2280, "total_steps": 3600, "loss": 0.2382, "lr": 1.7853144870253458e-05, "epoch": 12.666666666666666, "percentage": 63.33, "elapsed_time": "0:03:40", "remaining_time": "0:02:07", "throughput": 1571.47, "total_tokens": 346608} | |
| {"current_steps": 2285, "total_steps": 3600, "loss": 0.194, "lr": 1.7737084059828637e-05, "epoch": 12.694444444444445, "percentage": 63.47, "elapsed_time": "0:03:40", "remaining_time": "0:02:07", "throughput": 1571.98, "total_tokens": 347376} | |
| {"current_steps": 2290, "total_steps": 3600, "loss": 0.1908, "lr": 1.7621193959772657e-05, "epoch": 12.722222222222221, "percentage": 63.61, "elapsed_time": "0:03:41", "remaining_time": "0:02:06", "throughput": 1572.41, "total_tokens": 348144} | |
| {"current_steps": 2295, "total_steps": 3600, "loss": 0.1831, "lr": 1.750547729401101e-05, "epoch": 12.75, "percentage": 63.75, "elapsed_time": "0:03:41", "remaining_time": "0:02:06", "throughput": 1572.89, "total_tokens": 348912} | |
| {"current_steps": 2300, "total_steps": 3600, "loss": 0.2239, "lr": 1.7389936782392695e-05, "epoch": 12.777777777777779, "percentage": 63.89, "elapsed_time": "0:03:42", "remaining_time": "0:02:05", "throughput": 1573.3, "total_tokens": 349664} | |
| {"current_steps": 2305, "total_steps": 3600, "loss": 0.1875, "lr": 1.7274575140626318e-05, "epoch": 12.805555555555555, "percentage": 64.03, "elapsed_time": "0:03:42", "remaining_time": "0:02:05", "throughput": 1573.73, "total_tokens": 350432} | |
| {"current_steps": 2310, "total_steps": 3600, "loss": 0.344, "lr": 1.7159395080216273e-05, "epoch": 12.833333333333334, "percentage": 64.17, "elapsed_time": "0:03:43", "remaining_time": "0:02:04", "throughput": 1574.26, "total_tokens": 351200} | |
| {"current_steps": 2315, "total_steps": 3600, "loss": 0.3025, "lr": 1.7044399308398983e-05, "epoch": 12.86111111111111, "percentage": 64.31, "elapsed_time": "0:03:43", "remaining_time": "0:02:04", "throughput": 1574.66, "total_tokens": 351936} | |
| {"current_steps": 2320, "total_steps": 3600, "loss": 0.1906, "lr": 1.692959052807928e-05, "epoch": 12.88888888888889, "percentage": 64.44, "elapsed_time": "0:03:43", "remaining_time": "0:02:03", "throughput": 1575.19, "total_tokens": 352720} | |
| {"current_steps": 2325, "total_steps": 3600, "loss": 0.2825, "lr": 1.681497143776689e-05, "epoch": 12.916666666666666, "percentage": 64.58, "elapsed_time": "0:03:44", "remaining_time": "0:02:03", "throughput": 1575.72, "total_tokens": 353488} | |
| {"current_steps": 2330, "total_steps": 3600, "loss": 0.1878, "lr": 1.670054473151298e-05, "epoch": 12.944444444444445, "percentage": 64.72, "elapsed_time": "0:03:44", "remaining_time": "0:02:02", "throughput": 1576.22, "total_tokens": 354256} | |
| {"current_steps": 2335, "total_steps": 3600, "loss": 0.2078, "lr": 1.658631309884684e-05, "epoch": 12.972222222222221, "percentage": 64.86, "elapsed_time": "0:03:45", "remaining_time": "0:02:01", "throughput": 1576.73, "total_tokens": 355008} | |
| {"current_steps": 2340, "total_steps": 3600, "loss": 0.2397, "lr": 1.6472279224712702e-05, "epoch": 13.0, "percentage": 65.0, "elapsed_time": "0:03:45", "remaining_time": "0:02:01", "throughput": 1576.82, "total_tokens": 355760} | |
| {"current_steps": 2340, "total_steps": 3600, "eval_loss": 0.24227285385131836, "epoch": 13.0, "percentage": 65.0, "elapsed_time": "0:03:46", "remaining_time": "0:02:01", "throughput": 1570.9, "total_tokens": 355760} | |
| {"current_steps": 2345, "total_steps": 3600, "loss": 0.2267, "lr": 1.6358445789406584e-05, "epoch": 13.027777777777779, "percentage": 65.14, "elapsed_time": "0:03:48", "remaining_time": "0:02:02", "throughput": 1561.78, "total_tokens": 356528} | |
| {"current_steps": 2350, "total_steps": 3600, "loss": 0.1854, "lr": 1.6244815468513315e-05, "epoch": 13.055555555555555, "percentage": 65.28, "elapsed_time": "0:03:48", "remaining_time": "0:02:01", "throughput": 1562.23, "total_tokens": 357296} | |
| {"current_steps": 2355, "total_steps": 3600, "loss": 0.182, "lr": 1.6131390932843648e-05, "epoch": 13.083333333333334, "percentage": 65.42, "elapsed_time": "0:03:49", "remaining_time": "0:02:01", "throughput": 1562.68, "total_tokens": 358048} | |
| {"current_steps": 2360, "total_steps": 3600, "loss": 0.2446, "lr": 1.6018174848371494e-05, "epoch": 13.11111111111111, "percentage": 65.56, "elapsed_time": "0:03:49", "remaining_time": "0:02:00", "throughput": 1563.18, "total_tokens": 358816} | |
| {"current_steps": 2365, "total_steps": 3600, "loss": 0.1739, "lr": 1.5905169876171223e-05, "epoch": 13.13888888888889, "percentage": 65.69, "elapsed_time": "0:03:49", "remaining_time": "0:02:00", "throughput": 1563.54, "total_tokens": 359568} | |
| {"current_steps": 2370, "total_steps": 3600, "loss": 0.2171, "lr": 1.579237867235514e-05, "epoch": 13.166666666666666, "percentage": 65.83, "elapsed_time": "0:03:50", "remaining_time": "0:01:59", "throughput": 1563.94, "total_tokens": 360336} | |
| {"current_steps": 2375, "total_steps": 3600, "loss": 0.2339, "lr": 1.567980388801109e-05, "epoch": 13.194444444444445, "percentage": 65.97, "elapsed_time": "0:03:50", "remaining_time": "0:01:59", "throughput": 1564.15, "total_tokens": 361056} | |
| {"current_steps": 2380, "total_steps": 3600, "loss": 0.2172, "lr": 1.556744816914008e-05, "epoch": 13.222222222222221, "percentage": 66.11, "elapsed_time": "0:03:51", "remaining_time": "0:01:58", "throughput": 1564.52, "total_tokens": 361792} | |
| {"current_steps": 2385, "total_steps": 3600, "loss": 0.1971, "lr": 1.5455314156594124e-05, "epoch": 13.25, "percentage": 66.25, "elapsed_time": "0:03:51", "remaining_time": "0:01:58", "throughput": 1565.11, "total_tokens": 362576} | |
| {"current_steps": 2390, "total_steps": 3600, "loss": 0.2087, "lr": 1.534340448601418e-05, "epoch": 13.277777777777779, "percentage": 66.39, "elapsed_time": "0:03:52", "remaining_time": "0:01:57", "throughput": 1565.6, "total_tokens": 363344} | |
| {"current_steps": 2395, "total_steps": 3600, "loss": 0.2058, "lr": 1.523172178776816e-05, "epoch": 13.305555555555555, "percentage": 66.53, "elapsed_time": "0:03:52", "remaining_time": "0:01:56", "throughput": 1566.02, "total_tokens": 364096} | |
| {"current_steps": 2400, "total_steps": 3600, "loss": 0.1853, "lr": 1.512026868688915e-05, "epoch": 13.333333333333334, "percentage": 66.67, "elapsed_time": "0:03:52", "remaining_time": "0:01:56", "throughput": 1566.43, "total_tokens": 364848} | |
| {"current_steps": 2405, "total_steps": 3600, "loss": 0.2178, "lr": 1.5009047803013699e-05, "epoch": 13.36111111111111, "percentage": 66.81, "elapsed_time": "0:03:53", "remaining_time": "0:01:55", "throughput": 1566.68, "total_tokens": 365568} | |
| {"current_steps": 2410, "total_steps": 3600, "loss": 0.1482, "lr": 1.4898061750320212e-05, "epoch": 13.38888888888889, "percentage": 66.94, "elapsed_time": "0:03:53", "remaining_time": "0:01:55", "throughput": 1567.32, "total_tokens": 366368} | |
| {"current_steps": 2415, "total_steps": 3600, "loss": 0.1657, "lr": 1.4787313137467546e-05, "epoch": 13.416666666666666, "percentage": 67.08, "elapsed_time": "0:03:54", "remaining_time": "0:01:54", "throughput": 1567.96, "total_tokens": 367168} | |
| {"current_steps": 2420, "total_steps": 3600, "loss": 0.2238, "lr": 1.4676804567533687e-05, "epoch": 13.444444444444445, "percentage": 67.22, "elapsed_time": "0:03:54", "remaining_time": "0:01:54", "throughput": 1568.3, "total_tokens": 367904} | |
| {"current_steps": 2425, "total_steps": 3600, "loss": 0.1961, "lr": 1.4566538637954554e-05, "epoch": 13.472222222222221, "percentage": 67.36, "elapsed_time": "0:03:55", "remaining_time": "0:01:53", "throughput": 1568.75, "total_tokens": 368672} | |
| {"current_steps": 2430, "total_steps": 3600, "loss": 0.2555, "lr": 1.4456517940462949e-05, "epoch": 13.5, "percentage": 67.5, "elapsed_time": "0:03:55", "remaining_time": "0:01:53", "throughput": 1569.17, "total_tokens": 369424} | |
| {"current_steps": 2435, "total_steps": 3600, "loss": 0.1898, "lr": 1.4346745061027644e-05, "epoch": 13.527777777777779, "percentage": 67.64, "elapsed_time": "0:03:55", "remaining_time": "0:01:52", "throughput": 1569.69, "total_tokens": 370192} | |
| {"current_steps": 2440, "total_steps": 3600, "loss": 0.2684, "lr": 1.4237222579792618e-05, "epoch": 13.555555555555555, "percentage": 67.78, "elapsed_time": "0:03:56", "remaining_time": "0:01:52", "throughput": 1569.98, "total_tokens": 370928} | |
| {"current_steps": 2445, "total_steps": 3600, "loss": 0.1869, "lr": 1.4127953071016383e-05, "epoch": 13.583333333333334, "percentage": 67.92, "elapsed_time": "0:03:56", "remaining_time": "0:01:51", "throughput": 1570.34, "total_tokens": 371664} | |
| {"current_steps": 2450, "total_steps": 3600, "loss": 0.2325, "lr": 1.4018939103011472e-05, "epoch": 13.61111111111111, "percentage": 68.06, "elapsed_time": "0:03:57", "remaining_time": "0:01:51", "throughput": 1570.83, "total_tokens": 372448} | |
| {"current_steps": 2455, "total_steps": 3600, "loss": 0.2535, "lr": 1.3910183238084112e-05, "epoch": 13.63888888888889, "percentage": 68.19, "elapsed_time": "0:03:57", "remaining_time": "0:01:50", "throughput": 1571.38, "total_tokens": 373232} | |
| {"current_steps": 2460, "total_steps": 3600, "loss": 0.3524, "lr": 1.3801688032473958e-05, "epoch": 13.666666666666666, "percentage": 68.33, "elapsed_time": "0:03:57", "remaining_time": "0:01:50", "throughput": 1571.82, "total_tokens": 374000} | |
| {"current_steps": 2465, "total_steps": 3600, "loss": 0.3079, "lr": 1.369345603629406e-05, "epoch": 13.694444444444445, "percentage": 68.47, "elapsed_time": "0:03:58", "remaining_time": "0:01:49", "throughput": 1572.21, "total_tokens": 374768} | |
| {"current_steps": 2470, "total_steps": 3600, "loss": 0.1759, "lr": 1.3585489793470862e-05, "epoch": 13.722222222222221, "percentage": 68.61, "elapsed_time": "0:03:58", "remaining_time": "0:01:49", "throughput": 1572.71, "total_tokens": 375552} | |
| {"current_steps": 2475, "total_steps": 3600, "loss": 0.1818, "lr": 1.3477791841684451e-05, "epoch": 13.75, "percentage": 68.75, "elapsed_time": "0:03:59", "remaining_time": "0:01:48", "throughput": 1573.16, "total_tokens": 376320} | |
| {"current_steps": 2480, "total_steps": 3600, "loss": 0.2174, "lr": 1.337036471230889e-05, "epoch": 13.777777777777779, "percentage": 68.89, "elapsed_time": "0:03:59", "remaining_time": "0:01:48", "throughput": 1573.65, "total_tokens": 377104} | |
| {"current_steps": 2485, "total_steps": 3600, "loss": 0.1612, "lr": 1.3263210930352737e-05, "epoch": 13.805555555555555, "percentage": 69.03, "elapsed_time": "0:04:00", "remaining_time": "0:01:47", "throughput": 1574.08, "total_tokens": 377872} | |
| {"current_steps": 2490, "total_steps": 3600, "loss": 0.1359, "lr": 1.3156333014399674e-05, "epoch": 13.833333333333334, "percentage": 69.17, "elapsed_time": "0:04:00", "remaining_time": "0:01:47", "throughput": 1574.54, "total_tokens": 378656} | |
| {"current_steps": 2495, "total_steps": 3600, "loss": 0.1586, "lr": 1.3049733476549352e-05, "epoch": 13.86111111111111, "percentage": 69.31, "elapsed_time": "0:04:00", "remaining_time": "0:01:46", "throughput": 1574.86, "total_tokens": 379408} | |
| {"current_steps": 2500, "total_steps": 3600, "loss": 0.14, "lr": 1.2943414822358285e-05, "epoch": 13.88888888888889, "percentage": 69.44, "elapsed_time": "0:04:01", "remaining_time": "0:01:46", "throughput": 1575.11, "total_tokens": 380144} | |
| {"current_steps": 2505, "total_steps": 3600, "loss": 0.1432, "lr": 1.2837379550781003e-05, "epoch": 13.916666666666666, "percentage": 69.58, "elapsed_time": "0:04:01", "remaining_time": "0:01:45", "throughput": 1575.38, "total_tokens": 380880} | |
| {"current_steps": 2510, "total_steps": 3600, "loss": 0.2479, "lr": 1.2731630154111296e-05, "epoch": 13.944444444444445, "percentage": 69.72, "elapsed_time": "0:04:02", "remaining_time": "0:01:45", "throughput": 1575.76, "total_tokens": 381632} | |
| {"current_steps": 2515, "total_steps": 3600, "loss": 0.1723, "lr": 1.262616911792365e-05, "epoch": 13.972222222222221, "percentage": 69.86, "elapsed_time": "0:04:02", "remaining_time": "0:01:44", "throughput": 1576.11, "total_tokens": 382368} | |
| {"current_steps": 2520, "total_steps": 3600, "loss": 0.1748, "lr": 1.2520998921014792e-05, "epoch": 14.0, "percentage": 70.0, "elapsed_time": "0:04:03", "remaining_time": "0:01:44", "throughput": 1575.97, "total_tokens": 383088} | |
| {"current_steps": 2520, "total_steps": 3600, "eval_loss": 0.2950591444969177, "epoch": 14.0, "percentage": 70.0, "elapsed_time": "0:04:03", "remaining_time": "0:01:44", "throughput": 1570.42, "total_tokens": 383088} | |
| {"current_steps": 2525, "total_steps": 3600, "loss": 0.1308, "lr": 1.2416122035345507e-05, "epoch": 14.027777777777779, "percentage": 70.14, "elapsed_time": "0:04:05", "remaining_time": "0:01:44", "throughput": 1562.07, "total_tokens": 383840} | |
| {"current_steps": 2530, "total_steps": 3600, "loss": 0.251, "lr": 1.2311540925982403e-05, "epoch": 14.055555555555555, "percentage": 70.28, "elapsed_time": "0:04:06", "remaining_time": "0:01:44", "throughput": 1562.5, "total_tokens": 384624} | |
| {"current_steps": 2535, "total_steps": 3600, "loss": 0.209, "lr": 1.2207258051040099e-05, "epoch": 14.083333333333334, "percentage": 70.42, "elapsed_time": "0:04:06", "remaining_time": "0:01:43", "throughput": 1562.88, "total_tokens": 385392} | |
| {"current_steps": 2540, "total_steps": 3600, "loss": 0.1824, "lr": 1.2103275861623378e-05, "epoch": 14.11111111111111, "percentage": 70.56, "elapsed_time": "0:04:07", "remaining_time": "0:01:43", "throughput": 1563.36, "total_tokens": 386176} | |
| {"current_steps": 2545, "total_steps": 3600, "loss": 0.1749, "lr": 1.1999596801769616e-05, "epoch": 14.13888888888889, "percentage": 70.69, "elapsed_time": "0:04:07", "remaining_time": "0:01:42", "throughput": 1563.74, "total_tokens": 386944} | |
| {"current_steps": 2550, "total_steps": 3600, "loss": 0.1024, "lr": 1.189622330839129e-05, "epoch": 14.166666666666666, "percentage": 70.83, "elapsed_time": "0:04:07", "remaining_time": "0:01:42", "throughput": 1564.04, "total_tokens": 387696} | |
| {"current_steps": 2555, "total_steps": 3600, "loss": 0.1795, "lr": 1.179315781121874e-05, "epoch": 14.194444444444445, "percentage": 70.97, "elapsed_time": "0:04:08", "remaining_time": "0:01:41", "throughput": 1564.44, "total_tokens": 388464} | |
| {"current_steps": 2560, "total_steps": 3600, "loss": 0.151, "lr": 1.1690402732743042e-05, "epoch": 14.222222222222221, "percentage": 71.11, "elapsed_time": "0:04:08", "remaining_time": "0:01:41", "throughput": 1564.81, "total_tokens": 389232} | |
| {"current_steps": 2565, "total_steps": 3600, "loss": 0.2119, "lr": 1.158796048815906e-05, "epoch": 14.25, "percentage": 71.25, "elapsed_time": "0:04:09", "remaining_time": "0:01:40", "throughput": 1565.21, "total_tokens": 390000} | |
| {"current_steps": 2570, "total_steps": 3600, "loss": 0.1076, "lr": 1.1485833485308702e-05, "epoch": 14.277777777777779, "percentage": 71.39, "elapsed_time": "0:04:09", "remaining_time": "0:01:40", "throughput": 1565.62, "total_tokens": 390768} | |
| {"current_steps": 2575, "total_steps": 3600, "loss": 0.14, "lr": 1.1384024124624324e-05, "epoch": 14.305555555555555, "percentage": 71.53, "elapsed_time": "0:04:10", "remaining_time": "0:01:39", "throughput": 1566.12, "total_tokens": 391568} | |
| {"current_steps": 2580, "total_steps": 3600, "loss": 0.197, "lr": 1.1282534799072272e-05, "epoch": 14.333333333333334, "percentage": 71.67, "elapsed_time": "0:04:10", "remaining_time": "0:01:39", "throughput": 1566.46, "total_tokens": 392320} | |
| {"current_steps": 2585, "total_steps": 3600, "loss": 0.3318, "lr": 1.1181367894096684e-05, "epoch": 14.36111111111111, "percentage": 71.81, "elapsed_time": "0:04:10", "remaining_time": "0:01:38", "throughput": 1567.02, "total_tokens": 393136} | |
| {"current_steps": 2590, "total_steps": 3600, "loss": 0.2645, "lr": 1.1080525787563393e-05, "epoch": 14.38888888888889, "percentage": 71.94, "elapsed_time": "0:04:11", "remaining_time": "0:01:38", "throughput": 1567.54, "total_tokens": 393936} | |
| {"current_steps": 2595, "total_steps": 3600, "loss": 0.1461, "lr": 1.0980010849704036e-05, "epoch": 14.416666666666666, "percentage": 72.08, "elapsed_time": "0:04:11", "remaining_time": "0:01:37", "throughput": 1567.86, "total_tokens": 394688} | |
| {"current_steps": 2600, "total_steps": 3600, "loss": 0.3501, "lr": 1.0879825443060362e-05, "epoch": 14.444444444444445, "percentage": 72.22, "elapsed_time": "0:04:12", "remaining_time": "0:01:36", "throughput": 1568.25, "total_tokens": 395456} | |
| {"current_steps": 2605, "total_steps": 3600, "loss": 0.155, "lr": 1.0779971922428711e-05, "epoch": 14.472222222222221, "percentage": 72.36, "elapsed_time": "0:04:12", "remaining_time": "0:01:36", "throughput": 1568.6, "total_tokens": 396224} | |
| {"current_steps": 2610, "total_steps": 3600, "loss": 0.2158, "lr": 1.0680452634804603e-05, "epoch": 14.5, "percentage": 72.5, "elapsed_time": "0:04:13", "remaining_time": "0:01:35", "throughput": 1568.83, "total_tokens": 396960} | |
| {"current_steps": 2615, "total_steps": 3600, "loss": 0.0862, "lr": 1.0581269919327643e-05, "epoch": 14.527777777777779, "percentage": 72.64, "elapsed_time": "0:04:13", "remaining_time": "0:01:35", "throughput": 1569.13, "total_tokens": 397712} | |
| {"current_steps": 2620, "total_steps": 3600, "loss": 0.0899, "lr": 1.0482426107226507e-05, "epoch": 14.555555555555555, "percentage": 72.78, "elapsed_time": "0:04:13", "remaining_time": "0:01:34", "throughput": 1569.4, "total_tokens": 398448} | |
| {"current_steps": 2625, "total_steps": 3600, "loss": 0.1994, "lr": 1.0383923521764174e-05, "epoch": 14.583333333333334, "percentage": 72.92, "elapsed_time": "0:04:14", "remaining_time": "0:01:34", "throughput": 1569.72, "total_tokens": 399200} | |
| {"current_steps": 2630, "total_steps": 3600, "loss": 0.2465, "lr": 1.0285764478183284e-05, "epoch": 14.61111111111111, "percentage": 73.06, "elapsed_time": "0:04:14", "remaining_time": "0:01:33", "throughput": 1570.07, "total_tokens": 399952} | |
| {"current_steps": 2635, "total_steps": 3600, "loss": 0.1971, "lr": 1.0187951283651736e-05, "epoch": 14.63888888888889, "percentage": 73.19, "elapsed_time": "0:04:15", "remaining_time": "0:01:33", "throughput": 1570.35, "total_tokens": 400688} | |
| {"current_steps": 2640, "total_steps": 3600, "loss": 0.2033, "lr": 1.0090486237208463e-05, "epoch": 14.666666666666666, "percentage": 73.33, "elapsed_time": "0:04:15", "remaining_time": "0:01:32", "throughput": 1570.63, "total_tokens": 401424} | |
| {"current_steps": 2645, "total_steps": 3600, "loss": 0.0746, "lr": 9.993371629709391e-06, "epoch": 14.694444444444445, "percentage": 73.47, "elapsed_time": "0:04:16", "remaining_time": "0:01:32", "throughput": 1570.93, "total_tokens": 402176} | |
| {"current_steps": 2650, "total_steps": 3600, "loss": 0.1266, "lr": 9.89660974377359e-06, "epoch": 14.722222222222221, "percentage": 73.61, "elapsed_time": "0:04:16", "remaining_time": "0:01:31", "throughput": 1571.28, "total_tokens": 402944} | |
| {"current_steps": 2655, "total_steps": 3600, "loss": 0.142, "lr": 9.800202853729651e-06, "epoch": 14.75, "percentage": 73.75, "elapsed_time": "0:04:16", "remaining_time": "0:01:31", "throughput": 1571.57, "total_tokens": 403696} | |
| {"current_steps": 2660, "total_steps": 3600, "loss": 0.4962, "lr": 9.704153225562171e-06, "epoch": 14.777777777777779, "percentage": 73.89, "elapsed_time": "0:04:17", "remaining_time": "0:01:30", "throughput": 1571.93, "total_tokens": 404464} | |
| {"current_steps": 2665, "total_steps": 3600, "loss": 0.1042, "lr": 9.608463116858542e-06, "epoch": 14.805555555555555, "percentage": 74.03, "elapsed_time": "0:04:17", "remaining_time": "0:01:30", "throughput": 1572.14, "total_tokens": 405200} | |
| {"current_steps": 2670, "total_steps": 3600, "loss": 0.1045, "lr": 9.51313477675588e-06, "epoch": 14.833333333333334, "percentage": 74.17, "elapsed_time": "0:04:18", "remaining_time": "0:01:29", "throughput": 1572.42, "total_tokens": 405936} | |
| {"current_steps": 2675, "total_steps": 3600, "loss": 0.3351, "lr": 9.418170445888139e-06, "epoch": 14.86111111111111, "percentage": 74.31, "elapsed_time": "0:04:18", "remaining_time": "0:01:29", "throughput": 1572.81, "total_tokens": 406688} | |
| {"current_steps": 2680, "total_steps": 3600, "loss": 0.1427, "lr": 9.323572356333454e-06, "epoch": 14.88888888888889, "percentage": 74.44, "elapsed_time": "0:04:18", "remaining_time": "0:01:28", "throughput": 1573.13, "total_tokens": 407424} | |
| {"current_steps": 2685, "total_steps": 3600, "loss": 0.1361, "lr": 9.22934273156172e-06, "epoch": 14.916666666666666, "percentage": 74.58, "elapsed_time": "0:04:19", "remaining_time": "0:01:28", "throughput": 1573.44, "total_tokens": 408160} | |
| {"current_steps": 2690, "total_steps": 3600, "loss": 0.2055, "lr": 9.135483786382262e-06, "epoch": 14.944444444444445, "percentage": 74.72, "elapsed_time": "0:04:19", "remaining_time": "0:01:27", "throughput": 1573.86, "total_tokens": 408912} | |
| {"current_steps": 2695, "total_steps": 3600, "loss": 0.1449, "lr": 9.0419977268918e-06, "epoch": 14.972222222222221, "percentage": 74.86, "elapsed_time": "0:04:20", "remaining_time": "0:01:27", "throughput": 1574.46, "total_tokens": 409696} | |
| {"current_steps": 2700, "total_steps": 3600, "loss": 0.0885, "lr": 8.948886750422636e-06, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:04:20", "remaining_time": "0:01:26", "throughput": 1574.57, "total_tokens": 410448} | |
| {"current_steps": 2700, "total_steps": 3600, "eval_loss": 0.38744935393333435, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:04:21", "remaining_time": "0:01:27", "throughput": 1569.41, "total_tokens": 410448} | |
| {"current_steps": 2705, "total_steps": 3600, "loss": 0.0314, "lr": 8.856153045490948e-06, "epoch": 15.027777777777779, "percentage": 75.14, "elapsed_time": "0:04:23", "remaining_time": "0:01:27", "throughput": 1560.94, "total_tokens": 411184} | |
| {"current_steps": 2710, "total_steps": 3600, "loss": 0.0263, "lr": 8.763798791745411e-06, "epoch": 15.055555555555555, "percentage": 75.28, "elapsed_time": "0:04:23", "remaining_time": "0:01:26", "throughput": 1561.21, "total_tokens": 411936} | |
| {"current_steps": 2715, "total_steps": 3600, "loss": 0.1901, "lr": 8.671826159915907e-06, "epoch": 15.083333333333334, "percentage": 75.42, "elapsed_time": "0:04:24", "remaining_time": "0:01:26", "throughput": 1561.65, "total_tokens": 412720} | |
| {"current_steps": 2720, "total_steps": 3600, "loss": 0.2449, "lr": 8.58023731176254e-06, "epoch": 15.11111111111111, "percentage": 75.56, "elapsed_time": "0:04:24", "remaining_time": "0:01:25", "throughput": 1561.93, "total_tokens": 413472} | |
| {"current_steps": 2725, "total_steps": 3600, "loss": 0.0795, "lr": 8.489034400024812e-06, "epoch": 15.13888888888889, "percentage": 75.69, "elapsed_time": "0:04:25", "remaining_time": "0:01:25", "throughput": 1562.2, "total_tokens": 414208} | |
| {"current_steps": 2730, "total_steps": 3600, "loss": 0.1313, "lr": 8.39821956837102e-06, "epoch": 15.166666666666666, "percentage": 75.83, "elapsed_time": "0:04:25", "remaining_time": "0:01:24", "throughput": 1562.48, "total_tokens": 414944} | |
| {"current_steps": 2735, "total_steps": 3600, "loss": 0.1054, "lr": 8.3077949513479e-06, "epoch": 15.194444444444445, "percentage": 75.97, "elapsed_time": "0:04:25", "remaining_time": "0:01:24", "throughput": 1562.85, "total_tokens": 415712} | |
| {"current_steps": 2740, "total_steps": 3600, "loss": 0.1153, "lr": 8.217762674330413e-06, "epoch": 15.222222222222221, "percentage": 76.11, "elapsed_time": "0:04:26", "remaining_time": "0:01:23", "throughput": 1563.11, "total_tokens": 416448} | |
| {"current_steps": 2745, "total_steps": 3600, "loss": 0.3075, "lr": 8.128124853471814e-06, "epoch": 15.25, "percentage": 76.25, "elapsed_time": "0:04:26", "remaining_time": "0:01:23", "throughput": 1563.36, "total_tokens": 417184} | |
| {"current_steps": 2750, "total_steps": 3600, "loss": 0.1342, "lr": 8.03888359565391e-06, "epoch": 15.277777777777779, "percentage": 76.39, "elapsed_time": "0:04:27", "remaining_time": "0:01:22", "throughput": 1563.6, "total_tokens": 417920} | |
| {"current_steps": 2755, "total_steps": 3600, "loss": 0.2187, "lr": 7.950040998437542e-06, "epoch": 15.305555555555555, "percentage": 76.53, "elapsed_time": "0:04:27", "remaining_time": "0:01:22", "throughput": 1563.96, "total_tokens": 418688} | |
| {"current_steps": 2760, "total_steps": 3600, "loss": 0.0683, "lr": 7.86159915001326e-06, "epoch": 15.333333333333334, "percentage": 76.67, "elapsed_time": "0:04:28", "remaining_time": "0:01:21", "throughput": 1564.18, "total_tokens": 419424} | |
| {"current_steps": 2765, "total_steps": 3600, "loss": 0.0755, "lr": 7.7735601291523e-06, "epoch": 15.36111111111111, "percentage": 76.81, "elapsed_time": "0:04:28", "remaining_time": "0:01:21", "throughput": 1564.65, "total_tokens": 420224} | |
| {"current_steps": 2770, "total_steps": 3600, "loss": 0.0281, "lr": 7.685926005157651e-06, "epoch": 15.38888888888889, "percentage": 76.94, "elapsed_time": "0:04:28", "remaining_time": "0:01:20", "throughput": 1565.03, "total_tokens": 420992} | |
| {"current_steps": 2775, "total_steps": 3600, "loss": 0.1672, "lr": 7.598698837815449e-06, "epoch": 15.416666666666666, "percentage": 77.08, "elapsed_time": "0:04:29", "remaining_time": "0:01:20", "throughput": 1565.34, "total_tokens": 421744} | |
| {"current_steps": 2780, "total_steps": 3600, "loss": 0.3337, "lr": 7.511880677346578e-06, "epoch": 15.444444444444445, "percentage": 77.22, "elapsed_time": "0:04:29", "remaining_time": "0:01:19", "throughput": 1565.66, "total_tokens": 422496} | |
| {"current_steps": 2785, "total_steps": 3600, "loss": 0.0069, "lr": 7.4254735643584564e-06, "epoch": 15.472222222222221, "percentage": 77.36, "elapsed_time": "0:04:30", "remaining_time": "0:01:19", "throughput": 1566.02, "total_tokens": 423264} | |
| {"current_steps": 2790, "total_steps": 3600, "loss": 0.1213, "lr": 7.339479529797111e-06, "epoch": 15.5, "percentage": 77.5, "elapsed_time": "0:04:30", "remaining_time": "0:01:18", "throughput": 1566.37, "total_tokens": 424032} | |
| {"current_steps": 2795, "total_steps": 3600, "loss": 0.0621, "lr": 7.2539005948993825e-06, "epoch": 15.527777777777779, "percentage": 77.64, "elapsed_time": "0:04:31", "remaining_time": "0:01:18", "throughput": 1566.8, "total_tokens": 424816} | |
| {"current_steps": 2800, "total_steps": 3600, "loss": 0.1208, "lr": 7.168738771145464e-06, "epoch": 15.555555555555555, "percentage": 77.78, "elapsed_time": "0:04:31", "remaining_time": "0:01:17", "throughput": 1567.16, "total_tokens": 425584} | |
| {"current_steps": 2805, "total_steps": 3600, "loss": 0.1817, "lr": 7.083996060211607e-06, "epoch": 15.583333333333334, "percentage": 77.92, "elapsed_time": "0:04:31", "remaining_time": "0:01:17", "throughput": 1567.46, "total_tokens": 426336} | |
| {"current_steps": 2810, "total_steps": 3600, "loss": 0.1328, "lr": 6.9996744539230665e-06, "epoch": 15.61111111111111, "percentage": 78.06, "elapsed_time": "0:04:32", "remaining_time": "0:01:16", "throughput": 1567.89, "total_tokens": 427120} | |
| {"current_steps": 2815, "total_steps": 3600, "loss": 0.2257, "lr": 6.9157759342072995e-06, "epoch": 15.63888888888889, "percentage": 78.19, "elapsed_time": "0:04:32", "remaining_time": "0:01:16", "throughput": 1568.31, "total_tokens": 427904} | |
| {"current_steps": 2820, "total_steps": 3600, "loss": 0.0523, "lr": 6.832302473047384e-06, "epoch": 15.666666666666666, "percentage": 78.33, "elapsed_time": "0:04:33", "remaining_time": "0:01:15", "throughput": 1568.64, "total_tokens": 428672} | |
| {"current_steps": 2825, "total_steps": 3600, "loss": 0.1134, "lr": 6.7492560324356355e-06, "epoch": 15.694444444444445, "percentage": 78.47, "elapsed_time": "0:04:33", "remaining_time": "0:01:15", "throughput": 1569.04, "total_tokens": 429456} | |
| {"current_steps": 2830, "total_steps": 3600, "loss": 0.0496, "lr": 6.666638564327532e-06, "epoch": 15.722222222222221, "percentage": 78.61, "elapsed_time": "0:04:34", "remaining_time": "0:01:14", "throughput": 1569.28, "total_tokens": 430192} | |
| {"current_steps": 2835, "total_steps": 3600, "loss": 0.2287, "lr": 6.584452010595807e-06, "epoch": 15.75, "percentage": 78.75, "elapsed_time": "0:04:34", "remaining_time": "0:01:14", "throughput": 1569.58, "total_tokens": 430944} | |
| {"current_steps": 2840, "total_steps": 3600, "loss": 0.0264, "lr": 6.502698302984811e-06, "epoch": 15.777777777777779, "percentage": 78.89, "elapsed_time": "0:04:34", "remaining_time": "0:01:13", "throughput": 1569.82, "total_tokens": 431680} | |
| {"current_steps": 2845, "total_steps": 3600, "loss": 0.1039, "lr": 6.421379363065142e-06, "epoch": 15.805555555555555, "percentage": 79.03, "elapsed_time": "0:04:35", "remaining_time": "0:01:13", "throughput": 1570.28, "total_tokens": 432480} | |
| {"current_steps": 2850, "total_steps": 3600, "loss": 0.135, "lr": 6.340497102188425e-06, "epoch": 15.833333333333334, "percentage": 79.17, "elapsed_time": "0:04:35", "remaining_time": "0:01:12", "throughput": 1570.63, "total_tokens": 433248} | |
| {"current_steps": 2855, "total_steps": 3600, "loss": 0.3508, "lr": 6.26005342144241e-06, "epoch": 15.86111111111111, "percentage": 79.31, "elapsed_time": "0:04:36", "remaining_time": "0:01:12", "throughput": 1571.0, "total_tokens": 434016} | |
| {"current_steps": 2860, "total_steps": 3600, "loss": 0.0026, "lr": 6.180050211606303e-06, "epoch": 15.88888888888889, "percentage": 79.44, "elapsed_time": "0:04:36", "remaining_time": "0:01:11", "throughput": 1571.29, "total_tokens": 434768} | |
| {"current_steps": 2865, "total_steps": 3600, "loss": 0.3825, "lr": 6.100489353106304e-06, "epoch": 15.916666666666666, "percentage": 79.58, "elapsed_time": "0:04:37", "remaining_time": "0:01:11", "throughput": 1571.53, "total_tokens": 435504} | |
| {"current_steps": 2870, "total_steps": 3600, "loss": 0.2274, "lr": 6.021372715971437e-06, "epoch": 15.944444444444445, "percentage": 79.72, "elapsed_time": "0:04:37", "remaining_time": "0:01:10", "throughput": 1571.83, "total_tokens": 436256} | |
| {"current_steps": 2875, "total_steps": 3600, "loss": 0.0297, "lr": 5.942702159789554e-06, "epoch": 15.972222222222221, "percentage": 79.86, "elapsed_time": "0:04:37", "remaining_time": "0:01:10", "throughput": 1572.28, "total_tokens": 437024} | |
| {"current_steps": 2880, "total_steps": 3600, "loss": 0.1848, "lr": 5.864479533663655e-06, "epoch": 16.0, "percentage": 80.0, "elapsed_time": "0:04:38", "remaining_time": "0:01:09", "throughput": 1572.32, "total_tokens": 437776} | |
| {"current_steps": 2880, "total_steps": 3600, "eval_loss": 0.6435995697975159, "epoch": 16.0, "percentage": 80.0, "elapsed_time": "0:04:39", "remaining_time": "0:01:09", "throughput": 1567.47, "total_tokens": 437776} | |
| {"current_steps": 2885, "total_steps": 3600, "loss": 0.0116, "lr": 5.786706676168424e-06, "epoch": 16.02777777777778, "percentage": 80.14, "elapsed_time": "0:04:41", "remaining_time": "0:01:09", "throughput": 1559.0, "total_tokens": 438576} | |
| {"current_steps": 2890, "total_steps": 3600, "loss": 0.0365, "lr": 5.709385415307006e-06, "epoch": 16.055555555555557, "percentage": 80.28, "elapsed_time": "0:04:41", "remaining_time": "0:01:09", "throughput": 1559.48, "total_tokens": 439360} | |
| {"current_steps": 2895, "total_steps": 3600, "loss": 0.1803, "lr": 5.6325175684680374e-06, "epoch": 16.083333333333332, "percentage": 80.42, "elapsed_time": "0:04:42", "remaining_time": "0:01:08", "throughput": 1559.67, "total_tokens": 440096} | |
| {"current_steps": 2900, "total_steps": 3600, "loss": 0.0186, "lr": 5.556104942382964e-06, "epoch": 16.11111111111111, "percentage": 80.56, "elapsed_time": "0:04:42", "remaining_time": "0:01:08", "throughput": 1560.0, "total_tokens": 440848} | |
| {"current_steps": 2905, "total_steps": 3600, "loss": 0.1468, "lr": 5.48014933308352e-06, "epoch": 16.13888888888889, "percentage": 80.69, "elapsed_time": "0:04:43", "remaining_time": "0:01:07", "throughput": 1560.42, "total_tokens": 441616} | |
| {"current_steps": 2910, "total_steps": 3600, "loss": 0.1638, "lr": 5.404652525859552e-06, "epoch": 16.166666666666668, "percentage": 80.83, "elapsed_time": "0:04:43", "remaining_time": "0:01:07", "throughput": 1560.7, "total_tokens": 442352} | |
| {"current_steps": 2915, "total_steps": 3600, "loss": 0.0028, "lr": 5.329616295217046e-06, "epoch": 16.194444444444443, "percentage": 80.97, "elapsed_time": "0:04:43", "remaining_time": "0:01:06", "throughput": 1560.97, "total_tokens": 443088} | |
| {"current_steps": 2920, "total_steps": 3600, "loss": 0.0059, "lr": 5.2550424048364185e-06, "epoch": 16.22222222222222, "percentage": 81.11, "elapsed_time": "0:04:44", "remaining_time": "0:01:06", "throughput": 1561.3, "total_tokens": 443856} | |
| {"current_steps": 2925, "total_steps": 3600, "loss": 0.3279, "lr": 5.180932607531056e-06, "epoch": 16.25, "percentage": 81.25, "elapsed_time": "0:04:44", "remaining_time": "0:01:05", "throughput": 1561.65, "total_tokens": 444608} | |
| {"current_steps": 2930, "total_steps": 3600, "loss": 0.0014, "lr": 5.107288645206149e-06, "epoch": 16.27777777777778, "percentage": 81.39, "elapsed_time": "0:04:45", "remaining_time": "0:01:05", "throughput": 1561.96, "total_tokens": 445344} | |
| {"current_steps": 2935, "total_steps": 3600, "loss": 0.0828, "lr": 5.034112248817685e-06, "epoch": 16.305555555555557, "percentage": 81.53, "elapsed_time": "0:04:45", "remaining_time": "0:01:04", "throughput": 1562.41, "total_tokens": 446128} | |
| {"current_steps": 2940, "total_steps": 3600, "loss": 0.0058, "lr": 4.961405138331826e-06, "epoch": 16.333333333333332, "percentage": 81.67, "elapsed_time": "0:04:45", "remaining_time": "0:01:04", "throughput": 1562.84, "total_tokens": 446912} | |
| {"current_steps": 2945, "total_steps": 3600, "loss": 0.1174, "lr": 4.88916902268445e-06, "epoch": 16.36111111111111, "percentage": 81.81, "elapsed_time": "0:04:46", "remaining_time": "0:01:03", "throughput": 1563.16, "total_tokens": 447664} | |
| {"current_steps": 2950, "total_steps": 3600, "loss": 0.0612, "lr": 4.817405599741004e-06, "epoch": 16.38888888888889, "percentage": 81.94, "elapsed_time": "0:04:46", "remaining_time": "0:01:03", "throughput": 1563.53, "total_tokens": 448416} | |
| {"current_steps": 2955, "total_steps": 3600, "loss": 0.366, "lr": 4.746116556256569e-06, "epoch": 16.416666666666668, "percentage": 82.08, "elapsed_time": "0:04:47", "remaining_time": "0:01:02", "throughput": 1563.96, "total_tokens": 449184} | |
| {"current_steps": 2960, "total_steps": 3600, "loss": 0.0248, "lr": 4.6753035678362314e-06, "epoch": 16.444444444444443, "percentage": 82.22, "elapsed_time": "0:04:47", "remaining_time": "0:01:02", "throughput": 1564.33, "total_tokens": 449936} | |
| {"current_steps": 2965, "total_steps": 3600, "loss": 0.0326, "lr": 4.604968298895703e-06, "epoch": 16.47222222222222, "percentage": 82.36, "elapsed_time": "0:04:48", "remaining_time": "0:01:01", "throughput": 1564.71, "total_tokens": 450688} | |
| {"current_steps": 2970, "total_steps": 3600, "loss": 0.0039, "lr": 4.535112402622185e-06, "epoch": 16.5, "percentage": 82.5, "elapsed_time": "0:04:48", "remaining_time": "0:01:01", "throughput": 1565.08, "total_tokens": 451440} | |
| {"current_steps": 2975, "total_steps": 3600, "loss": 0.0057, "lr": 4.465737520935517e-06, "epoch": 16.52777777777778, "percentage": 82.64, "elapsed_time": "0:04:48", "remaining_time": "0:01:00", "throughput": 1565.33, "total_tokens": 452160} | |
| {"current_steps": 2980, "total_steps": 3600, "loss": 0.002, "lr": 4.396845284449608e-06, "epoch": 16.555555555555557, "percentage": 82.78, "elapsed_time": "0:04:49", "remaining_time": "0:01:00", "throughput": 1565.81, "total_tokens": 452944} | |
| {"current_steps": 2985, "total_steps": 3600, "loss": 0.3633, "lr": 4.328437312434067e-06, "epoch": 16.583333333333332, "percentage": 82.92, "elapsed_time": "0:04:49", "remaining_time": "0:00:59", "throughput": 1566.12, "total_tokens": 453680} | |
| {"current_steps": 2990, "total_steps": 3600, "loss": 0.0115, "lr": 4.2605152127761675e-06, "epoch": 16.61111111111111, "percentage": 83.06, "elapsed_time": "0:04:50", "remaining_time": "0:00:59", "throughput": 1566.49, "total_tokens": 454432} | |
| {"current_steps": 2995, "total_steps": 3600, "loss": 0.0943, "lr": 4.19308058194306e-06, "epoch": 16.63888888888889, "percentage": 83.19, "elapsed_time": "0:04:50", "remaining_time": "0:00:58", "throughput": 1567.02, "total_tokens": 455232} | |
| {"current_steps": 3000, "total_steps": 3600, "loss": 0.0029, "lr": 4.126135004944231e-06, "epoch": 16.666666666666668, "percentage": 83.33, "elapsed_time": "0:04:50", "remaining_time": "0:00:58", "throughput": 1567.38, "total_tokens": 455984} | |
| {"current_steps": 3005, "total_steps": 3600, "loss": 0.0577, "lr": 4.059680055294266e-06, "epoch": 16.694444444444443, "percentage": 83.47, "elapsed_time": "0:04:51", "remaining_time": "0:00:57", "throughput": 1567.74, "total_tokens": 456736} | |
| {"current_steps": 3010, "total_steps": 3600, "loss": 0.1053, "lr": 3.993717294975863e-06, "epoch": 16.72222222222222, "percentage": 83.61, "elapsed_time": "0:04:51", "remaining_time": "0:00:57", "throughput": 1568.17, "total_tokens": 457520} | |
| {"current_steps": 3015, "total_steps": 3600, "loss": 0.2089, "lr": 3.92824827440309e-06, "epoch": 16.75, "percentage": 83.75, "elapsed_time": "0:04:52", "remaining_time": "0:00:56", "throughput": 1568.47, "total_tokens": 458256} | |
| {"current_steps": 3020, "total_steps": 3600, "loss": 0.003, "lr": 3.863274532384981e-06, "epoch": 16.77777777777778, "percentage": 83.89, "elapsed_time": "0:04:52", "remaining_time": "0:00:56", "throughput": 1568.81, "total_tokens": 459008} | |
| {"current_steps": 3025, "total_steps": 3600, "loss": 0.0759, "lr": 3.798797596089351e-06, "epoch": 16.805555555555557, "percentage": 84.03, "elapsed_time": "0:04:53", "remaining_time": "0:00:55", "throughput": 1569.29, "total_tokens": 459808} | |
| {"current_steps": 3030, "total_steps": 3600, "loss": 0.0139, "lr": 3.73481898100691e-06, "epoch": 16.833333333333332, "percentage": 84.17, "elapsed_time": "0:04:53", "remaining_time": "0:00:55", "throughput": 1569.66, "total_tokens": 460576} | |
| {"current_steps": 3035, "total_steps": 3600, "loss": 0.0528, "lr": 3.6713401909156204e-06, "epoch": 16.86111111111111, "percentage": 84.31, "elapsed_time": "0:04:53", "remaining_time": "0:00:54", "throughput": 1570.01, "total_tokens": 461328} | |
| {"current_steps": 3040, "total_steps": 3600, "loss": 0.0237, "lr": 3.608362717845376e-06, "epoch": 16.88888888888889, "percentage": 84.44, "elapsed_time": "0:04:54", "remaining_time": "0:00:54", "throughput": 1570.41, "total_tokens": 462096} | |
| {"current_steps": 3045, "total_steps": 3600, "loss": 0.1684, "lr": 3.5458880420429135e-06, "epoch": 16.916666666666668, "percentage": 84.58, "elapsed_time": "0:04:54", "remaining_time": "0:00:53", "throughput": 1570.77, "total_tokens": 462848} | |
| {"current_steps": 3050, "total_steps": 3600, "loss": 0.0018, "lr": 3.4839176319370394e-06, "epoch": 16.944444444444443, "percentage": 84.72, "elapsed_time": "0:04:55", "remaining_time": "0:00:53", "throughput": 1571.18, "total_tokens": 463616} | |
| {"current_steps": 3055, "total_steps": 3600, "loss": 0.1967, "lr": 3.4224529441040904e-06, "epoch": 16.97222222222222, "percentage": 84.86, "elapsed_time": "0:04:55", "remaining_time": "0:00:52", "throughput": 1571.64, "total_tokens": 464384} | |
| {"current_steps": 3060, "total_steps": 3600, "loss": 0.0092, "lr": 3.3614954232337374e-06, "epoch": 17.0, "percentage": 85.0, "elapsed_time": "0:04:55", "remaining_time": "0:00:52", "throughput": 1571.83, "total_tokens": 465168} | |
| {"current_steps": 3060, "total_steps": 3600, "eval_loss": 0.8914836645126343, "epoch": 17.0, "percentage": 85.0, "elapsed_time": "0:04:56", "remaining_time": "0:00:52", "throughput": 1567.36, "total_tokens": 465168} | |
| {"current_steps": 3065, "total_steps": 3600, "loss": 0.0159, "lr": 3.3010465020949818e-06, "epoch": 17.02777777777778, "percentage": 85.14, "elapsed_time": "0:04:58", "remaining_time": "0:00:52", "throughput": 1559.84, "total_tokens": 465920} | |
| {"current_steps": 3070, "total_steps": 3600, "loss": 0.217, "lr": 3.2411076015025075e-06, "epoch": 17.055555555555557, "percentage": 85.28, "elapsed_time": "0:04:59", "remaining_time": "0:00:51", "throughput": 1560.16, "total_tokens": 466688} | |
| {"current_steps": 3075, "total_steps": 3600, "loss": 0.0058, "lr": 3.1816801302832848e-06, "epoch": 17.083333333333332, "percentage": 85.42, "elapsed_time": "0:04:59", "remaining_time": "0:00:51", "throughput": 1560.64, "total_tokens": 467488} | |
| {"current_steps": 3080, "total_steps": 3600, "loss": 0.001, "lr": 3.1227654852434454e-06, "epoch": 17.11111111111111, "percentage": 85.56, "elapsed_time": "0:04:59", "remaining_time": "0:00:50", "throughput": 1561.0, "total_tokens": 468256} | |
| {"current_steps": 3085, "total_steps": 3600, "loss": 0.0073, "lr": 3.0643650511354484e-06, "epoch": 17.13888888888889, "percentage": 85.69, "elapsed_time": "0:05:00", "remaining_time": "0:00:50", "throughput": 1561.3, "total_tokens": 469008} | |
| {"current_steps": 3090, "total_steps": 3600, "loss": 0.2167, "lr": 3.006480200625572e-06, "epoch": 17.166666666666668, "percentage": 85.83, "elapsed_time": "0:05:00", "remaining_time": "0:00:49", "throughput": 1561.79, "total_tokens": 469808} | |
| {"current_steps": 3095, "total_steps": 3600, "loss": 0.0127, "lr": 2.949112294261591e-06, "epoch": 17.194444444444443, "percentage": 85.97, "elapsed_time": "0:05:01", "remaining_time": "0:00:49", "throughput": 1562.15, "total_tokens": 470560} | |
| {"current_steps": 3100, "total_steps": 3600, "loss": 0.1617, "lr": 2.89226268044083e-06, "epoch": 17.22222222222222, "percentage": 86.11, "elapsed_time": "0:05:01", "remaining_time": "0:00:48", "throughput": 1562.54, "total_tokens": 471328} | |
| {"current_steps": 3105, "total_steps": 3600, "loss": 0.0014, "lr": 2.8359326953784737e-06, "epoch": 17.25, "percentage": 86.25, "elapsed_time": "0:05:02", "remaining_time": "0:00:48", "throughput": 1562.82, "total_tokens": 472064} | |
| {"current_steps": 3110, "total_steps": 3600, "loss": 0.0004, "lr": 2.780123663076142e-06, "epoch": 17.27777777777778, "percentage": 86.39, "elapsed_time": "0:05:02", "remaining_time": "0:00:47", "throughput": 1563.2, "total_tokens": 472832} | |
| {"current_steps": 3115, "total_steps": 3600, "loss": 0.0079, "lr": 2.7248368952908053e-06, "epoch": 17.305555555555557, "percentage": 86.53, "elapsed_time": "0:05:02", "remaining_time": "0:00:47", "throughput": 1563.58, "total_tokens": 473600} | |
| {"current_steps": 3120, "total_steps": 3600, "loss": 0.0046, "lr": 2.670073691503902e-06, "epoch": 17.333333333333332, "percentage": 86.67, "elapsed_time": "0:05:03", "remaining_time": "0:00:46", "throughput": 1563.93, "total_tokens": 474352} | |
| {"current_steps": 3125, "total_steps": 3600, "loss": 0.0058, "lr": 2.6158353388908293e-06, "epoch": 17.36111111111111, "percentage": 86.81, "elapsed_time": "0:05:03", "remaining_time": "0:00:46", "throughput": 1564.29, "total_tokens": 475104} | |
| {"current_steps": 3130, "total_steps": 3600, "loss": 0.1262, "lr": 2.5621231122906873e-06, "epoch": 17.38888888888889, "percentage": 86.94, "elapsed_time": "0:05:04", "remaining_time": "0:00:45", "throughput": 1564.65, "total_tokens": 475856} | |
| {"current_steps": 3135, "total_steps": 3600, "loss": 0.0091, "lr": 2.5089382741762925e-06, "epoch": 17.416666666666668, "percentage": 87.08, "elapsed_time": "0:05:04", "remaining_time": "0:00:45", "throughput": 1565.11, "total_tokens": 476640} | |
| {"current_steps": 3140, "total_steps": 3600, "loss": 0.002, "lr": 2.4562820746245386e-06, "epoch": 17.444444444444443, "percentage": 87.22, "elapsed_time": "0:05:04", "remaining_time": "0:00:44", "throughput": 1565.52, "total_tokens": 477408} | |
| {"current_steps": 3145, "total_steps": 3600, "loss": 0.0016, "lr": 2.4041557512869878e-06, "epoch": 17.47222222222222, "percentage": 87.36, "elapsed_time": "0:05:05", "remaining_time": "0:00:44", "throughput": 1565.87, "total_tokens": 478160} | |
| {"current_steps": 3150, "total_steps": 3600, "loss": 0.0007, "lr": 2.3525605293607784e-06, "epoch": 17.5, "percentage": 87.5, "elapsed_time": "0:05:05", "remaining_time": "0:00:43", "throughput": 1566.27, "total_tokens": 478928} | |
| {"current_steps": 3155, "total_steps": 3600, "loss": 0.016, "lr": 2.3014976215598503e-06, "epoch": 17.52777777777778, "percentage": 87.64, "elapsed_time": "0:05:06", "remaining_time": "0:00:43", "throughput": 1566.67, "total_tokens": 479696} | |
| {"current_steps": 3160, "total_steps": 3600, "loss": 0.0076, "lr": 2.2509682280864224e-06, "epoch": 17.555555555555557, "percentage": 87.78, "elapsed_time": "0:05:06", "remaining_time": "0:00:42", "throughput": 1567.06, "total_tokens": 480464} | |
| {"current_steps": 3165, "total_steps": 3600, "loss": 0.1149, "lr": 2.2009735366027795e-06, "epoch": 17.583333333333332, "percentage": 87.92, "elapsed_time": "0:05:07", "remaining_time": "0:00:42", "throughput": 1567.46, "total_tokens": 481232} | |
| {"current_steps": 3170, "total_steps": 3600, "loss": 0.097, "lr": 2.151514722203385e-06, "epoch": 17.61111111111111, "percentage": 88.06, "elapsed_time": "0:05:07", "remaining_time": "0:00:41", "throughput": 1567.81, "total_tokens": 481984} | |
| {"current_steps": 3175, "total_steps": 3600, "loss": 0.0034, "lr": 2.1025929473872274e-06, "epoch": 17.63888888888889, "percentage": 88.19, "elapsed_time": "0:05:07", "remaining_time": "0:00:41", "throughput": 1568.26, "total_tokens": 482768} | |
| {"current_steps": 3180, "total_steps": 3600, "loss": 0.0002, "lr": 2.0542093620305042e-06, "epoch": 17.666666666666668, "percentage": 88.33, "elapsed_time": "0:05:08", "remaining_time": "0:00:40", "throughput": 1568.66, "total_tokens": 483536} | |
| {"current_steps": 3185, "total_steps": 3600, "loss": 0.0124, "lr": 2.0063651033596143e-06, "epoch": 17.694444444444443, "percentage": 88.47, "elapsed_time": "0:05:08", "remaining_time": "0:00:40", "throughput": 1569.12, "total_tokens": 484320} | |
| {"current_steps": 3190, "total_steps": 3600, "loss": 0.0128, "lr": 1.9590612959244055e-06, "epoch": 17.72222222222222, "percentage": 88.61, "elapsed_time": "0:05:09", "remaining_time": "0:00:39", "throughput": 1569.4, "total_tokens": 485056} | |
| {"current_steps": 3195, "total_steps": 3600, "loss": 0.0008, "lr": 1.912299051571764e-06, "epoch": 17.75, "percentage": 88.75, "elapsed_time": "0:05:09", "remaining_time": "0:00:39", "throughput": 1569.74, "total_tokens": 485808} | |
| {"current_steps": 3200, "total_steps": 3600, "loss": 0.0161, "lr": 1.8660794694194573e-06, "epoch": 17.77777777777778, "percentage": 88.89, "elapsed_time": "0:05:09", "remaining_time": "0:00:38", "throughput": 1570.06, "total_tokens": 486560} | |
| {"current_steps": 3205, "total_steps": 3600, "loss": 0.1385, "lr": 1.8204036358303173e-06, "epoch": 17.805555555555557, "percentage": 89.03, "elapsed_time": "0:05:10", "remaining_time": "0:00:38", "throughput": 1570.38, "total_tokens": 487312} | |
| {"current_steps": 3210, "total_steps": 3600, "loss": 0.0007, "lr": 1.775272624386695e-06, "epoch": 17.833333333333332, "percentage": 89.17, "elapsed_time": "0:05:10", "remaining_time": "0:00:37", "throughput": 1570.73, "total_tokens": 488080} | |
| {"current_steps": 3215, "total_steps": 3600, "loss": 0.0409, "lr": 1.7306874958652408e-06, "epoch": 17.86111111111111, "percentage": 89.31, "elapsed_time": "0:05:11", "remaining_time": "0:00:37", "throughput": 1571.0, "total_tokens": 488832} | |
| {"current_steps": 3220, "total_steps": 3600, "loss": 0.0264, "lr": 1.686649298211951e-06, "epoch": 17.88888888888889, "percentage": 89.44, "elapsed_time": "0:05:11", "remaining_time": "0:00:36", "throughput": 1571.34, "total_tokens": 489600} | |
| {"current_steps": 3225, "total_steps": 3600, "loss": 0.0869, "lr": 1.643159066517566e-06, "epoch": 17.916666666666668, "percentage": 89.58, "elapsed_time": "0:05:11", "remaining_time": "0:00:36", "throughput": 1571.62, "total_tokens": 490336} | |
| {"current_steps": 3230, "total_steps": 3600, "loss": 0.003, "lr": 1.6002178229932107e-06, "epoch": 17.944444444444443, "percentage": 89.72, "elapsed_time": "0:05:12", "remaining_time": "0:00:35", "throughput": 1571.85, "total_tokens": 491056} | |
| {"current_steps": 3235, "total_steps": 3600, "loss": 0.1735, "lr": 1.5578265769463806e-06, "epoch": 17.97222222222222, "percentage": 89.86, "elapsed_time": "0:05:12", "remaining_time": "0:00:35", "throughput": 1572.19, "total_tokens": 491792} | |
| {"current_steps": 3240, "total_steps": 3600, "loss": 0.0543, "lr": 1.5159863247572236e-06, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "0:05:13", "remaining_time": "0:00:34", "throughput": 1572.3, "total_tokens": 492560} | |
| {"current_steps": 3240, "total_steps": 3600, "eval_loss": 1.0005823373794556, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "0:05:14", "remaining_time": "0:00:34", "throughput": 1568.06, "total_tokens": 492560} | |
| {"current_steps": 3245, "total_steps": 3600, "loss": 0.0001, "lr": 1.4746980498551112e-06, "epoch": 18.02777777777778, "percentage": 90.14, "elapsed_time": "0:05:15", "remaining_time": "0:00:34", "throughput": 1561.45, "total_tokens": 493280} | |
| {"current_steps": 3250, "total_steps": 3600, "loss": 0.0002, "lr": 1.4339627226955392e-06, "epoch": 18.055555555555557, "percentage": 90.28, "elapsed_time": "0:05:16", "remaining_time": "0:00:34", "throughput": 1561.82, "total_tokens": 494048} | |
| {"current_steps": 3255, "total_steps": 3600, "loss": 0.0007, "lr": 1.3937813007373013e-06, "epoch": 18.083333333333332, "percentage": 90.42, "elapsed_time": "0:05:16", "remaining_time": "0:00:33", "throughput": 1562.08, "total_tokens": 494784} | |
| {"current_steps": 3260, "total_steps": 3600, "loss": 0.1658, "lr": 1.354154728419979e-06, "epoch": 18.11111111111111, "percentage": 90.56, "elapsed_time": "0:05:17", "remaining_time": "0:00:33", "throughput": 1562.46, "total_tokens": 495552} | |
| {"current_steps": 3265, "total_steps": 3600, "loss": 0.0021, "lr": 1.31508393714177e-06, "epoch": 18.13888888888889, "percentage": 90.69, "elapsed_time": "0:05:17", "remaining_time": "0:00:32", "throughput": 1562.82, "total_tokens": 496320} | |
| {"current_steps": 3270, "total_steps": 3600, "loss": 0.0193, "lr": 1.276569845237574e-06, "epoch": 18.166666666666668, "percentage": 90.83, "elapsed_time": "0:05:17", "remaining_time": "0:00:32", "throughput": 1563.14, "total_tokens": 497072} | |
| {"current_steps": 3275, "total_steps": 3600, "loss": 0.0078, "lr": 1.2386133579574189e-06, "epoch": 18.194444444444443, "percentage": 90.97, "elapsed_time": "0:05:18", "remaining_time": "0:00:31", "throughput": 1563.4, "total_tokens": 497808} | |
| {"current_steps": 3280, "total_steps": 3600, "loss": 0.0005, "lr": 1.2012153674451715e-06, "epoch": 18.22222222222222, "percentage": 91.11, "elapsed_time": "0:05:18", "remaining_time": "0:00:31", "throughput": 1563.62, "total_tokens": 498528} | |
| {"current_steps": 3285, "total_steps": 3600, "loss": 0.0034, "lr": 1.1643767527175857e-06, "epoch": 18.25, "percentage": 91.25, "elapsed_time": "0:05:19", "remaining_time": "0:00:30", "throughput": 1563.99, "total_tokens": 499296} | |
| {"current_steps": 3290, "total_steps": 3600, "loss": 0.0233, "lr": 1.1280983796436245e-06, "epoch": 18.27777777777778, "percentage": 91.39, "elapsed_time": "0:05:19", "remaining_time": "0:00:30", "throughput": 1564.39, "total_tokens": 500064} | |
| {"current_steps": 3295, "total_steps": 3600, "loss": 0.0002, "lr": 1.0923811009241142e-06, "epoch": 18.305555555555557, "percentage": 91.53, "elapsed_time": "0:05:20", "remaining_time": "0:00:29", "throughput": 1564.82, "total_tokens": 500848} | |
| {"current_steps": 3300, "total_steps": 3600, "loss": 0.0861, "lr": 1.0572257560717086e-06, "epoch": 18.333333333333332, "percentage": 91.67, "elapsed_time": "0:05:20", "remaining_time": "0:00:29", "throughput": 1565.11, "total_tokens": 501600} | |
| {"current_steps": 3305, "total_steps": 3600, "loss": 0.0012, "lr": 1.0226331713911546e-06, "epoch": 18.36111111111111, "percentage": 91.81, "elapsed_time": "0:05:20", "remaining_time": "0:00:28", "throughput": 1565.42, "total_tokens": 502352} | |
| {"current_steps": 3310, "total_steps": 3600, "loss": 0.0049, "lr": 9.886041599598606e-07, "epoch": 18.38888888888889, "percentage": 91.94, "elapsed_time": "0:05:21", "remaining_time": "0:00:28", "throughput": 1565.77, "total_tokens": 503120} | |
| {"current_steps": 3315, "total_steps": 3600, "loss": 0.0011, "lr": 9.551395216087944e-07, "epoch": 18.416666666666668, "percentage": 92.08, "elapsed_time": "0:05:21", "remaining_time": "0:00:27", "throughput": 1566.09, "total_tokens": 503872} | |
| {"current_steps": 3320, "total_steps": 3600, "loss": 0.0504, "lr": 9.222400429036854e-07, "epoch": 18.444444444444443, "percentage": 92.22, "elapsed_time": "0:05:22", "remaining_time": "0:00:27", "throughput": 1566.39, "total_tokens": 504624} | |
| {"current_steps": 3325, "total_steps": 3600, "loss": 0.0048, "lr": 8.899064971265276e-07, "epoch": 18.47222222222222, "percentage": 92.36, "elapsed_time": "0:05:22", "remaining_time": "0:00:26", "throughput": 1566.74, "total_tokens": 505392} | |
| {"current_steps": 3330, "total_steps": 3600, "loss": 0.1322, "lr": 8.581396442574135e-07, "epoch": 18.5, "percentage": 92.5, "elapsed_time": "0:05:22", "remaining_time": "0:00:26", "throughput": 1567.09, "total_tokens": 506160} | |
| {"current_steps": 3335, "total_steps": 3600, "loss": 0.0058, "lr": 8.269402309566743e-07, "epoch": 18.52777777777778, "percentage": 92.64, "elapsed_time": "0:05:23", "remaining_time": "0:00:25", "throughput": 1567.27, "total_tokens": 506880} | |
| {"current_steps": 3340, "total_steps": 3600, "loss": 0.0184, "lr": 7.963089905473092e-07, "epoch": 18.555555555555557, "percentage": 92.78, "elapsed_time": "0:05:23", "remaining_time": "0:00:25", "throughput": 1567.54, "total_tokens": 507616} | |
| {"current_steps": 3345, "total_steps": 3600, "loss": 0.0009, "lr": 7.662466429977699e-07, "epoch": 18.583333333333332, "percentage": 92.92, "elapsed_time": "0:05:24", "remaining_time": "0:00:24", "throughput": 1567.87, "total_tokens": 508368} | |
| {"current_steps": 3350, "total_steps": 3600, "loss": 0.0825, "lr": 7.367538949050345e-07, "epoch": 18.61111111111111, "percentage": 93.06, "elapsed_time": "0:05:24", "remaining_time": "0:00:24", "throughput": 1568.3, "total_tokens": 509152} | |
| {"current_steps": 3355, "total_steps": 3600, "loss": 0.01, "lr": 7.078314394779961e-07, "epoch": 18.63888888888889, "percentage": 93.19, "elapsed_time": "0:05:25", "remaining_time": "0:00:23", "throughput": 1568.73, "total_tokens": 509936} | |
| {"current_steps": 3360, "total_steps": 3600, "loss": 0.0002, "lr": 6.794799565211646e-07, "epoch": 18.666666666666668, "percentage": 93.33, "elapsed_time": "0:05:25", "remaining_time": "0:00:23", "throughput": 1569.06, "total_tokens": 510688} | |
| {"current_steps": 3365, "total_steps": 3600, "loss": 0.0017, "lr": 6.517001124186989e-07, "epoch": 18.694444444444443, "percentage": 93.47, "elapsed_time": "0:05:25", "remaining_time": "0:00:22", "throughput": 1569.3, "total_tokens": 511440} | |
| {"current_steps": 3370, "total_steps": 3600, "loss": 0.1572, "lr": 6.244925601187363e-07, "epoch": 18.72222222222222, "percentage": 93.61, "elapsed_time": "0:05:26", "remaining_time": "0:00:22", "throughput": 1569.55, "total_tokens": 512192} | |
| {"current_steps": 3375, "total_steps": 3600, "loss": 0.0347, "lr": 5.978579391180461e-07, "epoch": 18.75, "percentage": 93.75, "elapsed_time": "0:05:26", "remaining_time": "0:00:21", "throughput": 1569.86, "total_tokens": 512960} | |
| {"current_steps": 3380, "total_steps": 3600, "loss": 0.0024, "lr": 5.717968754469977e-07, "epoch": 18.77777777777778, "percentage": 93.89, "elapsed_time": "0:05:27", "remaining_time": "0:00:21", "throughput": 1570.17, "total_tokens": 513712} | |
| {"current_steps": 3385, "total_steps": 3600, "loss": 0.0162, "lr": 5.463099816548579e-07, "epoch": 18.805555555555557, "percentage": 94.03, "elapsed_time": "0:05:27", "remaining_time": "0:00:20", "throughput": 1570.55, "total_tokens": 514496} | |
| {"current_steps": 3390, "total_steps": 3600, "loss": 0.0261, "lr": 5.213978567953775e-07, "epoch": 18.833333333333332, "percentage": 94.17, "elapsed_time": "0:05:28", "remaining_time": "0:00:20", "throughput": 1571.01, "total_tokens": 515296} | |
| {"current_steps": 3395, "total_steps": 3600, "loss": 0.0021, "lr": 4.970610864127173e-07, "epoch": 18.86111111111111, "percentage": 94.31, "elapsed_time": "0:05:28", "remaining_time": "0:00:19", "throughput": 1571.24, "total_tokens": 516032} | |
| {"current_steps": 3400, "total_steps": 3600, "loss": 0.0003, "lr": 4.7330024252768555e-07, "epoch": 18.88888888888889, "percentage": 94.44, "elapsed_time": "0:05:28", "remaining_time": "0:00:19", "throughput": 1571.63, "total_tokens": 516816} | |
| {"current_steps": 3405, "total_steps": 3600, "loss": 0.0022, "lr": 4.5011588362429134e-07, "epoch": 18.916666666666668, "percentage": 94.58, "elapsed_time": "0:05:29", "remaining_time": "0:00:18", "throughput": 1571.98, "total_tokens": 517584} | |
| {"current_steps": 3410, "total_steps": 3600, "loss": 0.0024, "lr": 4.2750855463662143e-07, "epoch": 18.944444444444443, "percentage": 94.72, "elapsed_time": "0:05:29", "remaining_time": "0:00:18", "throughput": 1572.23, "total_tokens": 518336} | |
| {"current_steps": 3415, "total_steps": 3600, "loss": 0.0029, "lr": 4.05478786936031e-07, "epoch": 18.97222222222222, "percentage": 94.86, "elapsed_time": "0:05:30", "remaining_time": "0:00:17", "throughput": 1572.57, "total_tokens": 519088} | |
| {"current_steps": 3420, "total_steps": 3600, "loss": 0.1114, "lr": 3.8402709831865113e-07, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:05:30", "remaining_time": "0:00:17", "throughput": 1572.62, "total_tokens": 519840} | |
| {"current_steps": 3420, "total_steps": 3600, "eval_loss": 1.036169409751892, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:05:31", "remaining_time": "0:00:17", "throughput": 1568.56, "total_tokens": 519840} | |
| {"current_steps": 3425, "total_steps": 3600, "loss": 0.0218, "lr": 3.6315399299321484e-07, "epoch": 19.02777777777778, "percentage": 95.14, "elapsed_time": "0:05:33", "remaining_time": "0:00:17", "throughput": 1562.54, "total_tokens": 520624} | |
| {"current_steps": 3430, "total_steps": 3600, "loss": 0.0019, "lr": 3.428599615692141e-07, "epoch": 19.055555555555557, "percentage": 95.28, "elapsed_time": "0:05:33", "remaining_time": "0:00:16", "throughput": 1562.83, "total_tokens": 521392} | |
| {"current_steps": 3435, "total_steps": 3600, "loss": 0.0078, "lr": 3.2314548104537545e-07, "epoch": 19.083333333333332, "percentage": 95.42, "elapsed_time": "0:05:34", "remaining_time": "0:00:16", "throughput": 1563.11, "total_tokens": 522144} | |
| {"current_steps": 3440, "total_steps": 3600, "loss": 0.0194, "lr": 3.040110147984221e-07, "epoch": 19.11111111111111, "percentage": 95.56, "elapsed_time": "0:05:34", "remaining_time": "0:00:15", "throughput": 1563.36, "total_tokens": 522896} | |
| {"current_steps": 3445, "total_steps": 3600, "loss": 0.0008, "lr": 2.8545701257221e-07, "epoch": 19.13888888888889, "percentage": 95.69, "elapsed_time": "0:05:34", "remaining_time": "0:00:15", "throughput": 1563.59, "total_tokens": 523632} | |
| {"current_steps": 3450, "total_steps": 3600, "loss": 0.0007, "lr": 2.674839104671367e-07, "epoch": 19.166666666666668, "percentage": 95.83, "elapsed_time": "0:05:35", "remaining_time": "0:00:14", "throughput": 1563.92, "total_tokens": 524400} | |
| {"current_steps": 3455, "total_steps": 3600, "loss": 0.0131, "lr": 2.5009213092991034e-07, "epoch": 19.194444444444443, "percentage": 95.97, "elapsed_time": "0:05:35", "remaining_time": "0:00:14", "throughput": 1564.33, "total_tokens": 525200} | |
| {"current_steps": 3460, "total_steps": 3600, "loss": 0.0003, "lr": 2.3328208274359942e-07, "epoch": 19.22222222222222, "percentage": 96.11, "elapsed_time": "0:05:36", "remaining_time": "0:00:13", "throughput": 1564.65, "total_tokens": 525952} | |
| {"current_steps": 3465, "total_steps": 3600, "loss": 0.0003, "lr": 2.170541610180432e-07, "epoch": 19.25, "percentage": 96.25, "elapsed_time": "0:05:36", "remaining_time": "0:00:13", "throughput": 1564.94, "total_tokens": 526704} | |
| {"current_steps": 3470, "total_steps": 3600, "loss": 0.0009, "lr": 2.014087471805509e-07, "epoch": 19.27777777777778, "percentage": 96.39, "elapsed_time": "0:05:36", "remaining_time": "0:00:12", "throughput": 1565.16, "total_tokens": 527440} | |
| {"current_steps": 3475, "total_steps": 3600, "loss": 0.0005, "lr": 1.8634620896695043e-07, "epoch": 19.305555555555557, "percentage": 96.53, "elapsed_time": "0:05:37", "remaining_time": "0:00:12", "throughput": 1565.5, "total_tokens": 528208} | |
| {"current_steps": 3480, "total_steps": 3600, "loss": 0.0009, "lr": 1.7186690041292586e-07, "epoch": 19.333333333333332, "percentage": 96.67, "elapsed_time": "0:05:37", "remaining_time": "0:00:11", "throughput": 1565.93, "total_tokens": 529008} | |
| {"current_steps": 3485, "total_steps": 3600, "loss": 0.0011, "lr": 1.5797116184571304e-07, "epoch": 19.36111111111111, "percentage": 96.81, "elapsed_time": "0:05:38", "remaining_time": "0:00:11", "throughput": 1566.24, "total_tokens": 529760} | |
| {"current_steps": 3490, "total_steps": 3600, "loss": 0.0019, "lr": 1.4465931987609482e-07, "epoch": 19.38888888888889, "percentage": 96.94, "elapsed_time": "0:05:38", "remaining_time": "0:00:10", "throughput": 1566.45, "total_tokens": 530480} | |
| {"current_steps": 3495, "total_steps": 3600, "loss": 0.0017, "lr": 1.319316873907267e-07, "epoch": 19.416666666666668, "percentage": 97.08, "elapsed_time": "0:05:39", "remaining_time": "0:00:10", "throughput": 1566.67, "total_tokens": 531216} | |
| {"current_steps": 3500, "total_steps": 3600, "loss": 0.0654, "lr": 1.1978856354477595e-07, "epoch": 19.444444444444443, "percentage": 97.22, "elapsed_time": "0:05:39", "remaining_time": "0:00:09", "throughput": 1566.96, "total_tokens": 531968} | |
| {"current_steps": 3505, "total_steps": 3600, "loss": 0.0026, "lr": 1.0823023375489127e-07, "epoch": 19.47222222222222, "percentage": 97.36, "elapsed_time": "0:05:39", "remaining_time": "0:00:09", "throughput": 1567.31, "total_tokens": 532736} | |
| {"current_steps": 3510, "total_steps": 3600, "loss": 0.0137, "lr": 9.725696969249965e-08, "epoch": 19.5, "percentage": 97.5, "elapsed_time": "0:05:40", "remaining_time": "0:00:08", "throughput": 1567.59, "total_tokens": 533504} | |
| {"current_steps": 3515, "total_steps": 3600, "loss": 0.0004, "lr": 8.686902927741991e-08, "epoch": 19.52777777777778, "percentage": 97.64, "elapsed_time": "0:05:40", "remaining_time": "0:00:08", "throughput": 1567.83, "total_tokens": 534256} | |
| {"current_steps": 3520, "total_steps": 3600, "loss": 0.0004, "lr": 7.706665667180091e-08, "epoch": 19.555555555555557, "percentage": 97.78, "elapsed_time": "0:05:41", "remaining_time": "0:00:07", "throughput": 1568.06, "total_tokens": 535008} | |
| {"current_steps": 3525, "total_steps": 3600, "loss": 0.0438, "lr": 6.785008227437329e-08, "epoch": 19.583333333333332, "percentage": 97.92, "elapsed_time": "0:05:41", "remaining_time": "0:00:07", "throughput": 1568.46, "total_tokens": 535792} | |
| {"current_steps": 3530, "total_steps": 3600, "loss": 0.0086, "lr": 5.921952271504827e-08, "epoch": 19.61111111111111, "percentage": 98.06, "elapsed_time": "0:05:42", "remaining_time": "0:00:06", "throughput": 1568.79, "total_tokens": 536560} | |
| {"current_steps": 3535, "total_steps": 3600, "loss": 0.0005, "lr": 5.117518084981621e-08, "epoch": 19.63888888888889, "percentage": 98.19, "elapsed_time": "0:05:42", "remaining_time": "0:00:06", "throughput": 1569.11, "total_tokens": 537328} | |
| {"current_steps": 3540, "total_steps": 3600, "loss": 0.0002, "lr": 4.371724575597535e-08, "epoch": 19.666666666666668, "percentage": 98.33, "elapsed_time": "0:05:42", "remaining_time": "0:00:05", "throughput": 1569.33, "total_tokens": 538064} | |
| {"current_steps": 3545, "total_steps": 3600, "loss": 0.1792, "lr": 3.684589272771044e-08, "epoch": 19.694444444444443, "percentage": 98.47, "elapsed_time": "0:05:43", "remaining_time": "0:00:05", "throughput": 1569.71, "total_tokens": 538848} | |
| {"current_steps": 3550, "total_steps": 3600, "loss": 0.0761, "lr": 3.056128327193486e-08, "epoch": 19.72222222222222, "percentage": 98.61, "elapsed_time": "0:05:43", "remaining_time": "0:00:04", "throughput": 1570.03, "total_tokens": 539616} | |
| {"current_steps": 3555, "total_steps": 3600, "loss": 0.001, "lr": 2.486356510453258e-08, "epoch": 19.75, "percentage": 98.75, "elapsed_time": "0:05:44", "remaining_time": "0:00:04", "throughput": 1570.29, "total_tokens": 540352} | |
| {"current_steps": 3560, "total_steps": 3600, "loss": 0.0004, "lr": 1.975287214685817e-08, "epoch": 19.77777777777778, "percentage": 98.89, "elapsed_time": "0:05:44", "remaining_time": "0:00:03", "throughput": 1570.57, "total_tokens": 541104} | |
| {"current_steps": 3565, "total_steps": 3600, "loss": 0.0166, "lr": 1.522932452260595e-08, "epoch": 19.805555555555557, "percentage": 99.03, "elapsed_time": "0:05:44", "remaining_time": "0:00:03", "throughput": 1570.83, "total_tokens": 541856} | |
| {"current_steps": 3570, "total_steps": 3600, "loss": 0.001, "lr": 1.1293028554978935e-08, "epoch": 19.833333333333332, "percentage": 99.17, "elapsed_time": "0:05:45", "remaining_time": "0:00:02", "throughput": 1571.18, "total_tokens": 542640} | |
| {"current_steps": 3575, "total_steps": 3600, "loss": 0.0002, "lr": 7.944076764190845e-09, "epoch": 19.86111111111111, "percentage": 99.31, "elapsed_time": "0:05:45", "remaining_time": "0:00:02", "throughput": 1571.46, "total_tokens": 543392} | |
| {"current_steps": 3580, "total_steps": 3600, "loss": 0.0512, "lr": 5.182547865290044e-09, "epoch": 19.88888888888889, "percentage": 99.44, "elapsed_time": "0:05:46", "remaining_time": "0:00:01", "throughput": 1571.66, "total_tokens": 544128} | |
| {"current_steps": 3585, "total_steps": 3600, "loss": 0.155, "lr": 3.008506766313812e-09, "epoch": 19.916666666666668, "percentage": 99.58, "elapsed_time": "0:05:46", "remaining_time": "0:00:01", "throughput": 1571.92, "total_tokens": 544880} | |
| {"current_steps": 3590, "total_steps": 3600, "loss": 0.0003, "lr": 1.4220045667645566e-09, "epoch": 19.944444444444443, "percentage": 99.72, "elapsed_time": "0:05:47", "remaining_time": "0:00:00", "throughput": 1572.09, "total_tokens": 545600} | |
| {"current_steps": 3595, "total_steps": 3600, "loss": 0.006, "lr": 4.2307855639411865e-10, "epoch": 19.97222222222222, "percentage": 99.86, "elapsed_time": "0:05:47", "remaining_time": "0:00:00", "throughput": 1572.47, "total_tokens": 546384} | |
| {"current_steps": 3600, "total_steps": 3600, "loss": 0.0002, "lr": 1.1752214348903501e-11, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:05:47", "remaining_time": "0:00:00", "throughput": 1572.5, "total_tokens": 547136} | |
| {"current_steps": 3600, "total_steps": 3600, "eval_loss": 1.036588430404663, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:05:48", "remaining_time": "0:00:00", "throughput": 1568.57, "total_tokens": 547136} | |
| {"current_steps": 3600, "total_steps": 3600, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:05:50", "remaining_time": "0:00:00", "throughput": 1562.85, "total_tokens": 547136} | |