| {"current_steps": 5, "total_steps": 3330, "loss": 10.1271, "lr": 6.006006006006006e-06, "epoch": 0.04504504504504504, "percentage": 0.15, "elapsed_time": "0:00:01", "remaining_time": "0:14:21", "throughput": 1482.26, "total_tokens": 1920} | |
| {"current_steps": 10, "total_steps": 3330, "loss": 5.278, "lr": 1.3513513513513515e-05, "epoch": 0.09009009009009009, "percentage": 0.3, "elapsed_time": "0:00:01", "remaining_time": "0:09:52", "throughput": 2490.55, "total_tokens": 4448} | |
| {"current_steps": 15, "total_steps": 3330, "loss": 1.8516, "lr": 2.102102102102102e-05, "epoch": 0.13513513513513514, "percentage": 0.45, "elapsed_time": "0:00:02", "remaining_time": "0:08:18", "throughput": 2906.39, "total_tokens": 6560} | |
| {"current_steps": 20, "total_steps": 3330, "loss": 0.5796, "lr": 2.8528528528528528e-05, "epoch": 0.18018018018018017, "percentage": 0.6, "elapsed_time": "0:00:02", "remaining_time": "0:07:25", "throughput": 3103.8, "total_tokens": 8352} | |
| {"current_steps": 25, "total_steps": 3330, "loss": 0.4379, "lr": 3.603603603603604e-05, "epoch": 0.22522522522522523, "percentage": 0.75, "elapsed_time": "0:00:03", "remaining_time": "0:06:53", "throughput": 3335.55, "total_tokens": 10432} | |
| {"current_steps": 30, "total_steps": 3330, "loss": 0.4127, "lr": 4.354354354354354e-05, "epoch": 0.2702702702702703, "percentage": 0.9, "elapsed_time": "0:00:03", "remaining_time": "0:06:37", "throughput": 3388.27, "total_tokens": 12256} | |
| {"current_steps": 35, "total_steps": 3330, "loss": 0.4075, "lr": 5.105105105105105e-05, "epoch": 0.3153153153153153, "percentage": 1.05, "elapsed_time": "0:00:04", "remaining_time": "0:06:30", "throughput": 3453.62, "total_tokens": 14336} | |
| {"current_steps": 40, "total_steps": 3330, "loss": 0.4046, "lr": 5.855855855855856e-05, "epoch": 0.36036036036036034, "percentage": 1.2, "elapsed_time": "0:00:04", "remaining_time": "0:06:20", "throughput": 3542.79, "total_tokens": 16384} | |
| {"current_steps": 45, "total_steps": 3330, "loss": 0.4308, "lr": 6.606606606606606e-05, "epoch": 0.40540540540540543, "percentage": 1.35, "elapsed_time": "0:00:05", "remaining_time": "0:06:11", "throughput": 3604.47, "total_tokens": 18368} | |
| {"current_steps": 50, "total_steps": 3330, "loss": 0.548, "lr": 7.357357357357357e-05, "epoch": 0.45045045045045046, "percentage": 1.5, "elapsed_time": "0:00:05", "remaining_time": "0:06:06", "throughput": 3598.59, "total_tokens": 20128} | |
| {"current_steps": 55, "total_steps": 3330, "loss": 0.5391, "lr": 8.108108108108109e-05, "epoch": 0.4954954954954955, "percentage": 1.65, "elapsed_time": "0:00:06", "remaining_time": "0:06:00", "throughput": 3633.3, "total_tokens": 21984} | |
| {"current_steps": 60, "total_steps": 3330, "loss": 0.337, "lr": 8.85885885885886e-05, "epoch": 0.5405405405405406, "percentage": 1.8, "elapsed_time": "0:00:06", "remaining_time": "0:05:53", "throughput": 3676.48, "total_tokens": 23840} | |
| {"current_steps": 65, "total_steps": 3330, "loss": 0.4438, "lr": 9.60960960960961e-05, "epoch": 0.5855855855855856, "percentage": 1.95, "elapsed_time": "0:00:07", "remaining_time": "0:05:51", "throughput": 3709.1, "total_tokens": 25984} | |
| {"current_steps": 70, "total_steps": 3330, "loss": 0.4852, "lr": 0.0001036036036036036, "epoch": 0.6306306306306306, "percentage": 2.1, "elapsed_time": "0:00:07", "remaining_time": "0:05:49", "throughput": 3712.46, "total_tokens": 27872} | |
| {"current_steps": 75, "total_steps": 3330, "loss": 0.4096, "lr": 0.0001111111111111111, "epoch": 0.6756756756756757, "percentage": 2.25, "elapsed_time": "0:00:07", "remaining_time": "0:05:45", "throughput": 3751.47, "total_tokens": 29824} | |
| {"current_steps": 80, "total_steps": 3330, "loss": 0.2499, "lr": 0.00011861861861861862, "epoch": 0.7207207207207207, "percentage": 2.4, "elapsed_time": "0:00:08", "remaining_time": "0:05:41", "throughput": 3772.67, "total_tokens": 31680} | |
| {"current_steps": 85, "total_steps": 3330, "loss": 0.5662, "lr": 0.00012612612612612612, "epoch": 0.7657657657657657, "percentage": 2.55, "elapsed_time": "0:00:08", "remaining_time": "0:05:37", "throughput": 3802.52, "total_tokens": 33632} | |
| {"current_steps": 90, "total_steps": 3330, "loss": 0.4575, "lr": 0.00013363363363363365, "epoch": 0.8108108108108109, "percentage": 2.7, "elapsed_time": "0:00:09", "remaining_time": "0:05:33", "throughput": 3832.82, "total_tokens": 35520} | |
| {"current_steps": 95, "total_steps": 3330, "loss": 0.42, "lr": 0.00014114114114114116, "epoch": 0.8558558558558559, "percentage": 2.85, "elapsed_time": "0:00:09", "remaining_time": "0:05:30", "throughput": 3892.6, "total_tokens": 37824} | |
| {"current_steps": 100, "total_steps": 3330, "loss": 0.7992, "lr": 0.00014864864864864866, "epoch": 0.9009009009009009, "percentage": 3.0, "elapsed_time": "0:00:10", "remaining_time": "0:05:27", "throughput": 3906.27, "total_tokens": 39616} | |
| {"current_steps": 105, "total_steps": 3330, "loss": 0.4349, "lr": 0.00015615615615615616, "epoch": 0.9459459459459459, "percentage": 3.15, "elapsed_time": "0:00:10", "remaining_time": "0:05:24", "throughput": 3922.05, "total_tokens": 41440} | |
| {"current_steps": 110, "total_steps": 3330, "loss": 0.4096, "lr": 0.00016366366366366367, "epoch": 0.990990990990991, "percentage": 3.3, "elapsed_time": "0:00:10", "remaining_time": "0:05:21", "throughput": 3931.89, "total_tokens": 43232} | |
| {"current_steps": 115, "total_steps": 3330, "loss": 1.7942, "lr": 0.00017117117117117117, "epoch": 1.0360360360360361, "percentage": 3.45, "elapsed_time": "0:00:11", "remaining_time": "0:05:27", "throughput": 3876.22, "total_tokens": 45440} | |
| {"current_steps": 120, "total_steps": 3330, "loss": 0.3969, "lr": 0.00017867867867867867, "epoch": 1.0810810810810811, "percentage": 3.6, "elapsed_time": "0:00:12", "remaining_time": "0:05:26", "throughput": 3891.7, "total_tokens": 47456} | |
| {"current_steps": 125, "total_steps": 3330, "loss": 0.3697, "lr": 0.00018618618618618617, "epoch": 1.1261261261261262, "percentage": 3.75, "elapsed_time": "0:00:12", "remaining_time": "0:05:24", "throughput": 3903.39, "total_tokens": 49440} | |
| {"current_steps": 130, "total_steps": 3330, "loss": 0.3804, "lr": 0.00019369369369369368, "epoch": 1.1711711711711712, "percentage": 3.9, "elapsed_time": "0:00:13", "remaining_time": "0:05:22", "throughput": 3915.02, "total_tokens": 51264} | |
| {"current_steps": 135, "total_steps": 3330, "loss": 0.3599, "lr": 0.0002012012012012012, "epoch": 1.2162162162162162, "percentage": 4.05, "elapsed_time": "0:00:13", "remaining_time": "0:05:20", "throughput": 3940.09, "total_tokens": 53408} | |
| {"current_steps": 140, "total_steps": 3330, "loss": 0.3894, "lr": 0.0002087087087087087, "epoch": 1.2612612612612613, "percentage": 4.2, "elapsed_time": "0:00:14", "remaining_time": "0:05:19", "throughput": 3933.41, "total_tokens": 55104} | |
| {"current_steps": 145, "total_steps": 3330, "loss": 0.5272, "lr": 0.00021621621621621624, "epoch": 1.3063063063063063, "percentage": 4.35, "elapsed_time": "0:00:14", "remaining_time": "0:05:17", "throughput": 3961.68, "total_tokens": 57280} | |
| {"current_steps": 150, "total_steps": 3330, "loss": 0.5666, "lr": 0.00022372372372372374, "epoch": 1.3513513513513513, "percentage": 4.5, "elapsed_time": "0:00:14", "remaining_time": "0:05:16", "throughput": 3964.25, "total_tokens": 59264} | |
| {"current_steps": 155, "total_steps": 3330, "loss": 0.3785, "lr": 0.00023123123123123125, "epoch": 1.3963963963963963, "percentage": 4.65, "elapsed_time": "0:00:15", "remaining_time": "0:05:15", "throughput": 3954.15, "total_tokens": 60992} | |
| {"current_steps": 160, "total_steps": 3330, "loss": 0.36, "lr": 0.00023873873873873875, "epoch": 1.4414414414414414, "percentage": 4.8, "elapsed_time": "0:00:15", "remaining_time": "0:05:14", "throughput": 3965.21, "total_tokens": 62944} | |
| {"current_steps": 165, "total_steps": 3330, "loss": 0.414, "lr": 0.00024624624624624625, "epoch": 1.4864864864864864, "percentage": 4.95, "elapsed_time": "0:00:16", "remaining_time": "0:05:13", "throughput": 3986.21, "total_tokens": 65152} | |
| {"current_steps": 167, "total_steps": 3330, "eval_loss": 0.4660390019416809, "epoch": 1.5045045045045045, "percentage": 5.02, "elapsed_time": "0:00:17", "remaining_time": "0:05:38", "throughput": 3688.39, "total_tokens": 65984} | |
| {"current_steps": 170, "total_steps": 3330, "loss": 0.4435, "lr": 0.00025375375375375376, "epoch": 1.5315315315315314, "percentage": 5.11, "elapsed_time": "0:00:20", "remaining_time": "0:06:28", "throughput": 3211.94, "total_tokens": 67104} | |
| {"current_steps": 175, "total_steps": 3330, "loss": 0.3524, "lr": 0.00026126126126126126, "epoch": 1.5765765765765765, "percentage": 5.26, "elapsed_time": "0:00:21", "remaining_time": "0:06:26", "throughput": 3210.72, "total_tokens": 68800} | |
| {"current_steps": 180, "total_steps": 3330, "loss": 0.3485, "lr": 0.00026876876876876876, "epoch": 1.6216216216216215, "percentage": 5.41, "elapsed_time": "0:00:21", "remaining_time": "0:06:23", "throughput": 3221.7, "total_tokens": 70592} | |
| {"current_steps": 185, "total_steps": 3330, "loss": 1.411, "lr": 0.00027627627627627627, "epoch": 1.6666666666666665, "percentage": 5.56, "elapsed_time": "0:00:22", "remaining_time": "0:06:20", "throughput": 3246.48, "total_tokens": 72736} | |
| {"current_steps": 190, "total_steps": 3330, "loss": 1.0786, "lr": 0.00028378378378378377, "epoch": 1.7117117117117115, "percentage": 5.71, "elapsed_time": "0:00:22", "remaining_time": "0:06:18", "throughput": 3247.0, "total_tokens": 74400} | |
| {"current_steps": 195, "total_steps": 3330, "loss": 0.5167, "lr": 0.00029129129129129127, "epoch": 1.7567567567567568, "percentage": 5.86, "elapsed_time": "0:00:23", "remaining_time": "0:06:15", "throughput": 3258.62, "total_tokens": 76192} | |
| {"current_steps": 200, "total_steps": 3330, "loss": 0.9988, "lr": 0.0002987987987987988, "epoch": 1.8018018018018018, "percentage": 6.01, "elapsed_time": "0:00:23", "remaining_time": "0:06:14", "throughput": 3279.66, "total_tokens": 78400} | |
| {"current_steps": 205, "total_steps": 3330, "loss": 0.3591, "lr": 0.0003063063063063063, "epoch": 1.8468468468468469, "percentage": 6.16, "elapsed_time": "0:00:24", "remaining_time": "0:06:11", "throughput": 3287.55, "total_tokens": 80096} | |
| {"current_steps": 210, "total_steps": 3330, "loss": 0.381, "lr": 0.0003138138138138138, "epoch": 1.8918918918918919, "percentage": 6.31, "elapsed_time": "0:00:24", "remaining_time": "0:06:09", "throughput": 3311.23, "total_tokens": 82336} | |
| {"current_steps": 215, "total_steps": 3330, "loss": 0.4551, "lr": 0.0003213213213213213, "epoch": 1.936936936936937, "percentage": 6.46, "elapsed_time": "0:00:25", "remaining_time": "0:06:07", "throughput": 3328.59, "total_tokens": 84352} | |
| {"current_steps": 220, "total_steps": 3330, "loss": 0.3549, "lr": 0.00032882882882882884, "epoch": 1.981981981981982, "percentage": 6.61, "elapsed_time": "0:00:25", "remaining_time": "0:06:04", "throughput": 3353.61, "total_tokens": 86496} | |
| {"current_steps": 225, "total_steps": 3330, "loss": 0.2953, "lr": 0.00033633633633633635, "epoch": 2.027027027027027, "percentage": 6.76, "elapsed_time": "0:00:26", "remaining_time": "0:06:09", "throughput": 3294.04, "total_tokens": 88088} | |
| {"current_steps": 230, "total_steps": 3330, "loss": 1.573, "lr": 0.00034384384384384385, "epoch": 2.0720720720720722, "percentage": 6.91, "elapsed_time": "0:00:27", "remaining_time": "0:06:07", "throughput": 3305.34, "total_tokens": 90232} | |
| {"current_steps": 235, "total_steps": 3330, "loss": 0.3753, "lr": 0.00035135135135135135, "epoch": 2.1171171171171173, "percentage": 7.06, "elapsed_time": "0:00:27", "remaining_time": "0:06:06", "throughput": 3299.35, "total_tokens": 91864} | |
| {"current_steps": 240, "total_steps": 3330, "loss": 0.3136, "lr": 0.0003588588588588589, "epoch": 2.1621621621621623, "percentage": 7.21, "elapsed_time": "0:00:28", "remaining_time": "0:06:04", "throughput": 3328.3, "total_tokens": 94232} | |
| {"current_steps": 245, "total_steps": 3330, "loss": 0.4186, "lr": 0.0003663663663663664, "epoch": 2.2072072072072073, "percentage": 7.36, "elapsed_time": "0:00:28", "remaining_time": "0:06:02", "throughput": 3352.71, "total_tokens": 96536} | |
| {"current_steps": 250, "total_steps": 3330, "loss": 0.3911, "lr": 0.0003738738738738739, "epoch": 2.2522522522522523, "percentage": 7.51, "elapsed_time": "0:00:29", "remaining_time": "0:06:00", "throughput": 3358.39, "total_tokens": 98360} | |
| {"current_steps": 255, "total_steps": 3330, "loss": 0.3668, "lr": 0.0003813813813813814, "epoch": 2.2972972972972974, "percentage": 7.66, "elapsed_time": "0:00:29", "remaining_time": "0:05:59", "throughput": 3361.59, "total_tokens": 100152} | |
| {"current_steps": 260, "total_steps": 3330, "loss": 0.3606, "lr": 0.0003888888888888889, "epoch": 2.3423423423423424, "percentage": 7.81, "elapsed_time": "0:00:30", "remaining_time": "0:05:57", "throughput": 3376.15, "total_tokens": 102296} | |
| {"current_steps": 265, "total_steps": 3330, "loss": 0.3488, "lr": 0.0003963963963963964, "epoch": 2.3873873873873874, "percentage": 7.96, "elapsed_time": "0:00:30", "remaining_time": "0:05:55", "throughput": 3380.7, "total_tokens": 104024} | |
| {"current_steps": 270, "total_steps": 3330, "loss": 0.3477, "lr": 0.00040390390390390393, "epoch": 2.4324324324324325, "percentage": 8.11, "elapsed_time": "0:00:31", "remaining_time": "0:05:54", "throughput": 3391.28, "total_tokens": 105944} | |
| {"current_steps": 275, "total_steps": 3330, "loss": 0.3467, "lr": 0.00041141141141141143, "epoch": 2.4774774774774775, "percentage": 8.26, "elapsed_time": "0:00:31", "remaining_time": "0:05:52", "throughput": 3404.26, "total_tokens": 107928} | |
| {"current_steps": 280, "total_steps": 3330, "loss": 0.3638, "lr": 0.00041891891891891893, "epoch": 2.5225225225225225, "percentage": 8.41, "elapsed_time": "0:00:32", "remaining_time": "0:05:50", "throughput": 3417.75, "total_tokens": 109976} | |
| {"current_steps": 285, "total_steps": 3330, "loss": 0.3496, "lr": 0.00042642642642642644, "epoch": 2.5675675675675675, "percentage": 8.56, "elapsed_time": "0:00:32", "remaining_time": "0:05:48", "throughput": 3431.22, "total_tokens": 112024} | |
| {"current_steps": 290, "total_steps": 3330, "loss": 0.3593, "lr": 0.00043393393393393394, "epoch": 2.6126126126126126, "percentage": 8.71, "elapsed_time": "0:00:33", "remaining_time": "0:05:47", "throughput": 3459.05, "total_tokens": 114584} | |
| {"current_steps": 295, "total_steps": 3330, "loss": 0.3504, "lr": 0.00044144144144144144, "epoch": 2.6576576576576576, "percentage": 8.86, "elapsed_time": "0:00:33", "remaining_time": "0:05:45", "throughput": 3467.86, "total_tokens": 116472} | |
| {"current_steps": 300, "total_steps": 3330, "loss": 0.4647, "lr": 0.00044894894894894895, "epoch": 2.7027027027027026, "percentage": 9.01, "elapsed_time": "0:00:34", "remaining_time": "0:05:43", "throughput": 3470.21, "total_tokens": 118136} | |
| {"current_steps": 305, "total_steps": 3330, "loss": 3.7793, "lr": 0.00045645645645645645, "epoch": 2.7477477477477477, "percentage": 9.16, "elapsed_time": "0:00:34", "remaining_time": "0:05:42", "throughput": 3481.18, "total_tokens": 120120} | |
| {"current_steps": 310, "total_steps": 3330, "loss": 6.114, "lr": 0.00046396396396396395, "epoch": 2.7927927927927927, "percentage": 9.31, "elapsed_time": "0:00:34", "remaining_time": "0:05:40", "throughput": 3484.89, "total_tokens": 121880} | |
| {"current_steps": 315, "total_steps": 3330, "loss": 3.3028, "lr": 0.00047147147147147146, "epoch": 2.8378378378378377, "percentage": 9.46, "elapsed_time": "0:00:35", "remaining_time": "0:05:39", "throughput": 3488.14, "total_tokens": 123800} | |
| {"current_steps": 320, "total_steps": 3330, "loss": 0.8823, "lr": 0.00047897897897897896, "epoch": 2.8828828828828827, "percentage": 9.61, "elapsed_time": "0:00:35", "remaining_time": "0:05:38", "throughput": 3496.2, "total_tokens": 125752} | |
| {"current_steps": 325, "total_steps": 3330, "loss": 0.3914, "lr": 0.0004864864864864865, "epoch": 2.9279279279279278, "percentage": 9.76, "elapsed_time": "0:00:36", "remaining_time": "0:05:37", "throughput": 3508.07, "total_tokens": 127928} | |
| {"current_steps": 330, "total_steps": 3330, "loss": 0.4193, "lr": 0.0004939939939939941, "epoch": 2.972972972972973, "percentage": 9.91, "elapsed_time": "0:00:36", "remaining_time": "0:05:35", "throughput": 3517.64, "total_tokens": 129816} | |
| {"current_steps": 334, "total_steps": 3330, "eval_loss": 0.49270448088645935, "epoch": 3.009009009009009, "percentage": 10.03, "elapsed_time": "0:00:38", "remaining_time": "0:05:48", "throughput": 3370.38, "total_tokens": 131096} | |
| {"current_steps": 335, "total_steps": 3330, "loss": 0.328, "lr": 0.0004999998626476062, "epoch": 3.018018018018018, "percentage": 10.06, "elapsed_time": "0:00:41", "remaining_time": "0:06:08", "throughput": 3191.96, "total_tokens": 131416} | |
| {"current_steps": 340, "total_steps": 3330, "loss": 0.4295, "lr": 0.0004999950553296708, "epoch": 3.063063063063063, "percentage": 10.21, "elapsed_time": "0:00:41", "remaining_time": "0:06:07", "throughput": 3190.56, "total_tokens": 133336} | |
| {"current_steps": 345, "total_steps": 3330, "loss": 0.3431, "lr": 0.0004999833805429708, "epoch": 3.108108108108108, "percentage": 10.36, "elapsed_time": "0:00:42", "remaining_time": "0:06:05", "throughput": 3199.42, "total_tokens": 135288} | |
| {"current_steps": 350, "total_steps": 3330, "loss": 0.3772, "lr": 0.0004999648386082173, "epoch": 3.153153153153153, "percentage": 10.51, "elapsed_time": "0:00:42", "remaining_time": "0:06:04", "throughput": 3209.01, "total_tokens": 137272} | |
| {"current_steps": 355, "total_steps": 3330, "loss": 0.3617, "lr": 0.0004999394300347652, "epoch": 3.1981981981981984, "percentage": 10.66, "elapsed_time": "0:00:43", "remaining_time": "0:06:02", "throughput": 3217.05, "total_tokens": 139096} | |
| {"current_steps": 360, "total_steps": 3330, "loss": 0.2842, "lr": 0.0004999071555205985, "epoch": 3.2432432432432434, "percentage": 10.81, "elapsed_time": "0:00:43", "remaining_time": "0:06:00", "throughput": 3234.35, "total_tokens": 141304} | |
| {"current_steps": 365, "total_steps": 3330, "loss": 0.4347, "lr": 0.0004998680159523117, "epoch": 3.2882882882882885, "percentage": 10.96, "elapsed_time": "0:00:44", "remaining_time": "0:05:58", "throughput": 3242.17, "total_tokens": 143160} | |
| {"current_steps": 370, "total_steps": 3330, "loss": 0.3724, "lr": 0.000499822012405085, "epoch": 3.3333333333333335, "percentage": 11.11, "elapsed_time": "0:00:44", "remaining_time": "0:05:56", "throughput": 3252.76, "total_tokens": 145144} | |
| {"current_steps": 375, "total_steps": 3330, "loss": 0.3634, "lr": 0.0004997691461426552, "epoch": 3.3783783783783785, "percentage": 11.26, "elapsed_time": "0:00:45", "remaining_time": "0:05:55", "throughput": 3267.64, "total_tokens": 147320} | |
| {"current_steps": 380, "total_steps": 3330, "loss": 0.3547, "lr": 0.0004997094186172807, "epoch": 3.4234234234234235, "percentage": 11.41, "elapsed_time": "0:00:45", "remaining_time": "0:05:53", "throughput": 3277.06, "total_tokens": 149208} | |
| {"current_steps": 385, "total_steps": 3330, "loss": 0.3272, "lr": 0.0004996428314697015, "epoch": 3.4684684684684686, "percentage": 11.56, "elapsed_time": "0:00:46", "remaining_time": "0:05:52", "throughput": 3282.24, "total_tokens": 151256} | |
| {"current_steps": 390, "total_steps": 3330, "loss": 0.3502, "lr": 0.0004995693865290945, "epoch": 3.5135135135135136, "percentage": 11.71, "elapsed_time": "0:00:46", "remaining_time": "0:05:51", "throughput": 3279.98, "total_tokens": 152920} | |
| {"current_steps": 395, "total_steps": 3330, "loss": 0.3687, "lr": 0.0004994890858130227, "epoch": 3.5585585585585586, "percentage": 11.86, "elapsed_time": "0:00:47", "remaining_time": "0:05:50", "throughput": 3286.32, "total_tokens": 154968} | |
| {"current_steps": 400, "total_steps": 3330, "loss": 0.3518, "lr": 0.0004994019315273806, "epoch": 3.6036036036036037, "percentage": 12.01, "elapsed_time": "0:00:47", "remaining_time": "0:05:49", "throughput": 3289.42, "total_tokens": 156760} | |
| {"current_steps": 405, "total_steps": 3330, "loss": 0.3441, "lr": 0.0004993079260663329, "epoch": 3.6486486486486487, "percentage": 12.16, "elapsed_time": "0:00:48", "remaining_time": "0:05:47", "throughput": 3295.24, "total_tokens": 158712} | |
| {"current_steps": 410, "total_steps": 3330, "loss": 0.3488, "lr": 0.0004992070720122489, "epoch": 3.6936936936936937, "percentage": 12.31, "elapsed_time": "0:00:48", "remaining_time": "0:05:46", "throughput": 3303.32, "total_tokens": 160792} | |
| {"current_steps": 415, "total_steps": 3330, "loss": 0.3604, "lr": 0.0004990993721356316, "epoch": 3.7387387387387387, "percentage": 12.46, "elapsed_time": "0:00:49", "remaining_time": "0:05:45", "throughput": 3316.19, "total_tokens": 162968} | |
| {"current_steps": 420, "total_steps": 3330, "loss": 0.3691, "lr": 0.0004989848293950417, "epoch": 3.7837837837837838, "percentage": 12.61, "elapsed_time": "0:00:49", "remaining_time": "0:05:43", "throughput": 3321.97, "total_tokens": 164792} | |
| {"current_steps": 425, "total_steps": 3330, "loss": 0.3632, "lr": 0.0004988634469370164, "epoch": 3.828828828828829, "percentage": 12.76, "elapsed_time": "0:00:50", "remaining_time": "0:05:42", "throughput": 3331.12, "total_tokens": 167000} | |
| {"current_steps": 430, "total_steps": 3330, "loss": 0.3381, "lr": 0.0004987352280959822, "epoch": 3.873873873873874, "percentage": 12.91, "elapsed_time": "0:00:50", "remaining_time": "0:05:41", "throughput": 3334.58, "total_tokens": 168824} | |
| {"current_steps": 435, "total_steps": 3330, "loss": 0.3518, "lr": 0.0004986001763941647, "epoch": 3.918918918918919, "percentage": 13.06, "elapsed_time": "0:00:51", "remaining_time": "0:05:40", "throughput": 3340.11, "total_tokens": 170776} | |
| {"current_steps": 440, "total_steps": 3330, "loss": 0.3556, "lr": 0.0004984582955414904, "epoch": 3.963963963963964, "percentage": 13.21, "elapsed_time": "0:00:51", "remaining_time": "0:05:38", "throughput": 3345.01, "total_tokens": 172536} | |
| {"current_steps": 445, "total_steps": 3330, "loss": 0.3534, "lr": 0.0004983095894354857, "epoch": 4.009009009009009, "percentage": 13.36, "elapsed_time": "0:00:52", "remaining_time": "0:05:39", "throughput": 3327.93, "total_tokens": 174288} | |
| {"current_steps": 450, "total_steps": 3330, "loss": 0.3765, "lr": 0.0004981540621611698, "epoch": 4.054054054054054, "percentage": 13.51, "elapsed_time": "0:00:52", "remaining_time": "0:05:38", "throughput": 3335.83, "total_tokens": 176368} | |
| {"current_steps": 455, "total_steps": 3330, "loss": 0.3463, "lr": 0.0004979917179909417, "epoch": 4.099099099099099, "percentage": 13.66, "elapsed_time": "0:00:53", "remaining_time": "0:05:36", "throughput": 3339.17, "total_tokens": 178064} | |
| {"current_steps": 460, "total_steps": 3330, "loss": 0.3851, "lr": 0.0004978225613844638, "epoch": 4.1441441441441444, "percentage": 13.81, "elapsed_time": "0:00:53", "remaining_time": "0:05:35", "throughput": 3346.14, "total_tokens": 179984} | |
| {"current_steps": 465, "total_steps": 3330, "loss": 0.3643, "lr": 0.000497646596988539, "epoch": 4.1891891891891895, "percentage": 13.96, "elapsed_time": "0:00:54", "remaining_time": "0:05:34", "throughput": 3360.17, "total_tokens": 182352} | |
| {"current_steps": 470, "total_steps": 3330, "loss": 0.3766, "lr": 0.0004974638296369826, "epoch": 4.2342342342342345, "percentage": 14.11, "elapsed_time": "0:00:54", "remaining_time": "0:05:33", "throughput": 3361.58, "total_tokens": 184016} | |
| {"current_steps": 475, "total_steps": 3330, "loss": 0.3598, "lr": 0.0004972742643504904, "epoch": 4.2792792792792795, "percentage": 14.26, "elapsed_time": "0:00:55", "remaining_time": "0:05:31", "throughput": 3369.93, "total_tokens": 186096} | |
| {"current_steps": 480, "total_steps": 3330, "loss": 0.368, "lr": 0.0004970779063365, "epoch": 4.324324324324325, "percentage": 14.41, "elapsed_time": "0:00:55", "remaining_time": "0:05:30", "throughput": 3374.89, "total_tokens": 187920} | |
| {"current_steps": 485, "total_steps": 3330, "loss": 0.3598, "lr": 0.0004968747609890484, "epoch": 4.36936936936937, "percentage": 14.56, "elapsed_time": "0:00:56", "remaining_time": "0:05:29", "throughput": 3379.69, "total_tokens": 189744} | |
| {"current_steps": 490, "total_steps": 3330, "loss": 0.3559, "lr": 0.000496664833888623, "epoch": 4.414414414414415, "percentage": 14.71, "elapsed_time": "0:00:56", "remaining_time": "0:05:28", "throughput": 3387.72, "total_tokens": 191760} | |
| {"current_steps": 495, "total_steps": 3330, "loss": 0.3447, "lr": 0.0004964481308020093, "epoch": 4.45945945945946, "percentage": 14.86, "elapsed_time": "0:00:57", "remaining_time": "0:05:26", "throughput": 3396.41, "total_tokens": 193872} | |
| {"current_steps": 500, "total_steps": 3330, "loss": 0.3295, "lr": 0.0004962246576821318, "epoch": 4.504504504504505, "percentage": 15.02, "elapsed_time": "0:00:57", "remaining_time": "0:05:25", "throughput": 3408.3, "total_tokens": 196048} | |
| {"current_steps": 501, "total_steps": 3330, "eval_loss": 0.3681543171405792, "epoch": 4.513513513513513, "percentage": 15.05, "elapsed_time": "0:00:58", "remaining_time": "0:05:31", "throughput": 3341.72, "total_tokens": 196400} | |
| {"current_steps": 505, "total_steps": 3330, "loss": 0.3872, "lr": 0.0004959944206678903, "epoch": 4.54954954954955, "percentage": 15.17, "elapsed_time": "0:01:01", "remaining_time": "0:05:42", "throughput": 3235.53, "total_tokens": 198128} | |
| {"current_steps": 510, "total_steps": 3330, "loss": 0.335, "lr": 0.0004957574260839923, "epoch": 4.594594594594595, "percentage": 15.32, "elapsed_time": "0:01:01", "remaining_time": "0:05:41", "throughput": 3241.72, "total_tokens": 199984} | |
| {"current_steps": 515, "total_steps": 3330, "loss": 0.325, "lr": 0.0004955136804407779, "epoch": 4.63963963963964, "percentage": 15.47, "elapsed_time": "0:01:02", "remaining_time": "0:05:39", "throughput": 3249.22, "total_tokens": 201968} | |
| {"current_steps": 520, "total_steps": 3330, "loss": 0.6477, "lr": 0.000495263190434042, "epoch": 4.684684684684685, "percentage": 15.62, "elapsed_time": "0:01:02", "remaining_time": "0:05:38", "throughput": 3257.15, "total_tokens": 203984} | |
| {"current_steps": 525, "total_steps": 3330, "loss": 1.822, "lr": 0.0004950059629448499, "epoch": 4.72972972972973, "percentage": 15.77, "elapsed_time": "0:01:03", "remaining_time": "0:05:37", "throughput": 3258.27, "total_tokens": 205808} | |
| {"current_steps": 530, "total_steps": 3330, "loss": 2.169, "lr": 0.0004947420050393483, "epoch": 4.774774774774775, "percentage": 15.92, "elapsed_time": "0:01:03", "remaining_time": "0:05:36", "throughput": 3269.46, "total_tokens": 208016} | |
| {"current_steps": 535, "total_steps": 3330, "loss": 2.7455, "lr": 0.0004944713239685713, "epoch": 4.81981981981982, "percentage": 16.07, "elapsed_time": "0:01:04", "remaining_time": "0:05:35", "throughput": 3279.72, "total_tokens": 210320} | |
| {"current_steps": 540, "total_steps": 3330, "loss": 0.9305, "lr": 0.0004941939271682411, "epoch": 4.864864864864865, "percentage": 16.22, "elapsed_time": "0:01:04", "remaining_time": "0:05:33", "throughput": 3282.52, "total_tokens": 212048} | |
| {"current_steps": 545, "total_steps": 3330, "loss": 1.1274, "lr": 0.000493909822258564, "epoch": 4.90990990990991, "percentage": 16.37, "elapsed_time": "0:01:05", "remaining_time": "0:05:32", "throughput": 3290.55, "total_tokens": 214032} | |
| {"current_steps": 550, "total_steps": 3330, "loss": 0.5231, "lr": 0.0004936190170440208, "epoch": 4.954954954954955, "percentage": 16.52, "elapsed_time": "0:01:05", "remaining_time": "0:05:31", "throughput": 3296.01, "total_tokens": 215856} | |
| {"current_steps": 555, "total_steps": 3330, "loss": 0.3789, "lr": 0.0004933215195131521, "epoch": 5.0, "percentage": 16.67, "elapsed_time": "0:01:06", "remaining_time": "0:05:30", "throughput": 3290.76, "total_tokens": 217408} | |
| {"current_steps": 560, "total_steps": 3330, "loss": 0.5117, "lr": 0.00049301733783834, "epoch": 5.045045045045045, "percentage": 16.82, "elapsed_time": "0:01:06", "remaining_time": "0:05:30", "throughput": 3282.39, "total_tokens": 219232} | |
| {"current_steps": 565, "total_steps": 3330, "loss": 0.4079, "lr": 0.000492706480375582, "epoch": 5.09009009009009, "percentage": 16.97, "elapsed_time": "0:01:07", "remaining_time": "0:05:29", "throughput": 3295.08, "total_tokens": 221664} | |
| {"current_steps": 570, "total_steps": 3330, "loss": 0.3651, "lr": 0.0004923889556642626, "epoch": 5.135135135135135, "percentage": 17.12, "elapsed_time": "0:01:07", "remaining_time": "0:05:28", "throughput": 3293.94, "total_tokens": 223232} | |
| {"current_steps": 575, "total_steps": 3330, "loss": 0.535, "lr": 0.0004920647724269188, "epoch": 5.18018018018018, "percentage": 17.27, "elapsed_time": "0:01:08", "remaining_time": "0:05:27", "throughput": 3296.96, "total_tokens": 225024} | |
| {"current_steps": 580, "total_steps": 3330, "loss": 0.6379, "lr": 0.0004917339395689996, "epoch": 5.225225225225225, "percentage": 17.42, "elapsed_time": "0:01:08", "remaining_time": "0:05:25", "throughput": 3303.35, "total_tokens": 227040} | |
| {"current_steps": 585, "total_steps": 3330, "loss": 0.649, "lr": 0.000491396466178622, "epoch": 5.27027027027027, "percentage": 17.57, "elapsed_time": "0:01:09", "remaining_time": "0:05:24", "throughput": 3310.25, "total_tokens": 229184} | |
| {"current_steps": 590, "total_steps": 3330, "loss": 2.0758, "lr": 0.0004910523615263213, "epoch": 5.315315315315315, "percentage": 17.72, "elapsed_time": "0:01:09", "remaining_time": "0:05:23", "throughput": 3314.69, "total_tokens": 231104} | |
| {"current_steps": 595, "total_steps": 3330, "loss": 0.5733, "lr": 0.0004907016350647961, "epoch": 5.36036036036036, "percentage": 17.87, "elapsed_time": "0:01:10", "remaining_time": "0:05:22", "throughput": 3320.11, "total_tokens": 233088} | |
| {"current_steps": 600, "total_steps": 3330, "loss": 0.4, "lr": 0.0004903442964286492, "epoch": 5.405405405405405, "percentage": 18.02, "elapsed_time": "0:01:10", "remaining_time": "0:05:21", "throughput": 3324.8, "total_tokens": 235008} | |
| {"current_steps": 605, "total_steps": 3330, "loss": 0.395, "lr": 0.0004899803554341225, "epoch": 5.45045045045045, "percentage": 18.17, "elapsed_time": "0:01:11", "remaining_time": "0:05:20", "throughput": 3330.9, "total_tokens": 237120} | |
| {"current_steps": 610, "total_steps": 3330, "loss": 0.6124, "lr": 0.0004896098220788272, "epoch": 5.495495495495495, "percentage": 18.32, "elapsed_time": "0:01:11", "remaining_time": "0:05:19", "throughput": 3332.36, "total_tokens": 238752} | |
| {"current_steps": 615, "total_steps": 3330, "loss": 1.2727, "lr": 0.0004892327065414697, "epoch": 5.54054054054054, "percentage": 18.47, "elapsed_time": "0:01:12", "remaining_time": "0:05:18", "throughput": 3340.07, "total_tokens": 240864} | |
| {"current_steps": 620, "total_steps": 3330, "loss": 0.7417, "lr": 0.0004888490191815716, "epoch": 5.585585585585585, "percentage": 18.62, "elapsed_time": "0:01:12", "remaining_time": "0:05:17", "throughput": 3343.26, "total_tokens": 242720} | |
| {"current_steps": 625, "total_steps": 3330, "loss": 0.59, "lr": 0.0004884587705391851, "epoch": 5.63063063063063, "percentage": 18.77, "elapsed_time": "0:01:13", "remaining_time": "0:05:16", "throughput": 3351.89, "total_tokens": 245184} | |
| {"current_steps": 630, "total_steps": 3330, "loss": 0.8984, "lr": 0.0004880619713346038, "epoch": 5.675675675675675, "percentage": 18.92, "elapsed_time": "0:01:13", "remaining_time": "0:05:15", "throughput": 3355.66, "total_tokens": 247008} | |
| {"current_steps": 635, "total_steps": 3330, "loss": 0.6586, "lr": 0.0004876586324680679, "epoch": 5.7207207207207205, "percentage": 19.07, "elapsed_time": "0:01:14", "remaining_time": "0:05:14", "throughput": 3358.7, "total_tokens": 248768} | |
| {"current_steps": 640, "total_steps": 3330, "loss": 0.4359, "lr": 0.0004872487650194647, "epoch": 5.7657657657657655, "percentage": 19.22, "elapsed_time": "0:01:14", "remaining_time": "0:05:13", "throughput": 3359.36, "total_tokens": 250464} | |
| {"current_steps": 645, "total_steps": 3330, "loss": 0.4257, "lr": 0.00048683238024802456, "epoch": 5.8108108108108105, "percentage": 19.37, "elapsed_time": "0:01:15", "remaining_time": "0:05:12", "throughput": 3364.83, "total_tokens": 252672} | |
| {"current_steps": 650, "total_steps": 3330, "loss": 0.3523, "lr": 0.0004864094895920113, "epoch": 5.8558558558558556, "percentage": 19.52, "elapsed_time": "0:01:15", "remaining_time": "0:05:11", "throughput": 3365.69, "total_tokens": 254400} | |
| {"current_steps": 655, "total_steps": 3330, "loss": 0.3772, "lr": 0.0004859801046684083, "epoch": 5.900900900900901, "percentage": 19.67, "elapsed_time": "0:01:16", "remaining_time": "0:05:10", "throughput": 3373.23, "total_tokens": 256576} | |
| {"current_steps": 660, "total_steps": 3330, "loss": 0.3582, "lr": 0.0004855442372725989, "epoch": 5.945945945945946, "percentage": 19.82, "elapsed_time": "0:01:16", "remaining_time": "0:05:09", "throughput": 3375.21, "total_tokens": 258304} | |
| {"current_steps": 665, "total_steps": 3330, "loss": 0.4134, "lr": 0.0004851018993780429, "epoch": 5.990990990990991, "percentage": 19.97, "elapsed_time": "0:01:17", "remaining_time": "0:05:08", "throughput": 3381.73, "total_tokens": 260448} | |
| {"current_steps": 668, "total_steps": 3330, "eval_loss": 0.3488299548625946, "epoch": 6.018018018018018, "percentage": 20.06, "elapsed_time": "0:01:18", "remaining_time": "0:05:14", "throughput": 3310.48, "total_tokens": 261392} | |
| {"current_steps": 670, "total_steps": 3330, "loss": 0.4853, "lr": 0.00048465310313594736, "epoch": 6.036036036036036, "percentage": 20.12, "elapsed_time": "0:01:21", "remaining_time": "0:05:22", "throughput": 3223.77, "total_tokens": 262096} | |
| {"current_steps": 675, "total_steps": 3330, "loss": 0.4484, "lr": 0.00048419786087493277, "epoch": 6.081081081081081, "percentage": 20.27, "elapsed_time": "0:01:21", "remaining_time": "0:05:21", "throughput": 3225.87, "total_tokens": 264080} | |
| {"current_steps": 680, "total_steps": 3330, "loss": 0.3794, "lr": 0.0004837361851006945, "epoch": 6.126126126126126, "percentage": 20.42, "elapsed_time": "0:01:22", "remaining_time": "0:05:21", "throughput": 3223.24, "total_tokens": 265840} | |
| {"current_steps": 685, "total_steps": 3330, "loss": 0.3794, "lr": 0.00048326808849565934, "epoch": 6.171171171171171, "percentage": 20.57, "elapsed_time": "0:01:22", "remaining_time": "0:05:20", "throughput": 3227.02, "total_tokens": 267792} | |
| {"current_steps": 690, "total_steps": 3330, "loss": 0.3546, "lr": 0.0004827935839186368, "epoch": 6.216216216216216, "percentage": 20.72, "elapsed_time": "0:01:23", "remaining_time": "0:05:19", "throughput": 3230.82, "total_tokens": 269776} | |
| {"current_steps": 695, "total_steps": 3330, "loss": 0.3455, "lr": 0.0004823126844044661, "epoch": 6.261261261261261, "percentage": 20.87, "elapsed_time": "0:01:24", "remaining_time": "0:05:18", "throughput": 3239.12, "total_tokens": 272176} | |
| {"current_steps": 700, "total_steps": 3330, "loss": 0.3537, "lr": 0.0004818254031636581, "epoch": 6.306306306306306, "percentage": 21.02, "elapsed_time": "0:01:24", "remaining_time": "0:05:17", "throughput": 3246.9, "total_tokens": 274480} | |
| {"current_steps": 705, "total_steps": 3330, "loss": 0.3623, "lr": 0.00048133175358203243, "epoch": 6.351351351351352, "percentage": 21.17, "elapsed_time": "0:01:25", "remaining_time": "0:05:16", "throughput": 3246.19, "total_tokens": 276176} | |
| {"current_steps": 710, "total_steps": 3330, "loss": 0.3725, "lr": 0.0004808317492203496, "epoch": 6.396396396396397, "percentage": 21.32, "elapsed_time": "0:01:25", "remaining_time": "0:05:15", "throughput": 3249.1, "total_tokens": 277968} | |
| {"current_steps": 715, "total_steps": 3330, "loss": 0.3177, "lr": 0.00048032540381393854, "epoch": 6.441441441441442, "percentage": 21.47, "elapsed_time": "0:01:26", "remaining_time": "0:05:14", "throughput": 3252.42, "total_tokens": 279824} | |
| {"current_steps": 720, "total_steps": 3330, "loss": 0.428, "lr": 0.0004798127312723195, "epoch": 6.486486486486487, "percentage": 21.62, "elapsed_time": "0:01:26", "remaining_time": "0:05:13", "throughput": 3253.03, "total_tokens": 281584} | |
| {"current_steps": 725, "total_steps": 3330, "loss": 0.3647, "lr": 0.00047929374567882174, "epoch": 6.531531531531532, "percentage": 21.77, "elapsed_time": "0:01:27", "remaining_time": "0:05:13", "throughput": 3255.59, "total_tokens": 283632} | |
| {"current_steps": 730, "total_steps": 3330, "loss": 0.482, "lr": 0.00047876846129019655, "epoch": 6.576576576576577, "percentage": 21.92, "elapsed_time": "0:01:27", "remaining_time": "0:05:12", "throughput": 3260.06, "total_tokens": 285808} | |
| {"current_steps": 735, "total_steps": 3330, "loss": 0.3713, "lr": 0.00047823689253622586, "epoch": 6.621621621621622, "percentage": 22.07, "elapsed_time": "0:01:28", "remaining_time": "0:05:11", "throughput": 3265.64, "total_tokens": 287792} | |
| {"current_steps": 740, "total_steps": 3330, "loss": 0.3927, "lr": 0.00047769905401932585, "epoch": 6.666666666666667, "percentage": 22.22, "elapsed_time": "0:01:28", "remaining_time": "0:05:10", "throughput": 3268.18, "total_tokens": 289680} | |
| {"current_steps": 745, "total_steps": 3330, "loss": 0.3208, "lr": 0.0004771549605141455, "epoch": 6.711711711711712, "percentage": 22.37, "elapsed_time": "0:01:29", "remaining_time": "0:05:09", "throughput": 3274.49, "total_tokens": 291792} | |
| {"current_steps": 750, "total_steps": 3330, "loss": 0.3977, "lr": 0.00047660462696716107, "epoch": 6.756756756756757, "percentage": 22.52, "elapsed_time": "0:01:29", "remaining_time": "0:05:08", "throughput": 3274.28, "total_tokens": 293584} | |
| {"current_steps": 755, "total_steps": 3330, "loss": 0.3499, "lr": 0.0004760480684962654, "epoch": 6.801801801801802, "percentage": 22.67, "elapsed_time": "0:01:30", "remaining_time": "0:05:07", "throughput": 3279.48, "total_tokens": 295568} | |
| {"current_steps": 760, "total_steps": 3330, "loss": 0.3588, "lr": 0.00047548530039035245, "epoch": 6.846846846846847, "percentage": 22.82, "elapsed_time": "0:01:30", "remaining_time": "0:05:06", "throughput": 3282.68, "total_tokens": 297424} | |
| {"current_steps": 765, "total_steps": 3330, "loss": 0.373, "lr": 0.0004749163381088976, "epoch": 6.891891891891892, "percentage": 22.97, "elapsed_time": "0:01:31", "remaining_time": "0:05:05", "throughput": 3283.16, "total_tokens": 299056} | |
| {"current_steps": 770, "total_steps": 3330, "loss": 0.3411, "lr": 0.00047434119728153267, "epoch": 6.936936936936937, "percentage": 23.12, "elapsed_time": "0:01:31", "remaining_time": "0:05:04", "throughput": 3290.12, "total_tokens": 301200} | |
| {"current_steps": 775, "total_steps": 3330, "loss": 0.3939, "lr": 0.00047375989370761695, "epoch": 6.981981981981982, "percentage": 23.27, "elapsed_time": "0:01:31", "remaining_time": "0:05:03", "throughput": 3296.2, "total_tokens": 303216} | |
| {"current_steps": 780, "total_steps": 3330, "loss": 0.3639, "lr": 0.00047317244335580286, "epoch": 7.027027027027027, "percentage": 23.42, "elapsed_time": "0:01:32", "remaining_time": "0:05:03", "throughput": 3290.25, "total_tokens": 305328} | |
| {"current_steps": 785, "total_steps": 3330, "loss": 0.3619, "lr": 0.0004725788623635972, "epoch": 7.072072072072072, "percentage": 23.57, "elapsed_time": "0:01:33", "remaining_time": "0:05:02", "throughput": 3289.18, "total_tokens": 307056} | |
| {"current_steps": 790, "total_steps": 3330, "loss": 0.3291, "lr": 0.00047197916703691823, "epoch": 7.117117117117117, "percentage": 23.72, "elapsed_time": "0:01:33", "remaining_time": "0:05:01", "throughput": 3296.14, "total_tokens": 309232} | |
| {"current_steps": 795, "total_steps": 3330, "loss": 0.3975, "lr": 0.0004713733738496475, "epoch": 7.162162162162162, "percentage": 23.87, "elapsed_time": "0:01:34", "remaining_time": "0:05:00", "throughput": 3297.46, "total_tokens": 311056} | |
| {"current_steps": 800, "total_steps": 3330, "loss": 0.3461, "lr": 0.00047076149944317734, "epoch": 7.207207207207207, "percentage": 24.02, "elapsed_time": "0:01:34", "remaining_time": "0:04:59", "throughput": 3301.16, "total_tokens": 312944} | |
| {"current_steps": 805, "total_steps": 3330, "loss": 0.3391, "lr": 0.00047014356062595366, "epoch": 7.252252252252252, "percentage": 24.17, "elapsed_time": "0:01:35", "remaining_time": "0:04:58", "throughput": 3306.84, "total_tokens": 314992} | |
| {"current_steps": 810, "total_steps": 3330, "loss": 0.466, "lr": 0.00046951957437301427, "epoch": 7.297297297297297, "percentage": 24.32, "elapsed_time": "0:01:35", "remaining_time": "0:04:57", "throughput": 3312.2, "total_tokens": 317200} | |
| {"current_steps": 815, "total_steps": 3330, "loss": 0.3583, "lr": 0.0004688895578255227, "epoch": 7.342342342342342, "percentage": 24.47, "elapsed_time": "0:01:36", "remaining_time": "0:04:56", "throughput": 3316.7, "total_tokens": 319152} | |
| {"current_steps": 820, "total_steps": 3330, "loss": 0.4826, "lr": 0.00046825352829029705, "epoch": 7.387387387387387, "percentage": 24.62, "elapsed_time": "0:01:36", "remaining_time": "0:04:55", "throughput": 3321.0, "total_tokens": 321136} | |
| {"current_steps": 825, "total_steps": 3330, "loss": 0.3208, "lr": 0.00046761150323933474, "epoch": 7.4324324324324325, "percentage": 24.77, "elapsed_time": "0:01:37", "remaining_time": "0:04:54", "throughput": 3324.31, "total_tokens": 322960} | |
| {"current_steps": 830, "total_steps": 3330, "loss": 0.3802, "lr": 0.0004669635003093325, "epoch": 7.4774774774774775, "percentage": 24.92, "elapsed_time": "0:01:37", "remaining_time": "0:04:54", "throughput": 3327.86, "total_tokens": 324880} | |
| {"current_steps": 835, "total_steps": 3330, "loss": 0.3588, "lr": 0.00046630953730120205, "epoch": 7.5225225225225225, "percentage": 25.08, "elapsed_time": "0:01:38", "remaining_time": "0:04:53", "throughput": 3332.73, "total_tokens": 326864} | |
| {"current_steps": 835, "total_steps": 3330, "eval_loss": 0.35348400473594666, "epoch": 7.5225225225225225, "percentage": 25.08, "elapsed_time": "0:01:39", "remaining_time": "0:04:56", "throughput": 3292.0, "total_tokens": 326864} | |
| {"current_steps": 840, "total_steps": 3330, "loss": 0.3296, "lr": 0.00046564963217958077, "epoch": 7.5675675675675675, "percentage": 25.23, "elapsed_time": "0:01:42", "remaining_time": "0:05:02", "throughput": 3218.13, "total_tokens": 328688} | |
| {"current_steps": 845, "total_steps": 3330, "loss": 0.4332, "lr": 0.0004649838030723385, "epoch": 7.612612612612613, "percentage": 25.38, "elapsed_time": "0:01:42", "remaining_time": "0:05:02", "throughput": 3222.32, "total_tokens": 330928} | |
| {"current_steps": 850, "total_steps": 3330, "loss": 0.3653, "lr": 0.0004643120682700792, "epoch": 7.657657657657658, "percentage": 25.53, "elapsed_time": "0:01:43", "remaining_time": "0:05:01", "throughput": 3225.54, "total_tokens": 332816} | |
| {"current_steps": 855, "total_steps": 3330, "loss": 0.3581, "lr": 0.00046363444622563916, "epoch": 7.702702702702703, "percentage": 25.68, "elapsed_time": "0:01:43", "remaining_time": "0:05:00", "throughput": 3228.95, "total_tokens": 334832} | |
| {"current_steps": 860, "total_steps": 3330, "loss": 0.3797, "lr": 0.00046295095555357936, "epoch": 7.747747747747748, "percentage": 25.83, "elapsed_time": "0:01:44", "remaining_time": "0:04:59", "throughput": 3231.04, "total_tokens": 336592} | |
| {"current_steps": 865, "total_steps": 3330, "loss": 0.3576, "lr": 0.00046226161502967443, "epoch": 7.792792792792793, "percentage": 25.98, "elapsed_time": "0:01:44", "remaining_time": "0:04:58", "throughput": 3238.63, "total_tokens": 338928} | |
| {"current_steps": 870, "total_steps": 3330, "loss": 0.3579, "lr": 0.00046156644359039717, "epoch": 7.837837837837838, "percentage": 26.13, "elapsed_time": "0:01:45", "remaining_time": "0:04:57", "throughput": 3243.25, "total_tokens": 340944} | |
| {"current_steps": 875, "total_steps": 3330, "loss": 0.4455, "lr": 0.0004608654603323977, "epoch": 7.882882882882883, "percentage": 26.28, "elapsed_time": "0:01:45", "remaining_time": "0:04:56", "throughput": 3245.1, "total_tokens": 342704} | |
| {"current_steps": 880, "total_steps": 3330, "loss": 0.3447, "lr": 0.0004601586845119795, "epoch": 7.927927927927928, "percentage": 26.43, "elapsed_time": "0:01:46", "remaining_time": "0:04:55", "throughput": 3251.46, "total_tokens": 344976} | |
| {"current_steps": 885, "total_steps": 3330, "loss": 0.3682, "lr": 0.00045944613554457005, "epoch": 7.972972972972973, "percentage": 26.58, "elapsed_time": "0:01:46", "remaining_time": "0:04:54", "throughput": 3253.2, "total_tokens": 346640} | |
| {"current_steps": 890, "total_steps": 3330, "loss": 0.3688, "lr": 0.0004587278330041876, "epoch": 8.018018018018019, "percentage": 26.73, "elapsed_time": "0:01:47", "remaining_time": "0:04:54", "throughput": 3239.41, "total_tokens": 348200} | |
| {"current_steps": 895, "total_steps": 3330, "loss": 0.3515, "lr": 0.00045800379662290334, "epoch": 8.063063063063064, "percentage": 26.88, "elapsed_time": "0:01:47", "remaining_time": "0:04:53", "throughput": 3243.42, "total_tokens": 350184} | |
| {"current_steps": 900, "total_steps": 3330, "loss": 0.3535, "lr": 0.00045727404629029985, "epoch": 8.108108108108109, "percentage": 27.03, "elapsed_time": "0:01:48", "remaining_time": "0:04:52", "throughput": 3244.97, "total_tokens": 351912} | |
| {"current_steps": 905, "total_steps": 3330, "loss": 0.3547, "lr": 0.00045653860205292383, "epoch": 8.153153153153154, "percentage": 27.18, "elapsed_time": "0:01:48", "remaining_time": "0:04:51", "throughput": 3248.98, "total_tokens": 353992} | |
| {"current_steps": 910, "total_steps": 3330, "loss": 0.344, "lr": 0.0004557974841137363, "epoch": 8.198198198198199, "percentage": 27.33, "elapsed_time": "0:01:49", "remaining_time": "0:04:51", "throughput": 3253.61, "total_tokens": 356136} | |
| {"current_steps": 915, "total_steps": 3330, "loss": 0.3385, "lr": 0.0004550507128315572, "epoch": 8.243243243243244, "percentage": 27.48, "elapsed_time": "0:01:50", "remaining_time": "0:04:50", "throughput": 3253.91, "total_tokens": 358056} | |
| {"current_steps": 920, "total_steps": 3330, "loss": 0.3668, "lr": 0.00045429830872050587, "epoch": 8.288288288288289, "percentage": 27.63, "elapsed_time": "0:01:50", "remaining_time": "0:04:49", "throughput": 3254.91, "total_tokens": 359848} | |
| {"current_steps": 925, "total_steps": 3330, "loss": 0.3702, "lr": 0.0004535402924494382, "epoch": 8.333333333333334, "percentage": 27.78, "elapsed_time": "0:01:51", "remaining_time": "0:04:48", "throughput": 3257.8, "total_tokens": 361992} | |
| {"current_steps": 930, "total_steps": 3330, "loss": 0.3888, "lr": 0.00045277668484137827, "epoch": 8.378378378378379, "percentage": 27.93, "elapsed_time": "0:01:51", "remaining_time": "0:04:48", "throughput": 3257.45, "total_tokens": 363624} | |
| {"current_steps": 935, "total_steps": 3330, "loss": 0.3698, "lr": 0.00045200750687294654, "epoch": 8.423423423423424, "percentage": 28.08, "elapsed_time": "0:01:52", "remaining_time": "0:04:47", "throughput": 3258.6, "total_tokens": 365352} | |
| {"current_steps": 940, "total_steps": 3330, "loss": 0.3522, "lr": 0.00045123277967378374, "epoch": 8.468468468468469, "percentage": 28.23, "elapsed_time": "0:01:52", "remaining_time": "0:04:46", "throughput": 3261.89, "total_tokens": 367400} | |
| {"current_steps": 945, "total_steps": 3330, "loss": 0.3336, "lr": 0.00045045252452596997, "epoch": 8.513513513513514, "percentage": 28.38, "elapsed_time": "0:01:53", "remaining_time": "0:04:45", "throughput": 3263.17, "total_tokens": 369384} | |
| {"current_steps": 950, "total_steps": 3330, "loss": 0.3534, "lr": 0.00044966676286344064, "epoch": 8.558558558558559, "percentage": 28.53, "elapsed_time": "0:01:53", "remaining_time": "0:04:44", "throughput": 3265.82, "total_tokens": 371336} | |
| {"current_steps": 955, "total_steps": 3330, "loss": 0.3527, "lr": 0.0004488755162713975, "epoch": 8.603603603603604, "percentage": 28.68, "elapsed_time": "0:01:54", "remaining_time": "0:04:44", "throughput": 3269.05, "total_tokens": 373320} | |
| {"current_steps": 960, "total_steps": 3330, "loss": 0.3905, "lr": 0.0004480788064857153, "epoch": 8.64864864864865, "percentage": 28.83, "elapsed_time": "0:01:54", "remaining_time": "0:04:43", "throughput": 3269.02, "total_tokens": 375048} | |
| {"current_steps": 965, "total_steps": 3330, "loss": 0.3999, "lr": 0.0004472766553923454, "epoch": 8.693693693693694, "percentage": 28.98, "elapsed_time": "0:01:55", "remaining_time": "0:04:42", "throughput": 3277.73, "total_tokens": 377672} | |
| {"current_steps": 970, "total_steps": 3330, "loss": 0.3372, "lr": 0.00044646908502671376, "epoch": 8.73873873873874, "percentage": 29.13, "elapsed_time": "0:01:55", "remaining_time": "0:04:41", "throughput": 3280.27, "total_tokens": 379560} | |
| {"current_steps": 975, "total_steps": 3330, "loss": 0.4068, "lr": 0.0004456561175731162, "epoch": 8.783783783783784, "percentage": 29.28, "elapsed_time": "0:01:56", "remaining_time": "0:04:40", "throughput": 3280.37, "total_tokens": 381128} | |
| {"current_steps": 980, "total_steps": 3330, "loss": 0.3719, "lr": 0.000444837775364109, "epoch": 8.82882882882883, "percentage": 29.43, "elapsed_time": "0:01:56", "remaining_time": "0:04:39", "throughput": 3285.87, "total_tokens": 383400} | |
| {"current_steps": 985, "total_steps": 3330, "loss": 0.356, "lr": 0.00044401408087989475, "epoch": 8.873873873873874, "percentage": 29.58, "elapsed_time": "0:01:57", "remaining_time": "0:04:39", "throughput": 3288.98, "total_tokens": 385448} | |
| {"current_steps": 990, "total_steps": 3330, "loss": 0.4219, "lr": 0.0004431850567477058, "epoch": 8.91891891891892, "percentage": 29.73, "elapsed_time": "0:01:57", "remaining_time": "0:04:38", "throughput": 3297.45, "total_tokens": 388040} | |
| {"current_steps": 995, "total_steps": 3330, "loss": 0.453, "lr": 0.0004423507257411817, "epoch": 8.963963963963964, "percentage": 29.88, "elapsed_time": "0:01:58", "remaining_time": "0:04:37", "throughput": 3299.47, "total_tokens": 389768} | |
| {"current_steps": 1000, "total_steps": 3330, "loss": 0.3683, "lr": 0.0004415111107797445, "epoch": 9.00900900900901, "percentage": 30.03, "elapsed_time": "0:01:59", "remaining_time": "0:04:37", "throughput": 3287.03, "total_tokens": 391224} | |
| {"current_steps": 1002, "total_steps": 3330, "eval_loss": 0.35251328349113464, "epoch": 9.027027027027026, "percentage": 30.09, "elapsed_time": "0:02:00", "remaining_time": "0:04:40", "throughput": 3251.0, "total_tokens": 391800} | |
| {"current_steps": 1005, "total_steps": 3330, "loss": 0.3787, "lr": 0.0004406662349279683, "epoch": 9.054054054054054, "percentage": 30.18, "elapsed_time": "0:02:03", "remaining_time": "0:04:44", "throughput": 3190.93, "total_tokens": 392952} | |
| {"current_steps": 1010, "total_steps": 3330, "loss": 0.3692, "lr": 0.0004398161213949464, "epoch": 9.0990990990991, "percentage": 30.33, "elapsed_time": "0:02:03", "remaining_time": "0:04:44", "throughput": 3192.2, "total_tokens": 394840} | |
| {"current_steps": 1015, "total_steps": 3330, "loss": 0.39, "lr": 0.000438960793533653, "epoch": 9.144144144144144, "percentage": 30.48, "elapsed_time": "0:02:04", "remaining_time": "0:04:43", "throughput": 3196.01, "total_tokens": 396856} | |
| {"current_steps": 1020, "total_steps": 3330, "loss": 0.3373, "lr": 0.00043810027484030245, "epoch": 9.18918918918919, "percentage": 30.63, "elapsed_time": "0:02:04", "remaining_time": "0:04:42", "throughput": 3196.89, "total_tokens": 398680} | |
| {"current_steps": 1025, "total_steps": 3330, "loss": 0.3693, "lr": 0.0004372345889537034, "epoch": 9.234234234234235, "percentage": 30.78, "elapsed_time": "0:02:05", "remaining_time": "0:04:41", "throughput": 3199.67, "total_tokens": 400696} | |
| {"current_steps": 1030, "total_steps": 3330, "loss": 0.3758, "lr": 0.00043636375965460907, "epoch": 9.27927927927928, "percentage": 30.93, "elapsed_time": "0:02:05", "remaining_time": "0:04:40", "throughput": 3200.2, "total_tokens": 402520} | |
| {"current_steps": 1035, "total_steps": 3330, "loss": 0.3887, "lr": 0.00043548781086506484, "epoch": 9.324324324324325, "percentage": 31.08, "elapsed_time": "0:02:06", "remaining_time": "0:04:39", "throughput": 3205.48, "total_tokens": 404728} | |
| {"current_steps": 1040, "total_steps": 3330, "loss": 0.3376, "lr": 0.00043460676664775036, "epoch": 9.36936936936937, "percentage": 31.23, "elapsed_time": "0:02:06", "remaining_time": "0:04:39", "throughput": 3209.06, "total_tokens": 406776} | |
| {"current_steps": 1045, "total_steps": 3330, "loss": 0.3739, "lr": 0.000433720651205319, "epoch": 9.414414414414415, "percentage": 31.38, "elapsed_time": "0:02:07", "remaining_time": "0:04:38", "throughput": 3211.6, "total_tokens": 408568} | |
| {"current_steps": 1050, "total_steps": 3330, "loss": 0.3643, "lr": 0.0004328294888797326, "epoch": 9.45945945945946, "percentage": 31.53, "elapsed_time": "0:02:07", "remaining_time": "0:04:37", "throughput": 3213.87, "total_tokens": 410392} | |
| {"current_steps": 1055, "total_steps": 3330, "loss": 0.3606, "lr": 0.00043193330415159314, "epoch": 9.504504504504505, "percentage": 31.68, "elapsed_time": "0:02:08", "remaining_time": "0:04:36", "throughput": 3216.17, "total_tokens": 412344} | |
| {"current_steps": 1060, "total_steps": 3330, "loss": 0.3674, "lr": 0.00043103212163947, "epoch": 9.54954954954955, "percentage": 31.83, "elapsed_time": "0:02:08", "remaining_time": "0:04:35", "throughput": 3221.55, "total_tokens": 414584} | |
| {"current_steps": 1065, "total_steps": 3330, "loss": 0.3834, "lr": 0.000430125966099224, "epoch": 9.594594594594595, "percentage": 31.98, "elapsed_time": "0:02:09", "remaining_time": "0:04:34", "throughput": 3223.71, "total_tokens": 416536} | |
| {"current_steps": 1070, "total_steps": 3330, "loss": 0.35, "lr": 0.0004292148624233268, "epoch": 9.63963963963964, "percentage": 32.13, "elapsed_time": "0:02:09", "remaining_time": "0:04:34", "throughput": 3229.12, "total_tokens": 418904} | |
| {"current_steps": 1075, "total_steps": 3330, "loss": 0.3552, "lr": 0.0004282988356401776, "epoch": 9.684684684684685, "percentage": 32.28, "elapsed_time": "0:02:10", "remaining_time": "0:04:33", "throughput": 3233.09, "total_tokens": 420952} | |
| {"current_steps": 1080, "total_steps": 3330, "loss": 0.3631, "lr": 0.00042737791091341533, "epoch": 9.72972972972973, "percentage": 32.43, "elapsed_time": "0:02:10", "remaining_time": "0:04:32", "throughput": 3233.74, "total_tokens": 422616} | |
| {"current_steps": 1085, "total_steps": 3330, "loss": 0.3431, "lr": 0.0004264521135412276, "epoch": 9.774774774774775, "percentage": 32.58, "elapsed_time": "0:02:11", "remaining_time": "0:04:31", "throughput": 3237.74, "total_tokens": 424696} | |
| {"current_steps": 1090, "total_steps": 3330, "loss": 0.3644, "lr": 0.0004255214689556557, "epoch": 9.81981981981982, "percentage": 32.73, "elapsed_time": "0:02:11", "remaining_time": "0:04:30", "throughput": 3239.36, "total_tokens": 426680} | |
| {"current_steps": 1095, "total_steps": 3330, "loss": 0.3517, "lr": 0.00042458600272189553, "epoch": 9.864864864864865, "percentage": 32.88, "elapsed_time": "0:02:12", "remaining_time": "0:04:29", "throughput": 3242.01, "total_tokens": 428536} | |
| {"current_steps": 1100, "total_steps": 3330, "loss": 0.3707, "lr": 0.000423645740537596, "epoch": 9.90990990990991, "percentage": 33.03, "elapsed_time": "0:02:12", "remaining_time": "0:04:28", "throughput": 3248.91, "total_tokens": 431032} | |
| {"current_steps": 1105, "total_steps": 3330, "loss": 0.346, "lr": 0.0004227007082321528, "epoch": 9.954954954954955, "percentage": 33.18, "elapsed_time": "0:02:13", "remaining_time": "0:04:28", "throughput": 3252.57, "total_tokens": 433048} | |
| {"current_steps": 1110, "total_steps": 3330, "loss": 0.3361, "lr": 0.00042175093176599854, "epoch": 10.0, "percentage": 33.33, "elapsed_time": "0:02:13", "remaining_time": "0:04:27", "throughput": 3250.93, "total_tokens": 434728} | |
| {"current_steps": 1115, "total_steps": 3330, "loss": 0.3894, "lr": 0.0004207964372298904, "epoch": 10.045045045045045, "percentage": 33.48, "elapsed_time": "0:02:14", "remaining_time": "0:04:27", "throughput": 3248.02, "total_tokens": 436968} | |
| {"current_steps": 1120, "total_steps": 3330, "loss": 0.3593, "lr": 0.0004198372508441924, "epoch": 10.09009009009009, "percentage": 33.63, "elapsed_time": "0:02:15", "remaining_time": "0:04:26", "throughput": 3251.13, "total_tokens": 439016} | |
| {"current_steps": 1125, "total_steps": 3330, "loss": 0.3895, "lr": 0.00041887339895815606, "epoch": 10.135135135135135, "percentage": 33.78, "elapsed_time": "0:02:15", "remaining_time": "0:04:25", "throughput": 3254.81, "total_tokens": 441224} | |
| {"current_steps": 1130, "total_steps": 3330, "loss": 0.3566, "lr": 0.00041790490804919576, "epoch": 10.18018018018018, "percentage": 33.93, "elapsed_time": "0:02:16", "remaining_time": "0:04:24", "throughput": 3256.42, "total_tokens": 443208} | |
| {"current_steps": 1135, "total_steps": 3330, "loss": 0.3513, "lr": 0.00041693180472216206, "epoch": 10.225225225225225, "percentage": 34.08, "elapsed_time": "0:02:16", "remaining_time": "0:04:24", "throughput": 3256.31, "total_tokens": 445000} | |
| {"current_steps": 1140, "total_steps": 3330, "loss": 0.3488, "lr": 0.0004159541157086106, "epoch": 10.27027027027027, "percentage": 34.23, "elapsed_time": "0:02:17", "remaining_time": "0:04:23", "throughput": 3260.74, "total_tokens": 447240} | |
| {"current_steps": 1145, "total_steps": 3330, "loss": 0.3306, "lr": 0.00041497186786606746, "epoch": 10.315315315315315, "percentage": 34.38, "elapsed_time": "0:02:17", "remaining_time": "0:04:22", "throughput": 3262.57, "total_tokens": 449032} | |
| {"current_steps": 1150, "total_steps": 3330, "loss": 0.3395, "lr": 0.00041398508817729187, "epoch": 10.36036036036036, "percentage": 34.53, "elapsed_time": "0:02:18", "remaining_time": "0:04:21", "throughput": 3266.69, "total_tokens": 451176} | |
| {"current_steps": 1155, "total_steps": 3330, "loss": 0.4138, "lr": 0.00041299380374953487, "epoch": 10.405405405405405, "percentage": 34.68, "elapsed_time": "0:02:18", "remaining_time": "0:04:21", "throughput": 3268.65, "total_tokens": 453096} | |
| {"current_steps": 1160, "total_steps": 3330, "loss": 0.4009, "lr": 0.00041199804181379424, "epoch": 10.45045045045045, "percentage": 34.83, "elapsed_time": "0:02:19", "remaining_time": "0:04:20", "throughput": 3269.41, "total_tokens": 454856} | |
| {"current_steps": 1165, "total_steps": 3330, "loss": 0.3652, "lr": 0.0004109978297240671, "epoch": 10.495495495495495, "percentage": 34.98, "elapsed_time": "0:02:19", "remaining_time": "0:04:19", "throughput": 3274.04, "total_tokens": 457096} | |
| {"current_steps": 1169, "total_steps": 3330, "eval_loss": 0.34910741448402405, "epoch": 10.531531531531531, "percentage": 35.11, "elapsed_time": "0:02:21", "remaining_time": "0:04:20", "throughput": 3248.34, "total_tokens": 458568} | |
| {"current_steps": 1170, "total_steps": 3330, "loss": 0.3655, "lr": 0.000409993194956598, "epoch": 10.54054054054054, "percentage": 35.14, "elapsed_time": "0:02:23", "remaining_time": "0:04:24", "throughput": 3200.16, "total_tokens": 459080} | |
| {"current_steps": 1175, "total_steps": 3330, "loss": 0.3446, "lr": 0.0004089841651091243, "epoch": 10.585585585585585, "percentage": 35.29, "elapsed_time": "0:02:23", "remaining_time": "0:04:24", "throughput": 3203.44, "total_tokens": 461192} | |
| {"current_steps": 1180, "total_steps": 3330, "loss": 0.3531, "lr": 0.000407970767900118, "epoch": 10.63063063063063, "percentage": 35.44, "elapsed_time": "0:02:24", "remaining_time": "0:04:23", "throughput": 3207.14, "total_tokens": 463240} | |
| {"current_steps": 1185, "total_steps": 3330, "loss": 0.357, "lr": 0.0004069530311680247, "epoch": 10.675675675675675, "percentage": 35.59, "elapsed_time": "0:02:24", "remaining_time": "0:04:22", "throughput": 3211.72, "total_tokens": 465416} | |
| {"current_steps": 1190, "total_steps": 3330, "loss": 0.3487, "lr": 0.0004059309828704979, "epoch": 10.72072072072072, "percentage": 35.74, "elapsed_time": "0:02:25", "remaining_time": "0:04:21", "throughput": 3213.02, "total_tokens": 467304} | |
| {"current_steps": 1195, "total_steps": 3330, "loss": 0.3571, "lr": 0.00040490465108363214, "epoch": 10.765765765765765, "percentage": 35.89, "elapsed_time": "0:02:25", "remaining_time": "0:04:20", "throughput": 3216.67, "total_tokens": 469288} | |
| {"current_steps": 1200, "total_steps": 3330, "loss": 0.3383, "lr": 0.0004038740640011911, "epoch": 10.81081081081081, "percentage": 36.04, "elapsed_time": "0:02:26", "remaining_time": "0:04:19", "throughput": 3215.65, "total_tokens": 470920} | |
| {"current_steps": 1205, "total_steps": 3330, "loss": 0.3565, "lr": 0.0004028392499338328, "epoch": 10.855855855855856, "percentage": 36.19, "elapsed_time": "0:02:26", "remaining_time": "0:04:19", "throughput": 3219.23, "total_tokens": 473160} | |
| {"current_steps": 1210, "total_steps": 3330, "loss": 0.3829, "lr": 0.00040180023730833293, "epoch": 10.9009009009009, "percentage": 36.34, "elapsed_time": "0:02:27", "remaining_time": "0:04:18", "throughput": 3219.08, "total_tokens": 475048} | |
| {"current_steps": 1215, "total_steps": 3330, "loss": 0.3605, "lr": 0.0004007570546668029, "epoch": 10.945945945945946, "percentage": 36.49, "elapsed_time": "0:02:28", "remaining_time": "0:04:17", "throughput": 3219.78, "total_tokens": 476712} | |
| {"current_steps": 1220, "total_steps": 3330, "loss": 0.4403, "lr": 0.00039970973066590635, "epoch": 10.99099099099099, "percentage": 36.64, "elapsed_time": "0:02:28", "remaining_time": "0:04:16", "throughput": 3223.24, "total_tokens": 478696} | |
| {"current_steps": 1225, "total_steps": 3330, "loss": 0.3381, "lr": 0.0003986582940760717, "epoch": 11.036036036036036, "percentage": 36.79, "elapsed_time": "0:02:29", "remaining_time": "0:04:16", "throughput": 3217.66, "total_tokens": 480512} | |
| {"current_steps": 1230, "total_steps": 3330, "loss": 0.3521, "lr": 0.00039760277378070205, "epoch": 11.08108108108108, "percentage": 36.94, "elapsed_time": "0:02:29", "remaining_time": "0:04:15", "throughput": 3218.49, "total_tokens": 482368} | |
| {"current_steps": 1235, "total_steps": 3330, "loss": 0.372, "lr": 0.0003965431987753815, "epoch": 11.126126126126126, "percentage": 37.09, "elapsed_time": "0:02:30", "remaining_time": "0:04:15", "throughput": 3219.8, "total_tokens": 484320} | |
| {"current_steps": 1240, "total_steps": 3330, "loss": 0.3468, "lr": 0.0003954795981670788, "epoch": 11.17117117117117, "percentage": 37.24, "elapsed_time": "0:02:30", "remaining_time": "0:04:14", "throughput": 3220.55, "total_tokens": 486208} | |
| {"current_steps": 1245, "total_steps": 3330, "loss": 0.3548, "lr": 0.0003944120011733476, "epoch": 11.216216216216216, "percentage": 37.39, "elapsed_time": "0:02:31", "remaining_time": "0:04:13", "throughput": 3220.23, "total_tokens": 487872} | |
| {"current_steps": 1250, "total_steps": 3330, "loss": 0.3167, "lr": 0.0003933404371215241, "epoch": 11.26126126126126, "percentage": 37.54, "elapsed_time": "0:02:32", "remaining_time": "0:04:12", "throughput": 3222.83, "total_tokens": 489952} | |
| {"current_steps": 1255, "total_steps": 3330, "loss": 0.3697, "lr": 0.00039226493544792097, "epoch": 11.306306306306306, "percentage": 37.69, "elapsed_time": "0:02:32", "remaining_time": "0:04:12", "throughput": 3225.91, "total_tokens": 492032} | |
| {"current_steps": 1260, "total_steps": 3330, "loss": 0.355, "lr": 0.0003911855256970193, "epoch": 11.35135135135135, "percentage": 37.84, "elapsed_time": "0:02:33", "remaining_time": "0:04:11", "throughput": 3227.64, "total_tokens": 493888} | |
| {"current_steps": 1265, "total_steps": 3330, "loss": 0.3531, "lr": 0.00039010223752065644, "epoch": 11.396396396396396, "percentage": 37.99, "elapsed_time": "0:02:33", "remaining_time": "0:04:10", "throughput": 3228.7, "total_tokens": 495648} | |
| {"current_steps": 1270, "total_steps": 3330, "loss": 0.3595, "lr": 0.0003890151006772119, "epoch": 11.441441441441441, "percentage": 38.14, "elapsed_time": "0:02:34", "remaining_time": "0:04:09", "throughput": 3230.51, "total_tokens": 497600} | |
| {"current_steps": 1275, "total_steps": 3330, "loss": 0.3331, "lr": 0.00038792414503078967, "epoch": 11.486486486486486, "percentage": 38.29, "elapsed_time": "0:02:34", "remaining_time": "0:04:09", "throughput": 3233.1, "total_tokens": 499552} | |
| {"current_steps": 1280, "total_steps": 3330, "loss": 0.352, "lr": 0.00038682940055039764, "epoch": 11.531531531531531, "percentage": 38.44, "elapsed_time": "0:02:35", "remaining_time": "0:04:08", "throughput": 3233.73, "total_tokens": 501312} | |
| {"current_steps": 1285, "total_steps": 3330, "loss": 0.3477, "lr": 0.0003857308973091249, "epoch": 11.576576576576576, "percentage": 38.59, "elapsed_time": "0:02:35", "remaining_time": "0:04:07", "throughput": 3236.09, "total_tokens": 503232} | |
| {"current_steps": 1290, "total_steps": 3330, "loss": 0.356, "lr": 0.00038462866548331486, "epoch": 11.621621621621621, "percentage": 38.74, "elapsed_time": "0:02:36", "remaining_time": "0:04:06", "throughput": 3236.58, "total_tokens": 505024} | |
| {"current_steps": 1295, "total_steps": 3330, "loss": 0.3746, "lr": 0.0003835227353517372, "epoch": 11.666666666666666, "percentage": 38.89, "elapsed_time": "0:02:36", "remaining_time": "0:04:05", "throughput": 3243.41, "total_tokens": 507680} | |
| {"current_steps": 1300, "total_steps": 3330, "loss": 0.3634, "lr": 0.0003824131372947551, "epoch": 11.711711711711711, "percentage": 39.04, "elapsed_time": "0:02:37", "remaining_time": "0:04:05", "throughput": 3245.58, "total_tokens": 509632} | |
| {"current_steps": 1305, "total_steps": 3330, "loss": 0.3565, "lr": 0.0003812999017934916, "epoch": 11.756756756756756, "percentage": 39.19, "elapsed_time": "0:02:37", "remaining_time": "0:04:04", "throughput": 3247.97, "total_tokens": 511648} | |
| {"current_steps": 1310, "total_steps": 3330, "loss": 0.3554, "lr": 0.00038018305942899165, "epoch": 11.801801801801801, "percentage": 39.34, "elapsed_time": "0:02:38", "remaining_time": "0:04:03", "throughput": 3251.0, "total_tokens": 513728} | |
| {"current_steps": 1315, "total_steps": 3330, "loss": 0.3206, "lr": 0.0003790626408813822, "epoch": 11.846846846846846, "percentage": 39.49, "elapsed_time": "0:02:38", "remaining_time": "0:04:02", "throughput": 3254.2, "total_tokens": 515904} | |
| {"current_steps": 1320, "total_steps": 3330, "loss": 0.3995, "lr": 0.0003779386769290296, "epoch": 11.891891891891891, "percentage": 39.64, "elapsed_time": "0:02:39", "remaining_time": "0:04:02", "throughput": 3254.33, "total_tokens": 517600} | |
| {"current_steps": 1325, "total_steps": 3330, "loss": 0.3536, "lr": 0.0003768111984476938, "epoch": 11.936936936936936, "percentage": 39.79, "elapsed_time": "0:02:39", "remaining_time": "0:04:01", "throughput": 3255.69, "total_tokens": 519520} | |
| {"current_steps": 1330, "total_steps": 3330, "loss": 0.346, "lr": 0.00037568023640968044, "epoch": 11.981981981981981, "percentage": 39.94, "elapsed_time": "0:02:40", "remaining_time": "0:04:00", "throughput": 3257.19, "total_tokens": 521280} | |
| {"current_steps": 1335, "total_steps": 3330, "loss": 0.3456, "lr": 0.0003745458218829899, "epoch": 12.027027027027026, "percentage": 40.09, "elapsed_time": "0:02:40", "remaining_time": "0:04:00", "throughput": 3250.16, "total_tokens": 522960} | |
| {"current_steps": 1336, "total_steps": 3330, "eval_loss": 0.3908711075782776, "epoch": 12.036036036036036, "percentage": 40.12, "elapsed_time": "0:02:42", "remaining_time": "0:04:02", "throughput": 3225.05, "total_tokens": 523312} | |
| {"current_steps": 1340, "total_steps": 3330, "loss": 0.363, "lr": 0.0003734079860304639, "epoch": 12.072072072072071, "percentage": 40.24, "elapsed_time": "0:02:44", "remaining_time": "0:04:04", "throughput": 3182.46, "total_tokens": 524912} | |
| {"current_steps": 1345, "total_steps": 3330, "loss": 0.3042, "lr": 0.00037226676010892925, "epoch": 12.117117117117116, "percentage": 40.39, "elapsed_time": "0:02:45", "remaining_time": "0:04:04", "throughput": 3185.74, "total_tokens": 526928} | |
| {"current_steps": 1350, "total_steps": 3330, "loss": 0.3613, "lr": 0.00037112217546833955, "epoch": 12.162162162162161, "percentage": 40.54, "elapsed_time": "0:02:45", "remaining_time": "0:04:03", "throughput": 3189.19, "total_tokens": 528944} | |
| {"current_steps": 1355, "total_steps": 3330, "loss": 0.3547, "lr": 0.00036997426355091375, "epoch": 12.207207207207206, "percentage": 40.69, "elapsed_time": "0:02:46", "remaining_time": "0:04:02", "throughput": 3194.99, "total_tokens": 531376} | |
| {"current_steps": 1360, "total_steps": 3330, "loss": 0.3653, "lr": 0.0003688230558902725, "epoch": 12.252252252252251, "percentage": 40.84, "elapsed_time": "0:02:46", "remaining_time": "0:04:01", "throughput": 3197.67, "total_tokens": 533264} | |
| {"current_steps": 1365, "total_steps": 3330, "loss": 0.3665, "lr": 0.0003676685841105719, "epoch": 12.297297297297296, "percentage": 40.99, "elapsed_time": "0:02:47", "remaining_time": "0:04:00", "throughput": 3200.59, "total_tokens": 535120} | |
| {"current_steps": 1370, "total_steps": 3330, "loss": 0.2919, "lr": 0.00036651087992563476, "epoch": 12.342342342342342, "percentage": 41.14, "elapsed_time": "0:02:47", "remaining_time": "0:03:59", "throughput": 3202.16, "total_tokens": 536912} | |
| {"current_steps": 1375, "total_steps": 3330, "loss": 0.4497, "lr": 0.00036534997513807934, "epoch": 12.387387387387387, "percentage": 41.29, "elapsed_time": "0:02:48", "remaining_time": "0:03:59", "throughput": 3203.95, "total_tokens": 538736} | |
| {"current_steps": 1380, "total_steps": 3330, "loss": 0.3984, "lr": 0.00036418590163844587, "epoch": 12.432432432432432, "percentage": 41.44, "elapsed_time": "0:02:48", "remaining_time": "0:03:58", "throughput": 3206.44, "total_tokens": 540720} | |
| {"current_steps": 1385, "total_steps": 3330, "loss": 0.3396, "lr": 0.00036301869140432057, "epoch": 12.477477477477478, "percentage": 41.59, "elapsed_time": "0:02:49", "remaining_time": "0:03:57", "throughput": 3209.85, "total_tokens": 542832} | |
| {"current_steps": 1390, "total_steps": 3330, "loss": 0.3497, "lr": 0.00036184837649945673, "epoch": 12.522522522522522, "percentage": 41.74, "elapsed_time": "0:02:49", "remaining_time": "0:03:56", "throughput": 3211.57, "total_tokens": 544592} | |
| {"current_steps": 1395, "total_steps": 3330, "loss": 0.4109, "lr": 0.00036067498907289456, "epoch": 12.567567567567568, "percentage": 41.89, "elapsed_time": "0:02:50", "remaining_time": "0:03:55", "throughput": 3213.53, "total_tokens": 546416} | |
| {"current_steps": 1400, "total_steps": 3330, "loss": 0.3466, "lr": 0.0003594985613580775, "epoch": 12.612612612612612, "percentage": 42.04, "elapsed_time": "0:02:50", "remaining_time": "0:03:55", "throughput": 3215.41, "total_tokens": 548336} | |
| {"current_steps": 1405, "total_steps": 3330, "loss": 0.4403, "lr": 0.00035831912567196715, "epoch": 12.657657657657658, "percentage": 42.19, "elapsed_time": "0:02:51", "remaining_time": "0:03:54", "throughput": 3218.09, "total_tokens": 550384} | |
| {"current_steps": 1410, "total_steps": 3330, "loss": 0.379, "lr": 0.0003571367144141552, "epoch": 12.702702702702704, "percentage": 42.34, "elapsed_time": "0:02:51", "remaining_time": "0:03:53", "throughput": 3221.3, "total_tokens": 552464} | |
| {"current_steps": 1415, "total_steps": 3330, "loss": 0.3625, "lr": 0.00035595136006597375, "epoch": 12.747747747747749, "percentage": 42.49, "elapsed_time": "0:02:51", "remaining_time": "0:03:52", "throughput": 3225.57, "total_tokens": 554768} | |
| {"current_steps": 1420, "total_steps": 3330, "loss": 0.3392, "lr": 0.0003547630951896025, "epoch": 12.792792792792794, "percentage": 42.64, "elapsed_time": "0:02:52", "remaining_time": "0:03:52", "throughput": 3226.21, "total_tokens": 556752} | |
| {"current_steps": 1425, "total_steps": 3330, "loss": 0.3869, "lr": 0.0003535719524271749, "epoch": 12.837837837837839, "percentage": 42.79, "elapsed_time": "0:02:53", "remaining_time": "0:03:51", "throughput": 3228.23, "total_tokens": 558832} | |
| {"current_steps": 1430, "total_steps": 3330, "loss": 0.3764, "lr": 0.00035237796449988086, "epoch": 12.882882882882884, "percentage": 42.94, "elapsed_time": "0:02:53", "remaining_time": "0:03:50", "throughput": 3231.0, "total_tokens": 560880} | |
| {"current_steps": 1435, "total_steps": 3330, "loss": 0.3478, "lr": 0.0003511811642070684, "epoch": 12.927927927927929, "percentage": 43.09, "elapsed_time": "0:02:54", "remaining_time": "0:03:49", "throughput": 3229.67, "total_tokens": 562416} | |
| {"current_steps": 1440, "total_steps": 3330, "loss": 0.3449, "lr": 0.0003499815844253423, "epoch": 12.972972972972974, "percentage": 43.24, "elapsed_time": "0:02:54", "remaining_time": "0:03:49", "throughput": 3231.42, "total_tokens": 564144} | |
| {"current_steps": 1445, "total_steps": 3330, "loss": 0.4018, "lr": 0.00034877925810766086, "epoch": 13.018018018018019, "percentage": 43.39, "elapsed_time": "0:02:55", "remaining_time": "0:03:48", "throughput": 3226.42, "total_tokens": 566336} | |
| {"current_steps": 1450, "total_steps": 3330, "loss": 0.3527, "lr": 0.0003475742182824314, "epoch": 13.063063063063064, "percentage": 43.54, "elapsed_time": "0:02:55", "remaining_time": "0:03:48", "throughput": 3230.92, "total_tokens": 568608} | |
| {"current_steps": 1455, "total_steps": 3330, "loss": 0.3576, "lr": 0.0003463664980526018, "epoch": 13.108108108108109, "percentage": 43.69, "elapsed_time": "0:02:56", "remaining_time": "0:03:47", "throughput": 3234.43, "total_tokens": 570944} | |
| {"current_steps": 1460, "total_steps": 3330, "loss": 0.3338, "lr": 0.0003451561305947522, "epoch": 13.153153153153154, "percentage": 43.84, "elapsed_time": "0:02:57", "remaining_time": "0:03:46", "throughput": 3236.19, "total_tokens": 573056} | |
| {"current_steps": 1465, "total_steps": 3330, "loss": 0.3806, "lr": 0.000343943149158183, "epoch": 13.198198198198199, "percentage": 43.99, "elapsed_time": "0:02:57", "remaining_time": "0:03:46", "throughput": 3239.75, "total_tokens": 575360} | |
| {"current_steps": 1470, "total_steps": 3330, "loss": 0.3276, "lr": 0.00034272758706400193, "epoch": 13.243243243243244, "percentage": 44.14, "elapsed_time": "0:02:58", "remaining_time": "0:03:45", "throughput": 3241.94, "total_tokens": 577504} | |
| {"current_steps": 1475, "total_steps": 3330, "loss": 0.3685, "lr": 0.0003415094777042081, "epoch": 13.288288288288289, "percentage": 44.29, "elapsed_time": "0:02:58", "remaining_time": "0:03:44", "throughput": 3241.91, "total_tokens": 579456} | |
| {"current_steps": 1480, "total_steps": 3330, "loss": 0.4039, "lr": 0.0003402888545407753, "epoch": 13.333333333333334, "percentage": 44.44, "elapsed_time": "0:02:59", "remaining_time": "0:03:44", "throughput": 3242.06, "total_tokens": 581184} | |
| {"current_steps": 1485, "total_steps": 3330, "loss": 0.3558, "lr": 0.0003390657511047326, "epoch": 13.378378378378379, "percentage": 44.59, "elapsed_time": "0:02:59", "remaining_time": "0:03:43", "throughput": 3243.75, "total_tokens": 583136} | |
| {"current_steps": 1490, "total_steps": 3330, "loss": 0.351, "lr": 0.00033784020099524297, "epoch": 13.423423423423424, "percentage": 44.74, "elapsed_time": "0:03:00", "remaining_time": "0:03:42", "throughput": 3245.29, "total_tokens": 585056} | |
| {"current_steps": 1495, "total_steps": 3330, "loss": 0.3586, "lr": 0.00033661223787868097, "epoch": 13.468468468468469, "percentage": 44.89, "elapsed_time": "0:03:00", "remaining_time": "0:03:41", "throughput": 3245.84, "total_tokens": 586784} | |
| {"current_steps": 1500, "total_steps": 3330, "loss": 0.3703, "lr": 0.00033538189548770677, "epoch": 13.513513513513514, "percentage": 45.05, "elapsed_time": "0:03:01", "remaining_time": "0:03:41", "throughput": 3247.86, "total_tokens": 588768} | |
| {"current_steps": 1503, "total_steps": 3330, "eval_loss": 0.35342463850975037, "epoch": 13.54054054054054, "percentage": 45.14, "elapsed_time": "0:03:02", "remaining_time": "0:03:42", "throughput": 3225.89, "total_tokens": 589824} | |
| {"current_steps": 1505, "total_steps": 3330, "loss": 0.3472, "lr": 0.00033414920762034095, "epoch": 13.558558558558559, "percentage": 45.2, "elapsed_time": "0:03:05", "remaining_time": "0:03:44", "throughput": 3188.17, "total_tokens": 590560} | |
| {"current_steps": 1510, "total_steps": 3330, "loss": 0.3228, "lr": 0.0003329142081390348, "epoch": 13.603603603603604, "percentage": 45.35, "elapsed_time": "0:03:05", "remaining_time": "0:03:43", "throughput": 3189.06, "total_tokens": 592416} | |
| {"current_steps": 1515, "total_steps": 3330, "loss": 0.3979, "lr": 0.00033167693096974085, "epoch": 13.64864864864865, "percentage": 45.5, "elapsed_time": "0:03:06", "remaining_time": "0:03:43", "throughput": 3190.89, "total_tokens": 594336} | |
| {"current_steps": 1520, "total_steps": 3330, "loss": 0.3445, "lr": 0.00033043741010098046, "epoch": 13.693693693693694, "percentage": 45.65, "elapsed_time": "0:03:06", "remaining_time": "0:03:42", "throughput": 3192.45, "total_tokens": 596320} | |
| {"current_steps": 1525, "total_steps": 3330, "loss": 0.3552, "lr": 0.00032919567958291073, "epoch": 13.73873873873874, "percentage": 45.8, "elapsed_time": "0:03:07", "remaining_time": "0:03:41", "throughput": 3193.32, "total_tokens": 598240} | |
| {"current_steps": 1530, "total_steps": 3330, "loss": 0.3583, "lr": 0.0003279517735263883, "epoch": 13.783783783783784, "percentage": 45.95, "elapsed_time": "0:03:07", "remaining_time": "0:03:41", "throughput": 3193.12, "total_tokens": 599872} | |
| {"current_steps": 1535, "total_steps": 3330, "loss": 0.3281, "lr": 0.0003267057261020331, "epoch": 13.82882882882883, "percentage": 46.1, "elapsed_time": "0:03:08", "remaining_time": "0:03:40", "throughput": 3194.29, "total_tokens": 601856} | |
| {"current_steps": 1540, "total_steps": 3330, "loss": 0.3492, "lr": 0.00032545757153928923, "epoch": 13.873873873873874, "percentage": 46.25, "elapsed_time": "0:03:08", "remaining_time": "0:03:39", "throughput": 3194.69, "total_tokens": 603648} | |
| {"current_steps": 1545, "total_steps": 3330, "loss": 0.389, "lr": 0.0003242073441254846, "epoch": 13.91891891891892, "percentage": 46.4, "elapsed_time": "0:03:09", "remaining_time": "0:03:38", "throughput": 3199.62, "total_tokens": 606144} | |
| {"current_steps": 1550, "total_steps": 3330, "loss": 0.3362, "lr": 0.00032295507820488944, "epoch": 13.963963963963964, "percentage": 46.55, "elapsed_time": "0:03:09", "remaining_time": "0:03:38", "throughput": 3201.66, "total_tokens": 607904} | |
| {"current_steps": 1555, "total_steps": 3330, "loss": 0.3516, "lr": 0.0003217008081777726, "epoch": 14.00900900900901, "percentage": 46.7, "elapsed_time": "0:03:10", "remaining_time": "0:03:37", "throughput": 3197.55, "total_tokens": 609688} | |
| {"current_steps": 1560, "total_steps": 3330, "loss": 0.3243, "lr": 0.00032044456849945636, "epoch": 14.054054054054054, "percentage": 46.85, "elapsed_time": "0:03:11", "remaining_time": "0:03:36", "throughput": 3199.72, "total_tokens": 611608} | |
| {"current_steps": 1565, "total_steps": 3330, "loss": 0.4164, "lr": 0.00031918639367937025, "epoch": 14.0990990990991, "percentage": 47.0, "elapsed_time": "0:03:11", "remaining_time": "0:03:36", "throughput": 3203.76, "total_tokens": 613848} | |
| {"current_steps": 1570, "total_steps": 3330, "loss": 0.3623, "lr": 0.00031792631828010323, "epoch": 14.144144144144144, "percentage": 47.15, "elapsed_time": "0:03:12", "remaining_time": "0:03:35", "throughput": 3204.84, "total_tokens": 615704} | |
| {"current_steps": 1575, "total_steps": 3330, "loss": 0.3426, "lr": 0.0003166643769164533, "epoch": 14.18918918918919, "percentage": 47.3, "elapsed_time": "0:03:12", "remaining_time": "0:03:34", "throughput": 3207.84, "total_tokens": 617784} | |
| {"current_steps": 1580, "total_steps": 3330, "loss": 0.3457, "lr": 0.00031540060425447813, "epoch": 14.234234234234235, "percentage": 47.45, "elapsed_time": "0:03:13", "remaining_time": "0:03:33", "throughput": 3208.96, "total_tokens": 619768} | |
| {"current_steps": 1585, "total_steps": 3330, "loss": 0.3775, "lr": 0.0003141350350105413, "epoch": 14.27927927927928, "percentage": 47.6, "elapsed_time": "0:03:13", "remaining_time": "0:03:33", "throughput": 3210.64, "total_tokens": 621816} | |
| {"current_steps": 1590, "total_steps": 3330, "loss": 0.3601, "lr": 0.0003128677039503594, "epoch": 14.324324324324325, "percentage": 47.75, "elapsed_time": "0:03:14", "remaining_time": "0:03:32", "throughput": 3213.43, "total_tokens": 623864} | |
| {"current_steps": 1595, "total_steps": 3330, "loss": 0.3431, "lr": 0.00031159864588804694, "epoch": 14.36936936936937, "percentage": 47.9, "elapsed_time": "0:03:14", "remaining_time": "0:03:31", "throughput": 3214.85, "total_tokens": 625752} | |
| {"current_steps": 1600, "total_steps": 3330, "loss": 0.303, "lr": 0.0003103278956851598, "epoch": 14.414414414414415, "percentage": 48.05, "elapsed_time": "0:03:15", "remaining_time": "0:03:31", "throughput": 3215.84, "total_tokens": 627736} | |
| {"current_steps": 1605, "total_steps": 3330, "loss": 0.3428, "lr": 0.0003090554882497378, "epoch": 14.45945945945946, "percentage": 48.2, "elapsed_time": "0:03:15", "remaining_time": "0:03:30", "throughput": 3218.44, "total_tokens": 629688} | |
| {"current_steps": 1610, "total_steps": 3330, "loss": 0.3394, "lr": 0.00030778145853534557, "epoch": 14.504504504504505, "percentage": 48.35, "elapsed_time": "0:03:16", "remaining_time": "0:03:29", "throughput": 3220.54, "total_tokens": 631576} | |
| {"current_steps": 1615, "total_steps": 3330, "loss": 0.3293, "lr": 0.0003065058415401123, "epoch": 14.54954954954955, "percentage": 48.5, "elapsed_time": "0:03:16", "remaining_time": "0:03:28", "throughput": 3224.52, "total_tokens": 633816} | |
| {"current_steps": 1620, "total_steps": 3330, "loss": 0.379, "lr": 0.00030522867230577057, "epoch": 14.594594594594595, "percentage": 48.65, "elapsed_time": "0:03:17", "remaining_time": "0:03:28", "throughput": 3224.7, "total_tokens": 635608} | |
| {"current_steps": 1625, "total_steps": 3330, "loss": 0.3417, "lr": 0.00030394998591669364, "epoch": 14.63963963963964, "percentage": 48.8, "elapsed_time": "0:03:17", "remaining_time": "0:03:27", "throughput": 3225.68, "total_tokens": 637400} | |
| {"current_steps": 1630, "total_steps": 3330, "loss": 0.3849, "lr": 0.0003026698174989316, "epoch": 14.684684684684685, "percentage": 48.95, "elapsed_time": "0:03:18", "remaining_time": "0:03:26", "throughput": 3227.36, "total_tokens": 639352} | |
| {"current_steps": 1635, "total_steps": 3330, "loss": 0.362, "lr": 0.0003013882022192465, "epoch": 14.72972972972973, "percentage": 49.1, "elapsed_time": "0:03:18", "remaining_time": "0:03:25", "throughput": 3230.9, "total_tokens": 641560} | |
| {"current_steps": 1640, "total_steps": 3330, "loss": 0.3574, "lr": 0.0003001051752841462, "epoch": 14.774774774774775, "percentage": 49.25, "elapsed_time": "0:03:19", "remaining_time": "0:03:25", "throughput": 3232.83, "total_tokens": 643480} | |
| {"current_steps": 1645, "total_steps": 3330, "loss": 0.3483, "lr": 0.0002988207719389175, "epoch": 14.81981981981982, "percentage": 49.4, "elapsed_time": "0:03:19", "remaining_time": "0:03:24", "throughput": 3235.71, "total_tokens": 645464} | |
| {"current_steps": 1650, "total_steps": 3330, "loss": 0.3562, "lr": 0.0002975350274666577, "epoch": 14.864864864864865, "percentage": 49.55, "elapsed_time": "0:03:19", "remaining_time": "0:03:23", "throughput": 3237.72, "total_tokens": 647416} | |
| {"current_steps": 1655, "total_steps": 3330, "loss": 0.3806, "lr": 0.0002962479771873053, "epoch": 14.90990990990991, "percentage": 49.7, "elapsed_time": "0:03:20", "remaining_time": "0:03:22", "throughput": 3238.12, "total_tokens": 649240} | |
| {"current_steps": 1660, "total_steps": 3330, "loss": 0.3684, "lr": 0.00029495965645667, "epoch": 14.954954954954955, "percentage": 49.85, "elapsed_time": "0:03:20", "remaining_time": "0:03:22", "throughput": 3240.93, "total_tokens": 651256} | |
| {"current_steps": 1665, "total_steps": 3330, "loss": 0.333, "lr": 0.0002936701006654613, "epoch": 15.0, "percentage": 50.0, "elapsed_time": "0:03:21", "remaining_time": "0:03:21", "throughput": 3240.85, "total_tokens": 653216} | |
| {"current_steps": 1670, "total_steps": 3330, "loss": 0.3643, "lr": 0.0002923793452383163, "epoch": 15.045045045045045, "percentage": 50.15, "elapsed_time": "0:03:22", "remaining_time": "0:03:21", "throughput": 3239.02, "total_tokens": 655200} | |
| {"current_steps": 1670, "total_steps": 3330, "eval_loss": 0.3588047921657562, "epoch": 15.045045045045045, "percentage": 50.15, "elapsed_time": "0:03:23", "remaining_time": "0:03:22", "throughput": 3218.56, "total_tokens": 655200} | |
| {"current_steps": 1675, "total_steps": 3330, "loss": 0.353, "lr": 0.00029108742563282655, "epoch": 15.09009009009009, "percentage": 50.3, "elapsed_time": "0:03:26", "remaining_time": "0:03:23", "throughput": 3183.75, "total_tokens": 657056} | |
| {"current_steps": 1680, "total_steps": 3330, "loss": 0.3474, "lr": 0.00028979437733856427, "epoch": 15.135135135135135, "percentage": 50.45, "elapsed_time": "0:03:26", "remaining_time": "0:03:23", "throughput": 3186.75, "total_tokens": 659232} | |
| {"current_steps": 1685, "total_steps": 3330, "loss": 0.3456, "lr": 0.000288500235876107, "epoch": 15.18018018018018, "percentage": 50.6, "elapsed_time": "0:03:27", "remaining_time": "0:03:22", "throughput": 3188.54, "total_tokens": 661184} | |
| {"current_steps": 1690, "total_steps": 3330, "loss": 0.3307, "lr": 0.00028720503679606225, "epoch": 15.225225225225225, "percentage": 50.75, "elapsed_time": "0:03:27", "remaining_time": "0:03:21", "throughput": 3189.57, "total_tokens": 663040} | |
| {"current_steps": 1695, "total_steps": 3330, "loss": 0.3643, "lr": 0.0002859088156780906, "epoch": 15.27027027027027, "percentage": 50.9, "elapsed_time": "0:03:28", "remaining_time": "0:03:20", "throughput": 3191.78, "total_tokens": 665088} | |
| {"current_steps": 1700, "total_steps": 3330, "loss": 0.3735, "lr": 0.00028461160812992846, "epoch": 15.315315315315315, "percentage": 51.05, "elapsed_time": "0:03:28", "remaining_time": "0:03:20", "throughput": 3192.1, "total_tokens": 666880} | |
| {"current_steps": 1705, "total_steps": 3330, "loss": 0.3643, "lr": 0.00028331344978640993, "epoch": 15.36036036036036, "percentage": 51.2, "elapsed_time": "0:03:29", "remaining_time": "0:03:19", "throughput": 3194.88, "total_tokens": 669152} | |
| {"current_steps": 1710, "total_steps": 3330, "loss": 0.359, "lr": 0.00028201437630848787, "epoch": 15.405405405405405, "percentage": 51.35, "elapsed_time": "0:03:30", "remaining_time": "0:03:18", "throughput": 3196.04, "total_tokens": 671168} | |
| {"current_steps": 1715, "total_steps": 3330, "loss": 0.3206, "lr": 0.0002807144233822542, "epoch": 15.45045045045045, "percentage": 51.5, "elapsed_time": "0:03:30", "remaining_time": "0:03:18", "throughput": 3197.19, "total_tokens": 673312} | |
| {"current_steps": 1720, "total_steps": 3330, "loss": 0.367, "lr": 0.0002794136267179596, "epoch": 15.495495495495495, "percentage": 51.65, "elapsed_time": "0:03:31", "remaining_time": "0:03:17", "throughput": 3198.44, "total_tokens": 675200} | |
| {"current_steps": 1725, "total_steps": 3330, "loss": 0.364, "lr": 0.00027811202204903297, "epoch": 15.54054054054054, "percentage": 51.8, "elapsed_time": "0:03:31", "remaining_time": "0:03:16", "throughput": 3199.28, "total_tokens": 677088} | |
| {"current_steps": 1730, "total_steps": 3330, "loss": 0.3549, "lr": 0.00027680964513109876, "epoch": 15.585585585585585, "percentage": 51.95, "elapsed_time": "0:03:32", "remaining_time": "0:03:16", "throughput": 3202.36, "total_tokens": 679360} | |
| {"current_steps": 1735, "total_steps": 3330, "loss": 0.3467, "lr": 0.00027550653174099603, "epoch": 15.63063063063063, "percentage": 52.1, "elapsed_time": "0:03:32", "remaining_time": "0:03:15", "throughput": 3202.61, "total_tokens": 681120} | |
| {"current_steps": 1740, "total_steps": 3330, "loss": 0.3562, "lr": 0.0002742027176757948, "epoch": 15.675675675675675, "percentage": 52.25, "elapsed_time": "0:03:33", "remaining_time": "0:03:14", "throughput": 3203.44, "total_tokens": 683008} | |
| {"current_steps": 1745, "total_steps": 3330, "loss": 0.3613, "lr": 0.0002728982387518129, "epoch": 15.72072072072072, "percentage": 52.4, "elapsed_time": "0:03:33", "remaining_time": "0:03:14", "throughput": 3203.48, "total_tokens": 684704} | |
| {"current_steps": 1750, "total_steps": 3330, "loss": 0.3453, "lr": 0.0002715931308036321, "epoch": 15.765765765765765, "percentage": 52.55, "elapsed_time": "0:03:34", "remaining_time": "0:03:13", "throughput": 3203.67, "total_tokens": 686528} | |
| {"current_steps": 1755, "total_steps": 3330, "loss": 0.3251, "lr": 0.0002702874296831139, "epoch": 15.81081081081081, "percentage": 52.7, "elapsed_time": "0:03:34", "remaining_time": "0:03:12", "throughput": 3204.36, "total_tokens": 688256} | |
| {"current_steps": 1760, "total_steps": 3330, "loss": 0.4018, "lr": 0.0002689811712584143, "epoch": 15.855855855855856, "percentage": 52.85, "elapsed_time": "0:03:35", "remaining_time": "0:03:12", "throughput": 3206.4, "total_tokens": 690400} | |
| {"current_steps": 1765, "total_steps": 3330, "loss": 0.3742, "lr": 0.00026767439141299866, "epoch": 15.9009009009009, "percentage": 53.0, "elapsed_time": "0:03:35", "remaining_time": "0:03:11", "throughput": 3206.89, "total_tokens": 692256} | |
| {"current_steps": 1770, "total_steps": 3330, "loss": 0.3503, "lr": 0.00026636712604465626, "epoch": 15.945945945945946, "percentage": 53.15, "elapsed_time": "0:03:36", "remaining_time": "0:03:10", "throughput": 3210.07, "total_tokens": 694560} | |
| {"current_steps": 1775, "total_steps": 3330, "loss": 0.3626, "lr": 0.0002650594110645136, "epoch": 15.99099099099099, "percentage": 53.3, "elapsed_time": "0:03:36", "remaining_time": "0:03:09", "throughput": 3212.73, "total_tokens": 696608} | |
| {"current_steps": 1780, "total_steps": 3330, "loss": 0.3368, "lr": 0.0002637512823960483, "epoch": 16.036036036036037, "percentage": 53.45, "elapsed_time": "0:03:37", "remaining_time": "0:03:09", "throughput": 3206.16, "total_tokens": 698424} | |
| {"current_steps": 1785, "total_steps": 3330, "loss": 0.3531, "lr": 0.00026244277597410213, "epoch": 16.08108108108108, "percentage": 53.6, "elapsed_time": "0:03:38", "remaining_time": "0:03:09", "throughput": 3207.54, "total_tokens": 700568} | |
| {"current_steps": 1790, "total_steps": 3330, "loss": 0.37, "lr": 0.0002611339277438943, "epoch": 16.126126126126128, "percentage": 53.75, "elapsed_time": "0:03:38", "remaining_time": "0:03:08", "throughput": 3209.1, "total_tokens": 702712} | |
| {"current_steps": 1795, "total_steps": 3330, "loss": 0.3759, "lr": 0.0002598247736600328, "epoch": 16.17117117117117, "percentage": 53.9, "elapsed_time": "0:03:39", "remaining_time": "0:03:07", "throughput": 3208.69, "total_tokens": 704568} | |
| {"current_steps": 1800, "total_steps": 3330, "loss": 0.3573, "lr": 0.00025851534968552843, "epoch": 16.216216216216218, "percentage": 54.05, "elapsed_time": "0:03:40", "remaining_time": "0:03:07", "throughput": 3208.77, "total_tokens": 706488} | |
| {"current_steps": 1805, "total_steps": 3330, "loss": 0.3595, "lr": 0.0002572056917908055, "epoch": 16.26126126126126, "percentage": 54.2, "elapsed_time": "0:03:40", "remaining_time": "0:03:06", "throughput": 3211.75, "total_tokens": 708920} | |
| {"current_steps": 1810, "total_steps": 3330, "loss": 0.341, "lr": 0.00025589583595271424, "epoch": 16.306306306306308, "percentage": 54.35, "elapsed_time": "0:03:41", "remaining_time": "0:03:05", "throughput": 3215.43, "total_tokens": 711352} | |
| {"current_steps": 1815, "total_steps": 3330, "loss": 0.3457, "lr": 0.0002545858181535426, "epoch": 16.35135135135135, "percentage": 54.5, "elapsed_time": "0:03:41", "remaining_time": "0:03:05", "throughput": 3217.3, "total_tokens": 713304} | |
| {"current_steps": 1820, "total_steps": 3330, "loss": 0.3579, "lr": 0.00025327567438002775, "epoch": 16.396396396396398, "percentage": 54.65, "elapsed_time": "0:03:42", "remaining_time": "0:03:04", "throughput": 3217.52, "total_tokens": 715096} | |
| {"current_steps": 1825, "total_steps": 3330, "loss": 0.3405, "lr": 0.0002519654406223671, "epoch": 16.44144144144144, "percentage": 54.8, "elapsed_time": "0:03:42", "remaining_time": "0:03:03", "throughput": 3217.7, "total_tokens": 716760} | |
| {"current_steps": 1830, "total_steps": 3330, "loss": 0.3609, "lr": 0.0002506551528732302, "epoch": 16.486486486486488, "percentage": 54.95, "elapsed_time": "0:03:43", "remaining_time": "0:03:02", "throughput": 3218.72, "total_tokens": 718456} | |
| {"current_steps": 1835, "total_steps": 3330, "loss": 0.3647, "lr": 0.0002493448471267698, "epoch": 16.53153153153153, "percentage": 55.11, "elapsed_time": "0:03:43", "remaining_time": "0:03:02", "throughput": 3219.44, "total_tokens": 720280} | |
| {"current_steps": 1837, "total_steps": 3330, "eval_loss": 0.35697561502456665, "epoch": 16.54954954954955, "percentage": 55.17, "elapsed_time": "0:03:45", "remaining_time": "0:03:03", "throughput": 3201.71, "total_tokens": 721016} | |
| {"current_steps": 1840, "total_steps": 3330, "loss": 0.3843, "lr": 0.0002480345593776329, "epoch": 16.576576576576578, "percentage": 55.26, "elapsed_time": "0:03:48", "remaining_time": "0:03:04", "throughput": 3166.56, "total_tokens": 721976} | |
| {"current_steps": 1845, "total_steps": 3330, "loss": 0.367, "lr": 0.00024672432561997237, "epoch": 16.62162162162162, "percentage": 55.41, "elapsed_time": "0:03:48", "remaining_time": "0:03:03", "throughput": 3166.97, "total_tokens": 723608} | |
| {"current_steps": 1850, "total_steps": 3330, "loss": 0.3813, "lr": 0.0002454141818464574, "epoch": 16.666666666666668, "percentage": 55.56, "elapsed_time": "0:03:49", "remaining_time": "0:03:03", "throughput": 3168.15, "total_tokens": 725560} | |
| {"current_steps": 1855, "total_steps": 3330, "loss": 0.3493, "lr": 0.0002441041640472858, "epoch": 16.71171171171171, "percentage": 55.71, "elapsed_time": "0:03:49", "remaining_time": "0:03:02", "throughput": 3169.49, "total_tokens": 727320} | |
| {"current_steps": 1860, "total_steps": 3330, "loss": 0.3563, "lr": 0.00024279430820919458, "epoch": 16.756756756756758, "percentage": 55.86, "elapsed_time": "0:03:49", "remaining_time": "0:03:01", "throughput": 3171.06, "total_tokens": 729304} | |
| {"current_steps": 1865, "total_steps": 3330, "loss": 0.3437, "lr": 0.00024148465031447155, "epoch": 16.8018018018018, "percentage": 56.01, "elapsed_time": "0:03:50", "remaining_time": "0:03:01", "throughput": 3173.12, "total_tokens": 731544} | |
| {"current_steps": 1870, "total_steps": 3330, "loss": 0.3361, "lr": 0.00024017522633996722, "epoch": 16.846846846846848, "percentage": 56.16, "elapsed_time": "0:03:51", "remaining_time": "0:03:00", "throughput": 3174.84, "total_tokens": 733624} | |
| {"current_steps": 1875, "total_steps": 3330, "loss": 0.3625, "lr": 0.00023886607225610582, "epoch": 16.89189189189189, "percentage": 56.31, "elapsed_time": "0:03:51", "remaining_time": "0:02:59", "throughput": 3178.42, "total_tokens": 736024} | |
| {"current_steps": 1880, "total_steps": 3330, "loss": 0.3521, "lr": 0.0002375572240258978, "epoch": 16.936936936936938, "percentage": 56.46, "elapsed_time": "0:03:52", "remaining_time": "0:02:59", "throughput": 3180.32, "total_tokens": 738136} | |
| {"current_steps": 1885, "total_steps": 3330, "loss": 0.3756, "lr": 0.00023624871760395176, "epoch": 16.98198198198198, "percentage": 56.61, "elapsed_time": "0:03:52", "remaining_time": "0:02:58", "throughput": 3182.18, "total_tokens": 740024} | |
| {"current_steps": 1890, "total_steps": 3330, "loss": 0.3346, "lr": 0.00023494058893548653, "epoch": 17.027027027027028, "percentage": 56.76, "elapsed_time": "0:03:53", "remaining_time": "0:02:57", "throughput": 3178.27, "total_tokens": 741904} | |
| {"current_steps": 1895, "total_steps": 3330, "loss": 0.363, "lr": 0.00023363287395534375, "epoch": 17.07207207207207, "percentage": 56.91, "elapsed_time": "0:03:53", "remaining_time": "0:02:57", "throughput": 3180.99, "total_tokens": 743984} | |
| {"current_steps": 1900, "total_steps": 3330, "loss": 0.3736, "lr": 0.00023232560858700135, "epoch": 17.117117117117118, "percentage": 57.06, "elapsed_time": "0:03:54", "remaining_time": "0:02:56", "throughput": 3180.94, "total_tokens": 745648} | |
| {"current_steps": 1905, "total_steps": 3330, "loss": 0.3406, "lr": 0.00023101882874158582, "epoch": 17.16216216216216, "percentage": 57.21, "elapsed_time": "0:03:54", "remaining_time": "0:02:55", "throughput": 3181.61, "total_tokens": 747376} | |
| {"current_steps": 1910, "total_steps": 3330, "loss": 0.358, "lr": 0.00022971257031688614, "epoch": 17.207207207207208, "percentage": 57.36, "elapsed_time": "0:03:55", "remaining_time": "0:02:55", "throughput": 3182.59, "total_tokens": 749232} | |
| {"current_steps": 1915, "total_steps": 3330, "loss": 0.3406, "lr": 0.0002284068691963679, "epoch": 17.25225225225225, "percentage": 57.51, "elapsed_time": "0:03:55", "remaining_time": "0:02:54", "throughput": 3183.95, "total_tokens": 751184} | |
| {"current_steps": 1920, "total_steps": 3330, "loss": 0.3266, "lr": 0.00022710176124818721, "epoch": 17.2972972972973, "percentage": 57.66, "elapsed_time": "0:03:56", "remaining_time": "0:02:53", "throughput": 3185.08, "total_tokens": 753072} | |
| {"current_steps": 1925, "total_steps": 3330, "loss": 0.358, "lr": 0.00022579728232420524, "epoch": 17.34234234234234, "percentage": 57.81, "elapsed_time": "0:03:56", "remaining_time": "0:02:52", "throughput": 3188.12, "total_tokens": 755280} | |
| {"current_steps": 1930, "total_steps": 3330, "loss": 0.3415, "lr": 0.00022449346825900398, "epoch": 17.38738738738739, "percentage": 57.96, "elapsed_time": "0:03:57", "remaining_time": "0:02:52", "throughput": 3191.51, "total_tokens": 757584} | |
| {"current_steps": 1935, "total_steps": 3330, "loss": 0.3345, "lr": 0.0002231903548689013, "epoch": 17.43243243243243, "percentage": 58.11, "elapsed_time": "0:03:57", "remaining_time": "0:02:51", "throughput": 3194.5, "total_tokens": 760080} | |
| {"current_steps": 1940, "total_steps": 3330, "loss": 0.3513, "lr": 0.0002218879779509671, "epoch": 17.47747747747748, "percentage": 58.26, "elapsed_time": "0:03:58", "remaining_time": "0:02:50", "throughput": 3194.66, "total_tokens": 761904} | |
| {"current_steps": 1945, "total_steps": 3330, "loss": 0.3454, "lr": 0.0002205863732820404, "epoch": 17.52252252252252, "percentage": 58.41, "elapsed_time": "0:03:58", "remaining_time": "0:02:50", "throughput": 3196.2, "total_tokens": 763728} | |
| {"current_steps": 1950, "total_steps": 3330, "loss": 0.3534, "lr": 0.0002192855766177459, "epoch": 17.56756756756757, "percentage": 58.56, "elapsed_time": "0:03:59", "remaining_time": "0:02:49", "throughput": 3197.47, "total_tokens": 765552} | |
| {"current_steps": 1955, "total_steps": 3330, "loss": 0.3411, "lr": 0.00021798562369151214, "epoch": 17.61261261261261, "percentage": 58.71, "elapsed_time": "0:03:59", "remaining_time": "0:02:48", "throughput": 3197.94, "total_tokens": 767472} | |
| {"current_steps": 1960, "total_steps": 3330, "loss": 0.3653, "lr": 0.00021668655021359008, "epoch": 17.65765765765766, "percentage": 58.86, "elapsed_time": "0:04:00", "remaining_time": "0:02:48", "throughput": 3198.33, "total_tokens": 769328} | |
| {"current_steps": 1965, "total_steps": 3330, "loss": 0.3634, "lr": 0.0002153883918700716, "epoch": 17.7027027027027, "percentage": 59.01, "elapsed_time": "0:04:01", "remaining_time": "0:02:47", "throughput": 3200.39, "total_tokens": 771344} | |
| {"current_steps": 1970, "total_steps": 3330, "loss": 0.3583, "lr": 0.00021409118432190943, "epoch": 17.74774774774775, "percentage": 59.16, "elapsed_time": "0:04:01", "remaining_time": "0:02:46", "throughput": 3200.52, "total_tokens": 773072} | |
| {"current_steps": 1975, "total_steps": 3330, "loss": 0.3642, "lr": 0.0002127949632039378, "epoch": 17.792792792792792, "percentage": 59.31, "elapsed_time": "0:04:02", "remaining_time": "0:02:46", "throughput": 3201.93, "total_tokens": 775024} | |
| {"current_steps": 1980, "total_steps": 3330, "loss": 0.3709, "lr": 0.00021149976412389301, "epoch": 17.83783783783784, "percentage": 59.46, "elapsed_time": "0:04:02", "remaining_time": "0:02:45", "throughput": 3205.32, "total_tokens": 777392} | |
| {"current_steps": 1985, "total_steps": 3330, "loss": 0.3605, "lr": 0.00021020562266143571, "epoch": 17.882882882882882, "percentage": 59.61, "elapsed_time": "0:04:03", "remaining_time": "0:02:44", "throughput": 3208.07, "total_tokens": 779632} | |
| {"current_steps": 1990, "total_steps": 3330, "loss": 0.3565, "lr": 0.00020891257436717354, "epoch": 17.92792792792793, "percentage": 59.76, "elapsed_time": "0:04:03", "remaining_time": "0:02:43", "throughput": 3209.18, "total_tokens": 781424} | |
| {"current_steps": 1995, "total_steps": 3330, "loss": 0.351, "lr": 0.00020762065476168375, "epoch": 17.972972972972972, "percentage": 59.91, "elapsed_time": "0:04:03", "remaining_time": "0:02:43", "throughput": 3211.81, "total_tokens": 783504} | |
| {"current_steps": 2000, "total_steps": 3330, "loss": 0.3484, "lr": 0.00020632989933453877, "epoch": 18.01801801801802, "percentage": 60.06, "elapsed_time": "0:04:04", "remaining_time": "0:02:42", "throughput": 3205.8, "total_tokens": 785256} | |
| {"current_steps": 2004, "total_steps": 3330, "eval_loss": 0.35014501214027405, "epoch": 18.054054054054053, "percentage": 60.18, "elapsed_time": "0:04:06", "remaining_time": "0:02:43", "throughput": 3190.16, "total_tokens": 787016} | |
| {"current_steps": 2005, "total_steps": 3330, "loss": 0.3465, "lr": 0.00020504034354333007, "epoch": 18.063063063063062, "percentage": 60.21, "elapsed_time": "0:04:09", "remaining_time": "0:02:44", "throughput": 3162.68, "total_tokens": 787656} | |
| {"current_steps": 2010, "total_steps": 3330, "loss": 0.3516, "lr": 0.00020375202281269474, "epoch": 18.10810810810811, "percentage": 60.36, "elapsed_time": "0:04:09", "remaining_time": "0:02:43", "throughput": 3162.97, "total_tokens": 789352} | |
| {"current_steps": 2015, "total_steps": 3330, "loss": 0.3434, "lr": 0.00020246497253334232, "epoch": 18.153153153153152, "percentage": 60.51, "elapsed_time": "0:04:10", "remaining_time": "0:02:43", "throughput": 3165.05, "total_tokens": 791400} | |
| {"current_steps": 2020, "total_steps": 3330, "loss": 0.3454, "lr": 0.00020117922806108256, "epoch": 18.1981981981982, "percentage": 60.66, "elapsed_time": "0:04:10", "remaining_time": "0:02:42", "throughput": 3167.25, "total_tokens": 793384} | |
| {"current_steps": 2025, "total_steps": 3330, "loss": 0.3578, "lr": 0.0001998948247158538, "epoch": 18.243243243243242, "percentage": 60.81, "elapsed_time": "0:04:10", "remaining_time": "0:02:41", "throughput": 3168.47, "total_tokens": 795208} | |
| {"current_steps": 2030, "total_steps": 3330, "loss": 0.3378, "lr": 0.00019861179778075355, "epoch": 18.28828828828829, "percentage": 60.96, "elapsed_time": "0:04:11", "remaining_time": "0:02:41", "throughput": 3168.95, "total_tokens": 796808} | |
| {"current_steps": 2035, "total_steps": 3330, "loss": 0.3566, "lr": 0.0001973301825010685, "epoch": 18.333333333333332, "percentage": 61.11, "elapsed_time": "0:04:11", "remaining_time": "0:02:40", "throughput": 3171.4, "total_tokens": 798984} | |
| {"current_steps": 2040, "total_steps": 3330, "loss": 0.3535, "lr": 0.00019605001408330632, "epoch": 18.37837837837838, "percentage": 61.26, "elapsed_time": "0:04:12", "remaining_time": "0:02:39", "throughput": 3171.54, "total_tokens": 800840} | |
| {"current_steps": 2045, "total_steps": 3330, "loss": 0.3403, "lr": 0.00019477132769422947, "epoch": 18.423423423423422, "percentage": 61.41, "elapsed_time": "0:04:12", "remaining_time": "0:02:38", "throughput": 3172.72, "total_tokens": 802664} | |
| {"current_steps": 2050, "total_steps": 3330, "loss": 0.3461, "lr": 0.00019349415845988776, "epoch": 18.46846846846847, "percentage": 61.56, "elapsed_time": "0:04:13", "remaining_time": "0:02:38", "throughput": 3174.07, "total_tokens": 804520} | |
| {"current_steps": 2055, "total_steps": 3330, "loss": 0.3545, "lr": 0.00019221854146465444, "epoch": 18.513513513513512, "percentage": 61.71, "elapsed_time": "0:04:13", "remaining_time": "0:02:37", "throughput": 3176.58, "total_tokens": 806632} | |
| {"current_steps": 2060, "total_steps": 3330, "loss": 0.3518, "lr": 0.00019094451175026217, "epoch": 18.55855855855856, "percentage": 61.86, "elapsed_time": "0:04:14", "remaining_time": "0:02:36", "throughput": 3178.66, "total_tokens": 808616} | |
| {"current_steps": 2065, "total_steps": 3330, "loss": 0.3558, "lr": 0.0001896721043148402, "epoch": 18.603603603603602, "percentage": 62.01, "elapsed_time": "0:04:14", "remaining_time": "0:02:36", "throughput": 3179.48, "total_tokens": 810536} | |
| {"current_steps": 2070, "total_steps": 3330, "loss": 0.3543, "lr": 0.00018840135411195307, "epoch": 18.64864864864865, "percentage": 62.16, "elapsed_time": "0:04:15", "remaining_time": "0:02:35", "throughput": 3181.31, "total_tokens": 812424} | |
| {"current_steps": 2075, "total_steps": 3330, "loss": 0.3484, "lr": 0.00018713229604964065, "epoch": 18.693693693693692, "percentage": 62.31, "elapsed_time": "0:04:15", "remaining_time": "0:02:34", "throughput": 3183.81, "total_tokens": 814632} | |
| {"current_steps": 2080, "total_steps": 3330, "loss": 0.3513, "lr": 0.00018586496498945875, "epoch": 18.73873873873874, "percentage": 62.46, "elapsed_time": "0:04:16", "remaining_time": "0:02:34", "throughput": 3184.47, "total_tokens": 816360} | |
| {"current_steps": 2085, "total_steps": 3330, "loss": 0.3141, "lr": 0.00018459939574552186, "epoch": 18.783783783783782, "percentage": 62.61, "elapsed_time": "0:04:16", "remaining_time": "0:02:33", "throughput": 3187.43, "total_tokens": 818632} | |
| {"current_steps": 2090, "total_steps": 3330, "loss": 0.4119, "lr": 0.00018333562308354666, "epoch": 18.82882882882883, "percentage": 62.76, "elapsed_time": "0:04:17", "remaining_time": "0:02:32", "throughput": 3189.96, "total_tokens": 820712} | |
| {"current_steps": 2095, "total_steps": 3330, "loss": 0.3738, "lr": 0.0001820736817198969, "epoch": 18.873873873873872, "percentage": 62.91, "elapsed_time": "0:04:17", "remaining_time": "0:02:31", "throughput": 3193.32, "total_tokens": 823112} | |
| {"current_steps": 2100, "total_steps": 3330, "loss": 0.3442, "lr": 0.0001808136063206297, "epoch": 18.91891891891892, "percentage": 63.06, "elapsed_time": "0:04:18", "remaining_time": "0:02:31", "throughput": 3194.43, "total_tokens": 824840} | |
| {"current_steps": 2105, "total_steps": 3330, "loss": 0.3343, "lr": 0.0001795554315005437, "epoch": 18.963963963963963, "percentage": 63.21, "elapsed_time": "0:04:18", "remaining_time": "0:02:30", "throughput": 3196.66, "total_tokens": 826792} | |
| {"current_steps": 2110, "total_steps": 3330, "loss": 0.3641, "lr": 0.00017829919182222752, "epoch": 19.00900900900901, "percentage": 63.36, "elapsed_time": "0:04:19", "remaining_time": "0:02:30", "throughput": 3191.63, "total_tokens": 828464} | |
| {"current_steps": 2115, "total_steps": 3330, "loss": 0.3437, "lr": 0.0001770449217951105, "epoch": 19.054054054054053, "percentage": 63.51, "elapsed_time": "0:04:20", "remaining_time": "0:02:29", "throughput": 3193.92, "total_tokens": 830768} | |
| {"current_steps": 2120, "total_steps": 3330, "loss": 0.3369, "lr": 0.00017579265587451542, "epoch": 19.0990990990991, "percentage": 63.66, "elapsed_time": "0:04:20", "remaining_time": "0:02:28", "throughput": 3195.75, "total_tokens": 832752} | |
| {"current_steps": 2125, "total_steps": 3330, "loss": 0.3546, "lr": 0.00017454242846071084, "epoch": 19.144144144144143, "percentage": 63.81, "elapsed_time": "0:04:21", "remaining_time": "0:02:28", "throughput": 3199.24, "total_tokens": 835184} | |
| {"current_steps": 2130, "total_steps": 3330, "loss": 0.3485, "lr": 0.00017329427389796686, "epoch": 19.18918918918919, "percentage": 63.96, "elapsed_time": "0:04:21", "remaining_time": "0:02:27", "throughput": 3201.76, "total_tokens": 837296} | |
| {"current_steps": 2135, "total_steps": 3330, "loss": 0.3563, "lr": 0.00017204822647361173, "epoch": 19.234234234234233, "percentage": 64.11, "elapsed_time": "0:04:21", "remaining_time": "0:02:26", "throughput": 3203.64, "total_tokens": 839312} | |
| {"current_steps": 2140, "total_steps": 3330, "loss": 0.3526, "lr": 0.00017080432041708939, "epoch": 19.27927927927928, "percentage": 64.26, "elapsed_time": "0:04:22", "remaining_time": "0:02:25", "throughput": 3204.99, "total_tokens": 841264} | |
| {"current_steps": 2145, "total_steps": 3330, "loss": 0.3502, "lr": 0.00016956258989901955, "epoch": 19.324324324324323, "percentage": 64.41, "elapsed_time": "0:04:22", "remaining_time": "0:02:25", "throughput": 3206.63, "total_tokens": 843216} | |
| {"current_steps": 2150, "total_steps": 3330, "loss": 0.3429, "lr": 0.00016832306903025925, "epoch": 19.36936936936937, "percentage": 64.56, "elapsed_time": "0:04:23", "remaining_time": "0:02:24", "throughput": 3208.11, "total_tokens": 845104} | |
| {"current_steps": 2155, "total_steps": 3330, "loss": 0.347, "lr": 0.0001670857918609653, "epoch": 19.414414414414413, "percentage": 64.71, "elapsed_time": "0:04:23", "remaining_time": "0:02:23", "throughput": 3209.35, "total_tokens": 846832} | |
| {"current_steps": 2160, "total_steps": 3330, "loss": 0.3285, "lr": 0.00016585079237965906, "epoch": 19.45945945945946, "percentage": 64.86, "elapsed_time": "0:04:24", "remaining_time": "0:02:23", "throughput": 3212.04, "total_tokens": 849072} | |
| {"current_steps": 2165, "total_steps": 3330, "loss": 0.3499, "lr": 0.00016461810451229324, "epoch": 19.504504504504503, "percentage": 65.02, "elapsed_time": "0:04:24", "remaining_time": "0:02:22", "throughput": 3214.68, "total_tokens": 851184} | |
| {"current_steps": 2170, "total_steps": 3330, "loss": 0.3594, "lr": 0.00016338776212131918, "epoch": 19.54954954954955, "percentage": 65.17, "elapsed_time": "0:04:25", "remaining_time": "0:02:21", "throughput": 3216.6, "total_tokens": 853328} | |
| {"current_steps": 2171, "total_steps": 3330, "eval_loss": 0.3500675559043884, "epoch": 19.55855855855856, "percentage": 65.2, "elapsed_time": "0:04:26", "remaining_time": "0:02:22", "throughput": 3202.38, "total_tokens": 853744} | |
| {"current_steps": 2175, "total_steps": 3330, "loss": 0.3259, "lr": 0.000162159799004757, "epoch": 19.594594594594593, "percentage": 65.32, "elapsed_time": "0:04:29", "remaining_time": "0:02:23", "throughput": 3172.88, "total_tokens": 855120} | |
| {"current_steps": 2180, "total_steps": 3330, "loss": 0.3588, "lr": 0.00016093424889526746, "epoch": 19.63963963963964, "percentage": 65.47, "elapsed_time": "0:04:30", "remaining_time": "0:02:22", "throughput": 3173.67, "total_tokens": 856976} | |
| {"current_steps": 2185, "total_steps": 3330, "loss": 0.3517, "lr": 0.00015971114545922476, "epoch": 19.684684684684683, "percentage": 65.62, "elapsed_time": "0:04:30", "remaining_time": "0:02:21", "throughput": 3175.3, "total_tokens": 858928} | |
| {"current_steps": 2190, "total_steps": 3330, "loss": 0.3468, "lr": 0.00015849052229579194, "epoch": 19.72972972972973, "percentage": 65.77, "elapsed_time": "0:04:30", "remaining_time": "0:02:21", "throughput": 3176.89, "total_tokens": 860912} | |
| {"current_steps": 2195, "total_steps": 3330, "loss": 0.3379, "lr": 0.0001572724129359981, "epoch": 19.774774774774773, "percentage": 65.92, "elapsed_time": "0:04:31", "remaining_time": "0:02:20", "throughput": 3177.38, "total_tokens": 862544} | |
| {"current_steps": 2200, "total_steps": 3330, "loss": 0.3682, "lr": 0.000156056850841817, "epoch": 19.81981981981982, "percentage": 66.07, "elapsed_time": "0:04:32", "remaining_time": "0:02:19", "throughput": 3178.5, "total_tokens": 864688} | |
| {"current_steps": 2205, "total_steps": 3330, "loss": 0.3406, "lr": 0.00015484386940524777, "epoch": 19.864864864864863, "percentage": 66.22, "elapsed_time": "0:04:32", "remaining_time": "0:02:19", "throughput": 3178.49, "total_tokens": 866608} | |
| {"current_steps": 2210, "total_steps": 3330, "loss": 0.3479, "lr": 0.00015363350194739822, "epoch": 19.90990990990991, "percentage": 66.37, "elapsed_time": "0:04:33", "remaining_time": "0:02:18", "throughput": 3179.34, "total_tokens": 868656} | |
| {"current_steps": 2215, "total_steps": 3330, "loss": 0.3691, "lr": 0.00015242578171756867, "epoch": 19.954954954954957, "percentage": 66.52, "elapsed_time": "0:04:33", "remaining_time": "0:02:17", "throughput": 3180.58, "total_tokens": 870608} | |
| {"current_steps": 2220, "total_steps": 3330, "loss": 0.3807, "lr": 0.0001512207418923391, "epoch": 20.0, "percentage": 66.67, "elapsed_time": "0:04:34", "remaining_time": "0:02:17", "throughput": 3179.42, "total_tokens": 872376} | |
| {"current_steps": 2225, "total_steps": 3330, "loss": 0.3353, "lr": 0.00015001841557465777, "epoch": 20.045045045045047, "percentage": 66.82, "elapsed_time": "0:04:35", "remaining_time": "0:02:16", "throughput": 3177.03, "total_tokens": 874424} | |
| {"current_steps": 2230, "total_steps": 3330, "loss": 0.3518, "lr": 0.00014881883579293171, "epoch": 20.09009009009009, "percentage": 66.97, "elapsed_time": "0:04:35", "remaining_time": "0:02:15", "throughput": 3179.34, "total_tokens": 876568} | |
| {"current_steps": 2235, "total_steps": 3330, "loss": 0.3388, "lr": 0.00014762203550011918, "epoch": 20.135135135135137, "percentage": 67.12, "elapsed_time": "0:04:36", "remaining_time": "0:02:15", "throughput": 3180.68, "total_tokens": 878360} | |
| {"current_steps": 2240, "total_steps": 3330, "loss": 0.3563, "lr": 0.0001464280475728252, "epoch": 20.18018018018018, "percentage": 67.27, "elapsed_time": "0:04:36", "remaining_time": "0:02:14", "throughput": 3181.94, "total_tokens": 880216} | |
| {"current_steps": 2245, "total_steps": 3330, "loss": 0.3295, "lr": 0.00014523690481039762, "epoch": 20.225225225225227, "percentage": 67.42, "elapsed_time": "0:04:37", "remaining_time": "0:02:13", "throughput": 3184.44, "total_tokens": 882392} | |
| {"current_steps": 2250, "total_steps": 3330, "loss": 0.3598, "lr": 0.00014404863993402634, "epoch": 20.27027027027027, "percentage": 67.57, "elapsed_time": "0:04:37", "remaining_time": "0:02:13", "throughput": 3185.69, "total_tokens": 884312} | |
| {"current_steps": 2255, "total_steps": 3330, "loss": 0.3408, "lr": 0.00014286328558584476, "epoch": 20.315315315315317, "percentage": 67.72, "elapsed_time": "0:04:38", "remaining_time": "0:02:12", "throughput": 3188.46, "total_tokens": 886552} | |
| {"current_steps": 2260, "total_steps": 3330, "loss": 0.339, "lr": 0.00014168087432803292, "epoch": 20.36036036036036, "percentage": 67.87, "elapsed_time": "0:04:38", "remaining_time": "0:02:11", "throughput": 3189.0, "total_tokens": 888408} | |
| {"current_steps": 2265, "total_steps": 3330, "loss": 0.3481, "lr": 0.00014050143864192252, "epoch": 20.405405405405407, "percentage": 68.02, "elapsed_time": "0:04:39", "remaining_time": "0:02:11", "throughput": 3190.22, "total_tokens": 890424} | |
| {"current_steps": 2270, "total_steps": 3330, "loss": 0.3534, "lr": 0.00013932501092710553, "epoch": 20.45045045045045, "percentage": 68.17, "elapsed_time": "0:04:39", "remaining_time": "0:02:10", "throughput": 3190.76, "total_tokens": 892312} | |
| {"current_steps": 2275, "total_steps": 3330, "loss": 0.3622, "lr": 0.0001381516235005433, "epoch": 20.495495495495497, "percentage": 68.32, "elapsed_time": "0:04:40", "remaining_time": "0:02:09", "throughput": 3192.83, "total_tokens": 894456} | |
| {"current_steps": 2280, "total_steps": 3330, "loss": 0.3318, "lr": 0.00013698130859567944, "epoch": 20.54054054054054, "percentage": 68.47, "elapsed_time": "0:04:40", "remaining_time": "0:02:09", "throughput": 3194.5, "total_tokens": 896504} | |
| {"current_steps": 2285, "total_steps": 3330, "loss": 0.3132, "lr": 0.00013581409836155414, "epoch": 20.585585585585587, "percentage": 68.62, "elapsed_time": "0:04:41", "remaining_time": "0:02:08", "throughput": 3197.3, "total_tokens": 898808} | |
| {"current_steps": 2290, "total_steps": 3330, "loss": 0.3523, "lr": 0.00013465002486192078, "epoch": 20.63063063063063, "percentage": 68.77, "elapsed_time": "0:04:41", "remaining_time": "0:02:07", "throughput": 3198.13, "total_tokens": 900824} | |
| {"current_steps": 2295, "total_steps": 3330, "loss": 0.3698, "lr": 0.00013348912007436536, "epoch": 20.675675675675677, "percentage": 68.92, "elapsed_time": "0:04:42", "remaining_time": "0:02:07", "throughput": 3199.57, "total_tokens": 902744} | |
| {"current_steps": 2300, "total_steps": 3330, "loss": 0.3084, "lr": 0.00013233141588942817, "epoch": 20.72072072072072, "percentage": 69.07, "elapsed_time": "0:04:42", "remaining_time": "0:02:06", "throughput": 3199.41, "total_tokens": 904376} | |
| {"current_steps": 2305, "total_steps": 3330, "loss": 0.4093, "lr": 0.00013117694410972748, "epoch": 20.765765765765767, "percentage": 69.22, "elapsed_time": "0:04:43", "remaining_time": "0:02:05", "throughput": 3200.71, "total_tokens": 906264} | |
| {"current_steps": 2310, "total_steps": 3330, "loss": 0.3543, "lr": 0.0001300257364490863, "epoch": 20.81081081081081, "percentage": 69.37, "elapsed_time": "0:04:43", "remaining_time": "0:02:05", "throughput": 3202.25, "total_tokens": 908280} | |
| {"current_steps": 2315, "total_steps": 3330, "loss": 0.3565, "lr": 0.00012887782453166057, "epoch": 20.855855855855857, "percentage": 69.52, "elapsed_time": "0:04:44", "remaining_time": "0:02:04", "throughput": 3202.31, "total_tokens": 910008} | |
| {"current_steps": 2320, "total_steps": 3330, "loss": 0.3421, "lr": 0.00012773323989107073, "epoch": 20.9009009009009, "percentage": 69.67, "elapsed_time": "0:04:44", "remaining_time": "0:02:03", "throughput": 3203.6, "total_tokens": 911896} | |
| {"current_steps": 2325, "total_steps": 3330, "loss": 0.3413, "lr": 0.00012659201396953614, "epoch": 20.945945945945947, "percentage": 69.82, "elapsed_time": "0:04:45", "remaining_time": "0:02:03", "throughput": 3206.21, "total_tokens": 914072} | |
| {"current_steps": 2330, "total_steps": 3330, "loss": 0.3586, "lr": 0.00012545417811701015, "epoch": 20.99099099099099, "percentage": 69.97, "elapsed_time": "0:04:45", "remaining_time": "0:02:02", "throughput": 3207.7, "total_tokens": 915928} | |
| {"current_steps": 2335, "total_steps": 3330, "loss": 0.3425, "lr": 0.00012431976359031957, "epoch": 21.036036036036037, "percentage": 70.12, "elapsed_time": "0:04:46", "remaining_time": "0:02:02", "throughput": 3203.03, "total_tokens": 917696} | |
| {"current_steps": 2338, "total_steps": 3330, "eval_loss": 0.3508879244327545, "epoch": 21.063063063063062, "percentage": 70.21, "elapsed_time": "0:04:48", "remaining_time": "0:02:02", "throughput": 3187.79, "total_tokens": 918752} | |
| {"current_steps": 2340, "total_steps": 3330, "loss": 0.3551, "lr": 0.00012318880155230618, "epoch": 21.08108108108108, "percentage": 70.27, "elapsed_time": "0:04:50", "remaining_time": "0:02:02", "throughput": 3162.99, "total_tokens": 919392} | |
| {"current_steps": 2345, "total_steps": 3330, "loss": 0.3355, "lr": 0.00012206132307097046, "epoch": 21.126126126126128, "percentage": 70.42, "elapsed_time": "0:04:51", "remaining_time": "0:02:02", "throughput": 3162.79, "total_tokens": 921184} | |
| {"current_steps": 2350, "total_steps": 3330, "loss": 0.3491, "lr": 0.00012093735911861778, "epoch": 21.17117117117117, "percentage": 70.57, "elapsed_time": "0:04:51", "remaining_time": "0:02:01", "throughput": 3163.89, "total_tokens": 923232} | |
| {"current_steps": 2355, "total_steps": 3330, "loss": 0.3439, "lr": 0.00011981694057100839, "epoch": 21.216216216216218, "percentage": 70.72, "elapsed_time": "0:04:52", "remaining_time": "0:02:01", "throughput": 3165.33, "total_tokens": 925184} | |
| {"current_steps": 2360, "total_steps": 3330, "loss": 0.3361, "lr": 0.00011870009820650837, "epoch": 21.26126126126126, "percentage": 70.87, "elapsed_time": "0:04:52", "remaining_time": "0:02:00", "throughput": 3166.42, "total_tokens": 927040} | |
| {"current_steps": 2365, "total_steps": 3330, "loss": 0.3225, "lr": 0.00011758686270524483, "epoch": 21.306306306306308, "percentage": 71.02, "elapsed_time": "0:04:53", "remaining_time": "0:01:59", "throughput": 3168.08, "total_tokens": 929120} | |
| {"current_steps": 2370, "total_steps": 3330, "loss": 0.3348, "lr": 0.00011647726464826283, "epoch": 21.35135135135135, "percentage": 71.17, "elapsed_time": "0:04:53", "remaining_time": "0:01:59", "throughput": 3167.96, "total_tokens": 930912} | |
| {"current_steps": 2375, "total_steps": 3330, "loss": 0.3551, "lr": 0.00011537133451668519, "epoch": 21.396396396396398, "percentage": 71.32, "elapsed_time": "0:04:54", "remaining_time": "0:01:58", "throughput": 3170.09, "total_tokens": 933152} | |
| {"current_steps": 2380, "total_steps": 3330, "loss": 0.3522, "lr": 0.00011426910269087517, "epoch": 21.44144144144144, "percentage": 71.47, "elapsed_time": "0:04:54", "remaining_time": "0:01:57", "throughput": 3171.39, "total_tokens": 935264} | |
| {"current_steps": 2385, "total_steps": 3330, "loss": 0.3481, "lr": 0.00011317059944960234, "epoch": 21.486486486486488, "percentage": 71.62, "elapsed_time": "0:04:55", "remaining_time": "0:01:57", "throughput": 3172.69, "total_tokens": 937376} | |
| {"current_steps": 2390, "total_steps": 3330, "loss": 0.3708, "lr": 0.0001120758549692104, "epoch": 21.53153153153153, "percentage": 71.77, "elapsed_time": "0:04:55", "remaining_time": "0:01:56", "throughput": 3175.05, "total_tokens": 939648} | |
| {"current_steps": 2395, "total_steps": 3330, "loss": 0.3384, "lr": 0.00011098489932278811, "epoch": 21.576576576576578, "percentage": 71.92, "elapsed_time": "0:04:56", "remaining_time": "0:01:55", "throughput": 3175.53, "total_tokens": 941376} | |
| {"current_steps": 2400, "total_steps": 3330, "loss": 0.3501, "lr": 0.00010989776247934363, "epoch": 21.62162162162162, "percentage": 72.07, "elapsed_time": "0:04:56", "remaining_time": "0:01:55", "throughput": 3177.14, "total_tokens": 943456} | |
| {"current_steps": 2405, "total_steps": 3330, "loss": 0.3407, "lr": 0.00010881447430298075, "epoch": 21.666666666666668, "percentage": 72.22, "elapsed_time": "0:04:57", "remaining_time": "0:01:54", "throughput": 3178.02, "total_tokens": 945376} | |
| {"current_steps": 2410, "total_steps": 3330, "loss": 0.3434, "lr": 0.00010773506455207901, "epoch": 21.71171171171171, "percentage": 72.37, "elapsed_time": "0:04:57", "remaining_time": "0:01:53", "throughput": 3178.58, "total_tokens": 947168} | |
| {"current_steps": 2415, "total_steps": 3330, "loss": 0.3436, "lr": 0.00010665956287847598, "epoch": 21.756756756756758, "percentage": 72.52, "elapsed_time": "0:04:58", "remaining_time": "0:01:53", "throughput": 3179.65, "total_tokens": 949056} | |
| {"current_steps": 2420, "total_steps": 3330, "loss": 0.348, "lr": 0.00010558799882665245, "epoch": 21.8018018018018, "percentage": 72.67, "elapsed_time": "0:04:59", "remaining_time": "0:01:52", "throughput": 3180.47, "total_tokens": 951040} | |
| {"current_steps": 2425, "total_steps": 3330, "loss": 0.3484, "lr": 0.00010452040183292125, "epoch": 21.846846846846848, "percentage": 72.82, "elapsed_time": "0:04:59", "remaining_time": "0:01:51", "throughput": 3181.45, "total_tokens": 952864} | |
| {"current_steps": 2430, "total_steps": 3330, "loss": 0.3411, "lr": 0.0001034568012246185, "epoch": 21.89189189189189, "percentage": 72.97, "elapsed_time": "0:05:00", "remaining_time": "0:01:51", "throughput": 3182.89, "total_tokens": 954944} | |
| {"current_steps": 2435, "total_steps": 3330, "loss": 0.3372, "lr": 0.00010239722621929803, "epoch": 21.936936936936938, "percentage": 73.12, "elapsed_time": "0:05:00", "remaining_time": "0:01:50", "throughput": 3184.33, "total_tokens": 956864} | |
| {"current_steps": 2440, "total_steps": 3330, "loss": 0.3583, "lr": 0.00010134170592392835, "epoch": 21.98198198198198, "percentage": 73.27, "elapsed_time": "0:05:00", "remaining_time": "0:01:49", "throughput": 3186.75, "total_tokens": 959072} | |
| {"current_steps": 2445, "total_steps": 3330, "loss": 0.337, "lr": 0.00010029026933409377, "epoch": 22.027027027027028, "percentage": 73.42, "elapsed_time": "0:05:01", "remaining_time": "0:01:49", "throughput": 3183.96, "total_tokens": 960856} | |
| {"current_steps": 2450, "total_steps": 3330, "loss": 0.3712, "lr": 9.924294533319713e-05, "epoch": 22.07207207207207, "percentage": 73.57, "elapsed_time": "0:05:02", "remaining_time": "0:01:48", "throughput": 3186.21, "total_tokens": 963096} | |
| {"current_steps": 2455, "total_steps": 3330, "loss": 0.3404, "lr": 9.819976269166705e-05, "epoch": 22.117117117117118, "percentage": 73.72, "elapsed_time": "0:05:02", "remaining_time": "0:01:47", "throughput": 3187.61, "total_tokens": 965144} | |
| {"current_steps": 2460, "total_steps": 3330, "loss": 0.3386, "lr": 9.71607500661672e-05, "epoch": 22.16216216216216, "percentage": 73.87, "elapsed_time": "0:05:03", "remaining_time": "0:01:47", "throughput": 3188.36, "total_tokens": 967000} | |
| {"current_steps": 2465, "total_steps": 3330, "loss": 0.3393, "lr": 9.612593599880904e-05, "epoch": 22.207207207207208, "percentage": 74.02, "elapsed_time": "0:05:03", "remaining_time": "0:01:46", "throughput": 3188.75, "total_tokens": 968920} | |
| {"current_steps": 2470, "total_steps": 3330, "loss": 0.3312, "lr": 9.509534891636787e-05, "epoch": 22.25225225225225, "percentage": 74.17, "elapsed_time": "0:05:04", "remaining_time": "0:01:45", "throughput": 3188.91, "total_tokens": 970744} | |
| {"current_steps": 2475, "total_steps": 3330, "loss": 0.3672, "lr": 9.406901712950208e-05, "epoch": 22.2972972972973, "percentage": 74.32, "elapsed_time": "0:05:04", "remaining_time": "0:01:45", "throughput": 3189.41, "total_tokens": 972408} | |
| {"current_steps": 2480, "total_steps": 3330, "loss": 0.3621, "lr": 9.304696883197541e-05, "epoch": 22.34234234234234, "percentage": 74.47, "elapsed_time": "0:05:05", "remaining_time": "0:01:44", "throughput": 3191.51, "total_tokens": 974520} | |
| {"current_steps": 2485, "total_steps": 3330, "loss": 0.3466, "lr": 9.202923209988198e-05, "epoch": 22.38738738738739, "percentage": 74.62, "elapsed_time": "0:05:05", "remaining_time": "0:01:43", "throughput": 3193.14, "total_tokens": 976504} | |
| {"current_steps": 2490, "total_steps": 3330, "loss": 0.3495, "lr": 9.10158348908758e-05, "epoch": 22.43243243243243, "percentage": 74.77, "elapsed_time": "0:05:06", "remaining_time": "0:01:43", "throughput": 3194.47, "total_tokens": 978296} | |
| {"current_steps": 2495, "total_steps": 3330, "loss": 0.3467, "lr": 9.000680504340205e-05, "epoch": 22.47747747747748, "percentage": 74.92, "elapsed_time": "0:05:06", "remaining_time": "0:01:42", "throughput": 3195.78, "total_tokens": 980216} | |
| {"current_steps": 2500, "total_steps": 3330, "loss": 0.3378, "lr": 8.90021702759329e-05, "epoch": 22.52252252252252, "percentage": 75.08, "elapsed_time": "0:05:07", "remaining_time": "0:01:41", "throughput": 3196.84, "total_tokens": 982136} | |
| {"current_steps": 2505, "total_steps": 3330, "loss": 0.3605, "lr": 8.80019581862058e-05, "epoch": 22.56756756756757, "percentage": 75.23, "elapsed_time": "0:05:07", "remaining_time": "0:01:41", "throughput": 3199.48, "total_tokens": 984472} | |
| {"current_steps": 2505, "total_steps": 3330, "eval_loss": 0.3515712320804596, "epoch": 22.56756756756757, "percentage": 75.23, "elapsed_time": "0:05:09", "remaining_time": "0:01:41", "throughput": 3185.97, "total_tokens": 984472} | |
| {"current_steps": 2510, "total_steps": 3330, "loss": 0.3731, "lr": 8.700619625046525e-05, "epoch": 22.61261261261261, "percentage": 75.38, "elapsed_time": "0:05:11", "remaining_time": "0:01:41", "throughput": 3163.12, "total_tokens": 986488} | |
| {"current_steps": 2515, "total_steps": 3330, "loss": 0.3469, "lr": 8.601491182270812e-05, "epoch": 22.65765765765766, "percentage": 75.53, "elapsed_time": "0:05:12", "remaining_time": "0:01:41", "throughput": 3164.84, "total_tokens": 988568} | |
| {"current_steps": 2520, "total_steps": 3330, "loss": 0.3538, "lr": 8.502813213393254e-05, "epoch": 22.7027027027027, "percentage": 75.68, "elapsed_time": "0:05:12", "remaining_time": "0:01:40", "throughput": 3166.72, "total_tokens": 990744} | |
| {"current_steps": 2525, "total_steps": 3330, "loss": 0.3533, "lr": 8.404588429138946e-05, "epoch": 22.74774774774775, "percentage": 75.83, "elapsed_time": "0:05:13", "remaining_time": "0:01:39", "throughput": 3168.1, "total_tokens": 992728} | |
| {"current_steps": 2530, "total_steps": 3330, "loss": 0.3533, "lr": 8.306819527783791e-05, "epoch": 22.792792792792792, "percentage": 75.98, "elapsed_time": "0:05:13", "remaining_time": "0:01:39", "throughput": 3169.42, "total_tokens": 994584} | |
| {"current_steps": 2535, "total_steps": 3330, "loss": 0.3601, "lr": 8.209509195080428e-05, "epoch": 22.83783783783784, "percentage": 76.13, "elapsed_time": "0:05:14", "remaining_time": "0:01:38", "throughput": 3169.62, "total_tokens": 996216} | |
| {"current_steps": 2540, "total_steps": 3330, "loss": 0.3454, "lr": 8.112660104184399e-05, "epoch": 22.882882882882882, "percentage": 76.28, "elapsed_time": "0:05:14", "remaining_time": "0:01:37", "throughput": 3171.4, "total_tokens": 998328} | |
| {"current_steps": 2545, "total_steps": 3330, "loss": 0.3758, "lr": 8.016274915580753e-05, "epoch": 22.92792792792793, "percentage": 76.43, "elapsed_time": "0:05:15", "remaining_time": "0:01:37", "throughput": 3173.02, "total_tokens": 1000312} | |
| {"current_steps": 2550, "total_steps": 3330, "loss": 0.3666, "lr": 7.920356277010965e-05, "epoch": 22.972972972972972, "percentage": 76.58, "elapsed_time": "0:05:15", "remaining_time": "0:01:36", "throughput": 3175.13, "total_tokens": 1002392} | |
| {"current_steps": 2555, "total_steps": 3330, "loss": 0.3499, "lr": 7.824906823400149e-05, "epoch": 23.01801801801802, "percentage": 76.73, "elapsed_time": "0:05:16", "remaining_time": "0:01:36", "throughput": 3172.3, "total_tokens": 1004488} | |
| {"current_steps": 2560, "total_steps": 3330, "loss": 0.3534, "lr": 7.729929176784722e-05, "epoch": 23.063063063063062, "percentage": 76.88, "elapsed_time": "0:05:17", "remaining_time": "0:01:35", "throughput": 3173.83, "total_tokens": 1006536} | |
| {"current_steps": 2565, "total_steps": 3330, "loss": 0.3425, "lr": 7.635425946240404e-05, "epoch": 23.10810810810811, "percentage": 77.03, "elapsed_time": "0:05:17", "remaining_time": "0:01:34", "throughput": 3175.52, "total_tokens": 1008616} | |
| {"current_steps": 2570, "total_steps": 3330, "loss": 0.338, "lr": 7.541399727810458e-05, "epoch": 23.153153153153152, "percentage": 77.18, "elapsed_time": "0:05:18", "remaining_time": "0:01:34", "throughput": 3175.63, "total_tokens": 1010152} | |
| {"current_steps": 2575, "total_steps": 3330, "loss": 0.3536, "lr": 7.447853104434438e-05, "epoch": 23.1981981981982, "percentage": 77.33, "elapsed_time": "0:05:18", "remaining_time": "0:01:33", "throughput": 3176.35, "total_tokens": 1012008} | |
| {"current_steps": 2580, "total_steps": 3330, "loss": 0.3244, "lr": 7.354788645877244e-05, "epoch": 23.243243243243242, "percentage": 77.48, "elapsed_time": "0:05:19", "remaining_time": "0:01:32", "throughput": 3177.08, "total_tokens": 1013832} | |
| {"current_steps": 2585, "total_steps": 3330, "loss": 0.3689, "lr": 7.262208908658472e-05, "epoch": 23.28828828828829, "percentage": 77.63, "elapsed_time": "0:05:19", "remaining_time": "0:01:32", "throughput": 3178.21, "total_tokens": 1015624} | |
| {"current_steps": 2590, "total_steps": 3330, "loss": 0.3519, "lr": 7.170116435982246e-05, "epoch": 23.333333333333332, "percentage": 77.78, "elapsed_time": "0:05:20", "remaining_time": "0:01:31", "throughput": 3179.31, "total_tokens": 1017416} | |
| {"current_steps": 2595, "total_steps": 3330, "loss": 0.3527, "lr": 7.078513757667329e-05, "epoch": 23.37837837837838, "percentage": 77.93, "elapsed_time": "0:05:20", "remaining_time": "0:01:30", "throughput": 3179.92, "total_tokens": 1019176} | |
| {"current_steps": 2600, "total_steps": 3330, "loss": 0.3393, "lr": 6.98740339007761e-05, "epoch": 23.423423423423422, "percentage": 78.08, "elapsed_time": "0:05:20", "remaining_time": "0:01:30", "throughput": 3181.12, "total_tokens": 1021096} | |
| {"current_steps": 2605, "total_steps": 3330, "loss": 0.3487, "lr": 6.896787836052992e-05, "epoch": 23.46846846846847, "percentage": 78.23, "elapsed_time": "0:05:21", "remaining_time": "0:01:29", "throughput": 3182.05, "total_tokens": 1023016} | |
| {"current_steps": 2610, "total_steps": 3330, "loss": 0.3362, "lr": 6.806669584840689e-05, "epoch": 23.513513513513512, "percentage": 78.38, "elapsed_time": "0:05:22", "remaining_time": "0:01:28", "throughput": 3183.27, "total_tokens": 1025064} | |
| {"current_steps": 2615, "total_steps": 3330, "loss": 0.3288, "lr": 6.71705111202674e-05, "epoch": 23.55855855855856, "percentage": 78.53, "elapsed_time": "0:05:22", "remaining_time": "0:01:28", "throughput": 3183.89, "total_tokens": 1027048} | |
| {"current_steps": 2620, "total_steps": 3330, "loss": 0.3732, "lr": 6.627934879468107e-05, "epoch": 23.603603603603602, "percentage": 78.68, "elapsed_time": "0:05:23", "remaining_time": "0:01:27", "throughput": 3185.77, "total_tokens": 1029256} | |
| {"current_steps": 2625, "total_steps": 3330, "loss": 0.3961, "lr": 6.539323335224965e-05, "epoch": 23.64864864864865, "percentage": 78.83, "elapsed_time": "0:05:23", "remaining_time": "0:01:26", "throughput": 3186.68, "total_tokens": 1031368} | |
| {"current_steps": 2630, "total_steps": 3330, "loss": 0.3485, "lr": 6.451218913493514e-05, "epoch": 23.693693693693692, "percentage": 78.98, "elapsed_time": "0:05:24", "remaining_time": "0:01:26", "throughput": 3187.97, "total_tokens": 1033416} | |
| {"current_steps": 2635, "total_steps": 3330, "loss": 0.3625, "lr": 6.363624034539098e-05, "epoch": 23.73873873873874, "percentage": 79.13, "elapsed_time": "0:05:24", "remaining_time": "0:01:25", "throughput": 3189.22, "total_tokens": 1035400} | |
| {"current_steps": 2640, "total_steps": 3330, "loss": 0.344, "lr": 6.276541104629672e-05, "epoch": 23.783783783783782, "percentage": 79.28, "elapsed_time": "0:05:25", "remaining_time": "0:01:24", "throughput": 3190.64, "total_tokens": 1037512} | |
| {"current_steps": 2645, "total_steps": 3330, "loss": 0.3489, "lr": 6.189972515969752e-05, "epoch": 23.82882882882883, "percentage": 79.43, "elapsed_time": "0:05:25", "remaining_time": "0:01:24", "throughput": 3192.13, "total_tokens": 1039496} | |
| {"current_steps": 2650, "total_steps": 3330, "loss": 0.3334, "lr": 6.103920646634697e-05, "epoch": 23.873873873873872, "percentage": 79.58, "elapsed_time": "0:05:26", "remaining_time": "0:01:23", "throughput": 3192.8, "total_tokens": 1041288} | |
| {"current_steps": 2655, "total_steps": 3330, "loss": 0.3516, "lr": 6.018387860505367e-05, "epoch": 23.91891891891892, "percentage": 79.73, "elapsed_time": "0:05:26", "remaining_time": "0:01:23", "throughput": 3194.63, "total_tokens": 1043432} | |
| {"current_steps": 2660, "total_steps": 3330, "loss": 0.3296, "lr": 5.933376507203164e-05, "epoch": 23.963963963963963, "percentage": 79.88, "elapsed_time": "0:05:27", "remaining_time": "0:01:22", "throughput": 3195.33, "total_tokens": 1045128} | |
| {"current_steps": 2665, "total_steps": 3330, "loss": 0.3435, "lr": 5.848888922025553e-05, "epoch": 24.00900900900901, "percentage": 80.03, "elapsed_time": "0:05:28", "remaining_time": "0:01:21", "throughput": 3192.36, "total_tokens": 1047368} | |
| {"current_steps": 2670, "total_steps": 3330, "loss": 0.3433, "lr": 5.764927425881825e-05, "epoch": 24.054054054054053, "percentage": 80.18, "elapsed_time": "0:05:28", "remaining_time": "0:01:21", "throughput": 3193.79, "total_tokens": 1049416} | |
| {"current_steps": 2672, "total_steps": 3330, "eval_loss": 0.35339227318763733, "epoch": 24.07207207207207, "percentage": 80.24, "elapsed_time": "0:05:30", "remaining_time": "0:01:21", "throughput": 3180.68, "total_tokens": 1050088} | |
| {"current_steps": 2675, "total_steps": 3330, "loss": 0.3359, "lr": 5.681494325229422e-05, "epoch": 24.0990990990991, "percentage": 80.33, "elapsed_time": "0:05:32", "remaining_time": "0:01:21", "throughput": 3158.73, "total_tokens": 1051464} | |
| {"current_steps": 2680, "total_steps": 3330, "loss": 0.3553, "lr": 5.5985919120105254e-05, "epoch": 24.144144144144143, "percentage": 80.48, "elapsed_time": "0:05:33", "remaining_time": "0:01:20", "throughput": 3160.06, "total_tokens": 1053640} | |
| {"current_steps": 2685, "total_steps": 3330, "loss": 0.3536, "lr": 5.516222463589113e-05, "epoch": 24.18918918918919, "percentage": 80.63, "elapsed_time": "0:05:33", "remaining_time": "0:01:20", "throughput": 3161.2, "total_tokens": 1055784} | |
| {"current_steps": 2690, "total_steps": 3330, "loss": 0.3682, "lr": 5.434388242688382e-05, "epoch": 24.234234234234233, "percentage": 80.78, "elapsed_time": "0:05:34", "remaining_time": "0:01:19", "throughput": 3162.47, "total_tokens": 1057896} | |
| {"current_steps": 2695, "total_steps": 3330, "loss": 0.3623, "lr": 5.353091497328627e-05, "epoch": 24.27927927927928, "percentage": 80.93, "elapsed_time": "0:05:34", "remaining_time": "0:01:18", "throughput": 3163.85, "total_tokens": 1059784} | |
| {"current_steps": 2700, "total_steps": 3330, "loss": 0.3425, "lr": 5.272334460765466e-05, "epoch": 24.324324324324323, "percentage": 81.08, "elapsed_time": "0:05:35", "remaining_time": "0:01:18", "throughput": 3165.52, "total_tokens": 1061928} | |
| {"current_steps": 2705, "total_steps": 3330, "loss": 0.3435, "lr": 5.1921193514284674e-05, "epoch": 24.36936936936937, "percentage": 81.23, "elapsed_time": "0:05:36", "remaining_time": "0:01:17", "throughput": 3165.46, "total_tokens": 1063784} | |
| {"current_steps": 2710, "total_steps": 3330, "loss": 0.3404, "lr": 5.1124483728602564e-05, "epoch": 24.414414414414413, "percentage": 81.38, "elapsed_time": "0:05:36", "remaining_time": "0:01:17", "throughput": 3165.18, "total_tokens": 1065480} | |
| {"current_steps": 2715, "total_steps": 3330, "loss": 0.3391, "lr": 5.033323713655935e-05, "epoch": 24.45945945945946, "percentage": 81.53, "elapsed_time": "0:05:37", "remaining_time": "0:01:16", "throughput": 3166.74, "total_tokens": 1067464} | |
| {"current_steps": 2720, "total_steps": 3330, "loss": 0.3446, "lr": 4.954747547403005e-05, "epoch": 24.504504504504503, "percentage": 81.68, "elapsed_time": "0:05:37", "remaining_time": "0:01:15", "throughput": 3167.65, "total_tokens": 1069224} | |
| {"current_steps": 2725, "total_steps": 3330, "loss": 0.3468, "lr": 4.8767220326216306e-05, "epoch": 24.54954954954955, "percentage": 81.83, "elapsed_time": "0:05:38", "remaining_time": "0:01:15", "throughput": 3168.59, "total_tokens": 1071176} | |
| {"current_steps": 2730, "total_steps": 3330, "loss": 0.3272, "lr": 4.799249312705348e-05, "epoch": 24.594594594594593, "percentage": 81.98, "elapsed_time": "0:05:38", "remaining_time": "0:01:14", "throughput": 3170.75, "total_tokens": 1073544} | |
| {"current_steps": 2735, "total_steps": 3330, "loss": 0.3546, "lr": 4.7223315158621746e-05, "epoch": 24.63963963963964, "percentage": 82.13, "elapsed_time": "0:05:39", "remaining_time": "0:01:13", "throughput": 3171.37, "total_tokens": 1075336} | |
| {"current_steps": 2740, "total_steps": 3330, "loss": 0.3562, "lr": 4.645970755056181e-05, "epoch": 24.684684684684683, "percentage": 82.28, "elapsed_time": "0:05:39", "remaining_time": "0:01:13", "throughput": 3172.11, "total_tokens": 1077096} | |
| {"current_steps": 2745, "total_steps": 3330, "loss": 0.3614, "lr": 4.5701691279494166e-05, "epoch": 24.72972972972973, "percentage": 82.43, "elapsed_time": "0:05:40", "remaining_time": "0:01:12", "throughput": 3172.16, "total_tokens": 1078696} | |
| {"current_steps": 2750, "total_steps": 3330, "loss": 0.3348, "lr": 4.4949287168442874e-05, "epoch": 24.774774774774773, "percentage": 82.58, "elapsed_time": "0:05:40", "remaining_time": "0:01:11", "throughput": 3173.55, "total_tokens": 1080744} | |
| {"current_steps": 2755, "total_steps": 3330, "loss": 0.3581, "lr": 4.420251588626373e-05, "epoch": 24.81981981981982, "percentage": 82.73, "elapsed_time": "0:05:41", "remaining_time": "0:01:11", "throughput": 3175.03, "total_tokens": 1082856} | |
| {"current_steps": 2760, "total_steps": 3330, "loss": 0.3407, "lr": 4.346139794707618e-05, "epoch": 24.864864864864863, "percentage": 82.88, "elapsed_time": "0:05:41", "remaining_time": "0:01:10", "throughput": 3175.7, "total_tokens": 1084648} | |
| {"current_steps": 2765, "total_steps": 3330, "loss": 0.3373, "lr": 4.272595370970017e-05, "epoch": 24.90990990990991, "percentage": 83.03, "elapsed_time": "0:05:42", "remaining_time": "0:01:09", "throughput": 3176.26, "total_tokens": 1086376} | |
| {"current_steps": 2770, "total_steps": 3330, "loss": 0.3501, "lr": 4.199620337709661e-05, "epoch": 24.954954954954957, "percentage": 83.18, "elapsed_time": "0:05:42", "remaining_time": "0:01:09", "throughput": 3177.71, "total_tokens": 1088360} | |
| {"current_steps": 2775, "total_steps": 3330, "loss": 0.354, "lr": 4.127216699581246e-05, "epoch": 25.0, "percentage": 83.33, "elapsed_time": "0:05:43", "remaining_time": "0:01:08", "throughput": 3176.82, "total_tokens": 1089936} | |
| {"current_steps": 2780, "total_steps": 3330, "loss": 0.3422, "lr": 4.0553864455429964e-05, "epoch": 25.045045045045047, "percentage": 83.48, "elapsed_time": "0:05:43", "remaining_time": "0:01:08", "throughput": 3173.94, "total_tokens": 1091632} | |
| {"current_steps": 2785, "total_steps": 3330, "loss": 0.343, "lr": 3.9841315488020474e-05, "epoch": 25.09009009009009, "percentage": 83.63, "elapsed_time": "0:05:44", "remaining_time": "0:01:07", "throughput": 3176.7, "total_tokens": 1094064} | |
| {"current_steps": 2790, "total_steps": 3330, "loss": 0.3423, "lr": 3.91345396676023e-05, "epoch": 25.135135135135137, "percentage": 83.78, "elapsed_time": "0:05:44", "remaining_time": "0:01:06", "throughput": 3178.27, "total_tokens": 1096144} | |
| {"current_steps": 2795, "total_steps": 3330, "loss": 0.3399, "lr": 3.843355640960283e-05, "epoch": 25.18018018018018, "percentage": 83.93, "elapsed_time": "0:05:45", "remaining_time": "0:01:06", "throughput": 3179.98, "total_tokens": 1098352} | |
| {"current_steps": 2800, "total_steps": 3330, "loss": 0.3447, "lr": 3.7738384970325586e-05, "epoch": 25.225225225225227, "percentage": 84.08, "elapsed_time": "0:05:45", "remaining_time": "0:01:05", "throughput": 3182.0, "total_tokens": 1100528} | |
| {"current_steps": 2805, "total_steps": 3330, "loss": 0.3525, "lr": 3.704904444642071e-05, "epoch": 25.27027027027027, "percentage": 84.23, "elapsed_time": "0:05:46", "remaining_time": "0:01:04", "throughput": 3183.1, "total_tokens": 1102448} | |
| {"current_steps": 2810, "total_steps": 3330, "loss": 0.3531, "lr": 3.636555377436085e-05, "epoch": 25.315315315315317, "percentage": 84.38, "elapsed_time": "0:05:46", "remaining_time": "0:01:04", "throughput": 3183.88, "total_tokens": 1104368} | |
| {"current_steps": 2815, "total_steps": 3330, "loss": 0.3342, "lr": 3.568793172992082e-05, "epoch": 25.36036036036036, "percentage": 84.53, "elapsed_time": "0:05:47", "remaining_time": "0:01:03", "throughput": 3184.18, "total_tokens": 1106096} | |
| {"current_steps": 2820, "total_steps": 3330, "loss": 0.3375, "lr": 3.5016196927661615e-05, "epoch": 25.405405405405407, "percentage": 84.68, "elapsed_time": "0:05:47", "remaining_time": "0:01:02", "throughput": 3186.87, "total_tokens": 1108560} | |
| {"current_steps": 2825, "total_steps": 3330, "loss": 0.3407, "lr": 3.43503678204192e-05, "epoch": 25.45045045045045, "percentage": 84.83, "elapsed_time": "0:05:48", "remaining_time": "0:01:02", "throughput": 3188.34, "total_tokens": 1110608} | |
| {"current_steps": 2830, "total_steps": 3330, "loss": 0.3599, "lr": 3.369046269879794e-05, "epoch": 25.495495495495497, "percentage": 84.98, "elapsed_time": "0:05:48", "remaining_time": "0:01:01", "throughput": 3188.87, "total_tokens": 1112336} | |
| {"current_steps": 2835, "total_steps": 3330, "loss": 0.3693, "lr": 3.303649969066749e-05, "epoch": 25.54054054054054, "percentage": 85.14, "elapsed_time": "0:05:49", "remaining_time": "0:01:00", "throughput": 3189.38, "total_tokens": 1114128} | |
| {"current_steps": 2839, "total_steps": 3330, "eval_loss": 0.3555394113063812, "epoch": 25.576576576576578, "percentage": 85.26, "elapsed_time": "0:05:51", "remaining_time": "0:01:00", "throughput": 3178.43, "total_tokens": 1115632} | |
| {"current_steps": 2840, "total_steps": 3330, "loss": 0.3348, "lr": 3.23884967606653e-05, "epoch": 25.585585585585587, "percentage": 85.29, "elapsed_time": "0:05:53", "remaining_time": "0:01:00", "throughput": 3156.88, "total_tokens": 1116048} | |
| {"current_steps": 2845, "total_steps": 3330, "loss": 0.3512, "lr": 3.174647170970296e-05, "epoch": 25.63063063063063, "percentage": 85.44, "elapsed_time": "0:05:54", "remaining_time": "0:01:00", "throughput": 3157.37, "total_tokens": 1117744} | |
| {"current_steps": 2850, "total_steps": 3330, "loss": 0.3379, "lr": 3.111044217447731e-05, "epoch": 25.675675675675677, "percentage": 85.59, "elapsed_time": "0:05:54", "remaining_time": "0:00:59", "throughput": 3158.28, "total_tokens": 1119696} | |
| {"current_steps": 2855, "total_steps": 3330, "loss": 0.3517, "lr": 3.0480425626985692e-05, "epoch": 25.72072072072072, "percentage": 85.74, "elapsed_time": "0:05:55", "remaining_time": "0:00:59", "throughput": 3159.26, "total_tokens": 1121584} | |
| {"current_steps": 2860, "total_steps": 3330, "loss": 0.3631, "lr": 2.9856439374046362e-05, "epoch": 25.765765765765767, "percentage": 85.89, "elapsed_time": "0:05:55", "remaining_time": "0:00:58", "throughput": 3161.64, "total_tokens": 1123984} | |
| {"current_steps": 2865, "total_steps": 3330, "loss": 0.347, "lr": 2.9238500556822646e-05, "epoch": 25.81081081081081, "percentage": 86.04, "elapsed_time": "0:05:55", "remaining_time": "0:00:57", "throughput": 3163.42, "total_tokens": 1125968} | |
| {"current_steps": 2870, "total_steps": 3330, "loss": 0.3472, "lr": 2.862662615035244e-05, "epoch": 25.855855855855857, "percentage": 86.19, "elapsed_time": "0:05:56", "remaining_time": "0:00:57", "throughput": 3164.04, "total_tokens": 1127792} | |
| {"current_steps": 2875, "total_steps": 3330, "loss": 0.3416, "lr": 2.8020832963081776e-05, "epoch": 25.9009009009009, "percentage": 86.34, "elapsed_time": "0:05:56", "remaining_time": "0:00:56", "throughput": 3164.49, "total_tokens": 1129680} | |
| {"current_steps": 2880, "total_steps": 3330, "loss": 0.3372, "lr": 2.742113763640286e-05, "epoch": 25.945945945945947, "percentage": 86.49, "elapsed_time": "0:05:57", "remaining_time": "0:00:55", "throughput": 3165.81, "total_tokens": 1131728} | |
| {"current_steps": 2885, "total_steps": 3330, "loss": 0.346, "lr": 2.682755664419717e-05, "epoch": 25.99099099099099, "percentage": 86.64, "elapsed_time": "0:05:57", "remaining_time": "0:00:55", "throughput": 3166.49, "total_tokens": 1133456} | |
| {"current_steps": 2890, "total_steps": 3330, "loss": 0.3535, "lr": 2.624010629238302e-05, "epoch": 26.036036036036037, "percentage": 86.79, "elapsed_time": "0:05:58", "remaining_time": "0:00:54", "throughput": 3162.73, "total_tokens": 1135240} | |
| {"current_steps": 2895, "total_steps": 3330, "loss": 0.3258, "lr": 2.565880271846735e-05, "epoch": 26.08108108108108, "percentage": 86.94, "elapsed_time": "0:05:59", "remaining_time": "0:00:54", "throughput": 3164.2, "total_tokens": 1137320} | |
| {"current_steps": 2900, "total_steps": 3330, "loss": 0.3612, "lr": 2.5083661891102477e-05, "epoch": 26.126126126126128, "percentage": 87.09, "elapsed_time": "0:05:59", "remaining_time": "0:00:53", "throughput": 3167.0, "total_tokens": 1139912} | |
| {"current_steps": 2905, "total_steps": 3330, "loss": 0.3308, "lr": 2.451469960964764e-05, "epoch": 26.17117117117117, "percentage": 87.24, "elapsed_time": "0:06:00", "remaining_time": "0:00:52", "throughput": 3168.6, "total_tokens": 1142248} | |
| {"current_steps": 2910, "total_steps": 3330, "loss": 0.3539, "lr": 2.3951931503734676e-05, "epoch": 26.216216216216218, "percentage": 87.39, "elapsed_time": "0:06:00", "remaining_time": "0:00:52", "throughput": 3169.76, "total_tokens": 1144168} | |
| {"current_steps": 2915, "total_steps": 3330, "loss": 0.3372, "lr": 2.3395373032838924e-05, "epoch": 26.26126126126126, "percentage": 87.54, "elapsed_time": "0:06:01", "remaining_time": "0:00:51", "throughput": 3171.23, "total_tokens": 1146376} | |
| {"current_steps": 2920, "total_steps": 3330, "loss": 0.3526, "lr": 2.2845039485854537e-05, "epoch": 26.306306306306308, "percentage": 87.69, "elapsed_time": "0:06:02", "remaining_time": "0:00:50", "throughput": 3171.6, "total_tokens": 1148136} | |
| {"current_steps": 2925, "total_steps": 3330, "loss": 0.3441, "lr": 2.2300945980674226e-05, "epoch": 26.35135135135135, "percentage": 87.84, "elapsed_time": "0:06:02", "remaining_time": "0:00:50", "throughput": 3171.61, "total_tokens": 1149736} | |
| {"current_steps": 2930, "total_steps": 3330, "loss": 0.3332, "lr": 2.176310746377416e-05, "epoch": 26.396396396396398, "percentage": 87.99, "elapsed_time": "0:06:02", "remaining_time": "0:00:49", "throughput": 3172.44, "total_tokens": 1151496} | |
| {"current_steps": 2935, "total_steps": 3330, "loss": 0.3493, "lr": 2.1231538709803488e-05, "epoch": 26.44144144144144, "percentage": 88.14, "elapsed_time": "0:06:03", "remaining_time": "0:00:48", "throughput": 3172.57, "total_tokens": 1153320} | |
| {"current_steps": 2940, "total_steps": 3330, "loss": 0.3377, "lr": 2.0706254321178288e-05, "epoch": 26.486486486486488, "percentage": 88.29, "elapsed_time": "0:06:04", "remaining_time": "0:00:48", "throughput": 3173.34, "total_tokens": 1155240} | |
| {"current_steps": 2945, "total_steps": 3330, "loss": 0.3597, "lr": 2.0187268727680508e-05, "epoch": 26.53153153153153, "percentage": 88.44, "elapsed_time": "0:06:04", "remaining_time": "0:00:47", "throughput": 3173.83, "total_tokens": 1157032} | |
| {"current_steps": 2950, "total_steps": 3330, "loss": 0.3572, "lr": 1.9674596186061516e-05, "epoch": 26.576576576576578, "percentage": 88.59, "elapsed_time": "0:06:05", "remaining_time": "0:00:47", "throughput": 3174.26, "total_tokens": 1158920} | |
| {"current_steps": 2955, "total_steps": 3330, "loss": 0.3529, "lr": 1.916825077965048e-05, "epoch": 26.62162162162162, "percentage": 88.74, "elapsed_time": "0:06:05", "remaining_time": "0:00:46", "throughput": 3176.15, "total_tokens": 1161320} | |
| {"current_steps": 2960, "total_steps": 3330, "loss": 0.3343, "lr": 1.8668246417967606e-05, "epoch": 26.666666666666668, "percentage": 88.89, "elapsed_time": "0:06:06", "remaining_time": "0:00:45", "throughput": 3176.94, "total_tokens": 1163112} | |
| {"current_steps": 2965, "total_steps": 3330, "loss": 0.3403, "lr": 1.8174596836341927e-05, "epoch": 26.71171171171171, "percentage": 89.04, "elapsed_time": "0:06:06", "remaining_time": "0:00:45", "throughput": 3178.02, "total_tokens": 1165320} | |
| {"current_steps": 2970, "total_steps": 3330, "loss": 0.3671, "lr": 1.7687315595533937e-05, "epoch": 26.756756756756758, "percentage": 89.19, "elapsed_time": "0:06:07", "remaining_time": "0:00:44", "throughput": 3178.96, "total_tokens": 1167304} | |
| {"current_steps": 2975, "total_steps": 3330, "loss": 0.3338, "lr": 1.7206416081363253e-05, "epoch": 26.8018018018018, "percentage": 89.34, "elapsed_time": "0:06:07", "remaining_time": "0:00:43", "throughput": 3180.43, "total_tokens": 1169320} | |
| {"current_steps": 2980, "total_steps": 3330, "loss": 0.3442, "lr": 1.6731911504340667e-05, "epoch": 26.846846846846848, "percentage": 89.49, "elapsed_time": "0:06:08", "remaining_time": "0:00:43", "throughput": 3181.2, "total_tokens": 1171208} | |
| {"current_steps": 2985, "total_steps": 3330, "loss": 0.3428, "lr": 1.626381489930545e-05, "epoch": 26.89189189189189, "percentage": 89.64, "elapsed_time": "0:06:08", "remaining_time": "0:00:42", "throughput": 3181.73, "total_tokens": 1173096} | |
| {"current_steps": 2990, "total_steps": 3330, "loss": 0.3512, "lr": 1.5802139125067256e-05, "epoch": 26.936936936936938, "percentage": 89.79, "elapsed_time": "0:06:09", "remaining_time": "0:00:41", "throughput": 3182.01, "total_tokens": 1174792} | |
| {"current_steps": 2995, "total_steps": 3330, "loss": 0.3498, "lr": 1.534689686405272e-05, "epoch": 26.98198198198198, "percentage": 89.94, "elapsed_time": "0:06:09", "remaining_time": "0:00:41", "throughput": 3183.49, "total_tokens": 1176808} | |
| {"current_steps": 3000, "total_steps": 3330, "loss": 0.3536, "lr": 1.489810062195715e-05, "epoch": 27.027027027027028, "percentage": 90.09, "elapsed_time": "0:06:10", "remaining_time": "0:00:40", "throughput": 3180.8, "total_tokens": 1179040} | |
| {"current_steps": 3005, "total_steps": 3330, "loss": 0.3353, "lr": 1.445576272740115e-05, "epoch": 27.07207207207207, "percentage": 90.24, "elapsed_time": "0:06:11", "remaining_time": "0:00:40", "throughput": 3181.8, "total_tokens": 1180928} | |
| {"current_steps": 3006, "total_steps": 3330, "eval_loss": 0.35029834508895874, "epoch": 27.08108108108108, "percentage": 90.27, "elapsed_time": "0:06:12", "remaining_time": "0:00:40", "throughput": 3170.91, "total_tokens": 1181344} | |
| {"current_steps": 3010, "total_steps": 3330, "loss": 0.3471, "lr": 1.4019895331591787e-05, "epoch": 27.117117117117118, "percentage": 90.39, "elapsed_time": "0:06:15", "remaining_time": "0:00:39", "throughput": 3152.34, "total_tokens": 1183072} | |
| {"current_steps": 3015, "total_steps": 3330, "loss": 0.3242, "lr": 1.3590510407988698e-05, "epoch": 27.16216216216216, "percentage": 90.54, "elapsed_time": "0:06:15", "remaining_time": "0:00:39", "throughput": 3152.44, "total_tokens": 1185024} | |
| {"current_steps": 3020, "total_steps": 3330, "loss": 0.3498, "lr": 1.3167619751975501e-05, "epoch": 27.207207207207208, "percentage": 90.69, "elapsed_time": "0:06:16", "remaining_time": "0:00:38", "throughput": 3153.25, "total_tokens": 1186912} | |
| {"current_steps": 3025, "total_steps": 3330, "loss": 0.3498, "lr": 1.275123498053532e-05, "epoch": 27.25225225225225, "percentage": 90.84, "elapsed_time": "0:06:16", "remaining_time": "0:00:38", "throughput": 3153.8, "total_tokens": 1188768} | |
| {"current_steps": 3030, "total_steps": 3330, "loss": 0.3709, "lr": 1.2341367531932101e-05, "epoch": 27.2972972972973, "percentage": 90.99, "elapsed_time": "0:06:17", "remaining_time": "0:00:37", "throughput": 3153.82, "total_tokens": 1190784} | |
| {"current_steps": 3035, "total_steps": 3330, "loss": 0.3335, "lr": 1.1938028665396173e-05, "epoch": 27.34234234234234, "percentage": 91.14, "elapsed_time": "0:06:18", "remaining_time": "0:00:36", "throughput": 3155.12, "total_tokens": 1192896} | |
| {"current_steps": 3040, "total_steps": 3330, "loss": 0.3332, "lr": 1.1541229460814929e-05, "epoch": 27.38738738738739, "percentage": 91.29, "elapsed_time": "0:06:18", "remaining_time": "0:00:36", "throughput": 3155.45, "total_tokens": 1194560} | |
| {"current_steps": 3045, "total_steps": 3330, "loss": 0.3552, "lr": 1.115098081842844e-05, "epoch": 27.43243243243243, "percentage": 91.44, "elapsed_time": "0:06:19", "remaining_time": "0:00:35", "throughput": 3155.94, "total_tokens": 1196320} | |
| {"current_steps": 3050, "total_steps": 3330, "loss": 0.3316, "lr": 1.0767293458530336e-05, "epoch": 27.47747747747748, "percentage": 91.59, "elapsed_time": "0:06:19", "remaining_time": "0:00:34", "throughput": 3156.95, "total_tokens": 1198368} | |
| {"current_steps": 3055, "total_steps": 3330, "loss": 0.3363, "lr": 1.0390177921172862e-05, "epoch": 27.52252252252252, "percentage": 91.74, "elapsed_time": "0:06:20", "remaining_time": "0:00:34", "throughput": 3158.01, "total_tokens": 1200224} | |
| {"current_steps": 3060, "total_steps": 3330, "loss": 0.3267, "lr": 1.0019644565877562e-05, "epoch": 27.56756756756757, "percentage": 91.89, "elapsed_time": "0:06:20", "remaining_time": "0:00:33", "throughput": 3159.92, "total_tokens": 1202496} | |
| {"current_steps": 3065, "total_steps": 3330, "loss": 0.3418, "lr": 9.655703571350789e-06, "epoch": 27.61261261261261, "percentage": 92.04, "elapsed_time": "0:06:21", "remaining_time": "0:00:32", "throughput": 3160.63, "total_tokens": 1204416} | |
| {"current_steps": 3070, "total_steps": 3330, "loss": 0.3501, "lr": 9.298364935203917e-06, "epoch": 27.65765765765766, "percentage": 92.19, "elapsed_time": "0:06:21", "remaining_time": "0:00:32", "throughput": 3161.01, "total_tokens": 1206176} | |
| {"current_steps": 3075, "total_steps": 3330, "loss": 0.3446, "lr": 8.94763847367877e-06, "epoch": 27.7027027027027, "percentage": 92.34, "elapsed_time": "0:06:22", "remaining_time": "0:00:31", "throughput": 3163.24, "total_tokens": 1208512} | |
| {"current_steps": 3080, "total_steps": 3330, "loss": 0.3553, "lr": 8.603533821378046e-06, "epoch": 27.74774774774775, "percentage": 92.49, "elapsed_time": "0:06:22", "remaining_time": "0:00:31", "throughput": 3164.42, "total_tokens": 1210464} | |
| {"current_steps": 3085, "total_steps": 3330, "loss": 0.3434, "lr": 8.26606043100045e-06, "epoch": 27.792792792792792, "percentage": 92.64, "elapsed_time": "0:06:22", "remaining_time": "0:00:30", "throughput": 3166.19, "total_tokens": 1212608} | |
| {"current_steps": 3090, "total_steps": 3330, "loss": 0.3321, "lr": 7.935227573081183e-06, "epoch": 27.83783783783784, "percentage": 92.79, "elapsed_time": "0:06:23", "remaining_time": "0:00:29", "throughput": 3167.65, "total_tokens": 1214624} | |
| {"current_steps": 3095, "total_steps": 3330, "loss": 0.3448, "lr": 7.611044335737366e-06, "epoch": 27.882882882882882, "percentage": 92.94, "elapsed_time": "0:06:23", "remaining_time": "0:00:29", "throughput": 3168.92, "total_tokens": 1216672} | |
| {"current_steps": 3100, "total_steps": 3330, "loss": 0.3406, "lr": 7.293519624418099e-06, "epoch": 27.92792792792793, "percentage": 93.09, "elapsed_time": "0:06:24", "remaining_time": "0:00:28", "throughput": 3170.08, "total_tokens": 1218624} | |
| {"current_steps": 3105, "total_steps": 3330, "loss": 0.3818, "lr": 6.982662161660047e-06, "epoch": 27.972972972972972, "percentage": 93.24, "elapsed_time": "0:06:24", "remaining_time": "0:00:27", "throughput": 3171.5, "total_tokens": 1220672} | |
| {"current_steps": 3110, "total_steps": 3330, "loss": 0.3454, "lr": 6.678480486847771e-06, "epoch": 28.01801801801802, "percentage": 93.39, "elapsed_time": "0:06:25", "remaining_time": "0:00:27", "throughput": 3168.34, "total_tokens": 1222360} | |
| {"current_steps": 3115, "total_steps": 3330, "loss": 0.3346, "lr": 6.380982955979192e-06, "epoch": 28.063063063063062, "percentage": 93.54, "elapsed_time": "0:06:26", "remaining_time": "0:00:26", "throughput": 3169.65, "total_tokens": 1224440} | |
| {"current_steps": 3120, "total_steps": 3330, "loss": 0.3644, "lr": 6.090177741435915e-06, "epoch": 28.10810810810811, "percentage": 93.69, "elapsed_time": "0:06:26", "remaining_time": "0:00:26", "throughput": 3169.94, "total_tokens": 1226136} | |
| {"current_steps": 3125, "total_steps": 3330, "loss": 0.3603, "lr": 5.806072831758852e-06, "epoch": 28.153153153153152, "percentage": 93.84, "elapsed_time": "0:06:27", "remaining_time": "0:00:25", "throughput": 3172.22, "total_tokens": 1228600} | |
| {"current_steps": 3130, "total_steps": 3330, "loss": 0.3471, "lr": 5.528676031428731e-06, "epoch": 28.1981981981982, "percentage": 93.99, "elapsed_time": "0:06:27", "remaining_time": "0:00:24", "throughput": 3173.24, "total_tokens": 1230520} | |
| {"current_steps": 3135, "total_steps": 3330, "loss": 0.3421, "lr": 5.257994960651713e-06, "epoch": 28.243243243243242, "percentage": 94.14, "elapsed_time": "0:06:28", "remaining_time": "0:00:24", "throughput": 3174.59, "total_tokens": 1232536} | |
| {"current_steps": 3140, "total_steps": 3330, "loss": 0.3431, "lr": 4.994037055150114e-06, "epoch": 28.28828828828829, "percentage": 94.29, "elapsed_time": "0:06:28", "remaining_time": "0:00:23", "throughput": 3175.21, "total_tokens": 1234584} | |
| {"current_steps": 3145, "total_steps": 3330, "loss": 0.3514, "lr": 4.736809565958011e-06, "epoch": 28.333333333333332, "percentage": 94.44, "elapsed_time": "0:06:29", "remaining_time": "0:00:22", "throughput": 3175.04, "total_tokens": 1236248} | |
| {"current_steps": 3150, "total_steps": 3330, "loss": 0.3397, "lr": 4.486319559222101e-06, "epoch": 28.37837837837838, "percentage": 94.59, "elapsed_time": "0:06:29", "remaining_time": "0:00:22", "throughput": 3175.13, "total_tokens": 1238008} | |
| {"current_steps": 3155, "total_steps": 3330, "loss": 0.333, "lr": 4.242573916007686e-06, "epoch": 28.423423423423422, "percentage": 94.74, "elapsed_time": "0:06:30", "remaining_time": "0:00:21", "throughput": 3174.97, "total_tokens": 1239608} | |
| {"current_steps": 3160, "total_steps": 3330, "loss": 0.3506, "lr": 4.005579332109627e-06, "epoch": 28.46846846846847, "percentage": 94.89, "elapsed_time": "0:06:30", "remaining_time": "0:00:21", "throughput": 3175.2, "total_tokens": 1241240} | |
| {"current_steps": 3165, "total_steps": 3330, "loss": 0.3549, "lr": 3.7753423178682466e-06, "epoch": 28.513513513513512, "percentage": 95.05, "elapsed_time": "0:06:31", "remaining_time": "0:00:20", "throughput": 3176.35, "total_tokens": 1243160} | |
| {"current_steps": 3170, "total_steps": 3330, "loss": 0.3507, "lr": 3.5518691979906925e-06, "epoch": 28.55855855855856, "percentage": 95.2, "elapsed_time": "0:06:31", "remaining_time": "0:00:19", "throughput": 3178.6, "total_tokens": 1245496} | |
| {"current_steps": 3173, "total_steps": 3330, "eval_loss": 0.35120949149131775, "epoch": 28.585585585585587, "percentage": 95.29, "elapsed_time": "0:06:33", "remaining_time": "0:00:19", "throughput": 3169.48, "total_tokens": 1246872} | |
| {"current_steps": 3175, "total_steps": 3330, "loss": 0.3448, "lr": 3.3351661113769918e-06, "epoch": 28.603603603603602, "percentage": 95.35, "elapsed_time": "0:06:35", "remaining_time": "0:00:19", "throughput": 3151.5, "total_tokens": 1247640} | |
| {"current_steps": 3180, "total_steps": 3330, "loss": 0.3538, "lr": 3.125239010951686e-06, "epoch": 28.64864864864865, "percentage": 95.5, "elapsed_time": "0:06:36", "remaining_time": "0:00:18", "throughput": 3153.04, "total_tokens": 1249848} | |
| {"current_steps": 3185, "total_steps": 3330, "loss": 0.3518, "lr": 2.9220936635000196e-06, "epoch": 28.693693693693692, "percentage": 95.65, "elapsed_time": "0:06:36", "remaining_time": "0:00:18", "throughput": 3153.81, "total_tokens": 1251608} | |
| {"current_steps": 3190, "total_steps": 3330, "loss": 0.3451, "lr": 2.7257356495096754e-06, "epoch": 28.73873873873874, "percentage": 95.8, "elapsed_time": "0:06:37", "remaining_time": "0:00:17", "throughput": 3154.12, "total_tokens": 1253272} | |
| {"current_steps": 3195, "total_steps": 3330, "loss": 0.341, "lr": 2.536170363017426e-06, "epoch": 28.783783783783782, "percentage": 95.95, "elapsed_time": "0:06:37", "remaining_time": "0:00:16", "throughput": 3155.02, "total_tokens": 1255256} | |
| {"current_steps": 3200, "total_steps": 3330, "loss": 0.3323, "lr": 2.3534030114610585e-06, "epoch": 28.82882882882883, "percentage": 96.1, "elapsed_time": "0:06:38", "remaining_time": "0:00:16", "throughput": 3155.47, "total_tokens": 1257176} | |
| {"current_steps": 3205, "total_steps": 3330, "loss": 0.3504, "lr": 2.1774386155361538e-06, "epoch": 28.873873873873872, "percentage": 96.25, "elapsed_time": "0:06:38", "remaining_time": "0:00:15", "throughput": 3156.92, "total_tokens": 1259320} | |
| {"current_steps": 3210, "total_steps": 3330, "loss": 0.3303, "lr": 2.008282009058282e-06, "epoch": 28.91891891891892, "percentage": 96.4, "elapsed_time": "0:06:39", "remaining_time": "0:00:14", "throughput": 3157.34, "total_tokens": 1261208} | |
| {"current_steps": 3215, "total_steps": 3330, "loss": 0.3429, "lr": 1.8459378388302473e-06, "epoch": 28.963963963963963, "percentage": 96.55, "elapsed_time": "0:06:39", "remaining_time": "0:00:14", "throughput": 3157.84, "total_tokens": 1262872} | |
| {"current_steps": 3220, "total_steps": 3330, "loss": 0.3352, "lr": 1.6904105645142442e-06, "epoch": 29.00900900900901, "percentage": 96.7, "elapsed_time": "0:06:40", "remaining_time": "0:00:13", "throughput": 3155.89, "total_tokens": 1265088} | |
| {"current_steps": 3225, "total_steps": 3330, "loss": 0.3588, "lr": 1.5417044585096517e-06, "epoch": 29.054054054054053, "percentage": 96.85, "elapsed_time": "0:06:41", "remaining_time": "0:00:13", "throughput": 3156.57, "total_tokens": 1267008} | |
| {"current_steps": 3230, "total_steps": 3330, "loss": 0.3505, "lr": 1.3998236058353764e-06, "epoch": 29.0990990990991, "percentage": 97.0, "elapsed_time": "0:06:41", "remaining_time": "0:00:12", "throughput": 3157.4, "total_tokens": 1268992} | |
| {"current_steps": 3235, "total_steps": 3330, "loss": 0.3424, "lr": 1.264771904017803e-06, "epoch": 29.144144144144143, "percentage": 97.15, "elapsed_time": "0:06:42", "remaining_time": "0:00:11", "throughput": 3157.39, "total_tokens": 1270752} | |
| {"current_steps": 3240, "total_steps": 3330, "loss": 0.3463, "lr": 1.1365530629836863e-06, "epoch": 29.18918918918919, "percentage": 97.3, "elapsed_time": "0:06:42", "remaining_time": "0:00:11", "throughput": 3157.64, "total_tokens": 1272512} | |
| {"current_steps": 3245, "total_steps": 3330, "loss": 0.3462, "lr": 1.0151706049582322e-06, "epoch": 29.234234234234233, "percentage": 97.45, "elapsed_time": "0:06:43", "remaining_time": "0:00:10", "throughput": 3157.86, "total_tokens": 1274240} | |
| {"current_steps": 3250, "total_steps": 3330, "loss": 0.3445, "lr": 9.006278643683696e-07, "epoch": 29.27927927927928, "percentage": 97.6, "elapsed_time": "0:06:44", "remaining_time": "0:00:09", "throughput": 3158.5, "total_tokens": 1276320} | |
| {"current_steps": 3255, "total_steps": 3330, "loss": 0.3417, "lr": 7.92927987751102e-07, "epoch": 29.324324324324323, "percentage": 97.75, "elapsed_time": "0:06:44", "remaining_time": "0:00:09", "throughput": 3159.31, "total_tokens": 1278400} | |
| {"current_steps": 3260, "total_steps": 3330, "loss": 0.3496, "lr": 6.920739336670756e-07, "epoch": 29.36936936936937, "percentage": 97.9, "elapsed_time": "0:06:45", "remaining_time": "0:00:08", "throughput": 3159.79, "total_tokens": 1280352} | |
| {"current_steps": 3265, "total_steps": 3330, "loss": 0.3384, "lr": 5.980684726193397e-07, "epoch": 29.414414414414413, "percentage": 98.05, "elapsed_time": "0:06:45", "remaining_time": "0:00:08", "throughput": 3161.63, "total_tokens": 1282752} | |
| {"current_steps": 3270, "total_steps": 3330, "loss": 0.3539, "lr": 5.10914186977296e-07, "epoch": 29.45945945945946, "percentage": 98.2, "elapsed_time": "0:06:46", "remaining_time": "0:00:07", "throughput": 3162.52, "total_tokens": 1284704} | |
| {"current_steps": 3275, "total_steps": 3330, "loss": 0.3227, "lr": 4.3061347090558866e-07, "epoch": 29.504504504504503, "percentage": 98.35, "elapsed_time": "0:06:46", "remaining_time": "0:00:06", "throughput": 3163.52, "total_tokens": 1286720} | |
| {"current_steps": 3280, "total_steps": 3330, "loss": 0.353, "lr": 3.5716853029851837e-07, "epoch": 29.54954954954955, "percentage": 98.5, "elapsed_time": "0:06:47", "remaining_time": "0:00:06", "throughput": 3165.86, "total_tokens": 1289120} | |
| {"current_steps": 3285, "total_steps": 3330, "loss": 0.3473, "lr": 2.905813827193127e-07, "epoch": 29.594594594594593, "percentage": 98.65, "elapsed_time": "0:06:47", "remaining_time": "0:00:05", "throughput": 3166.5, "total_tokens": 1290944} | |
| {"current_steps": 3290, "total_steps": 3330, "loss": 0.3605, "lr": 2.3085385734475384e-07, "epoch": 29.63963963963964, "percentage": 98.8, "elapsed_time": "0:06:48", "remaining_time": "0:00:04", "throughput": 3166.9, "total_tokens": 1292832} | |
| {"current_steps": 3295, "total_steps": 3330, "loss": 0.3441, "lr": 1.7798759491499673e-07, "epoch": 29.684684684684683, "percentage": 98.95, "elapsed_time": "0:06:48", "remaining_time": "0:00:04", "throughput": 3167.06, "total_tokens": 1294560} | |
| {"current_steps": 3300, "total_steps": 3330, "loss": 0.3563, "lr": 1.3198404768835491e-07, "epoch": 29.72972972972973, "percentage": 99.1, "elapsed_time": "0:06:49", "remaining_time": "0:00:03", "throughput": 3167.37, "total_tokens": 1296352} | |
| {"current_steps": 3305, "total_steps": 3330, "loss": 0.339, "lr": 9.284447940152707e-08, "epoch": 29.774774774774773, "percentage": 99.25, "elapsed_time": "0:06:49", "remaining_time": "0:00:03", "throughput": 3167.61, "total_tokens": 1298208} | |
| {"current_steps": 3310, "total_steps": 3330, "loss": 0.357, "lr": 6.056996523484682e-08, "epoch": 29.81981981981982, "percentage": 99.4, "elapsed_time": "0:06:50", "remaining_time": "0:00:02", "throughput": 3169.04, "total_tokens": 1300288} | |
| {"current_steps": 3315, "total_steps": 3330, "loss": 0.3343, "lr": 3.516139178272315e-08, "epoch": 29.864864864864863, "percentage": 99.55, "elapsed_time": "0:06:50", "remaining_time": "0:00:01", "throughput": 3171.15, "total_tokens": 1302656} | |
| {"current_steps": 3320, "total_steps": 3330, "loss": 0.353, "lr": 1.6619457029243278e-08, "epoch": 29.90990990990991, "percentage": 99.7, "elapsed_time": "0:06:51", "remaining_time": "0:00:01", "throughput": 3171.8, "total_tokens": 1304320} | |
| {"current_steps": 3325, "total_steps": 3330, "loss": 0.3341, "lr": 4.944670329187772e-09, "epoch": 29.954954954954957, "percentage": 99.85, "elapsed_time": "0:06:51", "remaining_time": "0:00:00", "throughput": 3173.53, "total_tokens": 1306432} | |
| {"current_steps": 3330, "total_steps": 3330, "loss": 0.3352, "lr": 1.373523937919785e-10, "epoch": 30.0, "percentage": 100.0, "elapsed_time": "0:06:52", "remaining_time": "0:00:00", "throughput": 3173.23, "total_tokens": 1308280} | |
| {"current_steps": 3330, "total_steps": 3330, "epoch": 30.0, "percentage": 100.0, "elapsed_time": "0:06:54", "remaining_time": "0:00:00", "throughput": 3153.9, "total_tokens": 1308280} | |