diff --git "a/trainer_log.jsonl" "b/trainer_log.jsonl" new file mode 100644--- /dev/null +++ "b/trainer_log.jsonl" @@ -0,0 +1,429 @@ +{"current_steps": 5, "total_steps": 42420, "loss": 0.6057, "lr": 4.714757190004715e-08, "epoch": 0.0023573785950023575, "percentage": 0.01, "elapsed_time": "0:00:01", "remaining_time": "4:16:53", "throughput": 2853.03, "total_tokens": 5184} +{"current_steps": 10, "total_steps": 42420, "loss": 0.5862, "lr": 1.0608203677510608e-07, "epoch": 0.004714757190004715, "percentage": 0.02, "elapsed_time": "0:00:02", "remaining_time": "3:13:39", "throughput": 3503.96, "total_tokens": 9600} +{"current_steps": 15, "total_steps": 42420, "loss": 0.3688, "lr": 1.6501650165016504e-07, "epoch": 0.007072135785007072, "percentage": 0.04, "elapsed_time": "0:00:03", "remaining_time": "2:57:35", "throughput": 3803.35, "total_tokens": 14336} +{"current_steps": 20, "total_steps": 42420, "loss": 0.3121, "lr": 2.2395096652522396e-07, "epoch": 0.00942951438000943, "percentage": 0.05, "elapsed_time": "0:00:04", "remaining_time": "2:53:24", "throughput": 4010.03, "total_tokens": 19680} +{"current_steps": 25, "total_steps": 42420, "loss": 0.9506, "lr": 2.828854314002829e-07, "epoch": 0.011786892975011787, "percentage": 0.06, "elapsed_time": "0:00:05", "remaining_time": "2:47:12", "throughput": 4153.88, "total_tokens": 24576} +{"current_steps": 30, "total_steps": 42420, "loss": 0.342, "lr": 3.4181989627534184e-07, "epoch": 0.014144271570014143, "percentage": 0.07, "elapsed_time": "0:00:06", "remaining_time": "2:42:21", "throughput": 4237.66, "total_tokens": 29216} +{"current_steps": 35, "total_steps": 42420, "loss": 0.119, "lr": 4.007543611504008e-07, "epoch": 0.0165016501650165, "percentage": 0.08, "elapsed_time": "0:00:07", "remaining_time": "2:38:12", "throughput": 4266.03, "total_tokens": 33440} +{"current_steps": 40, "total_steps": 42420, "loss": 0.5443, "lr": 4.5968882602545973e-07, "epoch": 0.01885902876001886, "percentage": 0.09, "elapsed_time": "0:00:08", "remaining_time": "2:35:54", "throughput": 4331.04, "total_tokens": 38240} +{"current_steps": 45, "total_steps": 42420, "loss": 0.3394, "lr": 5.186232909005186e-07, "epoch": 0.021216407355021217, "percentage": 0.11, "elapsed_time": "0:00:09", "remaining_time": "2:34:42", "throughput": 4392.01, "total_tokens": 43296} +{"current_steps": 50, "total_steps": 42420, "loss": 0.1801, "lr": 5.775577557755775e-07, "epoch": 0.023573785950023574, "percentage": 0.12, "elapsed_time": "0:00:10", "remaining_time": "2:32:34", "throughput": 4422.6, "total_tokens": 47776} +{"current_steps": 55, "total_steps": 42420, "loss": 0.6421, "lr": 6.364922206506365e-07, "epoch": 0.02593116454502593, "percentage": 0.13, "elapsed_time": "0:00:11", "remaining_time": "2:31:04", "throughput": 4465.08, "total_tokens": 52544} +{"current_steps": 60, "total_steps": 42420, "loss": 0.3972, "lr": 6.954266855256955e-07, "epoch": 0.028288543140028287, "percentage": 0.14, "elapsed_time": "0:00:12", "remaining_time": "2:31:50", "throughput": 4508.2, "total_tokens": 58176} +{"current_steps": 65, "total_steps": 42420, "loss": 0.7584, "lr": 7.543611504007544e-07, "epoch": 0.030645921735030647, "percentage": 0.15, "elapsed_time": "0:00:13", "remaining_time": "2:31:10", "throughput": 4519.7, "total_tokens": 62912} +{"current_steps": 70, "total_steps": 42420, "loss": 0.6401, "lr": 8.132956152758133e-07, "epoch": 0.033003300330033, "percentage": 0.17, "elapsed_time": "0:00:15", "remaining_time": "2:32:50", "throughput": 4551.77, "total_tokens": 68992} +{"current_steps": 75, "total_steps": 42420, "loss": 0.8128, "lr": 8.722300801508723e-07, "epoch": 0.03536067892503536, "percentage": 0.18, "elapsed_time": "0:00:16", "remaining_time": "2:31:00", "throughput": 4568.32, "total_tokens": 73312} +{"current_steps": 80, "total_steps": 42420, "loss": 0.6709, "lr": 9.311645450259312e-07, "epoch": 0.03771805752003772, "percentage": 0.19, "elapsed_time": "0:00:17", "remaining_time": "2:32:07", "throughput": 4579.57, "total_tokens": 78976} +{"current_steps": 85, "total_steps": 42420, "loss": 0.4635, "lr": 9.900990099009902e-07, "epoch": 0.040075436115040074, "percentage": 0.2, "elapsed_time": "0:00:18", "remaining_time": "2:31:30", "throughput": 4586.32, "total_tokens": 83712} +{"current_steps": 90, "total_steps": 42420, "loss": 0.3784, "lr": 1.0490334747760492e-06, "epoch": 0.042432814710042434, "percentage": 0.21, "elapsed_time": "0:00:19", "remaining_time": "2:30:19", "throughput": 4585.64, "total_tokens": 87936} +{"current_steps": 95, "total_steps": 42420, "loss": 0.2236, "lr": 1.107967939651108e-06, "epoch": 0.04479019330504479, "percentage": 0.22, "elapsed_time": "0:00:20", "remaining_time": "2:29:48", "throughput": 4594.74, "total_tokens": 92704} +{"current_steps": 100, "total_steps": 42420, "loss": 0.1863, "lr": 1.166902404526167e-06, "epoch": 0.04714757190004715, "percentage": 0.24, "elapsed_time": "0:00:21", "remaining_time": "2:29:33", "throughput": 4605.83, "total_tokens": 97664} +{"current_steps": 105, "total_steps": 42420, "loss": 0.1942, "lr": 1.2258368694012257e-06, "epoch": 0.04950495049504951, "percentage": 0.25, "elapsed_time": "0:00:22", "remaining_time": "2:28:57", "throughput": 4610.2, "total_tokens": 102240} +{"current_steps": 110, "total_steps": 42420, "loss": 0.2198, "lr": 1.284771334276285e-06, "epoch": 0.05186232909005186, "percentage": 0.26, "elapsed_time": "0:00:23", "remaining_time": "2:29:06", "throughput": 4613.13, "total_tokens": 107296} +{"current_steps": 115, "total_steps": 42420, "loss": 0.3155, "lr": 1.343705799151344e-06, "epoch": 0.05421970768505422, "percentage": 0.27, "elapsed_time": "0:00:24", "remaining_time": "2:29:41", "throughput": 4627.89, "total_tokens": 112992} +{"current_steps": 120, "total_steps": 42420, "loss": 0.3416, "lr": 1.4026402640264027e-06, "epoch": 0.056577086280056574, "percentage": 0.28, "elapsed_time": "0:00:25", "remaining_time": "2:30:13", "throughput": 4632.94, "total_tokens": 118464} +{"current_steps": 125, "total_steps": 42420, "loss": 0.4174, "lr": 1.4615747289014617e-06, "epoch": 0.058934464875058934, "percentage": 0.29, "elapsed_time": "0:00:26", "remaining_time": "2:31:25", "throughput": 4640.38, "total_tokens": 124608} +{"current_steps": 130, "total_steps": 42420, "loss": 0.4644, "lr": 1.5205091937765205e-06, "epoch": 0.061291843470061294, "percentage": 0.31, "elapsed_time": "0:00:28", "remaining_time": "2:32:50", "throughput": 4647.51, "total_tokens": 131008} +{"current_steps": 135, "total_steps": 42420, "loss": 0.2333, "lr": 1.5794436586515793e-06, "epoch": 0.06364922206506365, "percentage": 0.32, "elapsed_time": "0:00:29", "remaining_time": "2:33:02", "throughput": 4655.44, "total_tokens": 136480} +{"current_steps": 140, "total_steps": 42420, "loss": 0.6784, "lr": 1.6383781235266383e-06, "epoch": 0.066006600660066, "percentage": 0.33, "elapsed_time": "0:00:30", "remaining_time": "2:32:39", "throughput": 4658.27, "total_tokens": 141280} +{"current_steps": 145, "total_steps": 42420, "loss": 0.6606, "lr": 1.6973125884016973e-06, "epoch": 0.06836397925506836, "percentage": 0.34, "elapsed_time": "0:00:31", "remaining_time": "2:32:33", "throughput": 4662.93, "total_tokens": 146400} +{"current_steps": 150, "total_steps": 42420, "loss": 0.4081, "lr": 1.7562470532767563e-06, "epoch": 0.07072135785007072, "percentage": 0.35, "elapsed_time": "0:00:32", "remaining_time": "2:32:17", "throughput": 4669.16, "total_tokens": 151392} +{"current_steps": 155, "total_steps": 42420, "loss": 0.4494, "lr": 1.8151815181518153e-06, "epoch": 0.07307873644507308, "percentage": 0.37, "elapsed_time": "0:00:33", "remaining_time": "2:32:36", "throughput": 4668.53, "total_tokens": 156768} +{"current_steps": 160, "total_steps": 42420, "loss": 0.2098, "lr": 1.874115983026874e-06, "epoch": 0.07543611504007544, "percentage": 0.38, "elapsed_time": "0:00:34", "remaining_time": "2:32:05", "throughput": 4666.36, "total_tokens": 161216} +{"current_steps": 165, "total_steps": 42420, "loss": 0.3315, "lr": 1.933050447901933e-06, "epoch": 0.07779349363507779, "percentage": 0.39, "elapsed_time": "0:00:35", "remaining_time": "2:32:06", "throughput": 4672.62, "total_tokens": 166528} +{"current_steps": 170, "total_steps": 42420, "loss": 0.3994, "lr": 1.991984912776992e-06, "epoch": 0.08015087223008015, "percentage": 0.4, "elapsed_time": "0:00:36", "remaining_time": "2:32:24", "throughput": 4676.29, "total_tokens": 172064} +{"current_steps": 175, "total_steps": 42420, "loss": 0.4331, "lr": 2.050919377652051e-06, "epoch": 0.08250825082508251, "percentage": 0.41, "elapsed_time": "0:00:37", "remaining_time": "2:32:21", "throughput": 4681.31, "total_tokens": 177280} +{"current_steps": 180, "total_steps": 42420, "loss": 0.5134, "lr": 2.1098538425271103e-06, "epoch": 0.08486562942008487, "percentage": 0.42, "elapsed_time": "0:00:38", "remaining_time": "2:31:38", "throughput": 4679.86, "total_tokens": 181440} +{"current_steps": 185, "total_steps": 42420, "loss": 0.4929, "lr": 2.1687883074021686e-06, "epoch": 0.08722300801508723, "percentage": 0.44, "elapsed_time": "0:00:39", "remaining_time": "2:31:24", "throughput": 4686.59, "total_tokens": 186496} +{"current_steps": 190, "total_steps": 42420, "loss": 0.213, "lr": 2.227722772277228e-06, "epoch": 0.08958038661008957, "percentage": 0.45, "elapsed_time": "0:00:40", "remaining_time": "2:30:46", "throughput": 4685.15, "total_tokens": 190688} +{"current_steps": 195, "total_steps": 42420, "loss": 0.3682, "lr": 2.2866572371522866e-06, "epoch": 0.09193776520509193, "percentage": 0.46, "elapsed_time": "0:00:41", "remaining_time": "2:30:38", "throughput": 4687.34, "total_tokens": 195648} +{"current_steps": 200, "total_steps": 42420, "loss": 0.3624, "lr": 2.345591702027346e-06, "epoch": 0.0942951438000943, "percentage": 0.47, "elapsed_time": "0:00:42", "remaining_time": "2:30:26", "throughput": 4689.53, "total_tokens": 200512} +{"current_steps": 205, "total_steps": 42420, "loss": 0.7829, "lr": 2.4045261669024046e-06, "epoch": 0.09665252239509665, "percentage": 0.48, "elapsed_time": "0:00:43", "remaining_time": "2:30:28", "throughput": 4695.17, "total_tokens": 205856} +{"current_steps": 210, "total_steps": 42420, "loss": 0.5748, "lr": 2.463460631777464e-06, "epoch": 0.09900990099009901, "percentage": 0.5, "elapsed_time": "0:00:44", "remaining_time": "2:30:18", "throughput": 4697.79, "total_tokens": 210784} +{"current_steps": 215, "total_steps": 42420, "loss": 0.4732, "lr": 2.5223950966525226e-06, "epoch": 0.10136727958510136, "percentage": 0.51, "elapsed_time": "0:00:45", "remaining_time": "2:29:58", "throughput": 4697.91, "total_tokens": 215360} +{"current_steps": 220, "total_steps": 42420, "loss": 0.7254, "lr": 2.5813295615275814e-06, "epoch": 0.10372465818010372, "percentage": 0.52, "elapsed_time": "0:00:46", "remaining_time": "2:29:44", "throughput": 4698.24, "total_tokens": 220064} +{"current_steps": 225, "total_steps": 42420, "loss": 0.5303, "lr": 2.6402640264026406e-06, "epoch": 0.10608203677510608, "percentage": 0.53, "elapsed_time": "0:00:47", "remaining_time": "2:29:16", "throughput": 4697.27, "total_tokens": 224352} +{"current_steps": 230, "total_steps": 42420, "loss": 0.4151, "lr": 2.6991984912776994e-06, "epoch": 0.10843941537010844, "percentage": 0.54, "elapsed_time": "0:00:48", "remaining_time": "2:29:01", "throughput": 4699.52, "total_tokens": 229088} +{"current_steps": 235, "total_steps": 42420, "loss": 0.3585, "lr": 2.7581329561527586e-06, "epoch": 0.1107967939651108, "percentage": 0.55, "elapsed_time": "0:00:49", "remaining_time": "2:28:49", "throughput": 4700.79, "total_tokens": 233824} +{"current_steps": 240, "total_steps": 42420, "loss": 0.6327, "lr": 2.817067421027817e-06, "epoch": 0.11315417256011315, "percentage": 0.57, "elapsed_time": "0:00:50", "remaining_time": "2:28:42", "throughput": 4703.88, "total_tokens": 238816} +{"current_steps": 245, "total_steps": 42420, "loss": 0.6112, "lr": 2.876001885902876e-06, "epoch": 0.11551155115511551, "percentage": 0.58, "elapsed_time": "0:00:51", "remaining_time": "2:28:21", "throughput": 4705.45, "total_tokens": 243328} +{"current_steps": 250, "total_steps": 42420, "loss": 0.34, "lr": 2.934936350777935e-06, "epoch": 0.11786892975011787, "percentage": 0.59, "elapsed_time": "0:00:52", "remaining_time": "2:28:35", "throughput": 4709.14, "total_tokens": 248896} +{"current_steps": 255, "total_steps": 42420, "loss": 0.2976, "lr": 2.993870815652994e-06, "epoch": 0.12022630834512023, "percentage": 0.6, "elapsed_time": "0:00:53", "remaining_time": "2:28:15", "throughput": 4711.13, "total_tokens": 253440} +{"current_steps": 260, "total_steps": 42420, "loss": 0.4665, "lr": 3.052805280528053e-06, "epoch": 0.12258368694012259, "percentage": 0.61, "elapsed_time": "0:00:54", "remaining_time": "2:28:07", "throughput": 4710.98, "total_tokens": 258208} +{"current_steps": 265, "total_steps": 42420, "loss": 0.1089, "lr": 3.1117397454031117e-06, "epoch": 0.12494106553512493, "percentage": 0.62, "elapsed_time": "0:00:55", "remaining_time": "2:27:43", "throughput": 4712.58, "total_tokens": 262592} +{"current_steps": 270, "total_steps": 42420, "loss": 0.526, "lr": 3.170674210278171e-06, "epoch": 0.1272984441301273, "percentage": 0.64, "elapsed_time": "0:00:56", "remaining_time": "2:27:38", "throughput": 4716.81, "total_tokens": 267648} +{"current_steps": 275, "total_steps": 42420, "loss": 0.8014, "lr": 3.2296086751532297e-06, "epoch": 0.12965582272512965, "percentage": 0.65, "elapsed_time": "0:00:57", "remaining_time": "2:27:27", "throughput": 4718.28, "total_tokens": 272384} +{"current_steps": 280, "total_steps": 42420, "loss": 0.3956, "lr": 3.2885431400282885e-06, "epoch": 0.132013201320132, "percentage": 0.66, "elapsed_time": "0:00:58", "remaining_time": "2:27:54", "throughput": 4722.32, "total_tokens": 278464} +{"current_steps": 285, "total_steps": 42420, "loss": 0.2679, "lr": 3.3474776049033477e-06, "epoch": 0.13437057991513437, "percentage": 0.67, "elapsed_time": "0:00:59", "remaining_time": "2:27:29", "throughput": 4718.17, "total_tokens": 282432} +{"current_steps": 290, "total_steps": 42420, "loss": 0.4537, "lr": 3.4064120697784065e-06, "epoch": 0.13672795851013672, "percentage": 0.68, "elapsed_time": "0:01:00", "remaining_time": "2:27:35", "throughput": 4720.31, "total_tokens": 287744} +{"current_steps": 295, "total_steps": 42420, "loss": 0.5024, "lr": 3.4653465346534657e-06, "epoch": 0.1390853371051391, "percentage": 0.7, "elapsed_time": "0:01:01", "remaining_time": "2:27:13", "throughput": 4717.77, "total_tokens": 291840} +{"current_steps": 300, "total_steps": 42420, "loss": 0.5184, "lr": 3.5242809995285245e-06, "epoch": 0.14144271570014144, "percentage": 0.71, "elapsed_time": "0:01:02", "remaining_time": "2:27:11", "throughput": 4720.67, "total_tokens": 296928} +{"current_steps": 305, "total_steps": 42420, "loss": 0.3396, "lr": 3.5832154644035833e-06, "epoch": 0.1438000942951438, "percentage": 0.72, "elapsed_time": "0:01:04", "remaining_time": "2:27:18", "throughput": 4722.54, "total_tokens": 302272} +{"current_steps": 310, "total_steps": 42420, "loss": 0.5679, "lr": 3.6421499292786425e-06, "epoch": 0.14615747289014616, "percentage": 0.73, "elapsed_time": "0:01:04", "remaining_time": "2:27:00", "throughput": 4722.94, "total_tokens": 306688} +{"current_steps": 315, "total_steps": 42420, "loss": 0.7053, "lr": 3.7010843941537013e-06, "epoch": 0.1485148514851485, "percentage": 0.74, "elapsed_time": "0:01:05", "remaining_time": "2:26:31", "throughput": 4721.3, "total_tokens": 310528} +{"current_steps": 320, "total_steps": 42420, "loss": 0.4407, "lr": 3.7600188590287605e-06, "epoch": 0.15087223008015088, "percentage": 0.75, "elapsed_time": "0:01:06", "remaining_time": "2:26:15", "throughput": 4721.17, "total_tokens": 314912} +{"current_steps": 325, "total_steps": 42420, "loss": 0.2513, "lr": 3.818953323903819e-06, "epoch": 0.15322960867515323, "percentage": 0.77, "elapsed_time": "0:01:07", "remaining_time": "2:26:08", "throughput": 4722.49, "total_tokens": 319712} +{"current_steps": 330, "total_steps": 42420, "loss": 0.7585, "lr": 3.877887788778878e-06, "epoch": 0.15558698727015557, "percentage": 0.78, "elapsed_time": "0:01:08", "remaining_time": "2:26:07", "throughput": 4723.69, "total_tokens": 324704} +{"current_steps": 335, "total_steps": 42420, "loss": 0.4442, "lr": 3.936822253653937e-06, "epoch": 0.15794436586515795, "percentage": 0.79, "elapsed_time": "0:01:09", "remaining_time": "2:26:13", "throughput": 4726.58, "total_tokens": 330112} +{"current_steps": 340, "total_steps": 42420, "loss": 0.592, "lr": 3.995756718528996e-06, "epoch": 0.1603017444601603, "percentage": 0.8, "elapsed_time": "0:01:11", "remaining_time": "2:27:19", "throughput": 4732.21, "total_tokens": 337984} +{"current_steps": 345, "total_steps": 42420, "loss": 0.6602, "lr": 4.054691183404055e-06, "epoch": 0.16265912305516267, "percentage": 0.81, "elapsed_time": "0:01:12", "remaining_time": "2:27:18", "throughput": 4734.87, "total_tokens": 343136} +{"current_steps": 350, "total_steps": 42420, "loss": 0.4025, "lr": 4.113625648279114e-06, "epoch": 0.16501650165016502, "percentage": 0.83, "elapsed_time": "0:01:13", "remaining_time": "2:27:34", "throughput": 4737.58, "total_tokens": 348992} +{"current_steps": 355, "total_steps": 42420, "loss": 0.6274, "lr": 4.172560113154173e-06, "epoch": 0.16737388024516736, "percentage": 0.84, "elapsed_time": "0:01:14", "remaining_time": "2:27:20", "throughput": 4738.24, "total_tokens": 353504} +{"current_steps": 360, "total_steps": 42420, "loss": 0.4134, "lr": 4.231494578029232e-06, "epoch": 0.16973125884016974, "percentage": 0.85, "elapsed_time": "0:01:15", "remaining_time": "2:27:30", "throughput": 4740.35, "total_tokens": 359104} +{"current_steps": 365, "total_steps": 42420, "loss": 0.3022, "lr": 4.29042904290429e-06, "epoch": 0.17208863743517208, "percentage": 0.86, "elapsed_time": "0:01:16", "remaining_time": "2:27:23", "throughput": 4739.01, "total_tokens": 363744} +{"current_steps": 370, "total_steps": 42420, "loss": 0.6321, "lr": 4.34936350777935e-06, "epoch": 0.17444601603017446, "percentage": 0.87, "elapsed_time": "0:01:17", "remaining_time": "2:27:22", "throughput": 4740.64, "total_tokens": 368864} +{"current_steps": 375, "total_steps": 42420, "loss": 0.4408, "lr": 4.408297972654409e-06, "epoch": 0.1768033946251768, "percentage": 0.88, "elapsed_time": "0:01:18", "remaining_time": "2:27:20", "throughput": 4741.12, "total_tokens": 373824} +{"current_steps": 380, "total_steps": 42420, "loss": 0.4899, "lr": 4.467232437529468e-06, "epoch": 0.17916077322017915, "percentage": 0.9, "elapsed_time": "0:01:19", "remaining_time": "2:27:18", "throughput": 4736.98, "total_tokens": 378432} +{"current_steps": 385, "total_steps": 42420, "loss": 0.3939, "lr": 4.526166902404526e-06, "epoch": 0.18151815181518152, "percentage": 0.91, "elapsed_time": "0:01:20", "remaining_time": "2:27:21", "throughput": 4736.47, "total_tokens": 383552} +{"current_steps": 390, "total_steps": 42420, "loss": 0.5403, "lr": 4.585101367279585e-06, "epoch": 0.18387553041018387, "percentage": 0.92, "elapsed_time": "0:01:22", "remaining_time": "2:27:23", "throughput": 4737.41, "total_tokens": 388768} +{"current_steps": 395, "total_steps": 42420, "loss": 0.2605, "lr": 4.644035832154645e-06, "epoch": 0.18623290900518624, "percentage": 0.93, "elapsed_time": "0:01:22", "remaining_time": "2:27:09", "throughput": 4737.09, "total_tokens": 393120} +{"current_steps": 400, "total_steps": 42420, "loss": 0.2928, "lr": 4.702970297029704e-06, "epoch": 0.1885902876001886, "percentage": 0.94, "elapsed_time": "0:01:23", "remaining_time": "2:27:02", "throughput": 4737.52, "total_tokens": 397856} +{"current_steps": 405, "total_steps": 42420, "loss": 0.5416, "lr": 4.7619047619047615e-06, "epoch": 0.19094766619519093, "percentage": 0.95, "elapsed_time": "0:01:25", "remaining_time": "2:26:58", "throughput": 4739.14, "total_tokens": 402848} +{"current_steps": 410, "total_steps": 42420, "loss": 0.4452, "lr": 4.820839226779821e-06, "epoch": 0.1933050447901933, "percentage": 0.97, "elapsed_time": "0:01:26", "remaining_time": "2:26:56", "throughput": 4741.03, "total_tokens": 407936} +{"current_steps": 415, "total_steps": 42420, "loss": 0.5347, "lr": 4.87977369165488e-06, "epoch": 0.19566242338519566, "percentage": 0.98, "elapsed_time": "0:01:27", "remaining_time": "2:26:55", "throughput": 4742.18, "total_tokens": 413024} +{"current_steps": 420, "total_steps": 42420, "loss": 0.6823, "lr": 4.93870815652994e-06, "epoch": 0.19801980198019803, "percentage": 0.99, "elapsed_time": "0:01:28", "remaining_time": "2:27:08", "throughput": 4742.57, "total_tokens": 418720} +{"current_steps": 425, "total_steps": 42420, "loss": 0.4375, "lr": 4.9976426214049975e-06, "epoch": 0.20037718057520038, "percentage": 1.0, "elapsed_time": "0:01:29", "remaining_time": "2:27:41", "throughput": 4743.87, "total_tokens": 425440} +{"current_steps": 430, "total_steps": 42420, "loss": 0.796, "lr": 5.056577086280056e-06, "epoch": 0.20273455917020272, "percentage": 1.01, "elapsed_time": "0:01:30", "remaining_time": "2:27:40", "throughput": 4744.18, "total_tokens": 430464} +{"current_steps": 435, "total_steps": 42420, "loss": 0.5042, "lr": 5.115511551155116e-06, "epoch": 0.2050919377652051, "percentage": 1.03, "elapsed_time": "0:01:31", "remaining_time": "2:27:43", "throughput": 4745.53, "total_tokens": 435776} +{"current_steps": 440, "total_steps": 42420, "loss": 0.3152, "lr": 5.174446016030175e-06, "epoch": 0.20744931636020744, "percentage": 1.04, "elapsed_time": "0:01:32", "remaining_time": "2:27:36", "throughput": 4743.68, "total_tokens": 440352} +{"current_steps": 445, "total_steps": 42420, "loss": 0.4815, "lr": 5.2333804809052335e-06, "epoch": 0.20980669495520982, "percentage": 1.05, "elapsed_time": "0:01:33", "remaining_time": "2:27:22", "throughput": 4744.37, "total_tokens": 444768} +{"current_steps": 450, "total_steps": 42420, "loss": 0.6835, "lr": 5.292314945780292e-06, "epoch": 0.21216407355021216, "percentage": 1.06, "elapsed_time": "0:01:34", "remaining_time": "2:27:32", "throughput": 4746.47, "total_tokens": 450528} +{"current_steps": 455, "total_steps": 42420, "loss": 0.969, "lr": 5.351249410655351e-06, "epoch": 0.2145214521452145, "percentage": 1.07, "elapsed_time": "0:01:35", "remaining_time": "2:27:23", "throughput": 4745.31, "total_tokens": 454976} +{"current_steps": 460, "total_steps": 42420, "loss": 0.9386, "lr": 5.410183875530411e-06, "epoch": 0.21687883074021688, "percentage": 1.08, "elapsed_time": "0:01:36", "remaining_time": "2:27:04", "throughput": 4745.22, "total_tokens": 459072} +{"current_steps": 465, "total_steps": 42420, "loss": 0.4296, "lr": 5.4691183404054695e-06, "epoch": 0.21923620933521923, "percentage": 1.1, "elapsed_time": "0:01:37", "remaining_time": "2:27:06", "throughput": 4746.44, "total_tokens": 464320} +{"current_steps": 470, "total_steps": 42420, "loss": 0.2475, "lr": 5.528052805280528e-06, "epoch": 0.2215935879302216, "percentage": 1.11, "elapsed_time": "0:01:38", "remaining_time": "2:27:00", "throughput": 4746.98, "total_tokens": 469120} +{"current_steps": 475, "total_steps": 42420, "loss": 0.6134, "lr": 5.586987270155587e-06, "epoch": 0.22395096652522395, "percentage": 1.12, "elapsed_time": "0:01:39", "remaining_time": "2:26:55", "throughput": 4747.25, "total_tokens": 473920} +{"current_steps": 480, "total_steps": 42420, "loss": 0.7439, "lr": 5.645921735030646e-06, "epoch": 0.2263083451202263, "percentage": 1.13, "elapsed_time": "0:01:40", "remaining_time": "2:26:41", "throughput": 4744.61, "total_tokens": 477920} +{"current_steps": 485, "total_steps": 42420, "loss": 0.639, "lr": 5.7048561999057055e-06, "epoch": 0.22866572371522867, "percentage": 1.14, "elapsed_time": "0:01:41", "remaining_time": "2:26:41", "throughput": 4745.35, "total_tokens": 483040} +{"current_steps": 490, "total_steps": 42420, "loss": 0.8233, "lr": 5.763790664780764e-06, "epoch": 0.23102310231023102, "percentage": 1.16, "elapsed_time": "0:01:42", "remaining_time": "2:26:37", "throughput": 4745.18, "total_tokens": 487872} +{"current_steps": 495, "total_steps": 42420, "loss": 0.287, "lr": 5.822725129655823e-06, "epoch": 0.2333804809052334, "percentage": 1.17, "elapsed_time": "0:01:43", "remaining_time": "2:26:35", "throughput": 4747.18, "total_tokens": 492960} +{"current_steps": 500, "total_steps": 42420, "loss": 0.1795, "lr": 5.881659594530882e-06, "epoch": 0.23573785950023574, "percentage": 1.18, "elapsed_time": "0:01:44", "remaining_time": "2:26:39", "throughput": 4748.49, "total_tokens": 498400} +{"current_steps": 505, "total_steps": 42420, "loss": 0.27, "lr": 5.940594059405941e-06, "epoch": 0.23809523809523808, "percentage": 1.19, "elapsed_time": "0:01:45", "remaining_time": "2:26:27", "throughput": 4748.69, "total_tokens": 502784} +{"current_steps": 510, "total_steps": 42420, "loss": 0.4226, "lr": 5.999528524281e-06, "epoch": 0.24045261669024046, "percentage": 1.2, "elapsed_time": "0:01:46", "remaining_time": "2:26:26", "throughput": 4748.99, "total_tokens": 507776} +{"current_steps": 515, "total_steps": 42420, "loss": 0.2738, "lr": 6.058462989156059e-06, "epoch": 0.2428099952852428, "percentage": 1.21, "elapsed_time": "0:01:47", "remaining_time": "2:26:19", "throughput": 4749.65, "total_tokens": 512480} +{"current_steps": 520, "total_steps": 42420, "loss": 0.4753, "lr": 6.117397454031118e-06, "epoch": 0.24516737388024518, "percentage": 1.23, "elapsed_time": "0:01:49", "remaining_time": "2:26:24", "throughput": 4747.97, "total_tokens": 517600} +{"current_steps": 525, "total_steps": 42420, "loss": 0.5113, "lr": 6.176331918906177e-06, "epoch": 0.24752475247524752, "percentage": 1.24, "elapsed_time": "0:01:50", "remaining_time": "2:26:35", "throughput": 4749.79, "total_tokens": 523520} +{"current_steps": 530, "total_steps": 42420, "loss": 0.321, "lr": 6.235266383781235e-06, "epoch": 0.24988213107024987, "percentage": 1.25, "elapsed_time": "0:01:51", "remaining_time": "2:26:20", "throughput": 4748.94, "total_tokens": 527584} +{"current_steps": 535, "total_steps": 42420, "loss": 0.621, "lr": 6.294200848656294e-06, "epoch": 0.2522395096652522, "percentage": 1.26, "elapsed_time": "0:01:52", "remaining_time": "2:26:21", "throughput": 4749.35, "total_tokens": 532736} +{"current_steps": 540, "total_steps": 42420, "loss": 0.6177, "lr": 6.353135313531354e-06, "epoch": 0.2545968882602546, "percentage": 1.27, "elapsed_time": "0:01:53", "remaining_time": "2:26:19", "throughput": 4747.44, "total_tokens": 537440} +{"current_steps": 545, "total_steps": 42420, "loss": 0.4219, "lr": 6.412069778406413e-06, "epoch": 0.25695426685525696, "percentage": 1.28, "elapsed_time": "0:01:54", "remaining_time": "2:26:25", "throughput": 4749.18, "total_tokens": 543040} +{"current_steps": 550, "total_steps": 42420, "loss": 0.7427, "lr": 6.471004243281471e-06, "epoch": 0.2593116454502593, "percentage": 1.3, "elapsed_time": "0:01:55", "remaining_time": "2:26:16", "throughput": 4748.95, "total_tokens": 547520} +{"current_steps": 555, "total_steps": 42420, "loss": 0.2734, "lr": 6.52993870815653e-06, "epoch": 0.26166902404526166, "percentage": 1.31, "elapsed_time": "0:01:56", "remaining_time": "2:26:19", "throughput": 4749.44, "total_tokens": 552800} +{"current_steps": 560, "total_steps": 42420, "loss": 0.097, "lr": 6.588873173031589e-06, "epoch": 0.264026402640264, "percentage": 1.32, "elapsed_time": "0:01:57", "remaining_time": "2:26:12", "throughput": 4749.43, "total_tokens": 557376} +{"current_steps": 565, "total_steps": 42420, "loss": 0.2308, "lr": 6.647807637906649e-06, "epoch": 0.2663837812352664, "percentage": 1.33, "elapsed_time": "0:01:58", "remaining_time": "2:26:29", "throughput": 4751.59, "total_tokens": 563776} +{"current_steps": 570, "total_steps": 42420, "loss": 0.5513, "lr": 6.706742102781707e-06, "epoch": 0.26874115983026875, "percentage": 1.34, "elapsed_time": "0:01:59", "remaining_time": "2:26:13", "throughput": 4750.66, "total_tokens": 567712} +{"current_steps": 575, "total_steps": 42420, "loss": 0.4007, "lr": 6.765676567656766e-06, "epoch": 0.2710985384252711, "percentage": 1.36, "elapsed_time": "0:02:00", "remaining_time": "2:26:16", "throughput": 4751.1, "total_tokens": 572992} +{"current_steps": 580, "total_steps": 42420, "loss": 0.3548, "lr": 6.824611032531825e-06, "epoch": 0.27345591702027344, "percentage": 1.37, "elapsed_time": "0:02:01", "remaining_time": "2:26:24", "throughput": 4753.16, "total_tokens": 578816} +{"current_steps": 585, "total_steps": 42420, "loss": 0.4592, "lr": 6.883545497406884e-06, "epoch": 0.2758132956152758, "percentage": 1.38, "elapsed_time": "0:02:02", "remaining_time": "2:26:18", "throughput": 4753.75, "total_tokens": 583552} +{"current_steps": 590, "total_steps": 42420, "loss": 0.3921, "lr": 6.942479962281943e-06, "epoch": 0.2781706742102782, "percentage": 1.39, "elapsed_time": "0:02:03", "remaining_time": "2:26:26", "throughput": 4751.73, "total_tokens": 588864} +{"current_steps": 595, "total_steps": 42420, "loss": 0.294, "lr": 7.001414427157002e-06, "epoch": 0.28052805280528054, "percentage": 1.4, "elapsed_time": "0:02:04", "remaining_time": "2:26:16", "throughput": 4751.38, "total_tokens": 593248} +{"current_steps": 600, "total_steps": 42420, "loss": 0.4681, "lr": 7.060348892032061e-06, "epoch": 0.2828854314002829, "percentage": 1.41, "elapsed_time": "0:02:06", "remaining_time": "2:26:28", "throughput": 4753.28, "total_tokens": 599360} +{"current_steps": 605, "total_steps": 42420, "loss": 0.3951, "lr": 7.11928335690712e-06, "epoch": 0.28524280999528523, "percentage": 1.43, "elapsed_time": "0:02:07", "remaining_time": "2:26:28", "throughput": 4752.7, "total_tokens": 604352} +{"current_steps": 610, "total_steps": 42420, "loss": 0.3012, "lr": 7.178217821782178e-06, "epoch": 0.2876001885902876, "percentage": 1.44, "elapsed_time": "0:02:08", "remaining_time": "2:26:32", "throughput": 4754.95, "total_tokens": 609952} +{"current_steps": 615, "total_steps": 42420, "loss": 0.2812, "lr": 7.237152286657238e-06, "epoch": 0.28995756718529, "percentage": 1.45, "elapsed_time": "0:02:09", "remaining_time": "2:26:27", "throughput": 4752.89, "total_tokens": 614400} +{"current_steps": 620, "total_steps": 42420, "loss": 0.5168, "lr": 7.296086751532297e-06, "epoch": 0.2923149457802923, "percentage": 1.46, "elapsed_time": "0:02:10", "remaining_time": "2:26:27", "throughput": 4753.46, "total_tokens": 619552} +{"current_steps": 625, "total_steps": 42420, "loss": 0.4996, "lr": 7.355021216407356e-06, "epoch": 0.29467232437529467, "percentage": 1.47, "elapsed_time": "0:02:11", "remaining_time": "2:26:29", "throughput": 4754.6, "total_tokens": 624960} +{"current_steps": 630, "total_steps": 42420, "loss": 0.5305, "lr": 7.413955681282414e-06, "epoch": 0.297029702970297, "percentage": 1.49, "elapsed_time": "0:02:12", "remaining_time": "2:26:46", "throughput": 4756.8, "total_tokens": 631520} +{"current_steps": 635, "total_steps": 42420, "loss": 0.5969, "lr": 7.472890146157472e-06, "epoch": 0.29938708156529936, "percentage": 1.5, "elapsed_time": "0:02:13", "remaining_time": "2:26:57", "throughput": 4758.14, "total_tokens": 637568} +{"current_steps": 640, "total_steps": 42420, "loss": 0.3699, "lr": 7.531824611032533e-06, "epoch": 0.30174446016030176, "percentage": 1.51, "elapsed_time": "0:02:15", "remaining_time": "2:26:58", "throughput": 4757.54, "total_tokens": 642688} +{"current_steps": 645, "total_steps": 42420, "loss": 0.2283, "lr": 7.590759075907592e-06, "epoch": 0.3041018387553041, "percentage": 1.52, "elapsed_time": "0:02:16", "remaining_time": "2:26:52", "throughput": 4757.98, "total_tokens": 647392} +{"current_steps": 650, "total_steps": 42420, "loss": 0.4409, "lr": 7.64969354078265e-06, "epoch": 0.30645921735030646, "percentage": 1.53, "elapsed_time": "0:02:17", "remaining_time": "2:26:55", "throughput": 4758.68, "total_tokens": 652800} +{"current_steps": 655, "total_steps": 42420, "loss": 0.3304, "lr": 7.708628005657708e-06, "epoch": 0.3088165959453088, "percentage": 1.54, "elapsed_time": "0:02:18", "remaining_time": "2:27:11", "throughput": 4760.13, "total_tokens": 659328} +{"current_steps": 660, "total_steps": 42420, "loss": 0.2569, "lr": 7.767562470532768e-06, "epoch": 0.31117397454031115, "percentage": 1.56, "elapsed_time": "0:02:19", "remaining_time": "2:27:00", "throughput": 4759.31, "total_tokens": 663488} +{"current_steps": 665, "total_steps": 42420, "loss": 0.1859, "lr": 7.826496935407828e-06, "epoch": 0.31353135313531355, "percentage": 1.57, "elapsed_time": "0:02:20", "remaining_time": "2:26:55", "throughput": 4759.42, "total_tokens": 668192} +{"current_steps": 670, "total_steps": 42420, "loss": 0.7881, "lr": 7.885431400282886e-06, "epoch": 0.3158887317303159, "percentage": 1.58, "elapsed_time": "0:02:21", "remaining_time": "2:26:58", "throughput": 4759.59, "total_tokens": 673568} +{"current_steps": 675, "total_steps": 42420, "loss": 0.3943, "lr": 7.944365865157945e-06, "epoch": 0.31824611032531824, "percentage": 1.59, "elapsed_time": "0:02:23", "remaining_time": "2:27:26", "throughput": 4761.23, "total_tokens": 681056} +{"current_steps": 680, "total_steps": 42420, "loss": 0.3098, "lr": 8.003300330033003e-06, "epoch": 0.3206034889203206, "percentage": 1.6, "elapsed_time": "0:02:24", "remaining_time": "2:27:25", "throughput": 4759.77, "total_tokens": 685920} +{"current_steps": 685, "total_steps": 42420, "loss": 0.3183, "lr": 8.062234794908063e-06, "epoch": 0.32296086751532294, "percentage": 1.61, "elapsed_time": "0:02:25", "remaining_time": "2:27:21", "throughput": 4760.24, "total_tokens": 690816} +{"current_steps": 690, "total_steps": 42420, "loss": 0.2954, "lr": 8.121169259783122e-06, "epoch": 0.32531824611032534, "percentage": 1.63, "elapsed_time": "0:02:26", "remaining_time": "2:27:17", "throughput": 4760.83, "total_tokens": 695648} +{"current_steps": 695, "total_steps": 42420, "loss": 0.0966, "lr": 8.18010372465818e-06, "epoch": 0.3276756247053277, "percentage": 1.64, "elapsed_time": "0:02:27", "remaining_time": "2:27:18", "throughput": 4761.17, "total_tokens": 700960} +{"current_steps": 700, "total_steps": 42420, "loss": 0.119, "lr": 8.23903818953324e-06, "epoch": 0.33003300330033003, "percentage": 1.65, "elapsed_time": "0:02:28", "remaining_time": "2:27:14", "throughput": 4761.92, "total_tokens": 705856} +{"current_steps": 705, "total_steps": 42420, "loss": 0.2717, "lr": 8.297972654408298e-06, "epoch": 0.3323903818953324, "percentage": 1.66, "elapsed_time": "0:02:29", "remaining_time": "2:27:05", "throughput": 4761.8, "total_tokens": 710272} +{"current_steps": 710, "total_steps": 42420, "loss": 0.4655, "lr": 8.356907119283356e-06, "epoch": 0.3347477604903347, "percentage": 1.67, "elapsed_time": "0:02:30", "remaining_time": "2:26:59", "throughput": 4761.43, "total_tokens": 714816} +{"current_steps": 715, "total_steps": 42420, "loss": 1.053, "lr": 8.415841584158417e-06, "epoch": 0.3371051390853371, "percentage": 1.69, "elapsed_time": "0:02:31", "remaining_time": "2:27:07", "throughput": 4762.26, "total_tokens": 720736} +{"current_steps": 720, "total_steps": 42420, "loss": 0.2614, "lr": 8.474776049033475e-06, "epoch": 0.33946251768033947, "percentage": 1.7, "elapsed_time": "0:02:32", "remaining_time": "2:27:17", "throughput": 4763.18, "total_tokens": 726848} +{"current_steps": 725, "total_steps": 42420, "loss": 0.6954, "lr": 8.533710513908535e-06, "epoch": 0.3418198962753418, "percentage": 1.71, "elapsed_time": "0:02:33", "remaining_time": "2:27:21", "throughput": 4764.57, "total_tokens": 732512} +{"current_steps": 730, "total_steps": 42420, "loss": 0.3064, "lr": 8.592644978783593e-06, "epoch": 0.34417727487034416, "percentage": 1.72, "elapsed_time": "0:02:34", "remaining_time": "2:27:23", "throughput": 4765.38, "total_tokens": 737952} +{"current_steps": 735, "total_steps": 42420, "loss": 0.2503, "lr": 8.65157944365865e-06, "epoch": 0.3465346534653465, "percentage": 1.73, "elapsed_time": "0:02:35", "remaining_time": "2:27:18", "throughput": 4765.93, "total_tokens": 742752} +{"current_steps": 740, "total_steps": 42420, "loss": 0.9087, "lr": 8.710513908533712e-06, "epoch": 0.3488920320603489, "percentage": 1.74, "elapsed_time": "0:02:36", "remaining_time": "2:27:16", "throughput": 4767.32, "total_tokens": 747968} +{"current_steps": 745, "total_steps": 42420, "loss": 0.4773, "lr": 8.76944837340877e-06, "epoch": 0.35124941065535126, "percentage": 1.76, "elapsed_time": "0:02:38", "remaining_time": "2:27:20", "throughput": 4768.47, "total_tokens": 753632} +{"current_steps": 750, "total_steps": 42420, "loss": 0.3417, "lr": 8.82838283828383e-06, "epoch": 0.3536067892503536, "percentage": 1.77, "elapsed_time": "0:02:39", "remaining_time": "2:27:31", "throughput": 4769.7, "total_tokens": 759904} +{"current_steps": 755, "total_steps": 42420, "loss": 0.8336, "lr": 8.887317303158887e-06, "epoch": 0.35596416784535595, "percentage": 1.78, "elapsed_time": "0:02:40", "remaining_time": "2:27:29", "throughput": 4770.03, "total_tokens": 764960} +{"current_steps": 760, "total_steps": 42420, "loss": 0.3557, "lr": 8.946251768033945e-06, "epoch": 0.3583215464403583, "percentage": 1.79, "elapsed_time": "0:02:41", "remaining_time": "2:27:20", "throughput": 4769.6, "total_tokens": 769184} +{"current_steps": 765, "total_steps": 42420, "loss": 0.3932, "lr": 9.005186232909007e-06, "epoch": 0.3606789250353607, "percentage": 1.8, "elapsed_time": "0:02:42", "remaining_time": "2:27:26", "throughput": 4770.93, "total_tokens": 775104} +{"current_steps": 770, "total_steps": 42420, "loss": 0.2737, "lr": 9.064120697784065e-06, "epoch": 0.36303630363036304, "percentage": 1.82, "elapsed_time": "0:02:43", "remaining_time": "2:27:20", "throughput": 4770.79, "total_tokens": 779744} +{"current_steps": 775, "total_steps": 42420, "loss": 0.1985, "lr": 9.123055162659123e-06, "epoch": 0.3653936822253654, "percentage": 1.83, "elapsed_time": "0:02:44", "remaining_time": "2:27:18", "throughput": 4771.35, "total_tokens": 784832} +{"current_steps": 780, "total_steps": 42420, "loss": 0.2983, "lr": 9.181989627534182e-06, "epoch": 0.36775106082036774, "percentage": 1.84, "elapsed_time": "0:02:45", "remaining_time": "2:27:17", "throughput": 4771.54, "total_tokens": 789920} +{"current_steps": 785, "total_steps": 42420, "loss": 0.1321, "lr": 9.24092409240924e-06, "epoch": 0.3701084394153701, "percentage": 1.85, "elapsed_time": "0:02:46", "remaining_time": "2:27:19", "throughput": 4770.85, "total_tokens": 795168} +{"current_steps": 790, "total_steps": 42420, "loss": 0.2687, "lr": 9.299858557284302e-06, "epoch": 0.3724658180103725, "percentage": 1.86, "elapsed_time": "0:02:47", "remaining_time": "2:27:17", "throughput": 4770.97, "total_tokens": 800096} +{"current_steps": 795, "total_steps": 42420, "loss": 0.4568, "lr": 9.35879302215936e-06, "epoch": 0.37482319660537483, "percentage": 1.87, "elapsed_time": "0:02:48", "remaining_time": "2:27:20", "throughput": 4771.57, "total_tokens": 805632} +{"current_steps": 800, "total_steps": 42420, "loss": 0.4557, "lr": 9.417727487034417e-06, "epoch": 0.3771805752003772, "percentage": 1.89, "elapsed_time": "0:02:49", "remaining_time": "2:27:18", "throughput": 4771.67, "total_tokens": 810688} +{"current_steps": 805, "total_steps": 42420, "loss": 0.4632, "lr": 9.476661951909477e-06, "epoch": 0.3795379537953795, "percentage": 1.9, "elapsed_time": "0:02:50", "remaining_time": "2:27:13", "throughput": 4771.14, "total_tokens": 815264} +{"current_steps": 810, "total_steps": 42420, "loss": 0.3664, "lr": 9.535596416784535e-06, "epoch": 0.38189533239038187, "percentage": 1.91, "elapsed_time": "0:02:51", "remaining_time": "2:27:13", "throughput": 4770.75, "total_tokens": 820320} +{"current_steps": 815, "total_steps": 42420, "loss": 0.5474, "lr": 9.594530881659596e-06, "epoch": 0.38425271098538427, "percentage": 1.92, "elapsed_time": "0:02:52", "remaining_time": "2:27:01", "throughput": 4768.58, "total_tokens": 824064} +{"current_steps": 820, "total_steps": 42420, "loss": 0.1674, "lr": 9.653465346534654e-06, "epoch": 0.3866100895803866, "percentage": 1.93, "elapsed_time": "0:02:53", "remaining_time": "2:26:59", "throughput": 4769.16, "total_tokens": 829120} +{"current_steps": 825, "total_steps": 42420, "loss": 0.4126, "lr": 9.712399811409712e-06, "epoch": 0.38896746817538896, "percentage": 1.94, "elapsed_time": "0:02:55", "remaining_time": "2:27:04", "throughput": 4769.89, "total_tokens": 834816} +{"current_steps": 830, "total_steps": 42420, "loss": 0.2481, "lr": 9.771334276284772e-06, "epoch": 0.3913248467703913, "percentage": 1.96, "elapsed_time": "0:02:56", "remaining_time": "2:27:05", "throughput": 4770.55, "total_tokens": 840256} +{"current_steps": 835, "total_steps": 42420, "loss": 0.1093, "lr": 9.83026874115983e-06, "epoch": 0.39368222536539366, "percentage": 1.97, "elapsed_time": "0:02:57", "remaining_time": "2:26:59", "throughput": 4770.68, "total_tokens": 844864} +{"current_steps": 840, "total_steps": 42420, "loss": 0.2687, "lr": 9.88920320603489e-06, "epoch": 0.39603960396039606, "percentage": 1.98, "elapsed_time": "0:02:58", "remaining_time": "2:27:01", "throughput": 4771.3, "total_tokens": 850304} +{"current_steps": 845, "total_steps": 42420, "loss": 0.085, "lr": 9.948137670909949e-06, "epoch": 0.3983969825553984, "percentage": 1.99, "elapsed_time": "0:02:59", "remaining_time": "2:26:58", "throughput": 4771.42, "total_tokens": 855232} +{"current_steps": 850, "total_steps": 42420, "loss": 0.4871, "lr": 1.0007072135785007e-05, "epoch": 0.40075436115040075, "percentage": 2.0, "elapsed_time": "0:03:00", "remaining_time": "2:26:55", "throughput": 4771.9, "total_tokens": 860128} +{"current_steps": 855, "total_steps": 42420, "loss": 0.4733, "lr": 1.0066006600660067e-05, "epoch": 0.4031117397454031, "percentage": 2.02, "elapsed_time": "0:03:01", "remaining_time": "2:26:49", "throughput": 4772.57, "total_tokens": 864896} +{"current_steps": 860, "total_steps": 42420, "loss": 0.4257, "lr": 1.0124941065535125e-05, "epoch": 0.40546911834040544, "percentage": 2.03, "elapsed_time": "0:03:02", "remaining_time": "2:26:43", "throughput": 4772.61, "total_tokens": 869408} +{"current_steps": 865, "total_steps": 42420, "loss": 0.2301, "lr": 1.0183875530410184e-05, "epoch": 0.40782649693540785, "percentage": 2.04, "elapsed_time": "0:03:03", "remaining_time": "2:26:39", "throughput": 4772.49, "total_tokens": 874144} +{"current_steps": 870, "total_steps": 42420, "loss": 0.5145, "lr": 1.0242809995285244e-05, "epoch": 0.4101838755304102, "percentage": 2.05, "elapsed_time": "0:03:04", "remaining_time": "2:26:36", "throughput": 4772.28, "total_tokens": 879040} +{"current_steps": 875, "total_steps": 42420, "loss": 0.3263, "lr": 1.0301744460160302e-05, "epoch": 0.41254125412541254, "percentage": 2.06, "elapsed_time": "0:03:05", "remaining_time": "2:26:32", "throughput": 4771.94, "total_tokens": 883712} +{"current_steps": 880, "total_steps": 42420, "loss": 0.3429, "lr": 1.0360678925035361e-05, "epoch": 0.4148986327204149, "percentage": 2.07, "elapsed_time": "0:03:06", "remaining_time": "2:26:26", "throughput": 4771.39, "total_tokens": 888128} +{"current_steps": 885, "total_steps": 42420, "loss": 0.1917, "lr": 1.041961338991042e-05, "epoch": 0.41725601131541723, "percentage": 2.09, "elapsed_time": "0:03:07", "remaining_time": "2:26:34", "throughput": 4772.04, "total_tokens": 894240} +{"current_steps": 890, "total_steps": 42420, "loss": 0.1971, "lr": 1.0478547854785479e-05, "epoch": 0.41961338991041963, "percentage": 2.1, "elapsed_time": "0:03:08", "remaining_time": "2:26:33", "throughput": 4772.95, "total_tokens": 899456} +{"current_steps": 895, "total_steps": 42420, "loss": 0.1898, "lr": 1.0537482319660539e-05, "epoch": 0.421970768505422, "percentage": 2.11, "elapsed_time": "0:03:09", "remaining_time": "2:26:34", "throughput": 4772.96, "total_tokens": 904672} +{"current_steps": 900, "total_steps": 42420, "loss": 0.2383, "lr": 1.0596416784535597e-05, "epoch": 0.4243281471004243, "percentage": 2.12, "elapsed_time": "0:03:11", "remaining_time": "2:27:05", "throughput": 4773.55, "total_tokens": 913216} +{"current_steps": 905, "total_steps": 42420, "loss": 0.4947, "lr": 1.0655351249410656e-05, "epoch": 0.42668552569542667, "percentage": 2.13, "elapsed_time": "0:03:12", "remaining_time": "2:26:58", "throughput": 4773.38, "total_tokens": 917664} +{"current_steps": 910, "total_steps": 42420, "loss": 0.2127, "lr": 1.0714285714285714e-05, "epoch": 0.429042904290429, "percentage": 2.15, "elapsed_time": "0:03:13", "remaining_time": "2:27:02", "throughput": 4774.22, "total_tokens": 923360} +{"current_steps": 915, "total_steps": 42420, "loss": 0.1959, "lr": 1.0773220179160774e-05, "epoch": 0.4314002828854314, "percentage": 2.16, "elapsed_time": "0:03:14", "remaining_time": "2:27:02", "throughput": 4774.2, "total_tokens": 928608} +{"current_steps": 920, "total_steps": 42420, "loss": 0.15, "lr": 1.0832154644035833e-05, "epoch": 0.43375766148043376, "percentage": 2.17, "elapsed_time": "0:03:15", "remaining_time": "2:26:58", "throughput": 4774.22, "total_tokens": 933312} +{"current_steps": 925, "total_steps": 42420, "loss": 0.3495, "lr": 1.0891089108910891e-05, "epoch": 0.4361150400754361, "percentage": 2.18, "elapsed_time": "0:03:16", "remaining_time": "2:26:51", "throughput": 4774.25, "total_tokens": 937824} +{"current_steps": 930, "total_steps": 42420, "loss": 0.4271, "lr": 1.0950023573785951e-05, "epoch": 0.43847241867043846, "percentage": 2.19, "elapsed_time": "0:03:17", "remaining_time": "2:26:44", "throughput": 4773.15, "total_tokens": 942016} +{"current_steps": 935, "total_steps": 42420, "loss": 0.3949, "lr": 1.1008958038661009e-05, "epoch": 0.4408297972654408, "percentage": 2.2, "elapsed_time": "0:03:18", "remaining_time": "2:26:36", "throughput": 4772.24, "total_tokens": 946080} +{"current_steps": 940, "total_steps": 42420, "loss": 0.0851, "lr": 1.1067892503536068e-05, "epoch": 0.4431871758604432, "percentage": 2.22, "elapsed_time": "0:03:19", "remaining_time": "2:26:34", "throughput": 4772.61, "total_tokens": 951168} +{"current_steps": 945, "total_steps": 42420, "loss": 0.2705, "lr": 1.1126826968411128e-05, "epoch": 0.44554455445544555, "percentage": 2.23, "elapsed_time": "0:03:20", "remaining_time": "2:26:39", "throughput": 4773.16, "total_tokens": 956992} +{"current_steps": 950, "total_steps": 42420, "loss": 0.1497, "lr": 1.1185761433286186e-05, "epoch": 0.4479019330504479, "percentage": 2.24, "elapsed_time": "0:03:21", "remaining_time": "2:26:31", "throughput": 4772.72, "total_tokens": 961248} +{"current_steps": 955, "total_steps": 42420, "loss": 0.3383, "lr": 1.1244695898161246e-05, "epoch": 0.45025931164545024, "percentage": 2.25, "elapsed_time": "0:03:22", "remaining_time": "2:26:22", "throughput": 4772.5, "total_tokens": 965376} +{"current_steps": 960, "total_steps": 42420, "loss": 0.4261, "lr": 1.1303630363036304e-05, "epoch": 0.4526166902404526, "percentage": 2.26, "elapsed_time": "0:03:23", "remaining_time": "2:26:16", "throughput": 4771.67, "total_tokens": 969664} +{"current_steps": 965, "total_steps": 42420, "loss": 0.1665, "lr": 1.1362564827911363e-05, "epoch": 0.454974068835455, "percentage": 2.27, "elapsed_time": "0:03:24", "remaining_time": "2:26:10", "throughput": 4770.99, "total_tokens": 974016} +{"current_steps": 970, "total_steps": 42420, "loss": 0.5085, "lr": 1.1421499292786423e-05, "epoch": 0.45733144743045734, "percentage": 2.29, "elapsed_time": "0:03:25", "remaining_time": "2:26:08", "throughput": 4771.62, "total_tokens": 979072} +{"current_steps": 975, "total_steps": 42420, "loss": 0.2772, "lr": 1.148043375766148e-05, "epoch": 0.4596888260254597, "percentage": 2.3, "elapsed_time": "0:03:26", "remaining_time": "2:26:07", "throughput": 4771.07, "total_tokens": 984032} +{"current_steps": 980, "total_steps": 42420, "loss": 0.0745, "lr": 1.1539368222536539e-05, "epoch": 0.46204620462046203, "percentage": 2.31, "elapsed_time": "0:03:27", "remaining_time": "2:26:01", "throughput": 4771.04, "total_tokens": 988544} +{"current_steps": 985, "total_steps": 42420, "loss": 0.3092, "lr": 1.1598302687411598e-05, "epoch": 0.4644035832154644, "percentage": 2.32, "elapsed_time": "0:03:28", "remaining_time": "2:25:59", "throughput": 4771.37, "total_tokens": 993504} +{"current_steps": 990, "total_steps": 42420, "loss": 0.246, "lr": 1.1657237152286658e-05, "epoch": 0.4667609618104668, "percentage": 2.33, "elapsed_time": "0:03:29", "remaining_time": "2:26:01", "throughput": 4772.27, "total_tokens": 999104} +{"current_steps": 995, "total_steps": 42420, "loss": 0.2047, "lr": 1.1716171617161718e-05, "epoch": 0.4691183404054691, "percentage": 2.35, "elapsed_time": "0:03:30", "remaining_time": "2:26:05", "throughput": 4772.6, "total_tokens": 1004864} +{"current_steps": 1000, "total_steps": 42420, "loss": 0.1107, "lr": 1.1775106082036776e-05, "epoch": 0.47147571900047147, "percentage": 2.36, "elapsed_time": "0:03:31", "remaining_time": "2:26:01", "throughput": 4772.37, "total_tokens": 1009472} +{"current_steps": 1005, "total_steps": 42420, "loss": 0.1808, "lr": 1.1834040546911834e-05, "epoch": 0.4738330975954738, "percentage": 2.37, "elapsed_time": "0:03:32", "remaining_time": "2:25:52", "throughput": 4771.93, "total_tokens": 1013536} +{"current_steps": 1010, "total_steps": 42420, "loss": 0.1219, "lr": 1.1892975011786893e-05, "epoch": 0.47619047619047616, "percentage": 2.38, "elapsed_time": "0:03:33", "remaining_time": "2:25:47", "throughput": 4772.04, "total_tokens": 1018144} +{"current_steps": 1015, "total_steps": 42420, "loss": 0.4522, "lr": 1.1951909476661953e-05, "epoch": 0.47854785478547857, "percentage": 2.39, "elapsed_time": "0:03:34", "remaining_time": "2:25:44", "throughput": 4771.49, "total_tokens": 1022848} +{"current_steps": 1020, "total_steps": 42420, "loss": 0.2205, "lr": 1.2010843941537012e-05, "epoch": 0.4809052333804809, "percentage": 2.4, "elapsed_time": "0:03:35", "remaining_time": "2:25:40", "throughput": 4770.86, "total_tokens": 1027424} +{"current_steps": 1025, "total_steps": 42420, "loss": 0.3865, "lr": 1.206977840641207e-05, "epoch": 0.48326261197548326, "percentage": 2.42, "elapsed_time": "0:03:36", "remaining_time": "2:25:41", "throughput": 4771.45, "total_tokens": 1032768} +{"current_steps": 1030, "total_steps": 42420, "loss": 0.0864, "lr": 1.2128712871287128e-05, "epoch": 0.4856199905704856, "percentage": 2.43, "elapsed_time": "0:03:37", "remaining_time": "2:25:37", "throughput": 4771.33, "total_tokens": 1037440} +{"current_steps": 1035, "total_steps": 42420, "loss": 0.1539, "lr": 1.2187647336162188e-05, "epoch": 0.48797736916548795, "percentage": 2.44, "elapsed_time": "0:03:38", "remaining_time": "2:25:35", "throughput": 4771.61, "total_tokens": 1042400} +{"current_steps": 1040, "total_steps": 42420, "loss": 0.2106, "lr": 1.2246581801037248e-05, "epoch": 0.49033474776049035, "percentage": 2.45, "elapsed_time": "0:03:39", "remaining_time": "2:25:38", "throughput": 4772.16, "total_tokens": 1048032} +{"current_steps": 1045, "total_steps": 42420, "loss": 0.1902, "lr": 1.2305516265912306e-05, "epoch": 0.4926921263554927, "percentage": 2.46, "elapsed_time": "0:03:40", "remaining_time": "2:25:38", "throughput": 4772.71, "total_tokens": 1053408} +{"current_steps": 1050, "total_steps": 42420, "loss": 0.2207, "lr": 1.2364450730787365e-05, "epoch": 0.49504950495049505, "percentage": 2.48, "elapsed_time": "0:03:41", "remaining_time": "2:25:41", "throughput": 4773.42, "total_tokens": 1059008} +{"current_steps": 1055, "total_steps": 42420, "loss": 0.2487, "lr": 1.2423385195662423e-05, "epoch": 0.4974068835454974, "percentage": 2.49, "elapsed_time": "0:03:42", "remaining_time": "2:25:36", "throughput": 4773.57, "total_tokens": 1063712} +{"current_steps": 1060, "total_steps": 42420, "loss": 0.833, "lr": 1.2482319660537483e-05, "epoch": 0.49976426214049974, "percentage": 2.5, "elapsed_time": "0:03:43", "remaining_time": "2:25:33", "throughput": 4772.93, "total_tokens": 1068320} +{"current_steps": 1065, "total_steps": 42420, "loss": 0.1598, "lr": 1.254125412541254e-05, "epoch": 0.5021216407355021, "percentage": 2.51, "elapsed_time": "0:03:44", "remaining_time": "2:25:32", "throughput": 4772.69, "total_tokens": 1073280} +{"current_steps": 1070, "total_steps": 42420, "loss": 0.2317, "lr": 1.26001885902876e-05, "epoch": 0.5044790193305044, "percentage": 2.52, "elapsed_time": "0:03:45", "remaining_time": "2:25:23", "throughput": 4771.78, "total_tokens": 1077184} +{"current_steps": 1075, "total_steps": 42420, "loss": 0.3032, "lr": 1.2659123055162662e-05, "epoch": 0.5068363979255068, "percentage": 2.53, "elapsed_time": "0:03:46", "remaining_time": "2:25:19", "throughput": 4770.87, "total_tokens": 1081664} +{"current_steps": 1080, "total_steps": 42420, "loss": 0.2338, "lr": 1.2718057520037718e-05, "epoch": 0.5091937765205092, "percentage": 2.55, "elapsed_time": "0:03:47", "remaining_time": "2:25:15", "throughput": 4770.81, "total_tokens": 1086304} +{"current_steps": 1085, "total_steps": 42420, "loss": 0.2001, "lr": 1.277699198491278e-05, "epoch": 0.5115511551155115, "percentage": 2.56, "elapsed_time": "0:03:48", "remaining_time": "2:25:16", "throughput": 4771.01, "total_tokens": 1091616} +{"current_steps": 1090, "total_steps": 42420, "loss": 0.1441, "lr": 1.2835926449787835e-05, "epoch": 0.5139085337105139, "percentage": 2.57, "elapsed_time": "0:03:49", "remaining_time": "2:25:08", "throughput": 4770.16, "total_tokens": 1095616} +{"current_steps": 1095, "total_steps": 42420, "loss": 0.5524, "lr": 1.2894860914662895e-05, "epoch": 0.5162659123055162, "percentage": 2.58, "elapsed_time": "0:03:50", "remaining_time": "2:25:14", "throughput": 4770.82, "total_tokens": 1101632} +{"current_steps": 1100, "total_steps": 42420, "loss": 0.2054, "lr": 1.2953795379537956e-05, "epoch": 0.5186232909005186, "percentage": 2.59, "elapsed_time": "0:03:51", "remaining_time": "2:25:08", "throughput": 4770.78, "total_tokens": 1106080} +{"current_steps": 1105, "total_steps": 42420, "loss": 0.1222, "lr": 1.3012729844413013e-05, "epoch": 0.520980669495521, "percentage": 2.6, "elapsed_time": "0:03:52", "remaining_time": "2:25:10", "throughput": 4770.85, "total_tokens": 1111456} +{"current_steps": 1110, "total_steps": 42420, "loss": 0.1466, "lr": 1.3071664309288072e-05, "epoch": 0.5233380480905233, "percentage": 2.62, "elapsed_time": "0:03:54", "remaining_time": "2:25:16", "throughput": 4771.82, "total_tokens": 1117632} +{"current_steps": 1115, "total_steps": 42420, "loss": 0.1511, "lr": 1.313059877416313e-05, "epoch": 0.5256954266855257, "percentage": 2.63, "elapsed_time": "0:03:55", "remaining_time": "2:25:14", "throughput": 4771.8, "total_tokens": 1122496} +{"current_steps": 1120, "total_steps": 42420, "loss": 0.1124, "lr": 1.318953323903819e-05, "epoch": 0.528052805280528, "percentage": 2.64, "elapsed_time": "0:03:56", "remaining_time": "2:25:06", "throughput": 4769.6, "total_tokens": 1126176} +{"current_steps": 1125, "total_steps": 42420, "loss": 0.0938, "lr": 1.3248467703913251e-05, "epoch": 0.5304101838755304, "percentage": 2.65, "elapsed_time": "0:03:57", "remaining_time": "2:25:18", "throughput": 4770.06, "total_tokens": 1133024} +{"current_steps": 1130, "total_steps": 42420, "loss": 0.0714, "lr": 1.3307402168788307e-05, "epoch": 0.5327675624705328, "percentage": 2.66, "elapsed_time": "0:03:58", "remaining_time": "2:25:17", "throughput": 4770.39, "total_tokens": 1138112} +{"current_steps": 1135, "total_steps": 42420, "loss": 0.0988, "lr": 1.3366336633663367e-05, "epoch": 0.5351249410655351, "percentage": 2.68, "elapsed_time": "0:03:59", "remaining_time": "2:25:22", "throughput": 4771.14, "total_tokens": 1144064} +{"current_steps": 1140, "total_steps": 42420, "loss": 0.1838, "lr": 1.3425271098538425e-05, "epoch": 0.5374823196605375, "percentage": 2.69, "elapsed_time": "0:04:00", "remaining_time": "2:25:20", "throughput": 4771.05, "total_tokens": 1148992} +{"current_steps": 1145, "total_steps": 42420, "loss": 0.1091, "lr": 1.3484205563413485e-05, "epoch": 0.5398396982555398, "percentage": 2.7, "elapsed_time": "0:04:01", "remaining_time": "2:25:23", "throughput": 4771.24, "total_tokens": 1154592} +{"current_steps": 1150, "total_steps": 42420, "loss": 0.1669, "lr": 1.3543140028288546e-05, "epoch": 0.5421970768505422, "percentage": 2.71, "elapsed_time": "0:04:03", "remaining_time": "2:25:23", "throughput": 4771.63, "total_tokens": 1159936} +{"current_steps": 1155, "total_steps": 42420, "loss": 0.2084, "lr": 1.3602074493163602e-05, "epoch": 0.5445544554455446, "percentage": 2.72, "elapsed_time": "0:04:04", "remaining_time": "2:25:19", "throughput": 4771.47, "total_tokens": 1164448} +{"current_steps": 1160, "total_steps": 42420, "loss": 0.1912, "lr": 1.3661008958038662e-05, "epoch": 0.5469118340405469, "percentage": 2.73, "elapsed_time": "0:04:04", "remaining_time": "2:25:11", "throughput": 4771.26, "total_tokens": 1168544} +{"current_steps": 1165, "total_steps": 42420, "loss": 0.1213, "lr": 1.371994342291372e-05, "epoch": 0.5492692126355493, "percentage": 2.75, "elapsed_time": "0:04:05", "remaining_time": "2:25:05", "throughput": 4771.42, "total_tokens": 1172960} +{"current_steps": 1170, "total_steps": 42420, "loss": 0.1908, "lr": 1.377887788778878e-05, "epoch": 0.5516265912305516, "percentage": 2.76, "elapsed_time": "0:04:06", "remaining_time": "2:25:05", "throughput": 4771.54, "total_tokens": 1178144} +{"current_steps": 1175, "total_steps": 42420, "loss": 0.3895, "lr": 1.3837812352663839e-05, "epoch": 0.553983969825554, "percentage": 2.77, "elapsed_time": "0:04:08", "remaining_time": "2:25:11", "throughput": 4772.65, "total_tokens": 1184448} +{"current_steps": 1180, "total_steps": 42420, "loss": 0.3661, "lr": 1.3896746817538897e-05, "epoch": 0.5563413484205564, "percentage": 2.78, "elapsed_time": "0:04:09", "remaining_time": "2:25:09", "throughput": 4772.8, "total_tokens": 1189472} +{"current_steps": 1185, "total_steps": 42420, "loss": 0.1873, "lr": 1.3955681282413957e-05, "epoch": 0.5586987270155587, "percentage": 2.79, "elapsed_time": "0:04:10", "remaining_time": "2:25:11", "throughput": 4773.44, "total_tokens": 1195072} +{"current_steps": 1190, "total_steps": 42420, "loss": 0.334, "lr": 1.4014615747289015e-05, "epoch": 0.5610561056105611, "percentage": 2.81, "elapsed_time": "0:04:11", "remaining_time": "2:25:03", "throughput": 4771.71, "total_tokens": 1198656} +{"current_steps": 1195, "total_steps": 42420, "loss": 0.1862, "lr": 1.4073550212164074e-05, "epoch": 0.5634134842055634, "percentage": 2.82, "elapsed_time": "0:04:12", "remaining_time": "2:25:01", "throughput": 4771.72, "total_tokens": 1203616} +{"current_steps": 1200, "total_steps": 42420, "loss": 0.2472, "lr": 1.4132484677039134e-05, "epoch": 0.5657708628005658, "percentage": 2.83, "elapsed_time": "0:04:13", "remaining_time": "2:24:58", "throughput": 4771.19, "total_tokens": 1208224} +{"current_steps": 1205, "total_steps": 42420, "loss": 0.3519, "lr": 1.4191419141914192e-05, "epoch": 0.5681282413955682, "percentage": 2.84, "elapsed_time": "0:04:14", "remaining_time": "2:24:53", "throughput": 4771.02, "total_tokens": 1212608} +{"current_steps": 1210, "total_steps": 42420, "loss": 0.1084, "lr": 1.4250353606789251e-05, "epoch": 0.5704856199905705, "percentage": 2.85, "elapsed_time": "0:04:15", "remaining_time": "2:24:56", "throughput": 4771.27, "total_tokens": 1218336} +{"current_steps": 1215, "total_steps": 42420, "loss": 0.2362, "lr": 1.430928807166431e-05, "epoch": 0.5728429985855729, "percentage": 2.86, "elapsed_time": "0:04:16", "remaining_time": "2:24:54", "throughput": 4771.83, "total_tokens": 1223296} +{"current_steps": 1220, "total_steps": 42420, "loss": 0.2409, "lr": 1.4368222536539369e-05, "epoch": 0.5752003771805752, "percentage": 2.88, "elapsed_time": "0:04:17", "remaining_time": "2:24:52", "throughput": 4772.26, "total_tokens": 1228448} +{"current_steps": 1225, "total_steps": 42420, "loss": 0.5625, "lr": 1.4427157001414429e-05, "epoch": 0.5775577557755776, "percentage": 2.89, "elapsed_time": "0:04:18", "remaining_time": "2:24:48", "throughput": 4772.16, "total_tokens": 1233024} +{"current_steps": 1230, "total_steps": 42420, "loss": 0.4444, "lr": 1.4486091466289486e-05, "epoch": 0.57991513437058, "percentage": 2.9, "elapsed_time": "0:04:19", "remaining_time": "2:24:47", "throughput": 4772.25, "total_tokens": 1238080} +{"current_steps": 1235, "total_steps": 42420, "loss": 0.1018, "lr": 1.4545025931164546e-05, "epoch": 0.5822725129655822, "percentage": 2.91, "elapsed_time": "0:04:20", "remaining_time": "2:24:47", "throughput": 4772.78, "total_tokens": 1243296} +{"current_steps": 1240, "total_steps": 42420, "loss": 0.3606, "lr": 1.4603960396039604e-05, "epoch": 0.5846298915605846, "percentage": 2.92, "elapsed_time": "0:04:21", "remaining_time": "2:24:44", "throughput": 4773.04, "total_tokens": 1248128} +{"current_steps": 1245, "total_steps": 42420, "loss": 0.0974, "lr": 1.4662894860914664e-05, "epoch": 0.5869872701555869, "percentage": 2.93, "elapsed_time": "0:04:22", "remaining_time": "2:24:42", "throughput": 4772.42, "total_tokens": 1252864} +{"current_steps": 1250, "total_steps": 42420, "loss": 0.4528, "lr": 1.4721829325789723e-05, "epoch": 0.5893446487505893, "percentage": 2.95, "elapsed_time": "0:04:23", "remaining_time": "2:24:40", "throughput": 4772.66, "total_tokens": 1257856} +{"current_steps": 1255, "total_steps": 42420, "loss": 0.2641, "lr": 1.4780763790664781e-05, "epoch": 0.5917020273455917, "percentage": 2.96, "elapsed_time": "0:04:24", "remaining_time": "2:24:36", "throughput": 4771.88, "total_tokens": 1262240} +{"current_steps": 1260, "total_steps": 42420, "loss": 0.2806, "lr": 1.4839698255539841e-05, "epoch": 0.594059405940594, "percentage": 2.97, "elapsed_time": "0:04:25", "remaining_time": "2:24:35", "throughput": 4772.36, "total_tokens": 1267392} +{"current_steps": 1265, "total_steps": 42420, "loss": 0.1672, "lr": 1.4898632720414899e-05, "epoch": 0.5964167845355964, "percentage": 2.98, "elapsed_time": "0:04:26", "remaining_time": "2:24:31", "throughput": 4772.63, "total_tokens": 1272064} +{"current_steps": 1270, "total_steps": 42420, "loss": 0.1886, "lr": 1.4957567185289958e-05, "epoch": 0.5987741631305987, "percentage": 2.99, "elapsed_time": "0:04:27", "remaining_time": "2:24:22", "throughput": 4772.51, "total_tokens": 1275968} +{"current_steps": 1275, "total_steps": 42420, "loss": 0.1855, "lr": 1.5016501650165018e-05, "epoch": 0.6011315417256011, "percentage": 3.01, "elapsed_time": "0:04:28", "remaining_time": "2:24:16", "throughput": 4772.31, "total_tokens": 1280096} +{"current_steps": 1280, "total_steps": 42420, "loss": 0.2385, "lr": 1.5075436115040076e-05, "epoch": 0.6034889203206035, "percentage": 3.02, "elapsed_time": "0:04:29", "remaining_time": "2:24:11", "throughput": 4772.61, "total_tokens": 1284704} +{"current_steps": 1285, "total_steps": 42420, "loss": 0.1119, "lr": 1.5134370579915136e-05, "epoch": 0.6058462989156058, "percentage": 3.03, "elapsed_time": "0:04:30", "remaining_time": "2:24:07", "throughput": 4772.8, "total_tokens": 1289312} +{"current_steps": 1290, "total_steps": 42420, "loss": 0.6526, "lr": 1.5193305044790194e-05, "epoch": 0.6082036775106082, "percentage": 3.04, "elapsed_time": "0:04:31", "remaining_time": "2:24:01", "throughput": 4772.51, "total_tokens": 1293440} +{"current_steps": 1295, "total_steps": 42420, "loss": 0.2308, "lr": 1.5252239509665253e-05, "epoch": 0.6105610561056105, "percentage": 3.05, "elapsed_time": "0:04:31", "remaining_time": "2:23:53", "throughput": 4772.0, "total_tokens": 1297408} +{"current_steps": 1300, "total_steps": 42420, "loss": 0.215, "lr": 1.5311173974540313e-05, "epoch": 0.6129184347006129, "percentage": 3.06, "elapsed_time": "0:04:32", "remaining_time": "2:23:52", "throughput": 4771.89, "total_tokens": 1302304} +{"current_steps": 1305, "total_steps": 42420, "loss": 0.393, "lr": 1.537010843941537e-05, "epoch": 0.6152758132956153, "percentage": 3.08, "elapsed_time": "0:04:33", "remaining_time": "2:23:45", "throughput": 4771.92, "total_tokens": 1306368} +{"current_steps": 1310, "total_steps": 42420, "loss": 0.2529, "lr": 1.5429042904290432e-05, "epoch": 0.6176331918906176, "percentage": 3.09, "elapsed_time": "0:04:34", "remaining_time": "2:23:45", "throughput": 4772.31, "total_tokens": 1311712} +{"current_steps": 1315, "total_steps": 42420, "loss": 0.2564, "lr": 1.548797736916549e-05, "epoch": 0.61999057048562, "percentage": 3.1, "elapsed_time": "0:04:36", "remaining_time": "2:24:04", "throughput": 4773.24, "total_tokens": 1320000} +{"current_steps": 1320, "total_steps": 42420, "loss": 0.3685, "lr": 1.5546911834040548e-05, "epoch": 0.6223479490806223, "percentage": 3.11, "elapsed_time": "0:04:37", "remaining_time": "2:24:00", "throughput": 4772.85, "total_tokens": 1324448} +{"current_steps": 1325, "total_steps": 42420, "loss": 0.4194, "lr": 1.5605846298915608e-05, "epoch": 0.6247053276756247, "percentage": 3.12, "elapsed_time": "0:04:38", "remaining_time": "2:23:56", "throughput": 4772.91, "total_tokens": 1329088} +{"current_steps": 1330, "total_steps": 42420, "loss": 0.2135, "lr": 1.5664780763790664e-05, "epoch": 0.6270627062706271, "percentage": 3.14, "elapsed_time": "0:04:39", "remaining_time": "2:23:53", "throughput": 4773.36, "total_tokens": 1333920} +{"current_steps": 1335, "total_steps": 42420, "loss": 0.1912, "lr": 1.5723715228665727e-05, "epoch": 0.6294200848656294, "percentage": 3.15, "elapsed_time": "0:04:40", "remaining_time": "2:23:51", "throughput": 4773.76, "total_tokens": 1338848} +{"current_steps": 1340, "total_steps": 42420, "loss": 0.3977, "lr": 1.5782649693540783e-05, "epoch": 0.6317774634606318, "percentage": 3.16, "elapsed_time": "0:04:41", "remaining_time": "2:23:55", "throughput": 4774.23, "total_tokens": 1344864} +{"current_steps": 1345, "total_steps": 42420, "loss": 0.2709, "lr": 1.5841584158415843e-05, "epoch": 0.6341348420556341, "percentage": 3.17, "elapsed_time": "0:04:42", "remaining_time": "2:23:54", "throughput": 4775.03, "total_tokens": 1350144} +{"current_steps": 1350, "total_steps": 42420, "loss": 0.2211, "lr": 1.5900518623290902e-05, "epoch": 0.6364922206506365, "percentage": 3.18, "elapsed_time": "0:04:43", "remaining_time": "2:23:49", "throughput": 4775.42, "total_tokens": 1354592} +{"current_steps": 1355, "total_steps": 42420, "loss": 0.3181, "lr": 1.595945308816596e-05, "epoch": 0.6388495992456389, "percentage": 3.19, "elapsed_time": "0:04:44", "remaining_time": "2:23:41", "throughput": 4774.72, "total_tokens": 1358368} +{"current_steps": 1360, "total_steps": 42420, "loss": 0.1759, "lr": 1.6018387553041018e-05, "epoch": 0.6412069778406412, "percentage": 3.21, "elapsed_time": "0:04:45", "remaining_time": "2:23:37", "throughput": 4774.72, "total_tokens": 1362848} +{"current_steps": 1365, "total_steps": 42420, "loss": 0.2985, "lr": 1.6077322017916078e-05, "epoch": 0.6435643564356436, "percentage": 3.22, "elapsed_time": "0:04:46", "remaining_time": "2:23:35", "throughput": 4773.9, "total_tokens": 1367456} +{"current_steps": 1370, "total_steps": 42420, "loss": 0.3404, "lr": 1.6136256482791138e-05, "epoch": 0.6459217350306459, "percentage": 3.23, "elapsed_time": "0:04:47", "remaining_time": "2:23:41", "throughput": 4774.74, "total_tokens": 1373888} +{"current_steps": 1375, "total_steps": 42420, "loss": 0.3731, "lr": 1.6195190947666197e-05, "epoch": 0.6482791136256483, "percentage": 3.24, "elapsed_time": "0:04:48", "remaining_time": "2:23:40", "throughput": 4774.87, "total_tokens": 1378944} +{"current_steps": 1380, "total_steps": 42420, "loss": 0.5236, "lr": 1.6254125412541253e-05, "epoch": 0.6506364922206507, "percentage": 3.25, "elapsed_time": "0:04:49", "remaining_time": "2:23:36", "throughput": 4774.63, "total_tokens": 1383328} +{"current_steps": 1385, "total_steps": 42420, "loss": 0.2276, "lr": 1.6313059877416313e-05, "epoch": 0.652993870815653, "percentage": 3.26, "elapsed_time": "0:04:50", "remaining_time": "2:23:37", "throughput": 4775.28, "total_tokens": 1388928} +{"current_steps": 1390, "total_steps": 42420, "loss": 0.269, "lr": 1.6371994342291373e-05, "epoch": 0.6553512494106554, "percentage": 3.28, "elapsed_time": "0:04:52", "remaining_time": "2:23:46", "throughput": 4776.07, "total_tokens": 1395712} +{"current_steps": 1395, "total_steps": 42420, "loss": 0.4696, "lr": 1.6430928807166432e-05, "epoch": 0.6577086280056577, "percentage": 3.29, "elapsed_time": "0:04:53", "remaining_time": "2:23:41", "throughput": 4775.51, "total_tokens": 1399936} +{"current_steps": 1400, "total_steps": 42420, "loss": 0.2222, "lr": 1.6489863272041492e-05, "epoch": 0.6600660066006601, "percentage": 3.3, "elapsed_time": "0:04:54", "remaining_time": "2:23:40", "throughput": 4775.62, "total_tokens": 1404992} +{"current_steps": 1405, "total_steps": 42420, "loss": 0.4071, "lr": 1.6548797736916548e-05, "epoch": 0.6624233851956625, "percentage": 3.31, "elapsed_time": "0:04:55", "remaining_time": "2:23:36", "throughput": 4775.73, "total_tokens": 1409568} +{"current_steps": 1410, "total_steps": 42420, "loss": 0.2935, "lr": 1.6607732201791608e-05, "epoch": 0.6647807637906648, "percentage": 3.32, "elapsed_time": "0:04:56", "remaining_time": "2:23:36", "throughput": 4776.43, "total_tokens": 1415072} +{"current_steps": 1415, "total_steps": 42420, "loss": 0.1782, "lr": 1.6666666666666667e-05, "epoch": 0.6671381423856672, "percentage": 3.34, "elapsed_time": "0:04:57", "remaining_time": "2:23:32", "throughput": 4776.68, "total_tokens": 1419552} +{"current_steps": 1420, "total_steps": 42420, "loss": 0.0904, "lr": 1.6725601131541727e-05, "epoch": 0.6694955209806694, "percentage": 3.35, "elapsed_time": "0:04:58", "remaining_time": "2:23:30", "throughput": 4776.98, "total_tokens": 1424544} +{"current_steps": 1425, "total_steps": 42420, "loss": 0.4594, "lr": 1.6784535596416787e-05, "epoch": 0.6718528995756718, "percentage": 3.36, "elapsed_time": "0:04:59", "remaining_time": "2:23:28", "throughput": 4777.36, "total_tokens": 1429504} +{"current_steps": 1430, "total_steps": 42420, "loss": 0.2362, "lr": 1.6843470061291843e-05, "epoch": 0.6742102781706742, "percentage": 3.37, "elapsed_time": "0:05:00", "remaining_time": "2:23:25", "throughput": 4777.58, "total_tokens": 1434336} +{"current_steps": 1435, "total_steps": 42420, "loss": 0.0993, "lr": 1.6902404526166903e-05, "epoch": 0.6765676567656765, "percentage": 3.38, "elapsed_time": "0:05:01", "remaining_time": "2:23:19", "throughput": 4777.39, "total_tokens": 1438496} +{"current_steps": 1440, "total_steps": 42420, "loss": 0.322, "lr": 1.6961338991041962e-05, "epoch": 0.6789250353606789, "percentage": 3.39, "elapsed_time": "0:05:02", "remaining_time": "2:23:21", "throughput": 4777.69, "total_tokens": 1444032} +{"current_steps": 1445, "total_steps": 42420, "loss": 0.1563, "lr": 1.7020273455917022e-05, "epoch": 0.6812824139556812, "percentage": 3.41, "elapsed_time": "0:05:03", "remaining_time": "2:23:19", "throughput": 4777.65, "total_tokens": 1448864} +{"current_steps": 1450, "total_steps": 42420, "loss": 0.2006, "lr": 1.707920792079208e-05, "epoch": 0.6836397925506836, "percentage": 3.42, "elapsed_time": "0:05:04", "remaining_time": "2:23:15", "throughput": 4777.73, "total_tokens": 1453376} +{"current_steps": 1455, "total_steps": 42420, "loss": 0.1758, "lr": 1.7138142385667138e-05, "epoch": 0.685997171145686, "percentage": 3.43, "elapsed_time": "0:05:05", "remaining_time": "2:23:09", "throughput": 4776.82, "total_tokens": 1457376} +{"current_steps": 1460, "total_steps": 42420, "loss": 0.2783, "lr": 1.7197076850542197e-05, "epoch": 0.6883545497406883, "percentage": 3.44, "elapsed_time": "0:05:06", "remaining_time": "2:23:10", "throughput": 4777.17, "total_tokens": 1462848} +{"current_steps": 1465, "total_steps": 42420, "loss": 0.3199, "lr": 1.7256011315417257e-05, "epoch": 0.6907119283356907, "percentage": 3.45, "elapsed_time": "0:05:07", "remaining_time": "2:23:09", "throughput": 4777.38, "total_tokens": 1467872} +{"current_steps": 1470, "total_steps": 42420, "loss": 0.0979, "lr": 1.7314945780292317e-05, "epoch": 0.693069306930693, "percentage": 3.47, "elapsed_time": "0:05:08", "remaining_time": "2:23:03", "throughput": 4777.37, "total_tokens": 1472096} +{"current_steps": 1475, "total_steps": 42420, "loss": 0.281, "lr": 1.7373880245167376e-05, "epoch": 0.6954266855256954, "percentage": 3.48, "elapsed_time": "0:05:09", "remaining_time": "2:23:04", "throughput": 4777.94, "total_tokens": 1477632} +{"current_steps": 1480, "total_steps": 42420, "loss": 0.2936, "lr": 1.7432814710042433e-05, "epoch": 0.6977840641206978, "percentage": 3.49, "elapsed_time": "0:05:10", "remaining_time": "2:23:05", "throughput": 4778.07, "total_tokens": 1482944} +{"current_steps": 1485, "total_steps": 42420, "loss": 0.0579, "lr": 1.7491749174917492e-05, "epoch": 0.7001414427157001, "percentage": 3.5, "elapsed_time": "0:05:11", "remaining_time": "2:22:59", "throughput": 4777.82, "total_tokens": 1486976} +{"current_steps": 1490, "total_steps": 42420, "loss": 0.2443, "lr": 1.7550683639792552e-05, "epoch": 0.7024988213107025, "percentage": 3.51, "elapsed_time": "0:05:12", "remaining_time": "2:22:54", "throughput": 4777.01, "total_tokens": 1491168} +{"current_steps": 1495, "total_steps": 42420, "loss": 0.3282, "lr": 1.760961810466761e-05, "epoch": 0.7048561999057048, "percentage": 3.52, "elapsed_time": "0:05:13", "remaining_time": "2:22:52", "throughput": 4777.22, "total_tokens": 1496000} +{"current_steps": 1500, "total_steps": 42420, "loss": 0.1869, "lr": 1.766855256954267e-05, "epoch": 0.7072135785007072, "percentage": 3.54, "elapsed_time": "0:05:14", "remaining_time": "2:22:46", "throughput": 4776.81, "total_tokens": 1500064} +{"current_steps": 1505, "total_steps": 42420, "loss": 0.0588, "lr": 1.7727487034417727e-05, "epoch": 0.7095709570957096, "percentage": 3.55, "elapsed_time": "0:05:15", "remaining_time": "2:22:45", "throughput": 4777.08, "total_tokens": 1505120} +{"current_steps": 1510, "total_steps": 42420, "loss": 0.3558, "lr": 1.7786421499292787e-05, "epoch": 0.7119283356907119, "percentage": 3.56, "elapsed_time": "0:05:16", "remaining_time": "2:22:44", "throughput": 4777.44, "total_tokens": 1510240} +{"current_steps": 1515, "total_steps": 42420, "loss": 0.2256, "lr": 1.7845355964167847e-05, "epoch": 0.7142857142857143, "percentage": 3.57, "elapsed_time": "0:05:17", "remaining_time": "2:22:45", "throughput": 4778.06, "total_tokens": 1515712} +{"current_steps": 1520, "total_steps": 42420, "loss": 0.0456, "lr": 1.7904290429042906e-05, "epoch": 0.7166430928807166, "percentage": 3.58, "elapsed_time": "0:05:18", "remaining_time": "2:22:43", "throughput": 4777.74, "total_tokens": 1520512} +{"current_steps": 1525, "total_steps": 42420, "loss": 0.2285, "lr": 1.7963224893917966e-05, "epoch": 0.719000471475719, "percentage": 3.6, "elapsed_time": "0:05:19", "remaining_time": "2:22:36", "throughput": 4777.03, "total_tokens": 1524288} +{"current_steps": 1530, "total_steps": 42420, "loss": 0.1156, "lr": 1.8022159358793022e-05, "epoch": 0.7213578500707214, "percentage": 3.61, "elapsed_time": "0:05:20", "remaining_time": "2:22:33", "throughput": 4776.95, "total_tokens": 1528896} +{"current_steps": 1535, "total_steps": 42420, "loss": 0.1196, "lr": 1.8081093823668082e-05, "epoch": 0.7237152286657237, "percentage": 3.62, "elapsed_time": "0:05:21", "remaining_time": "2:22:35", "throughput": 4777.51, "total_tokens": 1534656} +{"current_steps": 1540, "total_steps": 42420, "loss": 0.3813, "lr": 1.814002828854314e-05, "epoch": 0.7260726072607261, "percentage": 3.63, "elapsed_time": "0:05:22", "remaining_time": "2:22:31", "throughput": 4777.15, "total_tokens": 1538848} +{"current_steps": 1545, "total_steps": 42420, "loss": 0.3749, "lr": 1.81989627534182e-05, "epoch": 0.7284299858557284, "percentage": 3.64, "elapsed_time": "0:05:23", "remaining_time": "2:22:28", "throughput": 4777.36, "total_tokens": 1543712} +{"current_steps": 1550, "total_steps": 42420, "loss": 0.4939, "lr": 1.825789721829326e-05, "epoch": 0.7307873644507308, "percentage": 3.65, "elapsed_time": "0:05:24", "remaining_time": "2:22:30", "throughput": 4777.01, "total_tokens": 1549152} +{"current_steps": 1555, "total_steps": 42420, "loss": 0.0758, "lr": 1.8316831683168317e-05, "epoch": 0.7331447430457332, "percentage": 3.67, "elapsed_time": "0:05:25", "remaining_time": "2:22:29", "throughput": 4776.81, "total_tokens": 1554080} +{"current_steps": 1560, "total_steps": 42420, "loss": 0.0781, "lr": 1.8375766148043376e-05, "epoch": 0.7355021216407355, "percentage": 3.68, "elapsed_time": "0:05:26", "remaining_time": "2:22:28", "throughput": 4777.05, "total_tokens": 1559104} +{"current_steps": 1565, "total_steps": 42420, "loss": 0.1049, "lr": 1.8434700612918436e-05, "epoch": 0.7378595002357379, "percentage": 3.69, "elapsed_time": "0:05:27", "remaining_time": "2:22:31", "throughput": 4777.91, "total_tokens": 1565152} +{"current_steps": 1570, "total_steps": 42420, "loss": 0.3828, "lr": 1.8493635077793496e-05, "epoch": 0.7402168788307402, "percentage": 3.7, "elapsed_time": "0:05:28", "remaining_time": "2:22:32", "throughput": 4778.35, "total_tokens": 1570688} +{"current_steps": 1575, "total_steps": 42420, "loss": 0.3156, "lr": 1.8552569542668555e-05, "epoch": 0.7425742574257426, "percentage": 3.71, "elapsed_time": "0:05:29", "remaining_time": "2:22:34", "throughput": 4778.6, "total_tokens": 1576256} +{"current_steps": 1580, "total_steps": 42420, "loss": 0.4078, "lr": 1.861150400754361e-05, "epoch": 0.744931636020745, "percentage": 3.72, "elapsed_time": "0:05:30", "remaining_time": "2:22:31", "throughput": 4778.82, "total_tokens": 1581056} +{"current_steps": 1585, "total_steps": 42420, "loss": 0.165, "lr": 1.867043847241867e-05, "epoch": 0.7472890146157473, "percentage": 3.74, "elapsed_time": "0:05:31", "remaining_time": "2:22:31", "throughput": 4779.42, "total_tokens": 1586464} +{"current_steps": 1590, "total_steps": 42420, "loss": 0.3333, "lr": 1.872937293729373e-05, "epoch": 0.7496463932107497, "percentage": 3.75, "elapsed_time": "0:05:32", "remaining_time": "2:22:29", "throughput": 4779.83, "total_tokens": 1591296} +{"current_steps": 1595, "total_steps": 42420, "loss": 0.2179, "lr": 1.878830740216879e-05, "epoch": 0.752003771805752, "percentage": 3.76, "elapsed_time": "0:05:33", "remaining_time": "2:22:24", "throughput": 4779.48, "total_tokens": 1595488} +{"current_steps": 1600, "total_steps": 42420, "loss": 0.1738, "lr": 1.884724186704385e-05, "epoch": 0.7543611504007544, "percentage": 3.77, "elapsed_time": "0:05:35", "remaining_time": "2:22:26", "throughput": 4779.88, "total_tokens": 1601312} +{"current_steps": 1605, "total_steps": 42420, "loss": 0.3845, "lr": 1.8906176331918906e-05, "epoch": 0.7567185289957568, "percentage": 3.78, "elapsed_time": "0:05:36", "remaining_time": "2:22:26", "throughput": 4779.39, "total_tokens": 1606336} +{"current_steps": 1610, "total_steps": 42420, "loss": 0.0949, "lr": 1.8965110796793966e-05, "epoch": 0.759075907590759, "percentage": 3.8, "elapsed_time": "0:05:37", "remaining_time": "2:22:23", "throughput": 4779.65, "total_tokens": 1611008} +{"current_steps": 1615, "total_steps": 42420, "loss": 0.1648, "lr": 1.9024045261669026e-05, "epoch": 0.7614332861857614, "percentage": 3.81, "elapsed_time": "0:05:37", "remaining_time": "2:22:19", "throughput": 4779.58, "total_tokens": 1615328} +{"current_steps": 1620, "total_steps": 42420, "loss": 0.3604, "lr": 1.9082979726544085e-05, "epoch": 0.7637906647807637, "percentage": 3.82, "elapsed_time": "0:05:39", "remaining_time": "2:22:17", "throughput": 4779.67, "total_tokens": 1620320} +{"current_steps": 1625, "total_steps": 42420, "loss": 0.263, "lr": 1.9141914191419145e-05, "epoch": 0.7661480433757661, "percentage": 3.83, "elapsed_time": "0:05:40", "remaining_time": "2:22:16", "throughput": 4780.06, "total_tokens": 1625344} +{"current_steps": 1630, "total_steps": 42420, "loss": 0.1152, "lr": 1.92008486562942e-05, "epoch": 0.7685054219707685, "percentage": 3.84, "elapsed_time": "0:05:41", "remaining_time": "2:22:15", "throughput": 4780.6, "total_tokens": 1630624} +{"current_steps": 1635, "total_steps": 42420, "loss": 0.3091, "lr": 1.925978312116926e-05, "epoch": 0.7708628005657708, "percentage": 3.85, "elapsed_time": "0:05:42", "remaining_time": "2:22:12", "throughput": 4780.59, "total_tokens": 1635136} +{"current_steps": 1640, "total_steps": 42420, "loss": 0.3795, "lr": 1.9318717586044317e-05, "epoch": 0.7732201791607732, "percentage": 3.87, "elapsed_time": "0:05:43", "remaining_time": "2:22:12", "throughput": 4781.12, "total_tokens": 1640640} +{"current_steps": 1645, "total_steps": 42420, "loss": 0.1843, "lr": 1.937765205091938e-05, "epoch": 0.7755775577557755, "percentage": 3.88, "elapsed_time": "0:05:44", "remaining_time": "2:22:19", "throughput": 4781.04, "total_tokens": 1647040} +{"current_steps": 1650, "total_steps": 42420, "loss": 0.4343, "lr": 1.943658651579444e-05, "epoch": 0.7779349363507779, "percentage": 3.89, "elapsed_time": "0:05:45", "remaining_time": "2:22:16", "throughput": 4781.08, "total_tokens": 1651712} +{"current_steps": 1655, "total_steps": 42420, "loss": 0.0954, "lr": 1.9495520980669496e-05, "epoch": 0.7802923149457803, "percentage": 3.9, "elapsed_time": "0:05:46", "remaining_time": "2:22:19", "throughput": 4781.28, "total_tokens": 1657536} +{"current_steps": 1660, "total_steps": 42420, "loss": 0.3075, "lr": 1.9554455445544556e-05, "epoch": 0.7826496935407826, "percentage": 3.91, "elapsed_time": "0:05:47", "remaining_time": "2:22:22", "throughput": 4781.99, "total_tokens": 1663616} +{"current_steps": 1665, "total_steps": 42420, "loss": 0.5526, "lr": 1.9613389910419612e-05, "epoch": 0.785007072135785, "percentage": 3.93, "elapsed_time": "0:05:49", "remaining_time": "2:22:25", "throughput": 4782.5, "total_tokens": 1669600} +{"current_steps": 1670, "total_steps": 42420, "loss": 0.2544, "lr": 1.9672324375294675e-05, "epoch": 0.7873644507307873, "percentage": 3.94, "elapsed_time": "0:05:50", "remaining_time": "2:22:23", "throughput": 4782.83, "total_tokens": 1674560} +{"current_steps": 1675, "total_steps": 42420, "loss": 0.2861, "lr": 1.9731258840169734e-05, "epoch": 0.7897218293257897, "percentage": 3.95, "elapsed_time": "0:05:51", "remaining_time": "2:22:22", "throughput": 4783.05, "total_tokens": 1679616} +{"current_steps": 1680, "total_steps": 42420, "loss": 0.0816, "lr": 1.979019330504479e-05, "epoch": 0.7920792079207921, "percentage": 3.96, "elapsed_time": "0:05:52", "remaining_time": "2:22:18", "throughput": 4783.32, "total_tokens": 1684160} +{"current_steps": 1685, "total_steps": 42420, "loss": 0.4813, "lr": 1.984912776991985e-05, "epoch": 0.7944365865157944, "percentage": 3.97, "elapsed_time": "0:05:53", "remaining_time": "2:22:17", "throughput": 4783.2, "total_tokens": 1689280} +{"current_steps": 1690, "total_steps": 42420, "loss": 0.1395, "lr": 1.9908062234794907e-05, "epoch": 0.7967939651107968, "percentage": 3.98, "elapsed_time": "0:05:54", "remaining_time": "2:22:16", "throughput": 4783.62, "total_tokens": 1694368} +{"current_steps": 1695, "total_steps": 42420, "loss": 0.2475, "lr": 1.996699669966997e-05, "epoch": 0.7991513437057991, "percentage": 4.0, "elapsed_time": "0:05:55", "remaining_time": "2:22:13", "throughput": 4783.75, "total_tokens": 1699072} +{"current_steps": 1700, "total_steps": 42420, "loss": 0.1527, "lr": 2.002593116454503e-05, "epoch": 0.8015087223008015, "percentage": 4.01, "elapsed_time": "0:05:56", "remaining_time": "2:22:13", "throughput": 4784.12, "total_tokens": 1704352} +{"current_steps": 1705, "total_steps": 42420, "loss": 0.4713, "lr": 2.0084865629420085e-05, "epoch": 0.8038661008958039, "percentage": 4.02, "elapsed_time": "0:05:57", "remaining_time": "2:22:11", "throughput": 4783.99, "total_tokens": 1709088} +{"current_steps": 1710, "total_steps": 42420, "loss": 0.1139, "lr": 2.0143800094295145e-05, "epoch": 0.8062234794908062, "percentage": 4.03, "elapsed_time": "0:05:58", "remaining_time": "2:22:07", "throughput": 4783.92, "total_tokens": 1713600} +{"current_steps": 1715, "total_steps": 42420, "loss": 0.2536, "lr": 2.02027345591702e-05, "epoch": 0.8085808580858086, "percentage": 4.04, "elapsed_time": "0:05:59", "remaining_time": "2:22:13", "throughput": 4784.56, "total_tokens": 1720224} +{"current_steps": 1720, "total_steps": 42420, "loss": 0.3335, "lr": 2.0261669024045264e-05, "epoch": 0.8109382366808109, "percentage": 4.05, "elapsed_time": "0:06:00", "remaining_time": "2:22:11", "throughput": 4785.04, "total_tokens": 1725280} +{"current_steps": 1725, "total_steps": 42420, "loss": 0.2141, "lr": 2.0320603488920324e-05, "epoch": 0.8132956152758133, "percentage": 4.07, "elapsed_time": "0:06:01", "remaining_time": "2:22:17", "throughput": 4785.43, "total_tokens": 1731904} +{"current_steps": 1730, "total_steps": 42420, "loss": 0.1601, "lr": 2.037953795379538e-05, "epoch": 0.8156529938708157, "percentage": 4.08, "elapsed_time": "0:06:02", "remaining_time": "2:22:16", "throughput": 4785.84, "total_tokens": 1737056} +{"current_steps": 1735, "total_steps": 42420, "loss": 0.1023, "lr": 2.043847241867044e-05, "epoch": 0.818010372465818, "percentage": 4.09, "elapsed_time": "0:06:03", "remaining_time": "2:22:13", "throughput": 4785.89, "total_tokens": 1741568} +{"current_steps": 1740, "total_steps": 42420, "loss": 0.3312, "lr": 2.0497406883545496e-05, "epoch": 0.8203677510608204, "percentage": 4.1, "elapsed_time": "0:06:04", "remaining_time": "2:22:09", "throughput": 4785.66, "total_tokens": 1745984} +{"current_steps": 1745, "total_steps": 42420, "loss": 0.3677, "lr": 2.055634134842056e-05, "epoch": 0.8227251296558227, "percentage": 4.11, "elapsed_time": "0:06:05", "remaining_time": "2:22:07", "throughput": 4785.47, "total_tokens": 1750688} +{"current_steps": 1750, "total_steps": 42420, "loss": 0.3251, "lr": 2.061527581329562e-05, "epoch": 0.8250825082508251, "percentage": 4.13, "elapsed_time": "0:06:06", "remaining_time": "2:22:05", "throughput": 4785.57, "total_tokens": 1755648} +{"current_steps": 1755, "total_steps": 42420, "loss": 0.499, "lr": 2.0674210278170675e-05, "epoch": 0.8274398868458275, "percentage": 4.14, "elapsed_time": "0:06:08", "remaining_time": "2:22:07", "throughput": 4785.91, "total_tokens": 1761408} +{"current_steps": 1760, "total_steps": 42420, "loss": 0.2298, "lr": 2.0733144743045735e-05, "epoch": 0.8297972654408298, "percentage": 4.15, "elapsed_time": "0:06:08", "remaining_time": "2:22:04", "throughput": 4785.97, "total_tokens": 1765984} +{"current_steps": 1765, "total_steps": 42420, "loss": 0.1537, "lr": 2.079207920792079e-05, "epoch": 0.8321546440358322, "percentage": 4.16, "elapsed_time": "0:06:10", "remaining_time": "2:22:03", "throughput": 4786.22, "total_tokens": 1771040} +{"current_steps": 1770, "total_steps": 42420, "loss": 0.2474, "lr": 2.085101367279585e-05, "epoch": 0.8345120226308345, "percentage": 4.17, "elapsed_time": "0:06:11", "remaining_time": "2:22:02", "throughput": 4786.49, "total_tokens": 1776288} +{"current_steps": 1775, "total_steps": 42420, "loss": 0.2176, "lr": 2.0909948137670914e-05, "epoch": 0.8368694012258369, "percentage": 4.18, "elapsed_time": "0:06:12", "remaining_time": "2:22:02", "throughput": 4786.75, "total_tokens": 1781632} +{"current_steps": 1780, "total_steps": 42420, "loss": 0.1939, "lr": 2.096888260254597e-05, "epoch": 0.8392267798208393, "percentage": 4.2, "elapsed_time": "0:06:13", "remaining_time": "2:21:58", "throughput": 4786.88, "total_tokens": 1786048} +{"current_steps": 1785, "total_steps": 42420, "loss": 0.2347, "lr": 2.102781706742103e-05, "epoch": 0.8415841584158416, "percentage": 4.21, "elapsed_time": "0:06:14", "remaining_time": "2:21:56", "throughput": 4786.85, "total_tokens": 1790848} +{"current_steps": 1790, "total_steps": 42420, "loss": 0.0876, "lr": 2.1086751532296086e-05, "epoch": 0.843941537010844, "percentage": 4.22, "elapsed_time": "0:06:15", "remaining_time": "2:21:54", "throughput": 4787.22, "total_tokens": 1795680} +{"current_steps": 1795, "total_steps": 42420, "loss": 0.2951, "lr": 2.1145685997171145e-05, "epoch": 0.8462989156058462, "percentage": 4.23, "elapsed_time": "0:06:16", "remaining_time": "2:21:51", "throughput": 4787.3, "total_tokens": 1800448} +{"current_steps": 1800, "total_steps": 42420, "loss": 0.2409, "lr": 2.120462046204621e-05, "epoch": 0.8486562942008486, "percentage": 4.24, "elapsed_time": "0:06:17", "remaining_time": "2:21:50", "throughput": 4787.52, "total_tokens": 1805568} +{"current_steps": 1805, "total_steps": 42420, "loss": 0.264, "lr": 2.1263554926921265e-05, "epoch": 0.851013672795851, "percentage": 4.26, "elapsed_time": "0:06:18", "remaining_time": "2:21:47", "throughput": 4787.51, "total_tokens": 1810080} +{"current_steps": 1810, "total_steps": 42420, "loss": 0.2488, "lr": 2.1322489391796324e-05, "epoch": 0.8533710513908533, "percentage": 4.27, "elapsed_time": "0:06:19", "remaining_time": "2:21:43", "throughput": 4787.11, "total_tokens": 1814400} +{"current_steps": 1815, "total_steps": 42420, "loss": 0.6154, "lr": 2.138142385667138e-05, "epoch": 0.8557284299858557, "percentage": 4.28, "elapsed_time": "0:06:20", "remaining_time": "2:21:42", "throughput": 4787.11, "total_tokens": 1819296} +{"current_steps": 1820, "total_steps": 42420, "loss": 0.1989, "lr": 2.144035832154644e-05, "epoch": 0.858085808580858, "percentage": 4.29, "elapsed_time": "0:06:21", "remaining_time": "2:21:40", "throughput": 4787.26, "total_tokens": 1824160} +{"current_steps": 1825, "total_steps": 42420, "loss": 0.5156, "lr": 2.1499292786421503e-05, "epoch": 0.8604431871758604, "percentage": 4.3, "elapsed_time": "0:06:22", "remaining_time": "2:21:44", "throughput": 4787.19, "total_tokens": 1830240} +{"current_steps": 1830, "total_steps": 42420, "loss": 0.1872, "lr": 2.155822725129656e-05, "epoch": 0.8628005657708628, "percentage": 4.31, "elapsed_time": "0:06:23", "remaining_time": "2:21:42", "throughput": 4786.43, "total_tokens": 1834752} +{"current_steps": 1835, "total_steps": 42420, "loss": 0.4061, "lr": 2.161716171617162e-05, "epoch": 0.8651579443658651, "percentage": 4.33, "elapsed_time": "0:06:24", "remaining_time": "2:21:41", "throughput": 4786.5, "total_tokens": 1839936} +{"current_steps": 1840, "total_steps": 42420, "loss": 0.0982, "lr": 2.1676096181046675e-05, "epoch": 0.8675153229608675, "percentage": 4.34, "elapsed_time": "0:06:25", "remaining_time": "2:21:38", "throughput": 4786.3, "total_tokens": 1844448} +{"current_steps": 1845, "total_steps": 42420, "loss": 0.1379, "lr": 2.1735030645921735e-05, "epoch": 0.8698727015558698, "percentage": 4.35, "elapsed_time": "0:06:26", "remaining_time": "2:21:34", "throughput": 4786.06, "total_tokens": 1848704} +{"current_steps": 1850, "total_steps": 42420, "loss": 0.0753, "lr": 2.1793965110796798e-05, "epoch": 0.8722300801508722, "percentage": 4.36, "elapsed_time": "0:06:27", "remaining_time": "2:21:35", "throughput": 4786.18, "total_tokens": 1854144} +{"current_steps": 1855, "total_steps": 42420, "loss": 0.1825, "lr": 2.1852899575671854e-05, "epoch": 0.8745874587458746, "percentage": 4.37, "elapsed_time": "0:06:28", "remaining_time": "2:21:34", "throughput": 4786.29, "total_tokens": 1859232} +{"current_steps": 1860, "total_steps": 42420, "loss": 0.0802, "lr": 2.1911834040546914e-05, "epoch": 0.8769448373408769, "percentage": 4.38, "elapsed_time": "0:06:29", "remaining_time": "2:21:35", "throughput": 4786.47, "total_tokens": 1864736} +{"current_steps": 1865, "total_steps": 42420, "loss": 0.1101, "lr": 2.197076850542197e-05, "epoch": 0.8793022159358793, "percentage": 4.4, "elapsed_time": "0:06:30", "remaining_time": "2:21:36", "throughput": 4786.6, "total_tokens": 1870336} +{"current_steps": 1870, "total_steps": 42420, "loss": 0.0761, "lr": 2.202970297029703e-05, "epoch": 0.8816595945308816, "percentage": 4.41, "elapsed_time": "0:06:31", "remaining_time": "2:21:37", "throughput": 4786.79, "total_tokens": 1875872} +{"current_steps": 1875, "total_steps": 42420, "loss": 0.1346, "lr": 2.208863743517209e-05, "epoch": 0.884016973125884, "percentage": 4.42, "elapsed_time": "0:06:32", "remaining_time": "2:21:34", "throughput": 4786.67, "total_tokens": 1880288} +{"current_steps": 1880, "total_steps": 42420, "loss": 0.4116, "lr": 2.214757190004715e-05, "epoch": 0.8863743517208864, "percentage": 4.43, "elapsed_time": "0:06:33", "remaining_time": "2:21:33", "throughput": 4786.81, "total_tokens": 1885376} +{"current_steps": 1885, "total_steps": 42420, "loss": 0.1925, "lr": 2.220650636492221e-05, "epoch": 0.8887317303158887, "percentage": 4.44, "elapsed_time": "0:06:34", "remaining_time": "2:21:32", "throughput": 4786.53, "total_tokens": 1890272} +{"current_steps": 1890, "total_steps": 42420, "loss": 0.0604, "lr": 2.2265440829797265e-05, "epoch": 0.8910891089108911, "percentage": 4.46, "elapsed_time": "0:06:35", "remaining_time": "2:21:29", "throughput": 4786.77, "total_tokens": 1894976} +{"current_steps": 1895, "total_steps": 42420, "loss": 0.1662, "lr": 2.2324375294672324e-05, "epoch": 0.8934464875058934, "percentage": 4.47, "elapsed_time": "0:06:37", "remaining_time": "2:21:32", "throughput": 4786.72, "total_tokens": 1900992} +{"current_steps": 1900, "total_steps": 42420, "loss": 0.3383, "lr": 2.2383309759547384e-05, "epoch": 0.8958038661008958, "percentage": 4.48, "elapsed_time": "0:06:38", "remaining_time": "2:21:31", "throughput": 4786.83, "total_tokens": 1905984} +{"current_steps": 1905, "total_steps": 42420, "loss": 0.1294, "lr": 2.2442244224422444e-05, "epoch": 0.8981612446958982, "percentage": 4.49, "elapsed_time": "0:06:39", "remaining_time": "2:21:30", "throughput": 4786.53, "total_tokens": 1910848} +{"current_steps": 1910, "total_steps": 42420, "loss": 0.1594, "lr": 2.2501178689297503e-05, "epoch": 0.9005186232909005, "percentage": 4.5, "elapsed_time": "0:06:40", "remaining_time": "2:21:31", "throughput": 4786.81, "total_tokens": 1916448} +{"current_steps": 1915, "total_steps": 42420, "loss": 0.1016, "lr": 2.256011315417256e-05, "epoch": 0.9028760018859029, "percentage": 4.51, "elapsed_time": "0:06:41", "remaining_time": "2:21:29", "throughput": 4787.0, "total_tokens": 1921440} +{"current_steps": 1920, "total_steps": 42420, "loss": 0.0926, "lr": 2.261904761904762e-05, "epoch": 0.9052333804809052, "percentage": 4.53, "elapsed_time": "0:06:42", "remaining_time": "2:21:26", "throughput": 4786.99, "total_tokens": 1925984} +{"current_steps": 1925, "total_steps": 42420, "loss": 0.3353, "lr": 2.267798208392268e-05, "epoch": 0.9075907590759076, "percentage": 4.54, "elapsed_time": "0:06:43", "remaining_time": "2:21:30", "throughput": 4787.19, "total_tokens": 1932096} +{"current_steps": 1930, "total_steps": 42420, "loss": 0.3191, "lr": 2.273691654879774e-05, "epoch": 0.90994813767091, "percentage": 4.55, "elapsed_time": "0:06:44", "remaining_time": "2:21:27", "throughput": 4787.1, "total_tokens": 1936640} +{"current_steps": 1935, "total_steps": 42420, "loss": 0.3887, "lr": 2.2795851013672798e-05, "epoch": 0.9123055162659123, "percentage": 4.56, "elapsed_time": "0:06:45", "remaining_time": "2:21:25", "throughput": 4787.12, "total_tokens": 1941472} +{"current_steps": 1940, "total_steps": 42420, "loss": 0.5009, "lr": 2.2854785478547854e-05, "epoch": 0.9146628948609147, "percentage": 4.57, "elapsed_time": "0:06:46", "remaining_time": "2:21:24", "throughput": 4787.2, "total_tokens": 1946528} +{"current_steps": 1945, "total_steps": 42420, "loss": 0.1029, "lr": 2.2913719943422914e-05, "epoch": 0.917020273455917, "percentage": 4.59, "elapsed_time": "0:06:47", "remaining_time": "2:21:26", "throughput": 4787.33, "total_tokens": 1952352} +{"current_steps": 1950, "total_steps": 42420, "loss": 0.5685, "lr": 2.2972654408297974e-05, "epoch": 0.9193776520509194, "percentage": 4.6, "elapsed_time": "0:06:48", "remaining_time": "2:21:25", "throughput": 4787.47, "total_tokens": 1957344} +{"current_steps": 1955, "total_steps": 42420, "loss": 0.1706, "lr": 2.3031588873173033e-05, "epoch": 0.9217350306459218, "percentage": 4.61, "elapsed_time": "0:06:49", "remaining_time": "2:21:22", "throughput": 4787.31, "total_tokens": 1961824} +{"current_steps": 1960, "total_steps": 42420, "loss": 0.2204, "lr": 2.3090523338048093e-05, "epoch": 0.9240924092409241, "percentage": 4.62, "elapsed_time": "0:06:50", "remaining_time": "2:21:19", "throughput": 4787.25, "total_tokens": 1966368} +{"current_steps": 1965, "total_steps": 42420, "loss": 0.1322, "lr": 2.314945780292315e-05, "epoch": 0.9264497878359265, "percentage": 4.63, "elapsed_time": "0:06:52", "remaining_time": "2:21:26", "throughput": 4787.92, "total_tokens": 1973600} +{"current_steps": 1970, "total_steps": 42420, "loss": 0.1577, "lr": 2.320839226779821e-05, "epoch": 0.9288071664309288, "percentage": 4.64, "elapsed_time": "0:06:53", "remaining_time": "2:21:26", "throughput": 4788.13, "total_tokens": 1978880} +{"current_steps": 1975, "total_steps": 42420, "loss": 0.2507, "lr": 2.326732673267327e-05, "epoch": 0.9311645450259312, "percentage": 4.66, "elapsed_time": "0:06:54", "remaining_time": "2:21:27", "throughput": 4788.76, "total_tokens": 1984864} +{"current_steps": 1980, "total_steps": 42420, "loss": 0.2153, "lr": 2.3326261197548328e-05, "epoch": 0.9335219236209336, "percentage": 4.67, "elapsed_time": "0:06:55", "remaining_time": "2:21:29", "throughput": 4789.13, "total_tokens": 1990592} +{"current_steps": 1985, "total_steps": 42420, "loss": 0.1532, "lr": 2.3385195662423388e-05, "epoch": 0.9358793022159358, "percentage": 4.68, "elapsed_time": "0:06:56", "remaining_time": "2:21:31", "throughput": 4789.24, "total_tokens": 1996512} +{"current_steps": 1990, "total_steps": 42420, "loss": 0.0845, "lr": 2.3444130127298444e-05, "epoch": 0.9382366808109383, "percentage": 4.69, "elapsed_time": "0:06:57", "remaining_time": "2:21:29", "throughput": 4788.81, "total_tokens": 2000960} +{"current_steps": 1995, "total_steps": 42420, "loss": 0.0906, "lr": 2.3503064592173503e-05, "epoch": 0.9405940594059405, "percentage": 4.7, "elapsed_time": "0:06:58", "remaining_time": "2:21:24", "throughput": 4788.68, "total_tokens": 2005056} +{"current_steps": 2000, "total_steps": 42420, "loss": 0.1802, "lr": 2.3561999057048563e-05, "epoch": 0.9429514380009429, "percentage": 4.71, "elapsed_time": "0:06:59", "remaining_time": "2:21:20", "throughput": 4788.6, "total_tokens": 2009472} +{"current_steps": 2005, "total_steps": 42420, "loss": 0.615, "lr": 2.3620933521923623e-05, "epoch": 0.9453088165959453, "percentage": 4.73, "elapsed_time": "0:07:00", "remaining_time": "2:21:17", "throughput": 4788.58, "total_tokens": 2013888} +{"current_steps": 2010, "total_steps": 42420, "loss": 0.2034, "lr": 2.3679867986798682e-05, "epoch": 0.9476661951909476, "percentage": 4.74, "elapsed_time": "0:07:01", "remaining_time": "2:21:19", "throughput": 4788.88, "total_tokens": 2019808} +{"current_steps": 2015, "total_steps": 42420, "loss": 0.2061, "lr": 2.373880245167374e-05, "epoch": 0.95002357378595, "percentage": 4.75, "elapsed_time": "0:07:02", "remaining_time": "2:21:20", "throughput": 4789.16, "total_tokens": 2025440} +{"current_steps": 2020, "total_steps": 42420, "loss": 0.2288, "lr": 2.3797736916548798e-05, "epoch": 0.9523809523809523, "percentage": 4.76, "elapsed_time": "0:07:03", "remaining_time": "2:21:19", "throughput": 4789.33, "total_tokens": 2030464} +{"current_steps": 2025, "total_steps": 42420, "loss": 0.2549, "lr": 2.3856671381423858e-05, "epoch": 0.9547383309759547, "percentage": 4.77, "elapsed_time": "0:07:04", "remaining_time": "2:21:17", "throughput": 4789.44, "total_tokens": 2035328} +{"current_steps": 2030, "total_steps": 42420, "loss": 0.1494, "lr": 2.3915605846298918e-05, "epoch": 0.9570957095709571, "percentage": 4.79, "elapsed_time": "0:07:05", "remaining_time": "2:21:13", "throughput": 4789.34, "total_tokens": 2039616} +{"current_steps": 2035, "total_steps": 42420, "loss": 0.2502, "lr": 2.3974540311173977e-05, "epoch": 0.9594530881659594, "percentage": 4.8, "elapsed_time": "0:07:07", "remaining_time": "2:21:14", "throughput": 4789.62, "total_tokens": 2045408} +{"current_steps": 2040, "total_steps": 42420, "loss": 0.1787, "lr": 2.4033474776049033e-05, "epoch": 0.9618104667609618, "percentage": 4.81, "elapsed_time": "0:07:08", "remaining_time": "2:21:15", "throughput": 4789.86, "total_tokens": 2050816} +{"current_steps": 2045, "total_steps": 42420, "loss": 0.1263, "lr": 2.4092409240924093e-05, "epoch": 0.9641678453559641, "percentage": 4.82, "elapsed_time": "0:07:09", "remaining_time": "2:21:15", "throughput": 4789.76, "total_tokens": 2056128} +{"current_steps": 2050, "total_steps": 42420, "loss": 0.2049, "lr": 2.4151343705799153e-05, "epoch": 0.9665252239509665, "percentage": 4.83, "elapsed_time": "0:07:10", "remaining_time": "2:21:14", "throughput": 4789.53, "total_tokens": 2061120} +{"current_steps": 2055, "total_steps": 42420, "loss": 0.3396, "lr": 2.4210278170674212e-05, "epoch": 0.9688826025459689, "percentage": 4.84, "elapsed_time": "0:07:11", "remaining_time": "2:21:13", "throughput": 4789.97, "total_tokens": 2066400} +{"current_steps": 2060, "total_steps": 42420, "loss": 0.2418, "lr": 2.4269212635549272e-05, "epoch": 0.9712399811409712, "percentage": 4.86, "elapsed_time": "0:07:12", "remaining_time": "2:21:11", "throughput": 4790.1, "total_tokens": 2071200} +{"current_steps": 2065, "total_steps": 42420, "loss": 0.2156, "lr": 2.4328147100424328e-05, "epoch": 0.9735973597359736, "percentage": 4.87, "elapsed_time": "0:07:13", "remaining_time": "2:21:10", "throughput": 4790.37, "total_tokens": 2076320} +{"current_steps": 2070, "total_steps": 42420, "loss": 0.8193, "lr": 2.4387081565299388e-05, "epoch": 0.9759547383309759, "percentage": 4.88, "elapsed_time": "0:07:14", "remaining_time": "2:21:11", "throughput": 4790.85, "total_tokens": 2082016} +{"current_steps": 2075, "total_steps": 42420, "loss": 0.1686, "lr": 2.4446016030174447e-05, "epoch": 0.9783121169259783, "percentage": 4.89, "elapsed_time": "0:07:15", "remaining_time": "2:21:10", "throughput": 4791.13, "total_tokens": 2087136} +{"current_steps": 2080, "total_steps": 42420, "loss": 0.5385, "lr": 2.4504950495049507e-05, "epoch": 0.9806694955209807, "percentage": 4.9, "elapsed_time": "0:07:16", "remaining_time": "2:21:09", "throughput": 4791.47, "total_tokens": 2092352} +{"current_steps": 2085, "total_steps": 42420, "loss": 0.0883, "lr": 2.4563884959924567e-05, "epoch": 0.983026874115983, "percentage": 4.92, "elapsed_time": "0:07:17", "remaining_time": "2:21:05", "throughput": 4791.27, "total_tokens": 2096576} +{"current_steps": 2090, "total_steps": 42420, "loss": 0.1547, "lr": 2.4622819424799623e-05, "epoch": 0.9853842527109854, "percentage": 4.93, "elapsed_time": "0:07:18", "remaining_time": "2:21:03", "throughput": 4791.32, "total_tokens": 2101472} +{"current_steps": 2095, "total_steps": 42420, "loss": 0.5854, "lr": 2.4681753889674683e-05, "epoch": 0.9877416313059877, "percentage": 4.94, "elapsed_time": "0:07:19", "remaining_time": "2:21:00", "throughput": 4791.46, "total_tokens": 2106144} +{"current_steps": 2100, "total_steps": 42420, "loss": 0.4764, "lr": 2.4740688354549742e-05, "epoch": 0.9900990099009901, "percentage": 4.95, "elapsed_time": "0:07:20", "remaining_time": "2:20:58", "throughput": 4791.6, "total_tokens": 2110880} +{"current_steps": 2105, "total_steps": 42420, "loss": 0.1748, "lr": 2.4799622819424802e-05, "epoch": 0.9924563884959925, "percentage": 4.96, "elapsed_time": "0:07:21", "remaining_time": "2:20:56", "throughput": 4791.33, "total_tokens": 2115712} +{"current_steps": 2110, "total_steps": 42420, "loss": 0.1007, "lr": 2.485855728429986e-05, "epoch": 0.9948137670909948, "percentage": 4.97, "elapsed_time": "0:07:22", "remaining_time": "2:20:53", "throughput": 4791.48, "total_tokens": 2120192} +{"current_steps": 2115, "total_steps": 42420, "loss": 0.1933, "lr": 2.4917491749174918e-05, "epoch": 0.9971711456859972, "percentage": 4.99, "elapsed_time": "0:07:23", "remaining_time": "2:20:53", "throughput": 4791.87, "total_tokens": 2125728} +{"current_steps": 2120, "total_steps": 42420, "loss": 0.2441, "lr": 2.4976426214049977e-05, "epoch": 0.9995285242809995, "percentage": 5.0, "elapsed_time": "0:07:24", "remaining_time": "2:20:51", "throughput": 4792.06, "total_tokens": 2130624} +{"current_steps": 2121, "total_steps": 42420, "eval_loss": 0.21642036736011505, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:07:40", "remaining_time": "2:25:42", "throughput": 4633.48, "total_tokens": 2131904} +{"current_steps": 2125, "total_steps": 42420, "loss": 0.1318, "lr": 2.5035360678925034e-05, "epoch": 1.0018859028760019, "percentage": 5.01, "elapsed_time": "0:07:42", "remaining_time": "2:26:07", "throughput": 4619.88, "total_tokens": 2136160} +{"current_steps": 2130, "total_steps": 42420, "loss": 0.1356, "lr": 2.5094295143800097e-05, "epoch": 1.0042432814710043, "percentage": 5.02, "elapsed_time": "0:07:43", "remaining_time": "2:26:03", "throughput": 4619.89, "total_tokens": 2140320} +{"current_steps": 2135, "total_steps": 42420, "loss": 0.3517, "lr": 2.5153229608675156e-05, "epoch": 1.0066006600660067, "percentage": 5.03, "elapsed_time": "0:07:44", "remaining_time": "2:26:03", "throughput": 4620.73, "total_tokens": 2145952} +{"current_steps": 2140, "total_steps": 42420, "loss": 0.2544, "lr": 2.5212164073550216e-05, "epoch": 1.0089580386610089, "percentage": 5.04, "elapsed_time": "0:07:45", "remaining_time": "2:26:04", "throughput": 4621.53, "total_tokens": 2151936}