rbelanec commited on
Commit
9f222e0
·
verified ·
1 Parent(s): b6bb9dd

Training in progress, step 24586

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +260 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd8ede6776624b5a3fd5405eb1e984d359e0f2d048e38e6b34b0d0eaa0de9a64
3
  size 1638528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c06c4da71a0eb34d229c44b2d4a521dd54fa75bc698e5ee7c160d0e661413166
3
  size 1638528
trainer_log.jsonl CHANGED
@@ -4677,3 +4677,263 @@
4677
  {"current_steps": 23295, "total_steps": 25880, "loss": 0.426, "lr": 3.0107575500175288e-05, "epoch": 18.002318392581145, "percentage": 90.01, "elapsed_time": "1:08:12", "remaining_time": "0:07:34", "throughput": 1916.06, "total_tokens": 7841840}
4678
  {"current_steps": 23300, "total_steps": 25880, "loss": 0.2083, "lr": 2.9992439837260133e-05, "epoch": 18.006182380216384, "percentage": 90.03, "elapsed_time": "1:08:13", "remaining_time": "0:07:33", "throughput": 1916.06, "total_tokens": 7843472}
4679
  {"current_steps": 23305, "total_steps": 25880, "loss": 0.2494, "lr": 2.987751793659804e-05, "epoch": 18.010046367851622, "percentage": 90.05, "elapsed_time": "1:08:14", "remaining_time": "0:07:32", "throughput": 1916.06, "total_tokens": 7845072}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4677
  {"current_steps": 23295, "total_steps": 25880, "loss": 0.426, "lr": 3.0107575500175288e-05, "epoch": 18.002318392581145, "percentage": 90.01, "elapsed_time": "1:08:12", "remaining_time": "0:07:34", "throughput": 1916.06, "total_tokens": 7841840}
4678
  {"current_steps": 23300, "total_steps": 25880, "loss": 0.2083, "lr": 2.9992439837260133e-05, "epoch": 18.006182380216384, "percentage": 90.03, "elapsed_time": "1:08:13", "remaining_time": "0:07:33", "throughput": 1916.06, "total_tokens": 7843472}
4679
  {"current_steps": 23305, "total_steps": 25880, "loss": 0.2494, "lr": 2.987751793659804e-05, "epoch": 18.010046367851622, "percentage": 90.05, "elapsed_time": "1:08:14", "remaining_time": "0:07:32", "throughput": 1916.06, "total_tokens": 7845072}
4680
+ {"current_steps": 23310, "total_steps": 25880, "loss": 0.3272, "lr": 2.9762809850456374e-05, "epoch": 18.013910355486864, "percentage": 90.07, "elapsed_time": "1:08:15", "remaining_time": "0:07:31", "throughput": 1916.09, "total_tokens": 7846864}
4681
+ {"current_steps": 23315, "total_steps": 25880, "loss": 0.2496, "lr": 2.9648315631004895e-05, "epoch": 18.017774343122102, "percentage": 90.09, "elapsed_time": "1:08:16", "remaining_time": "0:07:30", "throughput": 1916.09, "total_tokens": 7848336}
4682
+ {"current_steps": 23320, "total_steps": 25880, "loss": 0.298, "lr": 2.9534035330316288e-05, "epoch": 18.02163833075734, "percentage": 90.11, "elapsed_time": "1:08:16", "remaining_time": "0:07:29", "throughput": 1916.11, "total_tokens": 7849968}
4683
+ {"current_steps": 23325, "total_steps": 25880, "loss": 0.3028, "lr": 2.9419969000365977e-05, "epoch": 18.025502318392583, "percentage": 90.13, "elapsed_time": "1:08:17", "remaining_time": "0:07:28", "throughput": 1916.12, "total_tokens": 7851632}
4684
+ {"current_steps": 23330, "total_steps": 25880, "loss": 0.2401, "lr": 2.9306116693031903e-05, "epoch": 18.02936630602782, "percentage": 90.15, "elapsed_time": "1:08:18", "remaining_time": "0:07:27", "throughput": 1916.14, "total_tokens": 7853296}
4685
+ {"current_steps": 23335, "total_steps": 25880, "loss": 0.273, "lr": 2.9192478460094763e-05, "epoch": 18.03323029366306, "percentage": 90.17, "elapsed_time": "1:08:19", "remaining_time": "0:07:27", "throughput": 1916.19, "total_tokens": 7855184}
4686
+ {"current_steps": 23340, "total_steps": 25880, "loss": 0.2455, "lr": 2.9079054353238097e-05, "epoch": 18.037094281298298, "percentage": 90.19, "elapsed_time": "1:08:20", "remaining_time": "0:07:26", "throughput": 1916.18, "total_tokens": 7856688}
4687
+ {"current_steps": 23345, "total_steps": 25880, "loss": 0.2645, "lr": 2.8965844424047594e-05, "epoch": 18.04095826893354, "percentage": 90.2, "elapsed_time": "1:08:21", "remaining_time": "0:07:25", "throughput": 1916.17, "total_tokens": 7858256}
4688
+ {"current_steps": 23350, "total_steps": 25880, "loss": 0.2951, "lr": 2.8852848724012115e-05, "epoch": 18.04482225656878, "percentage": 90.22, "elapsed_time": "1:08:21", "remaining_time": "0:07:24", "throughput": 1916.19, "total_tokens": 7860080}
4689
+ {"current_steps": 23355, "total_steps": 25880, "loss": 0.2594, "lr": 2.8740067304522554e-05, "epoch": 18.048686244204017, "percentage": 90.24, "elapsed_time": "1:08:22", "remaining_time": "0:07:23", "throughput": 1916.24, "total_tokens": 7861936}
4690
+ {"current_steps": 23360, "total_steps": 25880, "loss": 0.3124, "lr": 2.8627500216872717e-05, "epoch": 18.05255023183926, "percentage": 90.26, "elapsed_time": "1:08:23", "remaining_time": "0:07:22", "throughput": 1916.26, "total_tokens": 7863568}
4691
+ {"current_steps": 23365, "total_steps": 25880, "loss": 0.328, "lr": 2.8515147512258644e-05, "epoch": 18.056414219474497, "percentage": 90.28, "elapsed_time": "1:08:24", "remaining_time": "0:07:21", "throughput": 1916.3, "total_tokens": 7865360}
4692
+ {"current_steps": 23370, "total_steps": 25880, "loss": 0.2194, "lr": 2.8403009241779077e-05, "epoch": 18.060278207109736, "percentage": 90.3, "elapsed_time": "1:08:25", "remaining_time": "0:07:20", "throughput": 1916.32, "total_tokens": 7867088}
4693
+ {"current_steps": 23375, "total_steps": 25880, "loss": 0.2668, "lr": 2.8291085456435274e-05, "epoch": 18.064142194744978, "percentage": 90.32, "elapsed_time": "1:08:26", "remaining_time": "0:07:20", "throughput": 1916.36, "total_tokens": 7868816}
4694
+ {"current_steps": 23380, "total_steps": 25880, "loss": 0.2826, "lr": 2.8179376207130734e-05, "epoch": 18.068006182380216, "percentage": 90.34, "elapsed_time": "1:08:26", "remaining_time": "0:07:19", "throughput": 1916.35, "total_tokens": 7870288}
4695
+ {"current_steps": 23385, "total_steps": 25880, "loss": 0.2401, "lr": 2.8067881544671436e-05, "epoch": 18.071870170015455, "percentage": 90.36, "elapsed_time": "1:08:27", "remaining_time": "0:07:18", "throughput": 1916.35, "total_tokens": 7871920}
4696
+ {"current_steps": 23390, "total_steps": 25880, "loss": 0.316, "lr": 2.7956601519765978e-05, "epoch": 18.075734157650697, "percentage": 90.38, "elapsed_time": "1:08:28", "remaining_time": "0:07:17", "throughput": 1916.41, "total_tokens": 7873872}
4697
+ {"current_steps": 23395, "total_steps": 25880, "loss": 0.2147, "lr": 2.78455361830251e-05, "epoch": 18.079598145285935, "percentage": 90.4, "elapsed_time": "1:08:29", "remaining_time": "0:07:16", "throughput": 1916.47, "total_tokens": 7875856}
4698
+ {"current_steps": 23400, "total_steps": 25880, "loss": 0.2955, "lr": 2.7734685584962007e-05, "epoch": 18.083462132921174, "percentage": 90.42, "elapsed_time": "1:08:30", "remaining_time": "0:07:15", "throughput": 1916.49, "total_tokens": 7877552}
4699
+ {"current_steps": 23405, "total_steps": 25880, "loss": 0.2387, "lr": 2.762404977599209e-05, "epoch": 18.087326120556416, "percentage": 90.44, "elapsed_time": "1:08:31", "remaining_time": "0:07:14", "throughput": 1916.51, "total_tokens": 7879216}
4700
+ {"current_steps": 23410, "total_steps": 25880, "loss": 0.2417, "lr": 2.7513628806433378e-05, "epoch": 18.091190108191654, "percentage": 90.46, "elapsed_time": "1:08:32", "remaining_time": "0:07:13", "throughput": 1916.51, "total_tokens": 7880720}
4701
+ {"current_steps": 23415, "total_steps": 25880, "loss": 0.2257, "lr": 2.740342272650592e-05, "epoch": 18.095054095826892, "percentage": 90.48, "elapsed_time": "1:08:32", "remaining_time": "0:07:12", "throughput": 1916.57, "total_tokens": 7882640}
4702
+ {"current_steps": 23420, "total_steps": 25880, "loss": 0.225, "lr": 2.7293431586332073e-05, "epoch": 18.098918083462134, "percentage": 90.49, "elapsed_time": "1:08:33", "remaining_time": "0:07:12", "throughput": 1916.57, "total_tokens": 7884176}
4703
+ {"current_steps": 23425, "total_steps": 25880, "loss": 0.2643, "lr": 2.718365543593637e-05, "epoch": 18.102782071097373, "percentage": 90.51, "elapsed_time": "1:08:34", "remaining_time": "0:07:11", "throughput": 1916.58, "total_tokens": 7885808}
4704
+ {"current_steps": 23430, "total_steps": 25880, "loss": 0.274, "lr": 2.7074094325245824e-05, "epoch": 18.10664605873261, "percentage": 90.53, "elapsed_time": "1:08:35", "remaining_time": "0:07:10", "throughput": 1916.59, "total_tokens": 7887376}
4705
+ {"current_steps": 23435, "total_steps": 25880, "loss": 0.2282, "lr": 2.696474830408946e-05, "epoch": 18.110510046367853, "percentage": 90.55, "elapsed_time": "1:08:36", "remaining_time": "0:07:09", "throughput": 1916.65, "total_tokens": 7889360}
4706
+ {"current_steps": 23440, "total_steps": 25880, "loss": 0.2873, "lr": 2.6855617422198332e-05, "epoch": 18.11437403400309, "percentage": 90.57, "elapsed_time": "1:08:37", "remaining_time": "0:07:08", "throughput": 1916.68, "total_tokens": 7891120}
4707
+ {"current_steps": 23445, "total_steps": 25880, "loss": 0.2591, "lr": 2.674670172920607e-05, "epoch": 18.11823802163833, "percentage": 90.59, "elapsed_time": "1:08:37", "remaining_time": "0:07:07", "throughput": 1916.7, "total_tokens": 7892880}
4708
+ {"current_steps": 23450, "total_steps": 25880, "loss": 0.3018, "lr": 2.663800127464805e-05, "epoch": 18.122102009273572, "percentage": 90.61, "elapsed_time": "1:08:38", "remaining_time": "0:07:06", "throughput": 1916.73, "total_tokens": 7894544}
4709
+ {"current_steps": 23455, "total_steps": 25880, "loss": 0.257, "lr": 2.652951610796195e-05, "epoch": 18.12596599690881, "percentage": 90.63, "elapsed_time": "1:08:39", "remaining_time": "0:07:05", "throughput": 1916.77, "total_tokens": 7896336}
4710
+ {"current_steps": 23460, "total_steps": 25880, "loss": 0.275, "lr": 2.6421246278487353e-05, "epoch": 18.12982998454405, "percentage": 90.65, "elapsed_time": "1:08:40", "remaining_time": "0:07:05", "throughput": 1916.77, "total_tokens": 7897904}
4711
+ {"current_steps": 23465, "total_steps": 25880, "loss": 0.2412, "lr": 2.63131918354661e-05, "epoch": 18.133693972179287, "percentage": 90.67, "elapsed_time": "1:08:41", "remaining_time": "0:07:04", "throughput": 1916.77, "total_tokens": 7899376}
4712
+ {"current_steps": 23470, "total_steps": 25880, "loss": 0.2805, "lr": 2.6205352828041984e-05, "epoch": 18.13755795981453, "percentage": 90.69, "elapsed_time": "1:08:42", "remaining_time": "0:07:03", "throughput": 1916.77, "total_tokens": 7900912}
4713
+ {"current_steps": 23475, "total_steps": 25880, "loss": 0.3276, "lr": 2.6097729305260942e-05, "epoch": 18.141421947449768, "percentage": 90.71, "elapsed_time": "1:08:42", "remaining_time": "0:07:02", "throughput": 1916.78, "total_tokens": 7902512}
4714
+ {"current_steps": 23480, "total_steps": 25880, "loss": 0.2723, "lr": 2.5990321316070542e-05, "epoch": 18.145285935085006, "percentage": 90.73, "elapsed_time": "1:08:43", "remaining_time": "0:07:01", "throughput": 1916.81, "total_tokens": 7904272}
4715
+ {"current_steps": 23485, "total_steps": 25880, "loss": 0.2736, "lr": 2.5883128909320873e-05, "epoch": 18.149149922720248, "percentage": 90.75, "elapsed_time": "1:08:44", "remaining_time": "0:07:00", "throughput": 1916.87, "total_tokens": 7906224}
4716
+ {"current_steps": 23490, "total_steps": 25880, "loss": 0.2717, "lr": 2.5776152133763552e-05, "epoch": 18.153013910355487, "percentage": 90.77, "elapsed_time": "1:08:45", "remaining_time": "0:06:59", "throughput": 1916.88, "total_tokens": 7907824}
4717
+ {"current_steps": 23495, "total_steps": 25880, "loss": 0.244, "lr": 2.5669391038052214e-05, "epoch": 18.156877897990725, "percentage": 90.78, "elapsed_time": "1:08:46", "remaining_time": "0:06:58", "throughput": 1916.88, "total_tokens": 7909360}
4718
+ {"current_steps": 23500, "total_steps": 25880, "loss": 0.2303, "lr": 2.556284567074252e-05, "epoch": 18.160741885625967, "percentage": 90.8, "elapsed_time": "1:08:47", "remaining_time": "0:06:57", "throughput": 1916.88, "total_tokens": 7910992}
4719
+ {"current_steps": 23505, "total_steps": 25880, "loss": 0.3572, "lr": 2.5456516080291815e-05, "epoch": 18.164605873261205, "percentage": 90.82, "elapsed_time": "1:08:47", "remaining_time": "0:06:57", "throughput": 1916.92, "total_tokens": 7912880}
4720
+ {"current_steps": 23510, "total_steps": 25880, "loss": 0.2345, "lr": 2.5350402315059586e-05, "epoch": 18.168469860896444, "percentage": 90.84, "elapsed_time": "1:08:48", "remaining_time": "0:06:56", "throughput": 1916.96, "total_tokens": 7914768}
4721
+ {"current_steps": 23515, "total_steps": 25880, "loss": 0.2661, "lr": 2.5244504423306948e-05, "epoch": 18.172333848531686, "percentage": 90.86, "elapsed_time": "1:08:49", "remaining_time": "0:06:55", "throughput": 1916.96, "total_tokens": 7916304}
4722
+ {"current_steps": 23520, "total_steps": 25880, "loss": 0.2206, "lr": 2.5138822453196875e-05, "epoch": 18.176197836166924, "percentage": 90.88, "elapsed_time": "1:08:50", "remaining_time": "0:06:54", "throughput": 1916.99, "total_tokens": 7917968}
4723
+ {"current_steps": 23525, "total_steps": 25880, "loss": 0.2567, "lr": 2.5033356452794255e-05, "epoch": 18.180061823802163, "percentage": 90.9, "elapsed_time": "1:08:51", "remaining_time": "0:06:53", "throughput": 1916.99, "total_tokens": 7919600}
4724
+ {"current_steps": 23530, "total_steps": 25880, "loss": 0.2837, "lr": 2.4928106470065548e-05, "epoch": 18.183925811437405, "percentage": 90.92, "elapsed_time": "1:08:52", "remaining_time": "0:06:52", "throughput": 1917.03, "total_tokens": 7921424}
4725
+ {"current_steps": 23535, "total_steps": 25880, "loss": 0.219, "lr": 2.4823072552879133e-05, "epoch": 18.187789799072643, "percentage": 90.94, "elapsed_time": "1:08:52", "remaining_time": "0:06:51", "throughput": 1917.06, "total_tokens": 7923120}
4726
+ {"current_steps": 23540, "total_steps": 25880, "loss": 0.2721, "lr": 2.4718254749004965e-05, "epoch": 18.19165378670788, "percentage": 90.96, "elapsed_time": "1:08:53", "remaining_time": "0:06:50", "throughput": 1917.03, "total_tokens": 7924464}
4727
+ {"current_steps": 23545, "total_steps": 25880, "loss": 0.2483, "lr": 2.4613653106114908e-05, "epoch": 18.195517774343124, "percentage": 90.98, "elapsed_time": "1:08:54", "remaining_time": "0:06:50", "throughput": 1917.07, "total_tokens": 7926320}
4728
+ {"current_steps": 23550, "total_steps": 25880, "loss": 0.2903, "lr": 2.450926767178241e-05, "epoch": 18.199381761978362, "percentage": 91.0, "elapsed_time": "1:08:55", "remaining_time": "0:06:49", "throughput": 1917.12, "total_tokens": 7928240}
4729
+ {"current_steps": 23555, "total_steps": 25880, "loss": 0.2826, "lr": 2.4405098493482604e-05, "epoch": 18.2032457496136, "percentage": 91.02, "elapsed_time": "1:08:56", "remaining_time": "0:06:48", "throughput": 1917.15, "total_tokens": 7929968}
4730
+ {"current_steps": 23560, "total_steps": 25880, "loss": 0.2889, "lr": 2.4301145618592203e-05, "epoch": 18.207109737248842, "percentage": 91.04, "elapsed_time": "1:08:57", "remaining_time": "0:06:47", "throughput": 1917.17, "total_tokens": 7931664}
4731
+ {"current_steps": 23565, "total_steps": 25880, "loss": 0.2802, "lr": 2.4197409094389666e-05, "epoch": 18.21097372488408, "percentage": 91.05, "elapsed_time": "1:08:58", "remaining_time": "0:06:46", "throughput": 1917.18, "total_tokens": 7933328}
4732
+ {"current_steps": 23570, "total_steps": 25880, "loss": 0.2046, "lr": 2.409388896805492e-05, "epoch": 18.21483771251932, "percentage": 91.07, "elapsed_time": "1:08:58", "remaining_time": "0:06:45", "throughput": 1917.18, "total_tokens": 7934928}
4733
+ {"current_steps": 23575, "total_steps": 25880, "loss": 0.3779, "lr": 2.3990585286669586e-05, "epoch": 18.21870170015456, "percentage": 91.09, "elapsed_time": "1:08:59", "remaining_time": "0:06:44", "throughput": 1917.19, "total_tokens": 7936496}
4734
+ {"current_steps": 23580, "total_steps": 25880, "loss": 0.2552, "lr": 2.3887498097216688e-05, "epoch": 18.2225656877898, "percentage": 91.11, "elapsed_time": "1:09:00", "remaining_time": "0:06:43", "throughput": 1917.21, "total_tokens": 7938224}
4735
+ {"current_steps": 23585, "total_steps": 25880, "loss": 0.2755, "lr": 2.3784627446580998e-05, "epoch": 18.226429675425038, "percentage": 91.13, "elapsed_time": "1:09:01", "remaining_time": "0:06:42", "throughput": 1917.23, "total_tokens": 7939792}
4736
+ {"current_steps": 23590, "total_steps": 25880, "loss": 0.2962, "lr": 2.3681973381548706e-05, "epoch": 18.230293663060277, "percentage": 91.15, "elapsed_time": "1:09:02", "remaining_time": "0:06:42", "throughput": 1917.27, "total_tokens": 7941584}
4737
+ {"current_steps": 23595, "total_steps": 25880, "loss": 0.2347, "lr": 2.357953594880752e-05, "epoch": 18.23415765069552, "percentage": 91.17, "elapsed_time": "1:09:03", "remaining_time": "0:06:41", "throughput": 1917.31, "total_tokens": 7943440}
4738
+ {"current_steps": 23600, "total_steps": 25880, "loss": 0.2945, "lr": 2.3477315194946558e-05, "epoch": 18.238021638330757, "percentage": 91.19, "elapsed_time": "1:09:03", "remaining_time": "0:06:40", "throughput": 1917.34, "total_tokens": 7945136}
4739
+ {"current_steps": 23605, "total_steps": 25880, "loss": 0.1906, "lr": 2.3375311166456414e-05, "epoch": 18.241885625965995, "percentage": 91.21, "elapsed_time": "1:09:04", "remaining_time": "0:06:39", "throughput": 1917.37, "total_tokens": 7946832}
4740
+ {"current_steps": 23610, "total_steps": 25880, "loss": 0.2596, "lr": 2.3273523909729143e-05, "epoch": 18.245749613601237, "percentage": 91.23, "elapsed_time": "1:09:05", "remaining_time": "0:06:38", "throughput": 1917.43, "total_tokens": 7948688}
4741
+ {"current_steps": 23615, "total_steps": 25880, "loss": 0.2728, "lr": 2.317195347105816e-05, "epoch": 18.249613601236476, "percentage": 91.25, "elapsed_time": "1:09:06", "remaining_time": "0:06:37", "throughput": 1917.45, "total_tokens": 7950384}
4742
+ {"current_steps": 23620, "total_steps": 25880, "loss": 0.2214, "lr": 2.30705998966384e-05, "epoch": 18.253477588871714, "percentage": 91.27, "elapsed_time": "1:09:07", "remaining_time": "0:06:36", "throughput": 1917.45, "total_tokens": 7952016}
4743
+ {"current_steps": 23625, "total_steps": 25880, "loss": 0.2934, "lr": 2.2969463232565936e-05, "epoch": 18.257341576506956, "percentage": 91.29, "elapsed_time": "1:09:07", "remaining_time": "0:06:35", "throughput": 1917.47, "total_tokens": 7953648}
4744
+ {"current_steps": 23630, "total_steps": 25880, "loss": 0.2041, "lr": 2.286854352483847e-05, "epoch": 18.261205564142195, "percentage": 91.31, "elapsed_time": "1:09:08", "remaining_time": "0:06:35", "throughput": 1917.5, "total_tokens": 7955344}
4745
+ {"current_steps": 23635, "total_steps": 25880, "loss": 0.23, "lr": 2.27678408193549e-05, "epoch": 18.265069551777433, "percentage": 91.33, "elapsed_time": "1:09:09", "remaining_time": "0:06:34", "throughput": 1917.56, "total_tokens": 7957232}
4746
+ {"current_steps": 23640, "total_steps": 25880, "loss": 0.2448, "lr": 2.2667355161915314e-05, "epoch": 18.268933539412675, "percentage": 91.34, "elapsed_time": "1:09:10", "remaining_time": "0:06:33", "throughput": 1917.59, "total_tokens": 7958960}
4747
+ {"current_steps": 23645, "total_steps": 25880, "loss": 0.2478, "lr": 2.2567086598221266e-05, "epoch": 18.272797527047913, "percentage": 91.36, "elapsed_time": "1:09:11", "remaining_time": "0:06:32", "throughput": 1917.59, "total_tokens": 7960496}
4748
+ {"current_steps": 23650, "total_steps": 25880, "loss": 0.293, "lr": 2.2467035173875328e-05, "epoch": 18.276661514683152, "percentage": 91.38, "elapsed_time": "1:09:12", "remaining_time": "0:06:31", "throughput": 1917.62, "total_tokens": 7962320}
4749
+ {"current_steps": 23655, "total_steps": 25880, "loss": 0.2094, "lr": 2.236720093438177e-05, "epoch": 18.280525502318394, "percentage": 91.4, "elapsed_time": "1:09:12", "remaining_time": "0:06:30", "throughput": 1917.63, "total_tokens": 7963888}
4750
+ {"current_steps": 23660, "total_steps": 25880, "loss": 0.196, "lr": 2.2267583925145608e-05, "epoch": 18.284389489953632, "percentage": 91.42, "elapsed_time": "1:09:13", "remaining_time": "0:06:29", "throughput": 1917.67, "total_tokens": 7965744}
4751
+ {"current_steps": 23665, "total_steps": 25880, "loss": 0.3713, "lr": 2.2168184191473317e-05, "epoch": 18.28825347758887, "percentage": 91.44, "elapsed_time": "1:09:14", "remaining_time": "0:06:28", "throughput": 1917.69, "total_tokens": 7967312}
4752
+ {"current_steps": 23670, "total_steps": 25880, "loss": 0.2304, "lr": 2.2069001778572462e-05, "epoch": 18.292117465224113, "percentage": 91.46, "elapsed_time": "1:09:15", "remaining_time": "0:06:27", "throughput": 1917.71, "total_tokens": 7968976}
4753
+ {"current_steps": 23675, "total_steps": 25880, "loss": 0.2232, "lr": 2.1970036731551846e-05, "epoch": 18.29598145285935, "percentage": 91.48, "elapsed_time": "1:09:16", "remaining_time": "0:06:27", "throughput": 1917.72, "total_tokens": 7970704}
4754
+ {"current_steps": 23680, "total_steps": 25880, "loss": 0.2129, "lr": 2.18712890954213e-05, "epoch": 18.29984544049459, "percentage": 91.5, "elapsed_time": "1:09:17", "remaining_time": "0:06:26", "throughput": 1917.73, "total_tokens": 7972272}
4755
+ {"current_steps": 23685, "total_steps": 25880, "loss": 0.2477, "lr": 2.177275891509184e-05, "epoch": 18.30370942812983, "percentage": 91.52, "elapsed_time": "1:09:17", "remaining_time": "0:06:25", "throughput": 1917.75, "total_tokens": 7973968}
4756
+ {"current_steps": 23690, "total_steps": 25880, "loss": 0.2823, "lr": 2.1674446235375677e-05, "epoch": 18.30757341576507, "percentage": 91.54, "elapsed_time": "1:09:18", "remaining_time": "0:06:24", "throughput": 1917.75, "total_tokens": 7975504}
4757
+ {"current_steps": 23695, "total_steps": 25880, "loss": 0.2836, "lr": 2.1576351100985936e-05, "epoch": 18.31143740340031, "percentage": 91.56, "elapsed_time": "1:09:19", "remaining_time": "0:06:23", "throughput": 1917.73, "total_tokens": 7976944}
4758
+ {"current_steps": 23700, "total_steps": 25880, "loss": 0.2617, "lr": 2.147847355653687e-05, "epoch": 18.315301391035547, "percentage": 91.58, "elapsed_time": "1:09:20", "remaining_time": "0:06:22", "throughput": 1917.74, "total_tokens": 7978544}
4759
+ {"current_steps": 23705, "total_steps": 25880, "loss": 0.276, "lr": 2.138081364654382e-05, "epoch": 18.31916537867079, "percentage": 91.6, "elapsed_time": "1:09:21", "remaining_time": "0:06:21", "throughput": 1917.78, "total_tokens": 7980336}
4760
+ {"current_steps": 23710, "total_steps": 25880, "loss": 0.2712, "lr": 2.128337141542297e-05, "epoch": 18.323029366306027, "percentage": 91.62, "elapsed_time": "1:09:22", "remaining_time": "0:06:20", "throughput": 1917.81, "total_tokens": 7982000}
4761
+ {"current_steps": 23715, "total_steps": 25880, "loss": 0.2718, "lr": 2.118614690749182e-05, "epoch": 18.326893353941266, "percentage": 91.63, "elapsed_time": "1:09:22", "remaining_time": "0:06:20", "throughput": 1917.84, "total_tokens": 7983728}
4762
+ {"current_steps": 23720, "total_steps": 25880, "loss": 0.1964, "lr": 2.1089140166968446e-05, "epoch": 18.330757341576508, "percentage": 91.65, "elapsed_time": "1:09:23", "remaining_time": "0:06:19", "throughput": 1917.86, "total_tokens": 7985424}
4763
+ {"current_steps": 23725, "total_steps": 25880, "loss": 0.2296, "lr": 2.099235123797233e-05, "epoch": 18.334621329211746, "percentage": 91.67, "elapsed_time": "1:09:24", "remaining_time": "0:06:18", "throughput": 1917.85, "total_tokens": 7986864}
4764
+ {"current_steps": 23730, "total_steps": 25880, "loss": 0.2679, "lr": 2.089578016452348e-05, "epoch": 18.338485316846985, "percentage": 91.69, "elapsed_time": "1:09:25", "remaining_time": "0:06:17", "throughput": 1917.86, "total_tokens": 7988496}
4765
+ {"current_steps": 23735, "total_steps": 25880, "loss": 0.3867, "lr": 2.07994269905431e-05, "epoch": 18.342349304482227, "percentage": 91.71, "elapsed_time": "1:09:26", "remaining_time": "0:06:16", "throughput": 1917.85, "total_tokens": 7989936}
4766
+ {"current_steps": 23740, "total_steps": 25880, "loss": 0.2949, "lr": 2.070329175985314e-05, "epoch": 18.346213292117465, "percentage": 91.73, "elapsed_time": "1:09:26", "remaining_time": "0:06:15", "throughput": 1917.86, "total_tokens": 7991504}
4767
+ {"current_steps": 23745, "total_steps": 25880, "loss": 0.2146, "lr": 2.0607374516176448e-05, "epoch": 18.350077279752703, "percentage": 91.75, "elapsed_time": "1:09:27", "remaining_time": "0:06:14", "throughput": 1917.88, "total_tokens": 7993168}
4768
+ {"current_steps": 23750, "total_steps": 25880, "loss": 0.2846, "lr": 2.0511675303136745e-05, "epoch": 18.353941267387945, "percentage": 91.77, "elapsed_time": "1:09:28", "remaining_time": "0:06:13", "throughput": 1917.89, "total_tokens": 7994832}
4769
+ {"current_steps": 23755, "total_steps": 25880, "loss": 0.3446, "lr": 2.0416194164258662e-05, "epoch": 18.357805255023184, "percentage": 91.79, "elapsed_time": "1:09:29", "remaining_time": "0:06:12", "throughput": 1917.9, "total_tokens": 7996432}
4770
+ {"current_steps": 23760, "total_steps": 25880, "loss": 0.2925, "lr": 2.0320931142967624e-05, "epoch": 18.361669242658422, "percentage": 91.81, "elapsed_time": "1:09:30", "remaining_time": "0:06:12", "throughput": 1917.96, "total_tokens": 7998448}
4771
+ {"current_steps": 23765, "total_steps": 25880, "loss": 0.1803, "lr": 2.0225886282589813e-05, "epoch": 18.365533230293664, "percentage": 91.83, "elapsed_time": "1:09:31", "remaining_time": "0:06:11", "throughput": 1917.98, "total_tokens": 8000080}
4772
+ {"current_steps": 23770, "total_steps": 25880, "loss": 0.2326, "lr": 2.01310596263522e-05, "epoch": 18.369397217928903, "percentage": 91.85, "elapsed_time": "1:09:31", "remaining_time": "0:06:10", "throughput": 1918.0, "total_tokens": 8001648}
4773
+ {"current_steps": 23775, "total_steps": 25880, "loss": 0.3382, "lr": 2.0036451217382457e-05, "epoch": 18.37326120556414, "percentage": 91.87, "elapsed_time": "1:09:32", "remaining_time": "0:06:09", "throughput": 1917.97, "total_tokens": 8002992}
4774
+ {"current_steps": 23780, "total_steps": 25880, "loss": 0.2367, "lr": 1.9942061098709107e-05, "epoch": 18.377125193199383, "percentage": 91.89, "elapsed_time": "1:09:33", "remaining_time": "0:06:08", "throughput": 1918.03, "total_tokens": 8004912}
4775
+ {"current_steps": 23785, "total_steps": 25880, "loss": 0.1895, "lr": 1.9847889313261258e-05, "epoch": 18.38098918083462, "percentage": 91.9, "elapsed_time": "1:09:34", "remaining_time": "0:06:07", "throughput": 1918.03, "total_tokens": 8006384}
4776
+ {"current_steps": 23790, "total_steps": 25880, "loss": 0.2365, "lr": 1.9753935903868868e-05, "epoch": 18.38485316846986, "percentage": 91.92, "elapsed_time": "1:09:35", "remaining_time": "0:06:06", "throughput": 1918.1, "total_tokens": 8008464}
4777
+ {"current_steps": 23795, "total_steps": 25880, "loss": 0.2336, "lr": 1.9660200913262537e-05, "epoch": 18.388717156105102, "percentage": 91.94, "elapsed_time": "1:09:36", "remaining_time": "0:06:05", "throughput": 1918.13, "total_tokens": 8010224}
4778
+ {"current_steps": 23800, "total_steps": 25880, "loss": 0.2522, "lr": 1.956668438407355e-05, "epoch": 18.39258114374034, "percentage": 91.96, "elapsed_time": "1:09:36", "remaining_time": "0:06:05", "throughput": 1918.15, "total_tokens": 8011856}
4779
+ {"current_steps": 23805, "total_steps": 25880, "loss": 0.2958, "lr": 1.9473386358833667e-05, "epoch": 18.39644513137558, "percentage": 91.98, "elapsed_time": "1:09:37", "remaining_time": "0:06:04", "throughput": 1918.15, "total_tokens": 8013392}
4780
+ {"current_steps": 23810, "total_steps": 25880, "loss": 0.2741, "lr": 1.9380306879975385e-05, "epoch": 18.40030911901082, "percentage": 92.0, "elapsed_time": "1:09:38", "remaining_time": "0:06:03", "throughput": 1918.14, "total_tokens": 8014832}
4781
+ {"current_steps": 23815, "total_steps": 25880, "loss": 0.2698, "lr": 1.9287445989831898e-05, "epoch": 18.40417310664606, "percentage": 92.02, "elapsed_time": "1:09:39", "remaining_time": "0:06:02", "throughput": 1918.1, "total_tokens": 8016784}
4782
+ {"current_steps": 23820, "total_steps": 25880, "loss": 0.2358, "lr": 1.9194803730636813e-05, "epoch": 18.408037094281298, "percentage": 92.04, "elapsed_time": "1:09:40", "remaining_time": "0:06:01", "throughput": 1918.12, "total_tokens": 8018480}
4783
+ {"current_steps": 23825, "total_steps": 25880, "loss": 0.3073, "lr": 1.910238014452442e-05, "epoch": 18.41190108191654, "percentage": 92.06, "elapsed_time": "1:09:41", "remaining_time": "0:06:00", "throughput": 1918.16, "total_tokens": 8020272}
4784
+ {"current_steps": 23830, "total_steps": 25880, "loss": 0.2461, "lr": 1.9010175273529495e-05, "epoch": 18.415765069551778, "percentage": 92.08, "elapsed_time": "1:09:42", "remaining_time": "0:05:59", "throughput": 1918.16, "total_tokens": 8021808}
4785
+ {"current_steps": 23835, "total_steps": 25880, "loss": 0.2893, "lr": 1.891818915958743e-05, "epoch": 18.419629057187016, "percentage": 92.1, "elapsed_time": "1:09:42", "remaining_time": "0:05:58", "throughput": 1918.18, "total_tokens": 8023504}
4786
+ {"current_steps": 23840, "total_steps": 25880, "loss": 0.2227, "lr": 1.8826421844534035e-05, "epoch": 18.423493044822255, "percentage": 92.12, "elapsed_time": "1:09:43", "remaining_time": "0:05:57", "throughput": 1918.17, "total_tokens": 8024944}
4787
+ {"current_steps": 23845, "total_steps": 25880, "loss": 0.2552, "lr": 1.8734873370105654e-05, "epoch": 18.427357032457497, "percentage": 92.14, "elapsed_time": "1:09:44", "remaining_time": "0:05:57", "throughput": 1918.2, "total_tokens": 8026640}
4788
+ {"current_steps": 23850, "total_steps": 25880, "loss": 0.295, "lr": 1.8643543777938975e-05, "epoch": 18.431221020092735, "percentage": 92.16, "elapsed_time": "1:09:45", "remaining_time": "0:05:56", "throughput": 1918.24, "total_tokens": 8028432}
4789
+ {"current_steps": 23855, "total_steps": 25880, "loss": 0.379, "lr": 1.855243310957133e-05, "epoch": 18.435085007727974, "percentage": 92.18, "elapsed_time": "1:09:46", "remaining_time": "0:05:55", "throughput": 1918.25, "total_tokens": 8030032}
4790
+ {"current_steps": 23860, "total_steps": 25880, "loss": 0.223, "lr": 1.8461541406440406e-05, "epoch": 18.438948995363216, "percentage": 92.19, "elapsed_time": "1:09:46", "remaining_time": "0:05:54", "throughput": 1918.26, "total_tokens": 8031760}
4791
+ {"current_steps": 23865, "total_steps": 25880, "loss": 0.2547, "lr": 1.837086870988425e-05, "epoch": 18.442812982998454, "percentage": 92.21, "elapsed_time": "1:09:47", "remaining_time": "0:05:53", "throughput": 1918.25, "total_tokens": 8033200}
4792
+ {"current_steps": 23870, "total_steps": 25880, "loss": 0.3132, "lr": 1.8280415061141365e-05, "epoch": 18.446676970633693, "percentage": 92.23, "elapsed_time": "1:09:48", "remaining_time": "0:05:52", "throughput": 1918.32, "total_tokens": 8035120}
4793
+ {"current_steps": 23875, "total_steps": 25880, "loss": 0.2988, "lr": 1.819018050135063e-05, "epoch": 18.450540958268935, "percentage": 92.25, "elapsed_time": "1:09:49", "remaining_time": "0:05:51", "throughput": 1918.35, "total_tokens": 8036848}
4794
+ {"current_steps": 23880, "total_steps": 25880, "loss": 0.2691, "lr": 1.8100165071551266e-05, "epoch": 18.454404945904173, "percentage": 92.27, "elapsed_time": "1:09:50", "remaining_time": "0:05:50", "throughput": 1918.39, "total_tokens": 8038576}
4795
+ {"current_steps": 23885, "total_steps": 25880, "loss": 0.2987, "lr": 1.801036881268281e-05, "epoch": 18.45826893353941, "percentage": 92.29, "elapsed_time": "1:09:51", "remaining_time": "0:05:50", "throughput": 1918.39, "total_tokens": 8040176}
4796
+ {"current_steps": 23890, "total_steps": 25880, "loss": 0.2719, "lr": 1.7920791765585086e-05, "epoch": 18.462132921174653, "percentage": 92.31, "elapsed_time": "1:09:51", "remaining_time": "0:05:49", "throughput": 1918.42, "total_tokens": 8041936}
4797
+ {"current_steps": 23895, "total_steps": 25880, "loss": 0.2382, "lr": 1.78314339709984e-05, "epoch": 18.465996908809892, "percentage": 92.33, "elapsed_time": "1:09:52", "remaining_time": "0:05:48", "throughput": 1918.45, "total_tokens": 8043632}
4798
+ {"current_steps": 23900, "total_steps": 25880, "loss": 0.2216, "lr": 1.7742295469563076e-05, "epoch": 18.46986089644513, "percentage": 92.35, "elapsed_time": "1:09:53", "remaining_time": "0:05:47", "throughput": 1918.5, "total_tokens": 8045456}
4799
+ {"current_steps": 23905, "total_steps": 25880, "loss": 0.2981, "lr": 1.7653376301819957e-05, "epoch": 18.473724884080372, "percentage": 92.37, "elapsed_time": "1:09:54", "remaining_time": "0:05:46", "throughput": 1918.53, "total_tokens": 8047216}
4800
+ {"current_steps": 23910, "total_steps": 25880, "loss": 0.2909, "lr": 1.7564676508209864e-05, "epoch": 18.47758887171561, "percentage": 92.39, "elapsed_time": "1:09:55", "remaining_time": "0:05:45", "throughput": 1918.56, "total_tokens": 8048944}
4801
+ {"current_steps": 23915, "total_steps": 25880, "loss": 0.3721, "lr": 1.7476196129074197e-05, "epoch": 18.48145285935085, "percentage": 92.41, "elapsed_time": "1:09:56", "remaining_time": "0:05:44", "throughput": 1918.56, "total_tokens": 8050512}
4802
+ {"current_steps": 23920, "total_steps": 25880, "loss": 0.2337, "lr": 1.7387935204654204e-05, "epoch": 18.48531684698609, "percentage": 92.43, "elapsed_time": "1:09:57", "remaining_time": "0:05:43", "throughput": 1918.58, "total_tokens": 8052304}
4803
+ {"current_steps": 23925, "total_steps": 25880, "loss": 0.319, "lr": 1.7299893775091503e-05, "epoch": 18.48918083462133, "percentage": 92.45, "elapsed_time": "1:09:57", "remaining_time": "0:05:43", "throughput": 1918.61, "total_tokens": 8054032}
4804
+ {"current_steps": 23930, "total_steps": 25880, "loss": 0.3652, "lr": 1.7212071880427948e-05, "epoch": 18.493044822256568, "percentage": 92.47, "elapsed_time": "1:09:58", "remaining_time": "0:05:42", "throughput": 1918.62, "total_tokens": 8055568}
4805
+ {"current_steps": 23935, "total_steps": 25880, "loss": 0.263, "lr": 1.7124469560605372e-05, "epoch": 18.49690880989181, "percentage": 92.48, "elapsed_time": "1:09:59", "remaining_time": "0:05:41", "throughput": 1918.64, "total_tokens": 8057136}
4806
+ {"current_steps": 23940, "total_steps": 25880, "loss": 0.2054, "lr": 1.70370868554659e-05, "epoch": 18.50077279752705, "percentage": 92.5, "elapsed_time": "1:10:00", "remaining_time": "0:05:40", "throughput": 1918.65, "total_tokens": 8058800}
4807
+ {"current_steps": 23945, "total_steps": 25880, "loss": 0.2559, "lr": 1.6949923804751632e-05, "epoch": 18.504636785162287, "percentage": 92.52, "elapsed_time": "1:10:01", "remaining_time": "0:05:39", "throughput": 1918.67, "total_tokens": 8060528}
4808
+ {"current_steps": 23950, "total_steps": 25880, "loss": 0.3333, "lr": 1.6862980448104915e-05, "epoch": 18.508500772797525, "percentage": 92.54, "elapsed_time": "1:10:01", "remaining_time": "0:05:38", "throughput": 1918.71, "total_tokens": 8062416}
4809
+ {"current_steps": 23955, "total_steps": 25880, "loss": 0.2439, "lr": 1.677625682506806e-05, "epoch": 18.512364760432767, "percentage": 92.56, "elapsed_time": "1:10:02", "remaining_time": "0:05:37", "throughput": 1918.75, "total_tokens": 8064272}
4810
+ {"current_steps": 23960, "total_steps": 25880, "loss": 0.261, "lr": 1.6689752975083515e-05, "epoch": 18.516228748068006, "percentage": 92.58, "elapsed_time": "1:10:03", "remaining_time": "0:05:36", "throughput": 1918.79, "total_tokens": 8066096}
4811
+ {"current_steps": 23965, "total_steps": 25880, "loss": 0.2255, "lr": 1.660346893749376e-05, "epoch": 18.520092735703244, "percentage": 92.6, "elapsed_time": "1:10:04", "remaining_time": "0:05:35", "throughput": 1918.79, "total_tokens": 8067600}
4812
+ {"current_steps": 23970, "total_steps": 25880, "loss": 0.2532, "lr": 1.651740475154129e-05, "epoch": 18.523956723338486, "percentage": 92.62, "elapsed_time": "1:10:05", "remaining_time": "0:05:35", "throughput": 1918.84, "total_tokens": 8069488}
4813
+ {"current_steps": 23975, "total_steps": 25880, "loss": 0.3076, "lr": 1.643156045636862e-05, "epoch": 18.527820710973725, "percentage": 92.64, "elapsed_time": "1:10:06", "remaining_time": "0:05:34", "throughput": 1918.84, "total_tokens": 8071024}
4814
+ {"current_steps": 23980, "total_steps": 25880, "loss": 0.2726, "lr": 1.6345936091018255e-05, "epoch": 18.531684698608963, "percentage": 92.66, "elapsed_time": "1:10:06", "remaining_time": "0:05:33", "throughput": 1918.84, "total_tokens": 8072528}
4815
+ {"current_steps": 23985, "total_steps": 25880, "loss": 0.233, "lr": 1.626053169443259e-05, "epoch": 18.535548686244205, "percentage": 92.68, "elapsed_time": "1:10:07", "remaining_time": "0:05:32", "throughput": 1918.83, "total_tokens": 8074000}
4816
+ {"current_steps": 23990, "total_steps": 25880, "loss": 0.2821, "lr": 1.6175347305454114e-05, "epoch": 18.539412673879443, "percentage": 92.7, "elapsed_time": "1:10:08", "remaining_time": "0:05:31", "throughput": 1918.84, "total_tokens": 8075632}
4817
+ {"current_steps": 23995, "total_steps": 25880, "loss": 0.4037, "lr": 1.609038296282517e-05, "epoch": 18.543276661514682, "percentage": 92.72, "elapsed_time": "1:10:09", "remaining_time": "0:05:30", "throughput": 1918.89, "total_tokens": 8077584}
4818
+ {"current_steps": 24000, "total_steps": 25880, "loss": 0.2556, "lr": 1.600563870518812e-05, "epoch": 18.547140649149924, "percentage": 92.74, "elapsed_time": "1:10:10", "remaining_time": "0:05:29", "throughput": 1918.91, "total_tokens": 8079280}
4819
+ {"current_steps": 24005, "total_steps": 25880, "loss": 0.2173, "lr": 1.5921114571085138e-05, "epoch": 18.551004636785162, "percentage": 92.76, "elapsed_time": "1:10:11", "remaining_time": "0:05:28", "throughput": 1918.92, "total_tokens": 8080848}
4820
+ {"current_steps": 24010, "total_steps": 25880, "loss": 0.3448, "lr": 1.5836810598958252e-05, "epoch": 18.5548686244204, "percentage": 92.77, "elapsed_time": "1:10:11", "remaining_time": "0:05:28", "throughput": 1918.93, "total_tokens": 8082512}
4821
+ {"current_steps": 24015, "total_steps": 25880, "loss": 0.3326, "lr": 1.5752726827149456e-05, "epoch": 18.558732612055643, "percentage": 92.79, "elapsed_time": "1:10:12", "remaining_time": "0:05:27", "throughput": 1918.94, "total_tokens": 8084112}
4822
+ {"current_steps": 24020, "total_steps": 25880, "loss": 0.2794, "lr": 1.5668863293900548e-05, "epoch": 18.56259659969088, "percentage": 92.81, "elapsed_time": "1:10:13", "remaining_time": "0:05:26", "throughput": 1918.95, "total_tokens": 8085616}
4823
+ {"current_steps": 24025, "total_steps": 25880, "loss": 0.3018, "lr": 1.5585220037353075e-05, "epoch": 18.56646058732612, "percentage": 92.83, "elapsed_time": "1:10:14", "remaining_time": "0:05:25", "throughput": 1919.01, "total_tokens": 8087568}
4824
+ {"current_steps": 24030, "total_steps": 25880, "loss": 0.294, "lr": 1.550179709554861e-05, "epoch": 18.57032457496136, "percentage": 92.85, "elapsed_time": "1:10:15", "remaining_time": "0:05:24", "throughput": 1919.03, "total_tokens": 8089296}
4825
+ {"current_steps": 24035, "total_steps": 25880, "loss": 0.3392, "lr": 1.5418594506428462e-05, "epoch": 18.5741885625966, "percentage": 92.87, "elapsed_time": "1:10:16", "remaining_time": "0:05:23", "throughput": 1919.07, "total_tokens": 8091120}
4826
+ {"current_steps": 24040, "total_steps": 25880, "loss": 0.3202, "lr": 1.5335612307833534e-05, "epoch": 18.57805255023184, "percentage": 92.89, "elapsed_time": "1:10:16", "remaining_time": "0:05:22", "throughput": 1919.06, "total_tokens": 8092624}
4827
+ {"current_steps": 24045, "total_steps": 25880, "loss": 0.2901, "lr": 1.5252850537504637e-05, "epoch": 18.58191653786708, "percentage": 92.91, "elapsed_time": "1:10:17", "remaining_time": "0:05:21", "throughput": 1919.06, "total_tokens": 8094224}
4828
+ {"current_steps": 24050, "total_steps": 25880, "loss": 0.2602, "lr": 1.517030923308238e-05, "epoch": 18.58578052550232, "percentage": 92.93, "elapsed_time": "1:10:18", "remaining_time": "0:05:21", "throughput": 1919.08, "total_tokens": 8095920}
4829
+ {"current_steps": 24055, "total_steps": 25880, "loss": 0.2613, "lr": 1.5087988432107069e-05, "epoch": 18.589644513137557, "percentage": 92.95, "elapsed_time": "1:10:19", "remaining_time": "0:05:20", "throughput": 1919.11, "total_tokens": 8097648}
4830
+ {"current_steps": 24060, "total_steps": 25880, "loss": 0.2506, "lr": 1.5005888172018534e-05, "epoch": 18.5935085007728, "percentage": 92.97, "elapsed_time": "1:10:20", "remaining_time": "0:05:19", "throughput": 1919.14, "total_tokens": 8099440}
4831
+ {"current_steps": 24065, "total_steps": 25880, "loss": 0.2348, "lr": 1.4924008490156626e-05, "epoch": 18.597372488408038, "percentage": 92.99, "elapsed_time": "1:10:21", "remaining_time": "0:05:18", "throughput": 1919.18, "total_tokens": 8101232}
4832
+ {"current_steps": 24070, "total_steps": 25880, "loss": 0.2705, "lr": 1.4842349423760615e-05, "epoch": 18.601236476043276, "percentage": 93.01, "elapsed_time": "1:10:21", "remaining_time": "0:05:17", "throughput": 1919.16, "total_tokens": 8102640}
4833
+ {"current_steps": 24075, "total_steps": 25880, "loss": 0.2259, "lr": 1.4760911009969625e-05, "epoch": 18.605100463678518, "percentage": 93.03, "elapsed_time": "1:10:22", "remaining_time": "0:05:16", "throughput": 1919.18, "total_tokens": 8104208}
4834
+ {"current_steps": 24080, "total_steps": 25880, "loss": 0.252, "lr": 1.4679693285822305e-05, "epoch": 18.608964451313756, "percentage": 93.04, "elapsed_time": "1:10:23", "remaining_time": "0:05:15", "throughput": 1919.19, "total_tokens": 8105840}
4835
+ {"current_steps": 24085, "total_steps": 25880, "loss": 0.3241, "lr": 1.459869628825694e-05, "epoch": 18.612828438948995, "percentage": 93.06, "elapsed_time": "1:10:24", "remaining_time": "0:05:14", "throughput": 1919.18, "total_tokens": 8107344}
4836
+ {"current_steps": 24090, "total_steps": 25880, "loss": 0.2105, "lr": 1.451792005411151e-05, "epoch": 18.616692426584233, "percentage": 93.08, "elapsed_time": "1:10:25", "remaining_time": "0:05:13", "throughput": 1919.18, "total_tokens": 8108976}
4837
+ {"current_steps": 24095, "total_steps": 25880, "loss": 0.2725, "lr": 1.443736462012335e-05, "epoch": 18.620556414219475, "percentage": 93.1, "elapsed_time": "1:10:26", "remaining_time": "0:05:13", "throughput": 1919.21, "total_tokens": 8110704}
4838
+ {"current_steps": 24100, "total_steps": 25880, "loss": 0.3124, "lr": 1.4357030022929762e-05, "epoch": 18.624420401854714, "percentage": 93.12, "elapsed_time": "1:10:26", "remaining_time": "0:05:12", "throughput": 1919.22, "total_tokens": 8112304}
4839
+ {"current_steps": 24105, "total_steps": 25880, "loss": 0.2153, "lr": 1.4276916299067355e-05, "epoch": 18.628284389489952, "percentage": 93.14, "elapsed_time": "1:10:27", "remaining_time": "0:05:11", "throughput": 1919.25, "total_tokens": 8114064}
4840
+ {"current_steps": 24110, "total_steps": 25880, "loss": 0.1976, "lr": 1.4197023484972205e-05, "epoch": 18.632148377125194, "percentage": 93.16, "elapsed_time": "1:10:28", "remaining_time": "0:05:10", "throughput": 1919.3, "total_tokens": 8115824}
4841
+ {"current_steps": 24115, "total_steps": 25880, "loss": 0.3353, "lr": 1.411735161698019e-05, "epoch": 18.636012364760433, "percentage": 93.18, "elapsed_time": "1:10:29", "remaining_time": "0:05:09", "throughput": 1919.36, "total_tokens": 8117776}
4842
+ {"current_steps": 24120, "total_steps": 25880, "loss": 0.3628, "lr": 1.4037900731326491e-05, "epoch": 18.63987635239567, "percentage": 93.2, "elapsed_time": "1:10:30", "remaining_time": "0:05:08", "throughput": 1919.4, "total_tokens": 8119632}
4843
+ {"current_steps": 24125, "total_steps": 25880, "loss": 0.3236, "lr": 1.3958670864145873e-05, "epoch": 18.643740340030913, "percentage": 93.22, "elapsed_time": "1:10:31", "remaining_time": "0:05:07", "throughput": 1919.41, "total_tokens": 8121200}
4844
+ {"current_steps": 24130, "total_steps": 25880, "loss": 0.223, "lr": 1.3879662051472452e-05, "epoch": 18.64760432766615, "percentage": 93.24, "elapsed_time": "1:10:31", "remaining_time": "0:05:06", "throughput": 1919.43, "total_tokens": 8122896}
4845
+ {"current_steps": 24135, "total_steps": 25880, "loss": 0.3544, "lr": 1.3800874329240042e-05, "epoch": 18.65146831530139, "percentage": 93.26, "elapsed_time": "1:10:32", "remaining_time": "0:05:06", "throughput": 1919.44, "total_tokens": 8124496}
4846
+ {"current_steps": 24140, "total_steps": 25880, "loss": 0.2034, "lr": 1.3722307733281759e-05, "epoch": 18.655332302936632, "percentage": 93.28, "elapsed_time": "1:10:33", "remaining_time": "0:05:05", "throughput": 1919.45, "total_tokens": 8126096}
4847
+ {"current_steps": 24145, "total_steps": 25880, "loss": 0.3106, "lr": 1.3643962299330127e-05, "epoch": 18.65919629057187, "percentage": 93.3, "elapsed_time": "1:10:34", "remaining_time": "0:05:04", "throughput": 1919.48, "total_tokens": 8127824}
4848
+ {"current_steps": 24150, "total_steps": 25880, "loss": 0.3254, "lr": 1.3565838063017032e-05, "epoch": 18.66306027820711, "percentage": 93.32, "elapsed_time": "1:10:35", "remaining_time": "0:05:03", "throughput": 1919.52, "total_tokens": 8129520}
4849
+ {"current_steps": 24155, "total_steps": 25880, "loss": 0.2927, "lr": 1.3487935059873946e-05, "epoch": 18.66692426584235, "percentage": 93.33, "elapsed_time": "1:10:35", "remaining_time": "0:05:02", "throughput": 1919.55, "total_tokens": 8131216}
4850
+ {"current_steps": 24160, "total_steps": 25880, "loss": 0.2989, "lr": 1.3410253325331634e-05, "epoch": 18.67078825347759, "percentage": 93.35, "elapsed_time": "1:10:36", "remaining_time": "0:05:01", "throughput": 1919.56, "total_tokens": 8132880}
4851
+ {"current_steps": 24165, "total_steps": 25880, "loss": 0.3112, "lr": 1.3332792894720169e-05, "epoch": 18.674652241112828, "percentage": 93.37, "elapsed_time": "1:10:37", "remaining_time": "0:05:00", "throughput": 1919.6, "total_tokens": 8134704}
4852
+ {"current_steps": 24170, "total_steps": 25880, "loss": 0.2711, "lr": 1.3255553803269039e-05, "epoch": 18.67851622874807, "percentage": 93.39, "elapsed_time": "1:10:38", "remaining_time": "0:04:59", "throughput": 1919.62, "total_tokens": 8136400}
4853
+ {"current_steps": 24175, "total_steps": 25880, "loss": 0.2338, "lr": 1.317853608610703e-05, "epoch": 18.682380216383308, "percentage": 93.41, "elapsed_time": "1:10:39", "remaining_time": "0:04:58", "throughput": 1919.61, "total_tokens": 8137968}
4854
+ {"current_steps": 24180, "total_steps": 25880, "loss": 0.3172, "lr": 1.310173977826229e-05, "epoch": 18.686244204018546, "percentage": 93.43, "elapsed_time": "1:10:40", "remaining_time": "0:04:58", "throughput": 1919.65, "total_tokens": 8139728}
4855
+ {"current_steps": 24185, "total_steps": 25880, "loss": 0.2687, "lr": 1.3025164914662213e-05, "epoch": 18.69010819165379, "percentage": 93.45, "elapsed_time": "1:10:41", "remaining_time": "0:04:57", "throughput": 1919.64, "total_tokens": 8141232}
4856
+ {"current_steps": 24190, "total_steps": 25880, "loss": 0.2057, "lr": 1.2948811530133441e-05, "epoch": 18.693972179289027, "percentage": 93.47, "elapsed_time": "1:10:41", "remaining_time": "0:04:56", "throughput": 1919.66, "total_tokens": 8142896}
4857
+ {"current_steps": 24195, "total_steps": 25880, "loss": 0.4164, "lr": 1.2872679659402087e-05, "epoch": 18.697836166924265, "percentage": 93.49, "elapsed_time": "1:10:42", "remaining_time": "0:04:55", "throughput": 1919.66, "total_tokens": 8144368}
4858
+ {"current_steps": 24200, "total_steps": 25880, "loss": 0.2435, "lr": 1.2796769337093339e-05, "epoch": 18.701700154559504, "percentage": 93.51, "elapsed_time": "1:10:43", "remaining_time": "0:04:54", "throughput": 1919.69, "total_tokens": 8146064}
4859
+ {"current_steps": 24205, "total_steps": 25880, "loss": 0.2521, "lr": 1.2721080597731526e-05, "epoch": 18.705564142194746, "percentage": 93.53, "elapsed_time": "1:10:44", "remaining_time": "0:04:53", "throughput": 1919.7, "total_tokens": 8147728}
4860
+ {"current_steps": 24210, "total_steps": 25880, "loss": 0.2708, "lr": 1.2645613475740558e-05, "epoch": 18.709428129829984, "percentage": 93.55, "elapsed_time": "1:10:45", "remaining_time": "0:04:52", "throughput": 1919.72, "total_tokens": 8149296}
4861
+ {"current_steps": 24215, "total_steps": 25880, "loss": 0.2108, "lr": 1.2570368005443256e-05, "epoch": 18.713292117465222, "percentage": 93.57, "elapsed_time": "1:10:45", "remaining_time": "0:04:51", "throughput": 1919.72, "total_tokens": 8150928}
4862
+ {"current_steps": 24220, "total_steps": 25880, "loss": 0.2366, "lr": 1.2495344221061632e-05, "epoch": 18.717156105100464, "percentage": 93.59, "elapsed_time": "1:10:46", "remaining_time": "0:04:51", "throughput": 1919.76, "total_tokens": 8152624}
4863
+ {"current_steps": 24225, "total_steps": 25880, "loss": 0.2442, "lr": 1.2420542156717007e-05, "epoch": 18.721020092735703, "percentage": 93.61, "elapsed_time": "1:10:47", "remaining_time": "0:04:50", "throughput": 1919.8, "total_tokens": 8154416}
4864
+ {"current_steps": 24230, "total_steps": 25880, "loss": 0.2642, "lr": 1.2345961846429777e-05, "epoch": 18.72488408037094, "percentage": 93.62, "elapsed_time": "1:10:48", "remaining_time": "0:04:49", "throughput": 1919.82, "total_tokens": 8156208}
4865
+ {"current_steps": 24235, "total_steps": 25880, "loss": 0.3168, "lr": 1.227160332411964e-05, "epoch": 18.728748068006183, "percentage": 93.64, "elapsed_time": "1:10:49", "remaining_time": "0:04:48", "throughput": 1919.86, "total_tokens": 8158000}
4866
+ {"current_steps": 24240, "total_steps": 25880, "loss": 0.2681, "lr": 1.2197466623605102e-05, "epoch": 18.73261205564142, "percentage": 93.66, "elapsed_time": "1:10:50", "remaining_time": "0:04:47", "throughput": 1919.87, "total_tokens": 8159600}
4867
+ {"current_steps": 24245, "total_steps": 25880, "loss": 0.2146, "lr": 1.2123551778604137e-05, "epoch": 18.73647604327666, "percentage": 93.68, "elapsed_time": "1:10:50", "remaining_time": "0:04:46", "throughput": 1919.88, "total_tokens": 8161200}
4868
+ {"current_steps": 24250, "total_steps": 25880, "loss": 0.2201, "lr": 1.2049858822733572e-05, "epoch": 18.740340030911902, "percentage": 93.7, "elapsed_time": "1:10:51", "remaining_time": "0:04:45", "throughput": 1919.93, "total_tokens": 8163088}
4869
+ {"current_steps": 24255, "total_steps": 25880, "loss": 0.3205, "lr": 1.197638778950949e-05, "epoch": 18.74420401854714, "percentage": 93.72, "elapsed_time": "1:10:52", "remaining_time": "0:04:44", "throughput": 1919.94, "total_tokens": 8164688}
4870
+ {"current_steps": 24260, "total_steps": 25880, "loss": 0.2468, "lr": 1.1903138712346828e-05, "epoch": 18.74806800618238, "percentage": 93.74, "elapsed_time": "1:10:53", "remaining_time": "0:04:44", "throughput": 1919.93, "total_tokens": 8166160}
4871
+ {"current_steps": 24265, "total_steps": 25880, "loss": 0.2682, "lr": 1.1830111624559826e-05, "epoch": 18.75193199381762, "percentage": 93.76, "elapsed_time": "1:10:54", "remaining_time": "0:04:43", "throughput": 1919.93, "total_tokens": 8167664}
4872
+ {"current_steps": 24270, "total_steps": 25880, "loss": 0.2501, "lr": 1.1757306559361525e-05, "epoch": 18.75579598145286, "percentage": 93.78, "elapsed_time": "1:10:55", "remaining_time": "0:04:42", "throughput": 1919.98, "total_tokens": 8169584}
4873
+ {"current_steps": 24275, "total_steps": 25880, "loss": 0.2953, "lr": 1.1684723549864217e-05, "epoch": 18.759659969088098, "percentage": 93.8, "elapsed_time": "1:10:55", "remaining_time": "0:04:41", "throughput": 1920.01, "total_tokens": 8171280}
4874
+ {"current_steps": 24280, "total_steps": 25880, "loss": 0.2907, "lr": 1.1612362629079054e-05, "epoch": 18.76352395672334, "percentage": 93.82, "elapsed_time": "1:10:56", "remaining_time": "0:04:40", "throughput": 1920.03, "total_tokens": 8172944}
4875
+ {"current_steps": 24285, "total_steps": 25880, "loss": 0.377, "lr": 1.1540223829916208e-05, "epoch": 18.76738794435858, "percentage": 93.84, "elapsed_time": "1:10:57", "remaining_time": "0:04:39", "throughput": 1920.05, "total_tokens": 8174544}
4876
+ {"current_steps": 24290, "total_steps": 25880, "loss": 0.2388, "lr": 1.1468307185184824e-05, "epoch": 18.771251931993817, "percentage": 93.86, "elapsed_time": "1:10:58", "remaining_time": "0:04:38", "throughput": 1920.08, "total_tokens": 8176304}
4877
+ {"current_steps": 24295, "total_steps": 25880, "loss": 0.2722, "lr": 1.139661272759307e-05, "epoch": 18.77511591962906, "percentage": 93.88, "elapsed_time": "1:10:59", "remaining_time": "0:04:37", "throughput": 1920.08, "total_tokens": 8177840}
4878
+ {"current_steps": 24300, "total_steps": 25880, "loss": 0.2694, "lr": 1.132514048974792e-05, "epoch": 18.778979907264297, "percentage": 93.89, "elapsed_time": "1:10:59", "remaining_time": "0:04:36", "throughput": 1920.09, "total_tokens": 8179504}
4879
+ {"current_steps": 24305, "total_steps": 25880, "loss": 0.2293, "lr": 1.1253890504155428e-05, "epoch": 18.782843894899536, "percentage": 93.91, "elapsed_time": "1:11:00", "remaining_time": "0:04:36", "throughput": 1920.1, "total_tokens": 8181104}
4880
+ {"current_steps": 24310, "total_steps": 25880, "loss": 0.3841, "lr": 1.1182862803220506e-05, "epoch": 18.786707882534778, "percentage": 93.93, "elapsed_time": "1:11:01", "remaining_time": "0:04:35", "throughput": 1920.11, "total_tokens": 8182640}
4881
+ {"current_steps": 24315, "total_steps": 25880, "loss": 0.2784, "lr": 1.1112057419247091e-05, "epoch": 18.790571870170016, "percentage": 93.95, "elapsed_time": "1:11:02", "remaining_time": "0:04:34", "throughput": 1920.11, "total_tokens": 8184144}
4882
+ {"current_steps": 24320, "total_steps": 25880, "loss": 0.2557, "lr": 1.1041474384437755e-05, "epoch": 18.794435857805254, "percentage": 93.97, "elapsed_time": "1:11:03", "remaining_time": "0:04:33", "throughput": 1920.14, "total_tokens": 8185936}
4883
+ {"current_steps": 24325, "total_steps": 25880, "loss": 0.2598, "lr": 1.09711137308941e-05, "epoch": 18.798299845440496, "percentage": 93.99, "elapsed_time": "1:11:03", "remaining_time": "0:04:32", "throughput": 1920.14, "total_tokens": 8187440}
4884
+ {"current_steps": 24330, "total_steps": 25880, "loss": 0.2076, "lr": 1.0900975490616638e-05, "epoch": 18.802163833075735, "percentage": 94.01, "elapsed_time": "1:11:04", "remaining_time": "0:04:31", "throughput": 1920.18, "total_tokens": 8189168}
4885
+ {"current_steps": 24335, "total_steps": 25880, "loss": 0.2147, "lr": 1.0831059695504575e-05, "epoch": 18.806027820710973, "percentage": 94.03, "elapsed_time": "1:11:05", "remaining_time": "0:04:30", "throughput": 1920.18, "total_tokens": 8190640}
4886
+ {"current_steps": 24340, "total_steps": 25880, "loss": 0.2875, "lr": 1.0761366377356085e-05, "epoch": 18.80989180834621, "percentage": 94.05, "elapsed_time": "1:11:06", "remaining_time": "0:04:29", "throughput": 1920.2, "total_tokens": 8192272}
4887
+ {"current_steps": 24345, "total_steps": 25880, "loss": 0.2578, "lr": 1.0691895567868148e-05, "epoch": 18.813755795981454, "percentage": 94.07, "elapsed_time": "1:11:07", "remaining_time": "0:04:29", "throughput": 1920.21, "total_tokens": 8193840}
4888
+ {"current_steps": 24350, "total_steps": 25880, "loss": 0.4023, "lr": 1.062264729863638e-05, "epoch": 18.817619783616692, "percentage": 94.09, "elapsed_time": "1:11:07", "remaining_time": "0:04:28", "throughput": 1920.22, "total_tokens": 8195408}
4889
+ {"current_steps": 24355, "total_steps": 25880, "loss": 0.2849, "lr": 1.0553621601155472e-05, "epoch": 18.82148377125193, "percentage": 94.11, "elapsed_time": "1:11:08", "remaining_time": "0:04:27", "throughput": 1920.29, "total_tokens": 8197424}
4890
+ {"current_steps": 24360, "total_steps": 25880, "loss": 0.2368, "lr": 1.0484818506818594e-05, "epoch": 18.825347758887172, "percentage": 94.13, "elapsed_time": "1:11:09", "remaining_time": "0:04:26", "throughput": 1920.28, "total_tokens": 8198928}
4891
+ {"current_steps": 24365, "total_steps": 25880, "loss": 0.2732, "lr": 1.0416238046917881e-05, "epoch": 18.82921174652241, "percentage": 94.15, "elapsed_time": "1:11:10", "remaining_time": "0:04:25", "throughput": 1920.31, "total_tokens": 8200752}
4892
+ {"current_steps": 24370, "total_steps": 25880, "loss": 0.2891, "lr": 1.0347880252644104e-05, "epoch": 18.83307573415765, "percentage": 94.17, "elapsed_time": "1:11:11", "remaining_time": "0:04:24", "throughput": 1920.33, "total_tokens": 8202448}
4893
+ {"current_steps": 24375, "total_steps": 25880, "loss": 0.2926, "lr": 1.0279745155086729e-05, "epoch": 18.83693972179289, "percentage": 94.18, "elapsed_time": "1:11:12", "remaining_time": "0:04:23", "throughput": 1920.38, "total_tokens": 8204336}
4894
+ {"current_steps": 24380, "total_steps": 25880, "loss": 0.2498, "lr": 1.0211832785234132e-05, "epoch": 18.84080370942813, "percentage": 94.2, "elapsed_time": "1:11:13", "remaining_time": "0:04:22", "throughput": 1920.39, "total_tokens": 8206000}
4895
+ {"current_steps": 24385, "total_steps": 25880, "loss": 0.2412, "lr": 1.014414317397322e-05, "epoch": 18.844667697063368, "percentage": 94.22, "elapsed_time": "1:11:13", "remaining_time": "0:04:22", "throughput": 1920.39, "total_tokens": 8207504}
4896
+ {"current_steps": 24390, "total_steps": 25880, "loss": 0.2423, "lr": 1.0076676352089586e-05, "epoch": 18.84853168469861, "percentage": 94.24, "elapsed_time": "1:11:14", "remaining_time": "0:04:21", "throughput": 1920.4, "total_tokens": 8209008}
4897
+ {"current_steps": 24395, "total_steps": 25880, "loss": 0.275, "lr": 1.0009432350267633e-05, "epoch": 18.85239567233385, "percentage": 94.26, "elapsed_time": "1:11:15", "remaining_time": "0:04:20", "throughput": 1920.39, "total_tokens": 8210512}
4898
+ {"current_steps": 24400, "total_steps": 25880, "loss": 0.2506, "lr": 9.942411199090229e-06, "epoch": 18.856259659969087, "percentage": 94.28, "elapsed_time": "1:11:16", "remaining_time": "0:04:19", "throughput": 1920.4, "total_tokens": 8212048}
4899
+ {"current_steps": 24405, "total_steps": 25880, "loss": 0.221, "lr": 9.875612929039101e-06, "epoch": 18.86012364760433, "percentage": 94.3, "elapsed_time": "1:11:17", "remaining_time": "0:04:18", "throughput": 1920.46, "total_tokens": 8214064}
4900
+ {"current_steps": 24410, "total_steps": 25880, "loss": 0.2657, "lr": 9.80903757049445e-06, "epoch": 18.863987635239567, "percentage": 94.32, "elapsed_time": "1:11:17", "remaining_time": "0:04:17", "throughput": 1920.5, "total_tokens": 8215856}
4901
+ {"current_steps": 24415, "total_steps": 25880, "loss": 0.2434, "lr": 9.742685153735109e-06, "epoch": 18.867851622874806, "percentage": 94.34, "elapsed_time": "1:11:18", "remaining_time": "0:04:16", "throughput": 1920.54, "total_tokens": 8217680}
4902
+ {"current_steps": 24420, "total_steps": 25880, "loss": 0.2572, "lr": 9.676555708938662e-06, "epoch": 18.871715610510048, "percentage": 94.36, "elapsed_time": "1:11:19", "remaining_time": "0:04:15", "throughput": 1920.58, "total_tokens": 8219536}
4903
+ {"current_steps": 24425, "total_steps": 25880, "loss": 0.2468, "lr": 9.610649266181049e-06, "epoch": 18.875579598145286, "percentage": 94.38, "elapsed_time": "1:11:20", "remaining_time": "0:04:14", "throughput": 1920.6, "total_tokens": 8221328}
4904
+ {"current_steps": 24430, "total_steps": 25880, "loss": 0.2914, "lr": 9.54496585543696e-06, "epoch": 18.879443585780525, "percentage": 94.4, "elapsed_time": "1:11:21", "remaining_time": "0:04:14", "throughput": 1920.59, "total_tokens": 8222736}
4905
+ {"current_steps": 24435, "total_steps": 25880, "loss": 0.212, "lr": 9.479505506579667e-06, "epoch": 18.883307573415767, "percentage": 94.42, "elapsed_time": "1:11:22", "remaining_time": "0:04:13", "throughput": 1920.61, "total_tokens": 8224496}
4906
+ {"current_steps": 24440, "total_steps": 25880, "loss": 0.3815, "lr": 9.41426824938091e-06, "epoch": 18.887171561051005, "percentage": 94.44, "elapsed_time": "1:11:23", "remaining_time": "0:04:12", "throughput": 1920.64, "total_tokens": 8226256}
4907
+ {"current_steps": 24445, "total_steps": 25880, "loss": 0.2339, "lr": 9.349254113510897e-06, "epoch": 18.891035548686244, "percentage": 94.46, "elapsed_time": "1:11:23", "remaining_time": "0:04:11", "throughput": 1920.66, "total_tokens": 8227888}
4908
+ {"current_steps": 24450, "total_steps": 25880, "loss": 0.2595, "lr": 9.284463128538533e-06, "epoch": 18.894899536321482, "percentage": 94.47, "elapsed_time": "1:11:24", "remaining_time": "0:04:10", "throughput": 1920.7, "total_tokens": 8229776}
4909
+ {"current_steps": 24455, "total_steps": 25880, "loss": 0.304, "lr": 9.219895323931138e-06, "epoch": 18.898763523956724, "percentage": 94.49, "elapsed_time": "1:11:25", "remaining_time": "0:04:09", "throughput": 1920.71, "total_tokens": 8231440}
4910
+ {"current_steps": 24460, "total_steps": 25880, "loss": 0.276, "lr": 9.155550729054496e-06, "epoch": 18.902627511591962, "percentage": 94.51, "elapsed_time": "1:11:26", "remaining_time": "0:04:08", "throughput": 1920.77, "total_tokens": 8233360}
4911
+ {"current_steps": 24465, "total_steps": 25880, "loss": 0.1919, "lr": 9.091429373172921e-06, "epoch": 18.9064914992272, "percentage": 94.53, "elapsed_time": "1:11:27", "remaining_time": "0:04:07", "throughput": 1920.79, "total_tokens": 8235024}
4912
+ {"current_steps": 24470, "total_steps": 25880, "loss": 0.2226, "lr": 9.027531285449198e-06, "epoch": 18.910355486862443, "percentage": 94.55, "elapsed_time": "1:11:28", "remaining_time": "0:04:07", "throughput": 1920.85, "total_tokens": 8236976}
4913
+ {"current_steps": 24475, "total_steps": 25880, "loss": 0.2819, "lr": 8.963856494944577e-06, "epoch": 18.91421947449768, "percentage": 94.57, "elapsed_time": "1:11:29", "remaining_time": "0:04:06", "throughput": 1920.88, "total_tokens": 8238768}
4914
+ {"current_steps": 24480, "total_steps": 25880, "loss": 0.3579, "lr": 8.900405030618675e-06, "epoch": 18.91808346213292, "percentage": 94.59, "elapsed_time": "1:11:29", "remaining_time": "0:04:05", "throughput": 1920.93, "total_tokens": 8240624}
4915
+ {"current_steps": 24485, "total_steps": 25880, "loss": 0.3549, "lr": 8.837176921329738e-06, "epoch": 18.92194744976816, "percentage": 94.61, "elapsed_time": "1:11:30", "remaining_time": "0:04:04", "throughput": 1920.98, "total_tokens": 8242448}
4916
+ {"current_steps": 24490, "total_steps": 25880, "loss": 0.2911, "lr": 8.774172195834151e-06, "epoch": 18.9258114374034, "percentage": 94.63, "elapsed_time": "1:11:31", "remaining_time": "0:04:03", "throughput": 1921.01, "total_tokens": 8244144}
4917
+ {"current_steps": 24495, "total_steps": 25880, "loss": 0.2009, "lr": 8.711390882786884e-06, "epoch": 18.92967542503864, "percentage": 94.65, "elapsed_time": "1:11:32", "remaining_time": "0:04:02", "throughput": 1921.08, "total_tokens": 8246224}
4918
+ {"current_steps": 24500, "total_steps": 25880, "loss": 0.2481, "lr": 8.648833010741263e-06, "epoch": 18.93353941267388, "percentage": 94.67, "elapsed_time": "1:11:33", "remaining_time": "0:04:01", "throughput": 1921.07, "total_tokens": 8247728}
4919
+ {"current_steps": 24505, "total_steps": 25880, "loss": 0.4692, "lr": 8.586498608148974e-06, "epoch": 18.93740340030912, "percentage": 94.69, "elapsed_time": "1:11:34", "remaining_time": "0:04:00", "throughput": 1921.08, "total_tokens": 8249360}
4920
+ {"current_steps": 24510, "total_steps": 25880, "loss": 0.2491, "lr": 8.52438770336006e-06, "epoch": 18.941267387944357, "percentage": 94.71, "elapsed_time": "1:11:34", "remaining_time": "0:04:00", "throughput": 1921.08, "total_tokens": 8250832}
4921
+ {"current_steps": 24515, "total_steps": 25880, "loss": 0.3216, "lr": 8.462500324622923e-06, "epoch": 18.9451313755796, "percentage": 94.73, "elapsed_time": "1:11:35", "remaining_time": "0:03:59", "throughput": 1921.1, "total_tokens": 8252432}
4922
+ {"current_steps": 24520, "total_steps": 25880, "loss": 0.2613, "lr": 8.400836500084386e-06, "epoch": 18.948995363214838, "percentage": 94.74, "elapsed_time": "1:11:36", "remaining_time": "0:03:58", "throughput": 1921.1, "total_tokens": 8253936}
4923
+ {"current_steps": 24525, "total_steps": 25880, "loss": 0.2209, "lr": 8.339396257789511e-06, "epoch": 18.952859350850076, "percentage": 94.76, "elapsed_time": "1:11:37", "remaining_time": "0:03:57", "throughput": 1921.1, "total_tokens": 8255408}
4924
+ {"current_steps": 24530, "total_steps": 25880, "loss": 0.3065, "lr": 8.278179625681614e-06, "epoch": 18.956723338485318, "percentage": 94.78, "elapsed_time": "1:11:38", "remaining_time": "0:03:56", "throughput": 1921.12, "total_tokens": 8257136}
4925
+ {"current_steps": 24535, "total_steps": 25880, "loss": 0.2116, "lr": 8.21718663160248e-06, "epoch": 18.960587326120557, "percentage": 94.8, "elapsed_time": "1:11:38", "remaining_time": "0:03:55", "throughput": 1921.15, "total_tokens": 8258832}
4926
+ {"current_steps": 24540, "total_steps": 25880, "loss": 0.4509, "lr": 8.156417303291975e-06, "epoch": 18.964451313755795, "percentage": 94.82, "elapsed_time": "1:11:39", "remaining_time": "0:03:54", "throughput": 1921.16, "total_tokens": 8260400}
4927
+ {"current_steps": 24545, "total_steps": 25880, "loss": 0.2834, "lr": 8.095871668388432e-06, "epoch": 18.968315301391037, "percentage": 94.84, "elapsed_time": "1:11:40", "remaining_time": "0:03:53", "throughput": 1921.16, "total_tokens": 8261904}
4928
+ {"current_steps": 24550, "total_steps": 25880, "loss": 0.2389, "lr": 8.035549754428328e-06, "epoch": 18.972179289026275, "percentage": 94.86, "elapsed_time": "1:11:41", "remaining_time": "0:03:53", "throughput": 1921.15, "total_tokens": 8263408}
4929
+ {"current_steps": 24555, "total_steps": 25880, "loss": 0.2993, "lr": 7.975451588846495e-06, "epoch": 18.976043276661514, "percentage": 94.88, "elapsed_time": "1:11:42", "remaining_time": "0:03:52", "throughput": 1921.16, "total_tokens": 8265008}
4930
+ {"current_steps": 24560, "total_steps": 25880, "loss": 0.3321, "lr": 7.915577198975898e-06, "epoch": 18.979907264296756, "percentage": 94.9, "elapsed_time": "1:11:42", "remaining_time": "0:03:51", "throughput": 1921.22, "total_tokens": 8266960}
4931
+ {"current_steps": 24565, "total_steps": 25880, "loss": 0.3021, "lr": 7.855926612047759e-06, "epoch": 18.983771251931994, "percentage": 94.92, "elapsed_time": "1:11:43", "remaining_time": "0:03:50", "throughput": 1921.27, "total_tokens": 8268752}
4932
+ {"current_steps": 24570, "total_steps": 25880, "loss": 0.2285, "lr": 7.796499855191541e-06, "epoch": 18.987635239567233, "percentage": 94.94, "elapsed_time": "1:11:44", "remaining_time": "0:03:49", "throughput": 1921.3, "total_tokens": 8270544}
4933
+ {"current_steps": 24575, "total_steps": 25880, "loss": 0.2403, "lr": 7.737296955434903e-06, "epoch": 18.991499227202475, "percentage": 94.96, "elapsed_time": "1:11:45", "remaining_time": "0:03:48", "throughput": 1921.28, "total_tokens": 8271952}
4934
+ {"current_steps": 24580, "total_steps": 25880, "loss": 0.2376, "lr": 7.678317939703583e-06, "epoch": 18.995363214837713, "percentage": 94.98, "elapsed_time": "1:11:46", "remaining_time": "0:03:47", "throughput": 1921.34, "total_tokens": 8273904}
4935
+ {"current_steps": 24585, "total_steps": 25880, "loss": 0.2941, "lr": 7.619562834821736e-06, "epoch": 18.99922720247295, "percentage": 95.0, "elapsed_time": "1:11:47", "remaining_time": "0:03:46", "throughput": 1921.41, "total_tokens": 8275984}
4936
+ {"current_steps": 24586, "total_steps": 25880, "eval_loss": 0.5602178573608398, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "1:11:58", "remaining_time": "0:03:47", "throughput": 1916.48, "total_tokens": 8276160}
4937
+ {"current_steps": 24590, "total_steps": 25880, "loss": 0.2456, "lr": 7.561031667511431e-06, "epoch": 19.00309119010819, "percentage": 95.02, "elapsed_time": "1:12:00", "remaining_time": "0:03:46", "throughput": 1915.88, "total_tokens": 8277504}
4938
+ {"current_steps": 24595, "total_steps": 25880, "loss": 0.2172, "lr": 7.502724464392985e-06, "epoch": 19.006955177743432, "percentage": 95.03, "elapsed_time": "1:12:01", "remaining_time": "0:03:45", "throughput": 1915.87, "total_tokens": 8279040}
4939
+ {"current_steps": 24600, "total_steps": 25880, "loss": 0.3581, "lr": 7.444641251984963e-06, "epoch": 19.01081916537867, "percentage": 95.05, "elapsed_time": "1:12:02", "remaining_time": "0:03:44", "throughput": 1915.88, "total_tokens": 8280672}