rbelanec commited on
Commit
77836ab
verified
1 Parent(s): 4e11561

Training in progress, step 19240

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +190 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a19f154b38c809e69139b6e2a6095e2400b44bf5953242b0fb1fb579fd4d289
3
  size 26214528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6272667cd60dca1a9007e5806625239d3d517b3aee930df620d79994277a9c86
3
  size 26214528
trainer_log.jsonl CHANGED
@@ -3677,3 +3677,193 @@
3677
  {"current_steps": 18290, "total_steps": 19240, "loss": 0.2521, "lr": 3.71191473062571e-07, "epoch": 9.506237006237006, "percentage": 95.06, "elapsed_time": "0:28:05", "remaining_time": "0:01:27", "throughput": 2069.71, "total_tokens": 3487680}
3678
  {"current_steps": 18295, "total_steps": 19240, "loss": 0.2349, "lr": 3.6730812462535404e-07, "epoch": 9.508835758835758, "percentage": 95.09, "elapsed_time": "0:28:05", "remaining_time": "0:01:27", "throughput": 2069.75, "total_tokens": 3488640}
3679
  {"current_steps": 18300, "total_steps": 19240, "loss": 0.2359, "lr": 3.6344504631395934e-07, "epoch": 9.511434511434512, "percentage": 95.11, "elapsed_time": "0:28:05", "remaining_time": "0:01:26", "throughput": 2069.82, "total_tokens": 3489632}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3677
  {"current_steps": 18290, "total_steps": 19240, "loss": 0.2521, "lr": 3.71191473062571e-07, "epoch": 9.506237006237006, "percentage": 95.06, "elapsed_time": "0:28:05", "remaining_time": "0:01:27", "throughput": 2069.71, "total_tokens": 3487680}
3678
  {"current_steps": 18295, "total_steps": 19240, "loss": 0.2349, "lr": 3.6730812462535404e-07, "epoch": 9.508835758835758, "percentage": 95.09, "elapsed_time": "0:28:05", "remaining_time": "0:01:27", "throughput": 2069.75, "total_tokens": 3488640}
3679
  {"current_steps": 18300, "total_steps": 19240, "loss": 0.2359, "lr": 3.6344504631395934e-07, "epoch": 9.511434511434512, "percentage": 95.11, "elapsed_time": "0:28:05", "remaining_time": "0:01:26", "throughput": 2069.82, "total_tokens": 3489632}
3680
+ {"current_steps": 18305, "total_steps": 19240, "loss": 0.2436, "lr": 3.5960224130728857e-07, "epoch": 9.514033264033264, "percentage": 95.14, "elapsed_time": "0:28:06", "remaining_time": "0:01:26", "throughput": 2069.83, "total_tokens": 3490528}
3681
+ {"current_steps": 18310, "total_steps": 19240, "loss": 0.3032, "lr": 3.5577971276757325e-07, "epoch": 9.516632016632016, "percentage": 95.17, "elapsed_time": "0:28:06", "remaining_time": "0:01:25", "throughput": 2069.86, "total_tokens": 3491424}
3682
+ {"current_steps": 18315, "total_steps": 19240, "loss": 0.2596, "lr": 3.519774638403472e-07, "epoch": 9.51923076923077, "percentage": 95.19, "elapsed_time": "0:28:07", "remaining_time": "0:01:25", "throughput": 2069.85, "total_tokens": 3492256}
3683
+ {"current_steps": 18320, "total_steps": 19240, "loss": 0.3007, "lr": 3.481954976544716e-07, "epoch": 9.521829521829522, "percentage": 95.22, "elapsed_time": "0:28:07", "remaining_time": "0:01:24", "throughput": 2069.88, "total_tokens": 3493152}
3684
+ {"current_steps": 18325, "total_steps": 19240, "loss": 0.2834, "lr": 3.44433817322104e-07, "epoch": 9.524428274428274, "percentage": 95.24, "elapsed_time": "0:28:08", "remaining_time": "0:01:24", "throughput": 2069.9, "total_tokens": 3494048}
3685
+ {"current_steps": 18330, "total_steps": 19240, "loss": 0.2406, "lr": 3.406924259387101e-07, "epoch": 9.527027027027026, "percentage": 95.27, "elapsed_time": "0:28:08", "remaining_time": "0:01:23", "throughput": 2069.99, "total_tokens": 3495040}
3686
+ {"current_steps": 18335, "total_steps": 19240, "loss": 0.2414, "lr": 3.369713265830715e-07, "epoch": 9.52962577962578, "percentage": 95.3, "elapsed_time": "0:28:08", "remaining_time": "0:01:23", "throughput": 2070.08, "total_tokens": 3496064}
3687
+ {"current_steps": 18340, "total_steps": 19240, "loss": 0.255, "lr": 3.3327052231725276e-07, "epoch": 9.532224532224532, "percentage": 95.32, "elapsed_time": "0:28:09", "remaining_time": "0:01:22", "throughput": 2070.12, "total_tokens": 3496992}
3688
+ {"current_steps": 18345, "total_steps": 19240, "loss": 0.2418, "lr": 3.2959001618664e-07, "epoch": 9.534823284823284, "percentage": 95.35, "elapsed_time": "0:28:09", "remaining_time": "0:01:22", "throughput": 2070.19, "total_tokens": 3497952}
3689
+ {"current_steps": 18350, "total_steps": 19240, "loss": 0.2347, "lr": 3.2592981121989384e-07, "epoch": 9.537422037422038, "percentage": 95.37, "elapsed_time": "0:28:10", "remaining_time": "0:01:21", "throughput": 2070.25, "total_tokens": 3498912}
3690
+ {"current_steps": 18355, "total_steps": 19240, "loss": 0.2645, "lr": 3.222899104289856e-07, "epoch": 9.54002079002079, "percentage": 95.4, "elapsed_time": "0:28:10", "remaining_time": "0:01:21", "throughput": 2070.29, "total_tokens": 3499840}
3691
+ {"current_steps": 18360, "total_steps": 19240, "loss": 0.3277, "lr": 3.18670316809172e-07, "epoch": 9.542619542619542, "percentage": 95.43, "elapsed_time": "0:28:10", "remaining_time": "0:01:21", "throughput": 2070.34, "total_tokens": 3500768}
3692
+ {"current_steps": 18365, "total_steps": 19240, "loss": 0.3109, "lr": 3.150710333389983e-07, "epoch": 9.545218295218294, "percentage": 95.45, "elapsed_time": "0:28:11", "remaining_time": "0:01:20", "throughput": 2070.42, "total_tokens": 3501760}
3693
+ {"current_steps": 18370, "total_steps": 19240, "loss": 0.2982, "lr": 3.114920629802981e-07, "epoch": 9.547817047817048, "percentage": 95.48, "elapsed_time": "0:28:11", "remaining_time": "0:01:20", "throughput": 2070.51, "total_tokens": 3502784}
3694
+ {"current_steps": 18375, "total_steps": 19240, "loss": 0.2516, "lr": 3.0793340867818763e-07, "epoch": 9.5504158004158, "percentage": 95.5, "elapsed_time": "0:28:12", "remaining_time": "0:01:19", "throughput": 2070.54, "total_tokens": 3503680}
3695
+ {"current_steps": 18380, "total_steps": 19240, "loss": 0.2223, "lr": 3.04395073361069e-07, "epoch": 9.553014553014552, "percentage": 95.53, "elapsed_time": "0:28:12", "remaining_time": "0:01:19", "throughput": 2070.61, "total_tokens": 3504640}
3696
+ {"current_steps": 18385, "total_steps": 19240, "loss": 0.3066, "lr": 3.008770599406213e-07, "epoch": 9.555613305613306, "percentage": 95.56, "elapsed_time": "0:28:12", "remaining_time": "0:01:18", "throughput": 2070.62, "total_tokens": 3505504}
3697
+ {"current_steps": 18390, "total_steps": 19240, "loss": 0.214, "lr": 2.973793713118039e-07, "epoch": 9.558212058212058, "percentage": 95.58, "elapsed_time": "0:28:13", "remaining_time": "0:01:18", "throughput": 2070.67, "total_tokens": 3506432}
3698
+ {"current_steps": 18395, "total_steps": 19240, "loss": 0.2611, "lr": 2.9390201035284226e-07, "epoch": 9.56081081081081, "percentage": 95.61, "elapsed_time": "0:28:13", "remaining_time": "0:01:17", "throughput": 2070.7, "total_tokens": 3507328}
3699
+ {"current_steps": 18400, "total_steps": 19240, "loss": 0.2533, "lr": 2.904449799252418e-07, "epoch": 9.563409563409563, "percentage": 95.63, "elapsed_time": "0:28:14", "remaining_time": "0:01:17", "throughput": 2070.71, "total_tokens": 3508192}
3700
+ {"current_steps": 18405, "total_steps": 19240, "loss": 0.2544, "lr": 2.870082828737797e-07, "epoch": 9.566008316008316, "percentage": 95.66, "elapsed_time": "0:28:14", "remaining_time": "0:01:16", "throughput": 2070.74, "total_tokens": 3509088}
3701
+ {"current_steps": 18410, "total_steps": 19240, "loss": 0.2647, "lr": 2.8359192202649376e-07, "epoch": 9.568607068607069, "percentage": 95.69, "elapsed_time": "0:28:15", "remaining_time": "0:01:16", "throughput": 2070.84, "total_tokens": 3510112}
3702
+ {"current_steps": 18415, "total_steps": 19240, "loss": 0.2476, "lr": 2.8019590019469633e-07, "epoch": 9.57120582120582, "percentage": 95.71, "elapsed_time": "0:28:15", "remaining_time": "0:01:15", "throughput": 2070.85, "total_tokens": 3510976}
3703
+ {"current_steps": 18420, "total_steps": 19240, "loss": 0.2228, "lr": 2.7682022017295197e-07, "epoch": 9.573804573804575, "percentage": 95.74, "elapsed_time": "0:28:15", "remaining_time": "0:01:15", "throughput": 2070.91, "total_tokens": 3511936}
3704
+ {"current_steps": 18425, "total_steps": 19240, "loss": 0.243, "lr": 2.734648847390997e-07, "epoch": 9.576403326403327, "percentage": 95.76, "elapsed_time": "0:28:16", "remaining_time": "0:01:15", "throughput": 2070.97, "total_tokens": 3512896}
3705
+ {"current_steps": 18430, "total_steps": 19240, "loss": 0.2099, "lr": 2.7012989665421706e-07, "epoch": 9.579002079002079, "percentage": 95.79, "elapsed_time": "0:28:16", "remaining_time": "0:01:14", "throughput": 2071.0, "total_tokens": 3513792}
3706
+ {"current_steps": 18435, "total_steps": 19240, "loss": 0.2314, "lr": 2.6681525866266157e-07, "epoch": 9.58160083160083, "percentage": 95.82, "elapsed_time": "0:28:17", "remaining_time": "0:01:14", "throughput": 2071.1, "total_tokens": 3514816}
3707
+ {"current_steps": 18440, "total_steps": 19240, "loss": 0.2011, "lr": 2.635209734920291e-07, "epoch": 9.584199584199585, "percentage": 95.84, "elapsed_time": "0:28:17", "remaining_time": "0:01:13", "throughput": 2071.2, "total_tokens": 3515840}
3708
+ {"current_steps": 18445, "total_steps": 19240, "loss": 0.3124, "lr": 2.602470438531679e-07, "epoch": 9.586798336798337, "percentage": 95.87, "elapsed_time": "0:28:17", "remaining_time": "0:01:13", "throughput": 2071.21, "total_tokens": 3516704}
3709
+ {"current_steps": 18450, "total_steps": 19240, "loss": 0.2375, "lr": 2.5699347244018404e-07, "epoch": 9.589397089397089, "percentage": 95.89, "elapsed_time": "0:28:18", "remaining_time": "0:01:12", "throughput": 2071.28, "total_tokens": 3517664}
3710
+ {"current_steps": 18455, "total_steps": 19240, "loss": 0.2884, "lr": 2.537602619304247e-07, "epoch": 9.591995841995843, "percentage": 95.92, "elapsed_time": "0:28:18", "remaining_time": "0:01:12", "throughput": 2071.37, "total_tokens": 3518688}
3711
+ {"current_steps": 18460, "total_steps": 19240, "loss": 0.2286, "lr": 2.5054741498448386e-07, "epoch": 9.594594594594595, "percentage": 95.95, "elapsed_time": "0:28:19", "remaining_time": "0:01:11", "throughput": 2071.44, "total_tokens": 3519648}
3712
+ {"current_steps": 18465, "total_steps": 19240, "loss": 0.2699, "lr": 2.4735493424619394e-07, "epoch": 9.597193347193347, "percentage": 95.97, "elapsed_time": "0:28:19", "remaining_time": "0:01:11", "throughput": 2071.49, "total_tokens": 3520576}
3713
+ {"current_steps": 18470, "total_steps": 19240, "loss": 0.2567, "lr": 2.4418282234263957e-07, "epoch": 9.5997920997921, "percentage": 96.0, "elapsed_time": "0:28:19", "remaining_time": "0:01:10", "throughput": 2071.55, "total_tokens": 3521536}
3714
+ {"current_steps": 18475, "total_steps": 19240, "loss": 0.2374, "lr": 2.410310818841299e-07, "epoch": 9.602390852390853, "percentage": 96.02, "elapsed_time": "0:28:20", "remaining_time": "0:01:10", "throughput": 2071.64, "total_tokens": 3522528}
3715
+ {"current_steps": 18480, "total_steps": 19240, "loss": 0.2634, "lr": 2.3789971546422374e-07, "epoch": 9.604989604989605, "percentage": 96.05, "elapsed_time": "0:28:20", "remaining_time": "0:01:09", "throughput": 2071.72, "total_tokens": 3523520}
3716
+ {"current_steps": 18485, "total_steps": 19240, "loss": 0.2574, "lr": 2.3478872565969867e-07, "epoch": 9.607588357588357, "percentage": 96.08, "elapsed_time": "0:28:21", "remaining_time": "0:01:09", "throughput": 2071.78, "total_tokens": 3524480}
3717
+ {"current_steps": 18490, "total_steps": 19240, "loss": 0.3252, "lr": 2.316981150305847e-07, "epoch": 9.61018711018711, "percentage": 96.1, "elapsed_time": "0:28:21", "remaining_time": "0:01:09", "throughput": 2071.83, "total_tokens": 3525408}
3718
+ {"current_steps": 18495, "total_steps": 19240, "loss": 0.2755, "lr": 2.2862788612012244e-07, "epoch": 9.612785862785863, "percentage": 96.13, "elapsed_time": "0:28:22", "remaining_time": "0:01:08", "throughput": 2071.86, "total_tokens": 3526304}
3719
+ {"current_steps": 18500, "total_steps": 19240, "loss": 0.2495, "lr": 2.255780414547909e-07, "epoch": 9.615384615384615, "percentage": 96.15, "elapsed_time": "0:28:22", "remaining_time": "0:01:08", "throughput": 2071.9, "total_tokens": 3527232}
3720
+ {"current_steps": 18505, "total_steps": 19240, "loss": 0.2801, "lr": 2.2254858354429364e-07, "epoch": 9.617983367983367, "percentage": 96.18, "elapsed_time": "0:28:22", "remaining_time": "0:01:07", "throughput": 2071.99, "total_tokens": 3528224}
3721
+ {"current_steps": 18510, "total_steps": 19240, "loss": 0.2494, "lr": 2.19539514881556e-07, "epoch": 9.620582120582121, "percentage": 96.21, "elapsed_time": "0:28:23", "remaining_time": "0:01:07", "throughput": 2072.02, "total_tokens": 3529120}
3722
+ {"current_steps": 18515, "total_steps": 19240, "loss": 0.2987, "lr": 2.165508379427278e-07, "epoch": 9.623180873180873, "percentage": 96.23, "elapsed_time": "0:28:23", "remaining_time": "0:01:06", "throughput": 2072.07, "total_tokens": 3530048}
3723
+ {"current_steps": 18520, "total_steps": 19240, "loss": 0.2507, "lr": 2.1358255518717786e-07, "epoch": 9.625779625779625, "percentage": 96.26, "elapsed_time": "0:28:24", "remaining_time": "0:01:06", "throughput": 2072.15, "total_tokens": 3531040}
3724
+ {"current_steps": 18525, "total_steps": 19240, "loss": 0.2094, "lr": 2.106346690574912e-07, "epoch": 9.628378378378379, "percentage": 96.28, "elapsed_time": "0:28:24", "remaining_time": "0:01:05", "throughput": 2072.27, "total_tokens": 3532096}
3725
+ {"current_steps": 18530, "total_steps": 19240, "loss": 0.233, "lr": 2.0770718197946625e-07, "epoch": 9.630977130977131, "percentage": 96.31, "elapsed_time": "0:28:24", "remaining_time": "0:01:05", "throughput": 2072.35, "total_tokens": 3533088}
3726
+ {"current_steps": 18535, "total_steps": 19240, "loss": 0.2596, "lr": 2.0480009636212327e-07, "epoch": 9.633575883575883, "percentage": 96.34, "elapsed_time": "0:28:25", "remaining_time": "0:01:04", "throughput": 2072.43, "total_tokens": 3534080}
3727
+ {"current_steps": 18540, "total_steps": 19240, "loss": 0.2583, "lr": 2.0191341459768475e-07, "epoch": 9.636174636174637, "percentage": 96.36, "elapsed_time": "0:28:25", "remaining_time": "0:01:04", "throughput": 2072.46, "total_tokens": 3534976}
3728
+ {"current_steps": 18545, "total_steps": 19240, "loss": 0.2756, "lr": 1.9904713906159224e-07, "epoch": 9.638773388773389, "percentage": 96.39, "elapsed_time": "0:28:26", "remaining_time": "0:01:03", "throughput": 2072.53, "total_tokens": 3535936}
3729
+ {"current_steps": 18550, "total_steps": 19240, "loss": 0.2278, "lr": 1.9620127211248672e-07, "epoch": 9.641372141372141, "percentage": 96.41, "elapsed_time": "0:28:26", "remaining_time": "0:01:03", "throughput": 2072.6, "total_tokens": 3536896}
3730
+ {"current_steps": 18555, "total_steps": 19240, "loss": 0.2445, "lr": 1.9337581609222277e-07, "epoch": 9.643970893970893, "percentage": 96.44, "elapsed_time": "0:28:26", "remaining_time": "0:01:03", "throughput": 2072.66, "total_tokens": 3537856}
3731
+ {"current_steps": 18560, "total_steps": 19240, "loss": 0.2647, "lr": 1.9057077332584883e-07, "epoch": 9.646569646569647, "percentage": 96.47, "elapsed_time": "0:28:27", "remaining_time": "0:01:02", "throughput": 2072.69, "total_tokens": 3538752}
3732
+ {"current_steps": 18565, "total_steps": 19240, "loss": 0.2724, "lr": 1.8778614612162404e-07, "epoch": 9.6491683991684, "percentage": 96.49, "elapsed_time": "0:28:27", "remaining_time": "0:01:02", "throughput": 2072.77, "total_tokens": 3539744}
3733
+ {"current_steps": 18570, "total_steps": 19240, "loss": 0.2252, "lr": 1.850219367710071e-07, "epoch": 9.651767151767151, "percentage": 96.52, "elapsed_time": "0:28:28", "remaining_time": "0:01:01", "throughput": 2072.8, "total_tokens": 3540640}
3734
+ {"current_steps": 18575, "total_steps": 19240, "loss": 0.1944, "lr": 1.8227814754865068e-07, "epoch": 9.654365904365905, "percentage": 96.54, "elapsed_time": "0:28:28", "remaining_time": "0:01:01", "throughput": 2072.83, "total_tokens": 3541536}
3735
+ {"current_steps": 18580, "total_steps": 19240, "loss": 0.2358, "lr": 1.7955478071240706e-07, "epoch": 9.656964656964657, "percentage": 96.57, "elapsed_time": "0:28:28", "remaining_time": "0:01:00", "throughput": 2072.86, "total_tokens": 3542432}
3736
+ {"current_steps": 18585, "total_steps": 19240, "loss": 0.2451, "lr": 1.7685183850331965e-07, "epoch": 9.65956340956341, "percentage": 96.6, "elapsed_time": "0:28:29", "remaining_time": "0:01:00", "throughput": 2072.93, "total_tokens": 3543392}
3737
+ {"current_steps": 18590, "total_steps": 19240, "loss": 0.2353, "lr": 1.7416932314562872e-07, "epoch": 9.662162162162161, "percentage": 96.62, "elapsed_time": "0:28:29", "remaining_time": "0:00:59", "throughput": 2072.99, "total_tokens": 3544352}
3738
+ {"current_steps": 18595, "total_steps": 19240, "loss": 0.2393, "lr": 1.7150723684676572e-07, "epoch": 9.664760914760915, "percentage": 96.65, "elapsed_time": "0:28:30", "remaining_time": "0:00:59", "throughput": 2073.04, "total_tokens": 3545280}
3739
+ {"current_steps": 18600, "total_steps": 19240, "loss": 0.2707, "lr": 1.6886558179734225e-07, "epoch": 9.667359667359667, "percentage": 96.67, "elapsed_time": "0:28:30", "remaining_time": "0:00:58", "throughput": 2073.11, "total_tokens": 3546240}
3740
+ {"current_steps": 18605, "total_steps": 19240, "loss": 0.2327, "lr": 1.662443601711694e-07, "epoch": 9.66995841995842, "percentage": 96.7, "elapsed_time": "0:28:31", "remaining_time": "0:00:58", "throughput": 2073.17, "total_tokens": 3547200}
3741
+ {"current_steps": 18610, "total_steps": 19240, "loss": 0.2993, "lr": 1.6364357412523845e-07, "epoch": 9.672557172557173, "percentage": 96.73, "elapsed_time": "0:28:31", "remaining_time": "0:00:57", "throughput": 2073.27, "total_tokens": 3548224}
3742
+ {"current_steps": 18615, "total_steps": 19240, "loss": 0.2528, "lr": 1.6106322579972077e-07, "epoch": 9.675155925155925, "percentage": 96.75, "elapsed_time": "0:28:31", "remaining_time": "0:00:57", "throughput": 2073.35, "total_tokens": 3549216}
3743
+ {"current_steps": 18620, "total_steps": 19240, "loss": 0.2658, "lr": 1.585033173179734e-07, "epoch": 9.677754677754677, "percentage": 96.78, "elapsed_time": "0:28:32", "remaining_time": "0:00:57", "throughput": 2073.43, "total_tokens": 3550208}
3744
+ {"current_steps": 18625, "total_steps": 19240, "loss": 0.3102, "lr": 1.5596385078653353e-07, "epoch": 9.68035343035343, "percentage": 96.8, "elapsed_time": "0:28:32", "remaining_time": "0:00:56", "throughput": 2073.49, "total_tokens": 3551168}
3745
+ {"current_steps": 18630, "total_steps": 19240, "loss": 0.2671, "lr": 1.5344482829511842e-07, "epoch": 9.682952182952183, "percentage": 96.83, "elapsed_time": "0:28:33", "remaining_time": "0:00:56", "throughput": 2073.58, "total_tokens": 3552160}
3746
+ {"current_steps": 18635, "total_steps": 19240, "loss": 0.2852, "lr": 1.5094625191661715e-07, "epoch": 9.685550935550935, "percentage": 96.86, "elapsed_time": "0:28:33", "remaining_time": "0:00:55", "throughput": 2073.66, "total_tokens": 3553152}
3747
+ {"current_steps": 18640, "total_steps": 19240, "loss": 0.2785, "lr": 1.4846812370709617e-07, "epoch": 9.688149688149688, "percentage": 96.88, "elapsed_time": "0:28:33", "remaining_time": "0:00:55", "throughput": 2073.71, "total_tokens": 3554080}
3748
+ {"current_steps": 18645, "total_steps": 19240, "loss": 0.2745, "lr": 1.4601044570579647e-07, "epoch": 9.690748440748441, "percentage": 96.91, "elapsed_time": "0:28:34", "remaining_time": "0:00:54", "throughput": 2073.78, "total_tokens": 3555040}
3749
+ {"current_steps": 18650, "total_steps": 19240, "loss": 0.2583, "lr": 1.4357321993513084e-07, "epoch": 9.693347193347194, "percentage": 96.93, "elapsed_time": "0:28:34", "remaining_time": "0:00:54", "throughput": 2073.86, "total_tokens": 3556032}
3750
+ {"current_steps": 18655, "total_steps": 19240, "loss": 0.2727, "lr": 1.4115644840067833e-07, "epoch": 9.695945945945946, "percentage": 96.96, "elapsed_time": "0:28:35", "remaining_time": "0:00:53", "throughput": 2073.94, "total_tokens": 3557024}
3751
+ {"current_steps": 18660, "total_steps": 19240, "loss": 0.2682, "lr": 1.3876013309118697e-07, "epoch": 9.698544698544698, "percentage": 96.99, "elapsed_time": "0:28:35", "remaining_time": "0:00:53", "throughput": 2074.01, "total_tokens": 3557984}
3752
+ {"current_steps": 18665, "total_steps": 19240, "loss": 0.2469, "lr": 1.363842759785794e-07, "epoch": 9.701143451143452, "percentage": 97.01, "elapsed_time": "0:28:35", "remaining_time": "0:00:52", "throughput": 2074.09, "total_tokens": 3558976}
3753
+ {"current_steps": 18670, "total_steps": 19240, "loss": 0.2534, "lr": 1.3402887901793338e-07, "epoch": 9.703742203742204, "percentage": 97.04, "elapsed_time": "0:28:36", "remaining_time": "0:00:52", "throughput": 2074.12, "total_tokens": 3559872}
3754
+ {"current_steps": 18675, "total_steps": 19240, "loss": 0.2719, "lr": 1.316939441474957e-07, "epoch": 9.706340956340956, "percentage": 97.06, "elapsed_time": "0:28:36", "remaining_time": "0:00:51", "throughput": 2074.17, "total_tokens": 3560800}
3755
+ {"current_steps": 18680, "total_steps": 19240, "loss": 0.2664, "lr": 1.2937947328867106e-07, "epoch": 9.70893970893971, "percentage": 97.09, "elapsed_time": "0:28:37", "remaining_time": "0:00:51", "throughput": 2074.24, "total_tokens": 3561760}
3756
+ {"current_steps": 18685, "total_steps": 19240, "loss": 0.2885, "lr": 1.270854683460304e-07, "epoch": 9.711538461538462, "percentage": 97.12, "elapsed_time": "0:28:37", "remaining_time": "0:00:51", "throughput": 2074.28, "total_tokens": 3562688}
3757
+ {"current_steps": 18690, "total_steps": 19240, "loss": 0.223, "lr": 1.2481193120729427e-07, "epoch": 9.714137214137214, "percentage": 97.14, "elapsed_time": "0:28:37", "remaining_time": "0:00:50", "throughput": 2074.4, "total_tokens": 3563744}
3758
+ {"current_steps": 18695, "total_steps": 19240, "loss": 0.2884, "lr": 1.2255886374334946e-07, "epoch": 9.716735966735968, "percentage": 97.17, "elapsed_time": "0:28:38", "remaining_time": "0:00:50", "throughput": 2074.45, "total_tokens": 3564672}
3759
+ {"current_steps": 18700, "total_steps": 19240, "loss": 0.2341, "lr": 1.203262678082323e-07, "epoch": 9.71933471933472, "percentage": 97.19, "elapsed_time": "0:28:38", "remaining_time": "0:00:49", "throughput": 2074.5, "total_tokens": 3565600}
3760
+ {"current_steps": 18705, "total_steps": 19240, "loss": 0.2322, "lr": 1.1811414523913711e-07, "epoch": 9.721933471933472, "percentage": 97.22, "elapsed_time": "0:28:39", "remaining_time": "0:00:49", "throughput": 2074.59, "total_tokens": 3566624}
3761
+ {"current_steps": 18710, "total_steps": 19240, "loss": 0.2369, "lr": 1.1592249785641052e-07, "epoch": 9.724532224532224, "percentage": 97.25, "elapsed_time": "0:28:39", "remaining_time": "0:00:48", "throughput": 2074.65, "total_tokens": 3567584}
3762
+ {"current_steps": 18715, "total_steps": 19240, "loss": 0.3308, "lr": 1.1375132746354322e-07, "epoch": 9.727130977130978, "percentage": 97.27, "elapsed_time": "0:28:40", "remaining_time": "0:00:48", "throughput": 2074.69, "total_tokens": 3568480}
3763
+ {"current_steps": 18720, "total_steps": 19240, "loss": 0.2341, "lr": 1.1160063584718661e-07, "epoch": 9.72972972972973, "percentage": 97.3, "elapsed_time": "0:28:40", "remaining_time": "0:00:47", "throughput": 2074.73, "total_tokens": 3569408}
3764
+ {"current_steps": 18725, "total_steps": 19240, "loss": 0.227, "lr": 1.0947042477713332e-07, "epoch": 9.732328482328482, "percentage": 97.32, "elapsed_time": "0:28:40", "remaining_time": "0:00:47", "throughput": 2074.8, "total_tokens": 3570368}
3765
+ {"current_steps": 18730, "total_steps": 19240, "loss": 0.2922, "lr": 1.0736069600632281e-07, "epoch": 9.734927234927234, "percentage": 97.35, "elapsed_time": "0:28:41", "remaining_time": "0:00:46", "throughput": 2074.87, "total_tokens": 3571328}
3766
+ {"current_steps": 18735, "total_steps": 19240, "loss": 0.2243, "lr": 1.0527145127084136e-07, "epoch": 9.737525987525988, "percentage": 97.38, "elapsed_time": "0:28:41", "remaining_time": "0:00:46", "throughput": 2074.98, "total_tokens": 3572384}
3767
+ {"current_steps": 18740, "total_steps": 19240, "loss": 0.2677, "lr": 1.032026922899193e-07, "epoch": 9.74012474012474, "percentage": 97.4, "elapsed_time": "0:28:42", "remaining_time": "0:00:45", "throughput": 2075.06, "total_tokens": 3573376}
3768
+ {"current_steps": 18745, "total_steps": 19240, "loss": 0.2508, "lr": 1.0115442076592541e-07, "epoch": 9.742723492723492, "percentage": 97.43, "elapsed_time": "0:28:42", "remaining_time": "0:00:45", "throughput": 2075.11, "total_tokens": 3574304}
3769
+ {"current_steps": 18750, "total_steps": 19240, "loss": 0.2631, "lr": 9.912663838437808e-08, "epoch": 9.745322245322246, "percentage": 97.45, "elapsed_time": "0:28:42", "remaining_time": "0:00:45", "throughput": 2075.21, "total_tokens": 3575328}
3770
+ {"current_steps": 18755, "total_steps": 19240, "loss": 0.232, "lr": 9.711934681392587e-08, "epoch": 9.747920997920998, "percentage": 97.48, "elapsed_time": "0:28:43", "remaining_time": "0:00:44", "throughput": 2075.28, "total_tokens": 3576288}
3771
+ {"current_steps": 18760, "total_steps": 19240, "loss": 0.2597, "lr": 9.513254770636137e-08, "epoch": 9.75051975051975, "percentage": 97.51, "elapsed_time": "0:28:43", "remaining_time": "0:00:44", "throughput": 2075.37, "total_tokens": 3577312}
3772
+ {"current_steps": 18765, "total_steps": 19240, "loss": 0.2204, "lr": 9.31662426966129e-08, "epoch": 9.753118503118504, "percentage": 97.53, "elapsed_time": "0:28:44", "remaining_time": "0:00:43", "throughput": 2075.42, "total_tokens": 3578240}
3773
+ {"current_steps": 18770, "total_steps": 19240, "loss": 0.2716, "lr": 9.122043340273889e-08, "epoch": 9.755717255717256, "percentage": 97.56, "elapsed_time": "0:28:44", "remaining_time": "0:00:43", "throughput": 2075.47, "total_tokens": 3579168}
3774
+ {"current_steps": 18775, "total_steps": 19240, "loss": 0.2155, "lr": 8.929512142594187e-08, "epoch": 9.758316008316008, "percentage": 97.58, "elapsed_time": "0:28:44", "remaining_time": "0:00:42", "throughput": 2075.52, "total_tokens": 3580096}
3775
+ {"current_steps": 18780, "total_steps": 19240, "loss": 0.1968, "lr": 8.739030835055173e-08, "epoch": 9.76091476091476, "percentage": 97.61, "elapsed_time": "0:28:45", "remaining_time": "0:00:42", "throughput": 2075.55, "total_tokens": 3580992}
3776
+ {"current_steps": 18785, "total_steps": 19240, "loss": 0.2659, "lr": 8.550599574402574e-08, "epoch": 9.763513513513514, "percentage": 97.64, "elapsed_time": "0:28:45", "remaining_time": "0:00:41", "throughput": 2075.63, "total_tokens": 3581984}
3777
+ {"current_steps": 18790, "total_steps": 19240, "loss": 0.2565, "lr": 8.364218515695965e-08, "epoch": 9.766112266112266, "percentage": 97.66, "elapsed_time": "0:28:46", "remaining_time": "0:00:41", "throughput": 2075.68, "total_tokens": 3582912}
3778
+ {"current_steps": 18795, "total_steps": 19240, "loss": 0.2671, "lr": 8.179887812307386e-08, "epoch": 9.768711018711018, "percentage": 97.69, "elapsed_time": "0:28:46", "remaining_time": "0:00:40", "throughput": 2075.74, "total_tokens": 3583872}
3779
+ {"current_steps": 18800, "total_steps": 19240, "loss": 0.2307, "lr": 7.99760761592161e-08, "epoch": 9.771309771309772, "percentage": 97.71, "elapsed_time": "0:28:46", "remaining_time": "0:00:40", "throughput": 2075.77, "total_tokens": 3584768}
3780
+ {"current_steps": 18805, "total_steps": 19240, "loss": 0.2718, "lr": 7.817378076536153e-08, "epoch": 9.773908523908524, "percentage": 97.74, "elapsed_time": "0:28:47", "remaining_time": "0:00:39", "throughput": 2075.8, "total_tokens": 3585664}
3781
+ {"current_steps": 18810, "total_steps": 19240, "loss": 0.2453, "lr": 7.63919934246099e-08, "epoch": 9.776507276507276, "percentage": 97.77, "elapsed_time": "0:28:47", "remaining_time": "0:00:39", "throughput": 2075.86, "total_tokens": 3586624}
3782
+ {"current_steps": 18815, "total_steps": 19240, "loss": 0.2596, "lr": 7.463071560318835e-08, "epoch": 9.779106029106028, "percentage": 97.79, "elapsed_time": "0:28:48", "remaining_time": "0:00:39", "throughput": 2075.94, "total_tokens": 3587616}
3783
+ {"current_steps": 18820, "total_steps": 19240, "loss": 0.3123, "lr": 7.288994875044308e-08, "epoch": 9.781704781704782, "percentage": 97.82, "elapsed_time": "0:28:48", "remaining_time": "0:00:38", "throughput": 2075.99, "total_tokens": 3588544}
3784
+ {"current_steps": 18825, "total_steps": 19240, "loss": 0.2599, "lr": 7.116969429883935e-08, "epoch": 9.784303534303534, "percentage": 97.84, "elapsed_time": "0:28:49", "remaining_time": "0:00:38", "throughput": 2076.04, "total_tokens": 3589472}
3785
+ {"current_steps": 18830, "total_steps": 19240, "loss": 0.2172, "lr": 6.946995366397257e-08, "epoch": 9.786902286902286, "percentage": 97.87, "elapsed_time": "0:28:49", "remaining_time": "0:00:37", "throughput": 2076.1, "total_tokens": 3590432}
3786
+ {"current_steps": 18835, "total_steps": 19240, "loss": 0.261, "lr": 6.779072824454614e-08, "epoch": 9.78950103950104, "percentage": 97.9, "elapsed_time": "0:28:49", "remaining_time": "0:00:37", "throughput": 2076.18, "total_tokens": 3591424}
3787
+ {"current_steps": 18840, "total_steps": 19240, "loss": 0.2757, "lr": 6.6132019422388e-08, "epoch": 9.792099792099792, "percentage": 97.92, "elapsed_time": "0:28:50", "remaining_time": "0:00:36", "throughput": 2076.28, "total_tokens": 3592448}
3788
+ {"current_steps": 18845, "total_steps": 19240, "loss": 0.2679, "lr": 6.449382856244246e-08, "epoch": 9.794698544698544, "percentage": 97.95, "elapsed_time": "0:28:50", "remaining_time": "0:00:36", "throughput": 2076.38, "total_tokens": 3593472}
3789
+ {"current_steps": 18850, "total_steps": 19240, "loss": 0.2554, "lr": 6.287615701277005e-08, "epoch": 9.797297297297296, "percentage": 97.97, "elapsed_time": "0:28:51", "remaining_time": "0:00:35", "throughput": 2076.48, "total_tokens": 3594496}
3790
+ {"current_steps": 18855, "total_steps": 19240, "loss": 0.2092, "lr": 6.127900610454207e-08, "epoch": 9.79989604989605, "percentage": 98.0, "elapsed_time": "0:28:51", "remaining_time": "0:00:35", "throughput": 2076.55, "total_tokens": 3595456}
3791
+ {"current_steps": 18860, "total_steps": 19240, "loss": 0.2939, "lr": 5.970237715204885e-08, "epoch": 9.802494802494802, "percentage": 98.02, "elapsed_time": "0:28:51", "remaining_time": "0:00:34", "throughput": 2076.58, "total_tokens": 3596352}
3792
+ {"current_steps": 18865, "total_steps": 19240, "loss": 0.2817, "lr": 5.814627145269147e-08, "epoch": 9.805093555093555, "percentage": 98.05, "elapsed_time": "0:28:52", "remaining_time": "0:00:34", "throughput": 2076.66, "total_tokens": 3597344}
3793
+ {"current_steps": 18870, "total_steps": 19240, "loss": 0.3313, "lr": 5.661069028697896e-08, "epoch": 9.807692307692308, "percentage": 98.08, "elapsed_time": "0:28:52", "remaining_time": "0:00:33", "throughput": 2076.71, "total_tokens": 3598272}
3794
+ {"current_steps": 18875, "total_steps": 19240, "loss": 0.2513, "lr": 5.509563491853942e-08, "epoch": 9.81029106029106, "percentage": 98.1, "elapsed_time": "0:28:53", "remaining_time": "0:00:33", "throughput": 2076.75, "total_tokens": 3599200}
3795
+ {"current_steps": 18880, "total_steps": 19240, "loss": 0.1963, "lr": 5.3601106594097784e-08, "epoch": 9.812889812889813, "percentage": 98.13, "elapsed_time": "0:28:53", "remaining_time": "0:00:33", "throughput": 2076.85, "total_tokens": 3600224}
3796
+ {"current_steps": 18885, "total_steps": 19240, "loss": 0.2091, "lr": 5.2127106543498063e-08, "epoch": 9.815488565488565, "percentage": 98.15, "elapsed_time": "0:28:53", "remaining_time": "0:00:32", "throughput": 2076.88, "total_tokens": 3601120}
3797
+ {"current_steps": 18890, "total_steps": 19240, "loss": 0.2394, "lr": 5.0673635979686665e-08, "epoch": 9.818087318087318, "percentage": 98.18, "elapsed_time": "0:28:54", "remaining_time": "0:00:32", "throughput": 2076.95, "total_tokens": 3602080}
3798
+ {"current_steps": 18895, "total_steps": 19240, "loss": 0.2571, "lr": 4.924069609872073e-08, "epoch": 9.82068607068607, "percentage": 98.21, "elapsed_time": "0:28:54", "remaining_time": "0:00:31", "throughput": 2077.02, "total_tokens": 3603072}
3799
+ {"current_steps": 18900, "total_steps": 19240, "loss": 0.2986, "lr": 4.7828288079757035e-08, "epoch": 9.823284823284823, "percentage": 98.23, "elapsed_time": "0:28:55", "remaining_time": "0:00:31", "throughput": 2077.09, "total_tokens": 3604032}
3800
+ {"current_steps": 18905, "total_steps": 19240, "loss": 0.251, "lr": 4.643641308505753e-08, "epoch": 9.825883575883577, "percentage": 98.26, "elapsed_time": "0:28:55", "remaining_time": "0:00:30", "throughput": 2077.14, "total_tokens": 3604960}
3801
+ {"current_steps": 18910, "total_steps": 19240, "loss": 0.229, "lr": 4.50650722599949e-08, "epoch": 9.828482328482329, "percentage": 98.28, "elapsed_time": "0:28:55", "remaining_time": "0:00:30", "throughput": 2077.17, "total_tokens": 3605856}
3802
+ {"current_steps": 18915, "total_steps": 19240, "loss": 0.2275, "lr": 4.3714266733035914e-08, "epoch": 9.83108108108108, "percentage": 98.31, "elapsed_time": "0:28:56", "remaining_time": "0:00:29", "throughput": 2077.22, "total_tokens": 3606816}
3803
+ {"current_steps": 18920, "total_steps": 19240, "loss": 0.2475, "lr": 4.238399761574974e-08, "epoch": 9.833679833679835, "percentage": 98.34, "elapsed_time": "0:28:56", "remaining_time": "0:00:29", "throughput": 2077.29, "total_tokens": 3607776}
3804
+ {"current_steps": 18925, "total_steps": 19240, "loss": 0.2915, "lr": 4.10742660028135e-08, "epoch": 9.836278586278587, "percentage": 98.36, "elapsed_time": "0:28:57", "remaining_time": "0:00:28", "throughput": 2077.35, "total_tokens": 3608736}
3805
+ {"current_steps": 18930, "total_steps": 19240, "loss": 0.249, "lr": 3.978507297199285e-08, "epoch": 9.838877338877339, "percentage": 98.39, "elapsed_time": "0:28:57", "remaining_time": "0:00:28", "throughput": 2077.43, "total_tokens": 3609728}
3806
+ {"current_steps": 18935, "total_steps": 19240, "loss": 0.2304, "lr": 3.851641958416696e-08, "epoch": 9.84147609147609, "percentage": 98.41, "elapsed_time": "0:28:58", "remaining_time": "0:00:27", "throughput": 2077.51, "total_tokens": 3610720}
3807
+ {"current_steps": 18940, "total_steps": 19240, "loss": 0.2498, "lr": 3.7268306883297966e-08, "epoch": 9.844074844074845, "percentage": 98.44, "elapsed_time": "0:28:58", "remaining_time": "0:00:27", "throughput": 2077.56, "total_tokens": 3611648}
3808
+ {"current_steps": 18945, "total_steps": 19240, "loss": 0.2587, "lr": 3.604073589645596e-08, "epoch": 9.846673596673597, "percentage": 98.47, "elapsed_time": "0:28:58", "remaining_time": "0:00:27", "throughput": 2077.62, "total_tokens": 3612608}
3809
+ {"current_steps": 18950, "total_steps": 19240, "loss": 0.2574, "lr": 3.4833707633799565e-08, "epoch": 9.849272349272349, "percentage": 98.49, "elapsed_time": "0:28:59", "remaining_time": "0:00:26", "throughput": 2077.71, "total_tokens": 3613600}
3810
+ {"current_steps": 18955, "total_steps": 19240, "loss": 0.2937, "lr": 3.3647223088589805e-08, "epoch": 9.851871101871101, "percentage": 98.52, "elapsed_time": "0:28:59", "remaining_time": "0:00:26", "throughput": 2077.77, "total_tokens": 3614560}
3811
+ {"current_steps": 18960, "total_steps": 19240, "loss": 0.251, "lr": 3.248128323717625e-08, "epoch": 9.854469854469855, "percentage": 98.54, "elapsed_time": "0:29:00", "remaining_time": "0:00:25", "throughput": 2077.81, "total_tokens": 3615488}
3812
+ {"current_steps": 18965, "total_steps": 19240, "loss": 0.2481, "lr": 3.133588903900808e-08, "epoch": 9.857068607068607, "percentage": 98.57, "elapsed_time": "0:29:00", "remaining_time": "0:00:25", "throughput": 2077.87, "total_tokens": 3616448}
3813
+ {"current_steps": 18970, "total_steps": 19240, "loss": 0.2342, "lr": 3.021104143662301e-08, "epoch": 9.859667359667359, "percentage": 98.6, "elapsed_time": "0:29:00", "remaining_time": "0:00:24", "throughput": 2077.88, "total_tokens": 3617312}
3814
+ {"current_steps": 18975, "total_steps": 19240, "loss": 0.2949, "lr": 2.910674135565561e-08, "epoch": 9.862266112266113, "percentage": 98.62, "elapsed_time": "0:29:01", "remaining_time": "0:00:24", "throughput": 2077.95, "total_tokens": 3618272}
3815
+ {"current_steps": 18980, "total_steps": 19240, "loss": 0.2707, "lr": 2.8022989704826196e-08, "epoch": 9.864864864864865, "percentage": 98.65, "elapsed_time": "0:29:01", "remaining_time": "0:00:23", "throughput": 2077.96, "total_tokens": 3619136}
3816
+ {"current_steps": 18985, "total_steps": 19240, "loss": 0.241, "lr": 2.6959787375949174e-08, "epoch": 9.867463617463617, "percentage": 98.67, "elapsed_time": "0:29:02", "remaining_time": "0:00:23", "throughput": 2078.05, "total_tokens": 3620160}
3817
+ {"current_steps": 18990, "total_steps": 19240, "loss": 0.2455, "lr": 2.5917135243930245e-08, "epoch": 9.87006237006237, "percentage": 98.7, "elapsed_time": "0:29:02", "remaining_time": "0:00:22", "throughput": 2078.1, "total_tokens": 3621088}
3818
+ {"current_steps": 18995, "total_steps": 19240, "loss": 0.3202, "lr": 2.4895034166760865e-08, "epoch": 9.872661122661123, "percentage": 98.73, "elapsed_time": "0:29:02", "remaining_time": "0:00:22", "throughput": 2078.17, "total_tokens": 3622048}
3819
+ {"current_steps": 19000, "total_steps": 19240, "loss": 0.2429, "lr": 2.389348498552657e-08, "epoch": 9.875259875259875, "percentage": 98.75, "elapsed_time": "0:29:03", "remaining_time": "0:00:22", "throughput": 2078.21, "total_tokens": 3622976}
3820
+ {"current_steps": 19005, "total_steps": 19240, "loss": 0.2704, "lr": 2.2912488524393095e-08, "epoch": 9.877858627858627, "percentage": 98.78, "elapsed_time": "0:29:03", "remaining_time": "0:00:21", "throughput": 2078.29, "total_tokens": 3623968}
3821
+ {"current_steps": 19010, "total_steps": 19240, "loss": 0.3275, "lr": 2.1952045590620253e-08, "epoch": 9.880457380457381, "percentage": 98.8, "elapsed_time": "0:29:04", "remaining_time": "0:00:21", "throughput": 2078.37, "total_tokens": 3624960}
3822
+ {"current_steps": 19015, "total_steps": 19240, "loss": 0.2599, "lr": 2.101215697455361e-08, "epoch": 9.883056133056133, "percentage": 98.83, "elapsed_time": "0:29:04", "remaining_time": "0:00:20", "throughput": 2078.45, "total_tokens": 3625952}
3823
+ {"current_steps": 19020, "total_steps": 19240, "loss": 0.2286, "lr": 2.0092823449618935e-08, "epoch": 9.885654885654885, "percentage": 98.86, "elapsed_time": "0:29:04", "remaining_time": "0:00:20", "throughput": 2078.53, "total_tokens": 3626944}
3824
+ {"current_steps": 19025, "total_steps": 19240, "loss": 0.2803, "lr": 1.9194045772336077e-08, "epoch": 9.888253638253639, "percentage": 98.88, "elapsed_time": "0:29:05", "remaining_time": "0:00:19", "throughput": 2078.63, "total_tokens": 3627968}
3825
+ {"current_steps": 19030, "total_steps": 19240, "loss": 0.2683, "lr": 1.831582468229953e-08, "epoch": 9.890852390852391, "percentage": 98.91, "elapsed_time": "0:29:05", "remaining_time": "0:00:19", "throughput": 2078.69, "total_tokens": 3628928}
3826
+ {"current_steps": 19035, "total_steps": 19240, "loss": 0.22, "lr": 1.7458160902197872e-08, "epoch": 9.893451143451143, "percentage": 98.93, "elapsed_time": "0:29:06", "remaining_time": "0:00:18", "throughput": 2078.75, "total_tokens": 3629888}
3827
+ {"current_steps": 19040, "total_steps": 19240, "loss": 0.3089, "lr": 1.6621055137797105e-08, "epoch": 9.896049896049895, "percentage": 98.96, "elapsed_time": "0:29:06", "remaining_time": "0:00:18", "throughput": 2078.86, "total_tokens": 3630944}
3828
+ {"current_steps": 19045, "total_steps": 19240, "loss": 0.2275, "lr": 1.5804508077946202e-08, "epoch": 9.89864864864865, "percentage": 98.99, "elapsed_time": "0:29:07", "remaining_time": "0:00:17", "throughput": 2078.93, "total_tokens": 3631904}
3829
+ {"current_steps": 19050, "total_steps": 19240, "loss": 0.2618, "lr": 1.500852039458267e-08, "epoch": 9.901247401247401, "percentage": 99.01, "elapsed_time": "0:29:07", "remaining_time": "0:00:17", "throughput": 2078.97, "total_tokens": 3632832}
3830
+ {"current_steps": 19055, "total_steps": 19240, "loss": 0.1758, "lr": 1.4233092742713116e-08, "epoch": 9.903846153846153, "percentage": 99.04, "elapsed_time": "0:29:07", "remaining_time": "0:00:16", "throughput": 2079.01, "total_tokens": 3633760}
3831
+ {"current_steps": 19060, "total_steps": 19240, "loss": 0.2905, "lr": 1.3478225760441e-08, "epoch": 9.906444906444907, "percentage": 99.06, "elapsed_time": "0:29:08", "remaining_time": "0:00:16", "throughput": 2079.09, "total_tokens": 3634752}
3832
+ {"current_steps": 19065, "total_steps": 19240, "loss": 0.2631, "lr": 1.2743920068938874e-08, "epoch": 9.90904365904366, "percentage": 99.09, "elapsed_time": "0:29:08", "remaining_time": "0:00:16", "throughput": 2079.13, "total_tokens": 3635680}
3833
+ {"current_steps": 19070, "total_steps": 19240, "loss": 0.1893, "lr": 1.203017627246228e-08, "epoch": 9.911642411642411, "percentage": 99.12, "elapsed_time": "0:29:09", "remaining_time": "0:00:15", "throughput": 2079.21, "total_tokens": 3636672}
3834
+ {"current_steps": 19075, "total_steps": 19240, "loss": 0.2565, "lr": 1.1336994958349723e-08, "epoch": 9.914241164241163, "percentage": 99.14, "elapsed_time": "0:29:09", "remaining_time": "0:00:15", "throughput": 2079.28, "total_tokens": 3637632}
3835
+ {"current_steps": 19080, "total_steps": 19240, "loss": 0.2463, "lr": 1.0664376697017142e-08, "epoch": 9.916839916839917, "percentage": 99.17, "elapsed_time": "0:29:09", "remaining_time": "0:00:14", "throughput": 2079.34, "total_tokens": 3638592}
3836
+ {"current_steps": 19085, "total_steps": 19240, "loss": 0.2523, "lr": 1.0012322041960676e-08, "epoch": 9.91943866943867, "percentage": 99.19, "elapsed_time": "0:29:10", "remaining_time": "0:00:14", "throughput": 2079.41, "total_tokens": 3639552}
3837
+ {"current_steps": 19090, "total_steps": 19240, "loss": 0.2567, "lr": 9.38083152974556e-09, "epoch": 9.922037422037421, "percentage": 99.22, "elapsed_time": "0:29:10", "remaining_time": "0:00:13", "throughput": 2079.45, "total_tokens": 3640480}
3838
+ {"current_steps": 19095, "total_steps": 19240, "loss": 0.2261, "lr": 8.76990568003111e-09, "epoch": 9.924636174636175, "percentage": 99.25, "elapsed_time": "0:29:11", "remaining_time": "0:00:13", "throughput": 2079.53, "total_tokens": 3641472}
3839
+ {"current_steps": 19100, "total_steps": 19240, "loss": 0.2806, "lr": 8.17954499554019e-09, "epoch": 9.927234927234927, "percentage": 99.27, "elapsed_time": "0:29:11", "remaining_time": "0:00:12", "throughput": 2079.6, "total_tokens": 3642464}
3840
+ {"current_steps": 19105, "total_steps": 19240, "loss": 0.257, "lr": 7.609749962081413e-09, "epoch": 9.92983367983368, "percentage": 99.3, "elapsed_time": "0:29:11", "remaining_time": "0:00:12", "throughput": 2079.68, "total_tokens": 3643456}
3841
+ {"current_steps": 19110, "total_steps": 19240, "loss": 0.2734, "lr": 7.060521048532498e-09, "epoch": 9.932432432432432, "percentage": 99.32, "elapsed_time": "0:29:12", "remaining_time": "0:00:11", "throughput": 2079.71, "total_tokens": 3644352}
3842
+ {"current_steps": 19115, "total_steps": 19240, "loss": 0.25, "lr": 6.5318587068541325e-09, "epoch": 9.935031185031185, "percentage": 99.35, "elapsed_time": "0:29:12", "remaining_time": "0:00:11", "throughput": 2079.79, "total_tokens": 3645344}
3843
+ {"current_steps": 19120, "total_steps": 19240, "loss": 0.2383, "lr": 6.023763372076108e-09, "epoch": 9.937629937629938, "percentage": 99.38, "elapsed_time": "0:29:13", "remaining_time": "0:00:11", "throughput": 2079.87, "total_tokens": 3646336}
3844
+ {"current_steps": 19125, "total_steps": 19240, "loss": 0.3058, "lr": 5.536235462313965e-09, "epoch": 9.94022869022869, "percentage": 99.4, "elapsed_time": "0:29:13", "remaining_time": "0:00:10", "throughput": 2079.94, "total_tokens": 3647296}
3845
+ {"current_steps": 19130, "total_steps": 19240, "loss": 0.241, "lr": 5.069275378746796e-09, "epoch": 9.942827442827443, "percentage": 99.43, "elapsed_time": "0:29:13", "remaining_time": "0:00:10", "throughput": 2079.98, "total_tokens": 3648224}
3846
+ {"current_steps": 19135, "total_steps": 19240, "loss": 0.2468, "lr": 4.622883505636666e-09, "epoch": 9.945426195426196, "percentage": 99.45, "elapsed_time": "0:29:14", "remaining_time": "0:00:09", "throughput": 2080.03, "total_tokens": 3649152}
3847
+ {"current_steps": 19140, "total_steps": 19240, "loss": 0.2391, "lr": 4.197060210317516e-09, "epoch": 9.948024948024948, "percentage": 99.48, "elapsed_time": "0:29:14", "remaining_time": "0:00:09", "throughput": 2080.06, "total_tokens": 3650048}
3848
+ {"current_steps": 19145, "total_steps": 19240, "loss": 0.3219, "lr": 3.791805843195162e-09, "epoch": 9.950623700623701, "percentage": 99.51, "elapsed_time": "0:29:15", "remaining_time": "0:00:08", "throughput": 2080.09, "total_tokens": 3650944}
3849
+ {"current_steps": 19150, "total_steps": 19240, "loss": 0.2518, "lr": 3.4071207377500693e-09, "epoch": 9.953222453222454, "percentage": 99.53, "elapsed_time": "0:29:15", "remaining_time": "0:00:08", "throughput": 2080.17, "total_tokens": 3651936}
3850
+ {"current_steps": 19155, "total_steps": 19240, "loss": 0.2813, "lr": 3.043005210542904e-09, "epoch": 9.955821205821206, "percentage": 99.56, "elapsed_time": "0:29:16", "remaining_time": "0:00:07", "throughput": 2080.21, "total_tokens": 3652864}
3851
+ {"current_steps": 19160, "total_steps": 19240, "loss": 0.2783, "lr": 2.6994595612006566e-09, "epoch": 9.958419958419958, "percentage": 99.58, "elapsed_time": "0:29:16", "remaining_time": "0:00:07", "throughput": 2080.29, "total_tokens": 3653856}
3852
+ {"current_steps": 19165, "total_steps": 19240, "loss": 0.2675, "lr": 2.376484072424967e-09, "epoch": 9.961018711018712, "percentage": 99.61, "elapsed_time": "0:29:16", "remaining_time": "0:00:06", "throughput": 2080.34, "total_tokens": 3654784}
3853
+ {"current_steps": 19170, "total_steps": 19240, "loss": 0.2758, "lr": 2.074079009989349e-09, "epoch": 9.963617463617464, "percentage": 99.64, "elapsed_time": "0:29:17", "remaining_time": "0:00:06", "throughput": 2080.38, "total_tokens": 3655712}
3854
+ {"current_steps": 19175, "total_steps": 19240, "loss": 0.2441, "lr": 1.7922446227447432e-09, "epoch": 9.966216216216216, "percentage": 99.66, "elapsed_time": "0:29:17", "remaining_time": "0:00:05", "throughput": 2080.46, "total_tokens": 3656704}
3855
+ {"current_steps": 19180, "total_steps": 19240, "loss": 0.2548, "lr": 1.5309811426056364e-09, "epoch": 9.96881496881497, "percentage": 99.69, "elapsed_time": "0:29:18", "remaining_time": "0:00:05", "throughput": 2080.52, "total_tokens": 3657664}
3856
+ {"current_steps": 19185, "total_steps": 19240, "loss": 0.2695, "lr": 1.2902887845722688e-09, "epoch": 9.971413721413722, "percentage": 99.71, "elapsed_time": "0:29:18", "remaining_time": "0:00:05", "throughput": 2080.6, "total_tokens": 3658656}
3857
+ {"current_steps": 19190, "total_steps": 19240, "loss": 0.2381, "lr": 1.070167746702877e-09, "epoch": 9.974012474012474, "percentage": 99.74, "elapsed_time": "0:29:18", "remaining_time": "0:00:04", "throughput": 2080.66, "total_tokens": 3659616}
3858
+ {"current_steps": 19195, "total_steps": 19240, "loss": 0.2297, "lr": 8.70618210138674e-10, "epoch": 9.976611226611226, "percentage": 99.77, "elapsed_time": "0:29:19", "remaining_time": "0:00:04", "throughput": 2080.76, "total_tokens": 3660640}
3859
+ {"current_steps": 19200, "total_steps": 19240, "loss": 0.231, "lr": 6.916403390844206e-10, "epoch": 9.97920997920998, "percentage": 99.79, "elapsed_time": "0:29:19", "remaining_time": "0:00:03", "throughput": 2080.8, "total_tokens": 3661568}
3860
+ {"current_steps": 19205, "total_steps": 19240, "loss": 0.2461, "lr": 5.332342808223034e-10, "epoch": 9.981808731808732, "percentage": 99.82, "elapsed_time": "0:29:20", "remaining_time": "0:00:03", "throughput": 2080.88, "total_tokens": 3662560}
3861
+ {"current_steps": 19210, "total_steps": 19240, "loss": 0.2594, "lr": 3.9540016570083215e-10, "epoch": 9.984407484407484, "percentage": 99.84, "elapsed_time": "0:29:20", "remaining_time": "0:00:02", "throughput": 2080.95, "total_tokens": 3663520}
3862
+ {"current_steps": 19215, "total_steps": 19240, "loss": 0.2462, "lr": 2.7813810714871767e-10, "epoch": 9.987006237006238, "percentage": 99.87, "elapsed_time": "0:29:20", "remaining_time": "0:00:02", "throughput": 2080.98, "total_tokens": 3664416}
3863
+ {"current_steps": 19220, "total_steps": 19240, "loss": 0.2073, "lr": 1.8144820165544307e-10, "epoch": 9.98960498960499, "percentage": 99.9, "elapsed_time": "0:29:21", "remaining_time": "0:00:01", "throughput": 2081.0, "total_tokens": 3665312}
3864
+ {"current_steps": 19225, "total_steps": 19240, "loss": 0.2718, "lr": 1.0533052878791694e-10, "epoch": 9.992203742203742, "percentage": 99.92, "elapsed_time": "0:29:21", "remaining_time": "0:00:01", "throughput": 2081.07, "total_tokens": 3666272}
3865
+ {"current_steps": 19230, "total_steps": 19240, "loss": 0.2291, "lr": 4.978515118214677e-11, "epoch": 9.994802494802494, "percentage": 99.95, "elapsed_time": "0:29:22", "remaining_time": "0:00:00", "throughput": 2081.11, "total_tokens": 3667200}
3866
+ {"current_steps": 19235, "total_steps": 19240, "loss": 0.2725, "lr": 1.4812114548790057e-11, "epoch": 9.997401247401248, "percentage": 99.97, "elapsed_time": "0:29:22", "remaining_time": "0:00:00", "throughput": 2081.23, "total_tokens": 3668256}
3867
+ {"current_steps": 19240, "total_steps": 19240, "loss": 0.2594, "lr": 4.114476648275911e-13, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:29:23", "remaining_time": "0:00:00", "throughput": 2081.18, "total_tokens": 3669168}
3868
+ {"current_steps": 19240, "total_steps": 19240, "eval_loss": 0.24947991967201233, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:29:30", "remaining_time": "0:00:00", "throughput": 2071.86, "total_tokens": 3669168}
3869
+ {"current_steps": 19240, "total_steps": 19240, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:29:31", "remaining_time": "0:00:00", "throughput": 2070.7, "total_tokens": 3669168}