rbelanec commited on
Commit
cfd93e6
verified
1 Parent(s): eb7406a

Training in progress, step 16520

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +155 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:610bbbcc67deb5e39baf53fd40e9b2f54d499d13817d46c14c7647ff725e2aaa
3
  size 58745928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dbf0e705fd999da3ac0472eecce4dfe87cd539c355660fa436ec89d30f5325d
3
  size 58745928
trainer_log.jsonl CHANGED
@@ -3168,3 +3168,158 @@
3168
  {"current_steps": 15745, "total_steps": 16520, "loss": 0.0, "lr": 3.353164497904987e-07, "epoch": 19.06174334140436, "percentage": 95.31, "elapsed_time": "0:52:31", "remaining_time": "0:02:35", "throughput": 2046.62, "total_tokens": 6450440}
3169
  {"current_steps": 15750, "total_steps": 16520, "loss": 0.0, "lr": 3.31018813005407e-07, "epoch": 19.06779661016949, "percentage": 95.34, "elapsed_time": "0:52:32", "remaining_time": "0:02:34", "throughput": 2046.66, "total_tokens": 6452552}
3170
  {"current_steps": 15755, "total_steps": 16520, "loss": 0.0, "lr": 3.267487112673412e-07, "epoch": 19.073849878934624, "percentage": 95.37, "elapsed_time": "0:52:33", "remaining_time": "0:02:33", "throughput": 2046.68, "total_tokens": 6454568}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3168
  {"current_steps": 15745, "total_steps": 16520, "loss": 0.0, "lr": 3.353164497904987e-07, "epoch": 19.06174334140436, "percentage": 95.31, "elapsed_time": "0:52:31", "remaining_time": "0:02:35", "throughput": 2046.62, "total_tokens": 6450440}
3169
  {"current_steps": 15750, "total_steps": 16520, "loss": 0.0, "lr": 3.31018813005407e-07, "epoch": 19.06779661016949, "percentage": 95.34, "elapsed_time": "0:52:32", "remaining_time": "0:02:34", "throughput": 2046.66, "total_tokens": 6452552}
3170
  {"current_steps": 15755, "total_steps": 16520, "loss": 0.0, "lr": 3.267487112673412e-07, "epoch": 19.073849878934624, "percentage": 95.37, "elapsed_time": "0:52:33", "remaining_time": "0:02:33", "throughput": 2046.68, "total_tokens": 6454568}
3171
+ {"current_steps": 15760, "total_steps": 16520, "loss": 0.0, "lr": 3.225061493425108e-07, "epoch": 19.079903147699756, "percentage": 95.4, "elapsed_time": "0:52:34", "remaining_time": "0:02:32", "throughput": 2046.69, "total_tokens": 6456552}
3172
+ {"current_steps": 15765, "total_steps": 16520, "loss": 0.0, "lr": 3.1829113196638614e-07, "epoch": 19.085956416464892, "percentage": 95.43, "elapsed_time": "0:52:35", "remaining_time": "0:02:31", "throughput": 2046.72, "total_tokens": 6458600}
3173
+ {"current_steps": 15770, "total_steps": 16520, "loss": 0.0, "lr": 3.141036638436845e-07, "epoch": 19.092009685230025, "percentage": 95.46, "elapsed_time": "0:52:36", "remaining_time": "0:02:30", "throughput": 2046.73, "total_tokens": 6460552}
3174
+ {"current_steps": 15775, "total_steps": 16520, "loss": 0.0, "lr": 3.099437496483837e-07, "epoch": 19.098062953995157, "percentage": 95.49, "elapsed_time": "0:52:37", "remaining_time": "0:02:29", "throughput": 2046.75, "total_tokens": 6462600}
3175
+ {"current_steps": 15780, "total_steps": 16520, "loss": 0.0, "lr": 3.058113940236945e-07, "epoch": 19.10411622276029, "percentage": 95.52, "elapsed_time": "0:52:38", "remaining_time": "0:02:28", "throughput": 2046.75, "total_tokens": 6464520}
3176
+ {"current_steps": 15785, "total_steps": 16520, "loss": 0.0, "lr": 3.017066015820774e-07, "epoch": 19.110169491525422, "percentage": 95.55, "elapsed_time": "0:52:39", "remaining_time": "0:02:27", "throughput": 2046.75, "total_tokens": 6466440}
3177
+ {"current_steps": 15790, "total_steps": 16520, "loss": 0.0, "lr": 2.976293769052202e-07, "epoch": 19.116222760290558, "percentage": 95.58, "elapsed_time": "0:52:40", "remaining_time": "0:02:26", "throughput": 2046.77, "total_tokens": 6468424}
3178
+ {"current_steps": 15795, "total_steps": 16520, "loss": 0.0, "lr": 2.9357972454404637e-07, "epoch": 19.12227602905569, "percentage": 95.61, "elapsed_time": "0:52:41", "remaining_time": "0:02:25", "throughput": 2046.8, "total_tokens": 6470472}
3179
+ {"current_steps": 15800, "total_steps": 16520, "loss": 0.0, "lr": 2.895576490187041e-07, "epoch": 19.128329297820823, "percentage": 95.64, "elapsed_time": "0:52:42", "remaining_time": "0:02:24", "throughput": 2046.81, "total_tokens": 6472456}
3180
+ {"current_steps": 15805, "total_steps": 16520, "loss": 0.0, "lr": 2.8556315481854943e-07, "epoch": 19.134382566585955, "percentage": 95.67, "elapsed_time": "0:52:43", "remaining_time": "0:02:23", "throughput": 2046.8, "total_tokens": 6474344}
3181
+ {"current_steps": 15810, "total_steps": 16520, "loss": 0.0, "lr": 2.8159624640216597e-07, "epoch": 19.140435835351088, "percentage": 95.7, "elapsed_time": "0:52:44", "remaining_time": "0:02:22", "throughput": 2046.78, "total_tokens": 6476168}
3182
+ {"current_steps": 15815, "total_steps": 16520, "loss": 0.0, "lr": 2.7765692819734236e-07, "epoch": 19.146489104116224, "percentage": 95.73, "elapsed_time": "0:52:45", "remaining_time": "0:02:21", "throughput": 2046.82, "total_tokens": 6478312}
3183
+ {"current_steps": 15820, "total_steps": 16520, "loss": 0.0, "lr": 2.737452046010641e-07, "epoch": 19.152542372881356, "percentage": 95.76, "elapsed_time": "0:52:46", "remaining_time": "0:02:20", "throughput": 2046.84, "total_tokens": 6480360}
3184
+ {"current_steps": 15825, "total_steps": 16520, "loss": 0.0, "lr": 2.6986107997953035e-07, "epoch": 19.15859564164649, "percentage": 95.79, "elapsed_time": "0:52:47", "remaining_time": "0:02:19", "throughput": 2046.89, "total_tokens": 6482536}
3185
+ {"current_steps": 15830, "total_steps": 16520, "loss": 0.0, "lr": 2.660045586681231e-07, "epoch": 19.16464891041162, "percentage": 95.82, "elapsed_time": "0:52:47", "remaining_time": "0:02:18", "throughput": 2046.92, "total_tokens": 6484584}
3186
+ {"current_steps": 15835, "total_steps": 16520, "loss": 0.0, "lr": 2.621756449714158e-07, "epoch": 19.170702179176754, "percentage": 95.85, "elapsed_time": "0:52:48", "remaining_time": "0:02:17", "throughput": 2047.0, "total_tokens": 6486888}
3187
+ {"current_steps": 15840, "total_steps": 16520, "loss": 0.0, "lr": 2.5837434316317574e-07, "epoch": 19.17675544794189, "percentage": 95.88, "elapsed_time": "0:52:49", "remaining_time": "0:02:16", "throughput": 2047.02, "total_tokens": 6488840}
3188
+ {"current_steps": 15845, "total_steps": 16520, "loss": 0.0, "lr": 2.546006574863369e-07, "epoch": 19.182808716707022, "percentage": 95.91, "elapsed_time": "0:52:50", "remaining_time": "0:02:15", "throughput": 2047.01, "total_tokens": 6490696}
3189
+ {"current_steps": 15850, "total_steps": 16520, "loss": 0.0, "lr": 2.508545921530159e-07, "epoch": 19.188861985472155, "percentage": 95.94, "elapsed_time": "0:52:51", "remaining_time": "0:02:14", "throughput": 2047.05, "total_tokens": 6492808}
3190
+ {"current_steps": 15855, "total_steps": 16520, "loss": 0.0, "lr": 2.47136151344507e-07, "epoch": 19.194915254237287, "percentage": 95.97, "elapsed_time": "0:52:52", "remaining_time": "0:02:13", "throughput": 2047.06, "total_tokens": 6494760}
3191
+ {"current_steps": 15860, "total_steps": 16520, "loss": 0.0, "lr": 2.43445339211254e-07, "epoch": 19.20096852300242, "percentage": 96.0, "elapsed_time": "0:52:53", "remaining_time": "0:02:12", "throughput": 2047.1, "total_tokens": 6496840}
3192
+ {"current_steps": 15865, "total_steps": 16520, "loss": 0.0, "lr": 2.3978215987287554e-07, "epoch": 19.207021791767556, "percentage": 96.04, "elapsed_time": "0:52:54", "remaining_time": "0:02:11", "throughput": 2047.13, "total_tokens": 6498920}
3193
+ {"current_steps": 15870, "total_steps": 16520, "loss": 0.0, "lr": 2.361466174181426e-07, "epoch": 19.213075060532688, "percentage": 96.07, "elapsed_time": "0:52:55", "remaining_time": "0:02:10", "throughput": 2047.15, "total_tokens": 6500968}
3194
+ {"current_steps": 15875, "total_steps": 16520, "loss": 0.0, "lr": 2.3253871590497856e-07, "epoch": 19.21912832929782, "percentage": 96.1, "elapsed_time": "0:52:56", "remaining_time": "0:02:09", "throughput": 2047.21, "total_tokens": 6503144}
3195
+ {"current_steps": 15880, "total_steps": 16520, "loss": 0.0, "lr": 2.28958459360451e-07, "epoch": 19.225181598062953, "percentage": 96.13, "elapsed_time": "0:52:57", "remaining_time": "0:02:08", "throughput": 2047.24, "total_tokens": 6505224}
3196
+ {"current_steps": 15885, "total_steps": 16520, "loss": 0.0, "lr": 2.2540585178078e-07, "epoch": 19.231234866828085, "percentage": 96.16, "elapsed_time": "0:52:58", "remaining_time": "0:02:07", "throughput": 2047.25, "total_tokens": 6507176}
3197
+ {"current_steps": 15890, "total_steps": 16520, "loss": 0.0, "lr": 2.21880897131313e-07, "epoch": 19.23728813559322, "percentage": 96.19, "elapsed_time": "0:52:59", "remaining_time": "0:02:06", "throughput": 2047.28, "total_tokens": 6509256}
3198
+ {"current_steps": 15895, "total_steps": 16520, "loss": 0.0, "lr": 2.1838359934653884e-07, "epoch": 19.243341404358354, "percentage": 96.22, "elapsed_time": "0:53:00", "remaining_time": "0:02:05", "throughput": 2047.27, "total_tokens": 6511144}
3199
+ {"current_steps": 15900, "total_steps": 16520, "loss": 0.0, "lr": 2.1491396233007665e-07, "epoch": 19.249394673123486, "percentage": 96.25, "elapsed_time": "0:53:01", "remaining_time": "0:02:04", "throughput": 2047.28, "total_tokens": 6513128}
3200
+ {"current_steps": 15905, "total_steps": 16520, "loss": 0.0, "lr": 2.1147198995466467e-07, "epoch": 19.25544794188862, "percentage": 96.28, "elapsed_time": "0:53:02", "remaining_time": "0:02:03", "throughput": 2047.29, "total_tokens": 6515080}
3201
+ {"current_steps": 15910, "total_steps": 16520, "loss": 0.0, "lr": 2.0805768606217412e-07, "epoch": 19.26150121065375, "percentage": 96.31, "elapsed_time": "0:53:03", "remaining_time": "0:02:02", "throughput": 2047.33, "total_tokens": 6517192}
3202
+ {"current_steps": 15915, "total_steps": 16520, "loss": 0.0, "lr": 2.046710544635788e-07, "epoch": 19.267554479418887, "percentage": 96.34, "elapsed_time": "0:53:04", "remaining_time": "0:02:01", "throughput": 2047.36, "total_tokens": 6519240}
3203
+ {"current_steps": 15920, "total_steps": 16520, "loss": 0.0, "lr": 2.0131209893897994e-07, "epoch": 19.27360774818402, "percentage": 96.37, "elapsed_time": "0:53:05", "remaining_time": "0:02:00", "throughput": 2047.38, "total_tokens": 6521256}
3204
+ {"current_steps": 15925, "total_steps": 16520, "loss": 0.0, "lr": 1.9798082323757016e-07, "epoch": 19.279661016949152, "percentage": 96.4, "elapsed_time": "0:53:06", "remaining_time": "0:01:59", "throughput": 2047.43, "total_tokens": 6523400}
3205
+ {"current_steps": 15930, "total_steps": 16520, "loss": 0.0, "lr": 1.94677231077664e-07, "epoch": 19.285714285714285, "percentage": 96.43, "elapsed_time": "0:53:07", "remaining_time": "0:01:58", "throughput": 2047.45, "total_tokens": 6525416}
3206
+ {"current_steps": 15935, "total_steps": 16520, "loss": 0.0, "lr": 1.9140132614666463e-07, "epoch": 19.291767554479417, "percentage": 96.46, "elapsed_time": "0:53:08", "remaining_time": "0:01:57", "throughput": 2047.49, "total_tokens": 6527496}
3207
+ {"current_steps": 15940, "total_steps": 16520, "loss": 0.0, "lr": 1.881531121010749e-07, "epoch": 19.297820823244553, "percentage": 96.49, "elapsed_time": "0:53:08", "remaining_time": "0:01:56", "throughput": 2047.52, "total_tokens": 6529512}
3208
+ {"current_steps": 15945, "total_steps": 16520, "loss": 0.0, "lr": 1.8493259256649186e-07, "epoch": 19.303874092009686, "percentage": 96.52, "elapsed_time": "0:53:09", "remaining_time": "0:01:55", "throughput": 2047.57, "total_tokens": 6531688}
3209
+ {"current_steps": 15950, "total_steps": 16520, "loss": 0.0, "lr": 1.8173977113759288e-07, "epoch": 19.309927360774818, "percentage": 96.55, "elapsed_time": "0:53:10", "remaining_time": "0:01:54", "throughput": 2047.56, "total_tokens": 6533544}
3210
+ {"current_steps": 15955, "total_steps": 16520, "loss": 0.0, "lr": 1.7857465137814944e-07, "epoch": 19.31598062953995, "percentage": 96.58, "elapsed_time": "0:53:11", "remaining_time": "0:01:53", "throughput": 2047.59, "total_tokens": 6535592}
3211
+ {"current_steps": 15960, "total_steps": 16520, "loss": 0.0, "lr": 1.7543723682100777e-07, "epoch": 19.322033898305083, "percentage": 96.61, "elapsed_time": "0:53:12", "remaining_time": "0:01:52", "throughput": 2047.65, "total_tokens": 6537768}
3212
+ {"current_steps": 15965, "total_steps": 16520, "loss": 0.0, "lr": 1.7232753096808607e-07, "epoch": 19.32808716707022, "percentage": 96.64, "elapsed_time": "0:53:13", "remaining_time": "0:01:51", "throughput": 2047.65, "total_tokens": 6539720}
3213
+ {"current_steps": 15970, "total_steps": 16520, "loss": 0.0, "lr": 1.6924553729038285e-07, "epoch": 19.33414043583535, "percentage": 96.67, "elapsed_time": "0:53:14", "remaining_time": "0:01:50", "throughput": 2047.65, "total_tokens": 6541608}
3214
+ {"current_steps": 15975, "total_steps": 16520, "loss": 0.0, "lr": 1.661912592279602e-07, "epoch": 19.340193704600484, "percentage": 96.7, "elapsed_time": "0:53:15", "remaining_time": "0:01:49", "throughput": 2047.66, "total_tokens": 6543592}
3215
+ {"current_steps": 15980, "total_steps": 16520, "loss": 0.0, "lr": 1.6316470018994112e-07, "epoch": 19.346246973365616, "percentage": 96.73, "elapsed_time": "0:53:16", "remaining_time": "0:01:48", "throughput": 2047.72, "total_tokens": 6545800}
3216
+ {"current_steps": 15985, "total_steps": 16520, "loss": 0.0, "lr": 1.6016586355452056e-07, "epoch": 19.352300242130752, "percentage": 96.76, "elapsed_time": "0:53:17", "remaining_time": "0:01:47", "throughput": 2047.77, "total_tokens": 6547912}
3217
+ {"current_steps": 15990, "total_steps": 16520, "loss": 0.0, "lr": 1.571947526689349e-07, "epoch": 19.358353510895885, "percentage": 96.79, "elapsed_time": "0:53:18", "remaining_time": "0:01:46", "throughput": 2047.83, "total_tokens": 6550088}
3218
+ {"current_steps": 15995, "total_steps": 16520, "loss": 0.0, "lr": 1.5425137084948692e-07, "epoch": 19.364406779661017, "percentage": 96.82, "elapsed_time": "0:53:19", "remaining_time": "0:01:45", "throughput": 2047.87, "total_tokens": 6552200}
3219
+ {"current_steps": 16000, "total_steps": 16520, "loss": 0.0, "lr": 1.5133572138152364e-07, "epoch": 19.37046004842615, "percentage": 96.85, "elapsed_time": "0:53:20", "remaining_time": "0:01:44", "throughput": 2047.89, "total_tokens": 6554184}
3220
+ {"current_steps": 16005, "total_steps": 16520, "loss": 0.0, "lr": 1.4844780751943345e-07, "epoch": 19.376513317191282, "percentage": 96.88, "elapsed_time": "0:53:21", "remaining_time": "0:01:43", "throughput": 2047.91, "total_tokens": 6556200}
3221
+ {"current_steps": 16010, "total_steps": 16520, "loss": 0.0, "lr": 1.4558763248665175e-07, "epoch": 19.38256658595642, "percentage": 96.91, "elapsed_time": "0:53:22", "remaining_time": "0:01:42", "throughput": 2047.95, "total_tokens": 6558312}
3222
+ {"current_steps": 16015, "total_steps": 16520, "loss": 0.0, "lr": 1.4275519947565542e-07, "epoch": 19.38861985472155, "percentage": 96.94, "elapsed_time": "0:53:23", "remaining_time": "0:01:41", "throughput": 2048.0, "total_tokens": 6560456}
3223
+ {"current_steps": 16020, "total_steps": 16520, "loss": 0.0, "lr": 1.3995051164794604e-07, "epoch": 19.394673123486683, "percentage": 96.97, "elapsed_time": "0:53:24", "remaining_time": "0:01:40", "throughput": 2048.04, "total_tokens": 6562568}
3224
+ {"current_steps": 16025, "total_steps": 16520, "loss": 0.0, "lr": 1.3717357213406667e-07, "epoch": 19.400726392251816, "percentage": 97.0, "elapsed_time": "0:53:25", "remaining_time": "0:01:39", "throughput": 2048.06, "total_tokens": 6564584}
3225
+ {"current_steps": 16030, "total_steps": 16520, "loss": 0.0, "lr": 1.3442438403358515e-07, "epoch": 19.406779661016948, "percentage": 97.03, "elapsed_time": "0:53:26", "remaining_time": "0:01:38", "throughput": 2048.08, "total_tokens": 6566568}
3226
+ {"current_steps": 16035, "total_steps": 16520, "loss": 0.0, "lr": 1.3170295041509128e-07, "epoch": 19.412832929782084, "percentage": 97.06, "elapsed_time": "0:53:27", "remaining_time": "0:01:37", "throughput": 2048.07, "total_tokens": 6568456}
3227
+ {"current_steps": 16040, "total_steps": 16520, "loss": 0.0, "lr": 1.290092743161997e-07, "epoch": 19.418886198547217, "percentage": 97.09, "elapsed_time": "0:53:28", "remaining_time": "0:01:36", "throughput": 2048.08, "total_tokens": 6570440}
3228
+ {"current_steps": 16045, "total_steps": 16520, "loss": 0.0, "lr": 1.2634335874353585e-07, "epoch": 19.42493946731235, "percentage": 97.12, "elapsed_time": "0:53:29", "remaining_time": "0:01:35", "throughput": 2048.1, "total_tokens": 6572392}
3229
+ {"current_steps": 16050, "total_steps": 16520, "loss": 0.0, "lr": 1.2370520667274733e-07, "epoch": 19.43099273607748, "percentage": 97.15, "elapsed_time": "0:53:29", "remaining_time": "0:01:33", "throughput": 2048.12, "total_tokens": 6574408}
3230
+ {"current_steps": 16055, "total_steps": 16520, "loss": 0.0, "lr": 1.2109482104848692e-07, "epoch": 19.437046004842614, "percentage": 97.19, "elapsed_time": "0:53:30", "remaining_time": "0:01:32", "throughput": 2048.1, "total_tokens": 6576264}
3231
+ {"current_steps": 16060, "total_steps": 16520, "loss": 0.0, "lr": 1.1851220478442115e-07, "epoch": 19.44309927360775, "percentage": 97.22, "elapsed_time": "0:53:31", "remaining_time": "0:01:31", "throughput": 2048.12, "total_tokens": 6578280}
3232
+ {"current_steps": 16065, "total_steps": 16520, "loss": 0.0, "lr": 1.1595736076321362e-07, "epoch": 19.449152542372882, "percentage": 97.25, "elapsed_time": "0:53:32", "remaining_time": "0:01:30", "throughput": 2048.15, "total_tokens": 6580328}
3233
+ {"current_steps": 16070, "total_steps": 16520, "loss": 0.0, "lr": 1.134302918365332e-07, "epoch": 19.455205811138015, "percentage": 97.28, "elapsed_time": "0:53:33", "remaining_time": "0:01:29", "throughput": 2048.17, "total_tokens": 6582376}
3234
+ {"current_steps": 16075, "total_steps": 16520, "loss": 0.0, "lr": 1.1093100082504581e-07, "epoch": 19.461259079903147, "percentage": 97.31, "elapsed_time": "0:53:34", "remaining_time": "0:01:28", "throughput": 2048.23, "total_tokens": 6584552}
3235
+ {"current_steps": 16080, "total_steps": 16520, "loss": 0.0, "lr": 1.0845949051841441e-07, "epoch": 19.46731234866828, "percentage": 97.34, "elapsed_time": "0:53:35", "remaining_time": "0:01:27", "throughput": 2048.24, "total_tokens": 6586504}
3236
+ {"current_steps": 16085, "total_steps": 16520, "loss": 0.0, "lr": 1.0601576367529065e-07, "epoch": 19.473365617433416, "percentage": 97.37, "elapsed_time": "0:53:36", "remaining_time": "0:01:26", "throughput": 2048.29, "total_tokens": 6588648}
3237
+ {"current_steps": 16090, "total_steps": 16520, "loss": 0.0, "lr": 1.0359982302331484e-07, "epoch": 19.479418886198548, "percentage": 97.4, "elapsed_time": "0:53:37", "remaining_time": "0:01:25", "throughput": 2048.33, "total_tokens": 6590760}
3238
+ {"current_steps": 16095, "total_steps": 16520, "loss": 0.0, "lr": 1.0121167125911601e-07, "epoch": 19.48547215496368, "percentage": 97.43, "elapsed_time": "0:53:38", "remaining_time": "0:01:24", "throughput": 2048.38, "total_tokens": 6593000}
3239
+ {"current_steps": 16100, "total_steps": 16520, "loss": 0.0, "lr": 9.885131104830358e-08, "epoch": 19.491525423728813, "percentage": 97.46, "elapsed_time": "0:53:39", "remaining_time": "0:01:23", "throughput": 2048.43, "total_tokens": 6595176}
3240
+ {"current_steps": 16105, "total_steps": 16520, "loss": 0.0, "lr": 9.651874502546454e-08, "epoch": 19.497578692493946, "percentage": 97.49, "elapsed_time": "0:53:40", "remaining_time": "0:01:22", "throughput": 2048.47, "total_tokens": 6597224}
3241
+ {"current_steps": 16110, "total_steps": 16520, "loss": 0.0, "lr": 9.421397579416625e-08, "epoch": 19.50363196125908, "percentage": 97.52, "elapsed_time": "0:53:41", "remaining_time": "0:01:21", "throughput": 2048.51, "total_tokens": 6599304}
3242
+ {"current_steps": 16115, "total_steps": 16520, "loss": 0.0, "lr": 9.193700592694532e-08, "epoch": 19.509685230024214, "percentage": 97.55, "elapsed_time": "0:53:42", "remaining_time": "0:01:20", "throughput": 2048.52, "total_tokens": 6601288}
3243
+ {"current_steps": 16120, "total_steps": 16520, "loss": 0.0, "lr": 8.9687837965316e-08, "epoch": 19.515738498789347, "percentage": 97.58, "elapsed_time": "0:53:43", "remaining_time": "0:01:19", "throughput": 2048.56, "total_tokens": 6603368}
3244
+ {"current_steps": 16125, "total_steps": 16520, "loss": 0.0, "lr": 8.74664744197562e-08, "epoch": 19.52179176755448, "percentage": 97.61, "elapsed_time": "0:53:44", "remaining_time": "0:01:18", "throughput": 2048.57, "total_tokens": 6605352}
3245
+ {"current_steps": 16130, "total_steps": 16520, "loss": 0.0, "lr": 8.527291776970759e-08, "epoch": 19.52784503631961, "percentage": 97.64, "elapsed_time": "0:53:45", "remaining_time": "0:01:17", "throughput": 2048.56, "total_tokens": 6607240}
3246
+ {"current_steps": 16135, "total_steps": 16520, "loss": 0.0, "lr": 8.310717046358108e-08, "epoch": 19.533898305084747, "percentage": 97.67, "elapsed_time": "0:53:46", "remaining_time": "0:01:16", "throughput": 2048.58, "total_tokens": 6609256}
3247
+ {"current_steps": 16140, "total_steps": 16520, "loss": 0.0, "lr": 8.096923491873465e-08, "epoch": 19.53995157384988, "percentage": 97.7, "elapsed_time": "0:53:47", "remaining_time": "0:01:15", "throughput": 2048.65, "total_tokens": 6611528}
3248
+ {"current_steps": 16145, "total_steps": 16520, "loss": 0.0, "lr": 7.885911352149832e-08, "epoch": 19.546004842615012, "percentage": 97.73, "elapsed_time": "0:53:48", "remaining_time": "0:01:14", "throughput": 2048.68, "total_tokens": 6613576}
3249
+ {"current_steps": 16150, "total_steps": 16520, "loss": 0.0, "lr": 7.677680862714365e-08, "epoch": 19.552058111380145, "percentage": 97.76, "elapsed_time": "0:53:49", "remaining_time": "0:01:13", "throughput": 2048.72, "total_tokens": 6615656}
3250
+ {"current_steps": 16155, "total_steps": 16520, "loss": 0.0, "lr": 7.472232255990585e-08, "epoch": 19.558111380145277, "percentage": 97.79, "elapsed_time": "0:53:50", "remaining_time": "0:01:12", "throughput": 2048.73, "total_tokens": 6617640}
3251
+ {"current_steps": 16160, "total_steps": 16520, "loss": 0.0, "lr": 7.269565761295893e-08, "epoch": 19.564164648910413, "percentage": 97.82, "elapsed_time": "0:53:51", "remaining_time": "0:01:11", "throughput": 2048.79, "total_tokens": 6619848}
3252
+ {"current_steps": 16165, "total_steps": 16520, "loss": 0.0, "lr": 7.069681604842949e-08, "epoch": 19.570217917675546, "percentage": 97.85, "elapsed_time": "0:53:52", "remaining_time": "0:01:10", "throughput": 2048.85, "total_tokens": 6622024}
3253
+ {"current_steps": 16170, "total_steps": 16520, "loss": 0.0, "lr": 6.872580009738283e-08, "epoch": 19.576271186440678, "percentage": 97.88, "elapsed_time": "0:53:53", "remaining_time": "0:01:09", "throughput": 2048.88, "total_tokens": 6624072}
3254
+ {"current_steps": 16175, "total_steps": 16520, "loss": 0.0, "lr": 6.678261195983693e-08, "epoch": 19.58232445520581, "percentage": 97.91, "elapsed_time": "0:53:53", "remaining_time": "0:01:08", "throughput": 2048.9, "total_tokens": 6626120}
3255
+ {"current_steps": 16180, "total_steps": 16520, "loss": 0.0, "lr": 6.486725380473457e-08, "epoch": 19.588377723970943, "percentage": 97.94, "elapsed_time": "0:53:54", "remaining_time": "0:01:07", "throughput": 2048.93, "total_tokens": 6628168}
3256
+ {"current_steps": 16185, "total_steps": 16520, "loss": 0.0, "lr": 6.297972776996286e-08, "epoch": 19.59443099273608, "percentage": 97.97, "elapsed_time": "0:53:55", "remaining_time": "0:01:06", "throughput": 2049.0, "total_tokens": 6630376}
3257
+ {"current_steps": 16190, "total_steps": 16520, "loss": 0.0, "lr": 6.112003596234484e-08, "epoch": 19.60048426150121, "percentage": 98.0, "elapsed_time": "0:53:56", "remaining_time": "0:01:05", "throughput": 2049.0, "total_tokens": 6632520}
3258
+ {"current_steps": 16195, "total_steps": 16520, "loss": 0.0, "lr": 5.9288180457633954e-08, "epoch": 19.606537530266344, "percentage": 98.03, "elapsed_time": "0:53:57", "remaining_time": "0:01:04", "throughput": 2049.02, "total_tokens": 6634568}
3259
+ {"current_steps": 16200, "total_steps": 16520, "loss": 0.0, "lr": 5.7484163300508545e-08, "epoch": 19.612590799031477, "percentage": 98.06, "elapsed_time": "0:53:58", "remaining_time": "0:01:03", "throughput": 2049.02, "total_tokens": 6636520}
3260
+ {"current_steps": 16205, "total_steps": 16520, "loss": 0.0, "lr": 5.570798650458009e-08, "epoch": 19.61864406779661, "percentage": 98.09, "elapsed_time": "0:53:59", "remaining_time": "0:01:02", "throughput": 2049.02, "total_tokens": 6638408}
3261
+ {"current_steps": 16210, "total_steps": 16520, "loss": 0.0, "lr": 5.3959652052384954e-08, "epoch": 19.624697336561745, "percentage": 98.12, "elapsed_time": "0:54:00", "remaining_time": "0:01:01", "throughput": 2049.08, "total_tokens": 6640616}
3262
+ {"current_steps": 16215, "total_steps": 16520, "loss": 0.0, "lr": 5.2239161895378806e-08, "epoch": 19.630750605326877, "percentage": 98.15, "elapsed_time": "0:54:01", "remaining_time": "0:01:00", "throughput": 2049.12, "total_tokens": 6642696}
3263
+ {"current_steps": 16220, "total_steps": 16520, "loss": 0.0, "lr": 5.054651795393939e-08, "epoch": 19.63680387409201, "percentage": 98.18, "elapsed_time": "0:54:02", "remaining_time": "0:00:59", "throughput": 2049.14, "total_tokens": 6644744}
3264
+ {"current_steps": 16225, "total_steps": 16520, "loss": 0.0, "lr": 4.888172211736375e-08, "epoch": 19.642857142857142, "percentage": 98.21, "elapsed_time": "0:54:03", "remaining_time": "0:00:58", "throughput": 2049.19, "total_tokens": 6646888}
3265
+ {"current_steps": 16230, "total_steps": 16520, "loss": 0.0, "lr": 4.724477624386825e-08, "epoch": 19.648910411622275, "percentage": 98.24, "elapsed_time": "0:54:04", "remaining_time": "0:00:57", "throughput": 2049.25, "total_tokens": 6649128}
3266
+ {"current_steps": 16235, "total_steps": 16520, "loss": 0.0, "lr": 4.563568216057745e-08, "epoch": 19.65496368038741, "percentage": 98.27, "elapsed_time": "0:54:05", "remaining_time": "0:00:56", "throughput": 2049.3, "total_tokens": 6651272}
3267
+ {"current_steps": 16240, "total_steps": 16520, "loss": 0.0, "lr": 4.405444166353523e-08, "epoch": 19.661016949152543, "percentage": 98.31, "elapsed_time": "0:54:06", "remaining_time": "0:00:55", "throughput": 2049.32, "total_tokens": 6653256}
3268
+ {"current_steps": 16245, "total_steps": 16520, "loss": 0.0, "lr": 4.25010565176881e-08, "epoch": 19.667070217917676, "percentage": 98.34, "elapsed_time": "0:54:07", "remaining_time": "0:00:54", "throughput": 2049.33, "total_tokens": 6655240}
3269
+ {"current_steps": 16250, "total_steps": 16520, "loss": 0.0, "lr": 4.097552845689634e-08, "epoch": 19.673123486682808, "percentage": 98.37, "elapsed_time": "0:54:08", "remaining_time": "0:00:53", "throughput": 2049.34, "total_tokens": 6657160}
3270
+ {"current_steps": 16255, "total_steps": 16520, "loss": 0.0, "lr": 3.9477859183925657e-08, "epoch": 19.67917675544794, "percentage": 98.4, "elapsed_time": "0:54:09", "remaining_time": "0:00:52", "throughput": 2049.35, "total_tokens": 6659144}
3271
+ {"current_steps": 16260, "total_steps": 16520, "loss": 0.0, "lr": 3.8008050370444415e-08, "epoch": 19.685230024213077, "percentage": 98.43, "elapsed_time": "0:54:10", "remaining_time": "0:00:51", "throughput": 2049.37, "total_tokens": 6661160}
3272
+ {"current_steps": 16265, "total_steps": 16520, "loss": 0.0, "lr": 3.656610365702917e-08, "epoch": 19.69128329297821, "percentage": 98.46, "elapsed_time": "0:54:11", "remaining_time": "0:00:50", "throughput": 2049.41, "total_tokens": 6663272}
3273
+ {"current_steps": 16270, "total_steps": 16520, "loss": 0.0, "lr": 3.515202065314804e-08, "epoch": 19.69733656174334, "percentage": 98.49, "elapsed_time": "0:54:12", "remaining_time": "0:00:49", "throughput": 2049.43, "total_tokens": 6665288}
3274
+ {"current_steps": 16275, "total_steps": 16520, "loss": 0.0, "lr": 3.3765802937177346e-08, "epoch": 19.703389830508474, "percentage": 98.52, "elapsed_time": "0:54:13", "remaining_time": "0:00:48", "throughput": 2049.46, "total_tokens": 6667336}
3275
+ {"current_steps": 16280, "total_steps": 16520, "loss": 0.0, "lr": 3.240745205638773e-08, "epoch": 19.709443099273606, "percentage": 98.55, "elapsed_time": "0:54:14", "remaining_time": "0:00:47", "throughput": 2049.46, "total_tokens": 6669288}
3276
+ {"current_steps": 16285, "total_steps": 16520, "loss": 0.0, "lr": 3.107696952694139e-08, "epoch": 19.715496368038743, "percentage": 98.58, "elapsed_time": "0:54:15", "remaining_time": "0:00:46", "throughput": 2049.49, "total_tokens": 6671368}
3277
+ {"current_steps": 16290, "total_steps": 16520, "loss": 0.0, "lr": 2.977435683389762e-08, "epoch": 19.721549636803875, "percentage": 98.61, "elapsed_time": "0:54:16", "remaining_time": "0:00:45", "throughput": 2049.52, "total_tokens": 6673448}
3278
+ {"current_steps": 16295, "total_steps": 16520, "loss": 0.0, "lr": 2.8499615431212824e-08, "epoch": 19.727602905569007, "percentage": 98.64, "elapsed_time": "0:54:17", "remaining_time": "0:00:44", "throughput": 2049.54, "total_tokens": 6675432}
3279
+ {"current_steps": 16300, "total_steps": 16520, "loss": 0.0, "lr": 2.725274674172107e-08, "epoch": 19.73365617433414, "percentage": 98.67, "elapsed_time": "0:54:17", "remaining_time": "0:00:43", "throughput": 2049.54, "total_tokens": 6677384}
3280
+ {"current_steps": 16305, "total_steps": 16520, "loss": 0.0, "lr": 2.6033752157161862e-08, "epoch": 19.739709443099272, "percentage": 98.7, "elapsed_time": "0:54:18", "remaining_time": "0:00:42", "throughput": 2049.56, "total_tokens": 6679400}
3281
+ {"current_steps": 16310, "total_steps": 16520, "loss": 0.0, "lr": 2.4842633038146822e-08, "epoch": 19.74576271186441, "percentage": 98.73, "elapsed_time": "0:54:19", "remaining_time": "0:00:41", "throughput": 2049.6, "total_tokens": 6681512}
3282
+ {"current_steps": 16315, "total_steps": 16520, "loss": 0.0, "lr": 2.367939071418468e-08, "epoch": 19.75181598062954, "percentage": 98.76, "elapsed_time": "0:54:20", "remaining_time": "0:00:40", "throughput": 2049.64, "total_tokens": 6683624}
3283
+ {"current_steps": 16320, "total_steps": 16520, "loss": 0.0, "lr": 2.2544026483664606e-08, "epoch": 19.757869249394673, "percentage": 98.79, "elapsed_time": "0:54:21", "remaining_time": "0:00:39", "throughput": 2049.67, "total_tokens": 6685672}
3284
+ {"current_steps": 16325, "total_steps": 16520, "loss": 0.0, "lr": 2.1436541613853444e-08, "epoch": 19.763922518159806, "percentage": 98.82, "elapsed_time": "0:54:22", "remaining_time": "0:00:38", "throughput": 2049.72, "total_tokens": 6687816}
3285
+ {"current_steps": 16330, "total_steps": 16520, "loss": 0.0, "lr": 2.03569373409096e-08, "epoch": 19.769975786924938, "percentage": 98.85, "elapsed_time": "0:54:23", "remaining_time": "0:00:37", "throughput": 2049.73, "total_tokens": 6689768}
3286
+ {"current_steps": 16335, "total_steps": 16520, "loss": 0.0, "lr": 1.930521486986636e-08, "epoch": 19.776029055690074, "percentage": 98.88, "elapsed_time": "0:54:24", "remaining_time": "0:00:36", "throughput": 2049.76, "total_tokens": 6691848}
3287
+ {"current_steps": 16340, "total_steps": 16520, "loss": 0.0, "lr": 1.8281375374634702e-08, "epoch": 19.782082324455207, "percentage": 98.91, "elapsed_time": "0:54:25", "remaining_time": "0:00:35", "throughput": 2049.79, "total_tokens": 6693896}
3288
+ {"current_steps": 16345, "total_steps": 16520, "loss": 0.0, "lr": 1.7285419998006035e-08, "epoch": 19.78813559322034, "percentage": 98.94, "elapsed_time": "0:54:26", "remaining_time": "0:00:34", "throughput": 2049.84, "total_tokens": 6696104}
3289
+ {"current_steps": 16350, "total_steps": 16520, "loss": 0.0, "lr": 1.6317349851646678e-08, "epoch": 19.79418886198547, "percentage": 98.97, "elapsed_time": "0:54:27", "remaining_time": "0:00:33", "throughput": 2049.85, "total_tokens": 6698088}
3290
+ {"current_steps": 16355, "total_steps": 16520, "loss": 0.0, "lr": 1.5377166016097844e-08, "epoch": 19.800242130750604, "percentage": 99.0, "elapsed_time": "0:54:28", "remaining_time": "0:00:32", "throughput": 2049.89, "total_tokens": 6700168}
3291
+ {"current_steps": 16360, "total_steps": 16520, "loss": 0.0, "lr": 1.4464869540772863e-08, "epoch": 19.80629539951574, "percentage": 99.03, "elapsed_time": "0:54:29", "remaining_time": "0:00:31", "throughput": 2049.92, "total_tokens": 6702280}
3292
+ {"current_steps": 16365, "total_steps": 16520, "loss": 0.0, "lr": 1.3580461443962743e-08, "epoch": 19.812348668280872, "percentage": 99.06, "elapsed_time": "0:54:30", "remaining_time": "0:00:30", "throughput": 2049.96, "total_tokens": 6704360}
3293
+ {"current_steps": 16370, "total_steps": 16520, "loss": 0.0, "lr": 1.2723942712825065e-08, "epoch": 19.818401937046005, "percentage": 99.09, "elapsed_time": "0:54:31", "remaining_time": "0:00:29", "throughput": 2049.99, "total_tokens": 6706408}
3294
+ {"current_steps": 16375, "total_steps": 16520, "loss": 0.0, "lr": 1.1895314303389526e-08, "epoch": 19.824455205811137, "percentage": 99.12, "elapsed_time": "0:54:32", "remaining_time": "0:00:28", "throughput": 2049.99, "total_tokens": 6708328}
3295
+ {"current_steps": 16380, "total_steps": 16520, "loss": 0.0, "lr": 1.109457714055795e-08, "epoch": 19.83050847457627, "percentage": 99.15, "elapsed_time": "0:54:33", "remaining_time": "0:00:27", "throughput": 2050.01, "total_tokens": 6710312}
3296
+ {"current_steps": 16385, "total_steps": 16520, "loss": 0.0, "lr": 1.0321732118095951e-08, "epoch": 19.836561743341406, "percentage": 99.18, "elapsed_time": "0:54:34", "remaining_time": "0:00:26", "throughput": 2050.04, "total_tokens": 6712360}
3297
+ {"current_steps": 16390, "total_steps": 16520, "loss": 0.0, "lr": 9.576780098638494e-09, "epoch": 19.84261501210654, "percentage": 99.21, "elapsed_time": "0:54:35", "remaining_time": "0:00:25", "throughput": 2050.05, "total_tokens": 6714344}
3298
+ {"current_steps": 16395, "total_steps": 16520, "loss": 0.0, "lr": 8.859721913684339e-09, "epoch": 19.84866828087167, "percentage": 99.24, "elapsed_time": "0:54:36", "remaining_time": "0:00:24", "throughput": 2050.12, "total_tokens": 6716616}
3299
+ {"current_steps": 16400, "total_steps": 16520, "loss": 0.0, "lr": 8.170558363607139e-09, "epoch": 19.854721549636803, "percentage": 99.27, "elapsed_time": "0:54:37", "remaining_time": "0:00:23", "throughput": 2050.17, "total_tokens": 6718728}
3300
+ {"current_steps": 16405, "total_steps": 16520, "loss": 0.0, "lr": 7.50929021763047e-09, "epoch": 19.860774818401936, "percentage": 99.3, "elapsed_time": "0:54:38", "remaining_time": "0:00:22", "throughput": 2050.18, "total_tokens": 6720712}
3301
+ {"current_steps": 16410, "total_steps": 16520, "loss": 0.0, "lr": 6.8759182138528055e-09, "epoch": 19.86682808716707, "percentage": 99.33, "elapsed_time": "0:54:39", "remaining_time": "0:00:21", "throughput": 2050.18, "total_tokens": 6722632}
3302
+ {"current_steps": 16415, "total_steps": 16520, "loss": 0.0, "lr": 6.2704430592336326e-09, "epoch": 19.872881355932204, "percentage": 99.36, "elapsed_time": "0:54:40", "remaining_time": "0:00:20", "throughput": 2050.2, "total_tokens": 6724648}
3303
+ {"current_steps": 16420, "total_steps": 16520, "loss": 0.0, "lr": 5.692865429590688e-09, "epoch": 19.878934624697337, "percentage": 99.39, "elapsed_time": "0:54:40", "remaining_time": "0:00:19", "throughput": 2050.21, "total_tokens": 6726664}
3304
+ {"current_steps": 16425, "total_steps": 16520, "loss": 0.0, "lr": 5.143185969602726e-09, "epoch": 19.88498789346247, "percentage": 99.42, "elapsed_time": "0:54:41", "remaining_time": "0:00:18", "throughput": 2050.2, "total_tokens": 6728552}
3305
+ {"current_steps": 16430, "total_steps": 16520, "loss": 0.0, "lr": 4.6214052928150734e-09, "epoch": 19.8910411622276, "percentage": 99.46, "elapsed_time": "0:54:42", "remaining_time": "0:00:17", "throughput": 2050.22, "total_tokens": 6730536}
3306
+ {"current_steps": 16435, "total_steps": 16520, "loss": 0.0, "lr": 4.127523981631298e-09, "epoch": 19.897094430992738, "percentage": 99.49, "elapsed_time": "0:54:43", "remaining_time": "0:00:16", "throughput": 2050.24, "total_tokens": 6732552}
3307
+ {"current_steps": 16440, "total_steps": 16520, "loss": 0.0, "lr": 3.661542587304889e-09, "epoch": 19.90314769975787, "percentage": 99.52, "elapsed_time": "0:54:44", "remaining_time": "0:00:15", "throughput": 2050.25, "total_tokens": 6734536}
3308
+ {"current_steps": 16445, "total_steps": 16520, "loss": 0.0, "lr": 3.2234616299642306e-09, "epoch": 19.909200968523002, "percentage": 99.55, "elapsed_time": "0:54:45", "remaining_time": "0:00:14", "throughput": 2050.29, "total_tokens": 6736680}
3309
+ {"current_steps": 16450, "total_steps": 16520, "loss": 0.0, "lr": 2.813281598579298e-09, "epoch": 19.915254237288135, "percentage": 99.58, "elapsed_time": "0:54:46", "remaining_time": "0:00:13", "throughput": 2050.32, "total_tokens": 6738728}
3310
+ {"current_steps": 16455, "total_steps": 16520, "loss": 0.0, "lr": 2.431002950989414e-09, "epoch": 19.921307506053267, "percentage": 99.61, "elapsed_time": "0:54:47", "remaining_time": "0:00:12", "throughput": 2050.34, "total_tokens": 6740712}
3311
+ {"current_steps": 16460, "total_steps": 16520, "loss": 0.0, "lr": 2.076626113886593e-09, "epoch": 19.927360774818403, "percentage": 99.64, "elapsed_time": "0:54:48", "remaining_time": "0:00:11", "throughput": 2050.34, "total_tokens": 6742664}
3312
+ {"current_steps": 16465, "total_steps": 16520, "loss": 0.0, "lr": 1.7501514828183185e-09, "epoch": 19.933414043583536, "percentage": 99.67, "elapsed_time": "0:54:49", "remaining_time": "0:00:10", "throughput": 2050.37, "total_tokens": 6744744}
3313
+ {"current_steps": 16470, "total_steps": 16520, "loss": 0.0, "lr": 1.4515794221875434e-09, "epoch": 19.93946731234867, "percentage": 99.7, "elapsed_time": "0:54:50", "remaining_time": "0:00:09", "throughput": 2050.38, "total_tokens": 6746728}
3314
+ {"current_steps": 16475, "total_steps": 16520, "loss": 0.0, "lr": 1.1809102652610148e-09, "epoch": 19.9455205811138, "percentage": 99.73, "elapsed_time": "0:54:51", "remaining_time": "0:00:08", "throughput": 2050.4, "total_tokens": 6748776}
3315
+ {"current_steps": 16480, "total_steps": 16520, "loss": 0.0, "lr": 9.381443141470714e-10, "epoch": 19.951573849878933, "percentage": 99.76, "elapsed_time": "0:54:52", "remaining_time": "0:00:07", "throughput": 2050.44, "total_tokens": 6750888}
3316
+ {"current_steps": 16485, "total_steps": 16520, "loss": 0.0, "lr": 7.23281839820622e-10, "epoch": 19.95762711864407, "percentage": 99.79, "elapsed_time": "0:54:53", "remaining_time": "0:00:06", "throughput": 2050.47, "total_tokens": 6752936}
3317
+ {"current_steps": 16490, "total_steps": 16520, "loss": 0.0, "lr": 5.363230821064935e-10, "epoch": 19.9636803874092, "percentage": 99.82, "elapsed_time": "0:54:54", "remaining_time": "0:00:05", "throughput": 2050.5, "total_tokens": 6754984}
3318
+ {"current_steps": 16495, "total_steps": 16520, "loss": 0.0, "lr": 3.772682496849811e-10, "epoch": 19.969733656174334, "percentage": 99.85, "elapsed_time": "0:54:55", "remaining_time": "0:00:04", "throughput": 2050.54, "total_tokens": 6757128}
3319
+ {"current_steps": 16500, "total_steps": 16520, "loss": 0.0, "lr": 2.4611752008907307e-10, "epoch": 19.975786924939467, "percentage": 99.88, "elapsed_time": "0:54:56", "remaining_time": "0:00:03", "throughput": 2050.55, "total_tokens": 6759080}
3320
+ {"current_steps": 16505, "total_steps": 16520, "loss": 0.0, "lr": 1.4287103970722638e-10, "epoch": 19.9818401937046, "percentage": 99.91, "elapsed_time": "0:54:57", "remaining_time": "0:00:02", "throughput": 2050.57, "total_tokens": 6761128}
3321
+ {"current_steps": 16510, "total_steps": 16520, "loss": 0.0, "lr": 6.752892378059095e-11, "epoch": 19.987893462469735, "percentage": 99.94, "elapsed_time": "0:54:58", "remaining_time": "0:00:01", "throughput": 2050.61, "total_tokens": 6763240}
3322
+ {"current_steps": 16515, "total_steps": 16520, "loss": 0.0, "lr": 2.0091256403009794e-11, "epoch": 19.993946731234868, "percentage": 99.97, "elapsed_time": "0:54:59", "remaining_time": "0:00:00", "throughput": 2050.64, "total_tokens": 6765288}
3323
+ {"current_steps": 16520, "total_steps": 16520, "loss": 0.0, "lr": 5.58090529345634e-13, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:55:00", "remaining_time": "0:00:00", "throughput": 2050.57, "total_tokens": 6767120}
3324
+ {"current_steps": 16520, "total_steps": 16520, "eval_loss": 0.4480169117450714, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:55:05", "remaining_time": "0:00:00", "throughput": 2047.48, "total_tokens": 6767120}
3325
+ {"current_steps": 16520, "total_steps": 16520, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:55:07", "remaining_time": "0:00:00", "throughput": 2046.29, "total_tokens": 6767120}