Training in progress, step 3500
Browse files
model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4976698672
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b3705c1168d3a0eeb317812255c3d22c57edbfb85a160ad8d0accf1015fc8989
|
| 3 |
size 4976698672
|
model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4999802720
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5cc7a8fd1545890aa30ae94b1f0f3a2c6f3e3a08de31af440a6d7b1bd0eb6485
|
| 3 |
size 4999802720
|
model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4915916176
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b40ad5cbb1044bd6d502397d5658c3f4b096ad31f31192423cb9510bf1ff212a
|
| 3 |
size 4915916176
|
model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1168138808
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8cf2e717496f90314f77091b6ed5c61e49d573285373accb5ba1b93d0677da12
|
| 3 |
size 1168138808
|
trainer_log.jsonl
CHANGED
|
@@ -358,3 +358,63 @@
|
|
| 358 |
{"current_steps": 2990, "total_steps": 3751, "loss": 0.2097, "lr": 1.202886355941546e-06, "epoch": 0.7971074862122398, "percentage": 79.71, "elapsed_time": "15:57:58", "remaining_time": "4:03:49"}
|
| 359 |
{"current_steps": 3000, "total_steps": 3751, "loss": 0.2107, "lr": 1.1727711127355118e-06, "epoch": 0.7997733975373644, "percentage": 79.98, "elapsed_time": "16:00:17", "remaining_time": "4:00:23"}
|
| 360 |
{"current_steps": 3000, "total_steps": 3751, "eval_loss": 0.20849083364009857, "epoch": 0.7997733975373644, "percentage": 79.98, "elapsed_time": "16:04:39", "remaining_time": "4:01:29"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
{"current_steps": 2990, "total_steps": 3751, "loss": 0.2097, "lr": 1.202886355941546e-06, "epoch": 0.7971074862122398, "percentage": 79.71, "elapsed_time": "15:57:58", "remaining_time": "4:03:49"}
|
| 359 |
{"current_steps": 3000, "total_steps": 3751, "loss": 0.2107, "lr": 1.1727711127355118e-06, "epoch": 0.7997733975373644, "percentage": 79.98, "elapsed_time": "16:00:17", "remaining_time": "4:00:23"}
|
| 360 |
{"current_steps": 3000, "total_steps": 3751, "eval_loss": 0.20849083364009857, "epoch": 0.7997733975373644, "percentage": 79.98, "elapsed_time": "16:04:39", "remaining_time": "4:01:29"}
|
| 361 |
+
{"current_steps": 3010, "total_steps": 3751, "loss": 0.2126, "lr": 1.1429874840135492e-06, "epoch": 0.8024393088624889, "percentage": 80.25, "elapsed_time": "16:08:09", "remaining_time": "3:58:20"}
|
| 362 |
+
{"current_steps": 3020, "total_steps": 3751, "loss": 0.2024, "lr": 1.1135380504111476e-06, "epoch": 0.8051052201876135, "percentage": 80.51, "elapsed_time": "16:10:28", "remaining_time": "3:54:54"}
|
| 363 |
+
{"current_steps": 3030, "total_steps": 3751, "loss": 0.2103, "lr": 1.0844253636070805e-06, "epoch": 0.807771131512738, "percentage": 80.78, "elapsed_time": "16:12:43", "remaining_time": "3:51:27"}
|
| 364 |
+
{"current_steps": 3040, "total_steps": 3751, "loss": 0.2093, "lr": 1.0556519461023301e-06, "epoch": 0.8104370428378626, "percentage": 81.05, "elapsed_time": "16:15:02", "remaining_time": "3:48:02"}
|
| 365 |
+
{"current_steps": 3050, "total_steps": 3751, "loss": 0.2055, "lr": 1.0272202910015083e-06, "epoch": 0.8131029541629872, "percentage": 81.31, "elapsed_time": "16:17:22", "remaining_time": "3:44:38"}
|
| 366 |
+
{"current_steps": 3050, "total_steps": 3751, "eval_loss": 0.2096840888261795, "epoch": 0.8131029541629872, "percentage": 81.31, "elapsed_time": "16:21:45", "remaining_time": "3:45:38"}
|
| 367 |
+
{"current_steps": 3060, "total_steps": 3751, "loss": 0.2084, "lr": 9.991328617968509e-07, "epoch": 0.8157688654881117, "percentage": 81.58, "elapsed_time": "16:24:03", "remaining_time": "3:42:12"}
|
| 368 |
+
{"current_steps": 3070, "total_steps": 3751, "loss": 0.2076, "lr": 9.713920921547532e-07, "epoch": 0.8184347768132363, "percentage": 81.84, "elapsed_time": "16:26:19", "remaining_time": "3:38:47"}
|
| 369 |
+
{"current_steps": 3080, "total_steps": 3751, "loss": 0.2072, "lr": 9.440003857049173e-07, "epoch": 0.8211006881383608, "percentage": 82.11, "elapsed_time": "16:28:39", "remaining_time": "3:35:23"}
|
| 370 |
+
{"current_steps": 3090, "total_steps": 3751, "loss": 0.2252, "lr": 9.169601158320707e-07, "epoch": 0.8237665994634853, "percentage": 82.38, "elapsed_time": "16:30:56", "remaining_time": "3:31:58"}
|
| 371 |
+
{"current_steps": 3100, "total_steps": 3751, "loss": 0.2045, "lr": 8.902736254703347e-07, "epoch": 0.8264325107886099, "percentage": 82.64, "elapsed_time": "16:33:16", "remaining_time": "3:28:35"}
|
| 372 |
+
{"current_steps": 3100, "total_steps": 3751, "eval_loss": 0.20747578144073486, "epoch": 0.8264325107886099, "percentage": 82.64, "elapsed_time": "16:37:39", "remaining_time": "3:29:30"}
|
| 373 |
+
{"current_steps": 3110, "total_steps": 3751, "loss": 0.2038, "lr": 8.639432269002102e-07, "epoch": 0.8290984221137344, "percentage": 82.91, "elapsed_time": "16:39:57", "remaining_time": "3:26:05"}
|
| 374 |
+
{"current_steps": 3120, "total_steps": 3751, "loss": 0.2071, "lr": 8.379712015482333e-07, "epoch": 0.831764333438859, "percentage": 83.18, "elapsed_time": "16:42:16", "remaining_time": "3:22:42"}
|
| 375 |
+
{"current_steps": 3130, "total_steps": 3751, "loss": 0.1988, "lr": 8.123597997892918e-07, "epoch": 0.8344302447639835, "percentage": 83.44, "elapsed_time": "16:44:37", "remaining_time": "3:19:19"}
|
| 376 |
+
{"current_steps": 3140, "total_steps": 3751, "loss": 0.2181, "lr": 7.871112407516474e-07, "epoch": 0.837096156089108, "percentage": 83.71, "elapsed_time": "16:46:53", "remaining_time": "3:15:55"}
|
| 377 |
+
{"current_steps": 3150, "total_steps": 3751, "loss": 0.2172, "lr": 7.622277121246513e-07, "epoch": 0.8397620674142326, "percentage": 83.98, "elapsed_time": "16:49:09", "remaining_time": "3:12:32"}
|
| 378 |
+
{"current_steps": 3150, "total_steps": 3751, "eval_loss": 0.2062389850616455, "epoch": 0.8397620674142326, "percentage": 83.98, "elapsed_time": "16:53:32", "remaining_time": "3:13:22"}
|
| 379 |
+
{"current_steps": 3160, "total_steps": 3751, "loss": 0.2045, "lr": 7.377113699691879e-07, "epoch": 0.8424279787393572, "percentage": 84.24, "elapsed_time": "16:55:52", "remaining_time": "3:09:59"}
|
| 380 |
+
{"current_steps": 3170, "total_steps": 3751, "loss": 0.2162, "lr": 7.135643385308677e-07, "epoch": 0.8450938900644818, "percentage": 84.51, "elapsed_time": "16:58:13", "remaining_time": "3:06:37"}
|
| 381 |
+
{"current_steps": 3180, "total_steps": 3751, "loss": 0.2089, "lr": 6.897887100559608e-07, "epoch": 0.8477598013896063, "percentage": 84.78, "elapsed_time": "17:00:32", "remaining_time": "3:03:14"}
|
| 382 |
+
{"current_steps": 3190, "total_steps": 3751, "loss": 0.216, "lr": 6.663865446101192e-07, "epoch": 0.8504257127147308, "percentage": 85.04, "elapsed_time": "17:02:49", "remaining_time": "2:59:52"}
|
| 383 |
+
{"current_steps": 3200, "total_steps": 3751, "loss": 0.2138, "lr": 6.433598698998766e-07, "epoch": 0.8530916240398554, "percentage": 85.31, "elapsed_time": "17:05:06", "remaining_time": "2:56:30"}
|
| 384 |
+
{"current_steps": 3200, "total_steps": 3751, "eval_loss": 0.2074633538722992, "epoch": 0.8530916240398554, "percentage": 85.31, "elapsed_time": "17:09:29", "remaining_time": "2:57:15"}
|
| 385 |
+
{"current_steps": 3210, "total_steps": 3751, "loss": 0.2121, "lr": 6.207106810969577e-07, "epoch": 0.8557575353649799, "percentage": 85.58, "elapsed_time": "17:11:45", "remaining_time": "2:53:53"}
|
| 386 |
+
{"current_steps": 3220, "total_steps": 3751, "loss": 0.2141, "lr": 5.98440940665399e-07, "epoch": 0.8584234466901045, "percentage": 85.84, "elapsed_time": "17:14:03", "remaining_time": "2:50:31"}
|
| 387 |
+
{"current_steps": 3230, "total_steps": 3751, "loss": 0.1917, "lr": 5.765525781915172e-07, "epoch": 0.861089358015229, "percentage": 86.11, "elapsed_time": "17:16:20", "remaining_time": "2:47:09"}
|
| 388 |
+
{"current_steps": 3240, "total_steps": 3751, "loss": 0.213, "lr": 5.550474902167091e-07, "epoch": 0.8637552693403535, "percentage": 86.38, "elapsed_time": "17:18:37", "remaining_time": "2:43:48"}
|
| 389 |
+
{"current_steps": 3250, "total_steps": 3751, "loss": 0.194, "lr": 5.339275400731331e-07, "epoch": 0.8664211806654781, "percentage": 86.64, "elapsed_time": "17:20:55", "remaining_time": "2:40:27"}
|
| 390 |
+
{"current_steps": 3250, "total_steps": 3751, "eval_loss": 0.2050597220659256, "epoch": 0.8664211806654781, "percentage": 86.64, "elapsed_time": "17:25:18", "remaining_time": "2:41:08"}
|
| 391 |
+
{"current_steps": 3260, "total_steps": 3751, "loss": 0.2031, "lr": 5.131945577222485e-07, "epoch": 0.8690870919906026, "percentage": 86.91, "elapsed_time": "17:27:33", "remaining_time": "2:37:46"}
|
| 392 |
+
{"current_steps": 3270, "total_steps": 3751, "loss": 0.2061, "lr": 4.92850339596268e-07, "epoch": 0.8717530033157272, "percentage": 87.18, "elapsed_time": "17:29:51", "remaining_time": "2:34:25"}
|
| 393 |
+
{"current_steps": 3280, "total_steps": 3751, "loss": 0.2148, "lr": 4.728966484424913e-07, "epoch": 0.8744189146408518, "percentage": 87.44, "elapsed_time": "17:32:13", "remaining_time": "2:31:05"}
|
| 394 |
+
{"current_steps": 3290, "total_steps": 3751, "loss": 0.2181, "lr": 4.5333521317058207e-07, "epoch": 0.8770848259659763, "percentage": 87.71, "elapsed_time": "17:34:33", "remaining_time": "2:27:45"}
|
| 395 |
+
{"current_steps": 3300, "total_steps": 3751, "loss": 0.2133, "lr": 4.3416772870275295e-07, "epoch": 0.8797507372911009, "percentage": 87.98, "elapsed_time": "17:36:51", "remaining_time": "2:24:26"}
|
| 396 |
+
{"current_steps": 3300, "total_steps": 3751, "eval_loss": 0.20511919260025024, "epoch": 0.8797507372911009, "percentage": 87.98, "elapsed_time": "17:41:15", "remaining_time": "2:25:02"}
|
| 397 |
+
{"current_steps": 3310, "total_steps": 3751, "loss": 0.1987, "lr": 4.153958558269189e-07, "epoch": 0.8824166486162254, "percentage": 88.24, "elapsed_time": "17:43:39", "remaining_time": "2:21:42"}
|
| 398 |
+
{"current_steps": 3320, "total_steps": 3751, "loss": 0.2, "lr": 3.9702122105278405e-07, "epoch": 0.88508255994135, "percentage": 88.51, "elapsed_time": "17:46:05", "remaining_time": "2:18:23"}
|
| 399 |
+
{"current_steps": 3330, "total_steps": 3751, "loss": 0.1995, "lr": 3.7904541647092506e-07, "epoch": 0.8877484712664745, "percentage": 88.78, "elapsed_time": "17:48:24", "remaining_time": "2:15:04"}
|
| 400 |
+
{"current_steps": 3340, "total_steps": 3751, "loss": 0.1988, "lr": 3.614699996148285e-07, "epoch": 0.890414382591599, "percentage": 89.04, "elapsed_time": "17:50:41", "remaining_time": "2:11:45"}
|
| 401 |
+
{"current_steps": 3350, "total_steps": 3751, "loss": 0.2025, "lr": 3.442964933259474e-07, "epoch": 0.8930802939167236, "percentage": 89.31, "elapsed_time": "17:53:01", "remaining_time": "2:08:26"}
|
| 402 |
+
{"current_steps": 3350, "total_steps": 3751, "eval_loss": 0.20472171902656555, "epoch": 0.8930802939167236, "percentage": 89.31, "elapsed_time": "17:57:24", "remaining_time": "2:08:58"}
|
| 403 |
+
{"current_steps": 3360, "total_steps": 3751, "loss": 0.2066, "lr": 3.275263856217442e-07, "epoch": 0.8957462052418481, "percentage": 89.58, "elapsed_time": "17:59:41", "remaining_time": "2:05:38"}
|
| 404 |
+
{"current_steps": 3370, "total_steps": 3751, "loss": 0.2041, "lr": 3.1116112956677045e-07, "epoch": 0.8984121165669727, "percentage": 89.84, "elapsed_time": "18:02:00", "remaining_time": "2:02:19"}
|
| 405 |
+
{"current_steps": 3380, "total_steps": 3751, "loss": 0.2075, "lr": 2.952021431467522e-07, "epoch": 0.9010780278920972, "percentage": 90.11, "elapsed_time": "18:04:19", "remaining_time": "1:59:01"}
|
| 406 |
+
{"current_steps": 3390, "total_steps": 3751, "loss": 0.2102, "lr": 2.7965080914573786e-07, "epoch": 0.9037439392172217, "percentage": 90.38, "elapsed_time": "18:06:35", "remaining_time": "1:55:42"}
|
| 407 |
+
{"current_steps": 3400, "total_steps": 3751, "loss": 0.2088, "lr": 2.6450847502627883e-07, "epoch": 0.9064098505423464, "percentage": 90.64, "elapsed_time": "18:08:55", "remaining_time": "1:52:24"}
|
| 408 |
+
{"current_steps": 3400, "total_steps": 3751, "eval_loss": 0.20498156547546387, "epoch": 0.9064098505423464, "percentage": 90.64, "elapsed_time": "18:13:18", "remaining_time": "1:52:52"}
|
| 409 |
+
{"current_steps": 3410, "total_steps": 3751, "loss": 0.2127, "lr": 2.497764528126778e-07, "epoch": 0.9090757618674709, "percentage": 90.91, "elapsed_time": "18:15:37", "remaining_time": "1:49:33"}
|
| 410 |
+
{"current_steps": 3420, "total_steps": 3751, "loss": 0.2087, "lr": 2.3545601897731085e-07, "epoch": 0.9117416731925955, "percentage": 91.18, "elapsed_time": "18:17:56", "remaining_time": "1:46:15"}
|
| 411 |
+
{"current_steps": 3430, "total_steps": 3751, "loss": 0.208, "lr": 2.2154841433002062e-07, "epoch": 0.91440758451772, "percentage": 91.44, "elapsed_time": "18:20:11", "remaining_time": "1:42:57"}
|
| 412 |
+
{"current_steps": 3440, "total_steps": 3751, "loss": 0.197, "lr": 2.0805484391061003e-07, "epoch": 0.9170734958428445, "percentage": 91.71, "elapsed_time": "18:22:30", "remaining_time": "1:39:40"}
|
| 413 |
+
{"current_steps": 3450, "total_steps": 3751, "loss": 0.204, "lr": 1.9497647688442478e-07, "epoch": 0.9197394071679691, "percentage": 91.98, "elapsed_time": "18:24:49", "remaining_time": "1:36:23"}
|
| 414 |
+
{"current_steps": 3450, "total_steps": 3751, "eval_loss": 0.20443060994148254, "epoch": 0.9197394071679691, "percentage": 91.98, "elapsed_time": "18:29:12", "remaining_time": "1:36:46"}
|
| 415 |
+
{"current_steps": 3460, "total_steps": 3751, "loss": 0.2063, "lr": 1.8231444644105755e-07, "epoch": 0.9224053184930936, "percentage": 92.24, "elapsed_time": "18:31:30", "remaining_time": "1:33:28"}
|
| 416 |
+
{"current_steps": 3470, "total_steps": 3751, "loss": 0.2063, "lr": 1.7006984969615226e-07, "epoch": 0.9250712298182182, "percentage": 92.51, "elapsed_time": "18:33:48", "remaining_time": "1:30:11"}
|
| 417 |
+
{"current_steps": 3480, "total_steps": 3751, "loss": 0.2103, "lr": 1.5824374759635165e-07, "epoch": 0.9277371411433427, "percentage": 92.78, "elapsed_time": "18:36:09", "remaining_time": "1:26:55"}
|
| 418 |
+
{"current_steps": 3490, "total_steps": 3751, "loss": 0.1999, "lr": 1.4683716482736364e-07, "epoch": 0.9304030524684672, "percentage": 93.04, "elapsed_time": "18:38:26", "remaining_time": "1:23:38"}
|
| 419 |
+
{"current_steps": 3500, "total_steps": 3751, "loss": 0.2059, "lr": 1.358510897251808e-07, "epoch": 0.9330689637935918, "percentage": 93.31, "elapsed_time": "18:40:44", "remaining_time": "1:20:22"}
|
| 420 |
+
{"current_steps": 3500, "total_steps": 3751, "eval_loss": 0.20389488339424133, "epoch": 0.9330689637935918, "percentage": 93.31, "elapsed_time": "18:45:07", "remaining_time": "1:20:41"}
|