Training in progress, step 3600
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +35 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 8388736
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a10b696da17becb7ec1d23c98e9aebe218973d41595e528b57f38902f5629162
|
| 3 |
size 8388736
|
trainer_log.jsonl
CHANGED
|
@@ -704,3 +704,38 @@
|
|
| 704 |
{"current_steps": 3425, "total_steps": 3600, "loss": 0.0218, "lr": 3.6315399299321484e-07, "epoch": 19.02777777777778, "percentage": 95.14, "elapsed_time": "0:05:33", "remaining_time": "0:00:17", "throughput": 1562.54, "total_tokens": 520624}
|
| 705 |
{"current_steps": 3430, "total_steps": 3600, "loss": 0.0019, "lr": 3.428599615692141e-07, "epoch": 19.055555555555557, "percentage": 95.28, "elapsed_time": "0:05:33", "remaining_time": "0:00:16", "throughput": 1562.83, "total_tokens": 521392}
|
| 706 |
{"current_steps": 3435, "total_steps": 3600, "loss": 0.0078, "lr": 3.2314548104537545e-07, "epoch": 19.083333333333332, "percentage": 95.42, "elapsed_time": "0:05:34", "remaining_time": "0:00:16", "throughput": 1563.11, "total_tokens": 522144}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 704 |
{"current_steps": 3425, "total_steps": 3600, "loss": 0.0218, "lr": 3.6315399299321484e-07, "epoch": 19.02777777777778, "percentage": 95.14, "elapsed_time": "0:05:33", "remaining_time": "0:00:17", "throughput": 1562.54, "total_tokens": 520624}
|
| 705 |
{"current_steps": 3430, "total_steps": 3600, "loss": 0.0019, "lr": 3.428599615692141e-07, "epoch": 19.055555555555557, "percentage": 95.28, "elapsed_time": "0:05:33", "remaining_time": "0:00:16", "throughput": 1562.83, "total_tokens": 521392}
|
| 706 |
{"current_steps": 3435, "total_steps": 3600, "loss": 0.0078, "lr": 3.2314548104537545e-07, "epoch": 19.083333333333332, "percentage": 95.42, "elapsed_time": "0:05:34", "remaining_time": "0:00:16", "throughput": 1563.11, "total_tokens": 522144}
|
| 707 |
+
{"current_steps": 3440, "total_steps": 3600, "loss": 0.0194, "lr": 3.040110147984221e-07, "epoch": 19.11111111111111, "percentage": 95.56, "elapsed_time": "0:05:34", "remaining_time": "0:00:15", "throughput": 1563.36, "total_tokens": 522896}
|
| 708 |
+
{"current_steps": 3445, "total_steps": 3600, "loss": 0.0008, "lr": 2.8545701257221e-07, "epoch": 19.13888888888889, "percentage": 95.69, "elapsed_time": "0:05:34", "remaining_time": "0:00:15", "throughput": 1563.59, "total_tokens": 523632}
|
| 709 |
+
{"current_steps": 3450, "total_steps": 3600, "loss": 0.0007, "lr": 2.674839104671367e-07, "epoch": 19.166666666666668, "percentage": 95.83, "elapsed_time": "0:05:35", "remaining_time": "0:00:14", "throughput": 1563.92, "total_tokens": 524400}
|
| 710 |
+
{"current_steps": 3455, "total_steps": 3600, "loss": 0.0131, "lr": 2.5009213092991034e-07, "epoch": 19.194444444444443, "percentage": 95.97, "elapsed_time": "0:05:35", "remaining_time": "0:00:14", "throughput": 1564.33, "total_tokens": 525200}
|
| 711 |
+
{"current_steps": 3460, "total_steps": 3600, "loss": 0.0003, "lr": 2.3328208274359942e-07, "epoch": 19.22222222222222, "percentage": 96.11, "elapsed_time": "0:05:36", "remaining_time": "0:00:13", "throughput": 1564.65, "total_tokens": 525952}
|
| 712 |
+
{"current_steps": 3465, "total_steps": 3600, "loss": 0.0003, "lr": 2.170541610180432e-07, "epoch": 19.25, "percentage": 96.25, "elapsed_time": "0:05:36", "remaining_time": "0:00:13", "throughput": 1564.94, "total_tokens": 526704}
|
| 713 |
+
{"current_steps": 3470, "total_steps": 3600, "loss": 0.0009, "lr": 2.014087471805509e-07, "epoch": 19.27777777777778, "percentage": 96.39, "elapsed_time": "0:05:36", "remaining_time": "0:00:12", "throughput": 1565.16, "total_tokens": 527440}
|
| 714 |
+
{"current_steps": 3475, "total_steps": 3600, "loss": 0.0005, "lr": 1.8634620896695043e-07, "epoch": 19.305555555555557, "percentage": 96.53, "elapsed_time": "0:05:37", "remaining_time": "0:00:12", "throughput": 1565.5, "total_tokens": 528208}
|
| 715 |
+
{"current_steps": 3480, "total_steps": 3600, "loss": 0.0009, "lr": 1.7186690041292586e-07, "epoch": 19.333333333333332, "percentage": 96.67, "elapsed_time": "0:05:37", "remaining_time": "0:00:11", "throughput": 1565.93, "total_tokens": 529008}
|
| 716 |
+
{"current_steps": 3485, "total_steps": 3600, "loss": 0.0011, "lr": 1.5797116184571304e-07, "epoch": 19.36111111111111, "percentage": 96.81, "elapsed_time": "0:05:38", "remaining_time": "0:00:11", "throughput": 1566.24, "total_tokens": 529760}
|
| 717 |
+
{"current_steps": 3490, "total_steps": 3600, "loss": 0.0019, "lr": 1.4465931987609482e-07, "epoch": 19.38888888888889, "percentage": 96.94, "elapsed_time": "0:05:38", "remaining_time": "0:00:10", "throughput": 1566.45, "total_tokens": 530480}
|
| 718 |
+
{"current_steps": 3495, "total_steps": 3600, "loss": 0.0017, "lr": 1.319316873907267e-07, "epoch": 19.416666666666668, "percentage": 97.08, "elapsed_time": "0:05:39", "remaining_time": "0:00:10", "throughput": 1566.67, "total_tokens": 531216}
|
| 719 |
+
{"current_steps": 3500, "total_steps": 3600, "loss": 0.0654, "lr": 1.1978856354477595e-07, "epoch": 19.444444444444443, "percentage": 97.22, "elapsed_time": "0:05:39", "remaining_time": "0:00:09", "throughput": 1566.96, "total_tokens": 531968}
|
| 720 |
+
{"current_steps": 3505, "total_steps": 3600, "loss": 0.0026, "lr": 1.0823023375489127e-07, "epoch": 19.47222222222222, "percentage": 97.36, "elapsed_time": "0:05:39", "remaining_time": "0:00:09", "throughput": 1567.31, "total_tokens": 532736}
|
| 721 |
+
{"current_steps": 3510, "total_steps": 3600, "loss": 0.0137, "lr": 9.725696969249965e-08, "epoch": 19.5, "percentage": 97.5, "elapsed_time": "0:05:40", "remaining_time": "0:00:08", "throughput": 1567.59, "total_tokens": 533504}
|
| 722 |
+
{"current_steps": 3515, "total_steps": 3600, "loss": 0.0004, "lr": 8.686902927741991e-08, "epoch": 19.52777777777778, "percentage": 97.64, "elapsed_time": "0:05:40", "remaining_time": "0:00:08", "throughput": 1567.83, "total_tokens": 534256}
|
| 723 |
+
{"current_steps": 3520, "total_steps": 3600, "loss": 0.0004, "lr": 7.706665667180091e-08, "epoch": 19.555555555555557, "percentage": 97.78, "elapsed_time": "0:05:41", "remaining_time": "0:00:07", "throughput": 1568.06, "total_tokens": 535008}
|
| 724 |
+
{"current_steps": 3525, "total_steps": 3600, "loss": 0.0438, "lr": 6.785008227437329e-08, "epoch": 19.583333333333332, "percentage": 97.92, "elapsed_time": "0:05:41", "remaining_time": "0:00:07", "throughput": 1568.46, "total_tokens": 535792}
|
| 725 |
+
{"current_steps": 3530, "total_steps": 3600, "loss": 0.0086, "lr": 5.921952271504827e-08, "epoch": 19.61111111111111, "percentage": 98.06, "elapsed_time": "0:05:42", "remaining_time": "0:00:06", "throughput": 1568.79, "total_tokens": 536560}
|
| 726 |
+
{"current_steps": 3535, "total_steps": 3600, "loss": 0.0005, "lr": 5.117518084981621e-08, "epoch": 19.63888888888889, "percentage": 98.19, "elapsed_time": "0:05:42", "remaining_time": "0:00:06", "throughput": 1569.11, "total_tokens": 537328}
|
| 727 |
+
{"current_steps": 3540, "total_steps": 3600, "loss": 0.0002, "lr": 4.371724575597535e-08, "epoch": 19.666666666666668, "percentage": 98.33, "elapsed_time": "0:05:42", "remaining_time": "0:00:05", "throughput": 1569.33, "total_tokens": 538064}
|
| 728 |
+
{"current_steps": 3545, "total_steps": 3600, "loss": 0.1792, "lr": 3.684589272771044e-08, "epoch": 19.694444444444443, "percentage": 98.47, "elapsed_time": "0:05:43", "remaining_time": "0:00:05", "throughput": 1569.71, "total_tokens": 538848}
|
| 729 |
+
{"current_steps": 3550, "total_steps": 3600, "loss": 0.0761, "lr": 3.056128327193486e-08, "epoch": 19.72222222222222, "percentage": 98.61, "elapsed_time": "0:05:43", "remaining_time": "0:00:04", "throughput": 1570.03, "total_tokens": 539616}
|
| 730 |
+
{"current_steps": 3555, "total_steps": 3600, "loss": 0.001, "lr": 2.486356510453258e-08, "epoch": 19.75, "percentage": 98.75, "elapsed_time": "0:05:44", "remaining_time": "0:00:04", "throughput": 1570.29, "total_tokens": 540352}
|
| 731 |
+
{"current_steps": 3560, "total_steps": 3600, "loss": 0.0004, "lr": 1.975287214685817e-08, "epoch": 19.77777777777778, "percentage": 98.89, "elapsed_time": "0:05:44", "remaining_time": "0:00:03", "throughput": 1570.57, "total_tokens": 541104}
|
| 732 |
+
{"current_steps": 3565, "total_steps": 3600, "loss": 0.0166, "lr": 1.522932452260595e-08, "epoch": 19.805555555555557, "percentage": 99.03, "elapsed_time": "0:05:44", "remaining_time": "0:00:03", "throughput": 1570.83, "total_tokens": 541856}
|
| 733 |
+
{"current_steps": 3570, "total_steps": 3600, "loss": 0.001, "lr": 1.1293028554978935e-08, "epoch": 19.833333333333332, "percentage": 99.17, "elapsed_time": "0:05:45", "remaining_time": "0:00:02", "throughput": 1571.18, "total_tokens": 542640}
|
| 734 |
+
{"current_steps": 3575, "total_steps": 3600, "loss": 0.0002, "lr": 7.944076764190845e-09, "epoch": 19.86111111111111, "percentage": 99.31, "elapsed_time": "0:05:45", "remaining_time": "0:00:02", "throughput": 1571.46, "total_tokens": 543392}
|
| 735 |
+
{"current_steps": 3580, "total_steps": 3600, "loss": 0.0512, "lr": 5.182547865290044e-09, "epoch": 19.88888888888889, "percentage": 99.44, "elapsed_time": "0:05:46", "remaining_time": "0:00:01", "throughput": 1571.66, "total_tokens": 544128}
|
| 736 |
+
{"current_steps": 3585, "total_steps": 3600, "loss": 0.155, "lr": 3.008506766313812e-09, "epoch": 19.916666666666668, "percentage": 99.58, "elapsed_time": "0:05:46", "remaining_time": "0:00:01", "throughput": 1571.92, "total_tokens": 544880}
|
| 737 |
+
{"current_steps": 3590, "total_steps": 3600, "loss": 0.0003, "lr": 1.4220045667645566e-09, "epoch": 19.944444444444443, "percentage": 99.72, "elapsed_time": "0:05:47", "remaining_time": "0:00:00", "throughput": 1572.09, "total_tokens": 545600}
|
| 738 |
+
{"current_steps": 3595, "total_steps": 3600, "loss": 0.006, "lr": 4.2307855639411865e-10, "epoch": 19.97222222222222, "percentage": 99.86, "elapsed_time": "0:05:47", "remaining_time": "0:00:00", "throughput": 1572.47, "total_tokens": 546384}
|
| 739 |
+
{"current_steps": 3600, "total_steps": 3600, "loss": 0.0002, "lr": 1.1752214348903501e-11, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:05:47", "remaining_time": "0:00:00", "throughput": 1572.5, "total_tokens": 547136}
|
| 740 |
+
{"current_steps": 3600, "total_steps": 3600, "eval_loss": 1.036588430404663, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:05:48", "remaining_time": "0:00:00", "throughput": 1568.57, "total_tokens": 547136}
|
| 741 |
+
{"current_steps": 3600, "total_steps": 3600, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:05:50", "remaining_time": "0:00:00", "throughput": 1562.85, "total_tokens": 547136}
|