rbelanec committed on
Commit
66b8a21
verified
1 Parent(s): bebff6e

Training in progress, step 3600

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +35 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae7a1456d320f5dcc61d4ade6e6ddee02b6d5d683e5cd122d599b9fb54083794
3
  size 8388736
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a10b696da17becb7ec1d23c98e9aebe218973d41595e528b57f38902f5629162
3
  size 8388736
trainer_log.jsonl CHANGED
@@ -704,3 +704,38 @@
704
  {"current_steps": 3425, "total_steps": 3600, "loss": 0.0218, "lr": 3.6315399299321484e-07, "epoch": 19.02777777777778, "percentage": 95.14, "elapsed_time": "0:05:33", "remaining_time": "0:00:17", "throughput": 1562.54, "total_tokens": 520624}
705
  {"current_steps": 3430, "total_steps": 3600, "loss": 0.0019, "lr": 3.428599615692141e-07, "epoch": 19.055555555555557, "percentage": 95.28, "elapsed_time": "0:05:33", "remaining_time": "0:00:16", "throughput": 1562.83, "total_tokens": 521392}
706
  {"current_steps": 3435, "total_steps": 3600, "loss": 0.0078, "lr": 3.2314548104537545e-07, "epoch": 19.083333333333332, "percentage": 95.42, "elapsed_time": "0:05:34", "remaining_time": "0:00:16", "throughput": 1563.11, "total_tokens": 522144}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
704
  {"current_steps": 3425, "total_steps": 3600, "loss": 0.0218, "lr": 3.6315399299321484e-07, "epoch": 19.02777777777778, "percentage": 95.14, "elapsed_time": "0:05:33", "remaining_time": "0:00:17", "throughput": 1562.54, "total_tokens": 520624}
705
  {"current_steps": 3430, "total_steps": 3600, "loss": 0.0019, "lr": 3.428599615692141e-07, "epoch": 19.055555555555557, "percentage": 95.28, "elapsed_time": "0:05:33", "remaining_time": "0:00:16", "throughput": 1562.83, "total_tokens": 521392}
706
  {"current_steps": 3435, "total_steps": 3600, "loss": 0.0078, "lr": 3.2314548104537545e-07, "epoch": 19.083333333333332, "percentage": 95.42, "elapsed_time": "0:05:34", "remaining_time": "0:00:16", "throughput": 1563.11, "total_tokens": 522144}
707
+ {"current_steps": 3440, "total_steps": 3600, "loss": 0.0194, "lr": 3.040110147984221e-07, "epoch": 19.11111111111111, "percentage": 95.56, "elapsed_time": "0:05:34", "remaining_time": "0:00:15", "throughput": 1563.36, "total_tokens": 522896}
708
+ {"current_steps": 3445, "total_steps": 3600, "loss": 0.0008, "lr": 2.8545701257221e-07, "epoch": 19.13888888888889, "percentage": 95.69, "elapsed_time": "0:05:34", "remaining_time": "0:00:15", "throughput": 1563.59, "total_tokens": 523632}
709
+ {"current_steps": 3450, "total_steps": 3600, "loss": 0.0007, "lr": 2.674839104671367e-07, "epoch": 19.166666666666668, "percentage": 95.83, "elapsed_time": "0:05:35", "remaining_time": "0:00:14", "throughput": 1563.92, "total_tokens": 524400}
710
+ {"current_steps": 3455, "total_steps": 3600, "loss": 0.0131, "lr": 2.5009213092991034e-07, "epoch": 19.194444444444443, "percentage": 95.97, "elapsed_time": "0:05:35", "remaining_time": "0:00:14", "throughput": 1564.33, "total_tokens": 525200}
711
+ {"current_steps": 3460, "total_steps": 3600, "loss": 0.0003, "lr": 2.3328208274359942e-07, "epoch": 19.22222222222222, "percentage": 96.11, "elapsed_time": "0:05:36", "remaining_time": "0:00:13", "throughput": 1564.65, "total_tokens": 525952}
712
+ {"current_steps": 3465, "total_steps": 3600, "loss": 0.0003, "lr": 2.170541610180432e-07, "epoch": 19.25, "percentage": 96.25, "elapsed_time": "0:05:36", "remaining_time": "0:00:13", "throughput": 1564.94, "total_tokens": 526704}
713
+ {"current_steps": 3470, "total_steps": 3600, "loss": 0.0009, "lr": 2.014087471805509e-07, "epoch": 19.27777777777778, "percentage": 96.39, "elapsed_time": "0:05:36", "remaining_time": "0:00:12", "throughput": 1565.16, "total_tokens": 527440}
714
+ {"current_steps": 3475, "total_steps": 3600, "loss": 0.0005, "lr": 1.8634620896695043e-07, "epoch": 19.305555555555557, "percentage": 96.53, "elapsed_time": "0:05:37", "remaining_time": "0:00:12", "throughput": 1565.5, "total_tokens": 528208}
715
+ {"current_steps": 3480, "total_steps": 3600, "loss": 0.0009, "lr": 1.7186690041292586e-07, "epoch": 19.333333333333332, "percentage": 96.67, "elapsed_time": "0:05:37", "remaining_time": "0:00:11", "throughput": 1565.93, "total_tokens": 529008}
716
+ {"current_steps": 3485, "total_steps": 3600, "loss": 0.0011, "lr": 1.5797116184571304e-07, "epoch": 19.36111111111111, "percentage": 96.81, "elapsed_time": "0:05:38", "remaining_time": "0:00:11", "throughput": 1566.24, "total_tokens": 529760}
717
+ {"current_steps": 3490, "total_steps": 3600, "loss": 0.0019, "lr": 1.4465931987609482e-07, "epoch": 19.38888888888889, "percentage": 96.94, "elapsed_time": "0:05:38", "remaining_time": "0:00:10", "throughput": 1566.45, "total_tokens": 530480}
718
+ {"current_steps": 3495, "total_steps": 3600, "loss": 0.0017, "lr": 1.319316873907267e-07, "epoch": 19.416666666666668, "percentage": 97.08, "elapsed_time": "0:05:39", "remaining_time": "0:00:10", "throughput": 1566.67, "total_tokens": 531216}
719
+ {"current_steps": 3500, "total_steps": 3600, "loss": 0.0654, "lr": 1.1978856354477595e-07, "epoch": 19.444444444444443, "percentage": 97.22, "elapsed_time": "0:05:39", "remaining_time": "0:00:09", "throughput": 1566.96, "total_tokens": 531968}
720
+ {"current_steps": 3505, "total_steps": 3600, "loss": 0.0026, "lr": 1.0823023375489127e-07, "epoch": 19.47222222222222, "percentage": 97.36, "elapsed_time": "0:05:39", "remaining_time": "0:00:09", "throughput": 1567.31, "total_tokens": 532736}
721
+ {"current_steps": 3510, "total_steps": 3600, "loss": 0.0137, "lr": 9.725696969249965e-08, "epoch": 19.5, "percentage": 97.5, "elapsed_time": "0:05:40", "remaining_time": "0:00:08", "throughput": 1567.59, "total_tokens": 533504}
722
+ {"current_steps": 3515, "total_steps": 3600, "loss": 0.0004, "lr": 8.686902927741991e-08, "epoch": 19.52777777777778, "percentage": 97.64, "elapsed_time": "0:05:40", "remaining_time": "0:00:08", "throughput": 1567.83, "total_tokens": 534256}
723
+ {"current_steps": 3520, "total_steps": 3600, "loss": 0.0004, "lr": 7.706665667180091e-08, "epoch": 19.555555555555557, "percentage": 97.78, "elapsed_time": "0:05:41", "remaining_time": "0:00:07", "throughput": 1568.06, "total_tokens": 535008}
724
+ {"current_steps": 3525, "total_steps": 3600, "loss": 0.0438, "lr": 6.785008227437329e-08, "epoch": 19.583333333333332, "percentage": 97.92, "elapsed_time": "0:05:41", "remaining_time": "0:00:07", "throughput": 1568.46, "total_tokens": 535792}
725
+ {"current_steps": 3530, "total_steps": 3600, "loss": 0.0086, "lr": 5.921952271504827e-08, "epoch": 19.61111111111111, "percentage": 98.06, "elapsed_time": "0:05:42", "remaining_time": "0:00:06", "throughput": 1568.79, "total_tokens": 536560}
726
+ {"current_steps": 3535, "total_steps": 3600, "loss": 0.0005, "lr": 5.117518084981621e-08, "epoch": 19.63888888888889, "percentage": 98.19, "elapsed_time": "0:05:42", "remaining_time": "0:00:06", "throughput": 1569.11, "total_tokens": 537328}
727
+ {"current_steps": 3540, "total_steps": 3600, "loss": 0.0002, "lr": 4.371724575597535e-08, "epoch": 19.666666666666668, "percentage": 98.33, "elapsed_time": "0:05:42", "remaining_time": "0:00:05", "throughput": 1569.33, "total_tokens": 538064}
728
+ {"current_steps": 3545, "total_steps": 3600, "loss": 0.1792, "lr": 3.684589272771044e-08, "epoch": 19.694444444444443, "percentage": 98.47, "elapsed_time": "0:05:43", "remaining_time": "0:00:05", "throughput": 1569.71, "total_tokens": 538848}
729
+ {"current_steps": 3550, "total_steps": 3600, "loss": 0.0761, "lr": 3.056128327193486e-08, "epoch": 19.72222222222222, "percentage": 98.61, "elapsed_time": "0:05:43", "remaining_time": "0:00:04", "throughput": 1570.03, "total_tokens": 539616}
730
+ {"current_steps": 3555, "total_steps": 3600, "loss": 0.001, "lr": 2.486356510453258e-08, "epoch": 19.75, "percentage": 98.75, "elapsed_time": "0:05:44", "remaining_time": "0:00:04", "throughput": 1570.29, "total_tokens": 540352}
731
+ {"current_steps": 3560, "total_steps": 3600, "loss": 0.0004, "lr": 1.975287214685817e-08, "epoch": 19.77777777777778, "percentage": 98.89, "elapsed_time": "0:05:44", "remaining_time": "0:00:03", "throughput": 1570.57, "total_tokens": 541104}
732
+ {"current_steps": 3565, "total_steps": 3600, "loss": 0.0166, "lr": 1.522932452260595e-08, "epoch": 19.805555555555557, "percentage": 99.03, "elapsed_time": "0:05:44", "remaining_time": "0:00:03", "throughput": 1570.83, "total_tokens": 541856}
733
+ {"current_steps": 3570, "total_steps": 3600, "loss": 0.001, "lr": 1.1293028554978935e-08, "epoch": 19.833333333333332, "percentage": 99.17, "elapsed_time": "0:05:45", "remaining_time": "0:00:02", "throughput": 1571.18, "total_tokens": 542640}
734
+ {"current_steps": 3575, "total_steps": 3600, "loss": 0.0002, "lr": 7.944076764190845e-09, "epoch": 19.86111111111111, "percentage": 99.31, "elapsed_time": "0:05:45", "remaining_time": "0:00:02", "throughput": 1571.46, "total_tokens": 543392}
735
+ {"current_steps": 3580, "total_steps": 3600, "loss": 0.0512, "lr": 5.182547865290044e-09, "epoch": 19.88888888888889, "percentage": 99.44, "elapsed_time": "0:05:46", "remaining_time": "0:00:01", "throughput": 1571.66, "total_tokens": 544128}
736
+ {"current_steps": 3585, "total_steps": 3600, "loss": 0.155, "lr": 3.008506766313812e-09, "epoch": 19.916666666666668, "percentage": 99.58, "elapsed_time": "0:05:46", "remaining_time": "0:00:01", "throughput": 1571.92, "total_tokens": 544880}
737
+ {"current_steps": 3590, "total_steps": 3600, "loss": 0.0003, "lr": 1.4220045667645566e-09, "epoch": 19.944444444444443, "percentage": 99.72, "elapsed_time": "0:05:47", "remaining_time": "0:00:00", "throughput": 1572.09, "total_tokens": 545600}
738
+ {"current_steps": 3595, "total_steps": 3600, "loss": 0.006, "lr": 4.2307855639411865e-10, "epoch": 19.97222222222222, "percentage": 99.86, "elapsed_time": "0:05:47", "remaining_time": "0:00:00", "throughput": 1572.47, "total_tokens": 546384}
739
+ {"current_steps": 3600, "total_steps": 3600, "loss": 0.0002, "lr": 1.1752214348903501e-11, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:05:47", "remaining_time": "0:00:00", "throughput": 1572.5, "total_tokens": 547136}
740
+ {"current_steps": 3600, "total_steps": 3600, "eval_loss": 1.036588430404663, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:05:48", "remaining_time": "0:00:00", "throughput": 1568.57, "total_tokens": 547136}
741
+ {"current_steps": 3600, "total_steps": 3600, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:05:50", "remaining_time": "0:00:00", "throughput": 1562.85, "total_tokens": 547136}