qingy2024 committed
Commit c3cff6d · verified · Parent: ec029f6

Upload checkpoint 7600

Files changed (6):
  1. README.md +3 -3
  2. adapter_model.safetensors +1 -1
  3. loss.png +2 -2
  4. optimizer.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +703 -3
README.md CHANGED
@@ -2,7 +2,7 @@
  base_model: Qwen/Qwen2.5-3B-Instruct
  library_name: peft
  ---
- # Gradience T1 3B (Step 7500 Checkpoint)
+ # Gradience T1 3B (Step 7600 Checkpoint)

  > [!NOTE]
  > Training in progress...
@@ -38,11 +38,11 @@ library_name: peft
  </head>
  <body>
  <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
- <div style="height: 30px; width: 76.24%; background-color: #76c7c0; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
+ <div style="height: 30px; width: 77.25%; background-color: #76c7c0; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
  <!-- 3.75% -->
  </div>
  </div>
- <p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 7500 out of 9838 steps</p>
+ <p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 7600 out of 9838 steps</p>
  </body>
  </html>

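The two numbers this README edit touches stay consistent with each other: the bar width is simply the checkpoint step as a percentage of total steps, rounded to two decimals (7600 / 9838 ≈ 77.25%, just as 7500 / 9838 ≈ 76.24% before). A minimal sketch of how such a fragment could be regenerated at each checkpoint, assuming exactly that formula; the function name is hypothetical, not something from this repo:

```python
def progress_fragment(step: int, total_steps: int) -> str:
    """Render the README progress-bar width and caption for one checkpoint."""
    width = round(100 * step / total_steps, 2)  # 7600 / 9838 -> 77.25
    bar = f'<div style="height: 30px; width: {width}%;"></div>'
    caption = f"<p>Progress: {step} out of {total_steps} steps</p>"
    return f"{bar}\n{caption}"

print(progress_fragment(7600, 9838))  # width: 77.25%, matching the new README
```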
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7ba7de9543f1f34b4b94e34ebafefebbb9e772bbfa1a36d1442368c7d913cd4f
+ oid sha256:1fa1a8b2084abf4c0aa143ad82d5673ad22c47fc43f7f35a295ebd5f949602b1
  size 119801528
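The adapter, optimizer, and scheduler entries in this commit are Git LFS pointer files: each records only the spec version, the SHA-256 of the blob (`oid`), and its size in bytes. A minimal sketch of checking a downloaded blob against those pointer fields, assuming standard Git LFS semantics (the oid is the SHA-256 of the raw file contents); the local path is hypothetical:

```python
import hashlib
import os

def verify_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a downloaded blob against the oid/size fields of its LFS pointer."""
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Hypothetical local copy of this checkpoint's adapter weights:
ok = verify_lfs_pointer(
    "adapter_model.safetensors",
    "1fa1a8b2084abf4c0aa143ad82d5673ad22c47fc43f7f35a295ebd5f949602b1",
    119801528,
)
print("pointer matches" if ok else "mismatch")
```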
loss.png CHANGED

Git LFS Details (before)
  • SHA256: b7c74551b71b7f1b5af4da0bca6229560fb22469d7f787f628e404409beedab3
  • Pointer size: 131 Bytes
  • Size of remote file: 177 kB

Git LFS Details (after)
  • SHA256: 5cef93a5effa7e25be405a0ddd1534a84ea7fe5f352c25b178ac088ad207e690
  • Pointer size: 131 Bytes
  • Size of remote file: 176 kB
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e1be61e6aa4af344c33fc4c28ffd8c21f96ebcaee499984ec5ff9ce87c59b1e2
+ oid sha256:d724f96203373dcec88adb5dfa4341c8156f923a7472825798252a2a46750bd3
  size 61392692
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3c3e76495dee6dbbdad9587f79a6f58b9eb8e66519d8012f0cf4e9c842847e55
+ oid sha256:5559d18ced05b328f0925fbaa730c7e7e3e59068bfb889a48ac53bc0cce0a8c7
  size 1064
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 1.5247001423053466,
+ "epoch": 1.5450294775360844,
  "eval_steps": 500,
- "global_step": 7500,
+ "global_step": 7600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -52508,6 +52508,706 @@
  "learning_rate": 4.757449405064579e-05,
  "loss": 0.9683,
  "step": 7500
+ },
+ {
+ "epoch": 1.524903435657654,
+ "grad_norm": 0.16842851042747498,
+ "learning_rate": 4.755415437811451e-05,
+ "loss": 1.1311,
+ "step": 7501
+ },
+ {
+ "epoch": 1.5251067290099614,
+ "grad_norm": 0.1710801124572754,
+ "learning_rate": 4.753381470558324e-05,
+ "loss": 1.051,
+ "step": 7502
+ },
+ {
+ "epoch": 1.5253100223622686,
+ "grad_norm": 0.14032766222953796,
+ "learning_rate": 4.751347503305197e-05,
+ "loss": 0.886,
+ "step": 7503
+ },
+ {
+ "epoch": 1.5255133157145762,
+ "grad_norm": 0.16021350026130676,
+ "learning_rate": 4.74931353605207e-05,
+ "loss": 1.1057,
+ "step": 7504
+ },
+ {
+ "epoch": 1.5257166090668837,
+ "grad_norm": 0.13777956366539001,
+ "learning_rate": 4.747279568798942e-05,
+ "loss": 0.8405,
+ "step": 7505
+ },
+ {
+ "epoch": 1.525919902419191,
+ "grad_norm": 0.15827859938144684,
+ "learning_rate": 4.7452456015458155e-05,
+ "loss": 0.9807,
+ "step": 7506
+ },
+ {
+ "epoch": 1.5261231957714982,
+ "grad_norm": 0.14875048398971558,
+ "learning_rate": 4.743211634292688e-05,
+ "loss": 0.9142,
+ "step": 7507
+ },
+ {
+ "epoch": 1.5263264891238055,
+ "grad_norm": 0.1471138298511505,
+ "learning_rate": 4.741177667039561e-05,
+ "loss": 0.9305,
+ "step": 7508
+ },
+ {
+ "epoch": 1.526529782476113,
+ "grad_norm": 0.15858818590641022,
+ "learning_rate": 4.7391436997864335e-05,
+ "loss": 1.0326,
+ "step": 7509
+ },
+ {
+ "epoch": 1.5267330758284205,
+ "grad_norm": 0.14757820963859558,
+ "learning_rate": 4.7371097325333066e-05,
+ "loss": 0.9423,
+ "step": 7510
+ },
+ {
+ "epoch": 1.5269363691807278,
+ "grad_norm": 0.14671318233013153,
+ "learning_rate": 4.735075765280179e-05,
+ "loss": 0.9953,
+ "step": 7511
+ },
+ {
+ "epoch": 1.5271396625330351,
+ "grad_norm": 0.16315753757953644,
+ "learning_rate": 4.733041798027052e-05,
+ "loss": 1.0674,
+ "step": 7512
+ },
+ {
+ "epoch": 1.5273429558853424,
+ "grad_norm": 0.15158745646476746,
+ "learning_rate": 4.7310078307739246e-05,
+ "loss": 0.9416,
+ "step": 7513
+ },
+ {
+ "epoch": 1.52754624923765,
+ "grad_norm": 0.1569458246231079,
+ "learning_rate": 4.728973863520798e-05,
+ "loss": 1.1336,
+ "step": 7514
+ },
+ {
+ "epoch": 1.5277495425899574,
+ "grad_norm": 0.15651223063468933,
+ "learning_rate": 4.72693989626767e-05,
+ "loss": 0.9724,
+ "step": 7515
+ },
+ {
+ "epoch": 1.5279528359422647,
+ "grad_norm": 0.1413620412349701,
+ "learning_rate": 4.724905929014543e-05,
+ "loss": 0.9164,
+ "step": 7516
+ },
+ {
+ "epoch": 1.528156129294572,
+ "grad_norm": 0.14873231947422028,
+ "learning_rate": 4.722871961761416e-05,
+ "loss": 0.9904,
+ "step": 7517
+ },
+ {
+ "epoch": 1.5283594226468793,
+ "grad_norm": 0.16632919013500214,
+ "learning_rate": 4.720837994508289e-05,
+ "loss": 1.0332,
+ "step": 7518
+ },
+ {
+ "epoch": 1.5285627159991868,
+ "grad_norm": 0.1570500135421753,
+ "learning_rate": 4.718804027255161e-05,
+ "loss": 1.0491,
+ "step": 7519
+ },
+ {
+ "epoch": 1.5287660093514943,
+ "grad_norm": 0.17738598585128784,
+ "learning_rate": 4.7167700600020344e-05,
+ "loss": 1.049,
+ "step": 7520
+ },
+ {
+ "epoch": 1.5289693027038016,
+ "grad_norm": 0.1635134369134903,
+ "learning_rate": 4.714736092748907e-05,
+ "loss": 1.1412,
+ "step": 7521
+ },
+ {
+ "epoch": 1.529172596056109,
+ "grad_norm": 0.16301599144935608,
+ "learning_rate": 4.71270212549578e-05,
+ "loss": 1.0967,
+ "step": 7522
+ },
+ {
+ "epoch": 1.5293758894084164,
+ "grad_norm": 0.14440634846687317,
+ "learning_rate": 4.7106681582426525e-05,
+ "loss": 0.8357,
+ "step": 7523
+ },
+ {
+ "epoch": 1.5295791827607237,
+ "grad_norm": 0.144694983959198,
+ "learning_rate": 4.7086341909895256e-05,
+ "loss": 0.8722,
+ "step": 7524
+ },
+ {
+ "epoch": 1.5297824761130312,
+ "grad_norm": 0.14646680653095245,
+ "learning_rate": 4.706600223736398e-05,
+ "loss": 0.8988,
+ "step": 7525
+ },
+ {
+ "epoch": 1.5299857694653385,
+ "grad_norm": 0.1644057333469391,
+ "learning_rate": 4.704566256483271e-05,
+ "loss": 1.1197,
+ "step": 7526
+ },
+ {
+ "epoch": 1.5301890628176458,
+ "grad_norm": 0.1693752557039261,
+ "learning_rate": 4.7025322892301436e-05,
+ "loss": 1.0486,
+ "step": 7527
+ },
+ {
+ "epoch": 1.5303923561699533,
+ "grad_norm": 0.1716986894607544,
+ "learning_rate": 4.700498321977017e-05,
+ "loss": 1.1087,
+ "step": 7528
+ },
+ {
+ "epoch": 1.5305956495222606,
+ "grad_norm": 0.1740422248840332,
+ "learning_rate": 4.698464354723889e-05,
+ "loss": 1.0909,
+ "step": 7529
+ },
+ {
+ "epoch": 1.530798942874568,
+ "grad_norm": 0.15906310081481934,
+ "learning_rate": 4.696430387470762e-05,
+ "loss": 1.0841,
+ "step": 7530
+ },
+ {
+ "epoch": 1.5310022362268754,
+ "grad_norm": 0.14159859716892242,
+ "learning_rate": 4.694396420217635e-05,
+ "loss": 0.8766,
+ "step": 7531
+ },
+ {
+ "epoch": 1.5312055295791827,
+ "grad_norm": 0.17096573114395142,
+ "learning_rate": 4.692362452964508e-05,
+ "loss": 1.1308,
+ "step": 7532
+ },
+ {
+ "epoch": 1.5314088229314902,
+ "grad_norm": 0.16331720352172852,
+ "learning_rate": 4.69032848571138e-05,
+ "loss": 0.9884,
+ "step": 7533
+ },
+ {
+ "epoch": 1.5316121162837977,
+ "grad_norm": 0.15612895786762238,
+ "learning_rate": 4.6882945184582534e-05,
+ "loss": 0.9737,
+ "step": 7534
+ },
+ {
+ "epoch": 1.531815409636105,
+ "grad_norm": 0.1716272234916687,
+ "learning_rate": 4.686260551205126e-05,
+ "loss": 1.2049,
+ "step": 7535
+ },
+ {
+ "epoch": 1.5320187029884123,
+ "grad_norm": 0.15378396213054657,
+ "learning_rate": 4.684226583951999e-05,
+ "loss": 1.0315,
+ "step": 7536
+ },
+ {
+ "epoch": 1.5322219963407195,
+ "grad_norm": 0.16745533049106598,
+ "learning_rate": 4.682192616698871e-05,
+ "loss": 1.1749,
+ "step": 7537
+ },
+ {
+ "epoch": 1.532425289693027,
+ "grad_norm": 0.16122505068778992,
+ "learning_rate": 4.680158649445744e-05,
+ "loss": 1.1481,
+ "step": 7538
+ },
+ {
+ "epoch": 1.5326285830453346,
+ "grad_norm": 0.15753133594989777,
+ "learning_rate": 4.678124682192617e-05,
+ "loss": 1.0085,
+ "step": 7539
+ },
+ {
+ "epoch": 1.5328318763976418,
+ "grad_norm": 0.15394344925880432,
+ "learning_rate": 4.6760907149394895e-05,
+ "loss": 0.9611,
+ "step": 7540
+ },
+ {
+ "epoch": 1.5330351697499491,
+ "grad_norm": 0.1620665341615677,
+ "learning_rate": 4.6740567476863626e-05,
+ "loss": 0.9435,
+ "step": 7541
+ },
+ {
+ "epoch": 1.5332384631022564,
+ "grad_norm": 0.15785206854343414,
+ "learning_rate": 4.672022780433235e-05,
+ "loss": 1.0311,
+ "step": 7542
+ },
+ {
+ "epoch": 1.533441756454564,
+ "grad_norm": 0.15812784433364868,
+ "learning_rate": 4.669988813180108e-05,
+ "loss": 0.9304,
+ "step": 7543
+ },
+ {
+ "epoch": 1.5336450498068714,
+ "grad_norm": 0.14829683303833008,
+ "learning_rate": 4.6679548459269806e-05,
+ "loss": 0.9735,
+ "step": 7544
+ },
+ {
+ "epoch": 1.5338483431591787,
+ "grad_norm": 0.15306831896305084,
+ "learning_rate": 4.665920878673854e-05,
+ "loss": 0.9459,
+ "step": 7545
+ },
+ {
+ "epoch": 1.534051636511486,
+ "grad_norm": 0.1524849534034729,
+ "learning_rate": 4.663886911420726e-05,
+ "loss": 0.9989,
+ "step": 7546
+ },
+ {
+ "epoch": 1.5342549298637933,
+ "grad_norm": 0.1524866819381714,
+ "learning_rate": 4.661852944167599e-05,
+ "loss": 0.9516,
+ "step": 7547
+ },
+ {
+ "epoch": 1.5344582232161008,
+ "grad_norm": 0.1561049073934555,
+ "learning_rate": 4.659818976914472e-05,
+ "loss": 0.9629,
+ "step": 7548
+ },
+ {
+ "epoch": 1.5346615165684083,
+ "grad_norm": 0.15052708983421326,
+ "learning_rate": 4.657785009661345e-05,
+ "loss": 0.9709,
+ "step": 7549
+ },
+ {
+ "epoch": 1.5348648099207156,
+ "grad_norm": 0.16317294538021088,
+ "learning_rate": 4.655751042408217e-05,
+ "loss": 1.0431,
+ "step": 7550
+ },
+ {
+ "epoch": 1.535068103273023,
+ "grad_norm": 0.1577170193195343,
+ "learning_rate": 4.6537170751550904e-05,
+ "loss": 1.0794,
+ "step": 7551
+ },
+ {
+ "epoch": 1.5352713966253302,
+ "grad_norm": 0.16741138696670532,
+ "learning_rate": 4.651683107901963e-05,
+ "loss": 1.2215,
+ "step": 7552
+ },
+ {
+ "epoch": 1.5354746899776377,
+ "grad_norm": 0.1500609666109085,
+ "learning_rate": 4.649649140648836e-05,
+ "loss": 0.9439,
+ "step": 7553
+ },
+ {
+ "epoch": 1.5356779833299452,
+ "grad_norm": 0.15758995711803436,
+ "learning_rate": 4.6476151733957085e-05,
+ "loss": 0.8848,
+ "step": 7554
+ },
+ {
+ "epoch": 1.5358812766822525,
+ "grad_norm": 0.14967188239097595,
+ "learning_rate": 4.6455812061425816e-05,
+ "loss": 0.9408,
+ "step": 7555
+ },
+ {
+ "epoch": 1.5360845700345598,
+ "grad_norm": 0.13587024807929993,
+ "learning_rate": 4.643547238889454e-05,
+ "loss": 0.9078,
+ "step": 7556
+ },
+ {
+ "epoch": 1.5362878633868673,
+ "grad_norm": 0.1551710069179535,
+ "learning_rate": 4.641513271636327e-05,
+ "loss": 1.0213,
+ "step": 7557
+ },
+ {
+ "epoch": 1.5364911567391746,
+ "grad_norm": 0.15696901082992554,
+ "learning_rate": 4.6394793043831996e-05,
+ "loss": 0.9854,
+ "step": 7558
+ },
+ {
+ "epoch": 1.536694450091482,
+ "grad_norm": 0.14111942052841187,
+ "learning_rate": 4.637445337130073e-05,
+ "loss": 0.8104,
+ "step": 7559
+ },
+ {
+ "epoch": 1.5368977434437894,
+ "grad_norm": 0.16344057023525238,
+ "learning_rate": 4.635411369876945e-05,
+ "loss": 1.0198,
+ "step": 7560
+ },
+ {
+ "epoch": 1.5371010367960967,
+ "grad_norm": 0.15371447801589966,
+ "learning_rate": 4.633377402623818e-05,
+ "loss": 1.1088,
+ "step": 7561
+ },
+ {
+ "epoch": 1.5373043301484042,
+ "grad_norm": 0.16405069828033447,
+ "learning_rate": 4.631343435370691e-05,
+ "loss": 1.1632,
+ "step": 7562
+ },
+ {
+ "epoch": 1.5375076235007117,
+ "grad_norm": 0.17231358587741852,
+ "learning_rate": 4.629309468117564e-05,
+ "loss": 1.0729,
+ "step": 7563
+ },
+ {
+ "epoch": 1.537710916853019,
+ "grad_norm": 0.16849292814731598,
+ "learning_rate": 4.627275500864436e-05,
+ "loss": 1.1287,
+ "step": 7564
+ },
+ {
+ "epoch": 1.5379142102053263,
+ "grad_norm": 0.14124159514904022,
+ "learning_rate": 4.6252415336113094e-05,
+ "loss": 0.9726,
+ "step": 7565
+ },
+ {
+ "epoch": 1.5381175035576335,
+ "grad_norm": 0.16582997143268585,
+ "learning_rate": 4.623207566358182e-05,
+ "loss": 1.048,
+ "step": 7566
+ },
+ {
+ "epoch": 1.538320796909941,
+ "grad_norm": 0.15703178942203522,
+ "learning_rate": 4.621173599105055e-05,
+ "loss": 0.9917,
+ "step": 7567
+ },
+ {
+ "epoch": 1.5385240902622486,
+ "grad_norm": 0.1521129459142685,
+ "learning_rate": 4.6191396318519274e-05,
+ "loss": 0.9314,
+ "step": 7568
+ },
+ {
+ "epoch": 1.5387273836145559,
+ "grad_norm": 0.18239177763462067,
+ "learning_rate": 4.6171056645988006e-05,
+ "loss": 1.1309,
+ "step": 7569
+ },
+ {
+ "epoch": 1.5389306769668631,
+ "grad_norm": 0.15608282387256622,
+ "learning_rate": 4.615071697345673e-05,
+ "loss": 0.9907,
+ "step": 7570
+ },
+ {
+ "epoch": 1.5391339703191704,
+ "grad_norm": 0.14907321333885193,
+ "learning_rate": 4.613037730092546e-05,
+ "loss": 0.948,
+ "step": 7571
+ },
+ {
+ "epoch": 1.539337263671478,
+ "grad_norm": 0.15870921313762665,
+ "learning_rate": 4.6110037628394186e-05,
+ "loss": 1.0293,
+ "step": 7572
+ },
+ {
+ "epoch": 1.5395405570237854,
+ "grad_norm": 0.1471608281135559,
+ "learning_rate": 4.608969795586292e-05,
+ "loss": 0.9045,
+ "step": 7573
+ },
+ {
+ "epoch": 1.5397438503760927,
+ "grad_norm": 0.1473323255777359,
+ "learning_rate": 4.606935828333164e-05,
+ "loss": 0.9773,
+ "step": 7574
+ },
+ {
+ "epoch": 1.5399471437284,
+ "grad_norm": 0.15672756731510162,
+ "learning_rate": 4.604901861080037e-05,
+ "loss": 0.9564,
+ "step": 7575
+ },
+ {
+ "epoch": 1.5401504370807073,
+ "grad_norm": 0.13355454802513123,
+ "learning_rate": 4.60286789382691e-05,
+ "loss": 0.9043,
+ "step": 7576
+ },
+ {
+ "epoch": 1.5403537304330148,
+ "grad_norm": 0.16888266801834106,
+ "learning_rate": 4.600833926573783e-05,
+ "loss": 1.049,
+ "step": 7577
+ },
+ {
+ "epoch": 1.5405570237853223,
+ "grad_norm": 0.14586526155471802,
+ "learning_rate": 4.5987999593206546e-05,
+ "loss": 0.991,
+ "step": 7578
+ },
+ {
+ "epoch": 1.5407603171376296,
+ "grad_norm": 0.15697935223579407,
+ "learning_rate": 4.596765992067528e-05,
+ "loss": 0.9276,
+ "step": 7579
+ },
+ {
+ "epoch": 1.540963610489937,
+ "grad_norm": 0.1606079787015915,
+ "learning_rate": 4.5947320248144e-05,
+ "loss": 1.1167,
+ "step": 7580
+ },
+ {
+ "epoch": 1.5411669038422442,
+ "grad_norm": 0.14496320486068726,
+ "learning_rate": 4.592698057561273e-05,
+ "loss": 0.978,
+ "step": 7581
+ },
+ {
+ "epoch": 1.5413701971945517,
+ "grad_norm": 0.1540028601884842,
+ "learning_rate": 4.590664090308146e-05,
+ "loss": 0.9584,
+ "step": 7582
+ },
+ {
+ "epoch": 1.5415734905468592,
+ "grad_norm": 0.14273619651794434,
+ "learning_rate": 4.588630123055019e-05,
+ "loss": 0.9559,
+ "step": 7583
+ },
+ {
+ "epoch": 1.5417767838991665,
+ "grad_norm": 0.15364350378513336,
+ "learning_rate": 4.586596155801891e-05,
+ "loss": 1.067,
+ "step": 7584
+ },
+ {
+ "epoch": 1.5419800772514738,
+ "grad_norm": 0.15916843712329865,
+ "learning_rate": 4.5845621885487644e-05,
+ "loss": 0.9734,
+ "step": 7585
+ },
+ {
+ "epoch": 1.5421833706037813,
+ "grad_norm": 0.16618654131889343,
+ "learning_rate": 4.582528221295637e-05,
+ "loss": 1.0467,
+ "step": 7586
+ },
+ {
+ "epoch": 1.5423866639560886,
+ "grad_norm": 0.1487346738576889,
+ "learning_rate": 4.58049425404251e-05,
+ "loss": 0.9555,
+ "step": 7587
+ },
+ {
+ "epoch": 1.542589957308396,
+ "grad_norm": 0.1543288677930832,
+ "learning_rate": 4.5784602867893825e-05,
+ "loss": 1.0203,
+ "step": 7588
+ },
+ {
+ "epoch": 1.5427932506607034,
+ "grad_norm": 0.15385927259922028,
+ "learning_rate": 4.5764263195362556e-05,
+ "loss": 1.0728,
+ "step": 7589
+ },
+ {
+ "epoch": 1.5429965440130107,
+ "grad_norm": 0.18972186744213104,
+ "learning_rate": 4.574392352283128e-05,
+ "loss": 1.1087,
+ "step": 7590
+ },
+ {
+ "epoch": 1.5431998373653182,
+ "grad_norm": 0.17217358946800232,
+ "learning_rate": 4.572358385030001e-05,
+ "loss": 1.0384,
+ "step": 7591
+ },
+ {
+ "epoch": 1.5434031307176257,
+ "grad_norm": 0.15717031061649323,
+ "learning_rate": 4.5703244177768736e-05,
+ "loss": 0.8593,
+ "step": 7592
+ },
+ {
+ "epoch": 1.543606424069933,
+ "grad_norm": 0.17360135912895203,
+ "learning_rate": 4.568290450523747e-05,
+ "loss": 1.1914,
+ "step": 7593
+ },
+ {
+ "epoch": 1.5438097174222403,
+ "grad_norm": 0.15492455661296844,
+ "learning_rate": 4.566256483270619e-05,
+ "loss": 0.9039,
+ "step": 7594
+ },
+ {
+ "epoch": 1.5440130107745476,
+ "grad_norm": 0.15058903396129608,
+ "learning_rate": 4.564222516017492e-05,
+ "loss": 0.9224,
+ "step": 7595
+ },
+ {
+ "epoch": 1.544216304126855,
+ "grad_norm": 0.16502228379249573,
+ "learning_rate": 4.562188548764365e-05,
+ "loss": 0.9956,
+ "step": 7596
+ },
+ {
+ "epoch": 1.5444195974791626,
+ "grad_norm": 0.15759393572807312,
+ "learning_rate": 4.560154581511238e-05,
+ "loss": 1.0067,
+ "step": 7597
+ },
+ {
+ "epoch": 1.5446228908314699,
+ "grad_norm": 0.1422048658132553,
+ "learning_rate": 4.558120614258111e-05,
+ "loss": 0.9564,
+ "step": 7598
+ },
+ {
+ "epoch": 1.5448261841837772,
+ "grad_norm": 0.13447371125221252,
+ "learning_rate": 4.5560866470049834e-05,
+ "loss": 0.9567,
+ "step": 7599
+ },
+ {
+ "epoch": 1.5450294775360844,
+ "grad_norm": 0.1465720683336258,
+ "learning_rate": 4.5540526797518566e-05,
+ "loss": 0.945,
+ "step": 7600
  }
  ],
  "logging_steps": 1,
 
@@ -52527,7 +53227,7 @@
  "attributes": {}
  }
  },
- "total_flos": 4.2064980872392704e+18,
+ "total_flos": 4.261864221527556e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null