Commit e89e89a (verified) by qingy2024 · Parent: 5a522db

Upload checkpoint 1900

Files changed (6):
  1. README.md +4 -4
  2. adapter_config.json +1 -1
  3. loss.png +2 -2
  4. optimizer.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +703 -3
README.md CHANGED
@@ -2,7 +2,7 @@
 base_model: Qwen/Qwen2.5-7B-Instruct
 library_name: peft
 ---
-# Gradience T1 7B (Step 1800 Checkpoint)
+# Gradience T1 7B (Step 1900 Checkpoint)
 
 > [!NOTE]
 > Training in progress...
@@ -15,11 +15,11 @@ library_name: peft
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
-<div style="height: 30px; width: 36.60%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
-36.6%
+<div style="height: 30px; width: 38.63%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
+38.6%
 </div>
 </div>
-<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 1800 out of 4918 steps</p>
+<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 1900 out of 4918 steps</p>
 </body>
 </html>
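Both updated figures in the README follow directly from the step counter; as a quick sanity check, a small sketch (not part of the repo) that regenerates them:

    # Reproduce the README's progress figures from the checkpoint step.
    step, total_steps = 1900, 4918
    pct = 100 * step / total_steps
    print(f"{pct:.2f}%")  # 38.63% -- the bar's CSS width
    print(f"{pct:.1f}%")  # 38.6%  -- the bar's visible label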
 
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "./Qwen-2.5-7B-Instruct",
+  "base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
   "bias": "none",
   "eva_config": null,
   "exclude_modules": null,
loss.png CHANGED
Git LFS Details (old):
  • SHA256: 06dc6ac087be67766cde1c74f8ff74c5e89a2984533f082b9b9365e2ef9fdf9a
  • Pointer size: 130 Bytes
  • Size of remote file: 87.3 kB
Git LFS Details (new):
  • SHA256: f2345eb8a5b0a38e34b7cee23371d45337ca4014475418d77535ee39c4a087cb
  • Pointer size: 131 Bytes
  • Size of remote file: 130 kB
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea38d3a2395e7d462eb9cacb0d3f3fc856bcea16870286d46e7229a9d7f20632
+oid sha256:aed3b5ff30e3e07ae751c0198bf9476de1a89a755bb4dc5a8149b83298170716
 size 82461044
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ecd022be4c8582d05a53dd3cc5229272f1c0cf1b9bc4803f5070112cb2fa2c34
+oid sha256:ec8ba207deb0762ece0e7f805dcaf1e7c6d96da2ae9e39aab68cb2700888f6ea
 size 1064
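The binary files above are stored through Git LFS, so each diff is over the pointer file, not the payload: the pointer records the payload's SHA-256 (the oid line) and its byte size. A small sketch (not part of the repo) for checking a downloaded file against its pointer:

    import hashlib
    import os

    def matches_pointer(path: str, oid_hex: str, size: int) -> bool:
        # Hash the file in chunks and compare against the pointer's oid and size.
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                h.update(chunk)
        return h.hexdigest() == oid_hex and os.path.getsize(path) == size

    # e.g. the optimizer state uploaded in this commit:
    print(matches_pointer(
        "optimizer.pt",
        "aed3b5ff30e3e07ae751c0198bf9476de1a89a755bb4dc5a8149b83298170716",
        82461044,
    ))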
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7318560683065664,
+  "epoch": 0.7725147387680423,
   "eval_steps": 500,
-  "global_step": 1800,
+  "global_step": 1900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12608,6 +12608,706 @@
       "learning_rate": 0.00012696926521473643,
       "loss": 0.9162,
       "step": 1800
+    },
+    {
+      "epoch": 0.7322626550111812,
+      "grad_norm": 0.09074793756008148,
+      "learning_rate": 0.00012692855688988398,
+      "loss": 0.9388,
+      "step": 1801
+    },
+    {
+      "epoch": 0.7326692417157958,
+      "grad_norm": 0.10199327766895294,
+      "learning_rate": 0.00012688784856503156,
+      "loss": 0.9585,
+      "step": 1802
+    },
+    {
+      "epoch": 0.7330758284204106,
+      "grad_norm": 0.10722784698009491,
+      "learning_rate": 0.00012684714024017912,
+      "loss": 1.0226,
+      "step": 1803
+    },
+    {
+      "epoch": 0.7334824151250254,
+      "grad_norm": 0.10113389045000076,
+      "learning_rate": 0.0001268064319153267,
+      "loss": 1.0593,
+      "step": 1804
+    },
+    {
+      "epoch": 0.7338890018296401,
+      "grad_norm": 0.1125817522406578,
+      "learning_rate": 0.00012676572359047425,
+      "loss": 0.8962,
+      "step": 1805
+    },
+    {
+      "epoch": 0.7342955885342549,
+      "grad_norm": 0.10177897661924362,
+      "learning_rate": 0.0001267250152656218,
+      "loss": 1.0323,
+      "step": 1806
+    },
+    {
+      "epoch": 0.7347021752388697,
+      "grad_norm": 0.10272479057312012,
+      "learning_rate": 0.00012668430694076941,
+      "loss": 0.9947,
+      "step": 1807
+    },
+    {
+      "epoch": 0.7351087619434844,
+      "grad_norm": 0.11395642906427383,
+      "learning_rate": 0.00012664359861591697,
+      "loss": 1.0144,
+      "step": 1808
+    },
+    {
+      "epoch": 0.7355153486480992,
+      "grad_norm": 0.09565427899360657,
+      "learning_rate": 0.00012660289029106452,
+      "loss": 1.0052,
+      "step": 1809
+    },
+    {
+      "epoch": 0.735921935352714,
+      "grad_norm": 0.09244798123836517,
+      "learning_rate": 0.0001265621819662121,
+      "loss": 0.8411,
+      "step": 1810
+    },
+    {
+      "epoch": 0.7363285220573287,
+      "grad_norm": 0.08985315263271332,
+      "learning_rate": 0.00012652147364135966,
+      "loss": 1.0301,
+      "step": 1811
+    },
+    {
+      "epoch": 0.7367351087619435,
+      "grad_norm": 0.09606938809156418,
+      "learning_rate": 0.00012648076531650724,
+      "loss": 1.0053,
+      "step": 1812
+    },
+    {
+      "epoch": 0.7371416954665583,
+      "grad_norm": 0.10566183179616928,
+      "learning_rate": 0.0001264400569916548,
+      "loss": 0.9527,
+      "step": 1813
+    },
+    {
+      "epoch": 0.737548282171173,
+      "grad_norm": 0.10999652743339539,
+      "learning_rate": 0.00012639934866680237,
+      "loss": 1.0756,
+      "step": 1814
+    },
+    {
+      "epoch": 0.7379548688757878,
+      "grad_norm": 0.09473931044340134,
+      "learning_rate": 0.00012635864034194993,
+      "loss": 0.94,
+      "step": 1815
+    },
+    {
+      "epoch": 0.7383614555804026,
+      "grad_norm": 0.09815262258052826,
+      "learning_rate": 0.0001263179320170975,
+      "loss": 1.0436,
+      "step": 1816
+    },
+    {
+      "epoch": 0.7387680422850172,
+      "grad_norm": 0.08889912813901901,
+      "learning_rate": 0.00012627722369224506,
+      "loss": 0.9368,
+      "step": 1817
+    },
+    {
+      "epoch": 0.739174628989632,
+      "grad_norm": 0.09337257593870163,
+      "learning_rate": 0.00012623651536739262,
+      "loss": 1.0949,
+      "step": 1818
+    },
+    {
+      "epoch": 0.7395812156942468,
+      "grad_norm": 0.09112720191478729,
+      "learning_rate": 0.00012619580704254023,
+      "loss": 1.0239,
+      "step": 1819
+    },
+    {
+      "epoch": 0.7399878023988615,
+      "grad_norm": 0.0988708958029747,
+      "learning_rate": 0.00012615509871768778,
+      "loss": 1.0648,
+      "step": 1820
+    },
+    {
+      "epoch": 0.7403943891034763,
+      "grad_norm": 0.09849932789802551,
+      "learning_rate": 0.00012611439039283533,
+      "loss": 0.9867,
+      "step": 1821
+    },
+    {
+      "epoch": 0.7408009758080911,
+      "grad_norm": 0.09254156798124313,
+      "learning_rate": 0.00012607368206798291,
+      "loss": 0.9903,
+      "step": 1822
+    },
+    {
+      "epoch": 0.7412075625127058,
+      "grad_norm": 0.0954776182770729,
+      "learning_rate": 0.00012603297374313047,
+      "loss": 1.0081,
+      "step": 1823
+    },
+    {
+      "epoch": 0.7416141492173206,
+      "grad_norm": 0.08610807359218597,
+      "learning_rate": 0.00012599226541827805,
+      "loss": 0.9229,
+      "step": 1824
+    },
+    {
+      "epoch": 0.7420207359219354,
+      "grad_norm": 0.0977591797709465,
+      "learning_rate": 0.0001259515570934256,
+      "loss": 0.9076,
+      "step": 1825
+    },
+    {
+      "epoch": 0.7424273226265501,
+      "grad_norm": 0.0858481377363205,
+      "learning_rate": 0.00012591084876857319,
+      "loss": 0.8604,
+      "step": 1826
+    },
+    {
+      "epoch": 0.7428339093311649,
+      "grad_norm": 0.09642601758241653,
+      "learning_rate": 0.00012587014044372074,
+      "loss": 1.0476,
+      "step": 1827
+    },
+    {
+      "epoch": 0.7432404960357797,
+      "grad_norm": 0.08871784061193466,
+      "learning_rate": 0.0001258294321188683,
+      "loss": 0.9597,
+      "step": 1828
+    },
+    {
+      "epoch": 0.7436470827403944,
+      "grad_norm": 0.10808097571134567,
+      "learning_rate": 0.00012578872379401587,
+      "loss": 1.1415,
+      "step": 1829
+    },
+    {
+      "epoch": 0.7440536694450092,
+      "grad_norm": 0.09339917451143265,
+      "learning_rate": 0.00012574801546916346,
+      "loss": 0.9437,
+      "step": 1830
+    },
+    {
+      "epoch": 0.7444602561496239,
+      "grad_norm": 0.08945673704147339,
+      "learning_rate": 0.00012570730714431104,
+      "loss": 0.9714,
+      "step": 1831
+    },
+    {
+      "epoch": 0.7448668428542387,
+      "grad_norm": 0.0939527079463005,
+      "learning_rate": 0.0001256665988194586,
+      "loss": 0.9868,
+      "step": 1832
+    },
+    {
+      "epoch": 0.7452734295588535,
+      "grad_norm": 0.09327416867017746,
+      "learning_rate": 0.00012562589049460615,
+      "loss": 1.0001,
+      "step": 1833
+    },
+    {
+      "epoch": 0.7456800162634681,
+      "grad_norm": 0.10278622061014175,
+      "learning_rate": 0.00012558518216975373,
+      "loss": 1.0724,
+      "step": 1834
+    },
+    {
+      "epoch": 0.7460866029680829,
+      "grad_norm": 0.09421471506357193,
+      "learning_rate": 0.00012554447384490128,
+      "loss": 1.0088,
+      "step": 1835
+    },
+    {
+      "epoch": 0.7464931896726977,
+      "grad_norm": 0.1009073331952095,
+      "learning_rate": 0.00012550376552004886,
+      "loss": 1.0485,
+      "step": 1836
+    },
+    {
+      "epoch": 0.7468997763773124,
+      "grad_norm": 0.09199651330709457,
+      "learning_rate": 0.00012546305719519642,
+      "loss": 0.9765,
+      "step": 1837
+    },
+    {
+      "epoch": 0.7473063630819272,
+      "grad_norm": 0.09672168642282486,
+      "learning_rate": 0.000125422348870344,
+      "loss": 1.018,
+      "step": 1838
+    },
+    {
+      "epoch": 0.747712949786542,
+      "grad_norm": 0.09036868065595627,
+      "learning_rate": 0.00012538164054549155,
+      "loss": 0.9067,
+      "step": 1839
+    },
+    {
+      "epoch": 0.7481195364911567,
+      "grad_norm": 0.09706352651119232,
+      "learning_rate": 0.0001253409322206391,
+      "loss": 1.0439,
+      "step": 1840
+    },
+    {
+      "epoch": 0.7485261231957715,
+      "grad_norm": 0.09940480440855026,
+      "learning_rate": 0.00012530022389578669,
+      "loss": 1.0936,
+      "step": 1841
+    },
+    {
+      "epoch": 0.7489327099003863,
+      "grad_norm": 0.09489309787750244,
+      "learning_rate": 0.00012525951557093427,
+      "loss": 1.0606,
+      "step": 1842
+    },
+    {
+      "epoch": 0.749339296605001,
+      "grad_norm": 0.07897097617387772,
+      "learning_rate": 0.00012521880724608185,
+      "loss": 0.8109,
+      "step": 1843
+    },
+    {
+      "epoch": 0.7497458833096158,
+      "grad_norm": 0.09423919022083282,
+      "learning_rate": 0.0001251780989212294,
+      "loss": 1.0703,
+      "step": 1844
+    },
+    {
+      "epoch": 0.7501524700142306,
+      "grad_norm": 0.09601794928312302,
+      "learning_rate": 0.00012513739059637696,
+      "loss": 0.9692,
+      "step": 1845
+    },
+    {
+      "epoch": 0.7505590567188453,
+      "grad_norm": 0.09051002562046051,
+      "learning_rate": 0.00012509668227152454,
+      "loss": 0.9727,
+      "step": 1846
+    },
+    {
+      "epoch": 0.7509656434234601,
+      "grad_norm": 0.09665656834840775,
+      "learning_rate": 0.0001250559739466721,
+      "loss": 1.0701,
+      "step": 1847
+    },
+    {
+      "epoch": 0.7513722301280749,
+      "grad_norm": 0.08956587314605713,
+      "learning_rate": 0.00012501526562181967,
+      "loss": 0.9863,
+      "step": 1848
+    },
+    {
+      "epoch": 0.7517788168326895,
+      "grad_norm": 0.09464751929044724,
+      "learning_rate": 0.00012497455729696723,
+      "loss": 1.043,
+      "step": 1849
+    },
+    {
+      "epoch": 0.7521854035373043,
+      "grad_norm": 0.09246315807104111,
+      "learning_rate": 0.0001249338489721148,
+      "loss": 1.0306,
+      "step": 1850
+    },
+    {
+      "epoch": 0.7525919902419191,
+      "grad_norm": 0.0943431407213211,
+      "learning_rate": 0.00012489314064726236,
+      "loss": 0.9251,
+      "step": 1851
+    },
+    {
+      "epoch": 0.7529985769465338,
+      "grad_norm": 0.08852697908878326,
+      "learning_rate": 0.00012485243232240992,
+      "loss": 0.919,
+      "step": 1852
+    },
+    {
+      "epoch": 0.7534051636511486,
+      "grad_norm": 0.08856131881475449,
+      "learning_rate": 0.00012481172399755752,
+      "loss": 0.9874,
+      "step": 1853
+    },
+    {
+      "epoch": 0.7538117503557634,
+      "grad_norm": 0.08715582638978958,
+      "learning_rate": 0.00012477101567270508,
+      "loss": 0.9569,
+      "step": 1854
+    },
+    {
+      "epoch": 0.7542183370603781,
+      "grad_norm": 0.1005750522017479,
+      "learning_rate": 0.00012473030734785266,
+      "loss": 1.118,
+      "step": 1855
+    },
+    {
+      "epoch": 0.7546249237649929,
+      "grad_norm": 0.0848010703921318,
+      "learning_rate": 0.00012468959902300021,
+      "loss": 0.8808,
+      "step": 1856
+    },
+    {
+      "epoch": 0.7550315104696076,
+      "grad_norm": 0.10509838908910751,
+      "learning_rate": 0.00012464889069814777,
+      "loss": 1.0019,
+      "step": 1857
+    },
+    {
+      "epoch": 0.7554380971742224,
+      "grad_norm": 0.09729699045419693,
+      "learning_rate": 0.00012460818237329535,
+      "loss": 0.9275,
+      "step": 1858
+    },
+    {
+      "epoch": 0.7558446838788372,
+      "grad_norm": 0.0901610478758812,
+      "learning_rate": 0.0001245674740484429,
+      "loss": 1.0285,
+      "step": 1859
+    },
+    {
+      "epoch": 0.7562512705834519,
+      "grad_norm": 0.08691520988941193,
+      "learning_rate": 0.00012452676572359048,
+      "loss": 0.9524,
+      "step": 1860
+    },
+    {
+      "epoch": 0.7566578572880667,
+      "grad_norm": 0.09559500962495804,
+      "learning_rate": 0.00012448605739873804,
+      "loss": 1.0781,
+      "step": 1861
+    },
+    {
+      "epoch": 0.7570644439926815,
+      "grad_norm": 0.09581112861633301,
+      "learning_rate": 0.00012444534907388562,
+      "loss": 1.068,
+      "step": 1862
+    },
+    {
+      "epoch": 0.7574710306972962,
+      "grad_norm": 0.10235914587974548,
+      "learning_rate": 0.00012440464074903317,
+      "loss": 1.078,
+      "step": 1863
+    },
+    {
+      "epoch": 0.757877617401911,
+      "grad_norm": 0.09794023633003235,
+      "learning_rate": 0.00012436393242418073,
+      "loss": 1.0951,
+      "step": 1864
+    },
+    {
+      "epoch": 0.7582842041065257,
+      "grad_norm": 0.08910951763391495,
+      "learning_rate": 0.00012432322409932834,
+      "loss": 1.002,
+      "step": 1865
+    },
+    {
+      "epoch": 0.7586907908111404,
+      "grad_norm": 0.08909524232149124,
+      "learning_rate": 0.0001242825157744759,
+      "loss": 0.9027,
+      "step": 1866
+    },
+    {
+      "epoch": 0.7590973775157552,
+      "grad_norm": 0.09639742970466614,
+      "learning_rate": 0.00012424180744962347,
+      "loss": 1.1356,
+      "step": 1867
+    },
+    {
+      "epoch": 0.75950396422037,
+      "grad_norm": 0.08606995642185211,
+      "learning_rate": 0.00012420109912477103,
+      "loss": 0.8974,
+      "step": 1868
+    },
+    {
+      "epoch": 0.7599105509249847,
+      "grad_norm": 0.09715355932712555,
+      "learning_rate": 0.00012416039079991858,
+      "loss": 1.078,
+      "step": 1869
+    },
+    {
+      "epoch": 0.7603171376295995,
+      "grad_norm": 0.08933407068252563,
+      "learning_rate": 0.00012411968247506616,
+      "loss": 0.9177,
+      "step": 1870
+    },
+    {
+      "epoch": 0.7607237243342143,
+      "grad_norm": 0.0859113335609436,
+      "learning_rate": 0.00012407897415021372,
+      "loss": 0.9703,
+      "step": 1871
+    },
+    {
+      "epoch": 0.761130311038829,
+      "grad_norm": 0.09086931496858597,
+      "learning_rate": 0.0001240382658253613,
+      "loss": 1.0298,
+      "step": 1872
+    },
+    {
+      "epoch": 0.7615368977434438,
+      "grad_norm": 0.09112663567066193,
+      "learning_rate": 0.00012399755750050885,
+      "loss": 0.9918,
+      "step": 1873
+    },
+    {
+      "epoch": 0.7619434844480586,
+      "grad_norm": 0.09044841676950455,
+      "learning_rate": 0.00012395684917565643,
+      "loss": 0.9469,
+      "step": 1874
+    },
+    {
+      "epoch": 0.7623500711526733,
+      "grad_norm": 0.08345028758049011,
+      "learning_rate": 0.00012391614085080399,
+      "loss": 0.879,
+      "step": 1875
+    },
+    {
+      "epoch": 0.7627566578572881,
+      "grad_norm": 0.10249708592891693,
+      "learning_rate": 0.00012387543252595157,
+      "loss": 1.0247,
+      "step": 1876
+    },
+    {
+      "epoch": 0.7631632445619029,
+      "grad_norm": 0.0914909839630127,
+      "learning_rate": 0.00012383472420109915,
+      "loss": 0.9341,
+      "step": 1877
+    },
+    {
+      "epoch": 0.7635698312665176,
+      "grad_norm": 0.08616846054792404,
+      "learning_rate": 0.0001237940158762467,
+      "loss": 0.918,
+      "step": 1878
+    },
+    {
+      "epoch": 0.7639764179711324,
+      "grad_norm": 0.0853181779384613,
+      "learning_rate": 0.00012375330755139428,
+      "loss": 0.8903,
+      "step": 1879
+    },
+    {
+      "epoch": 0.7643830046757472,
+      "grad_norm": 0.0943385511636734,
+      "learning_rate": 0.00012371259922654184,
+      "loss": 1.0437,
+      "step": 1880
+    },
+    {
+      "epoch": 0.7647895913803618,
+      "grad_norm": 0.08487629890441895,
+      "learning_rate": 0.0001236718909016894,
+      "loss": 0.9655,
+      "step": 1881
+    },
+    {
+      "epoch": 0.7651961780849766,
+      "grad_norm": 0.09635015577077866,
+      "learning_rate": 0.00012363118257683697,
+      "loss": 1.0047,
+      "step": 1882
+    },
+    {
+      "epoch": 0.7656027647895913,
+      "grad_norm": 0.09787151217460632,
+      "learning_rate": 0.00012359047425198453,
+      "loss": 1.1058,
+      "step": 1883
+    },
+    {
+      "epoch": 0.7660093514942061,
+      "grad_norm": 0.10217342525720596,
+      "learning_rate": 0.0001235497659271321,
+      "loss": 1.1407,
+      "step": 1884
+    },
+    {
+      "epoch": 0.7664159381988209,
+      "grad_norm": 0.08770392835140228,
+      "learning_rate": 0.00012350905760227966,
+      "loss": 0.8851,
+      "step": 1885
+    },
+    {
+      "epoch": 0.7668225249034356,
+      "grad_norm": 0.08978156745433807,
+      "learning_rate": 0.00012346834927742724,
+      "loss": 1.0138,
+      "step": 1886
+    },
+    {
+      "epoch": 0.7672291116080504,
+      "grad_norm": 0.09110313653945923,
+      "learning_rate": 0.0001234276409525748,
+      "loss": 0.8872,
+      "step": 1887
+    },
+    {
+      "epoch": 0.7676356983126652,
+      "grad_norm": 0.0905870720744133,
+      "learning_rate": 0.00012338693262772238,
+      "loss": 0.9819,
+      "step": 1888
+    },
+    {
+      "epoch": 0.7680422850172799,
+      "grad_norm": 0.09418340027332306,
+      "learning_rate": 0.00012334622430286996,
+      "loss": 1.0486,
+      "step": 1889
+    },
+    {
+      "epoch": 0.7684488717218947,
+      "grad_norm": 0.09140585362911224,
+      "learning_rate": 0.00012330551597801751,
+      "loss": 0.9463,
+      "step": 1890
+    },
+    {
+      "epoch": 0.7688554584265095,
+      "grad_norm": 0.08720141649246216,
+      "learning_rate": 0.0001232648076531651,
+      "loss": 0.9833,
+      "step": 1891
+    },
+    {
+      "epoch": 0.7692620451311242,
+      "grad_norm": 0.09206419438123703,
+      "learning_rate": 0.00012322409932831265,
+      "loss": 0.9554,
+      "step": 1892
+    },
+    {
+      "epoch": 0.769668631835739,
+      "grad_norm": 0.09324870258569717,
+      "learning_rate": 0.0001231833910034602,
+      "loss": 1.0703,
+      "step": 1893
+    },
+    {
+      "epoch": 0.7700752185403538,
+      "grad_norm": 0.0868481770157814,
+      "learning_rate": 0.00012314268267860778,
+      "loss": 0.9374,
+      "step": 1894
+    },
+    {
+      "epoch": 0.7704818052449685,
+      "grad_norm": 0.0907289981842041,
+      "learning_rate": 0.00012310197435375534,
+      "loss": 1.0148,
+      "step": 1895
+    },
+    {
+      "epoch": 0.7708883919495833,
+      "grad_norm": 0.09804967790842056,
+      "learning_rate": 0.00012306126602890292,
+      "loss": 1.0541,
+      "step": 1896
+    },
+    {
+      "epoch": 0.771294978654198,
+      "grad_norm": 0.09168083965778351,
+      "learning_rate": 0.00012302055770405047,
+      "loss": 0.9363,
+      "step": 1897
+    },
+    {
+      "epoch": 0.7717015653588127,
+      "grad_norm": 0.09078045189380646,
+      "learning_rate": 0.00012297984937919805,
+      "loss": 1.0683,
+      "step": 1898
+    },
+    {
+      "epoch": 0.7721081520634275,
+      "grad_norm": 0.08930620551109314,
+      "learning_rate": 0.00012293914105434564,
+      "loss": 0.9659,
+      "step": 1899
+    },
+    {
+      "epoch": 0.7725147387680423,
+      "grad_norm": 0.09990911930799484,
+      "learning_rate": 0.0001228984327294932,
+      "loss": 1.1301,
+      "step": 1900
     }
   ],
   "logging_steps": 1,
@@ -12627,7 +13327,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.773449995988025e+18,
+  "total_flos": 6.085340035177267e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null