qingy2024 committed on
Commit d756067 · verified · 1 Parent(s): 8a114ca

Upload checkpoint 2500

Files changed (7)
  1. README.md +4 -4
  2. adapter_config.json +1 -1
  3. loss.png +2 -2
  4. optimizer.pt +1 -1
  5. rng_state.pth +1 -1
  6. scheduler.pt +1 -1
  7. trainer_state.json +703 -3
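
To work against exactly this checkpoint rather than whatever the branch head becomes later, the files can be fetched pinned to this commit. A minimal sketch using `huggingface_hub`; the repo id below is a guess for illustration, not confirmed by this page:

```python
from huggingface_hub import snapshot_download

# Download the repo as it existed at commit d756067 ("Upload checkpoint 2500").
# "qingy2024/Gradience-T1-7B" is a placeholder repo id; substitute the real one.
local_dir = snapshot_download(
    repo_id="qingy2024/Gradience-T1-7B",
    revision="d756067",  # pin to this exact commit
)
print(local_dir)
```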
README.md CHANGED
@@ -2,7 +2,7 @@
 base_model: Qwen/Qwen2.5-7B-Instruct
 library_name: peft
 ---
-# Gradience T1 7B (Step 2400 Checkpoint)
+# Gradience T1 7B (Step 2500 Checkpoint)
 
 > [!NOTE]
 > Training in progress...
@@ -15,11 +15,11 @@ library_name: peft
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
-<div style="height: 30px; width: 48.80%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
-48.8%
+<div style="height: 30px; width: 50.83%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
+50.8%
 </div>
 </div>
-<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 2400 out of 4918 steps</p>
+<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 2500 out of 4918 steps</p>
 </body>
 </html>
 
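The README changes are just the heading's step number and the progress bar, whose width is the current step as a fraction of the planned total. A quick check of the numbers shown above:

```python
# Progress fraction used for the README bar: current step / total planned steps.
step, total = 2500, 4918
pct = 100 * step / total
print(f"{pct:.2f}%")  # 50.83%, matching the updated <div> width
```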
 
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
 {
 "alpha_pattern": {},
 "auto_mapping": null,
-"base_model_name_or_path": "./Qwen-2.5-7B-Instruct",
+"base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
 "bias": "none",
 "eva_config": null,
 "exclude_modules": null,
loss.png CHANGED
Git LFS Details (before)
  • SHA256: 36f04eea377c705909f52258cc7b83ef5aab4a425c68a91194dbc3f69e0d5748
  • Pointer size: 131 Bytes
  • Size of remote file: 121 kB
Git LFS Details (after)
  • SHA256: 03fca3f5d439c68fe899a75db58a5be74726de30dcafe1a09f3f966d4d649566
  • Pointer size: 131 Bytes
  • Size of remote file: 120 kB
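`loss.png` is a rendered artifact of the log history stored in `trainer_state.json`, so the curve can be regenerated from this checkpoint. A rough sketch, assuming the files sit in a local `checkpoint-2500/` directory:

```python
import json
import matplotlib.pyplot as plt

# trainer_state.json keeps one log entry per step under "log_history".
with open("checkpoint-2500/trainer_state.json") as f:
    history = json.load(f)["log_history"]

steps = [e["step"] for e in history if "loss" in e]
losses = [e["loss"] for e in history if "loss" in e]

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.savefig("loss.png")
```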
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:513c1fd3be34fa7e0c978965231189365ad72db01f79b6ab64348ee71938bf51
+oid sha256:4ec5d1bb42b806432b0649c962fdaffdecdc557c94c8c01e010c06d455e651d3
 size 82461044
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:608fccb6c056ce88cdfd5355e6be2046f4d107a24a87c6b0d2c3b200ce6bb4ea
+oid sha256:e4ae31f3bd6abd5e088309ad57fa2e995bc6dd61c02221bc158a3d63e6ad1f06
 size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e10a3584c6ffbdce35c888630a41e0ce4967d2a9c9028a7ea9acf61c5078add
+oid sha256:9f30623027facf9b238397ebc799b819df1f5ae1bf4da593c1b0199dcd6b102f
 size 1064
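
The three binary files above change only through their LFS pointers: `oid` is the SHA-256 of the file contents and `size` its byte length, which is why `optimizer.pt` keeps the same size but gets a new `oid`. A small sketch to verify a downloaded file against its pointer:

```python
import hashlib

def lfs_oid(path: str, chunk_size: int = 1 << 20) -> str:
    """SHA-256 of the file contents -- the 'oid' recorded in a Git LFS pointer."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Expected value taken from the scheduler.pt pointer in this commit.
assert lfs_oid("scheduler.pt") == "9f30623027facf9b238397ebc799b819df1f5ae1bf4da593c1b0199dcd6b102f"
```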
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
 "best_global_step": null,
 "best_metric": null,
 "best_model_checkpoint": null,
-"epoch": 0.9758080910754219,
+"epoch": 1.0162634681845903,
 "eval_steps": 500,
-"global_step": 2400,
+"global_step": 2500,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -16808,6 +16808,706 @@
 "learning_rate": 0.00010254427030327702,
 "loss": 0.8787,
 "step": 2400
+},
+{
+"epoch": 0.9762146777800366,
+"grad_norm": 0.09117105603218079,
+"learning_rate": 0.00010250356197842459,
+"loss": 0.9053,
+"step": 2401
+},
+{
+"epoch": 0.9766212644846514,
+"grad_norm": 0.09840644896030426,
+"learning_rate": 0.00010246285365357216,
+"loss": 1.0462,
+"step": 2402
+},
+{
+"epoch": 0.9770278511892662,
+"grad_norm": 0.09379451721906662,
+"learning_rate": 0.00010242214532871972,
+"loss": 0.9617,
+"step": 2403
+},
+{
+"epoch": 0.9774344378938808,
+"grad_norm": 0.09142056852579117,
+"learning_rate": 0.00010238143700386729,
+"loss": 1.0022,
+"step": 2404
+},
+{
+"epoch": 0.9778410245984956,
+"grad_norm": 0.09325367957353592,
+"learning_rate": 0.00010234072867901487,
+"loss": 0.9356,
+"step": 2405
+},
+{
+"epoch": 0.9782476113031104,
+"grad_norm": 0.09714538604021072,
+"learning_rate": 0.00010230002035416244,
+"loss": 1.0685,
+"step": 2406
+},
+{
+"epoch": 0.9786541980077251,
+"grad_norm": 0.09502388536930084,
+"learning_rate": 0.00010225931202931001,
+"loss": 1.0158,
+"step": 2407
+},
+{
+"epoch": 0.9790607847123399,
+"grad_norm": 0.09626177698373795,
+"learning_rate": 0.00010221860370445758,
+"loss": 1.0249,
+"step": 2408
+},
+{
+"epoch": 0.9794673714169546,
+"grad_norm": 0.09790710359811783,
+"learning_rate": 0.00010217789537960514,
+"loss": 0.9974,
+"step": 2409
+},
+{
+"epoch": 0.9798739581215694,
+"grad_norm": 0.0907469391822815,
+"learning_rate": 0.0001021371870547527,
+"loss": 0.994,
+"step": 2410
+},
+{
+"epoch": 0.9802805448261842,
+"grad_norm": 0.10248905420303345,
+"learning_rate": 0.00010209647872990026,
+"loss": 1.0214,
+"step": 2411
+},
+{
+"epoch": 0.9806871315307989,
+"grad_norm": 0.09504317492246628,
+"learning_rate": 0.00010205577040504783,
+"loss": 1.0642,
+"step": 2412
+},
+{
+"epoch": 0.9810937182354137,
+"grad_norm": 0.09868543595075607,
+"learning_rate": 0.0001020150620801954,
+"loss": 1.0595,
+"step": 2413
+},
+{
+"epoch": 0.9815003049400285,
+"grad_norm": 0.08648547530174255,
+"learning_rate": 0.00010197435375534297,
+"loss": 0.9273,
+"step": 2414
+},
+{
+"epoch": 0.9819068916446432,
+"grad_norm": 0.0870203897356987,
+"learning_rate": 0.00010193364543049054,
+"loss": 0.8661,
+"step": 2415
+},
+{
+"epoch": 0.982313478349258,
+"grad_norm": 0.09689280390739441,
+"learning_rate": 0.0001018929371056381,
+"loss": 1.0179,
+"step": 2416
+},
+{
+"epoch": 0.9827200650538728,
+"grad_norm": 0.09497373551130295,
+"learning_rate": 0.00010185222878078568,
+"loss": 0.9292,
+"step": 2417
+},
+{
+"epoch": 0.9831266517584875,
+"grad_norm": 0.09194166213274002,
+"learning_rate": 0.00010181152045593325,
+"loss": 0.969,
+"step": 2418
+},
+{
+"epoch": 0.9835332384631023,
+"grad_norm": 0.08828569948673248,
+"learning_rate": 0.00010177081213108082,
+"loss": 0.8936,
+"step": 2419
+},
+{
+"epoch": 0.983939825167717,
+"grad_norm": 0.095185786485672,
+"learning_rate": 0.00010173010380622839,
+"loss": 0.9859,
+"step": 2420
+},
+{
+"epoch": 0.9843464118723317,
+"grad_norm": 0.09699594974517822,
+"learning_rate": 0.00010168939548137595,
+"loss": 1.0568,
+"step": 2421
+},
+{
+"epoch": 0.9847529985769465,
+"grad_norm": 0.09333425760269165,
+"learning_rate": 0.00010164868715652351,
+"loss": 0.9503,
+"step": 2422
+},
+{
+"epoch": 0.9851595852815613,
+"grad_norm": 0.0883539542555809,
+"learning_rate": 0.00010160797883167108,
+"loss": 0.9711,
+"step": 2423
+},
+{
+"epoch": 0.985566171986176,
+"grad_norm": 0.09544458985328674,
+"learning_rate": 0.00010156727050681864,
+"loss": 0.8668,
+"step": 2424
+},
+{
+"epoch": 0.9859727586907908,
+"grad_norm": 0.0979728177189827,
+"learning_rate": 0.00010152656218196621,
+"loss": 1.0685,
+"step": 2425
+},
+{
+"epoch": 0.9863793453954056,
+"grad_norm": 0.08907411992549896,
+"learning_rate": 0.00010148585385711378,
+"loss": 0.8947,
+"step": 2426
+},
+{
+"epoch": 0.9867859321000203,
+"grad_norm": 0.09532100707292557,
+"learning_rate": 0.00010144514553226135,
+"loss": 1.0793,
+"step": 2427
+},
+{
+"epoch": 0.9871925188046351,
+"grad_norm": 0.0916009321808815,
+"learning_rate": 0.00010140443720740893,
+"loss": 0.9604,
+"step": 2428
+},
+{
+"epoch": 0.9875991055092499,
+"grad_norm": 0.0960593968629837,
+"learning_rate": 0.0001013637288825565,
+"loss": 1.0012,
+"step": 2429
+},
+{
+"epoch": 0.9880056922138646,
+"grad_norm": 0.0948946550488472,
+"learning_rate": 0.00010132302055770406,
+"loss": 0.9555,
+"step": 2430
+},
+{
+"epoch": 0.9884122789184794,
+"grad_norm": 0.08670156449079514,
+"learning_rate": 0.00010128231223285163,
+"loss": 0.8863,
+"step": 2431
+},
+{
+"epoch": 0.9888188656230942,
+"grad_norm": 0.0870981365442276,
+"learning_rate": 0.0001012416039079992,
+"loss": 0.949,
+"step": 2432
+},
+{
+"epoch": 0.9892254523277089,
+"grad_norm": 0.09065506607294083,
+"learning_rate": 0.00010120089558314677,
+"loss": 1.0791,
+"step": 2433
+},
+{
+"epoch": 0.9896320390323237,
+"grad_norm": 0.08753534406423569,
+"learning_rate": 0.00010116018725829432,
+"loss": 0.8656,
+"step": 2434
+},
+{
+"epoch": 0.9900386257369383,
+"grad_norm": 0.08939878642559052,
+"learning_rate": 0.00010111947893344189,
+"loss": 0.8983,
+"step": 2435
+},
+{
+"epoch": 0.9904452124415531,
+"grad_norm": 0.09110575914382935,
+"learning_rate": 0.00010107877060858946,
+"loss": 0.8971,
+"step": 2436
+},
+{
+"epoch": 0.9908517991461679,
+"grad_norm": 0.08614566922187805,
+"learning_rate": 0.00010103806228373702,
+"loss": 0.9746,
+"step": 2437
+},
+{
+"epoch": 0.9912583858507826,
+"grad_norm": 0.09685923904180527,
+"learning_rate": 0.00010099735395888459,
+"loss": 0.9638,
+"step": 2438
+},
+{
+"epoch": 0.9916649725553974,
+"grad_norm": 0.10014784336090088,
+"learning_rate": 0.00010095664563403216,
+"loss": 1.0335,
+"step": 2439
+},
+{
+"epoch": 0.9920715592600122,
+"grad_norm": 0.09917939454317093,
+"learning_rate": 0.00010091593730917974,
+"loss": 1.0288,
+"step": 2440
+},
+{
+"epoch": 0.9924781459646269,
+"grad_norm": 0.09158805757761002,
+"learning_rate": 0.00010087522898432731,
+"loss": 0.9372,
+"step": 2441
+},
+{
+"epoch": 0.9928847326692417,
+"grad_norm": 0.09151756763458252,
+"learning_rate": 0.00010083452065947488,
+"loss": 1.0042,
+"step": 2442
+},
+{
+"epoch": 0.9932913193738565,
+"grad_norm": 0.09201864898204803,
+"learning_rate": 0.00010079381233462244,
+"loss": 0.937,
+"step": 2443
+},
+{
+"epoch": 0.9936979060784712,
+"grad_norm": 0.10031972825527191,
+"learning_rate": 0.00010075310400977001,
+"loss": 0.989,
+"step": 2444
+},
+{
+"epoch": 0.994104492783086,
+"grad_norm": 0.09593512862920761,
+"learning_rate": 0.00010071239568491756,
+"loss": 0.9259,
+"step": 2445
+},
+{
+"epoch": 0.9945110794877008,
+"grad_norm": 0.10088519006967545,
+"learning_rate": 0.00010067168736006513,
+"loss": 1.0888,
+"step": 2446
+},
+{
+"epoch": 0.9949176661923155,
+"grad_norm": 0.09052947163581848,
+"learning_rate": 0.0001006309790352127,
+"loss": 0.9643,
+"step": 2447
+},
+{
+"epoch": 0.9953242528969303,
+"grad_norm": 0.0943833664059639,
+"learning_rate": 0.00010059027071036027,
+"loss": 1.0308,
+"step": 2448
+},
+{
+"epoch": 0.9957308396015451,
+"grad_norm": 0.0929458737373352,
+"learning_rate": 0.00010054956238550783,
+"loss": 0.8993,
+"step": 2449
+},
+{
+"epoch": 0.9961374263061598,
+"grad_norm": 0.09643827378749847,
+"learning_rate": 0.0001005088540606554,
+"loss": 0.9708,
+"step": 2450
+},
+{
+"epoch": 0.9965440130107746,
+"grad_norm": 0.08925779908895493,
+"learning_rate": 0.00010046814573580298,
+"loss": 0.9209,
+"step": 2451
+},
+{
+"epoch": 0.9969505997153894,
+"grad_norm": 0.08630047738552094,
+"learning_rate": 0.00010042743741095055,
+"loss": 0.9324,
+"step": 2452
+},
+{
+"epoch": 0.997357186420004,
+"grad_norm": 0.10127938538789749,
+"learning_rate": 0.00010038672908609812,
+"loss": 0.9926,
+"step": 2453
+},
+{
+"epoch": 0.9977637731246188,
+"grad_norm": 0.09573110938072205,
+"learning_rate": 0.00010034602076124569,
+"loss": 0.9801,
+"step": 2454
+},
+{
+"epoch": 0.9981703598292336,
+"grad_norm": 0.0963260605931282,
+"learning_rate": 0.00010030531243639325,
+"loss": 0.98,
+"step": 2455
+},
+{
+"epoch": 0.9985769465338483,
+"grad_norm": 0.08414101600646973,
+"learning_rate": 0.00010026460411154082,
+"loss": 0.8676,
+"step": 2456
+},
+{
+"epoch": 0.9989835332384631,
+"grad_norm": 0.09320447593927383,
+"learning_rate": 0.00010022389578668838,
+"loss": 0.998,
+"step": 2457
+},
+{
+"epoch": 0.9993901199430779,
+"grad_norm": 0.09721797704696655,
+"learning_rate": 0.00010018318746183594,
+"loss": 1.0123,
+"step": 2458
+},
+{
+"epoch": 0.9997967066476926,
+"grad_norm": 0.08773447573184967,
+"learning_rate": 0.00010014247913698351,
+"loss": 0.9673,
+"step": 2459
+},
+{
+"epoch": 1.0,
+"grad_norm": 0.15718789398670197,
+"learning_rate": 0.00010010177081213108,
+"loss": 1.1286,
+"step": 2460
+},
+{
+"epoch": 1.0004065867046148,
+"grad_norm": 0.09029074758291245,
+"learning_rate": 0.00010006106248727865,
+"loss": 0.9905,
+"step": 2461
+},
+{
+"epoch": 1.0008131734092296,
+"grad_norm": 0.09984813630580902,
+"learning_rate": 0.00010002035416242621,
+"loss": 0.9981,
+"step": 2462
+},
+{
+"epoch": 1.0012197601138442,
+"grad_norm": 0.09808840602636337,
+"learning_rate": 9.997964583757378e-05,
+"loss": 1.0156,
+"step": 2463
+},
+{
+"epoch": 1.001626346818459,
+"grad_norm": 0.08917602896690369,
+"learning_rate": 9.993893751272135e-05,
+"loss": 0.944,
+"step": 2464
+},
+{
+"epoch": 1.0020329335230738,
+"grad_norm": 0.0943906158208847,
+"learning_rate": 9.989822918786892e-05,
+"loss": 0.9294,
+"step": 2465
+},
+{
+"epoch": 1.0024395202276886,
+"grad_norm": 0.09091315418481827,
+"learning_rate": 9.98575208630165e-05,
+"loss": 0.9707,
+"step": 2466
+},
+{
+"epoch": 1.0028461069323034,
+"grad_norm": 0.09035106003284454,
+"learning_rate": 9.981681253816407e-05,
+"loss": 0.9562,
+"step": 2467
+},
+{
+"epoch": 1.0032526936369182,
+"grad_norm": 0.09709779173135757,
+"learning_rate": 9.977610421331163e-05,
+"loss": 0.9287,
+"step": 2468
+},
+{
+"epoch": 1.0036592803415327,
+"grad_norm": 0.09063035994768143,
+"learning_rate": 9.973539588845919e-05,
+"loss": 0.9138,
+"step": 2469
+},
+{
+"epoch": 1.0040658670461475,
+"grad_norm": 0.09490003436803818,
+"learning_rate": 9.969468756360676e-05,
+"loss": 0.9475,
+"step": 2470
+},
+{
+"epoch": 1.0044724537507623,
+"grad_norm": 0.10134010761976242,
+"learning_rate": 9.965397923875432e-05,
+"loss": 1.0092,
+"step": 2471
+},
+{
+"epoch": 1.0048790404553771,
+"grad_norm": 0.09728873521089554,
+"learning_rate": 9.96132709139019e-05,
+"loss": 0.9498,
+"step": 2472
+},
+{
+"epoch": 1.005285627159992,
+"grad_norm": 0.09160648286342621,
+"learning_rate": 9.957256258904947e-05,
+"loss": 0.8707,
+"step": 2473
+},
+{
+"epoch": 1.0056922138646067,
+"grad_norm": 0.0939764603972435,
+"learning_rate": 9.953185426419704e-05,
+"loss": 0.9619,
+"step": 2474
+},
+{
+"epoch": 1.0060988005692213,
+"grad_norm": 0.08643637597560883,
+"learning_rate": 9.94911459393446e-05,
+"loss": 0.9377,
+"step": 2475
+},
+{
+"epoch": 1.006505387273836,
+"grad_norm": 0.09141729027032852,
+"learning_rate": 9.945043761449216e-05,
+"loss": 0.8859,
+"step": 2476
+},
+{
+"epoch": 1.006911973978451,
+"grad_norm": 0.09555509686470032,
+"learning_rate": 9.940972928963974e-05,
+"loss": 0.933,
+"step": 2477
+},
+{
+"epoch": 1.0073185606830657,
+"grad_norm": 0.0935022309422493,
+"learning_rate": 9.936902096478731e-05,
+"loss": 0.9368,
+"step": 2478
+},
+{
+"epoch": 1.0077251473876805,
+"grad_norm": 0.09959034621715546,
+"learning_rate": 9.932831263993488e-05,
+"loss": 0.974,
+"step": 2479
+},
+{
+"epoch": 1.0081317340922953,
+"grad_norm": 0.09246455878019333,
+"learning_rate": 9.928760431508245e-05,
+"loss": 0.9248,
+"step": 2480
+},
+{
+"epoch": 1.0085383207969099,
+"grad_norm": 0.10091500729322433,
+"learning_rate": 9.924689599023e-05,
+"loss": 1.122,
+"step": 2481
+},
+{
+"epoch": 1.0089449075015247,
+"grad_norm": 0.10083048790693283,
+"learning_rate": 9.920618766537757e-05,
+"loss": 1.0199,
+"step": 2482
+},
+{
+"epoch": 1.0093514942061395,
+"grad_norm": 0.09641805291175842,
+"learning_rate": 9.916547934052515e-05,
+"loss": 0.9971,
+"step": 2483
+},
+{
+"epoch": 1.0097580809107543,
+"grad_norm": 0.10362432897090912,
+"learning_rate": 9.912477101567272e-05,
+"loss": 0.9596,
+"step": 2484
+},
+{
+"epoch": 1.010164667615369,
+"grad_norm": 0.09050238877534866,
+"learning_rate": 9.908406269082028e-05,
+"loss": 0.9423,
+"step": 2485
+},
+{
+"epoch": 1.0105712543199838,
+"grad_norm": 0.10209590941667557,
+"learning_rate": 9.904335436596785e-05,
+"loss": 0.9366,
+"step": 2486
+},
+{
+"epoch": 1.0109778410245984,
+"grad_norm": 0.104631707072258,
+"learning_rate": 9.90026460411154e-05,
+"loss": 1.0476,
+"step": 2487
+},
+{
+"epoch": 1.0113844277292132,
+"grad_norm": 0.09572993963956833,
+"learning_rate": 9.896193771626297e-05,
+"loss": 1.0523,
+"step": 2488
+},
+{
+"epoch": 1.011791014433828,
+"grad_norm": 0.10640837252140045,
+"learning_rate": 9.892122939141055e-05,
+"loss": 1.1238,
+"step": 2489
+},
+{
+"epoch": 1.0121976011384428,
+"grad_norm": 0.09798834472894669,
+"learning_rate": 9.888052106655812e-05,
+"loss": 0.9597,
+"step": 2490
+},
+{
+"epoch": 1.0126041878430576,
+"grad_norm": 0.08913593739271164,
+"learning_rate": 9.883981274170569e-05,
+"loss": 0.9258,
+"step": 2491
+},
+{
+"epoch": 1.0130107745476722,
+"grad_norm": 0.09719277173280716,
+"learning_rate": 9.879910441685324e-05,
+"loss": 0.9812,
+"step": 2492
+},
+{
+"epoch": 1.013417361252287,
+"grad_norm": 0.09699688851833344,
+"learning_rate": 9.875839609200081e-05,
+"loss": 0.8946,
+"step": 2493
+},
+{
+"epoch": 1.0138239479569018,
+"grad_norm": 0.09061427414417267,
+"learning_rate": 9.871768776714838e-05,
+"loss": 0.9075,
+"step": 2494
+},
+{
+"epoch": 1.0142305346615166,
+"grad_norm": 0.08979996293783188,
+"learning_rate": 9.867697944229596e-05,
+"loss": 0.933,
+"step": 2495
+},
+{
+"epoch": 1.0146371213661314,
+"grad_norm": 0.09325064718723297,
+"learning_rate": 9.863627111744353e-05,
+"loss": 0.9604,
+"step": 2496
+},
+{
+"epoch": 1.0150437080707462,
+"grad_norm": 0.09821408241987228,
+"learning_rate": 9.85955627925911e-05,
+"loss": 1.0871,
+"step": 2497
+},
+{
+"epoch": 1.0154502947753608,
+"grad_norm": 0.09746625274419785,
+"learning_rate": 9.855485446773865e-05,
+"loss": 0.9304,
+"step": 2498
+},
+{
+"epoch": 1.0158568814799755,
+"grad_norm": 0.09508597105741501,
+"learning_rate": 9.851414614288622e-05,
+"loss": 0.9469,
+"step": 2499
+},
+{
+"epoch": 1.0162634681845903,
+"grad_norm": 0.10357919335365295,
+"learning_rate": 9.84734378180338e-05,
+"loss": 1.0272,
+"step": 2500
 }
 ],
 "logging_steps": 1,
@@ -16827,7 +17527,7 @@
 "attributes": {}
 }
 },
-"total_flos": 7.673519083047272e+18,
+"total_flos": 7.991705590388761e+18,
 "train_batch_size": 16,
 "trial_name": null,
 "trial_params": null