Upload checkpoint 4900

Browse files

Files changed (6) hide show

README.md +4 -4
adapter_config.json +1 -1
loss.png +2 -2
optimizer.pt +1 -1
scheduler.pt +1 -1
trainer_state.json +703 -3

README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 base_model: Qwen/Qwen2.5-7B-Instruct
 library_name: peft
 ---
-# Gradience T1 7B (Step 4800 Checkpoint)
 > [!NOTE]
 > Training in progress...
@@ -15,11 +15,11 @@ library_name: peft
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
-  <div style="height: 30px; width: 97.60%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
-    97.6%
   </div>
 </div>
-<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 4800 out of 4918 steps</p>
 </body>
 </html>

 base_model: Qwen/Qwen2.5-7B-Instruct
 library_name: peft
 ---
+# Gradience T1 7B (Step 4900 Checkpoint)
 > [!NOTE]
 > Training in progress...
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
+  <div style="height: 30px; width: 99.63%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
+    99.6%
   </div>
 </div>
+<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 4900 out of 4918 steps</p>
 </body>
 </html>

adapter_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "./Qwen-2.5-7B-Instruct",
   "bias": "none",
   "eva_config": null,
   "exclude_modules": null,

 {
   "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
   "bias": "none",
   "eva_config": null,
   "exclude_modules": null,

loss.png CHANGED Viewed

Git LFS Details

SHA256: 7d266437e2769ac19c2011ecb7fac0ce7ea7a99ea7d451f09506c4fbc0c34953
Pointer size: 131 Bytes
Size of remote file: 110 kB

Git LFS Details

SHA256: 78e4b22d4a15c87c21a718df21fe716b05046588c081dda8953d79a6e74b8838
Pointer size: 131 Bytes
Size of remote file: 110 kB

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0cf990e15a39324fefa2e30504f0e989c9b410d09a507ee348a4f4a4ecbca336
 size 82461044

 version https://git-lfs.github.com/spec/v1
+oid sha256:8692dc55cf936e814593059ddc130c86529b75ce648f04fffb2c51d8a817cd80
 size 82461044

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e24417db835bb8bbc64e872826cd0197eb782d12e2a39e60d01a5ed5a07ba83
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:cfaa254ae737802dd7cd5e65a69fbb58067ebe2f88a794ce9ee8b1c2a69498b4
 size 1064

trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.9514128887985362,
   "eval_steps": 500,
-  "global_step": 4800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -33608,6 +33608,706 @@
       "learning_rate": 4.8442906574394464e-06,
       "loss": 0.746,
       "step": 4800
     }
   ],
   "logging_steps": 1,
@@ -33627,7 +34327,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.5342752116840587e+19,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.9920715592600122,
   "eval_steps": 500,
+  "global_step": 4900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.8442906574394464e-06,
       "loss": 0.746,
       "step": 4800
+    },
+    {
+      "epoch": 1.951819475503151,
+      "grad_norm": 0.10632304102182388,
+      "learning_rate": 4.803582332587014e-06,
+      "loss": 0.9029,
+      "step": 4801
+    },
+    {
+      "epoch": 1.9522260622077658,
+      "grad_norm": 0.11486592143774033,
+      "learning_rate": 4.7628740077345826e-06,
+      "loss": 1.0341,
+      "step": 4802
+    },
+    {
+      "epoch": 1.9526326489123806,
+      "grad_norm": 0.10841212421655655,
+      "learning_rate": 4.722165682882149e-06,
+      "loss": 0.9374,
+      "step": 4803
+    },
+    {
+      "epoch": 1.9530392356169952,
+      "grad_norm": 0.11145360767841339,
+      "learning_rate": 4.681457358029717e-06,
+      "loss": 0.9147,
+      "step": 4804
+    },
+    {
+      "epoch": 1.9534458223216102,
+      "grad_norm": 0.11122753471136093,
+      "learning_rate": 4.640749033177285e-06,
+      "loss": 0.9332,
+      "step": 4805
+    },
+    {
+      "epoch": 1.9538524090262248,
+      "grad_norm": 0.10207870602607727,
+      "learning_rate": 4.600040708324853e-06,
+      "loss": 0.937,
+      "step": 4806
+    },
+    {
+      "epoch": 1.9542589957308396,
+      "grad_norm": 0.11454325169324875,
+      "learning_rate": 4.559332383472421e-06,
+      "loss": 1.0435,
+      "step": 4807
+    },
+    {
+      "epoch": 1.9546655824354544,
+      "grad_norm": 0.10648126155138016,
+      "learning_rate": 4.5186240586199875e-06,
+      "loss": 0.927,
+      "step": 4808
+    },
+    {
+      "epoch": 1.955072169140069,
+      "grad_norm": 0.10996894538402557,
+      "learning_rate": 4.477915733767556e-06,
+      "loss": 0.9693,
+      "step": 4809
+    },
+    {
+      "epoch": 1.955478755844684,
+      "grad_norm": 0.10057996213436127,
+      "learning_rate": 4.437207408915124e-06,
+      "loss": 0.9325,
+      "step": 4810
+    },
+    {
+      "epoch": 1.9558853425492986,
+      "grad_norm": 0.10628996044397354,
+      "learning_rate": 4.396499084062691e-06,
+      "loss": 0.891,
+      "step": 4811
+    },
+    {
+      "epoch": 1.9562919292539134,
+      "grad_norm": 0.10557537525892258,
+      "learning_rate": 4.355790759210258e-06,
+      "loss": 0.8736,
+      "step": 4812
+    },
+    {
+      "epoch": 1.9566985159585282,
+      "grad_norm": 0.10447331517934799,
+      "learning_rate": 4.3150824343578265e-06,
+      "loss": 0.9717,
+      "step": 4813
+    },
+    {
+      "epoch": 1.957105102663143,
+      "grad_norm": 0.10446681082248688,
+      "learning_rate": 4.274374109505394e-06,
+      "loss": 1.0103,
+      "step": 4814
+    },
+    {
+      "epoch": 1.9575116893677578,
+      "grad_norm": 0.10121920704841614,
+      "learning_rate": 4.233665784652962e-06,
+      "loss": 0.875,
+      "step": 4815
+    },
+    {
+      "epoch": 1.9579182760723723,
+      "grad_norm": 0.10913816094398499,
+      "learning_rate": 4.1929574598005294e-06,
+      "loss": 1.0491,
+      "step": 4816
+    },
+    {
+      "epoch": 1.9583248627769871,
+      "grad_norm": 0.11767001450061798,
+      "learning_rate": 4.152249134948097e-06,
+      "loss": 1.025,
+      "step": 4817
+    },
+    {
+      "epoch": 1.958731449481602,
+      "grad_norm": 0.10180991888046265,
+      "learning_rate": 4.111540810095665e-06,
+      "loss": 0.892,
+      "step": 4818
+    },
+    {
+      "epoch": 1.9591380361862167,
+      "grad_norm": 0.11216012388467789,
+      "learning_rate": 4.070832485243232e-06,
+      "loss": 0.9754,
+      "step": 4819
+    },
+    {
+      "epoch": 1.9595446228908315,
+      "grad_norm": 0.1098812147974968,
+      "learning_rate": 4.0301241603908e-06,
+      "loss": 0.9805,
+      "step": 4820
+    },
+    {
+      "epoch": 1.959951209595446,
+      "grad_norm": 0.10524158924818039,
+      "learning_rate": 3.989415835538368e-06,
+      "loss": 0.9045,
+      "step": 4821
+    },
+    {
+      "epoch": 1.9603577963000611,
+      "grad_norm": 0.09650178253650665,
+      "learning_rate": 3.948707510685935e-06,
+      "loss": 0.7913,
+      "step": 4822
+    },
+    {
+      "epoch": 1.9607643830046757,
+      "grad_norm": 0.11418919265270233,
+      "learning_rate": 3.907999185833503e-06,
+      "loss": 0.9991,
+      "step": 4823
+    },
+    {
+      "epoch": 1.9611709697092905,
+      "grad_norm": 0.11137097328901291,
+      "learning_rate": 3.867290860981071e-06,
+      "loss": 0.978,
+      "step": 4824
+    },
+    {
+      "epoch": 1.9615775564139053,
+      "grad_norm": 0.1029028594493866,
+      "learning_rate": 3.826582536128639e-06,
+      "loss": 0.8791,
+      "step": 4825
+    },
+    {
+      "epoch": 1.9619841431185199,
+      "grad_norm": 0.10152295976877213,
+      "learning_rate": 3.7858742112762058e-06,
+      "loss": 0.8855,
+      "step": 4826
+    },
+    {
+      "epoch": 1.962390729823135,
+      "grad_norm": 0.11157593131065369,
+      "learning_rate": 3.745165886423774e-06,
+      "loss": 1.0097,
+      "step": 4827
+    },
+    {
+      "epoch": 1.9627973165277495,
+      "grad_norm": 0.10975543409585953,
+      "learning_rate": 3.7044575615713415e-06,
+      "loss": 1.0269,
+      "step": 4828
+    },
+    {
+      "epoch": 1.9632039032323643,
+      "grad_norm": 0.10318556427955627,
+      "learning_rate": 3.6637492367189095e-06,
+      "loss": 0.9094,
+      "step": 4829
+    },
+    {
+      "epoch": 1.963610489936979,
+      "grad_norm": 0.09540821611881256,
+      "learning_rate": 3.6230409118664767e-06,
+      "loss": 0.7923,
+      "step": 4830
+    },
+    {
+      "epoch": 1.9640170766415939,
+      "grad_norm": 0.11185004562139511,
+      "learning_rate": 3.5823325870140444e-06,
+      "loss": 0.9945,
+      "step": 4831
+    },
+    {
+      "epoch": 1.9644236633462087,
+      "grad_norm": 0.1030164510011673,
+      "learning_rate": 3.541624262161612e-06,
+      "loss": 0.8952,
+      "step": 4832
+    },
+    {
+      "epoch": 1.9648302500508232,
+      "grad_norm": 0.10606315732002258,
+      "learning_rate": 3.50091593730918e-06,
+      "loss": 0.8872,
+      "step": 4833
+    },
+    {
+      "epoch": 1.9652368367554383,
+      "grad_norm": 0.10676340013742447,
+      "learning_rate": 3.4602076124567477e-06,
+      "loss": 0.9616,
+      "step": 4834
+    },
+    {
+      "epoch": 1.9656434234600528,
+      "grad_norm": 0.11374758929014206,
+      "learning_rate": 3.419499287604315e-06,
+      "loss": 1.0619,
+      "step": 4835
+    },
+    {
+      "epoch": 1.9660500101646676,
+      "grad_norm": 0.10142536461353302,
+      "learning_rate": 3.378790962751883e-06,
+      "loss": 0.8787,
+      "step": 4836
+    },
+    {
+      "epoch": 1.9664565968692824,
+      "grad_norm": 0.1088085025548935,
+      "learning_rate": 3.3380826378994506e-06,
+      "loss": 1.0706,
+      "step": 4837
+    },
+    {
+      "epoch": 1.966863183573897,
+      "grad_norm": 0.11617989093065262,
+      "learning_rate": 3.2973743130470187e-06,
+      "loss": 1.0758,
+      "step": 4838
+    },
+    {
+      "epoch": 1.967269770278512,
+      "grad_norm": 0.10999471694231033,
+      "learning_rate": 3.2566659881945863e-06,
+      "loss": 0.8955,
+      "step": 4839
+    },
+    {
+      "epoch": 1.9676763569831266,
+      "grad_norm": 0.10413683950901031,
+      "learning_rate": 3.2159576633421535e-06,
+      "loss": 0.8774,
+      "step": 4840
+    },
+    {
+      "epoch": 1.9680829436877414,
+      "grad_norm": 0.10912149399518967,
+      "learning_rate": 3.175249338489721e-06,
+      "loss": 0.9151,
+      "step": 4841
+    },
+    {
+      "epoch": 1.9684895303923562,
+      "grad_norm": 0.10065335780382156,
+      "learning_rate": 3.134541013637289e-06,
+      "loss": 0.8947,
+      "step": 4842
+    },
+    {
+      "epoch": 1.9688961170969708,
+      "grad_norm": 0.10842598974704742,
+      "learning_rate": 3.0938326887848564e-06,
+      "loss": 0.9149,
+      "step": 4843
+    },
+    {
+      "epoch": 1.9693027038015858,
+      "grad_norm": 0.09546621143817902,
+      "learning_rate": 3.0531243639324245e-06,
+      "loss": 0.8106,
+      "step": 4844
+    },
+    {
+      "epoch": 1.9697092905062004,
+      "grad_norm": 0.10605739057064056,
+      "learning_rate": 3.0124160390799917e-06,
+      "loss": 0.8663,
+      "step": 4845
+    },
+    {
+      "epoch": 1.9701158772108152,
+      "grad_norm": 0.11531540751457214,
+      "learning_rate": 2.9717077142275597e-06,
+      "loss": 0.9487,
+      "step": 4846
+    },
+    {
+      "epoch": 1.97052246391543,
+      "grad_norm": 0.112498939037323,
+      "learning_rate": 2.9309993893751274e-06,
+      "loss": 0.9846,
+      "step": 4847
+    },
+    {
+      "epoch": 1.9709290506200448,
+      "grad_norm": 0.10680878907442093,
+      "learning_rate": 2.890291064522695e-06,
+      "loss": 0.9092,
+      "step": 4848
+    },
+    {
+      "epoch": 1.9713356373246596,
+      "grad_norm": 0.11008645594120026,
+      "learning_rate": 2.8495827396702626e-06,
+      "loss": 0.918,
+      "step": 4849
+    },
+    {
+      "epoch": 1.9717422240292741,
+      "grad_norm": 0.1180918887257576,
+      "learning_rate": 2.8088744148178303e-06,
+      "loss": 1.1026,
+      "step": 4850
+    },
+    {
+      "epoch": 1.9721488107338891,
+      "grad_norm": 0.10788023471832275,
+      "learning_rate": 2.7681660899653983e-06,
+      "loss": 0.9422,
+      "step": 4851
+    },
+    {
+      "epoch": 1.9725553974385037,
+      "grad_norm": 0.11532583087682724,
+      "learning_rate": 2.7274577651129655e-06,
+      "loss": 0.9619,
+      "step": 4852
+    },
+    {
+      "epoch": 1.9729619841431185,
+      "grad_norm": 0.1164373904466629,
+      "learning_rate": 2.6867494402605336e-06,
+      "loss": 1.0735,
+      "step": 4853
+    },
+    {
+      "epoch": 1.9733685708477333,
+      "grad_norm": 0.10352805256843567,
+      "learning_rate": 2.646041115408101e-06,
+      "loss": 0.9302,
+      "step": 4854
+    },
+    {
+      "epoch": 1.973775157552348,
+      "grad_norm": 0.09697481989860535,
+      "learning_rate": 2.605332790555669e-06,
+      "loss": 0.8169,
+      "step": 4855
+    },
+    {
+      "epoch": 1.974181744256963,
+      "grad_norm": 0.10641641169786453,
+      "learning_rate": 2.5646244657032365e-06,
+      "loss": 0.9379,
+      "step": 4856
+    },
+    {
+      "epoch": 1.9745883309615775,
+      "grad_norm": 0.12247955799102783,
+      "learning_rate": 2.523916140850804e-06,
+      "loss": 1.1005,
+      "step": 4857
+    },
+    {
+      "epoch": 1.9749949176661923,
+      "grad_norm": 0.11470235139131546,
+      "learning_rate": 2.4832078159983718e-06,
+      "loss": 1.0682,
+      "step": 4858
+    },
+    {
+      "epoch": 1.975401504370807,
+      "grad_norm": 0.10415980964899063,
+      "learning_rate": 2.4424994911459394e-06,
+      "loss": 0.9184,
+      "step": 4859
+    },
+    {
+      "epoch": 1.9758080910754219,
+      "grad_norm": 0.10580716282129288,
+      "learning_rate": 2.401791166293507e-06,
+      "loss": 0.9137,
+      "step": 4860
+    },
+    {
+      "epoch": 1.9762146777800367,
+      "grad_norm": 0.10806702822446823,
+      "learning_rate": 2.3610828414410747e-06,
+      "loss": 1.0023,
+      "step": 4861
+    },
+    {
+      "epoch": 1.9766212644846513,
+      "grad_norm": 0.10730385035276413,
+      "learning_rate": 2.3203745165886423e-06,
+      "loss": 0.9394,
+      "step": 4862
+    },
+    {
+      "epoch": 1.9770278511892663,
+      "grad_norm": 0.11646751314401627,
+      "learning_rate": 2.2796661917362104e-06,
+      "loss": 1.0452,
+      "step": 4863
+    },
+    {
+      "epoch": 1.9774344378938808,
+      "grad_norm": 0.11328614503145218,
+      "learning_rate": 2.238957866883778e-06,
+      "loss": 1.0363,
+      "step": 4864
+    },
+    {
+      "epoch": 1.9778410245984956,
+      "grad_norm": 0.10477136820554733,
+      "learning_rate": 2.1982495420313456e-06,
+      "loss": 0.8967,
+      "step": 4865
+    },
+    {
+      "epoch": 1.9782476113031104,
+      "grad_norm": 0.1011333018541336,
+      "learning_rate": 2.1575412171789133e-06,
+      "loss": 0.9051,
+      "step": 4866
+    },
+    {
+      "epoch": 1.978654198007725,
+      "grad_norm": 0.10585794597864151,
+      "learning_rate": 2.116832892326481e-06,
+      "loss": 0.9641,
+      "step": 4867
+    },
+    {
+      "epoch": 1.97906078471234,
+      "grad_norm": 0.10518283396959305,
+      "learning_rate": 2.0761245674740485e-06,
+      "loss": 0.9738,
+      "step": 4868
+    },
+    {
+      "epoch": 1.9794673714169546,
+      "grad_norm": 0.10781599581241608,
+      "learning_rate": 2.035416242621616e-06,
+      "loss": 0.9535,
+      "step": 4869
+    },
+    {
+      "epoch": 1.9798739581215694,
+      "grad_norm": 0.10149887949228287,
+      "learning_rate": 1.994707917769184e-06,
+      "loss": 0.7832,
+      "step": 4870
+    },
+    {
+      "epoch": 1.9802805448261842,
+      "grad_norm": 0.10625772923231125,
+      "learning_rate": 1.9539995929167514e-06,
+      "loss": 0.8969,
+      "step": 4871
+    },
+    {
+      "epoch": 1.9806871315307988,
+      "grad_norm": 0.100648894906044,
+      "learning_rate": 1.9132912680643195e-06,
+      "loss": 0.8592,
+      "step": 4872
+    },
+    {
+      "epoch": 1.9810937182354138,
+      "grad_norm": 0.10639602690935135,
+      "learning_rate": 1.872582943211887e-06,
+      "loss": 0.9377,
+      "step": 4873
+    },
+    {
+      "epoch": 1.9815003049400284,
+      "grad_norm": 0.10608502477407455,
+      "learning_rate": 1.8318746183594548e-06,
+      "loss": 0.8221,
+      "step": 4874
+    },
+    {
+      "epoch": 1.9819068916446432,
+      "grad_norm": 0.1076526865363121,
+      "learning_rate": 1.7911662935070222e-06,
+      "loss": 1.0001,
+      "step": 4875
+    },
+    {
+      "epoch": 1.982313478349258,
+      "grad_norm": 0.10484609007835388,
+      "learning_rate": 1.75045796865459e-06,
+      "loss": 0.9281,
+      "step": 4876
+    },
+    {
+      "epoch": 1.9827200650538728,
+      "grad_norm": 0.11033840477466583,
+      "learning_rate": 1.7097496438021575e-06,
+      "loss": 1.012,
+      "step": 4877
+    },
+    {
+      "epoch": 1.9831266517584876,
+      "grad_norm": 0.10178755968809128,
+      "learning_rate": 1.6690413189497253e-06,
+      "loss": 0.8751,
+      "step": 4878
+    },
+    {
+      "epoch": 1.9835332384631021,
+      "grad_norm": 0.09968069940805435,
+      "learning_rate": 1.6283329940972931e-06,
+      "loss": 0.8481,
+      "step": 4879
+    },
+    {
+      "epoch": 1.9839398251677172,
+      "grad_norm": 0.11199220269918442,
+      "learning_rate": 1.5876246692448606e-06,
+      "loss": 1.0553,
+      "step": 4880
+    },
+    {
+      "epoch": 1.9843464118723317,
+      "grad_norm": 0.10771384090185165,
+      "learning_rate": 1.5469163443924282e-06,
+      "loss": 0.9871,
+      "step": 4881
+    },
+    {
+      "epoch": 1.9847529985769465,
+      "grad_norm": 0.1033516600728035,
+      "learning_rate": 1.5062080195399958e-06,
+      "loss": 0.8731,
+      "step": 4882
+    },
+    {
+      "epoch": 1.9851595852815613,
+      "grad_norm": 0.10771310329437256,
+      "learning_rate": 1.4654996946875637e-06,
+      "loss": 1.0152,
+      "step": 4883
+    },
+    {
+      "epoch": 1.985566171986176,
+      "grad_norm": 0.10385514050722122,
+      "learning_rate": 1.4247913698351313e-06,
+      "loss": 0.8569,
+      "step": 4884
+    },
+    {
+      "epoch": 1.985972758690791,
+      "grad_norm": 0.10435989499092102,
+      "learning_rate": 1.3840830449826992e-06,
+      "loss": 0.8999,
+      "step": 4885
+    },
+    {
+      "epoch": 1.9863793453954055,
+      "grad_norm": 0.10604739189147949,
+      "learning_rate": 1.3433747201302668e-06,
+      "loss": 0.8837,
+      "step": 4886
+    },
+    {
+      "epoch": 1.9867859321000203,
+      "grad_norm": 0.11071362346410751,
+      "learning_rate": 1.3026663952778344e-06,
+      "loss": 0.9995,
+      "step": 4887
+    },
+    {
+      "epoch": 1.987192518804635,
+      "grad_norm": 0.11492349952459335,
+      "learning_rate": 1.261958070425402e-06,
+      "loss": 1.0693,
+      "step": 4888
+    },
+    {
+      "epoch": 1.98759910550925,
+      "grad_norm": 0.11402280628681183,
+      "learning_rate": 1.2212497455729697e-06,
+      "loss": 1.0973,
+      "step": 4889
+    },
+    {
+      "epoch": 1.9880056922138647,
+      "grad_norm": 0.10784902423620224,
+      "learning_rate": 1.1805414207205373e-06,
+      "loss": 0.9591,
+      "step": 4890
+    },
+    {
+      "epoch": 1.9884122789184793,
+      "grad_norm": 0.10509707778692245,
+      "learning_rate": 1.1398330958681052e-06,
+      "loss": 0.9233,
+      "step": 4891
+    },
+    {
+      "epoch": 1.9888188656230943,
+      "grad_norm": 0.10772809386253357,
+      "learning_rate": 1.0991247710156728e-06,
+      "loss": 0.9239,
+      "step": 4892
+    },
+    {
+      "epoch": 1.9892254523277089,
+      "grad_norm": 0.10139593482017517,
+      "learning_rate": 1.0584164461632405e-06,
+      "loss": 0.8991,
+      "step": 4893
+    },
+    {
+      "epoch": 1.9896320390323237,
+      "grad_norm": 0.11088011413812637,
+      "learning_rate": 1.017708121310808e-06,
+      "loss": 0.9746,
+      "step": 4894
+    },
+    {
+      "epoch": 1.9900386257369385,
+      "grad_norm": 0.1069415956735611,
+      "learning_rate": 9.769997964583757e-07,
+      "loss": 0.9667,
+      "step": 4895
+    },
+    {
+      "epoch": 1.990445212441553,
+      "grad_norm": 0.11252355575561523,
+      "learning_rate": 9.362914716059435e-07,
+      "loss": 0.9521,
+      "step": 4896
+    },
+    {
+      "epoch": 1.990851799146168,
+      "grad_norm": 0.11555030941963196,
+      "learning_rate": 8.955831467535111e-07,
+      "loss": 0.9464,
+      "step": 4897
+    },
+    {
+      "epoch": 1.9912583858507826,
+      "grad_norm": 0.10089296847581863,
+      "learning_rate": 8.548748219010787e-07,
+      "loss": 0.9118,
+      "step": 4898
+    },
+    {
+      "epoch": 1.9916649725553974,
+      "grad_norm": 0.10483364015817642,
+      "learning_rate": 8.141664970486466e-07,
+      "loss": 0.9561,
+      "step": 4899
+    },
+    {
+      "epoch": 1.9920715592600122,
+      "grad_norm": 0.10259924083948135,
+      "learning_rate": 7.734581721962141e-07,
+      "loss": 0.937,
+      "step": 4900
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.5662724108266447e+19,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null