Upload checkpoint 5000

Browse files

Files changed (7) hide show

README.md +3 -3
adapter_model.safetensors +1 -1
loss.png +2 -2
optimizer.pt +1 -1
rng_state.pth +1 -1
scheduler.pt +1 -1
trainer_state.json +703 -3

README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 base_model: Qwen/Qwen2.5-3B-Instruct
 library_name: peft
 ---
-# Gradience T1 3B (Step 4900 Checkpoint)
 > [!NOTE]
 > Training in progress...
@@ -38,11 +38,11 @@ library_name: peft
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
-  <div style="height: 30px; width: 49.81%; background-color: #76c7c0; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
     <!--  3.75% -->
   </div>
 </div>
-<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 4900 out of 9838 steps</p>
 </body>
 </html>

 base_model: Qwen/Qwen2.5-3B-Instruct
 library_name: peft
 ---
+# Gradience T1 3B (Step 5000 Checkpoint)
 > [!NOTE]
 > Training in progress...
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
+  <div style="height: 30px; width: 50.82%; background-color: #76c7c0; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
     <!--  3.75% -->
   </div>
 </div>
+<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 5000 out of 9838 steps</p>
 </body>
 </html>

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7777ecd4d443a9bb9d05a63076f89afe02a3e32cee50c110c32bc60ec41a023c
 size 119801528

 version https://git-lfs.github.com/spec/v1
+oid sha256:ca8115148a13abf4d6296ac258a6403048e57695e02f379de7637b3c2102d4f0
 size 119801528

loss.png CHANGED Viewed

Git LFS Details

SHA256: 2fc0fcbf9cf4b2d465896fef570fe021fa027af02c18447f1fd253c1e2491278
Pointer size: 131 Bytes
Size of remote file: 178 kB

Git LFS Details

SHA256: 4d693fe62ed0d67640c101e1a08fb8594dd47b1325c3344eff21d057f13d0354
Pointer size: 131 Bytes
Size of remote file: 177 kB

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e87999c10b1a99626be97810b001dd0c84d0ad22aaea8bcd14ece39bdb7303a0
 size 61392692

 version https://git-lfs.github.com/spec/v1
+oid sha256:2259d4339b6b613899a205d51304b186acc86b02e966dbec354e705f1fb840ba
 size 61392692

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:608fccb6c056ce88cdfd5355e6be2046f4d107a24a87c6b0d2c3b200ce6bb4ea
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e4ae31f3bd6abd5e088309ad57fa2e995bc6dd61c02221bc158a3d63e6ad1f06
 size 14244

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9c2d0b42495476b53352ac83683b0ed52a93e363bf64ca71cec5b1e2376f903
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:cc0f4be7d4d8334212cb48351775916c4614df567ed15f716c0301ee17f90467
 size 1064

trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9961374263061598,
   "eval_steps": 500,
-  "global_step": 4900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -34308,6 +34308,706 @@
       "learning_rate": 0.00010045764263195364,
       "loss": 1.0605,
       "step": 4900
     }
   ],
   "logging_steps": 1,
@@ -34327,7 +35027,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.7442820401958093e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0164667615368979,
   "eval_steps": 500,
+  "global_step": 5000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00010045764263195364,
       "loss": 1.0605,
       "step": 4900
+    },
+    {
+      "epoch": 0.9963407196584672,
+      "grad_norm": 0.1430116593837738,
+      "learning_rate": 0.00010043730295942235,
+      "loss": 1.107,
+      "step": 4901
+    },
+    {
+      "epoch": 0.9965440130107746,
+      "grad_norm": 0.11865589022636414,
+      "learning_rate": 0.00010041696328689107,
+      "loss": 0.8887,
+      "step": 4902
+    },
+    {
+      "epoch": 0.996747306363082,
+      "grad_norm": 0.11495467275381088,
+      "learning_rate": 0.00010039662361435982,
+      "loss": 0.8365,
+      "step": 4903
+    },
+    {
+      "epoch": 0.9969505997153894,
+      "grad_norm": 0.1354401409626007,
+      "learning_rate": 0.00010037628394182855,
+      "loss": 1.1705,
+      "step": 4904
+    },
+    {
+      "epoch": 0.9971538930676966,
+      "grad_norm": 0.13998205959796906,
+      "learning_rate": 0.00010035594426929726,
+      "loss": 1.0365,
+      "step": 4905
+    },
+    {
+      "epoch": 0.997357186420004,
+      "grad_norm": 0.15044035017490387,
+      "learning_rate": 0.00010033560459676598,
+      "loss": 1.1061,
+      "step": 4906
+    },
+    {
+      "epoch": 0.9975604797723114,
+      "grad_norm": 0.1416459083557129,
+      "learning_rate": 0.00010031526492423473,
+      "loss": 1.1155,
+      "step": 4907
+    },
+    {
+      "epoch": 0.9977637731246188,
+      "grad_norm": 0.13485343754291534,
+      "learning_rate": 0.00010029492525170346,
+      "loss": 0.9937,
+      "step": 4908
+    },
+    {
+      "epoch": 0.9979670664769262,
+      "grad_norm": 0.14948885142803192,
+      "learning_rate": 0.00010027458557917217,
+      "loss": 1.1689,
+      "step": 4909
+    },
+    {
+      "epoch": 0.9981703598292336,
+      "grad_norm": 0.1309768706560135,
+      "learning_rate": 0.0001002542459066409,
+      "loss": 0.9428,
+      "step": 4910
+    },
+    {
+      "epoch": 0.9983736531815409,
+      "grad_norm": 0.11928943544626236,
+      "learning_rate": 0.00010023390623410965,
+      "loss": 0.8238,
+      "step": 4911
+    },
+    {
+      "epoch": 0.9985769465338483,
+      "grad_norm": 0.1389857530593872,
+      "learning_rate": 0.00010021356656157837,
+      "loss": 1.0459,
+      "step": 4912
+    },
+    {
+      "epoch": 0.9987802398861557,
+      "grad_norm": 0.14047744870185852,
+      "learning_rate": 0.00010019322688904708,
+      "loss": 0.9594,
+      "step": 4913
+    },
+    {
+      "epoch": 0.9989835332384631,
+      "grad_norm": 0.1307019144296646,
+      "learning_rate": 0.0001001728872165158,
+      "loss": 1.1549,
+      "step": 4914
+    },
+    {
+      "epoch": 0.9991868265907705,
+      "grad_norm": 0.13652239739894867,
+      "learning_rate": 0.00010015254754398456,
+      "loss": 1.142,
+      "step": 4915
+    },
+    {
+      "epoch": 0.9993901199430779,
+      "grad_norm": 0.1404002457857132,
+      "learning_rate": 0.00010013220787145328,
+      "loss": 1.0275,
+      "step": 4916
+    },
+    {
+      "epoch": 0.9995934132953852,
+      "grad_norm": 0.14137892425060272,
+      "learning_rate": 0.00010011186819892199,
+      "loss": 1.1169,
+      "step": 4917
+    },
+    {
+      "epoch": 0.9997967066476926,
+      "grad_norm": 0.12362517416477203,
+      "learning_rate": 0.00010009152852639072,
+      "loss": 0.9733,
+      "step": 4918
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.16257604956626892,
+      "learning_rate": 0.00010007118885385947,
+      "loss": 1.214,
+      "step": 4919
+    },
+    {
+      "epoch": 1.0002032933523073,
+      "grad_norm": 0.13455824553966522,
+      "learning_rate": 0.0001000508491813282,
+      "loss": 1.1717,
+      "step": 4920
+    },
+    {
+      "epoch": 1.0004065867046148,
+      "grad_norm": 0.1244397908449173,
+      "learning_rate": 0.0001000305095087969,
+      "loss": 0.9873,
+      "step": 4921
+    },
+    {
+      "epoch": 1.000609880056922,
+      "grad_norm": 0.13148358464241028,
+      "learning_rate": 0.00010001016983626563,
+      "loss": 1.0512,
+      "step": 4922
+    },
+    {
+      "epoch": 1.0008131734092296,
+      "grad_norm": 0.14207464456558228,
+      "learning_rate": 9.998983016373437e-05,
+      "loss": 1.1071,
+      "step": 4923
+    },
+    {
+      "epoch": 1.0010164667615369,
+      "grad_norm": 0.1350506693124771,
+      "learning_rate": 9.99694904912031e-05,
+      "loss": 1.1134,
+      "step": 4924
+    },
+    {
+      "epoch": 1.0012197601138442,
+      "grad_norm": 0.14575833082199097,
+      "learning_rate": 9.994915081867182e-05,
+      "loss": 1.0793,
+      "step": 4925
+    },
+    {
+      "epoch": 1.0014230534661517,
+      "grad_norm": 0.13254649937152863,
+      "learning_rate": 9.992881114614055e-05,
+      "loss": 0.9843,
+      "step": 4926
+    },
+    {
+      "epoch": 1.001626346818459,
+      "grad_norm": 0.13385853171348572,
+      "learning_rate": 9.990847147360928e-05,
+      "loss": 1.0446,
+      "step": 4927
+    },
+    {
+      "epoch": 1.0018296401707665,
+      "grad_norm": 0.13908478617668152,
+      "learning_rate": 9.988813180107802e-05,
+      "loss": 0.9968,
+      "step": 4928
+    },
+    {
+      "epoch": 1.0020329335230738,
+      "grad_norm": 0.13923251628875732,
+      "learning_rate": 9.986779212854673e-05,
+      "loss": 1.0023,
+      "step": 4929
+    },
+    {
+      "epoch": 1.0022362268753813,
+      "grad_norm": 0.1373911201953888,
+      "learning_rate": 9.984745245601547e-05,
+      "loss": 1.1753,
+      "step": 4930
+    },
+    {
+      "epoch": 1.0024395202276886,
+      "grad_norm": 0.13491371273994446,
+      "learning_rate": 9.982711278348419e-05,
+      "loss": 0.893,
+      "step": 4931
+    },
+    {
+      "epoch": 1.0026428135799959,
+      "grad_norm": 0.12279137223958969,
+      "learning_rate": 9.980677311095293e-05,
+      "loss": 0.8334,
+      "step": 4932
+    },
+    {
+      "epoch": 1.0028461069323034,
+      "grad_norm": 0.1489049643278122,
+      "learning_rate": 9.978643343842164e-05,
+      "loss": 1.2196,
+      "step": 4933
+    },
+    {
+      "epoch": 1.0030494002846106,
+      "grad_norm": 0.15800416469573975,
+      "learning_rate": 9.976609376589038e-05,
+      "loss": 1.1065,
+      "step": 4934
+    },
+    {
+      "epoch": 1.0032526936369182,
+      "grad_norm": 0.12695717811584473,
+      "learning_rate": 9.97457540933591e-05,
+      "loss": 0.8969,
+      "step": 4935
+    },
+    {
+      "epoch": 1.0034559869892254,
+      "grad_norm": 0.12970462441444397,
+      "learning_rate": 9.972541442082784e-05,
+      "loss": 0.9748,
+      "step": 4936
+    },
+    {
+      "epoch": 1.0036592803415327,
+      "grad_norm": 0.13583384454250336,
+      "learning_rate": 9.970507474829655e-05,
+      "loss": 0.9943,
+      "step": 4937
+    },
+    {
+      "epoch": 1.0038625736938402,
+      "grad_norm": 0.13171210885047913,
+      "learning_rate": 9.968473507576529e-05,
+      "loss": 1.0066,
+      "step": 4938
+    },
+    {
+      "epoch": 1.0040658670461475,
+      "grad_norm": 0.140077605843544,
+      "learning_rate": 9.966439540323401e-05,
+      "loss": 1.0276,
+      "step": 4939
+    },
+    {
+      "epoch": 1.004269160398455,
+      "grad_norm": 0.13248348236083984,
+      "learning_rate": 9.964405573070275e-05,
+      "loss": 0.9836,
+      "step": 4940
+    },
+    {
+      "epoch": 1.0044724537507623,
+      "grad_norm": 0.1502828449010849,
+      "learning_rate": 9.962371605817146e-05,
+      "loss": 1.175,
+      "step": 4941
+    },
+    {
+      "epoch": 1.0046757471030698,
+      "grad_norm": 0.14695493876934052,
+      "learning_rate": 9.96033763856402e-05,
+      "loss": 0.963,
+      "step": 4942
+    },
+    {
+      "epoch": 1.0048790404553771,
+      "grad_norm": 0.14214938879013062,
+      "learning_rate": 9.958303671310892e-05,
+      "loss": 1.0651,
+      "step": 4943
+    },
+    {
+      "epoch": 1.0050823338076844,
+      "grad_norm": 0.14761728048324585,
+      "learning_rate": 9.956269704057765e-05,
+      "loss": 0.9907,
+      "step": 4944
+    },
+    {
+      "epoch": 1.005285627159992,
+      "grad_norm": 0.13151785731315613,
+      "learning_rate": 9.954235736804637e-05,
+      "loss": 0.8793,
+      "step": 4945
+    },
+    {
+      "epoch": 1.0054889205122992,
+      "grad_norm": 0.1452670693397522,
+      "learning_rate": 9.95220176955151e-05,
+      "loss": 1.0906,
+      "step": 4946
+    },
+    {
+      "epoch": 1.0056922138646067,
+      "grad_norm": 0.13930079340934753,
+      "learning_rate": 9.950167802298384e-05,
+      "loss": 0.9598,
+      "step": 4947
+    },
+    {
+      "epoch": 1.005895507216914,
+      "grad_norm": 0.12317246198654175,
+      "learning_rate": 9.948133835045256e-05,
+      "loss": 0.9429,
+      "step": 4948
+    },
+    {
+      "epoch": 1.0060988005692213,
+      "grad_norm": 0.13415516912937164,
+      "learning_rate": 9.946099867792128e-05,
+      "loss": 1.0848,
+      "step": 4949
+    },
+    {
+      "epoch": 1.0063020939215288,
+      "grad_norm": 0.13976556062698364,
+      "learning_rate": 9.944065900539001e-05,
+      "loss": 0.934,
+      "step": 4950
+    },
+    {
+      "epoch": 1.006505387273836,
+      "grad_norm": 0.13384398818016052,
+      "learning_rate": 9.942031933285875e-05,
+      "loss": 0.955,
+      "step": 4951
+    },
+    {
+      "epoch": 1.0067086806261436,
+      "grad_norm": 0.14308519661426544,
+      "learning_rate": 9.939997966032747e-05,
+      "loss": 0.9543,
+      "step": 4952
+    },
+    {
+      "epoch": 1.006911973978451,
+      "grad_norm": 0.14340607821941376,
+      "learning_rate": 9.937963998779621e-05,
+      "loss": 1.047,
+      "step": 4953
+    },
+    {
+      "epoch": 1.0071152673307582,
+      "grad_norm": 0.14457905292510986,
+      "learning_rate": 9.935930031526492e-05,
+      "loss": 0.9937,
+      "step": 4954
+    },
+    {
+      "epoch": 1.0073185606830657,
+      "grad_norm": 0.13555844128131866,
+      "learning_rate": 9.933896064273366e-05,
+      "loss": 1.0211,
+      "step": 4955
+    },
+    {
+      "epoch": 1.007521854035373,
+      "grad_norm": 0.1536429524421692,
+      "learning_rate": 9.931862097020238e-05,
+      "loss": 1.188,
+      "step": 4956
+    },
+    {
+      "epoch": 1.0077251473876805,
+      "grad_norm": 0.13193362951278687,
+      "learning_rate": 9.929828129767112e-05,
+      "loss": 0.9143,
+      "step": 4957
+    },
+    {
+      "epoch": 1.0079284407399878,
+      "grad_norm": 0.14066417515277863,
+      "learning_rate": 9.927794162513983e-05,
+      "loss": 1.0662,
+      "step": 4958
+    },
+    {
+      "epoch": 1.0081317340922953,
+      "grad_norm": 0.13579119741916656,
+      "learning_rate": 9.925760195260857e-05,
+      "loss": 0.8999,
+      "step": 4959
+    },
+    {
+      "epoch": 1.0083350274446026,
+      "grad_norm": 0.14911122620105743,
+      "learning_rate": 9.92372622800773e-05,
+      "loss": 1.3171,
+      "step": 4960
+    },
+    {
+      "epoch": 1.0085383207969099,
+      "grad_norm": 0.1447262316942215,
+      "learning_rate": 9.921692260754603e-05,
+      "loss": 1.0899,
+      "step": 4961
+    },
+    {
+      "epoch": 1.0087416141492174,
+      "grad_norm": 0.1513487845659256,
+      "learning_rate": 9.919658293501474e-05,
+      "loss": 1.0844,
+      "step": 4962
+    },
+    {
+      "epoch": 1.0089449075015247,
+      "grad_norm": 0.1470583975315094,
+      "learning_rate": 9.917624326248348e-05,
+      "loss": 1.1176,
+      "step": 4963
+    },
+    {
+      "epoch": 1.0091482008538322,
+      "grad_norm": 0.13596630096435547,
+      "learning_rate": 9.91559035899522e-05,
+      "loss": 1.0829,
+      "step": 4964
+    },
+    {
+      "epoch": 1.0093514942061395,
+      "grad_norm": 0.1411203145980835,
+      "learning_rate": 9.913556391742094e-05,
+      "loss": 1.0523,
+      "step": 4965
+    },
+    {
+      "epoch": 1.0095547875584467,
+      "grad_norm": 0.14842981100082397,
+      "learning_rate": 9.911522424488965e-05,
+      "loss": 1.0513,
+      "step": 4966
+    },
+    {
+      "epoch": 1.0097580809107543,
+      "grad_norm": 0.1505335569381714,
+      "learning_rate": 9.909488457235839e-05,
+      "loss": 0.9964,
+      "step": 4967
+    },
+    {
+      "epoch": 1.0099613742630615,
+      "grad_norm": 0.12677620351314545,
+      "learning_rate": 9.907454489982712e-05,
+      "loss": 0.9546,
+      "step": 4968
+    },
+    {
+      "epoch": 1.010164667615369,
+      "grad_norm": 0.13651777803897858,
+      "learning_rate": 9.905420522729585e-05,
+      "loss": 1.0823,
+      "step": 4969
+    },
+    {
+      "epoch": 1.0103679609676763,
+      "grad_norm": 0.1392572969198227,
+      "learning_rate": 9.903386555476457e-05,
+      "loss": 0.9032,
+      "step": 4970
+    },
+    {
+      "epoch": 1.0105712543199838,
+      "grad_norm": 0.16775289177894592,
+      "learning_rate": 9.90135258822333e-05,
+      "loss": 1.1434,
+      "step": 4971
+    },
+    {
+      "epoch": 1.0107745476722911,
+      "grad_norm": 0.1534387320280075,
+      "learning_rate": 9.899318620970203e-05,
+      "loss": 1.166,
+      "step": 4972
+    },
+    {
+      "epoch": 1.0109778410245984,
+      "grad_norm": 0.14180676639080048,
+      "learning_rate": 9.897284653717077e-05,
+      "loss": 1.0688,
+      "step": 4973
+    },
+    {
+      "epoch": 1.011181134376906,
+      "grad_norm": 0.13633224368095398,
+      "learning_rate": 9.895250686463948e-05,
+      "loss": 1.0413,
+      "step": 4974
+    },
+    {
+      "epoch": 1.0113844277292132,
+      "grad_norm": 0.15582099556922913,
+      "learning_rate": 9.893216719210822e-05,
+      "loss": 1.256,
+      "step": 4975
+    },
+    {
+      "epoch": 1.0115877210815207,
+      "grad_norm": 0.16052106022834778,
+      "learning_rate": 9.891182751957694e-05,
+      "loss": 1.3048,
+      "step": 4976
+    },
+    {
+      "epoch": 1.011791014433828,
+      "grad_norm": 0.15733475983142853,
+      "learning_rate": 9.889148784704568e-05,
+      "loss": 1.1024,
+      "step": 4977
+    },
+    {
+      "epoch": 1.0119943077861353,
+      "grad_norm": 0.1398230642080307,
+      "learning_rate": 9.887114817451439e-05,
+      "loss": 1.0691,
+      "step": 4978
+    },
+    {
+      "epoch": 1.0121976011384428,
+      "grad_norm": 0.15575705468654633,
+      "learning_rate": 9.885080850198313e-05,
+      "loss": 1.0019,
+      "step": 4979
+    },
+    {
+      "epoch": 1.01240089449075,
+      "grad_norm": 0.13900624215602875,
+      "learning_rate": 9.883046882945185e-05,
+      "loss": 1.0318,
+      "step": 4980
+    },
+    {
+      "epoch": 1.0126041878430576,
+      "grad_norm": 0.1266520619392395,
+      "learning_rate": 9.881012915692059e-05,
+      "loss": 0.9455,
+      "step": 4981
+    },
+    {
+      "epoch": 1.012807481195365,
+      "grad_norm": 0.14327497780323029,
+      "learning_rate": 9.87897894843893e-05,
+      "loss": 1.1133,
+      "step": 4982
+    },
+    {
+      "epoch": 1.0130107745476722,
+      "grad_norm": 0.14177127182483673,
+      "learning_rate": 9.876944981185804e-05,
+      "loss": 0.9969,
+      "step": 4983
+    },
+    {
+      "epoch": 1.0132140678999797,
+      "grad_norm": 0.14066456258296967,
+      "learning_rate": 9.874911013932676e-05,
+      "loss": 0.9261,
+      "step": 4984
+    },
+    {
+      "epoch": 1.013417361252287,
+      "grad_norm": 0.14441144466400146,
+      "learning_rate": 9.872877046679549e-05,
+      "loss": 1.0065,
+      "step": 4985
+    },
+    {
+      "epoch": 1.0136206546045945,
+      "grad_norm": 0.12858086824417114,
+      "learning_rate": 9.870843079426421e-05,
+      "loss": 0.9306,
+      "step": 4986
+    },
+    {
+      "epoch": 1.0138239479569018,
+      "grad_norm": 0.1305333971977234,
+      "learning_rate": 9.868809112173294e-05,
+      "loss": 1.0058,
+      "step": 4987
+    },
+    {
+      "epoch": 1.0140272413092093,
+      "grad_norm": 0.1652311384677887,
+      "learning_rate": 9.866775144920167e-05,
+      "loss": 1.1992,
+      "step": 4988
+    },
+    {
+      "epoch": 1.0142305346615166,
+      "grad_norm": 0.1123913899064064,
+      "learning_rate": 9.86474117766704e-05,
+      "loss": 0.8779,
+      "step": 4989
+    },
+    {
+      "epoch": 1.0144338280138239,
+      "grad_norm": 0.15201310813426971,
+      "learning_rate": 9.862707210413912e-05,
+      "loss": 1.1553,
+      "step": 4990
+    },
+    {
+      "epoch": 1.0146371213661314,
+      "grad_norm": 0.13241463899612427,
+      "learning_rate": 9.860673243160785e-05,
+      "loss": 0.9276,
+      "step": 4991
+    },
+    {
+      "epoch": 1.0148404147184387,
+      "grad_norm": 0.15238632261753082,
+      "learning_rate": 9.858639275907659e-05,
+      "loss": 1.1528,
+      "step": 4992
+    },
+    {
+      "epoch": 1.0150437080707462,
+      "grad_norm": 0.13771474361419678,
+      "learning_rate": 9.856605308654531e-05,
+      "loss": 1.1871,
+      "step": 4993
+    },
+    {
+      "epoch": 1.0152470014230535,
+      "grad_norm": 0.135041743516922,
+      "learning_rate": 9.854571341401403e-05,
+      "loss": 0.9718,
+      "step": 4994
+    },
+    {
+      "epoch": 1.0154502947753608,
+      "grad_norm": 0.14199897646903992,
+      "learning_rate": 9.852537374148276e-05,
+      "loss": 1.0454,
+      "step": 4995
+    },
+    {
+      "epoch": 1.0156535881276683,
+      "grad_norm": 0.14556720852851868,
+      "learning_rate": 9.85050340689515e-05,
+      "loss": 1.102,
+      "step": 4996
+    },
+    {
+      "epoch": 1.0158568814799755,
+      "grad_norm": 0.1287354975938797,
+      "learning_rate": 9.848469439642022e-05,
+      "loss": 0.929,
+      "step": 4997
+    },
+    {
+      "epoch": 1.016060174832283,
+      "grad_norm": 0.15297791361808777,
+      "learning_rate": 9.846435472388895e-05,
+      "loss": 1.0234,
+      "step": 4998
+    },
+    {
+      "epoch": 1.0162634681845903,
+      "grad_norm": 0.1549387276172638,
+      "learning_rate": 9.844401505135767e-05,
+      "loss": 1.1666,
+      "step": 4999
+    },
+    {
+      "epoch": 1.0164667615368979,
+      "grad_norm": 0.15455321967601776,
+      "learning_rate": 9.842367537882641e-05,
+      "loss": 1.0845,
+      "step": 5000
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.801020652405637e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null