Upload checkpoint 400

Browse files

Files changed (6) hide show

README.md +4 -4
adapter_config.json +1 -1
loss.png +2 -2
optimizer.pt +1 -1
scheduler.pt +1 -1
trainer_state.json +703 -3

README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 base_model: Qwen/Qwen2.5-7B-Instruct
 library_name: peft
 ---
-# Gradience T1 7B (Step 300 Checkpoint)
 > [!NOTE]
 > Training in progress...
@@ -38,11 +38,11 @@ library_name: peft
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
-  <div style="height: 30px; width: 6.10%; background-color: #76c7c0; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
-    <!-- 6.10% -->
   </div>
 </div>
-<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 300 out of 4918 steps</p>
 </body>
 </html>

 base_model: Qwen/Qwen2.5-7B-Instruct
 library_name: peft
 ---
+# Gradience T1 7B (Step 400 Checkpoint)
 > [!NOTE]
 > Training in progress...
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
+  <div style="height: 30px; width: 8.13%; background-color: #76c7c0; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
+    <!-- 8.13% -->
   </div>
 </div>
+<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 400 out of 4918 steps</p>
 </body>
 </html>

adapter_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "./Qwen-2.5-7B-Instruct",
   "bias": "none",
   "eva_config": null,
   "exclude_modules": null,

 {
   "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
   "bias": "none",
   "eva_config": null,
   "exclude_modules": null,

loss.png CHANGED Viewed

Git LFS Details

SHA256: e810ff40767611568e79a686ad4dfae1865f4945d6d52a68266b97a26749589a
Pointer size: 131 Bytes
Size of remote file: 160 kB

Git LFS Details

SHA256: 01875c9e9f80f8f6b0347549b0f04a0cb2801da395ce89eef4ddb1e2b274a929
Pointer size: 131 Bytes
Size of remote file: 169 kB

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c5bf73f01bac2ca2277437d81aec72780e39675c53269ceb990c1a08b4af763
 size 82461044

 version https://git-lfs.github.com/spec/v1
+oid sha256:eefac9e8208f033b11163601df9a990821a006cdad2f5b671af5e786c9b60212
 size 82461044

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d30110b3009df9d8cb5018c074f6d9745ea65a134336e183ecde2b6b6201f0c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:880e20dc665fe0d79037c2008c79e9fa46462cda4281c7356b7e603bf120e9f2
 size 1064

trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.12197601138442773,
   "eval_steps": 500,
-  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2108,6 +2108,706 @@
       "learning_rate": 0.00018803175249338492,
       "loss": 1.0508,
       "step": 300
     }
   ],
   "logging_steps": 1,
@@ -2127,7 +2827,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.511575109617746e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.16263468184590363,
   "eval_steps": 500,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00018803175249338492,
       "loss": 1.0508,
       "step": 300
+    },
+    {
+      "epoch": 0.12238259808904249,
+      "grad_norm": 0.09302002936601639,
+      "learning_rate": 0.00018799104416853248,
+      "loss": 1.0512,
+      "step": 301
+    },
+    {
+      "epoch": 0.12278918479365725,
+      "grad_norm": 0.09081271290779114,
+      "learning_rate": 0.00018795033584368006,
+      "loss": 0.9688,
+      "step": 302
+    },
+    {
+      "epoch": 0.123195771498272,
+      "grad_norm": 0.1059931218624115,
+      "learning_rate": 0.0001879096275188276,
+      "loss": 1.0483,
+      "step": 303
+    },
+    {
+      "epoch": 0.12360235820288677,
+      "grad_norm": 0.1018669605255127,
+      "learning_rate": 0.0001878689191939752,
+      "loss": 1.019,
+      "step": 304
+    },
+    {
+      "epoch": 0.12400894490750153,
+      "grad_norm": 0.1040007546544075,
+      "learning_rate": 0.00018782821086912275,
+      "loss": 1.037,
+      "step": 305
+    },
+    {
+      "epoch": 0.12441553161211628,
+      "grad_norm": 0.10204601287841797,
+      "learning_rate": 0.0001877875025442703,
+      "loss": 0.9816,
+      "step": 306
+    },
+    {
+      "epoch": 0.12482211831673104,
+      "grad_norm": 0.10591764748096466,
+      "learning_rate": 0.00018774679421941788,
+      "loss": 1.0939,
+      "step": 307
+    },
+    {
+      "epoch": 0.1252287050213458,
+      "grad_norm": 0.09306305646896362,
+      "learning_rate": 0.00018770608589456544,
+      "loss": 1.0476,
+      "step": 308
+    },
+    {
+      "epoch": 0.12563529172596055,
+      "grad_norm": 11.22681713104248,
+      "learning_rate": 0.00018766537756971302,
+      "loss": 1.0573,
+      "step": 309
+    },
+    {
+      "epoch": 0.12604187843057532,
+      "grad_norm": 0.09422402083873749,
+      "learning_rate": 0.00018762466924486057,
+      "loss": 0.9993,
+      "step": 310
+    },
+    {
+      "epoch": 0.1264484651351901,
+      "grad_norm": 0.0982229933142662,
+      "learning_rate": 0.00018758396092000815,
+      "loss": 0.9159,
+      "step": 311
+    },
+    {
+      "epoch": 0.12685505183980483,
+      "grad_norm": 0.12579265236854553,
+      "learning_rate": 0.00018754325259515573,
+      "loss": 1.0935,
+      "step": 312
+    },
+    {
+      "epoch": 0.1272616385444196,
+      "grad_norm": 0.10069390386343002,
+      "learning_rate": 0.0001875025442703033,
+      "loss": 1.0127,
+      "step": 313
+    },
+    {
+      "epoch": 0.12766822524903434,
+      "grad_norm": 0.10948827862739563,
+      "learning_rate": 0.00018746183594545087,
+      "loss": 1.0576,
+      "step": 314
+    },
+    {
+      "epoch": 0.12807481195364911,
+      "grad_norm": 0.09232445061206818,
+      "learning_rate": 0.00018742112762059842,
+      "loss": 0.9856,
+      "step": 315
+    },
+    {
+      "epoch": 0.12848139865826388,
+      "grad_norm": 0.08319563418626785,
+      "learning_rate": 0.000187380419295746,
+      "loss": 0.9172,
+      "step": 316
+    },
+    {
+      "epoch": 0.12888798536287863,
+      "grad_norm": 0.09697309136390686,
+      "learning_rate": 0.00018733971097089356,
+      "loss": 1.0567,
+      "step": 317
+    },
+    {
+      "epoch": 0.1292945720674934,
+      "grad_norm": 0.09254255145788193,
+      "learning_rate": 0.0001872990026460411,
+      "loss": 1.0177,
+      "step": 318
+    },
+    {
+      "epoch": 0.12970115877210814,
+      "grad_norm": 0.09254108369350433,
+      "learning_rate": 0.0001872582943211887,
+      "loss": 1.0079,
+      "step": 319
+    },
+    {
+      "epoch": 0.1301077454767229,
+      "grad_norm": 0.09095866233110428,
+      "learning_rate": 0.00018721758599633625,
+      "loss": 1.0633,
+      "step": 320
+    },
+    {
+      "epoch": 0.13051433218133768,
+      "grad_norm": 0.09073010087013245,
+      "learning_rate": 0.00018717687767148383,
+      "loss": 0.9059,
+      "step": 321
+    },
+    {
+      "epoch": 0.13092091888595242,
+      "grad_norm": 0.09842764586210251,
+      "learning_rate": 0.00018713616934663138,
+      "loss": 1.0766,
+      "step": 322
+    },
+    {
+      "epoch": 0.1313275055905672,
+      "grad_norm": 0.09325529634952545,
+      "learning_rate": 0.00018709546102177896,
+      "loss": 1.066,
+      "step": 323
+    },
+    {
+      "epoch": 0.13173409229518196,
+      "grad_norm": 0.09692969918251038,
+      "learning_rate": 0.00018705475269692654,
+      "loss": 0.9743,
+      "step": 324
+    },
+    {
+      "epoch": 0.1321406789997967,
+      "grad_norm": 0.09432708472013474,
+      "learning_rate": 0.0001870140443720741,
+      "loss": 1.0141,
+      "step": 325
+    },
+    {
+      "epoch": 0.13254726570441147,
+      "grad_norm": 0.09226994961500168,
+      "learning_rate": 0.00018697333604722168,
+      "loss": 0.9837,
+      "step": 326
+    },
+    {
+      "epoch": 0.1329538524090262,
+      "grad_norm": 0.10843974351882935,
+      "learning_rate": 0.00018693262772236923,
+      "loss": 1.0248,
+      "step": 327
+    },
+    {
+      "epoch": 0.13336043911364098,
+      "grad_norm": 0.09324774891138077,
+      "learning_rate": 0.00018689191939751681,
+      "loss": 1.0642,
+      "step": 328
+    },
+    {
+      "epoch": 0.13376702581825575,
+      "grad_norm": 0.08934729546308517,
+      "learning_rate": 0.00018685121107266437,
+      "loss": 0.9792,
+      "step": 329
+    },
+    {
+      "epoch": 0.1341736125228705,
+      "grad_norm": 0.09125274419784546,
+      "learning_rate": 0.00018681050274781192,
+      "loss": 1.0093,
+      "step": 330
+    },
+    {
+      "epoch": 0.13458019922748526,
+      "grad_norm": 0.09645108133554459,
+      "learning_rate": 0.0001867697944229595,
+      "loss": 0.9503,
+      "step": 331
+    },
+    {
+      "epoch": 0.13498678593210003,
+      "grad_norm": 0.09900861978530884,
+      "learning_rate": 0.00018672908609810706,
+      "loss": 0.9966,
+      "step": 332
+    },
+    {
+      "epoch": 0.13539337263671478,
+      "grad_norm": 0.09018311649560928,
+      "learning_rate": 0.00018668837777325464,
+      "loss": 0.965,
+      "step": 333
+    },
+    {
+      "epoch": 0.13579995934132955,
+      "grad_norm": 0.10296136885881424,
+      "learning_rate": 0.00018664766944840222,
+      "loss": 1.1011,
+      "step": 334
+    },
+    {
+      "epoch": 0.1362065460459443,
+      "grad_norm": 0.09104129672050476,
+      "learning_rate": 0.00018660696112354977,
+      "loss": 0.9814,
+      "step": 335
+    },
+    {
+      "epoch": 0.13661313275055906,
+      "grad_norm": 0.09881450235843658,
+      "learning_rate": 0.00018656625279869736,
+      "loss": 1.0989,
+      "step": 336
+    },
+    {
+      "epoch": 0.13701971945517383,
+      "grad_norm": 0.09691241383552551,
+      "learning_rate": 0.0001865255444738449,
+      "loss": 1.0967,
+      "step": 337
+    },
+    {
+      "epoch": 0.13742630615978857,
+      "grad_norm": 0.10152243077754974,
+      "learning_rate": 0.0001864848361489925,
+      "loss": 1.0951,
+      "step": 338
+    },
+    {
+      "epoch": 0.13783289286440334,
+      "grad_norm": 0.10802541673183441,
+      "learning_rate": 0.00018644412782414005,
+      "loss": 0.8742,
+      "step": 339
+    },
+    {
+      "epoch": 0.13823947956901808,
+      "grad_norm": 0.09942565858364105,
+      "learning_rate": 0.0001864034194992876,
+      "loss": 0.9961,
+      "step": 340
+    },
+    {
+      "epoch": 0.13864606627363285,
+      "grad_norm": 0.08618199825286865,
+      "learning_rate": 0.00018636271117443518,
+      "loss": 0.9645,
+      "step": 341
+    },
+    {
+      "epoch": 0.13905265297824762,
+      "grad_norm": 0.1056099608540535,
+      "learning_rate": 0.00018632200284958273,
+      "loss": 0.9885,
+      "step": 342
+    },
+    {
+      "epoch": 0.13945923968286236,
+      "grad_norm": 0.08862382173538208,
+      "learning_rate": 0.00018628129452473032,
+      "loss": 0.9316,
+      "step": 343
+    },
+    {
+      "epoch": 0.13986582638747713,
+      "grad_norm": 0.09923135489225388,
+      "learning_rate": 0.00018624058619987787,
+      "loss": 0.9959,
+      "step": 344
+    },
+    {
+      "epoch": 0.1402724130920919,
+      "grad_norm": 0.09120538830757141,
+      "learning_rate": 0.00018619987787502545,
+      "loss": 0.968,
+      "step": 345
+    },
+    {
+      "epoch": 0.14067899979670664,
+      "grad_norm": 0.09669141471385956,
+      "learning_rate": 0.00018615916955017303,
+      "loss": 1.085,
+      "step": 346
+    },
+    {
+      "epoch": 0.1410855865013214,
+      "grad_norm": 0.08598754554986954,
+      "learning_rate": 0.00018611846122532059,
+      "loss": 0.9504,
+      "step": 347
+    },
+    {
+      "epoch": 0.14149217320593616,
+      "grad_norm": 0.09238371253013611,
+      "learning_rate": 0.00018607775290046817,
+      "loss": 0.9742,
+      "step": 348
+    },
+    {
+      "epoch": 0.14189875991055093,
+      "grad_norm": 0.091258205473423,
+      "learning_rate": 0.00018603704457561572,
+      "loss": 0.9341,
+      "step": 349
+    },
+    {
+      "epoch": 0.1423053466151657,
+      "grad_norm": 0.10129548609256744,
+      "learning_rate": 0.0001859963362507633,
+      "loss": 1.0814,
+      "step": 350
+    },
+    {
+      "epoch": 0.14271193331978044,
+      "grad_norm": 0.09523019194602966,
+      "learning_rate": 0.00018595562792591086,
+      "loss": 0.9848,
+      "step": 351
+    },
+    {
+      "epoch": 0.1431185200243952,
+      "grad_norm": 0.09485248476266861,
+      "learning_rate": 0.0001859149196010584,
+      "loss": 0.9828,
+      "step": 352
+    },
+    {
+      "epoch": 0.14352510672900995,
+      "grad_norm": 0.09963666647672653,
+      "learning_rate": 0.000185874211276206,
+      "loss": 1.1075,
+      "step": 353
+    },
+    {
+      "epoch": 0.14393169343362472,
+      "grad_norm": 0.09067155420780182,
+      "learning_rate": 0.00018583350295135355,
+      "loss": 0.971,
+      "step": 354
+    },
+    {
+      "epoch": 0.1443382801382395,
+      "grad_norm": 0.09153544157743454,
+      "learning_rate": 0.00018579279462650113,
+      "loss": 0.9405,
+      "step": 355
+    },
+    {
+      "epoch": 0.14474486684285423,
+      "grad_norm": 0.1024472787976265,
+      "learning_rate": 0.00018575208630164868,
+      "loss": 0.9967,
+      "step": 356
+    },
+    {
+      "epoch": 0.145151453547469,
+      "grad_norm": 0.09804495424032211,
+      "learning_rate": 0.00018571137797679626,
+      "loss": 0.9578,
+      "step": 357
+    },
+    {
+      "epoch": 0.14555804025208377,
+      "grad_norm": 0.099054716527462,
+      "learning_rate": 0.00018567066965194384,
+      "loss": 0.9999,
+      "step": 358
+    },
+    {
+      "epoch": 0.1459646269566985,
+      "grad_norm": 0.09781336784362793,
+      "learning_rate": 0.0001856299613270914,
+      "loss": 1.09,
+      "step": 359
+    },
+    {
+      "epoch": 0.14637121366131328,
+      "grad_norm": 0.08993211388587952,
+      "learning_rate": 0.00018558925300223898,
+      "loss": 1.0719,
+      "step": 360
+    },
+    {
+      "epoch": 0.14677780036592802,
+      "grad_norm": 0.09146003425121307,
+      "learning_rate": 0.00018554854467738653,
+      "loss": 1.0008,
+      "step": 361
+    },
+    {
+      "epoch": 0.1471843870705428,
+      "grad_norm": 0.09643495827913284,
+      "learning_rate": 0.00018550783635253411,
+      "loss": 1.0791,
+      "step": 362
+    },
+    {
+      "epoch": 0.14759097377515756,
+      "grad_norm": 0.09078676998615265,
+      "learning_rate": 0.00018546712802768167,
+      "loss": 0.8641,
+      "step": 363
+    },
+    {
+      "epoch": 0.1479975604797723,
+      "grad_norm": 0.08719085901975632,
+      "learning_rate": 0.00018542641970282922,
+      "loss": 0.985,
+      "step": 364
+    },
+    {
+      "epoch": 0.14840414718438708,
+      "grad_norm": 0.09189736843109131,
+      "learning_rate": 0.0001853857113779768,
+      "loss": 0.9638,
+      "step": 365
+    },
+    {
+      "epoch": 0.14881073388900182,
+      "grad_norm": 0.09381456673145294,
+      "learning_rate": 0.00018534500305312436,
+      "loss": 1.0036,
+      "step": 366
+    },
+    {
+      "epoch": 0.1492173205936166,
+      "grad_norm": 0.0922684445977211,
+      "learning_rate": 0.00018530429472827194,
+      "loss": 1.0391,
+      "step": 367
+    },
+    {
+      "epoch": 0.14962390729823136,
+      "grad_norm": 0.09465248882770538,
+      "learning_rate": 0.0001852635864034195,
+      "loss": 0.8874,
+      "step": 368
+    },
+    {
+      "epoch": 0.1500304940028461,
+      "grad_norm": 0.0938408225774765,
+      "learning_rate": 0.00018522287807856707,
+      "loss": 1.0269,
+      "step": 369
+    },
+    {
+      "epoch": 0.15043708070746087,
+      "grad_norm": 0.09377933293581009,
+      "learning_rate": 0.00018518216975371466,
+      "loss": 1.0142,
+      "step": 370
+    },
+    {
+      "epoch": 0.15084366741207564,
+      "grad_norm": 0.1117277517914772,
+      "learning_rate": 0.0001851414614288622,
+      "loss": 1.0371,
+      "step": 371
+    },
+    {
+      "epoch": 0.15125025411669038,
+      "grad_norm": 0.10293183475732803,
+      "learning_rate": 0.0001851007531040098,
+      "loss": 1.0,
+      "step": 372
+    },
+    {
+      "epoch": 0.15165684082130515,
+      "grad_norm": 0.09216313809156418,
+      "learning_rate": 0.00018506004477915734,
+      "loss": 0.9703,
+      "step": 373
+    },
+    {
+      "epoch": 0.1520634275259199,
+      "grad_norm": 0.09088669717311859,
+      "learning_rate": 0.00018501933645430493,
+      "loss": 0.8766,
+      "step": 374
+    },
+    {
+      "epoch": 0.15247001423053466,
+      "grad_norm": 0.09916643798351288,
+      "learning_rate": 0.00018497862812945248,
+      "loss": 1.0958,
+      "step": 375
+    },
+    {
+      "epoch": 0.15287660093514943,
+      "grad_norm": 0.08404985070228577,
+      "learning_rate": 0.00018493791980460003,
+      "loss": 0.9602,
+      "step": 376
+    },
+    {
+      "epoch": 0.15328318763976417,
+      "grad_norm": 0.10011377185583115,
+      "learning_rate": 0.00018489721147974762,
+      "loss": 1.0377,
+      "step": 377
+    },
+    {
+      "epoch": 0.15368977434437894,
+      "grad_norm": 0.09958089143037796,
+      "learning_rate": 0.00018485650315489517,
+      "loss": 1.0213,
+      "step": 378
+    },
+    {
+      "epoch": 0.15409636104899369,
+      "grad_norm": 0.09488838911056519,
+      "learning_rate": 0.00018481579483004275,
+      "loss": 0.941,
+      "step": 379
+    },
+    {
+      "epoch": 0.15450294775360846,
+      "grad_norm": 0.09099314361810684,
+      "learning_rate": 0.00018477508650519033,
+      "loss": 0.8913,
+      "step": 380
+    },
+    {
+      "epoch": 0.15490953445822322,
+      "grad_norm": 0.0956854447722435,
+      "learning_rate": 0.00018473437818033789,
+      "loss": 1.1478,
+      "step": 381
+    },
+    {
+      "epoch": 0.15531612116283797,
+      "grad_norm": 0.11225584149360657,
+      "learning_rate": 0.00018469366985548547,
+      "loss": 1.0795,
+      "step": 382
+    },
+    {
+      "epoch": 0.15572270786745274,
+      "grad_norm": 0.11592987924814224,
+      "learning_rate": 0.00018465296153063302,
+      "loss": 1.0863,
+      "step": 383
+    },
+    {
+      "epoch": 0.1561292945720675,
+      "grad_norm": 0.09232570976018906,
+      "learning_rate": 0.0001846122532057806,
+      "loss": 0.9551,
+      "step": 384
+    },
+    {
+      "epoch": 0.15653588127668225,
+      "grad_norm": 0.08860056847333908,
+      "learning_rate": 0.00018457154488092816,
+      "loss": 1.0206,
+      "step": 385
+    },
+    {
+      "epoch": 0.15694246798129702,
+      "grad_norm": 0.10788331180810928,
+      "learning_rate": 0.00018453083655607574,
+      "loss": 0.9378,
+      "step": 386
+    },
+    {
+      "epoch": 0.15734905468591176,
+      "grad_norm": 0.10758615285158157,
+      "learning_rate": 0.0001844901282312233,
+      "loss": 1.1149,
+      "step": 387
+    },
+    {
+      "epoch": 0.15775564139052653,
+      "grad_norm": 0.10551386326551437,
+      "learning_rate": 0.00018444941990637085,
+      "loss": 1.0729,
+      "step": 388
+    },
+    {
+      "epoch": 0.1581622280951413,
+      "grad_norm": 0.08733198046684265,
+      "learning_rate": 0.00018440871158151843,
+      "loss": 1.0058,
+      "step": 389
+    },
+    {
+      "epoch": 0.15856881479975604,
+      "grad_norm": 0.1095399409532547,
+      "learning_rate": 0.00018436800325666598,
+      "loss": 1.0566,
+      "step": 390
+    },
+    {
+      "epoch": 0.1589754015043708,
+      "grad_norm": 0.12356330454349518,
+      "learning_rate": 0.00018432729493181356,
+      "loss": 1.0173,
+      "step": 391
+    },
+    {
+      "epoch": 0.15938198820898555,
+      "grad_norm": 0.09934639930725098,
+      "learning_rate": 0.00018428658660696114,
+      "loss": 1.1237,
+      "step": 392
+    },
+    {
+      "epoch": 0.15978857491360032,
+      "grad_norm": 0.09402013570070267,
+      "learning_rate": 0.0001842458782821087,
+      "loss": 1.0018,
+      "step": 393
+    },
+    {
+      "epoch": 0.1601951616182151,
+      "grad_norm": 0.10511749237775803,
+      "learning_rate": 0.00018420516995725628,
+      "loss": 0.9844,
+      "step": 394
+    },
+    {
+      "epoch": 0.16060174832282983,
+      "grad_norm": 0.11193688213825226,
+      "learning_rate": 0.00018416446163240383,
+      "loss": 0.9888,
+      "step": 395
+    },
+    {
+      "epoch": 0.1610083350274446,
+      "grad_norm": 0.09895443916320801,
+      "learning_rate": 0.00018412375330755141,
+      "loss": 1.1045,
+      "step": 396
+    },
+    {
+      "epoch": 0.16141492173205937,
+      "grad_norm": 0.09660319238901138,
+      "learning_rate": 0.00018408304498269897,
+      "loss": 1.0457,
+      "step": 397
+    },
+    {
+      "epoch": 0.16182150843667412,
+      "grad_norm": 0.1339186728000641,
+      "learning_rate": 0.00018404233665784655,
+      "loss": 1.1266,
+      "step": 398
+    },
+    {
+      "epoch": 0.16222809514128889,
+      "grad_norm": 0.1154564693570137,
+      "learning_rate": 0.0001840016283329941,
+      "loss": 1.0299,
+      "step": 399
+    },
+    {
+      "epoch": 0.16263468184590363,
+      "grad_norm": 0.09698904305696487,
+      "learning_rate": 0.00018396092000814166,
+      "loss": 1.1101,
+      "step": 400
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.2713704795934392e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null