Upload checkpoint 9400

Browse files

Files changed (6) hide show

README.md +4 -4
adapter_config.json +1 -1
loss.png +2 -2
optimizer.pt +1 -1
scheduler.pt +1 -1
trainer_state.json +703 -3

README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 base_model: Qwen/Qwen2.5-3B-Instruct
 library_name: peft
 ---
-# Gradience T1 3B (Step 9300 Checkpoint)
 > [!NOTE]
 > Training in progress...
@@ -38,11 +38,11 @@ library_name: peft
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
-  <div style="height: 30px; width: 94.53%; background-color: #76c7c0; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
-    <!-- 94.53% -->
   </div>
 </div>
-<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 9300 out of 9838 steps</p>
 </body>
 </html>

 base_model: Qwen/Qwen2.5-3B-Instruct
 library_name: peft
 ---
+# Gradience T1 3B (Step 9400 Checkpoint)
 > [!NOTE]
 > Training in progress...
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
+  <div style="height: 30px; width: 95.55%; background-color: #76c7c0; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
+    <!-- 95.55% -->
   </div>
 </div>
+<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 9400 out of 9838 steps</p>
 </body>
 </html>

adapter_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "./Qwen-2.5-3B-Instruct",
   "bias": "none",
   "eva_config": null,
   "exclude_modules": null,

 {
   "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen2.5-3B-Instruct",
   "bias": "none",
   "eva_config": null,
   "exclude_modules": null,

loss.png CHANGED Viewed

Git LFS Details

SHA256: a296ea7c9b1e4a9415f0ab907d87fda009cbc92489ae843587d79a31b694ffca
Pointer size: 131 Bytes
Size of remote file: 109 kB

Git LFS Details

SHA256: 401b567be3b16569f8d231797150c244a219636eb288b4598ee7a5c268f6f45a
Pointer size: 131 Bytes
Size of remote file: 109 kB

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:63c25e928d6c19f9814f48cb001dde48b2b4fff0d97888912d1311ea9feb1ee3
 size 61392692

 version https://git-lfs.github.com/spec/v1
+oid sha256:792627c97be0a2e0187572be80a240954ee12f398646440d185d8dd31f3d03c0
 size 61392692

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:00f4445ebfc4cb887fb731185fcc431b4b786e69df04595e6a10e94409efff70
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:907302e63b2cfc53962ac5b5a9bc923220b8816caf109fa095b223d639e4695a
 size 1064

trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.89062817645863,
   "eval_steps": 500,
-  "global_step": 9300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -65108,6 +65108,706 @@
       "learning_rate": 1.0963083494355742e-05,
       "loss": 1.0495,
       "step": 9300
     }
   ],
   "logging_steps": 1,
@@ -65127,7 +65827,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.20784955121184e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.9109575116893678,
   "eval_steps": 500,
+  "global_step": 9400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.0963083494355742e-05,
       "loss": 1.0495,
       "step": 9300
+    },
+    {
+      "epoch": 1.8908314698109372,
+      "grad_norm": 0.16537079215049744,
+      "learning_rate": 1.094274382182447e-05,
+      "loss": 1.0986,
+      "step": 9301
+    },
+    {
+      "epoch": 1.8910347631632445,
+      "grad_norm": 0.15517854690551758,
+      "learning_rate": 1.0922404149293197e-05,
+      "loss": 0.8952,
+      "step": 9302
+    },
+    {
+      "epoch": 1.891238056515552,
+      "grad_norm": 0.20096749067306519,
+      "learning_rate": 1.0902064476761925e-05,
+      "loss": 1.212,
+      "step": 9303
+    },
+    {
+      "epoch": 1.8914413498678593,
+      "grad_norm": 0.1688317060470581,
+      "learning_rate": 1.0881724804230653e-05,
+      "loss": 1.0571,
+      "step": 9304
+    },
+    {
+      "epoch": 1.8916446432201668,
+      "grad_norm": 0.15583118796348572,
+      "learning_rate": 1.086138513169938e-05,
+      "loss": 0.9346,
+      "step": 9305
+    },
+    {
+      "epoch": 1.891847936572474,
+      "grad_norm": 0.17731614410877228,
+      "learning_rate": 1.0841045459168107e-05,
+      "loss": 1.0109,
+      "step": 9306
+    },
+    {
+      "epoch": 1.8920512299247814,
+      "grad_norm": 0.16740213334560394,
+      "learning_rate": 1.0820705786636835e-05,
+      "loss": 0.9507,
+      "step": 9307
+    },
+    {
+      "epoch": 1.8922545232770889,
+      "grad_norm": 0.15723784267902374,
+      "learning_rate": 1.0800366114105563e-05,
+      "loss": 0.9974,
+      "step": 9308
+    },
+    {
+      "epoch": 1.8924578166293964,
+      "grad_norm": 0.1717313826084137,
+      "learning_rate": 1.078002644157429e-05,
+      "loss": 1.1462,
+      "step": 9309
+    },
+    {
+      "epoch": 1.8926611099817037,
+      "grad_norm": 0.16550962626934052,
+      "learning_rate": 1.0759686769043019e-05,
+      "loss": 0.9711,
+      "step": 9310
+    },
+    {
+      "epoch": 1.892864403334011,
+      "grad_norm": 0.15991832315921783,
+      "learning_rate": 1.0739347096511746e-05,
+      "loss": 0.9214,
+      "step": 9311
+    },
+    {
+      "epoch": 1.8930676966863182,
+      "grad_norm": 0.16738741099834442,
+      "learning_rate": 1.0719007423980474e-05,
+      "loss": 0.9806,
+      "step": 9312
+    },
+    {
+      "epoch": 1.8932709900386258,
+      "grad_norm": 0.1509847342967987,
+      "learning_rate": 1.0698667751449202e-05,
+      "loss": 0.8114,
+      "step": 9313
+    },
+    {
+      "epoch": 1.8934742833909333,
+      "grad_norm": 0.16016948223114014,
+      "learning_rate": 1.067832807891793e-05,
+      "loss": 1.0437,
+      "step": 9314
+    },
+    {
+      "epoch": 1.8936775767432406,
+      "grad_norm": 0.16906878352165222,
+      "learning_rate": 1.0657988406386658e-05,
+      "loss": 1.1174,
+      "step": 9315
+    },
+    {
+      "epoch": 1.8938808700955478,
+      "grad_norm": 0.14439010620117188,
+      "learning_rate": 1.0637648733855386e-05,
+      "loss": 0.9612,
+      "step": 9316
+    },
+    {
+      "epoch": 1.8940841634478551,
+      "grad_norm": 0.14976871013641357,
+      "learning_rate": 1.0617309061324113e-05,
+      "loss": 0.9424,
+      "step": 9317
+    },
+    {
+      "epoch": 1.8942874568001626,
+      "grad_norm": 0.14871583878993988,
+      "learning_rate": 1.0596969388792841e-05,
+      "loss": 1.0182,
+      "step": 9318
+    },
+    {
+      "epoch": 1.8944907501524701,
+      "grad_norm": 0.15636609494686127,
+      "learning_rate": 1.057662971626157e-05,
+      "loss": 1.0271,
+      "step": 9319
+    },
+    {
+      "epoch": 1.8946940435047774,
+      "grad_norm": 0.15208925306797028,
+      "learning_rate": 1.0556290043730297e-05,
+      "loss": 0.9909,
+      "step": 9320
+    },
+    {
+      "epoch": 1.8948973368570847,
+      "grad_norm": 0.16356107592582703,
+      "learning_rate": 1.0535950371199025e-05,
+      "loss": 1.0277,
+      "step": 9321
+    },
+    {
+      "epoch": 1.895100630209392,
+      "grad_norm": 0.1584353744983673,
+      "learning_rate": 1.0515610698667753e-05,
+      "loss": 1.061,
+      "step": 9322
+    },
+    {
+      "epoch": 1.8953039235616995,
+      "grad_norm": 0.17385679483413696,
+      "learning_rate": 1.049527102613648e-05,
+      "loss": 1.0001,
+      "step": 9323
+    },
+    {
+      "epoch": 1.895507216914007,
+      "grad_norm": 0.14875008165836334,
+      "learning_rate": 1.0474931353605208e-05,
+      "loss": 0.8962,
+      "step": 9324
+    },
+    {
+      "epoch": 1.8957105102663143,
+      "grad_norm": 0.1576494425535202,
+      "learning_rate": 1.0454591681073936e-05,
+      "loss": 0.9676,
+      "step": 9325
+    },
+    {
+      "epoch": 1.8959138036186216,
+      "grad_norm": 0.17531967163085938,
+      "learning_rate": 1.0434252008542662e-05,
+      "loss": 1.0986,
+      "step": 9326
+    },
+    {
+      "epoch": 1.896117096970929,
+      "grad_norm": 0.17487825453281403,
+      "learning_rate": 1.041391233601139e-05,
+      "loss": 1.004,
+      "step": 9327
+    },
+    {
+      "epoch": 1.8963203903232364,
+      "grad_norm": 0.1605103313922882,
+      "learning_rate": 1.0393572663480118e-05,
+      "loss": 1.0181,
+      "step": 9328
+    },
+    {
+      "epoch": 1.896523683675544,
+      "grad_norm": 0.1699770838022232,
+      "learning_rate": 1.0373232990948846e-05,
+      "loss": 1.0835,
+      "step": 9329
+    },
+    {
+      "epoch": 1.8967269770278512,
+      "grad_norm": 0.15781551599502563,
+      "learning_rate": 1.0352893318417574e-05,
+      "loss": 0.9709,
+      "step": 9330
+    },
+    {
+      "epoch": 1.8969302703801585,
+      "grad_norm": 0.15262846648693085,
+      "learning_rate": 1.0332553645886302e-05,
+      "loss": 1.0025,
+      "step": 9331
+    },
+    {
+      "epoch": 1.897133563732466,
+      "grad_norm": 0.16675053536891937,
+      "learning_rate": 1.031221397335503e-05,
+      "loss": 1.058,
+      "step": 9332
+    },
+    {
+      "epoch": 1.8973368570847733,
+      "grad_norm": 0.15862716734409332,
+      "learning_rate": 1.0291874300823757e-05,
+      "loss": 1.0868,
+      "step": 9333
+    },
+    {
+      "epoch": 1.8975401504370808,
+      "grad_norm": 0.15670160949230194,
+      "learning_rate": 1.0271534628292485e-05,
+      "loss": 0.9464,
+      "step": 9334
+    },
+    {
+      "epoch": 1.897743443789388,
+      "grad_norm": 0.16652365028858185,
+      "learning_rate": 1.0251194955761213e-05,
+      "loss": 1.0129,
+      "step": 9335
+    },
+    {
+      "epoch": 1.8979467371416954,
+      "grad_norm": 0.13543561100959778,
+      "learning_rate": 1.023085528322994e-05,
+      "loss": 0.865,
+      "step": 9336
+    },
+    {
+      "epoch": 1.8981500304940029,
+      "grad_norm": 0.17539720237255096,
+      "learning_rate": 1.0210515610698667e-05,
+      "loss": 1.1737,
+      "step": 9337
+    },
+    {
+      "epoch": 1.8983533238463104,
+      "grad_norm": 0.16995757818222046,
+      "learning_rate": 1.0190175938167395e-05,
+      "loss": 1.0107,
+      "step": 9338
+    },
+    {
+      "epoch": 1.8985566171986177,
+      "grad_norm": 0.16076253354549408,
+      "learning_rate": 1.0169836265636123e-05,
+      "loss": 0.9791,
+      "step": 9339
+    },
+    {
+      "epoch": 1.898759910550925,
+      "grad_norm": 0.15653160214424133,
+      "learning_rate": 1.014949659310485e-05,
+      "loss": 1.0362,
+      "step": 9340
+    },
+    {
+      "epoch": 1.8989632039032323,
+      "grad_norm": 0.17451439797878265,
+      "learning_rate": 1.0129156920573578e-05,
+      "loss": 1.1921,
+      "step": 9341
+    },
+    {
+      "epoch": 1.8991664972555398,
+      "grad_norm": 0.15730206668376923,
+      "learning_rate": 1.0108817248042306e-05,
+      "loss": 1.0043,
+      "step": 9342
+    },
+    {
+      "epoch": 1.8993697906078473,
+      "grad_norm": 0.1547120213508606,
+      "learning_rate": 1.0088477575511034e-05,
+      "loss": 0.9677,
+      "step": 9343
+    },
+    {
+      "epoch": 1.8995730839601546,
+      "grad_norm": 0.14792628586292267,
+      "learning_rate": 1.0068137902979764e-05,
+      "loss": 0.9495,
+      "step": 9344
+    },
+    {
+      "epoch": 1.8997763773124619,
+      "grad_norm": 0.16715767979621887,
+      "learning_rate": 1.0047798230448492e-05,
+      "loss": 1.0871,
+      "step": 9345
+    },
+    {
+      "epoch": 1.8999796706647691,
+      "grad_norm": 0.16810470819473267,
+      "learning_rate": 1.002745855791722e-05,
+      "loss": 0.9926,
+      "step": 9346
+    },
+    {
+      "epoch": 1.9001829640170766,
+      "grad_norm": 0.16060957312583923,
+      "learning_rate": 1.0007118885385946e-05,
+      "loss": 0.9888,
+      "step": 9347
+    },
+    {
+      "epoch": 1.9003862573693842,
+      "grad_norm": 0.16978204250335693,
+      "learning_rate": 9.986779212854673e-06,
+      "loss": 1.1782,
+      "step": 9348
+    },
+    {
+      "epoch": 1.9005895507216914,
+      "grad_norm": 0.1680404543876648,
+      "learning_rate": 9.966439540323401e-06,
+      "loss": 1.0525,
+      "step": 9349
+    },
+    {
+      "epoch": 1.9007928440739987,
+      "grad_norm": 0.1583416610956192,
+      "learning_rate": 9.946099867792129e-06,
+      "loss": 0.9938,
+      "step": 9350
+    },
+    {
+      "epoch": 1.900996137426306,
+      "grad_norm": 0.16274034976959229,
+      "learning_rate": 9.925760195260857e-06,
+      "loss": 1.0139,
+      "step": 9351
+    },
+    {
+      "epoch": 1.9011994307786135,
+      "grad_norm": 0.1580599546432495,
+      "learning_rate": 9.905420522729585e-06,
+      "loss": 0.9478,
+      "step": 9352
+    },
+    {
+      "epoch": 1.901402724130921,
+      "grad_norm": 0.167547807097435,
+      "learning_rate": 9.885080850198313e-06,
+      "loss": 0.9575,
+      "step": 9353
+    },
+    {
+      "epoch": 1.9016060174832283,
+      "grad_norm": 0.17265664041042328,
+      "learning_rate": 9.86474117766704e-06,
+      "loss": 1.2307,
+      "step": 9354
+    },
+    {
+      "epoch": 1.9018093108355356,
+      "grad_norm": 0.15563230216503143,
+      "learning_rate": 9.844401505135768e-06,
+      "loss": 0.8814,
+      "step": 9355
+    },
+    {
+      "epoch": 1.902012604187843,
+      "grad_norm": 0.17064541578292847,
+      "learning_rate": 9.824061832604496e-06,
+      "loss": 1.1182,
+      "step": 9356
+    },
+    {
+      "epoch": 1.9022158975401504,
+      "grad_norm": 0.14311741292476654,
+      "learning_rate": 9.803722160073222e-06,
+      "loss": 0.8657,
+      "step": 9357
+    },
+    {
+      "epoch": 1.902419190892458,
+      "grad_norm": 0.17543523013591766,
+      "learning_rate": 9.78338248754195e-06,
+      "loss": 1.0129,
+      "step": 9358
+    },
+    {
+      "epoch": 1.9026224842447652,
+      "grad_norm": 0.1519622951745987,
+      "learning_rate": 9.763042815010678e-06,
+      "loss": 0.8612,
+      "step": 9359
+    },
+    {
+      "epoch": 1.9028257775970725,
+      "grad_norm": 0.1456150859594345,
+      "learning_rate": 9.742703142479406e-06,
+      "loss": 0.9171,
+      "step": 9360
+    },
+    {
+      "epoch": 1.90302907094938,
+      "grad_norm": 0.1435515433549881,
+      "learning_rate": 9.722363469948134e-06,
+      "loss": 0.9186,
+      "step": 9361
+    },
+    {
+      "epoch": 1.9032323643016873,
+      "grad_norm": 0.14047978818416595,
+      "learning_rate": 9.702023797416862e-06,
+      "loss": 1.0005,
+      "step": 9362
+    },
+    {
+      "epoch": 1.9034356576539948,
+      "grad_norm": 0.13906230032444,
+      "learning_rate": 9.68168412488559e-06,
+      "loss": 0.9253,
+      "step": 9363
+    },
+    {
+      "epoch": 1.903638951006302,
+      "grad_norm": 0.1536070853471756,
+      "learning_rate": 9.661344452354317e-06,
+      "loss": 0.9884,
+      "step": 9364
+    },
+    {
+      "epoch": 1.9038422443586094,
+      "grad_norm": 0.16738420724868774,
+      "learning_rate": 9.641004779823045e-06,
+      "loss": 1.0671,
+      "step": 9365
+    },
+    {
+      "epoch": 1.904045537710917,
+      "grad_norm": 0.14910069108009338,
+      "learning_rate": 9.620665107291773e-06,
+      "loss": 1.0673,
+      "step": 9366
+    },
+    {
+      "epoch": 1.9042488310632242,
+      "grad_norm": 0.17034853994846344,
+      "learning_rate": 9.600325434760501e-06,
+      "loss": 0.9899,
+      "step": 9367
+    },
+    {
+      "epoch": 1.9044521244155317,
+      "grad_norm": 0.14666365087032318,
+      "learning_rate": 9.579985762229229e-06,
+      "loss": 0.9937,
+      "step": 9368
+    },
+    {
+      "epoch": 1.904655417767839,
+      "grad_norm": 0.18429192900657654,
+      "learning_rate": 9.559646089697957e-06,
+      "loss": 0.9585,
+      "step": 9369
+    },
+    {
+      "epoch": 1.9048587111201463,
+      "grad_norm": 0.1640387326478958,
+      "learning_rate": 9.539306417166684e-06,
+      "loss": 0.8624,
+      "step": 9370
+    },
+    {
+      "epoch": 1.9050620044724538,
+      "grad_norm": 0.17350825667381287,
+      "learning_rate": 9.518966744635412e-06,
+      "loss": 1.1242,
+      "step": 9371
+    },
+    {
+      "epoch": 1.9052652978247613,
+      "grad_norm": 0.1487387865781784,
+      "learning_rate": 9.49862707210414e-06,
+      "loss": 0.8233,
+      "step": 9372
+    },
+    {
+      "epoch": 1.9054685911770686,
+      "grad_norm": 0.16694356501102448,
+      "learning_rate": 9.478287399572868e-06,
+      "loss": 0.9576,
+      "step": 9373
+    },
+    {
+      "epoch": 1.9056718845293759,
+      "grad_norm": 0.16773739457130432,
+      "learning_rate": 9.457947727041596e-06,
+      "loss": 1.1013,
+      "step": 9374
+    },
+    {
+      "epoch": 1.9058751778816831,
+      "grad_norm": 0.15956096351146698,
+      "learning_rate": 9.437608054510324e-06,
+      "loss": 0.9642,
+      "step": 9375
+    },
+    {
+      "epoch": 1.9060784712339907,
+      "grad_norm": 0.1490715593099594,
+      "learning_rate": 9.417268381979051e-06,
+      "loss": 0.8804,
+      "step": 9376
+    },
+    {
+      "epoch": 1.9062817645862982,
+      "grad_norm": 0.18091818690299988,
+      "learning_rate": 9.39692870944778e-06,
+      "loss": 1.2239,
+      "step": 9377
+    },
+    {
+      "epoch": 1.9064850579386055,
+      "grad_norm": 0.15678752958774567,
+      "learning_rate": 9.376589036916506e-06,
+      "loss": 0.9122,
+      "step": 9378
+    },
+    {
+      "epoch": 1.9066883512909127,
+      "grad_norm": 0.18459545075893402,
+      "learning_rate": 9.356249364385233e-06,
+      "loss": 1.252,
+      "step": 9379
+    },
+    {
+      "epoch": 1.90689164464322,
+      "grad_norm": 0.1801295429468155,
+      "learning_rate": 9.335909691853961e-06,
+      "loss": 1.2248,
+      "step": 9380
+    },
+    {
+      "epoch": 1.9070949379955275,
+      "grad_norm": 0.16494908928871155,
+      "learning_rate": 9.315570019322689e-06,
+      "loss": 1.0764,
+      "step": 9381
+    },
+    {
+      "epoch": 1.907298231347835,
+      "grad_norm": 0.1461213231086731,
+      "learning_rate": 9.295230346791417e-06,
+      "loss": 0.9669,
+      "step": 9382
+    },
+    {
+      "epoch": 1.9075015247001423,
+      "grad_norm": 0.1623806357383728,
+      "learning_rate": 9.274890674260145e-06,
+      "loss": 1.044,
+      "step": 9383
+    },
+    {
+      "epoch": 1.9077048180524496,
+      "grad_norm": 0.1630796492099762,
+      "learning_rate": 9.254551001728873e-06,
+      "loss": 1.1293,
+      "step": 9384
+    },
+    {
+      "epoch": 1.907908111404757,
+      "grad_norm": 0.15207818150520325,
+      "learning_rate": 9.2342113291976e-06,
+      "loss": 1.0695,
+      "step": 9385
+    },
+    {
+      "epoch": 1.9081114047570644,
+      "grad_norm": 0.18577249348163605,
+      "learning_rate": 9.213871656666328e-06,
+      "loss": 1.286,
+      "step": 9386
+    },
+    {
+      "epoch": 1.908314698109372,
+      "grad_norm": 0.1684713214635849,
+      "learning_rate": 9.193531984135056e-06,
+      "loss": 1.0222,
+      "step": 9387
+    },
+    {
+      "epoch": 1.9085179914616792,
+      "grad_norm": 0.1702156364917755,
+      "learning_rate": 9.173192311603782e-06,
+      "loss": 1.0436,
+      "step": 9388
+    },
+    {
+      "epoch": 1.9087212848139865,
+      "grad_norm": 0.16724956035614014,
+      "learning_rate": 9.15285263907251e-06,
+      "loss": 1.0927,
+      "step": 9389
+    },
+    {
+      "epoch": 1.908924578166294,
+      "grad_norm": 0.15122951567173004,
+      "learning_rate": 9.132512966541238e-06,
+      "loss": 0.9047,
+      "step": 9390
+    },
+    {
+      "epoch": 1.9091278715186013,
+      "grad_norm": 0.159726083278656,
+      "learning_rate": 9.112173294009966e-06,
+      "loss": 0.9561,
+      "step": 9391
+    },
+    {
+      "epoch": 1.9093311648709088,
+      "grad_norm": 0.16831693053245544,
+      "learning_rate": 9.091833621478694e-06,
+      "loss": 1.0801,
+      "step": 9392
+    },
+    {
+      "epoch": 1.909534458223216,
+      "grad_norm": 0.1692102700471878,
+      "learning_rate": 9.071493948947422e-06,
+      "loss": 1.1376,
+      "step": 9393
+    },
+    {
+      "epoch": 1.9097377515755234,
+      "grad_norm": 0.17488181591033936,
+      "learning_rate": 9.051154276416151e-06,
+      "loss": 1.1418,
+      "step": 9394
+    },
+    {
+      "epoch": 1.909941044927831,
+      "grad_norm": 0.16990408301353455,
+      "learning_rate": 9.030814603884879e-06,
+      "loss": 1.1285,
+      "step": 9395
+    },
+    {
+      "epoch": 1.9101443382801382,
+      "grad_norm": 0.16019228100776672,
+      "learning_rate": 9.010474931353607e-06,
+      "loss": 1.0915,
+      "step": 9396
+    },
+    {
+      "epoch": 1.9103476316324457,
+      "grad_norm": 0.18147097527980804,
+      "learning_rate": 8.990135258822335e-06,
+      "loss": 1.2002,
+      "step": 9397
+    },
+    {
+      "epoch": 1.910550924984753,
+      "grad_norm": 0.16949224472045898,
+      "learning_rate": 8.969795586291062e-06,
+      "loss": 1.0624,
+      "step": 9398
+    },
+    {
+      "epoch": 1.9107542183370603,
+      "grad_norm": 0.17006736993789673,
+      "learning_rate": 8.949455913759789e-06,
+      "loss": 1.2127,
+      "step": 9399
+    },
+    {
+      "epoch": 1.9109575116893678,
+      "grad_norm": 0.16276562213897705,
+      "learning_rate": 8.929116241228516e-06,
+      "loss": 0.9774,
+      "step": 9400
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 5.263015631335834e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null