qingy2024 committed
Commit c3cff6d · verified · Parent: ec029f6

Upload checkpoint 7600

Files changed (6):
  1. README.md +3 -3
  2. adapter_model.safetensors +1 -1
  3. loss.png +2 -2
  4. optimizer.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +703 -3
README.md CHANGED
@@ -2,7 +2,7 @@
  base_model: Qwen/Qwen2.5-3B-Instruct
  library_name: peft
  ---
- # Gradience T1 3B (Step 7500 Checkpoint)
+ # Gradience T1 3B (Step 7600 Checkpoint)

  > [!NOTE]
  > Training in progress...
@@ -38,11 +38,11 @@ library_name: peft
  </head>
  <body>
  <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
- <div style="height: 30px; width: 76.24%; background-color: #76c7c0; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
+ <div style="height: 30px; width: 77.25%; background-color: #76c7c0; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
  <!-- 3.75% -->
  </div>
  </div>
- <p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 7500 out of 9838 steps</p>
+ <p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 7600 out of 9838 steps</p>
  </body>
  </html>

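The two numbers this README edit touches stay consistent with each other: the bar width is simply the checkpoint step as a percentage of total steps, rounded to two decimals (7600 / 9838 ≈ 77.25%, just as 7500 / 9838 ≈ 76.24% before). A minimal sketch of how such a fragment could be regenerated at each checkpoint, assuming exactly that formula; the function name is hypothetical, not something from this repo:

```python
def progress_fragment(step: int, total_steps: int) -> str:
    """Render the README progress-bar width and caption for one checkpoint."""
    width = round(100 * step / total_steps, 2)  # 7600 / 9838 -> 77.25
    bar = f'<div style="height: 30px; width: {width}%;"></div>'
    caption = f"<p>Progress: {step} out of {total_steps} steps</p>"
    return f"{bar}\n{caption}"

print(progress_fragment(7600, 9838))  # width: 77.25%, matching the new README
```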
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7ba7de9543f1f34b4b94e34ebafefebbb9e772bbfa1a36d1442368c7d913cd4f
+ oid sha256:1fa1a8b2084abf4c0aa143ad82d5673ad22c47fc43f7f35a295ebd5f949602b1
  size 119801528
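The adapter, optimizer, and scheduler entries in this commit are Git LFS pointer files: each records only the spec version, the SHA-256 of the blob (`oid`), and its size in bytes. A minimal sketch of checking a downloaded blob against those pointer fields, assuming standard Git LFS semantics (the oid is the SHA-256 of the raw file contents); the local path is hypothetical:

```python
import hashlib
import os

def verify_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a downloaded blob against the oid/size fields of its LFS pointer."""
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Hypothetical local copy of this checkpoint's adapter weights:
ok = verify_lfs_pointer(
    "adapter_model.safetensors",
    "1fa1a8b2084abf4c0aa143ad82d5673ad22c47fc43f7f35a295ebd5f949602b1",
    119801528,
)
print("pointer matches" if ok else "mismatch")
```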
loss.png CHANGED

Git LFS Details (before)
  • SHA256: b7c74551b71b7f1b5af4da0bca6229560fb22469d7f787f628e404409beedab3
  • Pointer size: 131 Bytes
  • Size of remote file: 177 kB

Git LFS Details (after)
  • SHA256: 5cef93a5effa7e25be405a0ddd1534a84ea7fe5f352c25b178ac088ad207e690
  • Pointer size: 131 Bytes
  • Size of remote file: 176 kB
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e1be61e6aa4af344c33fc4c28ffd8c21f96ebcaee499984ec5ff9ce87c59b1e2
+ oid sha256:d724f96203373dcec88adb5dfa4341c8156f923a7472825798252a2a46750bd3
  size 61392692
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3c3e76495dee6dbbdad9587f79a6f58b9eb8e66519d8012f0cf4e9c842847e55
+ oid sha256:5559d18ced05b328f0925fbaa730c7e7e3e59068bfb889a48ac53bc0cce0a8c7
  size 1064
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 1.5247001423053466,
+ "epoch": 1.5450294775360844,
  "eval_steps": 500,
- "global_step": 7500,
+ "global_step": 7600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -52508,6 +52508,706 @@
  "learning_rate": 4.757449405064579e-05,
  "loss": 0.9683,
  "step": 7500
+ },
+ {
+ "epoch": 1.524903435657654,
+ "grad_norm": 0.16842851042747498,
+ "learning_rate": 4.755415437811451e-05,
+ "loss": 1.1311,
+ "step": 7501
+ },
+ {
+ "epoch": 1.5251067290099614,
+ "grad_norm": 0.1710801124572754,
+ "learning_rate": 4.753381470558324e-05,
+ "loss": 1.051,
+ "step": 7502
+ },
+ {
+ "epoch": 1.5253100223622686,
+ "grad_norm": 0.14032766222953796,
+ "learning_rate": 4.751347503305197e-05,
+ "loss": 0.886,
+ "step": 7503
+ },
+ {
+ "epoch": 1.5255133157145762,
+ "grad_norm": 0.16021350026130676,
+ "learning_rate": 4.74931353605207e-05,
+ "loss": 1.1057,
+ "step": 7504
+ },
+ {
+ "epoch": 1.5257166090668837,
+ "grad_norm": 0.13777956366539001,
+ "learning_rate": 4.747279568798942e-05,
+ "loss": 0.8405,
+ "step": 7505
+ },
+ {
+ "epoch": 1.525919902419191,
+ "grad_norm": 0.15827859938144684,
+ "learning_rate": 4.7452456015458155e-05,
+ "loss": 0.9807,
+ "step": 7506
+ },
+ {
+ "epoch": 1.5261231957714982,
+ "grad_norm": 0.14875048398971558,
+ "learning_rate": 4.743211634292688e-05,
+ "loss": 0.9142,
+ "step": 7507
+ },
+ {
+ "epoch": 1.5263264891238055,
+ "grad_norm": 0.1471138298511505,
+ "learning_rate": 4.741177667039561e-05,
+ "loss": 0.9305,
+ "step": 7508
+ },
+ {
+ "epoch": 1.526529782476113,
+ "grad_norm": 0.15858818590641022,
+ "learning_rate": 4.7391436997864335e-05,
+ "loss": 1.0326,
+ "step": 7509
+ },
+ {
+ "epoch": 1.5267330758284205,
+ "grad_norm": 0.14757820963859558,
+ "learning_rate": 4.7371097325333066e-05,
+ "loss": 0.9423,
+ "step": 7510
+ },
+ {
+ "epoch": 1.5269363691807278,
+ "grad_norm": 0.14671318233013153,
+ "learning_rate": 4.735075765280179e-05,
+ "loss": 0.9953,
+ "step": 7511
+ },
+ {
+ "epoch": 1.5271396625330351,
+ "grad_norm": 0.16315753757953644,
+ "learning_rate": 4.733041798027052e-05,
+ "loss": 1.0674,
+ "step": 7512
+ },
+ {
+ "epoch": 1.5273429558853424,
+ "grad_norm": 0.15158745646476746,
+ "learning_rate": 4.7310078307739246e-05,
+ "loss": 0.9416,
+ "step": 7513
+ },
+ {
+ "epoch": 1.52754624923765,
+ "grad_norm": 0.1569458246231079,
+ "learning_rate": 4.728973863520798e-05,
+ "loss": 1.1336,
+ "step": 7514
+ },
+ {
+ "epoch": 1.5277495425899574,
+ "grad_norm": 0.15651223063468933,
+ "learning_rate": 4.72693989626767e-05,
+ "loss": 0.9724,
+ "step": 7515
+ },
+ {
+ "epoch": 1.5279528359422647,
+ "grad_norm": 0.1413620412349701,
+ "learning_rate": 4.724905929014543e-05,
+ "loss": 0.9164,
+ "step": 7516
+ },
+ {
+ "epoch": 1.528156129294572,
+ "grad_norm": 0.14873231947422028,
+ "learning_rate": 4.722871961761416e-05,
+ "loss": 0.9904,
+ "step": 7517
+ },
+ {
+ "epoch": 1.5283594226468793,
+ "grad_norm": 0.16632919013500214,
+ "learning_rate": 4.720837994508289e-05,
+ "loss": 1.0332,
+ "step": 7518
+ },
+ {
+ "epoch": 1.5285627159991868,
+ "grad_norm": 0.1570500135421753,
+ "learning_rate": 4.718804027255161e-05,
+ "loss": 1.0491,
+ "step": 7519
+ },
+ {
+ "epoch": 1.5287660093514943,
+ "grad_norm": 0.17738598585128784,
+ "learning_rate": 4.7167700600020344e-05,
+ "loss": 1.049,
+ "step": 7520
+ },
+ {
+ "epoch": 1.5289693027038016,
+ "grad_norm": 0.1635134369134903,
+ "learning_rate": 4.714736092748907e-05,
+ "loss": 1.1412,
+ "step": 7521
+ },
+ {
+ "epoch": 1.529172596056109,
+ "grad_norm": 0.16301599144935608,
+ "learning_rate": 4.71270212549578e-05,
+ "loss": 1.0967,
+ "step": 7522
+ },
+ {
+ "epoch": 1.5293758894084164,
+ "grad_norm": 0.14440634846687317,
+ "learning_rate": 4.7106681582426525e-05,
+ "loss": 0.8357,
+ "step": 7523
+ },
+ {
+ "epoch": 1.5295791827607237,
+ "grad_norm": 0.144694983959198,
+ "learning_rate": 4.7086341909895256e-05,
+ "loss": 0.8722,
+ "step": 7524
+ },
+ {
+ "epoch": 1.5297824761130312,
+ "grad_norm": 0.14646680653095245,
+ "learning_rate": 4.706600223736398e-05,
+ "loss": 0.8988,
+ "step": 7525
+ },
+ {
+ "epoch": 1.5299857694653385,
+ "grad_norm": 0.1644057333469391,
+ "learning_rate": 4.704566256483271e-05,
+ "loss": 1.1197,
+ "step": 7526
+ },
+ {
+ "epoch": 1.5301890628176458,
+ "grad_norm": 0.1693752557039261,
+ "learning_rate": 4.7025322892301436e-05,
+ "loss": 1.0486,
+ "step": 7527
+ },
+ {
+ "epoch": 1.5303923561699533,
+ "grad_norm": 0.1716986894607544,
+ "learning_rate": 4.700498321977017e-05,
+ "loss": 1.1087,
+ "step": 7528
+ },
+ {
+ "epoch": 1.5305956495222606,
+ "grad_norm": 0.1740422248840332,
+ "learning_rate": 4.698464354723889e-05,
+ "loss": 1.0909,
+ "step": 7529
+ },
+ {
+ "epoch": 1.530798942874568,
+ "grad_norm": 0.15906310081481934,
+ "learning_rate": 4.696430387470762e-05,
+ "loss": 1.0841,
+ "step": 7530
+ },
+ {
+ "epoch": 1.5310022362268754,
+ "grad_norm": 0.14159859716892242,
+ "learning_rate": 4.694396420217635e-05,
+ "loss": 0.8766,
+ "step": 7531
+ },
+ {
+ "epoch": 1.5312055295791827,
+ "grad_norm": 0.17096573114395142,
+ "learning_rate": 4.692362452964508e-05,
+ "loss": 1.1308,
+ "step": 7532
+ },
+ {
+ "epoch": 1.5314088229314902,
+ "grad_norm": 0.16331720352172852,
+ "learning_rate": 4.69032848571138e-05,
+ "loss": 0.9884,
+ "step": 7533
+ },
+ {
+ "epoch": 1.5316121162837977,
+ "grad_norm": 0.15612895786762238,
+ "learning_rate": 4.6882945184582534e-05,
+ "loss": 0.9737,
+ "step": 7534
+ },
+ {
+ "epoch": 1.531815409636105,
+ "grad_norm": 0.1716272234916687,
+ "learning_rate": 4.686260551205126e-05,
+ "loss": 1.2049,
+ "step": 7535
+ },
+ {
+ "epoch": 1.5320187029884123,
+ "grad_norm": 0.15378396213054657,
+ "learning_rate": 4.684226583951999e-05,
+ "loss": 1.0315,
+ "step": 7536
+ },
+ {
+ "epoch": 1.5322219963407195,
+ "grad_norm": 0.16745533049106598,
+ "learning_rate": 4.682192616698871e-05,
+ "loss": 1.1749,
+ "step": 7537
+ },
+ {
+ "epoch": 1.532425289693027,
+ "grad_norm": 0.16122505068778992,
+ "learning_rate": 4.680158649445744e-05,
+ "loss": 1.1481,
+ "step": 7538
+ },
+ {
+ "epoch": 1.5326285830453346,
+ "grad_norm": 0.15753133594989777,
+ "learning_rate": 4.678124682192617e-05,
+ "loss": 1.0085,
+ "step": 7539
+ },
+ {
+ "epoch": 1.5328318763976418,
+ "grad_norm": 0.15394344925880432,
+ "learning_rate": 4.6760907149394895e-05,
+ "loss": 0.9611,
+ "step": 7540
+ },
+ {
+ "epoch": 1.5330351697499491,
+ "grad_norm": 0.1620665341615677,
+ "learning_rate": 4.6740567476863626e-05,
+ "loss": 0.9435,
+ "step": 7541
+ },
+ {
+ "epoch": 1.5332384631022564,
+ "grad_norm": 0.15785206854343414,
+ "learning_rate": 4.672022780433235e-05,
+ "loss": 1.0311,
+ "step": 7542
+ },
+ {
+ "epoch": 1.533441756454564,
+ "grad_norm": 0.15812784433364868,
+ "learning_rate": 4.669988813180108e-05,
+ "loss": 0.9304,
+ "step": 7543
+ },
+ {
+ "epoch": 1.5336450498068714,
+ "grad_norm": 0.14829683303833008,
+ "learning_rate": 4.6679548459269806e-05,
+ "loss": 0.9735,
+ "step": 7544
+ },
+ {
+ "epoch": 1.5338483431591787,
+ "grad_norm": 0.15306831896305084,
+ "learning_rate": 4.665920878673854e-05,
+ "loss": 0.9459,
+ "step": 7545
+ },
+ {
+ "epoch": 1.534051636511486,
+ "grad_norm": 0.1524849534034729,
+ "learning_rate": 4.663886911420726e-05,
+ "loss": 0.9989,
+ "step": 7546
+ },
+ {
+ "epoch": 1.5342549298637933,
+ "grad_norm": 0.1524866819381714,
+ "learning_rate": 4.661852944167599e-05,
+ "loss": 0.9516,
+ "step": 7547
+ },
+ {
+ "epoch": 1.5344582232161008,
+ "grad_norm": 0.1561049073934555,
+ "learning_rate": 4.659818976914472e-05,
+ "loss": 0.9629,
+ "step": 7548
+ },
+ {
+ "epoch": 1.5346615165684083,
+ "grad_norm": 0.15052708983421326,
+ "learning_rate": 4.657785009661345e-05,
+ "loss": 0.9709,
+ "step": 7549
+ },
+ {
+ "epoch": 1.5348648099207156,
+ "grad_norm": 0.16317294538021088,
+ "learning_rate": 4.655751042408217e-05,
+ "loss": 1.0431,
+ "step": 7550
+ },
+ {
+ "epoch": 1.535068103273023,
+ "grad_norm": 0.1577170193195343,
+ "learning_rate": 4.6537170751550904e-05,
+ "loss": 1.0794,
+ "step": 7551
+ },
+ {
+ "epoch": 1.5352713966253302,
+ "grad_norm": 0.16741138696670532,
+ "learning_rate": 4.651683107901963e-05,
+ "loss": 1.2215,
+ "step": 7552
+ },
+ {
+ "epoch": 1.5354746899776377,
+ "grad_norm": 0.1500609666109085,
+ "learning_rate": 4.649649140648836e-05,
+ "loss": 0.9439,
+ "step": 7553
+ },
+ {
+ "epoch": 1.5356779833299452,
+ "grad_norm": 0.15758995711803436,
+ "learning_rate": 4.6476151733957085e-05,
+ "loss": 0.8848,
+ "step": 7554
+ },
+ {
+ "epoch": 1.5358812766822525,
+ "grad_norm": 0.14967188239097595,
+ "learning_rate": 4.6455812061425816e-05,
+ "loss": 0.9408,
+ "step": 7555
+ },
+ {
+ "epoch": 1.5360845700345598,
+ "grad_norm": 0.13587024807929993,
+ "learning_rate": 4.643547238889454e-05,
+ "loss": 0.9078,
+ "step": 7556
+ },
+ {
+ "epoch": 1.5362878633868673,
+ "grad_norm": 0.1551710069179535,
+ "learning_rate": 4.641513271636327e-05,
+ "loss": 1.0213,
+ "step": 7557
+ },
+ {
+ "epoch": 1.5364911567391746,
+ "grad_norm": 0.15696901082992554,
+ "learning_rate": 4.6394793043831996e-05,
+ "loss": 0.9854,
+ "step": 7558
+ },
+ {
+ "epoch": 1.536694450091482,
+ "grad_norm": 0.14111942052841187,
+ "learning_rate": 4.637445337130073e-05,
+ "loss": 0.8104,
+ "step": 7559
+ },
+ {
+ "epoch": 1.5368977434437894,
+ "grad_norm": 0.16344057023525238,
+ "learning_rate": 4.635411369876945e-05,
+ "loss": 1.0198,
+ "step": 7560
+ },
+ {
+ "epoch": 1.5371010367960967,
+ "grad_norm": 0.15371447801589966,
+ "learning_rate": 4.633377402623818e-05,
+ "loss": 1.1088,
+ "step": 7561
+ },
+ {
+ "epoch": 1.5373043301484042,
+ "grad_norm": 0.16405069828033447,
+ "learning_rate": 4.631343435370691e-05,
+ "loss": 1.1632,
+ "step": 7562
+ },
+ {
+ "epoch": 1.5375076235007117,
+ "grad_norm": 0.17231358587741852,
+ "learning_rate": 4.629309468117564e-05,
+ "loss": 1.0729,
+ "step": 7563
+ },
+ {
+ "epoch": 1.537710916853019,
+ "grad_norm": 0.16849292814731598,
+ "learning_rate": 4.627275500864436e-05,
+ "loss": 1.1287,
+ "step": 7564
+ },
+ {
+ "epoch": 1.5379142102053263,
+ "grad_norm": 0.14124159514904022,
+ "learning_rate": 4.6252415336113094e-05,
+ "loss": 0.9726,
+ "step": 7565
+ },
+ {
+ "epoch": 1.5381175035576335,
+ "grad_norm": 0.16582997143268585,
+ "learning_rate": 4.623207566358182e-05,
+ "loss": 1.048,
+ "step": 7566
+ },
+ {
+ "epoch": 1.538320796909941,
+ "grad_norm": 0.15703178942203522,
+ "learning_rate": 4.621173599105055e-05,
+ "loss": 0.9917,
+ "step": 7567
+ },
+ {
+ "epoch": 1.5385240902622486,
+ "grad_norm": 0.1521129459142685,
+ "learning_rate": 4.6191396318519274e-05,
+ "loss": 0.9314,
+ "step": 7568
+ },
+ {
+ "epoch": 1.5387273836145559,
+ "grad_norm": 0.18239177763462067,
+ "learning_rate": 4.6171056645988006e-05,
+ "loss": 1.1309,
+ "step": 7569
+ },
+ {
+ "epoch": 1.5389306769668631,
+ "grad_norm": 0.15608282387256622,
+ "learning_rate": 4.615071697345673e-05,
+ "loss": 0.9907,
+ "step": 7570
+ },
+ {
+ "epoch": 1.5391339703191704,
+ "grad_norm": 0.14907321333885193,
+ "learning_rate": 4.613037730092546e-05,
+ "loss": 0.948,
+ "step": 7571
+ },
+ {
+ "epoch": 1.539337263671478,
+ "grad_norm": 0.15870921313762665,
+ "learning_rate": 4.6110037628394186e-05,
+ "loss": 1.0293,
+ "step": 7572
+ },
+ {
+ "epoch": 1.5395405570237854,
+ "grad_norm": 0.1471608281135559,
+ "learning_rate": 4.608969795586292e-05,
+ "loss": 0.9045,
+ "step": 7573
+ },
+ {
+ "epoch": 1.5397438503760927,
+ "grad_norm": 0.1473323255777359,
+ "learning_rate": 4.606935828333164e-05,
+ "loss": 0.9773,
+ "step": 7574
+ },
+ {
+ "epoch": 1.5399471437284,
+ "grad_norm": 0.15672756731510162,
+ "learning_rate": 4.604901861080037e-05,
+ "loss": 0.9564,
+ "step": 7575
+ },
+ {
+ "epoch": 1.5401504370807073,
+ "grad_norm": 0.13355454802513123,
+ "learning_rate": 4.60286789382691e-05,
+ "loss": 0.9043,
+ "step": 7576
+ },
+ {
+ "epoch": 1.5403537304330148,
+ "grad_norm": 0.16888266801834106,
+ "learning_rate": 4.600833926573783e-05,
+ "loss": 1.049,
+ "step": 7577
+ },
+ {
+ "epoch": 1.5405570237853223,
+ "grad_norm": 0.14586526155471802,
+ "learning_rate": 4.5987999593206546e-05,
+ "loss": 0.991,
+ "step": 7578
+ },
+ {
+ "epoch": 1.5407603171376296,
+ "grad_norm": 0.15697935223579407,
+ "learning_rate": 4.596765992067528e-05,
+ "loss": 0.9276,
+ "step": 7579
+ },
+ {
+ "epoch": 1.540963610489937,
+ "grad_norm": 0.1606079787015915,
+ "learning_rate": 4.5947320248144e-05,
+ "loss": 1.1167,
+ "step": 7580
+ },
+ {
+ "epoch": 1.5411669038422442,
+ "grad_norm": 0.14496320486068726,
+ "learning_rate": 4.592698057561273e-05,
+ "loss": 0.978,
+ "step": 7581
+ },
+ {
+ "epoch": 1.5413701971945517,
+ "grad_norm": 0.1540028601884842,
+ "learning_rate": 4.590664090308146e-05,
+ "loss": 0.9584,
+ "step": 7582
+ },
+ {
+ "epoch": 1.5415734905468592,
+ "grad_norm": 0.14273619651794434,
+ "learning_rate": 4.588630123055019e-05,
+ "loss": 0.9559,
+ "step": 7583
+ },
+ {
+ "epoch": 1.5417767838991665,
+ "grad_norm": 0.15364350378513336,
+ "learning_rate": 4.586596155801891e-05,
+ "loss": 1.067,
+ "step": 7584
+ },
+ {
+ "epoch": 1.5419800772514738,
+ "grad_norm": 0.15916843712329865,
+ "learning_rate": 4.5845621885487644e-05,
+ "loss": 0.9734,
+ "step": 7585
+ },
+ {
+ "epoch": 1.5421833706037813,
+ "grad_norm": 0.16618654131889343,
+ "learning_rate": 4.582528221295637e-05,
+ "loss": 1.0467,
+ "step": 7586
+ },
+ {
+ "epoch": 1.5423866639560886,
+ "grad_norm": 0.1487346738576889,
+ "learning_rate": 4.58049425404251e-05,
+ "loss": 0.9555,
+ "step": 7587
+ },
+ {
+ "epoch": 1.542589957308396,
+ "grad_norm": 0.1543288677930832,
+ "learning_rate": 4.5784602867893825e-05,
+ "loss": 1.0203,
+ "step": 7588
+ },
+ {
+ "epoch": 1.5427932506607034,
+ "grad_norm": 0.15385927259922028,
+ "learning_rate": 4.5764263195362556e-05,
+ "loss": 1.0728,
+ "step": 7589
+ },
+ {
+ "epoch": 1.5429965440130107,
+ "grad_norm": 0.18972186744213104,
+ "learning_rate": 4.574392352283128e-05,
+ "loss": 1.1087,
+ "step": 7590
+ },
+ {
+ "epoch": 1.5431998373653182,
+ "grad_norm": 0.17217358946800232,
+ "learning_rate": 4.572358385030001e-05,
+ "loss": 1.0384,
+ "step": 7591
+ },
+ {
+ "epoch": 1.5434031307176257,
+ "grad_norm": 0.15717031061649323,
+ "learning_rate": 4.5703244177768736e-05,
+ "loss": 0.8593,
+ "step": 7592
+ },
+ {
+ "epoch": 1.543606424069933,
+ "grad_norm": 0.17360135912895203,
+ "learning_rate": 4.568290450523747e-05,
+ "loss": 1.1914,
+ "step": 7593
+ },
+ {
+ "epoch": 1.5438097174222403,
+ "grad_norm": 0.15492455661296844,
+ "learning_rate": 4.566256483270619e-05,
+ "loss": 0.9039,
+ "step": 7594
+ },
+ {
+ "epoch": 1.5440130107745476,
+ "grad_norm": 0.15058903396129608,
+ "learning_rate": 4.564222516017492e-05,
+ "loss": 0.9224,
+ "step": 7595
+ },
+ {
+ "epoch": 1.544216304126855,
+ "grad_norm": 0.16502228379249573,
+ "learning_rate": 4.562188548764365e-05,
+ "loss": 0.9956,
+ "step": 7596
+ },
+ {
+ "epoch": 1.5444195974791626,
+ "grad_norm": 0.15759393572807312,
+ "learning_rate": 4.560154581511238e-05,
+ "loss": 1.0067,
+ "step": 7597
+ },
+ {
+ "epoch": 1.5446228908314699,
+ "grad_norm": 0.1422048658132553,
+ "learning_rate": 4.558120614258111e-05,
+ "loss": 0.9564,
+ "step": 7598
+ },
+ {
+ "epoch": 1.5448261841837772,
+ "grad_norm": 0.13447371125221252,
+ "learning_rate": 4.5560866470049834e-05,
+ "loss": 0.9567,
+ "step": 7599
+ },
+ {
+ "epoch": 1.5450294775360844,
+ "grad_norm": 0.1465720683336258,
+ "learning_rate": 4.5540526797518566e-05,
+ "loss": 0.945,
+ "step": 7600
  }
  ],
  "logging_steps": 1,
 
@@ -52527,7 +53227,7 @@
  "attributes": {}
  }
  },
- "total_flos": 4.2064980872392704e+18,
+ "total_flos": 4.261864221527556e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null