Upload checkpoint 600

Browse files

Files changed (6) hide show

README.md +5 -28
adapter_config.json +1 -1
loss.png +2 -2
optimizer.pt +1 -1
scheduler.pt +1 -1
trainer_state.json +703 -3

README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 base_model: Qwen/Qwen2.5-7B-Instruct
 library_name: peft
 ---
-# Gradience T1 7B (Step 500 Checkpoint)
 > [!NOTE]
 > Training in progress...
@@ -11,38 +11,15 @@ library_name: peft
 <html lang="en">
 <head>
     <meta charset="UTF-8">
-    <title>Progress Bar Example</title>
-    <style>
-        .progress-container {
-            width: 100%;
-            background-color: #e0e0e0;
-            border-radius: 25px;
-            overflow: hidden;
-            margin: 20px 0;
-        }
-        .progress-bar {
-            height: 30px;
-            width: 0;
-            background-color: #44965a;
-            text-align: center;
-            line-height: 30px;
-            color: white;
-            border-radius: 25px 0 0 25px;
-        }
-        .progress-text {
-            margin-top: 10px;
-            font-size: 16px;
-            font-family: Arial, sans-serif;
-        }
-    </style>
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
-  <div style="height: 30px; width: 10.17%; background-color: #76c7c0; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
-    <!-- 10.17% -->
   </div>
 </div>
-<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 500 out of 4918 steps</p>
 </body>
 </html>

 base_model: Qwen/Qwen2.5-7B-Instruct
 library_name: peft
 ---
+# Gradience T1 7B (Step 600 Checkpoint)
 > [!NOTE]
 > Training in progress...
 <html lang="en">
 <head>
     <meta charset="UTF-8">
+    <title>Progress Bar</title>
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
+  <div style="height: 30px; width: 12.20%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
+    <!-- 12.20% -->
   </div>
 </div>
+<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 600 out of 4918 steps</p>
 </body>
 </html>

adapter_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "./Qwen-2.5-7B-Instruct",
   "bias": "none",
   "eva_config": null,
   "exclude_modules": null,

 {
   "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
   "bias": "none",
   "eva_config": null,
   "exclude_modules": null,

loss.png CHANGED Viewed

Git LFS Details

SHA256: 37445386f1b6c39db8d196daef02425fcaf1b834fccea3e0507b3c9f5c7ccba1
Pointer size: 131 Bytes
Size of remote file: 172 kB

Git LFS Details

SHA256: 696ed8c45d9aa3c00fcd86909bdb3ff567e3bd10bee05a6c658a2d08c670ea7a
Pointer size: 131 Bytes
Size of remote file: 172 kB

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:93e2360a952b23953330ac855c3123cb594108db5b2578b0a23ede55ab321afa
 size 82461044

 version https://git-lfs.github.com/spec/v1
+oid sha256:26571ef1b19c681cb39c0a2f1990922bf6b675e94079e41f21dd9577fef323d1
 size 82461044

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6fe90944217b87f2db5382971d3c067633cfbdffc5e253607df747929a34a722
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0cad174af49d220e06a73da64e877b11980706b182f379142714c6fa5747b447
 size 1064

trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.20329335230737955,
   "eval_steps": 500,
-  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3508,6 +3508,706 @@
       "learning_rate": 0.00017989008752289845,
       "loss": 1.085,
       "step": 500
     }
   ],
   "logging_steps": 1,
@@ -3527,7 +4227,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.5824065174102671e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.24395202276885547,
   "eval_steps": 500,
+  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00017989008752289845,
       "loss": 1.085,
       "step": 500
+    },
+    {
+      "epoch": 0.20369993901199432,
+      "grad_norm": 0.08499190211296082,
+      "learning_rate": 0.000179849379198046,
+      "loss": 0.9235,
+      "step": 501
+    },
+    {
+      "epoch": 0.20410652571660906,
+      "grad_norm": 0.09169955551624298,
+      "learning_rate": 0.00017980867087319358,
+      "loss": 0.9836,
+      "step": 502
+    },
+    {
+      "epoch": 0.20451311242122383,
+      "grad_norm": 0.10331466048955917,
+      "learning_rate": 0.00017976796254834114,
+      "loss": 1.0255,
+      "step": 503
+    },
+    {
+      "epoch": 0.20491969912583857,
+      "grad_norm": 0.0900363028049469,
+      "learning_rate": 0.00017972725422348872,
+      "loss": 0.9691,
+      "step": 504
+    },
+    {
+      "epoch": 0.20532628583045334,
+      "grad_norm": 0.10095544904470444,
+      "learning_rate": 0.00017968654589863627,
+      "loss": 1.0289,
+      "step": 505
+    },
+    {
+      "epoch": 0.2057328725350681,
+      "grad_norm": 0.0992627814412117,
+      "learning_rate": 0.00017964583757378383,
+      "loss": 0.9785,
+      "step": 506
+    },
+    {
+      "epoch": 0.20613945923968285,
+      "grad_norm": 0.0954422652721405,
+      "learning_rate": 0.00017960512924893144,
+      "loss": 1.0105,
+      "step": 507
+    },
+    {
+      "epoch": 0.20654604594429762,
+      "grad_norm": 0.0994410440325737,
+      "learning_rate": 0.000179564420924079,
+      "loss": 1.0894,
+      "step": 508
+    },
+    {
+      "epoch": 0.2069526326489124,
+      "grad_norm": 0.08866444230079651,
+      "learning_rate": 0.00017952371259922654,
+      "loss": 0.9725,
+      "step": 509
+    },
+    {
+      "epoch": 0.20735921935352714,
+      "grad_norm": 0.09361348301172256,
+      "learning_rate": 0.00017948300427437412,
+      "loss": 1.0441,
+      "step": 510
+    },
+    {
+      "epoch": 0.2077658060581419,
+      "grad_norm": 0.08215323090553284,
+      "learning_rate": 0.00017944229594952168,
+      "loss": 0.9214,
+      "step": 511
+    },
+    {
+      "epoch": 0.20817239276275665,
+      "grad_norm": 0.09752262383699417,
+      "learning_rate": 0.00017940158762466926,
+      "loss": 0.9456,
+      "step": 512
+    },
+    {
+      "epoch": 0.20857897946737142,
+      "grad_norm": 0.10021419823169708,
+      "learning_rate": 0.00017936087929981681,
+      "loss": 1.1158,
+      "step": 513
+    },
+    {
+      "epoch": 0.2089855661719862,
+      "grad_norm": 0.09550227969884872,
+      "learning_rate": 0.0001793201709749644,
+      "loss": 0.9789,
+      "step": 514
+    },
+    {
+      "epoch": 0.20939215287660093,
+      "grad_norm": 0.09059977531433105,
+      "learning_rate": 0.00017927946265011195,
+      "loss": 0.9649,
+      "step": 515
+    },
+    {
+      "epoch": 0.2097987395812157,
+      "grad_norm": 0.09227627515792847,
+      "learning_rate": 0.00017923875432525953,
+      "loss": 0.9779,
+      "step": 516
+    },
+    {
+      "epoch": 0.21020532628583044,
+      "grad_norm": 0.09919798374176025,
+      "learning_rate": 0.00017919804600040708,
+      "loss": 1.0155,
+      "step": 517
+    },
+    {
+      "epoch": 0.2106119129904452,
+      "grad_norm": 0.09044051915407181,
+      "learning_rate": 0.00017915733767555464,
+      "loss": 0.9428,
+      "step": 518
+    },
+    {
+      "epoch": 0.21101849969505998,
+      "grad_norm": 0.09017504006624222,
+      "learning_rate": 0.00017911662935070225,
+      "loss": 0.9244,
+      "step": 519
+    },
+    {
+      "epoch": 0.21142508639967472,
+      "grad_norm": 0.09257036447525024,
+      "learning_rate": 0.0001790759210258498,
+      "loss": 1.0168,
+      "step": 520
+    },
+    {
+      "epoch": 0.2118316731042895,
+      "grad_norm": 0.0926235020160675,
+      "learning_rate": 0.00017903521270099735,
+      "loss": 0.9363,
+      "step": 521
+    },
+    {
+      "epoch": 0.21223825980890426,
+      "grad_norm": 0.08785069733858109,
+      "learning_rate": 0.00017899450437614494,
+      "loss": 0.9428,
+      "step": 522
+    },
+    {
+      "epoch": 0.212644846513519,
+      "grad_norm": 0.09824348986148834,
+      "learning_rate": 0.0001789537960512925,
+      "loss": 1.0378,
+      "step": 523
+    },
+    {
+      "epoch": 0.21305143321813377,
+      "grad_norm": 0.0915142148733139,
+      "learning_rate": 0.00017891308772644007,
+      "loss": 0.9603,
+      "step": 524
+    },
+    {
+      "epoch": 0.21345801992274852,
+      "grad_norm": 0.09466978907585144,
+      "learning_rate": 0.00017887237940158763,
+      "loss": 1.013,
+      "step": 525
+    },
+    {
+      "epoch": 0.21386460662736329,
+      "grad_norm": 0.09305880963802338,
+      "learning_rate": 0.0001788316710767352,
+      "loss": 0.9386,
+      "step": 526
+    },
+    {
+      "epoch": 0.21427119333197805,
+      "grad_norm": 0.09210691601037979,
+      "learning_rate": 0.00017879096275188276,
+      "loss": 0.9797,
+      "step": 527
+    },
+    {
+      "epoch": 0.2146777800365928,
+      "grad_norm": 0.10415366291999817,
+      "learning_rate": 0.00017875025442703031,
+      "loss": 1.0125,
+      "step": 528
+    },
+    {
+      "epoch": 0.21508436674120757,
+      "grad_norm": 0.10259640216827393,
+      "learning_rate": 0.0001787095461021779,
+      "loss": 1.0473,
+      "step": 529
+    },
+    {
+      "epoch": 0.2154909534458223,
+      "grad_norm": 0.09523239731788635,
+      "learning_rate": 0.00017866883777732548,
+      "loss": 0.9603,
+      "step": 530
+    },
+    {
+      "epoch": 0.21589754015043708,
+      "grad_norm": 0.10005185008049011,
+      "learning_rate": 0.00017862812945247306,
+      "loss": 1.0768,
+      "step": 531
+    },
+    {
+      "epoch": 0.21630412685505185,
+      "grad_norm": 0.09643250703811646,
+      "learning_rate": 0.0001785874211276206,
+      "loss": 1.0799,
+      "step": 532
+    },
+    {
+      "epoch": 0.2167107135596666,
+      "grad_norm": 0.09473159909248352,
+      "learning_rate": 0.00017854671280276817,
+      "loss": 1.0657,
+      "step": 533
+    },
+    {
+      "epoch": 0.21711730026428136,
+      "grad_norm": 0.09550385922193527,
+      "learning_rate": 0.00017850600447791575,
+      "loss": 1.0389,
+      "step": 534
+    },
+    {
+      "epoch": 0.21752388696889613,
+      "grad_norm": 0.09414463490247726,
+      "learning_rate": 0.0001784652961530633,
+      "loss": 1.0317,
+      "step": 535
+    },
+    {
+      "epoch": 0.21793047367351087,
+      "grad_norm": 0.090250164270401,
+      "learning_rate": 0.00017842458782821088,
+      "loss": 1.0212,
+      "step": 536
+    },
+    {
+      "epoch": 0.21833706037812564,
+      "grad_norm": 0.09635050594806671,
+      "learning_rate": 0.00017838387950335844,
+      "loss": 0.9473,
+      "step": 537
+    },
+    {
+      "epoch": 0.21874364708274038,
+      "grad_norm": 0.0985347330570221,
+      "learning_rate": 0.00017834317117850602,
+      "loss": 1.1372,
+      "step": 538
+    },
+    {
+      "epoch": 0.21915023378735515,
+      "grad_norm": 0.09789203107357025,
+      "learning_rate": 0.00017830246285365357,
+      "loss": 1.0369,
+      "step": 539
+    },
+    {
+      "epoch": 0.21955682049196992,
+      "grad_norm": 0.09777568280696869,
+      "learning_rate": 0.00017826175452880113,
+      "loss": 1.0746,
+      "step": 540
+    },
+    {
+      "epoch": 0.21996340719658466,
+      "grad_norm": 0.09013503789901733,
+      "learning_rate": 0.0001782210462039487,
+      "loss": 1.0124,
+      "step": 541
+    },
+    {
+      "epoch": 0.22036999390119943,
+      "grad_norm": 0.10604355484247208,
+      "learning_rate": 0.0001781803378790963,
+      "loss": 1.0158,
+      "step": 542
+    },
+    {
+      "epoch": 0.22077658060581418,
+      "grad_norm": 0.09194648265838623,
+      "learning_rate": 0.00017813962955424387,
+      "loss": 0.9544,
+      "step": 543
+    },
+    {
+      "epoch": 0.22118316731042895,
+      "grad_norm": 0.09223110228776932,
+      "learning_rate": 0.00017809892122939142,
+      "loss": 1.0094,
+      "step": 544
+    },
+    {
+      "epoch": 0.22158975401504372,
+      "grad_norm": 0.09049870073795319,
+      "learning_rate": 0.00017805821290453898,
+      "loss": 0.8829,
+      "step": 545
+    },
+    {
+      "epoch": 0.22199634071965846,
+      "grad_norm": 0.10157813131809235,
+      "learning_rate": 0.00017801750457968656,
+      "loss": 1.0904,
+      "step": 546
+    },
+    {
+      "epoch": 0.22240292742427323,
+      "grad_norm": 0.09934356063604355,
+      "learning_rate": 0.0001779767962548341,
+      "loss": 1.0708,
+      "step": 547
+    },
+    {
+      "epoch": 0.222809514128888,
+      "grad_norm": 0.09037156403064728,
+      "learning_rate": 0.0001779360879299817,
+      "loss": 0.916,
+      "step": 548
+    },
+    {
+      "epoch": 0.22321610083350274,
+      "grad_norm": 0.09347829967737198,
+      "learning_rate": 0.00017789537960512925,
+      "loss": 1.0328,
+      "step": 549
+    },
+    {
+      "epoch": 0.2236226875381175,
+      "grad_norm": 0.087796151638031,
+      "learning_rate": 0.00017785467128027683,
+      "loss": 0.9961,
+      "step": 550
+    },
+    {
+      "epoch": 0.22402927424273225,
+      "grad_norm": 0.09518422931432724,
+      "learning_rate": 0.00017781396295542438,
+      "loss": 0.9855,
+      "step": 551
+    },
+    {
+      "epoch": 0.22443586094734702,
+      "grad_norm": 0.09606748074293137,
+      "learning_rate": 0.00017777325463057194,
+      "loss": 0.954,
+      "step": 552
+    },
+    {
+      "epoch": 0.2248424476519618,
+      "grad_norm": 0.09338165074586868,
+      "learning_rate": 0.00017773254630571955,
+      "loss": 1.0876,
+      "step": 553
+    },
+    {
+      "epoch": 0.22524903435657653,
+      "grad_norm": 0.09242440015077591,
+      "learning_rate": 0.0001776918379808671,
+      "loss": 0.9418,
+      "step": 554
+    },
+    {
+      "epoch": 0.2256556210611913,
+      "grad_norm": 0.0990302637219429,
+      "learning_rate": 0.00017765112965601468,
+      "loss": 1.0641,
+      "step": 555
+    },
+    {
+      "epoch": 0.22606220776580604,
+      "grad_norm": 0.09444238990545273,
+      "learning_rate": 0.00017761042133116224,
+      "loss": 1.0315,
+      "step": 556
+    },
+    {
+      "epoch": 0.22646879447042081,
+      "grad_norm": 0.08771083503961563,
+      "learning_rate": 0.0001775697130063098,
+      "loss": 0.9898,
+      "step": 557
+    },
+    {
+      "epoch": 0.22687538117503558,
+      "grad_norm": 0.10041147470474243,
+      "learning_rate": 0.00017752900468145737,
+      "loss": 1.0478,
+      "step": 558
+    },
+    {
+      "epoch": 0.22728196787965033,
+      "grad_norm": 0.0933571383357048,
+      "learning_rate": 0.00017748829635660492,
+      "loss": 1.0002,
+      "step": 559
+    },
+    {
+      "epoch": 0.2276885545842651,
+      "grad_norm": 0.0912991389632225,
+      "learning_rate": 0.0001774475880317525,
+      "loss": 1.0807,
+      "step": 560
+    },
+    {
+      "epoch": 0.22809514128887987,
+      "grad_norm": 0.09350984543561935,
+      "learning_rate": 0.00017740687970690006,
+      "loss": 0.8962,
+      "step": 561
+    },
+    {
+      "epoch": 0.2285017279934946,
+      "grad_norm": 0.0978541299700737,
+      "learning_rate": 0.00017736617138204764,
+      "loss": 1.0339,
+      "step": 562
+    },
+    {
+      "epoch": 0.22890831469810938,
+      "grad_norm": 0.08964958041906357,
+      "learning_rate": 0.0001773254630571952,
+      "loss": 1.051,
+      "step": 563
+    },
+    {
+      "epoch": 0.22931490140272412,
+      "grad_norm": 0.09241898357868195,
+      "learning_rate": 0.00017728475473234275,
+      "loss": 0.903,
+      "step": 564
+    },
+    {
+      "epoch": 0.2297214881073389,
+      "grad_norm": 0.09366483986377716,
+      "learning_rate": 0.00017724404640749036,
+      "loss": 1.0055,
+      "step": 565
+    },
+    {
+      "epoch": 0.23012807481195366,
+      "grad_norm": 0.10184673964977264,
+      "learning_rate": 0.0001772033380826379,
+      "loss": 1.004,
+      "step": 566
+    },
+    {
+      "epoch": 0.2305346615165684,
+      "grad_norm": 0.09287306666374207,
+      "learning_rate": 0.0001771626297577855,
+      "loss": 0.9667,
+      "step": 567
+    },
+    {
+      "epoch": 0.23094124822118317,
+      "grad_norm": 0.08905091136693954,
+      "learning_rate": 0.00017712192143293305,
+      "loss": 0.9295,
+      "step": 568
+    },
+    {
+      "epoch": 0.2313478349257979,
+      "grad_norm": 0.0908786877989769,
+      "learning_rate": 0.0001770812131080806,
+      "loss": 0.8957,
+      "step": 569
+    },
+    {
+      "epoch": 0.23175442163041268,
+      "grad_norm": 0.10284281522035599,
+      "learning_rate": 0.00017704050478322818,
+      "loss": 1.1311,
+      "step": 570
+    },
+    {
+      "epoch": 0.23216100833502745,
+      "grad_norm": 0.09007006883621216,
+      "learning_rate": 0.00017699979645837574,
+      "loss": 0.9919,
+      "step": 571
+    },
+    {
+      "epoch": 0.2325675950396422,
+      "grad_norm": 0.09025272727012634,
+      "learning_rate": 0.00017695908813352332,
+      "loss": 0.9057,
+      "step": 572
+    },
+    {
+      "epoch": 0.23297418174425696,
+      "grad_norm": 0.0994710698723793,
+      "learning_rate": 0.00017691837980867087,
+      "loss": 1.1472,
+      "step": 573
+    },
+    {
+      "epoch": 0.23338076844887173,
+      "grad_norm": 0.09117428958415985,
+      "learning_rate": 0.00017687767148381845,
+      "loss": 0.9665,
+      "step": 574
+    },
+    {
+      "epoch": 0.23378735515348648,
+      "grad_norm": 0.0893009826540947,
+      "learning_rate": 0.000176836963158966,
+      "loss": 0.951,
+      "step": 575
+    },
+    {
+      "epoch": 0.23419394185810125,
+      "grad_norm": 0.08649599552154541,
+      "learning_rate": 0.0001767962548341136,
+      "loss": 0.925,
+      "step": 576
+    },
+    {
+      "epoch": 0.234600528562716,
+      "grad_norm": 0.0928448736667633,
+      "learning_rate": 0.00017675554650926117,
+      "loss": 0.9253,
+      "step": 577
+    },
+    {
+      "epoch": 0.23500711526733076,
+      "grad_norm": 0.10335158556699753,
+      "learning_rate": 0.00017671483818440872,
+      "loss": 1.1171,
+      "step": 578
+    },
+    {
+      "epoch": 0.23541370197194553,
+      "grad_norm": 0.09889842569828033,
+      "learning_rate": 0.0001766741298595563,
+      "loss": 1.0005,
+      "step": 579
+    },
+    {
+      "epoch": 0.23582028867656027,
+      "grad_norm": 0.09655506163835526,
+      "learning_rate": 0.00017663342153470386,
+      "loss": 1.0273,
+      "step": 580
+    },
+    {
+      "epoch": 0.23622687538117504,
+      "grad_norm": 0.09516560286283493,
+      "learning_rate": 0.0001765927132098514,
+      "loss": 1.024,
+      "step": 581
+    },
+    {
+      "epoch": 0.23663346208578978,
+      "grad_norm": 0.10024843364953995,
+      "learning_rate": 0.000176552004884999,
+      "loss": 1.0299,
+      "step": 582
+    },
+    {
+      "epoch": 0.23704004879040455,
+      "grad_norm": 0.10152596235275269,
+      "learning_rate": 0.00017651129656014655,
+      "loss": 0.9658,
+      "step": 583
+    },
+    {
+      "epoch": 0.23744663549501932,
+      "grad_norm": 0.09654249995946884,
+      "learning_rate": 0.00017647058823529413,
+      "loss": 1.0722,
+      "step": 584
+    },
+    {
+      "epoch": 0.23785322219963406,
+      "grad_norm": 0.09112072736024857,
+      "learning_rate": 0.00017642987991044168,
+      "loss": 0.9846,
+      "step": 585
+    },
+    {
+      "epoch": 0.23825980890424883,
+      "grad_norm": 0.09640034288167953,
+      "learning_rate": 0.00017638917158558926,
+      "loss": 1.0501,
+      "step": 586
+    },
+    {
+      "epoch": 0.2386663956088636,
+      "grad_norm": 0.09564584493637085,
+      "learning_rate": 0.00017634846326073682,
+      "loss": 0.955,
+      "step": 587
+    },
+    {
+      "epoch": 0.23907298231347834,
+      "grad_norm": 0.10815359652042389,
+      "learning_rate": 0.0001763077549358844,
+      "loss": 1.203,
+      "step": 588
+    },
+    {
+      "epoch": 0.2394795690180931,
+      "grad_norm": 0.09078256040811539,
+      "learning_rate": 0.00017626704661103198,
+      "loss": 0.9881,
+      "step": 589
+    },
+    {
+      "epoch": 0.23988615572270786,
+      "grad_norm": 0.09075487405061722,
+      "learning_rate": 0.00017622633828617954,
+      "loss": 0.984,
+      "step": 590
+    },
+    {
+      "epoch": 0.24029274242732263,
+      "grad_norm": 0.09048381447792053,
+      "learning_rate": 0.00017618562996132712,
+      "loss": 1.0235,
+      "step": 591
+    },
+    {
+      "epoch": 0.2406993291319374,
+      "grad_norm": 0.09820905327796936,
+      "learning_rate": 0.00017614492163647467,
+      "loss": 0.9763,
+      "step": 592
+    },
+    {
+      "epoch": 0.24110591583655214,
+      "grad_norm": 0.0961097925901413,
+      "learning_rate": 0.00017610421331162222,
+      "loss": 1.1035,
+      "step": 593
+    },
+    {
+      "epoch": 0.2415125025411669,
+      "grad_norm": 0.0877358540892601,
+      "learning_rate": 0.0001760635049867698,
+      "loss": 0.8962,
+      "step": 594
+    },
+    {
+      "epoch": 0.24191908924578168,
+      "grad_norm": 0.09730017930269241,
+      "learning_rate": 0.00017602279666191736,
+      "loss": 1.1232,
+      "step": 595
+    },
+    {
+      "epoch": 0.24232567595039642,
+      "grad_norm": 0.09486240148544312,
+      "learning_rate": 0.00017598208833706494,
+      "loss": 1.0566,
+      "step": 596
+    },
+    {
+      "epoch": 0.2427322626550112,
+      "grad_norm": 0.09367606788873672,
+      "learning_rate": 0.0001759413800122125,
+      "loss": 0.9934,
+      "step": 597
+    },
+    {
+      "epoch": 0.24313884935962593,
+      "grad_norm": 0.09046703577041626,
+      "learning_rate": 0.00017590067168736008,
+      "loss": 0.9137,
+      "step": 598
+    },
+    {
+      "epoch": 0.2435454360642407,
+      "grad_norm": 0.09512536972761154,
+      "learning_rate": 0.00017585996336250766,
+      "loss": 0.9733,
+      "step": 599
+    },
+    {
+      "epoch": 0.24395202276885547,
+      "grad_norm": 0.08619649708271027,
+      "learning_rate": 0.0001758192550376552,
+      "loss": 0.8777,
+      "step": 600
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.906257354398122e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null