qingy2024 committed on
Commit d756067 · verified · 1 Parent(s): 8a114ca

Upload checkpoint 2500

Files changed (7)
  1. README.md +4 -4
  2. adapter_config.json +1 -1
  3. loss.png +2 -2
  4. optimizer.pt +1 -1
  5. rng_state.pth +1 -1
  6. scheduler.pt +1 -1
  7. trainer_state.json +703 -3
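
To work against exactly this checkpoint rather than whatever the branch head becomes later, the files can be fetched pinned to this commit. A minimal sketch using `huggingface_hub`; the repo id below is a guess for illustration, not confirmed by this page:

```python
from huggingface_hub import snapshot_download

# Download the repo as it existed at commit d756067 ("Upload checkpoint 2500").
# "qingy2024/Gradience-T1-7B" is a placeholder repo id; substitute the real one.
local_dir = snapshot_download(
    repo_id="qingy2024/Gradience-T1-7B",
    revision="d756067",  # pin to this exact commit
)
print(local_dir)
```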
README.md CHANGED
@@ -2,7 +2,7 @@
 base_model: Qwen/Qwen2.5-7B-Instruct
 library_name: peft
 ---
-# Gradience T1 7B (Step 2400 Checkpoint)
+# Gradience T1 7B (Step 2500 Checkpoint)
 
 > [!NOTE]
 > Training in progress...
@@ -15,11 +15,11 @@ library_name: peft
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
-<div style="height: 30px; width: 48.80%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
-48.8%
+<div style="height: 30px; width: 50.83%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
+50.8%
 </div>
 </div>
-<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 2400 out of 4918 steps</p>
+<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 2500 out of 4918 steps</p>
 </body>
 </html>
 
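The README changes are just the heading's step number and the progress bar, whose width is the current step as a fraction of the planned total. A quick check of the numbers shown above:

```python
# Progress fraction used for the README bar: current step / total planned steps.
step, total = 2500, 4918
pct = 100 * step / total
print(f"{pct:.2f}%")  # 50.83%, matching the updated <div> width
```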
 
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
 {
 "alpha_pattern": {},
 "auto_mapping": null,
-"base_model_name_or_path": "./Qwen-2.5-7B-Instruct",
+"base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
 "bias": "none",
 "eva_config": null,
 "exclude_modules": null,
loss.png CHANGED
Git LFS Details (before)
  • SHA256: 36f04eea377c705909f52258cc7b83ef5aab4a425c68a91194dbc3f69e0d5748
  • Pointer size: 131 Bytes
  • Size of remote file: 121 kB
Git LFS Details (after)
  • SHA256: 03fca3f5d439c68fe899a75db58a5be74726de30dcafe1a09f3f966d4d649566
  • Pointer size: 131 Bytes
  • Size of remote file: 120 kB
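`loss.png` is a rendered artifact of the log history stored in `trainer_state.json`, so the curve can be regenerated from this checkpoint. A rough sketch, assuming the files sit in a local `checkpoint-2500/` directory:

```python
import json
import matplotlib.pyplot as plt

# trainer_state.json keeps one log entry per step under "log_history".
with open("checkpoint-2500/trainer_state.json") as f:
    history = json.load(f)["log_history"]

steps = [e["step"] for e in history if "loss" in e]
losses = [e["loss"] for e in history if "loss" in e]

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.savefig("loss.png")
```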
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:513c1fd3be34fa7e0c978965231189365ad72db01f79b6ab64348ee71938bf51
+oid sha256:4ec5d1bb42b806432b0649c962fdaffdecdc557c94c8c01e010c06d455e651d3
 size 82461044
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:608fccb6c056ce88cdfd5355e6be2046f4d107a24a87c6b0d2c3b200ce6bb4ea
+oid sha256:e4ae31f3bd6abd5e088309ad57fa2e995bc6dd61c02221bc158a3d63e6ad1f06
 size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e10a3584c6ffbdce35c888630a41e0ce4967d2a9c9028a7ea9acf61c5078add
+oid sha256:9f30623027facf9b238397ebc799b819df1f5ae1bf4da593c1b0199dcd6b102f
 size 1064
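
The three binary files above change only through their LFS pointers: `oid` is the SHA-256 of the file contents and `size` its byte length, which is why `optimizer.pt` keeps the same size but gets a new `oid`. A small sketch to verify a downloaded file against its pointer:

```python
import hashlib

def lfs_oid(path: str, chunk_size: int = 1 << 20) -> str:
    """SHA-256 of the file contents -- the 'oid' recorded in a Git LFS pointer."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Expected value taken from the scheduler.pt pointer in this commit.
assert lfs_oid("scheduler.pt") == "9f30623027facf9b238397ebc799b819df1f5ae1bf4da593c1b0199dcd6b102f"
```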
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
 "best_global_step": null,
 "best_metric": null,
 "best_model_checkpoint": null,
-"epoch": 0.9758080910754219,
+"epoch": 1.0162634681845903,
 "eval_steps": 500,
-"global_step": 2400,
+"global_step": 2500,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -16808,6 +16808,706 @@
 "learning_rate": 0.00010254427030327702,
 "loss": 0.8787,
 "step": 2400
+},
+{
+"epoch": 0.9762146777800366,
+"grad_norm": 0.09117105603218079,
+"learning_rate": 0.00010250356197842459,
+"loss": 0.9053,
+"step": 2401
+},
+{
+"epoch": 0.9766212644846514,
+"grad_norm": 0.09840644896030426,
+"learning_rate": 0.00010246285365357216,
+"loss": 1.0462,
+"step": 2402
+},
+{
+"epoch": 0.9770278511892662,
+"grad_norm": 0.09379451721906662,
+"learning_rate": 0.00010242214532871972,
+"loss": 0.9617,
+"step": 2403
+},
+{
+"epoch": 0.9774344378938808,
+"grad_norm": 0.09142056852579117,
+"learning_rate": 0.00010238143700386729,
+"loss": 1.0022,
+"step": 2404
+},
+{
+"epoch": 0.9778410245984956,
+"grad_norm": 0.09325367957353592,
+"learning_rate": 0.00010234072867901487,
+"loss": 0.9356,
+"step": 2405
+},
+{
+"epoch": 0.9782476113031104,
+"grad_norm": 0.09714538604021072,
+"learning_rate": 0.00010230002035416244,
+"loss": 1.0685,
+"step": 2406
+},
+{
+"epoch": 0.9786541980077251,
+"grad_norm": 0.09502388536930084,
+"learning_rate": 0.00010225931202931001,
+"loss": 1.0158,
+"step": 2407
+},
+{
+"epoch": 0.9790607847123399,
+"grad_norm": 0.09626177698373795,
+"learning_rate": 0.00010221860370445758,
+"loss": 1.0249,
+"step": 2408
+},
+{
+"epoch": 0.9794673714169546,
+"grad_norm": 0.09790710359811783,
+"learning_rate": 0.00010217789537960514,
+"loss": 0.9974,
+"step": 2409
+},
+{
+"epoch": 0.9798739581215694,
+"grad_norm": 0.0907469391822815,
+"learning_rate": 0.0001021371870547527,
+"loss": 0.994,
+"step": 2410
+},
+{
+"epoch": 0.9802805448261842,
+"grad_norm": 0.10248905420303345,
+"learning_rate": 0.00010209647872990026,
+"loss": 1.0214,
+"step": 2411
+},
+{
+"epoch": 0.9806871315307989,
+"grad_norm": 0.09504317492246628,
+"learning_rate": 0.00010205577040504783,
+"loss": 1.0642,
+"step": 2412
+},
+{
+"epoch": 0.9810937182354137,
+"grad_norm": 0.09868543595075607,
+"learning_rate": 0.0001020150620801954,
+"loss": 1.0595,
+"step": 2413
+},
+{
+"epoch": 0.9815003049400285,
+"grad_norm": 0.08648547530174255,
+"learning_rate": 0.00010197435375534297,
+"loss": 0.9273,
+"step": 2414
+},
+{
+"epoch": 0.9819068916446432,
+"grad_norm": 0.0870203897356987,
+"learning_rate": 0.00010193364543049054,
+"loss": 0.8661,
+"step": 2415
+},
+{
+"epoch": 0.982313478349258,
+"grad_norm": 0.09689280390739441,
+"learning_rate": 0.0001018929371056381,
+"loss": 1.0179,
+"step": 2416
+},
+{
+"epoch": 0.9827200650538728,
+"grad_norm": 0.09497373551130295,
+"learning_rate": 0.00010185222878078568,
+"loss": 0.9292,
+"step": 2417
+},
+{
+"epoch": 0.9831266517584875,
+"grad_norm": 0.09194166213274002,
+"learning_rate": 0.00010181152045593325,
+"loss": 0.969,
+"step": 2418
+},
+{
+"epoch": 0.9835332384631023,
+"grad_norm": 0.08828569948673248,
+"learning_rate": 0.00010177081213108082,
+"loss": 0.8936,
+"step": 2419
+},
+{
+"epoch": 0.983939825167717,
+"grad_norm": 0.095185786485672,
+"learning_rate": 0.00010173010380622839,
+"loss": 0.9859,
+"step": 2420
+},
+{
+"epoch": 0.9843464118723317,
+"grad_norm": 0.09699594974517822,
+"learning_rate": 0.00010168939548137595,
+"loss": 1.0568,
+"step": 2421
+},
+{
+"epoch": 0.9847529985769465,
+"grad_norm": 0.09333425760269165,
+"learning_rate": 0.00010164868715652351,
+"loss": 0.9503,
+"step": 2422
+},
+{
+"epoch": 0.9851595852815613,
+"grad_norm": 0.0883539542555809,
+"learning_rate": 0.00010160797883167108,
+"loss": 0.9711,
+"step": 2423
+},
+{
+"epoch": 0.985566171986176,
+"grad_norm": 0.09544458985328674,
+"learning_rate": 0.00010156727050681864,
+"loss": 0.8668,
+"step": 2424
+},
+{
+"epoch": 0.9859727586907908,
+"grad_norm": 0.0979728177189827,
+"learning_rate": 0.00010152656218196621,
+"loss": 1.0685,
+"step": 2425
+},
+{
+"epoch": 0.9863793453954056,
+"grad_norm": 0.08907411992549896,
+"learning_rate": 0.00010148585385711378,
+"loss": 0.8947,
+"step": 2426
+},
+{
+"epoch": 0.9867859321000203,
+"grad_norm": 0.09532100707292557,
+"learning_rate": 0.00010144514553226135,
+"loss": 1.0793,
+"step": 2427
+},
+{
+"epoch": 0.9871925188046351,
+"grad_norm": 0.0916009321808815,
+"learning_rate": 0.00010140443720740893,
+"loss": 0.9604,
+"step": 2428
+},
+{
+"epoch": 0.9875991055092499,
+"grad_norm": 0.0960593968629837,
+"learning_rate": 0.0001013637288825565,
+"loss": 1.0012,
+"step": 2429
+},
+{
+"epoch": 0.9880056922138646,
+"grad_norm": 0.0948946550488472,
+"learning_rate": 0.00010132302055770406,
+"loss": 0.9555,
+"step": 2430
+},
+{
+"epoch": 0.9884122789184794,
+"grad_norm": 0.08670156449079514,
+"learning_rate": 0.00010128231223285163,
+"loss": 0.8863,
+"step": 2431
+},
+{
+"epoch": 0.9888188656230942,
+"grad_norm": 0.0870981365442276,
+"learning_rate": 0.0001012416039079992,
+"loss": 0.949,
+"step": 2432
+},
+{
+"epoch": 0.9892254523277089,
+"grad_norm": 0.09065506607294083,
+"learning_rate": 0.00010120089558314677,
+"loss": 1.0791,
+"step": 2433
+},
+{
+"epoch": 0.9896320390323237,
+"grad_norm": 0.08753534406423569,
+"learning_rate": 0.00010116018725829432,
+"loss": 0.8656,
+"step": 2434
+},
+{
+"epoch": 0.9900386257369383,
+"grad_norm": 0.08939878642559052,
+"learning_rate": 0.00010111947893344189,
+"loss": 0.8983,
+"step": 2435
+},
+{
+"epoch": 0.9904452124415531,
+"grad_norm": 0.09110575914382935,
+"learning_rate": 0.00010107877060858946,
+"loss": 0.8971,
+"step": 2436
+},
+{
+"epoch": 0.9908517991461679,
+"grad_norm": 0.08614566922187805,
+"learning_rate": 0.00010103806228373702,
+"loss": 0.9746,
+"step": 2437
+},
+{
+"epoch": 0.9912583858507826,
+"grad_norm": 0.09685923904180527,
+"learning_rate": 0.00010099735395888459,
+"loss": 0.9638,
+"step": 2438
+},
+{
+"epoch": 0.9916649725553974,
+"grad_norm": 0.10014784336090088,
+"learning_rate": 0.00010095664563403216,
+"loss": 1.0335,
+"step": 2439
+},
+{
+"epoch": 0.9920715592600122,
+"grad_norm": 0.09917939454317093,
+"learning_rate": 0.00010091593730917974,
+"loss": 1.0288,
+"step": 2440
+},
+{
+"epoch": 0.9924781459646269,
+"grad_norm": 0.09158805757761002,
+"learning_rate": 0.00010087522898432731,
+"loss": 0.9372,
+"step": 2441
+},
+{
+"epoch": 0.9928847326692417,
+"grad_norm": 0.09151756763458252,
+"learning_rate": 0.00010083452065947488,
+"loss": 1.0042,
+"step": 2442
+},
+{
+"epoch": 0.9932913193738565,
+"grad_norm": 0.09201864898204803,
+"learning_rate": 0.00010079381233462244,
+"loss": 0.937,
+"step": 2443
+},
+{
+"epoch": 0.9936979060784712,
+"grad_norm": 0.10031972825527191,
+"learning_rate": 0.00010075310400977001,
+"loss": 0.989,
+"step": 2444
+},
+{
+"epoch": 0.994104492783086,
+"grad_norm": 0.09593512862920761,
+"learning_rate": 0.00010071239568491756,
+"loss": 0.9259,
+"step": 2445
+},
+{
+"epoch": 0.9945110794877008,
+"grad_norm": 0.10088519006967545,
+"learning_rate": 0.00010067168736006513,
+"loss": 1.0888,
+"step": 2446
+},
+{
+"epoch": 0.9949176661923155,
+"grad_norm": 0.09052947163581848,
+"learning_rate": 0.0001006309790352127,
+"loss": 0.9643,
+"step": 2447
+},
+{
+"epoch": 0.9953242528969303,
+"grad_norm": 0.0943833664059639,
+"learning_rate": 0.00010059027071036027,
+"loss": 1.0308,
+"step": 2448
+},
+{
+"epoch": 0.9957308396015451,
+"grad_norm": 0.0929458737373352,
+"learning_rate": 0.00010054956238550783,
+"loss": 0.8993,
+"step": 2449
+},
+{
+"epoch": 0.9961374263061598,
+"grad_norm": 0.09643827378749847,
+"learning_rate": 0.0001005088540606554,
+"loss": 0.9708,
+"step": 2450
+},
+{
+"epoch": 0.9965440130107746,
+"grad_norm": 0.08925779908895493,
+"learning_rate": 0.00010046814573580298,
+"loss": 0.9209,
+"step": 2451
+},
+{
+"epoch": 0.9969505997153894,
+"grad_norm": 0.08630047738552094,
+"learning_rate": 0.00010042743741095055,
+"loss": 0.9324,
+"step": 2452
+},
+{
+"epoch": 0.997357186420004,
+"grad_norm": 0.10127938538789749,
+"learning_rate": 0.00010038672908609812,
+"loss": 0.9926,
+"step": 2453
+},
+{
+"epoch": 0.9977637731246188,
+"grad_norm": 0.09573110938072205,
+"learning_rate": 0.00010034602076124569,
+"loss": 0.9801,
+"step": 2454
+},
+{
+"epoch": 0.9981703598292336,
+"grad_norm": 0.0963260605931282,
+"learning_rate": 0.00010030531243639325,
+"loss": 0.98,
+"step": 2455
+},
+{
+"epoch": 0.9985769465338483,
+"grad_norm": 0.08414101600646973,
+"learning_rate": 0.00010026460411154082,
+"loss": 0.8676,
+"step": 2456
+},
+{
+"epoch": 0.9989835332384631,
+"grad_norm": 0.09320447593927383,
+"learning_rate": 0.00010022389578668838,
+"loss": 0.998,
+"step": 2457
+},
+{
+"epoch": 0.9993901199430779,
+"grad_norm": 0.09721797704696655,
+"learning_rate": 0.00010018318746183594,
+"loss": 1.0123,
+"step": 2458
+},
+{
+"epoch": 0.9997967066476926,
+"grad_norm": 0.08773447573184967,
+"learning_rate": 0.00010014247913698351,
+"loss": 0.9673,
+"step": 2459
+},
+{
+"epoch": 1.0,
+"grad_norm": 0.15718789398670197,
+"learning_rate": 0.00010010177081213108,
+"loss": 1.1286,
+"step": 2460
+},
+{
+"epoch": 1.0004065867046148,
+"grad_norm": 0.09029074758291245,
+"learning_rate": 0.00010006106248727865,
+"loss": 0.9905,
+"step": 2461
+},
+{
+"epoch": 1.0008131734092296,
+"grad_norm": 0.09984813630580902,
+"learning_rate": 0.00010002035416242621,
+"loss": 0.9981,
+"step": 2462
+},
+{
+"epoch": 1.0012197601138442,
+"grad_norm": 0.09808840602636337,
+"learning_rate": 9.997964583757378e-05,
+"loss": 1.0156,
+"step": 2463
+},
+{
+"epoch": 1.001626346818459,
+"grad_norm": 0.08917602896690369,
+"learning_rate": 9.993893751272135e-05,
+"loss": 0.944,
+"step": 2464
+},
+{
+"epoch": 1.0020329335230738,
+"grad_norm": 0.0943906158208847,
+"learning_rate": 9.989822918786892e-05,
+"loss": 0.9294,
+"step": 2465
+},
+{
+"epoch": 1.0024395202276886,
+"grad_norm": 0.09091315418481827,
+"learning_rate": 9.98575208630165e-05,
+"loss": 0.9707,
+"step": 2466
+},
+{
+"epoch": 1.0028461069323034,
+"grad_norm": 0.09035106003284454,
+"learning_rate": 9.981681253816407e-05,
+"loss": 0.9562,
+"step": 2467
+},
+{
+"epoch": 1.0032526936369182,
+"grad_norm": 0.09709779173135757,
+"learning_rate": 9.977610421331163e-05,
+"loss": 0.9287,
+"step": 2468
+},
+{
+"epoch": 1.0036592803415327,
+"grad_norm": 0.09063035994768143,
+"learning_rate": 9.973539588845919e-05,
+"loss": 0.9138,
+"step": 2469
+},
+{
+"epoch": 1.0040658670461475,
+"grad_norm": 0.09490003436803818,
+"learning_rate": 9.969468756360676e-05,
+"loss": 0.9475,
+"step": 2470
+},
+{
+"epoch": 1.0044724537507623,
+"grad_norm": 0.10134010761976242,
+"learning_rate": 9.965397923875432e-05,
+"loss": 1.0092,
+"step": 2471
+},
+{
+"epoch": 1.0048790404553771,
+"grad_norm": 0.09728873521089554,
+"learning_rate": 9.96132709139019e-05,
+"loss": 0.9498,
+"step": 2472
+},
+{
+"epoch": 1.005285627159992,
+"grad_norm": 0.09160648286342621,
+"learning_rate": 9.957256258904947e-05,
+"loss": 0.8707,
+"step": 2473
+},
+{
+"epoch": 1.0056922138646067,
+"grad_norm": 0.0939764603972435,
+"learning_rate": 9.953185426419704e-05,
+"loss": 0.9619,
+"step": 2474
+},
+{
+"epoch": 1.0060988005692213,
+"grad_norm": 0.08643637597560883,
+"learning_rate": 9.94911459393446e-05,
+"loss": 0.9377,
+"step": 2475
+},
+{
+"epoch": 1.006505387273836,
+"grad_norm": 0.09141729027032852,
+"learning_rate": 9.945043761449216e-05,
+"loss": 0.8859,
+"step": 2476
+},
+{
+"epoch": 1.006911973978451,
+"grad_norm": 0.09555509686470032,
+"learning_rate": 9.940972928963974e-05,
+"loss": 0.933,
+"step": 2477
+},
+{
+"epoch": 1.0073185606830657,
+"grad_norm": 0.0935022309422493,
+"learning_rate": 9.936902096478731e-05,
+"loss": 0.9368,
+"step": 2478
+},
+{
+"epoch": 1.0077251473876805,
+"grad_norm": 0.09959034621715546,
+"learning_rate": 9.932831263993488e-05,
+"loss": 0.974,
+"step": 2479
+},
+{
+"epoch": 1.0081317340922953,
+"grad_norm": 0.09246455878019333,
+"learning_rate": 9.928760431508245e-05,
+"loss": 0.9248,
+"step": 2480
+},
+{
+"epoch": 1.0085383207969099,
+"grad_norm": 0.10091500729322433,
+"learning_rate": 9.924689599023e-05,
+"loss": 1.122,
+"step": 2481
+},
+{
+"epoch": 1.0089449075015247,
+"grad_norm": 0.10083048790693283,
+"learning_rate": 9.920618766537757e-05,
+"loss": 1.0199,
+"step": 2482
+},
+{
+"epoch": 1.0093514942061395,
+"grad_norm": 0.09641805291175842,
+"learning_rate": 9.916547934052515e-05,
+"loss": 0.9971,
+"step": 2483
+},
+{
+"epoch": 1.0097580809107543,
+"grad_norm": 0.10362432897090912,
+"learning_rate": 9.912477101567272e-05,
+"loss": 0.9596,
+"step": 2484
+},
+{
+"epoch": 1.010164667615369,
+"grad_norm": 0.09050238877534866,
+"learning_rate": 9.908406269082028e-05,
+"loss": 0.9423,
+"step": 2485
+},
+{
+"epoch": 1.0105712543199838,
+"grad_norm": 0.10209590941667557,
+"learning_rate": 9.904335436596785e-05,
+"loss": 0.9366,
+"step": 2486
+},
+{
+"epoch": 1.0109778410245984,
+"grad_norm": 0.104631707072258,
+"learning_rate": 9.90026460411154e-05,
+"loss": 1.0476,
+"step": 2487
+},
+{
+"epoch": 1.0113844277292132,
+"grad_norm": 0.09572993963956833,
+"learning_rate": 9.896193771626297e-05,
+"loss": 1.0523,
+"step": 2488
+},
+{
+"epoch": 1.011791014433828,
+"grad_norm": 0.10640837252140045,
+"learning_rate": 9.892122939141055e-05,
+"loss": 1.1238,
+"step": 2489
+},
+{
+"epoch": 1.0121976011384428,
+"grad_norm": 0.09798834472894669,
+"learning_rate": 9.888052106655812e-05,
+"loss": 0.9597,
+"step": 2490
+},
+{
+"epoch": 1.0126041878430576,
+"grad_norm": 0.08913593739271164,
+"learning_rate": 9.883981274170569e-05,
+"loss": 0.9258,
+"step": 2491
+},
+{
+"epoch": 1.0130107745476722,
+"grad_norm": 0.09719277173280716,
+"learning_rate": 9.879910441685324e-05,
+"loss": 0.9812,
+"step": 2492
+},
+{
+"epoch": 1.013417361252287,
+"grad_norm": 0.09699688851833344,
+"learning_rate": 9.875839609200081e-05,
+"loss": 0.8946,
+"step": 2493
+},
+{
+"epoch": 1.0138239479569018,
+"grad_norm": 0.09061427414417267,
+"learning_rate": 9.871768776714838e-05,
+"loss": 0.9075,
+"step": 2494
+},
+{
+"epoch": 1.0142305346615166,
+"grad_norm": 0.08979996293783188,
+"learning_rate": 9.867697944229596e-05,
+"loss": 0.933,
+"step": 2495
+},
+{
+"epoch": 1.0146371213661314,
+"grad_norm": 0.09325064718723297,
+"learning_rate": 9.863627111744353e-05,
+"loss": 0.9604,
+"step": 2496
+},
+{
+"epoch": 1.0150437080707462,
+"grad_norm": 0.09821408241987228,
+"learning_rate": 9.85955627925911e-05,
+"loss": 1.0871,
+"step": 2497
+},
+{
+"epoch": 1.0154502947753608,
+"grad_norm": 0.09746625274419785,
+"learning_rate": 9.855485446773865e-05,
+"loss": 0.9304,
+"step": 2498
+},
+{
+"epoch": 1.0158568814799755,
+"grad_norm": 0.09508597105741501,
+"learning_rate": 9.851414614288622e-05,
+"loss": 0.9469,
+"step": 2499
+},
+{
+"epoch": 1.0162634681845903,
+"grad_norm": 0.10357919335365295,
+"learning_rate": 9.84734378180338e-05,
+"loss": 1.0272,
+"step": 2500
 }
 ],
 "logging_steps": 1,
@@ -16827,7 +17527,7 @@
 "attributes": {}
 }
 },
-"total_flos": 7.673519083047272e+18,
+"total_flos": 7.991705590388761e+18,
 "train_batch_size": 16,
 "trial_name": null,
 "trial_params": null