Commit e89e89a (verified) by qingy2024 · Parent: 5a522db

Upload checkpoint 1900

Files changed (6):
  1. README.md +4 -4
  2. adapter_config.json +1 -1
  3. loss.png +2 -2
  4. optimizer.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +703 -3
README.md CHANGED
@@ -2,7 +2,7 @@
 base_model: Qwen/Qwen2.5-7B-Instruct
 library_name: peft
 ---
-# Gradience T1 7B (Step 1800 Checkpoint)
+# Gradience T1 7B (Step 1900 Checkpoint)
 
 > [!NOTE]
 > Training in progress...
@@ -15,11 +15,11 @@ library_name: peft
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
-<div style="height: 30px; width: 36.60%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
-36.6%
+<div style="height: 30px; width: 38.63%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
+38.6%
 </div>
 </div>
-<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 1800 out of 4918 steps</p>
+<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 1900 out of 4918 steps</p>
 </body>
 </html>
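Both updated figures in the README follow directly from the step counter; as a quick sanity check, a small sketch (not part of the repo) that regenerates them:

    # Reproduce the README's progress figures from the checkpoint step.
    step, total_steps = 1900, 4918
    pct = 100 * step / total_steps
    print(f"{pct:.2f}%")  # 38.63% -- the bar's CSS width
    print(f"{pct:.1f}%")  # 38.6%  -- the bar's visible label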
 
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "./Qwen-2.5-7B-Instruct",
+  "base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
   "bias": "none",
   "eva_config": null,
   "exclude_modules": null,
loss.png CHANGED
Git LFS Details (old):
  • SHA256: 06dc6ac087be67766cde1c74f8ff74c5e89a2984533f082b9b9365e2ef9fdf9a
  • Pointer size: 130 Bytes
  • Size of remote file: 87.3 kB
Git LFS Details (new):
  • SHA256: f2345eb8a5b0a38e34b7cee23371d45337ca4014475418d77535ee39c4a087cb
  • Pointer size: 131 Bytes
  • Size of remote file: 130 kB
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea38d3a2395e7d462eb9cacb0d3f3fc856bcea16870286d46e7229a9d7f20632
+oid sha256:aed3b5ff30e3e07ae751c0198bf9476de1a89a755bb4dc5a8149b83298170716
 size 82461044
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ecd022be4c8582d05a53dd3cc5229272f1c0cf1b9bc4803f5070112cb2fa2c34
+oid sha256:ec8ba207deb0762ece0e7f805dcaf1e7c6d96da2ae9e39aab68cb2700888f6ea
 size 1064
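The binary files above are stored through Git LFS, so each diff is over the pointer file, not the payload: the pointer records the payload's SHA-256 (the oid line) and its byte size. A small sketch (not part of the repo) for checking a downloaded file against its pointer:

    import hashlib
    import os

    def matches_pointer(path: str, oid_hex: str, size: int) -> bool:
        # Hash the file in chunks and compare against the pointer's oid and size.
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                h.update(chunk)
        return h.hexdigest() == oid_hex and os.path.getsize(path) == size

    # e.g. the optimizer state uploaded in this commit:
    print(matches_pointer(
        "optimizer.pt",
        "aed3b5ff30e3e07ae751c0198bf9476de1a89a755bb4dc5a8149b83298170716",
        82461044,
    ))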
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7318560683065664,
+  "epoch": 0.7725147387680423,
   "eval_steps": 500,
-  "global_step": 1800,
+  "global_step": 1900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12608,6 +12608,706 @@
       "learning_rate": 0.00012696926521473643,
       "loss": 0.9162,
       "step": 1800
+    },
+    {
+      "epoch": 0.7322626550111812,
+      "grad_norm": 0.09074793756008148,
+      "learning_rate": 0.00012692855688988398,
+      "loss": 0.9388,
+      "step": 1801
+    },
+    {
+      "epoch": 0.7326692417157958,
+      "grad_norm": 0.10199327766895294,
+      "learning_rate": 0.00012688784856503156,
+      "loss": 0.9585,
+      "step": 1802
+    },
+    {
+      "epoch": 0.7330758284204106,
+      "grad_norm": 0.10722784698009491,
+      "learning_rate": 0.00012684714024017912,
+      "loss": 1.0226,
+      "step": 1803
+    },
+    {
+      "epoch": 0.7334824151250254,
+      "grad_norm": 0.10113389045000076,
+      "learning_rate": 0.0001268064319153267,
+      "loss": 1.0593,
+      "step": 1804
+    },
+    {
+      "epoch": 0.7338890018296401,
+      "grad_norm": 0.1125817522406578,
+      "learning_rate": 0.00012676572359047425,
+      "loss": 0.8962,
+      "step": 1805
+    },
+    {
+      "epoch": 0.7342955885342549,
+      "grad_norm": 0.10177897661924362,
+      "learning_rate": 0.0001267250152656218,
+      "loss": 1.0323,
+      "step": 1806
+    },
+    {
+      "epoch": 0.7347021752388697,
+      "grad_norm": 0.10272479057312012,
+      "learning_rate": 0.00012668430694076941,
+      "loss": 0.9947,
+      "step": 1807
+    },
+    {
+      "epoch": 0.7351087619434844,
+      "grad_norm": 0.11395642906427383,
+      "learning_rate": 0.00012664359861591697,
+      "loss": 1.0144,
+      "step": 1808
+    },
+    {
+      "epoch": 0.7355153486480992,
+      "grad_norm": 0.09565427899360657,
+      "learning_rate": 0.00012660289029106452,
+      "loss": 1.0052,
+      "step": 1809
+    },
+    {
+      "epoch": 0.735921935352714,
+      "grad_norm": 0.09244798123836517,
+      "learning_rate": 0.0001265621819662121,
+      "loss": 0.8411,
+      "step": 1810
+    },
+    {
+      "epoch": 0.7363285220573287,
+      "grad_norm": 0.08985315263271332,
+      "learning_rate": 0.00012652147364135966,
+      "loss": 1.0301,
+      "step": 1811
+    },
+    {
+      "epoch": 0.7367351087619435,
+      "grad_norm": 0.09606938809156418,
+      "learning_rate": 0.00012648076531650724,
+      "loss": 1.0053,
+      "step": 1812
+    },
+    {
+      "epoch": 0.7371416954665583,
+      "grad_norm": 0.10566183179616928,
+      "learning_rate": 0.0001264400569916548,
+      "loss": 0.9527,
+      "step": 1813
+    },
+    {
+      "epoch": 0.737548282171173,
+      "grad_norm": 0.10999652743339539,
+      "learning_rate": 0.00012639934866680237,
+      "loss": 1.0756,
+      "step": 1814
+    },
+    {
+      "epoch": 0.7379548688757878,
+      "grad_norm": 0.09473931044340134,
+      "learning_rate": 0.00012635864034194993,
+      "loss": 0.94,
+      "step": 1815
+    },
+    {
+      "epoch": 0.7383614555804026,
+      "grad_norm": 0.09815262258052826,
+      "learning_rate": 0.0001263179320170975,
+      "loss": 1.0436,
+      "step": 1816
+    },
+    {
+      "epoch": 0.7387680422850172,
+      "grad_norm": 0.08889912813901901,
+      "learning_rate": 0.00012627722369224506,
+      "loss": 0.9368,
+      "step": 1817
+    },
+    {
+      "epoch": 0.739174628989632,
+      "grad_norm": 0.09337257593870163,
+      "learning_rate": 0.00012623651536739262,
+      "loss": 1.0949,
+      "step": 1818
+    },
+    {
+      "epoch": 0.7395812156942468,
+      "grad_norm": 0.09112720191478729,
+      "learning_rate": 0.00012619580704254023,
+      "loss": 1.0239,
+      "step": 1819
+    },
+    {
+      "epoch": 0.7399878023988615,
+      "grad_norm": 0.0988708958029747,
+      "learning_rate": 0.00012615509871768778,
+      "loss": 1.0648,
+      "step": 1820
+    },
+    {
+      "epoch": 0.7403943891034763,
+      "grad_norm": 0.09849932789802551,
+      "learning_rate": 0.00012611439039283533,
+      "loss": 0.9867,
+      "step": 1821
+    },
+    {
+      "epoch": 0.7408009758080911,
+      "grad_norm": 0.09254156798124313,
+      "learning_rate": 0.00012607368206798291,
+      "loss": 0.9903,
+      "step": 1822
+    },
+    {
+      "epoch": 0.7412075625127058,
+      "grad_norm": 0.0954776182770729,
+      "learning_rate": 0.00012603297374313047,
+      "loss": 1.0081,
+      "step": 1823
+    },
+    {
+      "epoch": 0.7416141492173206,
+      "grad_norm": 0.08610807359218597,
+      "learning_rate": 0.00012599226541827805,
+      "loss": 0.9229,
+      "step": 1824
+    },
+    {
+      "epoch": 0.7420207359219354,
+      "grad_norm": 0.0977591797709465,
+      "learning_rate": 0.0001259515570934256,
+      "loss": 0.9076,
+      "step": 1825
+    },
+    {
+      "epoch": 0.7424273226265501,
+      "grad_norm": 0.0858481377363205,
+      "learning_rate": 0.00012591084876857319,
+      "loss": 0.8604,
+      "step": 1826
+    },
+    {
+      "epoch": 0.7428339093311649,
+      "grad_norm": 0.09642601758241653,
+      "learning_rate": 0.00012587014044372074,
+      "loss": 1.0476,
+      "step": 1827
+    },
+    {
+      "epoch": 0.7432404960357797,
+      "grad_norm": 0.08871784061193466,
+      "learning_rate": 0.0001258294321188683,
+      "loss": 0.9597,
+      "step": 1828
+    },
+    {
+      "epoch": 0.7436470827403944,
+      "grad_norm": 0.10808097571134567,
+      "learning_rate": 0.00012578872379401587,
+      "loss": 1.1415,
+      "step": 1829
+    },
+    {
+      "epoch": 0.7440536694450092,
+      "grad_norm": 0.09339917451143265,
+      "learning_rate": 0.00012574801546916346,
+      "loss": 0.9437,
+      "step": 1830
+    },
+    {
+      "epoch": 0.7444602561496239,
+      "grad_norm": 0.08945673704147339,
+      "learning_rate": 0.00012570730714431104,
+      "loss": 0.9714,
+      "step": 1831
+    },
+    {
+      "epoch": 0.7448668428542387,
+      "grad_norm": 0.0939527079463005,
+      "learning_rate": 0.0001256665988194586,
+      "loss": 0.9868,
+      "step": 1832
+    },
+    {
+      "epoch": 0.7452734295588535,
+      "grad_norm": 0.09327416867017746,
+      "learning_rate": 0.00012562589049460615,
+      "loss": 1.0001,
+      "step": 1833
+    },
+    {
+      "epoch": 0.7456800162634681,
+      "grad_norm": 0.10278622061014175,
+      "learning_rate": 0.00012558518216975373,
+      "loss": 1.0724,
+      "step": 1834
+    },
+    {
+      "epoch": 0.7460866029680829,
+      "grad_norm": 0.09421471506357193,
+      "learning_rate": 0.00012554447384490128,
+      "loss": 1.0088,
+      "step": 1835
+    },
+    {
+      "epoch": 0.7464931896726977,
+      "grad_norm": 0.1009073331952095,
+      "learning_rate": 0.00012550376552004886,
+      "loss": 1.0485,
+      "step": 1836
+    },
+    {
+      "epoch": 0.7468997763773124,
+      "grad_norm": 0.09199651330709457,
+      "learning_rate": 0.00012546305719519642,
+      "loss": 0.9765,
+      "step": 1837
+    },
+    {
+      "epoch": 0.7473063630819272,
+      "grad_norm": 0.09672168642282486,
+      "learning_rate": 0.000125422348870344,
+      "loss": 1.018,
+      "step": 1838
+    },
+    {
+      "epoch": 0.747712949786542,
+      "grad_norm": 0.09036868065595627,
+      "learning_rate": 0.00012538164054549155,
+      "loss": 0.9067,
+      "step": 1839
+    },
+    {
+      "epoch": 0.7481195364911567,
+      "grad_norm": 0.09706352651119232,
+      "learning_rate": 0.0001253409322206391,
+      "loss": 1.0439,
+      "step": 1840
+    },
+    {
+      "epoch": 0.7485261231957715,
+      "grad_norm": 0.09940480440855026,
+      "learning_rate": 0.00012530022389578669,
+      "loss": 1.0936,
+      "step": 1841
+    },
+    {
+      "epoch": 0.7489327099003863,
+      "grad_norm": 0.09489309787750244,
+      "learning_rate": 0.00012525951557093427,
+      "loss": 1.0606,
+      "step": 1842
+    },
+    {
+      "epoch": 0.749339296605001,
+      "grad_norm": 0.07897097617387772,
+      "learning_rate": 0.00012521880724608185,
+      "loss": 0.8109,
+      "step": 1843
+    },
+    {
+      "epoch": 0.7497458833096158,
+      "grad_norm": 0.09423919022083282,
+      "learning_rate": 0.0001251780989212294,
+      "loss": 1.0703,
+      "step": 1844
+    },
+    {
+      "epoch": 0.7501524700142306,
+      "grad_norm": 0.09601794928312302,
+      "learning_rate": 0.00012513739059637696,
+      "loss": 0.9692,
+      "step": 1845
+    },
+    {
+      "epoch": 0.7505590567188453,
+      "grad_norm": 0.09051002562046051,
+      "learning_rate": 0.00012509668227152454,
+      "loss": 0.9727,
+      "step": 1846
+    },
+    {
+      "epoch": 0.7509656434234601,
+      "grad_norm": 0.09665656834840775,
+      "learning_rate": 0.0001250559739466721,
+      "loss": 1.0701,
+      "step": 1847
+    },
+    {
+      "epoch": 0.7513722301280749,
+      "grad_norm": 0.08956587314605713,
+      "learning_rate": 0.00012501526562181967,
+      "loss": 0.9863,
+      "step": 1848
+    },
+    {
+      "epoch": 0.7517788168326895,
+      "grad_norm": 0.09464751929044724,
+      "learning_rate": 0.00012497455729696723,
+      "loss": 1.043,
+      "step": 1849
+    },
+    {
+      "epoch": 0.7521854035373043,
+      "grad_norm": 0.09246315807104111,
+      "learning_rate": 0.0001249338489721148,
+      "loss": 1.0306,
+      "step": 1850
+    },
+    {
+      "epoch": 0.7525919902419191,
+      "grad_norm": 0.0943431407213211,
+      "learning_rate": 0.00012489314064726236,
+      "loss": 0.9251,
+      "step": 1851
+    },
+    {
+      "epoch": 0.7529985769465338,
+      "grad_norm": 0.08852697908878326,
+      "learning_rate": 0.00012485243232240992,
+      "loss": 0.919,
+      "step": 1852
+    },
+    {
+      "epoch": 0.7534051636511486,
+      "grad_norm": 0.08856131881475449,
+      "learning_rate": 0.00012481172399755752,
+      "loss": 0.9874,
+      "step": 1853
+    },
+    {
+      "epoch": 0.7538117503557634,
+      "grad_norm": 0.08715582638978958,
+      "learning_rate": 0.00012477101567270508,
+      "loss": 0.9569,
+      "step": 1854
+    },
+    {
+      "epoch": 0.7542183370603781,
+      "grad_norm": 0.1005750522017479,
+      "learning_rate": 0.00012473030734785266,
+      "loss": 1.118,
+      "step": 1855
+    },
+    {
+      "epoch": 0.7546249237649929,
+      "grad_norm": 0.0848010703921318,
+      "learning_rate": 0.00012468959902300021,
+      "loss": 0.8808,
+      "step": 1856
+    },
+    {
+      "epoch": 0.7550315104696076,
+      "grad_norm": 0.10509838908910751,
+      "learning_rate": 0.00012464889069814777,
+      "loss": 1.0019,
+      "step": 1857
+    },
+    {
+      "epoch": 0.7554380971742224,
+      "grad_norm": 0.09729699045419693,
+      "learning_rate": 0.00012460818237329535,
+      "loss": 0.9275,
+      "step": 1858
+    },
+    {
+      "epoch": 0.7558446838788372,
+      "grad_norm": 0.0901610478758812,
+      "learning_rate": 0.0001245674740484429,
+      "loss": 1.0285,
+      "step": 1859
+    },
+    {
+      "epoch": 0.7562512705834519,
+      "grad_norm": 0.08691520988941193,
+      "learning_rate": 0.00012452676572359048,
+      "loss": 0.9524,
+      "step": 1860
+    },
+    {
+      "epoch": 0.7566578572880667,
+      "grad_norm": 0.09559500962495804,
+      "learning_rate": 0.00012448605739873804,
+      "loss": 1.0781,
+      "step": 1861
+    },
+    {
+      "epoch": 0.7570644439926815,
+      "grad_norm": 0.09581112861633301,
+      "learning_rate": 0.00012444534907388562,
+      "loss": 1.068,
+      "step": 1862
+    },
+    {
+      "epoch": 0.7574710306972962,
+      "grad_norm": 0.10235914587974548,
+      "learning_rate": 0.00012440464074903317,
+      "loss": 1.078,
+      "step": 1863
+    },
+    {
+      "epoch": 0.757877617401911,
+      "grad_norm": 0.09794023633003235,
+      "learning_rate": 0.00012436393242418073,
+      "loss": 1.0951,
+      "step": 1864
+    },
+    {
+      "epoch": 0.7582842041065257,
+      "grad_norm": 0.08910951763391495,
+      "learning_rate": 0.00012432322409932834,
+      "loss": 1.002,
+      "step": 1865
+    },
+    {
+      "epoch": 0.7586907908111404,
+      "grad_norm": 0.08909524232149124,
+      "learning_rate": 0.0001242825157744759,
+      "loss": 0.9027,
+      "step": 1866
+    },
+    {
+      "epoch": 0.7590973775157552,
+      "grad_norm": 0.09639742970466614,
+      "learning_rate": 0.00012424180744962347,
+      "loss": 1.1356,
+      "step": 1867
+    },
+    {
+      "epoch": 0.75950396422037,
+      "grad_norm": 0.08606995642185211,
+      "learning_rate": 0.00012420109912477103,
+      "loss": 0.8974,
+      "step": 1868
+    },
+    {
+      "epoch": 0.7599105509249847,
+      "grad_norm": 0.09715355932712555,
+      "learning_rate": 0.00012416039079991858,
+      "loss": 1.078,
+      "step": 1869
+    },
+    {
+      "epoch": 0.7603171376295995,
+      "grad_norm": 0.08933407068252563,
+      "learning_rate": 0.00012411968247506616,
+      "loss": 0.9177,
+      "step": 1870
+    },
+    {
+      "epoch": 0.7607237243342143,
+      "grad_norm": 0.0859113335609436,
+      "learning_rate": 0.00012407897415021372,
+      "loss": 0.9703,
+      "step": 1871
+    },
+    {
+      "epoch": 0.761130311038829,
+      "grad_norm": 0.09086931496858597,
+      "learning_rate": 0.0001240382658253613,
+      "loss": 1.0298,
+      "step": 1872
+    },
+    {
+      "epoch": 0.7615368977434438,
+      "grad_norm": 0.09112663567066193,
+      "learning_rate": 0.00012399755750050885,
+      "loss": 0.9918,
+      "step": 1873
+    },
+    {
+      "epoch": 0.7619434844480586,
+      "grad_norm": 0.09044841676950455,
+      "learning_rate": 0.00012395684917565643,
+      "loss": 0.9469,
+      "step": 1874
+    },
+    {
+      "epoch": 0.7623500711526733,
+      "grad_norm": 0.08345028758049011,
+      "learning_rate": 0.00012391614085080399,
+      "loss": 0.879,
+      "step": 1875
+    },
+    {
+      "epoch": 0.7627566578572881,
+      "grad_norm": 0.10249708592891693,
+      "learning_rate": 0.00012387543252595157,
+      "loss": 1.0247,
+      "step": 1876
+    },
+    {
+      "epoch": 0.7631632445619029,
+      "grad_norm": 0.0914909839630127,
+      "learning_rate": 0.00012383472420109915,
+      "loss": 0.9341,
+      "step": 1877
+    },
+    {
+      "epoch": 0.7635698312665176,
+      "grad_norm": 0.08616846054792404,
+      "learning_rate": 0.0001237940158762467,
+      "loss": 0.918,
+      "step": 1878
+    },
+    {
+      "epoch": 0.7639764179711324,
+      "grad_norm": 0.0853181779384613,
+      "learning_rate": 0.00012375330755139428,
+      "loss": 0.8903,
+      "step": 1879
+    },
+    {
+      "epoch": 0.7643830046757472,
+      "grad_norm": 0.0943385511636734,
+      "learning_rate": 0.00012371259922654184,
+      "loss": 1.0437,
+      "step": 1880
+    },
+    {
+      "epoch": 0.7647895913803618,
+      "grad_norm": 0.08487629890441895,
+      "learning_rate": 0.0001236718909016894,
+      "loss": 0.9655,
+      "step": 1881
+    },
+    {
+      "epoch": 0.7651961780849766,
+      "grad_norm": 0.09635015577077866,
+      "learning_rate": 0.00012363118257683697,
+      "loss": 1.0047,
+      "step": 1882
+    },
+    {
+      "epoch": 0.7656027647895913,
+      "grad_norm": 0.09787151217460632,
+      "learning_rate": 0.00012359047425198453,
+      "loss": 1.1058,
+      "step": 1883
+    },
+    {
+      "epoch": 0.7660093514942061,
+      "grad_norm": 0.10217342525720596,
+      "learning_rate": 0.0001235497659271321,
+      "loss": 1.1407,
+      "step": 1884
+    },
+    {
+      "epoch": 0.7664159381988209,
+      "grad_norm": 0.08770392835140228,
+      "learning_rate": 0.00012350905760227966,
+      "loss": 0.8851,
+      "step": 1885
+    },
+    {
+      "epoch": 0.7668225249034356,
+      "grad_norm": 0.08978156745433807,
+      "learning_rate": 0.00012346834927742724,
+      "loss": 1.0138,
+      "step": 1886
+    },
+    {
+      "epoch": 0.7672291116080504,
+      "grad_norm": 0.09110313653945923,
+      "learning_rate": 0.0001234276409525748,
+      "loss": 0.8872,
+      "step": 1887
+    },
+    {
+      "epoch": 0.7676356983126652,
+      "grad_norm": 0.0905870720744133,
+      "learning_rate": 0.00012338693262772238,
+      "loss": 0.9819,
+      "step": 1888
+    },
+    {
+      "epoch": 0.7680422850172799,
+      "grad_norm": 0.09418340027332306,
+      "learning_rate": 0.00012334622430286996,
+      "loss": 1.0486,
+      "step": 1889
+    },
+    {
+      "epoch": 0.7684488717218947,
+      "grad_norm": 0.09140585362911224,
+      "learning_rate": 0.00012330551597801751,
+      "loss": 0.9463,
+      "step": 1890
+    },
+    {
+      "epoch": 0.7688554584265095,
+      "grad_norm": 0.08720141649246216,
+      "learning_rate": 0.0001232648076531651,
+      "loss": 0.9833,
+      "step": 1891
+    },
+    {
+      "epoch": 0.7692620451311242,
+      "grad_norm": 0.09206419438123703,
+      "learning_rate": 0.00012322409932831265,
+      "loss": 0.9554,
+      "step": 1892
+    },
+    {
+      "epoch": 0.769668631835739,
+      "grad_norm": 0.09324870258569717,
+      "learning_rate": 0.0001231833910034602,
+      "loss": 1.0703,
+      "step": 1893
+    },
+    {
+      "epoch": 0.7700752185403538,
+      "grad_norm": 0.0868481770157814,
+      "learning_rate": 0.00012314268267860778,
+      "loss": 0.9374,
+      "step": 1894
+    },
+    {
+      "epoch": 0.7704818052449685,
+      "grad_norm": 0.0907289981842041,
+      "learning_rate": 0.00012310197435375534,
+      "loss": 1.0148,
+      "step": 1895
+    },
+    {
+      "epoch": 0.7708883919495833,
+      "grad_norm": 0.09804967790842056,
+      "learning_rate": 0.00012306126602890292,
+      "loss": 1.0541,
+      "step": 1896
+    },
+    {
+      "epoch": 0.771294978654198,
+      "grad_norm": 0.09168083965778351,
+      "learning_rate": 0.00012302055770405047,
+      "loss": 0.9363,
+      "step": 1897
+    },
+    {
+      "epoch": 0.7717015653588127,
+      "grad_norm": 0.09078045189380646,
+      "learning_rate": 0.00012297984937919805,
+      "loss": 1.0683,
+      "step": 1898
+    },
+    {
+      "epoch": 0.7721081520634275,
+      "grad_norm": 0.08930620551109314,
+      "learning_rate": 0.00012293914105434564,
+      "loss": 0.9659,
+      "step": 1899
+    },
+    {
+      "epoch": 0.7725147387680423,
+      "grad_norm": 0.09990911930799484,
+      "learning_rate": 0.0001228984327294932,
+      "loss": 1.1301,
+      "step": 1900
     }
   ],
   "logging_steps": 1,
@@ -12627,7 +13327,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.773449995988025e+18,
+  "total_flos": 6.085340035177267e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null