Upload checkpoint 900

Browse files

Files changed (7) hide show

.gitattributes +1 -0
README.md +7 -4
adapter_model.safetensors +1 -1
loss.png +3 -0
optimizer.pt +1 -1
scheduler.pt +1 -1
trainer_state.json +703 -3

.gitattributes CHANGED Viewed

@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text

 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text
+loss.png filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 base_model: Qwen/Qwen2.5-3B-Instruct
 library_name: peft
 ---
-# Gradience T1 3B (Step 800 Checkpoint)
 > [!NOTE]
 > Training in progress...
@@ -38,10 +38,13 @@ library_name: peft
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
-  <div style="height: 30px; width: 8.13%; background-color: #76c7c0; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
     <!--  3.75% -->
   </div>
 </div>
-<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 800 out of 9838 steps</p>
 </body>
-</html>

 base_model: Qwen/Qwen2.5-3B-Instruct
 library_name: peft
 ---
+# Gradience T1 3B (Step 900 Checkpoint)
 > [!NOTE]
 > Training in progress...
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
+  <div style="height: 30px; width: 9.15%; background-color: #76c7c0; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
     <!--  3.75% -->
   </div>
 </div>
+<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 900 out of 9838 steps</p>
 </body>
+</html>
+## Training Loss
+![Training Loss Chart](loss.png)

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d662121c4c4e38fa7417857b15faabf4697bf6f421766c1af2289391ccb3d30
 size 119801528

 version https://git-lfs.github.com/spec/v1
+oid sha256:3a5dbdeb8341e7e3d9596846ed4c7684398e8c99e694357ed0f13b00d742ac19
 size 119801528

loss.png ADDED Viewed

Git LFS Details

SHA256: 206213ec707e1ef90f75177f6ed9e296663adbaf3865270e5ce96a7769eebab4
Pointer size: 131 Bytes
Size of remote file: 175 kB

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6c230d8b5c1e095fc4008b79adc74d44f024c9250c191a0c398ce799413ca397
 size 61392692

 version https://git-lfs.github.com/spec/v1
+oid sha256:2cd3e3db9bee3174e708cac4d94d11870d49ad54e90400463a5983f1eea959c4
 size 61392692

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4afe690e5607953bb2e0fc05a12430468a9b25343713c9f58a2a219031dce337
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:20ac70d593c855ffef93a9e2aba01508c369253c427365a830e36f7f000a094f
 size 1064

trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.16263468184590363,
   "eval_steps": 500,
-  "global_step": 800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5608,6 +5608,706 @@
       "learning_rate": 0.00018385030001016986,
       "loss": 1.1788,
       "step": 800
     }
   ],
   "logging_steps": 1,
@@ -5627,7 +6327,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.4585745800862106e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.1829640170766416,
   "eval_steps": 500,
+  "global_step": 900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00018385030001016986,
       "loss": 1.1788,
       "step": 800
+    },
+    {
+      "epoch": 0.16283797519821103,
+      "grad_norm": 0.11467185616493225,
+      "learning_rate": 0.00018382996033763859,
+      "loss": 0.9527,
+      "step": 801
+    },
+    {
+      "epoch": 0.1630412685505184,
+      "grad_norm": 0.129184752702713,
+      "learning_rate": 0.00018380962066510728,
+      "loss": 1.1758,
+      "step": 802
+    },
+    {
+      "epoch": 0.16324456190282577,
+      "grad_norm": 0.11696959286928177,
+      "learning_rate": 0.000183789280992576,
+      "loss": 1.03,
+      "step": 803
+    },
+    {
+      "epoch": 0.16344785525513317,
+      "grad_norm": 0.13689257204532623,
+      "learning_rate": 0.00018376894132004476,
+      "loss": 1.2516,
+      "step": 804
+    },
+    {
+      "epoch": 0.16365114860744054,
+      "grad_norm": 0.11370982229709625,
+      "learning_rate": 0.00018374860164751348,
+      "loss": 1.0484,
+      "step": 805
+    },
+    {
+      "epoch": 0.1638544419597479,
+      "grad_norm": 0.13201859593391418,
+      "learning_rate": 0.0001837282619749822,
+      "loss": 1.0903,
+      "step": 806
+    },
+    {
+      "epoch": 0.1640577353120553,
+      "grad_norm": 0.10468725860118866,
+      "learning_rate": 0.00018370792230245093,
+      "loss": 0.9548,
+      "step": 807
+    },
+    {
+      "epoch": 0.16426102866436268,
+      "grad_norm": 0.14737223088741302,
+      "learning_rate": 0.00018368758262991968,
+      "loss": 1.1607,
+      "step": 808
+    },
+    {
+      "epoch": 0.16446432201667005,
+      "grad_norm": 0.11500222235918045,
+      "learning_rate": 0.0001836672429573884,
+      "loss": 1.1032,
+      "step": 809
+    },
+    {
+      "epoch": 0.16466761536897742,
+      "grad_norm": 0.12849587202072144,
+      "learning_rate": 0.00018364690328485713,
+      "loss": 1.255,
+      "step": 810
+    },
+    {
+      "epoch": 0.16487090872128482,
+      "grad_norm": 0.10878688842058182,
+      "learning_rate": 0.00018362656361232583,
+      "loss": 1.1075,
+      "step": 811
+    },
+    {
+      "epoch": 0.1650742020735922,
+      "grad_norm": 0.10878860950469971,
+      "learning_rate": 0.00018360622393979458,
+      "loss": 1.0629,
+      "step": 812
+    },
+    {
+      "epoch": 0.16527749542589956,
+      "grad_norm": 0.1280430108308792,
+      "learning_rate": 0.0001835858842672633,
+      "loss": 1.1377,
+      "step": 813
+    },
+    {
+      "epoch": 0.16548078877820696,
+      "grad_norm": 0.11831233650445938,
+      "learning_rate": 0.00018356554459473203,
+      "loss": 1.0786,
+      "step": 814
+    },
+    {
+      "epoch": 0.16568408213051433,
+      "grad_norm": 0.11453156918287277,
+      "learning_rate": 0.00018354520492220075,
+      "loss": 1.0477,
+      "step": 815
+    },
+    {
+      "epoch": 0.1658873754828217,
+      "grad_norm": 0.13597573339939117,
+      "learning_rate": 0.0001835248652496695,
+      "loss": 1.1807,
+      "step": 816
+    },
+    {
+      "epoch": 0.1660906688351291,
+      "grad_norm": 0.12008185684680939,
+      "learning_rate": 0.00018350452557713823,
+      "loss": 1.0676,
+      "step": 817
+    },
+    {
+      "epoch": 0.16629396218743647,
+      "grad_norm": 0.1363888829946518,
+      "learning_rate": 0.00018348418590460696,
+      "loss": 1.1582,
+      "step": 818
+    },
+    {
+      "epoch": 0.16649725553974384,
+      "grad_norm": 0.11310733109712601,
+      "learning_rate": 0.00018346384623207565,
+      "loss": 1.0931,
+      "step": 819
+    },
+    {
+      "epoch": 0.16670054889205124,
+      "grad_norm": 0.13503344357013702,
+      "learning_rate": 0.0001834435065595444,
+      "loss": 1.1465,
+      "step": 820
+    },
+    {
+      "epoch": 0.1669038422443586,
+      "grad_norm": 0.12744784355163574,
+      "learning_rate": 0.00018342316688701313,
+      "loss": 1.1662,
+      "step": 821
+    },
+    {
+      "epoch": 0.16710713559666598,
+      "grad_norm": 0.13695518672466278,
+      "learning_rate": 0.00018340282721448185,
+      "loss": 1.1846,
+      "step": 822
+    },
+    {
+      "epoch": 0.16731042894897336,
+      "grad_norm": 0.12580302357673645,
+      "learning_rate": 0.00018338248754195058,
+      "loss": 0.93,
+      "step": 823
+    },
+    {
+      "epoch": 0.16751372230128075,
+      "grad_norm": 0.12266777455806732,
+      "learning_rate": 0.00018336214786941933,
+      "loss": 1.1033,
+      "step": 824
+    },
+    {
+      "epoch": 0.16771701565358813,
+      "grad_norm": 0.1129806861281395,
+      "learning_rate": 0.00018334180819688805,
+      "loss": 1.0517,
+      "step": 825
+    },
+    {
+      "epoch": 0.1679203090058955,
+      "grad_norm": 0.12590476870536804,
+      "learning_rate": 0.00018332146852435678,
+      "loss": 1.0374,
+      "step": 826
+    },
+    {
+      "epoch": 0.1681236023582029,
+      "grad_norm": 0.12631377577781677,
+      "learning_rate": 0.00018330112885182548,
+      "loss": 1.1898,
+      "step": 827
+    },
+    {
+      "epoch": 0.16832689571051027,
+      "grad_norm": 0.13719779253005981,
+      "learning_rate": 0.00018328078917929423,
+      "loss": 1.1108,
+      "step": 828
+    },
+    {
+      "epoch": 0.16853018906281764,
+      "grad_norm": 0.12414206564426422,
+      "learning_rate": 0.00018326044950676295,
+      "loss": 1.1654,
+      "step": 829
+    },
+    {
+      "epoch": 0.16873348241512504,
+      "grad_norm": 0.12075278162956238,
+      "learning_rate": 0.00018324010983423168,
+      "loss": 1.0255,
+      "step": 830
+    },
+    {
+      "epoch": 0.1689367757674324,
+      "grad_norm": 0.11906860023736954,
+      "learning_rate": 0.0001832197701617004,
+      "loss": 1.0433,
+      "step": 831
+    },
+    {
+      "epoch": 0.16914006911973978,
+      "grad_norm": 0.11960665136575699,
+      "learning_rate": 0.00018319943048916915,
+      "loss": 0.9501,
+      "step": 832
+    },
+    {
+      "epoch": 0.16934336247204718,
+      "grad_norm": 0.1228812113404274,
+      "learning_rate": 0.00018317909081663788,
+      "loss": 1.002,
+      "step": 833
+    },
+    {
+      "epoch": 0.16954665582435455,
+      "grad_norm": 0.12420972436666489,
+      "learning_rate": 0.0001831587511441066,
+      "loss": 1.062,
+      "step": 834
+    },
+    {
+      "epoch": 0.16974994917666192,
+      "grad_norm": 0.11490360647439957,
+      "learning_rate": 0.0001831384114715753,
+      "loss": 0.9708,
+      "step": 835
+    },
+    {
+      "epoch": 0.1699532425289693,
+      "grad_norm": 0.11945214867591858,
+      "learning_rate": 0.00018311807179904402,
+      "loss": 1.1042,
+      "step": 836
+    },
+    {
+      "epoch": 0.1701565358812767,
+      "grad_norm": 0.1234474778175354,
+      "learning_rate": 0.00018309773212651277,
+      "loss": 1.0258,
+      "step": 837
+    },
+    {
+      "epoch": 0.17035982923358406,
+      "grad_norm": 0.12447863817214966,
+      "learning_rate": 0.0001830773924539815,
+      "loss": 1.1132,
+      "step": 838
+    },
+    {
+      "epoch": 0.17056312258589143,
+      "grad_norm": 0.1321963667869568,
+      "learning_rate": 0.00018305705278145022,
+      "loss": 1.1835,
+      "step": 839
+    },
+    {
+      "epoch": 0.17076641593819883,
+      "grad_norm": 0.12708254158496857,
+      "learning_rate": 0.00018303671310891895,
+      "loss": 1.1787,
+      "step": 840
+    },
+    {
+      "epoch": 0.1709697092905062,
+      "grad_norm": 0.11481820046901703,
+      "learning_rate": 0.0001830163734363877,
+      "loss": 0.8837,
+      "step": 841
+    },
+    {
+      "epoch": 0.17117300264281357,
+      "grad_norm": 0.11851567029953003,
+      "learning_rate": 0.00018299603376385642,
+      "loss": 0.9516,
+      "step": 842
+    },
+    {
+      "epoch": 0.17137629599512097,
+      "grad_norm": 0.13182471692562103,
+      "learning_rate": 0.00018297569409132512,
+      "loss": 1.1809,
+      "step": 843
+    },
+    {
+      "epoch": 0.17157958934742834,
+      "grad_norm": 0.12840509414672852,
+      "learning_rate": 0.00018295535441879385,
+      "loss": 1.0557,
+      "step": 844
+    },
+    {
+      "epoch": 0.1717828826997357,
+      "grad_norm": 0.11280561983585358,
+      "learning_rate": 0.0001829350147462626,
+      "loss": 1.0737,
+      "step": 845
+    },
+    {
+      "epoch": 0.1719861760520431,
+      "grad_norm": 0.13144554197788239,
+      "learning_rate": 0.00018291467507373132,
+      "loss": 1.0275,
+      "step": 846
+    },
+    {
+      "epoch": 0.17218946940435048,
+      "grad_norm": 0.1224883422255516,
+      "learning_rate": 0.00018289433540120005,
+      "loss": 1.1558,
+      "step": 847
+    },
+    {
+      "epoch": 0.17239276275665785,
+      "grad_norm": 0.1263243854045868,
+      "learning_rate": 0.00018287399572866877,
+      "loss": 0.9381,
+      "step": 848
+    },
+    {
+      "epoch": 0.17259605610896522,
+      "grad_norm": 0.13391436636447906,
+      "learning_rate": 0.00018285365605613752,
+      "loss": 1.2548,
+      "step": 849
+    },
+    {
+      "epoch": 0.17279934946127262,
+      "grad_norm": 0.12166419625282288,
+      "learning_rate": 0.00018283331638360625,
+      "loss": 1.0981,
+      "step": 850
+    },
+    {
+      "epoch": 0.17300264281358,
+      "grad_norm": 0.13190463185310364,
+      "learning_rate": 0.00018281297671107494,
+      "loss": 1.1847,
+      "step": 851
+    },
+    {
+      "epoch": 0.17320593616588736,
+      "grad_norm": 0.11678186804056168,
+      "learning_rate": 0.00018279263703854367,
+      "loss": 1.0303,
+      "step": 852
+    },
+    {
+      "epoch": 0.17340922951819476,
+      "grad_norm": 0.11716858297586441,
+      "learning_rate": 0.00018277229736601242,
+      "loss": 0.9274,
+      "step": 853
+    },
+    {
+      "epoch": 0.17361252287050213,
+      "grad_norm": 0.1340217888355255,
+      "learning_rate": 0.00018275195769348114,
+      "loss": 1.0179,
+      "step": 854
+    },
+    {
+      "epoch": 0.1738158162228095,
+      "grad_norm": 0.12650153040885925,
+      "learning_rate": 0.00018273161802094987,
+      "loss": 1.0234,
+      "step": 855
+    },
+    {
+      "epoch": 0.1740191095751169,
+      "grad_norm": 0.1294967234134674,
+      "learning_rate": 0.0001827112783484186,
+      "loss": 1.2539,
+      "step": 856
+    },
+    {
+      "epoch": 0.17422240292742427,
+      "grad_norm": 0.13714881241321564,
+      "learning_rate": 0.00018269093867588734,
+      "loss": 1.0106,
+      "step": 857
+    },
+    {
+      "epoch": 0.17442569627973165,
+      "grad_norm": 0.12365014851093292,
+      "learning_rate": 0.00018267059900335607,
+      "loss": 1.1184,
+      "step": 858
+    },
+    {
+      "epoch": 0.17462898963203904,
+      "grad_norm": 0.11030489951372147,
+      "learning_rate": 0.00018265025933082477,
+      "loss": 0.9478,
+      "step": 859
+    },
+    {
+      "epoch": 0.17483228298434642,
+      "grad_norm": 0.1181483343243599,
+      "learning_rate": 0.0001826299196582935,
+      "loss": 1.0861,
+      "step": 860
+    },
+    {
+      "epoch": 0.1750355763366538,
+      "grad_norm": 0.12873612344264984,
+      "learning_rate": 0.00018260957998576224,
+      "loss": 0.9811,
+      "step": 861
+    },
+    {
+      "epoch": 0.17523886968896116,
+      "grad_norm": 0.11688394844532013,
+      "learning_rate": 0.00018258924031323097,
+      "loss": 1.1643,
+      "step": 862
+    },
+    {
+      "epoch": 0.17544216304126856,
+      "grad_norm": 0.12729796767234802,
+      "learning_rate": 0.0001825689006406997,
+      "loss": 1.0692,
+      "step": 863
+    },
+    {
+      "epoch": 0.17564545639357593,
+      "grad_norm": 0.12474660575389862,
+      "learning_rate": 0.00018254856096816842,
+      "loss": 1.2838,
+      "step": 864
+    },
+    {
+      "epoch": 0.1758487497458833,
+      "grad_norm": 0.12324024736881256,
+      "learning_rate": 0.00018252822129563717,
+      "loss": 1.0029,
+      "step": 865
+    },
+    {
+      "epoch": 0.1760520430981907,
+      "grad_norm": 0.13511407375335693,
+      "learning_rate": 0.0001825078816231059,
+      "loss": 1.1398,
+      "step": 866
+    },
+    {
+      "epoch": 0.17625533645049807,
+      "grad_norm": 0.13292032480239868,
+      "learning_rate": 0.0001824875419505746,
+      "loss": 1.3107,
+      "step": 867
+    },
+    {
+      "epoch": 0.17645862980280544,
+      "grad_norm": 0.12073294073343277,
+      "learning_rate": 0.00018246720227804331,
+      "loss": 1.1293,
+      "step": 868
+    },
+    {
+      "epoch": 0.17666192315511284,
+      "grad_norm": 0.11789250373840332,
+      "learning_rate": 0.00018244686260551207,
+      "loss": 1.0462,
+      "step": 869
+    },
+    {
+      "epoch": 0.1768652165074202,
+      "grad_norm": 0.1194562166929245,
+      "learning_rate": 0.0001824265229329808,
+      "loss": 1.0017,
+      "step": 870
+    },
+    {
+      "epoch": 0.17706850985972758,
+      "grad_norm": 0.10480080544948578,
+      "learning_rate": 0.00018240618326044951,
+      "loss": 0.8659,
+      "step": 871
+    },
+    {
+      "epoch": 0.17727180321203498,
+      "grad_norm": 0.1207701787352562,
+      "learning_rate": 0.00018238584358791824,
+      "loss": 0.9937,
+      "step": 872
+    },
+    {
+      "epoch": 0.17747509656434235,
+      "grad_norm": 0.1190091222524643,
+      "learning_rate": 0.000182365503915387,
+      "loss": 1.0437,
+      "step": 873
+    },
+    {
+      "epoch": 0.17767838991664972,
+      "grad_norm": 0.1277458369731903,
+      "learning_rate": 0.00018234516424285572,
+      "loss": 1.2392,
+      "step": 874
+    },
+    {
+      "epoch": 0.1778816832689571,
+      "grad_norm": 0.12237963080406189,
+      "learning_rate": 0.00018232482457032444,
+      "loss": 1.1032,
+      "step": 875
+    },
+    {
+      "epoch": 0.1780849766212645,
+      "grad_norm": 0.1319531798362732,
+      "learning_rate": 0.00018230448489779314,
+      "loss": 1.2012,
+      "step": 876
+    },
+    {
+      "epoch": 0.17828826997357186,
+      "grad_norm": 0.11914216727018356,
+      "learning_rate": 0.0001822841452252619,
+      "loss": 1.0272,
+      "step": 877
+    },
+    {
+      "epoch": 0.17849156332587923,
+      "grad_norm": 0.14588242769241333,
+      "learning_rate": 0.0001822638055527306,
+      "loss": 1.357,
+      "step": 878
+    },
+    {
+      "epoch": 0.17869485667818663,
+      "grad_norm": 0.11982700973749161,
+      "learning_rate": 0.00018224346588019934,
+      "loss": 1.049,
+      "step": 879
+    },
+    {
+      "epoch": 0.178898150030494,
+      "grad_norm": 0.12529560923576355,
+      "learning_rate": 0.00018222312620766806,
+      "loss": 1.0713,
+      "step": 880
+    },
+    {
+      "epoch": 0.17910144338280137,
+      "grad_norm": 0.1316487044095993,
+      "learning_rate": 0.00018220278653513679,
+      "loss": 1.1749,
+      "step": 881
+    },
+    {
+      "epoch": 0.17930473673510877,
+      "grad_norm": 0.12096232175827026,
+      "learning_rate": 0.00018218244686260554,
+      "loss": 1.2104,
+      "step": 882
+    },
+    {
+      "epoch": 0.17950803008741614,
+      "grad_norm": 0.1313014030456543,
+      "learning_rate": 0.00018216210719007426,
+      "loss": 1.0554,
+      "step": 883
+    },
+    {
+      "epoch": 0.1797113234397235,
+      "grad_norm": 0.1309378743171692,
+      "learning_rate": 0.00018214176751754296,
+      "loss": 1.2152,
+      "step": 884
+    },
+    {
+      "epoch": 0.1799146167920309,
+      "grad_norm": 0.1286410242319107,
+      "learning_rate": 0.00018212142784501168,
+      "loss": 1.0922,
+      "step": 885
+    },
+    {
+      "epoch": 0.18011791014433828,
+      "grad_norm": 0.12893226742744446,
+      "learning_rate": 0.00018210108817248044,
+      "loss": 1.1969,
+      "step": 886
+    },
+    {
+      "epoch": 0.18032120349664565,
+      "grad_norm": 0.11664584279060364,
+      "learning_rate": 0.00018208074849994916,
+      "loss": 1.0085,
+      "step": 887
+    },
+    {
+      "epoch": 0.18052449684895303,
+      "grad_norm": 0.10973158478736877,
+      "learning_rate": 0.00018206040882741788,
+      "loss": 0.9548,
+      "step": 888
+    },
+    {
+      "epoch": 0.18072779020126042,
+      "grad_norm": 0.11281079053878784,
+      "learning_rate": 0.0001820400691548866,
+      "loss": 0.8521,
+      "step": 889
+    },
+    {
+      "epoch": 0.1809310835535678,
+      "grad_norm": 0.12198197096586227,
+      "learning_rate": 0.00018201972948235536,
+      "loss": 1.0537,
+      "step": 890
+    },
+    {
+      "epoch": 0.18113437690587517,
+      "grad_norm": 0.09405733644962311,
+      "learning_rate": 0.00018199938980982409,
+      "loss": 0.7193,
+      "step": 891
+    },
+    {
+      "epoch": 0.18133767025818257,
+      "grad_norm": 0.13503974676132202,
+      "learning_rate": 0.00018197905013729278,
+      "loss": 1.1564,
+      "step": 892
+    },
+    {
+      "epoch": 0.18154096361048994,
+      "grad_norm": 0.1322106271982193,
+      "learning_rate": 0.0001819587104647615,
+      "loss": 1.0733,
+      "step": 893
+    },
+    {
+      "epoch": 0.1817442569627973,
+      "grad_norm": 0.12791374325752258,
+      "learning_rate": 0.00018193837079223026,
+      "loss": 1.0701,
+      "step": 894
+    },
+    {
+      "epoch": 0.1819475503151047,
+      "grad_norm": 0.12342046946287155,
+      "learning_rate": 0.00018191803111969898,
+      "loss": 1.1255,
+      "step": 895
+    },
+    {
+      "epoch": 0.18215084366741208,
+      "grad_norm": 0.12089495360851288,
+      "learning_rate": 0.0001818976914471677,
+      "loss": 1.0177,
+      "step": 896
+    },
+    {
+      "epoch": 0.18235413701971945,
+      "grad_norm": 0.12383720278739929,
+      "learning_rate": 0.00018187735177463643,
+      "loss": 1.0188,
+      "step": 897
+    },
+    {
+      "epoch": 0.18255743037202685,
+      "grad_norm": 0.12089379876852036,
+      "learning_rate": 0.00018185701210210518,
+      "loss": 1.1106,
+      "step": 898
+    },
+    {
+      "epoch": 0.18276072372433422,
+      "grad_norm": 0.12939763069152832,
+      "learning_rate": 0.0001818366724295739,
+      "loss": 1.1939,
+      "step": 899
+    },
+    {
+      "epoch": 0.1829640170766416,
+      "grad_norm": 0.14534543454647064,
+      "learning_rate": 0.0001818163327570426,
+      "loss": 1.252,
+      "step": 900
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 5.0020743561314304e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null