Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +722 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ebd8eb1963155d7959e5fd01bf4e6f5a5418391cb14eaca7c12c2ea2cdb7397
 size 191968

 version https://git-lfs.github.com/spec/v1
+oid sha256:8c7f760852553c885da7ac4651c37d05351dad144849dba33d67013f7dd0453e
 size 191968

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96f7d36d5b8fa2a51ae896f43b6b1d4ad96ff4475345c06580c2e76ad1261f71
 size 253144

 version https://git-lfs.github.com/spec/v1
+oid sha256:79e6fe293e85b0f229b1a21ac73548863c9dfaf7ad7c010a558e69a5d604454b
 size 253144

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e75de118c84af1c7af0b228e50e086362dc00b6326644774b92f164bb90d49e8
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7a01750f036d9438514de7d76a47893cf8fe3739c2ce1fd668646188c049c326
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ddb9588ea654e56e83effcf81a2bc03480954babcf6415cb44d41d3bfb8039f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.323793411254883,
-  "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.321285140562249,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,722 @@
       "eval_samples_per_second": 451.852,
       "eval_steps_per_second": 112.963,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -754,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 11238729056256.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.32220458984375,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.642570281124498,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 451.852,
       "eval_steps_per_second": 112.963,
       "step": 100
+    },
+    {
+      "epoch": 0.3244979919678715,
+      "grad_norm": 0.13746659457683563,
+      "learning_rate": 5.330452921628497e-05,
+      "loss": 10.3277,
+      "step": 101
+    },
+    {
+      "epoch": 0.327710843373494,
+      "grad_norm": 0.14080485701560974,
+      "learning_rate": 5.247918773366112e-05,
+      "loss": 10.3229,
+      "step": 102
+    },
+    {
+      "epoch": 0.3309236947791165,
+      "grad_norm": 0.08412718772888184,
+      "learning_rate": 5.165316846586541e-05,
+      "loss": 10.3341,
+      "step": 103
+    },
+    {
+      "epoch": 0.334136546184739,
+      "grad_norm": 0.09817983210086823,
+      "learning_rate": 5.0826697238317935e-05,
+      "loss": 10.3305,
+      "step": 104
+    },
+    {
+      "epoch": 0.3373493975903614,
+      "grad_norm": 0.11353299766778946,
+      "learning_rate": 5e-05,
+      "loss": 10.3283,
+      "step": 105
+    },
+    {
+      "epoch": 0.3405622489959839,
+      "grad_norm": 0.1714153289794922,
+      "learning_rate": 4.917330276168208e-05,
+      "loss": 10.3298,
+      "step": 106
+    },
+    {
+      "epoch": 0.3437751004016064,
+      "grad_norm": 0.14416934549808502,
+      "learning_rate": 4.834683153413459e-05,
+      "loss": 10.3311,
+      "step": 107
+    },
+    {
+      "epoch": 0.3469879518072289,
+      "grad_norm": 0.12153627723455429,
+      "learning_rate": 4.7520812266338885e-05,
+      "loss": 10.3296,
+      "step": 108
+    },
+    {
+      "epoch": 0.3502008032128514,
+      "grad_norm": 0.09511595964431763,
+      "learning_rate": 4.669547078371504e-05,
+      "loss": 10.3231,
+      "step": 109
+    },
+    {
+      "epoch": 0.3534136546184739,
+      "grad_norm": 0.0920514166355133,
+      "learning_rate": 4.5871032726383386e-05,
+      "loss": 10.3294,
+      "step": 110
+    },
+    {
+      "epoch": 0.3566265060240964,
+      "grad_norm": 0.10794074833393097,
+      "learning_rate": 4.504772348747687e-05,
+      "loss": 10.3341,
+      "step": 111
+    },
+    {
+      "epoch": 0.3598393574297189,
+      "grad_norm": 0.10634323954582214,
+      "learning_rate": 4.4225768151520694e-05,
+      "loss": 10.3354,
+      "step": 112
+    },
+    {
+      "epoch": 0.36305220883534134,
+      "grad_norm": 0.11892643570899963,
+      "learning_rate": 4.3405391432896555e-05,
+      "loss": 10.3274,
+      "step": 113
+    },
+    {
+      "epoch": 0.36626506024096384,
+      "grad_norm": 0.14139418303966522,
+      "learning_rate": 4.2586817614407895e-05,
+      "loss": 10.3256,
+      "step": 114
+    },
+    {
+      "epoch": 0.36947791164658633,
+      "grad_norm": 0.13411003351211548,
+      "learning_rate": 4.17702704859633e-05,
+      "loss": 10.3273,
+      "step": 115
+    },
+    {
+      "epoch": 0.37269076305220883,
+      "grad_norm": 0.0930616557598114,
+      "learning_rate": 4.095597328339452e-05,
+      "loss": 10.3317,
+      "step": 116
+    },
+    {
+      "epoch": 0.3759036144578313,
+      "grad_norm": 0.08703553676605225,
+      "learning_rate": 4.0144148627425993e-05,
+      "loss": 10.3369,
+      "step": 117
+    },
+    {
+      "epoch": 0.3791164658634538,
+      "grad_norm": 0.08455926179885864,
+      "learning_rate": 3.933501846281267e-05,
+      "loss": 10.3318,
+      "step": 118
+    },
+    {
+      "epoch": 0.3823293172690763,
+      "grad_norm": 0.1004587933421135,
+      "learning_rate": 3.852880399766243e-05,
+      "loss": 10.3291,
+      "step": 119
+    },
+    {
+      "epoch": 0.3855421686746988,
+      "grad_norm": 0.09143741428852081,
+      "learning_rate": 3.772572564296005e-05,
+      "loss": 10.3216,
+      "step": 120
+    },
+    {
+      "epoch": 0.3887550200803213,
+      "grad_norm": 0.12408670037984848,
+      "learning_rate": 3.6926002952309016e-05,
+      "loss": 10.3266,
+      "step": 121
+    },
+    {
+      "epoch": 0.39196787148594375,
+      "grad_norm": 0.11100861430168152,
+      "learning_rate": 3.612985456190778e-05,
+      "loss": 10.3382,
+      "step": 122
+    },
+    {
+      "epoch": 0.39518072289156625,
+      "grad_norm": 0.09714003652334213,
+      "learning_rate": 3.533749813077677e-05,
+      "loss": 10.3232,
+      "step": 123
+    },
+    {
+      "epoch": 0.39839357429718875,
+      "grad_norm": 0.09558629989624023,
+      "learning_rate": 3.4549150281252636e-05,
+      "loss": 10.3278,
+      "step": 124
+    },
+    {
+      "epoch": 0.40160642570281124,
+      "grad_norm": 0.11661966890096664,
+      "learning_rate": 3.3765026539765834e-05,
+      "loss": 10.3297,
+      "step": 125
+    },
+    {
+      "epoch": 0.40481927710843374,
+      "grad_norm": 0.1436244696378708,
+      "learning_rate": 3.298534127791785e-05,
+      "loss": 10.3295,
+      "step": 126
+    },
+    {
+      "epoch": 0.40803212851405624,
+      "grad_norm": 0.13147498667240143,
+      "learning_rate": 3.221030765387417e-05,
+      "loss": 10.3174,
+      "step": 127
+    },
+    {
+      "epoch": 0.41124497991967873,
+      "grad_norm": 0.1136285737156868,
+      "learning_rate": 3.144013755408895e-05,
+      "loss": 10.3281,
+      "step": 128
+    },
+    {
+      "epoch": 0.41445783132530123,
+      "grad_norm": 0.1124458834528923,
+      "learning_rate": 3.0675041535377405e-05,
+      "loss": 10.3303,
+      "step": 129
+    },
+    {
+      "epoch": 0.41767068273092367,
+      "grad_norm": 0.16464786231517792,
+      "learning_rate": 2.991522876735154e-05,
+      "loss": 10.3337,
+      "step": 130
+    },
+    {
+      "epoch": 0.42088353413654617,
+      "grad_norm": 0.16290715336799622,
+      "learning_rate": 2.916090697523549e-05,
+      "loss": 10.3255,
+      "step": 131
+    },
+    {
+      "epoch": 0.42409638554216866,
+      "grad_norm": 0.09206510335206985,
+      "learning_rate": 2.8412282383075363e-05,
+      "loss": 10.3199,
+      "step": 132
+    },
+    {
+      "epoch": 0.42730923694779116,
+      "grad_norm": 0.11564340442419052,
+      "learning_rate": 2.766955965735968e-05,
+      "loss": 10.326,
+      "step": 133
+    },
+    {
+      "epoch": 0.43052208835341366,
+      "grad_norm": 0.10356107354164124,
+      "learning_rate": 2.693294185106562e-05,
+      "loss": 10.3294,
+      "step": 134
+    },
+    {
+      "epoch": 0.43373493975903615,
+      "grad_norm": 0.12882065773010254,
+      "learning_rate": 2.6202630348146324e-05,
+      "loss": 10.3299,
+      "step": 135
+    },
+    {
+      "epoch": 0.43694779116465865,
+      "grad_norm": 0.12127930670976639,
+      "learning_rate": 2.547882480847461e-05,
+      "loss": 10.3231,
+      "step": 136
+    },
+    {
+      "epoch": 0.44016064257028115,
+      "grad_norm": 0.10148213058710098,
+      "learning_rate": 2.476172311325783e-05,
+      "loss": 10.3299,
+      "step": 137
+    },
+    {
+      "epoch": 0.4433734939759036,
+      "grad_norm": 0.11116017401218414,
+      "learning_rate": 2.405152131093926e-05,
+      "loss": 10.3209,
+      "step": 138
+    },
+    {
+      "epoch": 0.4465863453815261,
+      "grad_norm": 0.149327352643013,
+      "learning_rate": 2.3348413563600325e-05,
+      "loss": 10.3225,
+      "step": 139
+    },
+    {
+      "epoch": 0.4497991967871486,
+      "grad_norm": 0.13048338890075684,
+      "learning_rate": 2.2652592093878666e-05,
+      "loss": 10.3307,
+      "step": 140
+    },
+    {
+      "epoch": 0.4530120481927711,
+      "grad_norm": 0.09309875965118408,
+      "learning_rate": 2.196424713241637e-05,
+      "loss": 10.3179,
+      "step": 141
+    },
+    {
+      "epoch": 0.4562248995983936,
+      "grad_norm": 0.11135369539260864,
+      "learning_rate": 2.128356686585282e-05,
+      "loss": 10.318,
+      "step": 142
+    },
+    {
+      "epoch": 0.45943775100401607,
+      "grad_norm": 0.14271609485149384,
+      "learning_rate": 2.061073738537635e-05,
+      "loss": 10.3149,
+      "step": 143
+    },
+    {
+      "epoch": 0.46265060240963857,
+      "grad_norm": 0.12098371237516403,
+      "learning_rate": 1.9945942635848748e-05,
+      "loss": 10.3345,
+      "step": 144
+    },
+    {
+      "epoch": 0.46586345381526106,
+      "grad_norm": 0.09931446611881256,
+      "learning_rate": 1.928936436551661e-05,
+      "loss": 10.3217,
+      "step": 145
+    },
+    {
+      "epoch": 0.46907630522088356,
+      "grad_norm": 0.10187914222478867,
+      "learning_rate": 1.8641182076323148e-05,
+      "loss": 10.317,
+      "step": 146
+    },
+    {
+      "epoch": 0.472289156626506,
+      "grad_norm": 0.1341490000486374,
+      "learning_rate": 1.800157297483417e-05,
+      "loss": 10.3159,
+      "step": 147
+    },
+    {
+      "epoch": 0.4755020080321285,
+      "grad_norm": 0.13303139805793762,
+      "learning_rate": 1.7370711923791567e-05,
+      "loss": 10.3084,
+      "step": 148
+    },
+    {
+      "epoch": 0.478714859437751,
+      "grad_norm": 0.10958024859428406,
+      "learning_rate": 1.6748771394307585e-05,
+      "loss": 10.3196,
+      "step": 149
+    },
+    {
+      "epoch": 0.4819277108433735,
+      "grad_norm": 0.15488006174564362,
+      "learning_rate": 1.6135921418712956e-05,
+      "loss": 10.3069,
+      "step": 150
+    },
+    {
+      "epoch": 0.4819277108433735,
+      "eval_loss": 10.323417663574219,
+      "eval_runtime": 1.1572,
+      "eval_samples_per_second": 452.817,
+      "eval_steps_per_second": 113.204,
+      "step": 150
+    },
+    {
+      "epoch": 0.485140562248996,
+      "grad_norm": 0.1342858374118805,
+      "learning_rate": 1.553232954407171e-05,
+      "loss": 10.3197,
+      "step": 151
+    },
+    {
+      "epoch": 0.4883534136546185,
+      "grad_norm": 0.08063612133264542,
+      "learning_rate": 1.4938160786375572e-05,
+      "loss": 10.3286,
+      "step": 152
+    },
+    {
+      "epoch": 0.491566265060241,
+      "grad_norm": 0.12848462164402008,
+      "learning_rate": 1.435357758543015e-05,
+      "loss": 10.3402,
+      "step": 153
+    },
+    {
+      "epoch": 0.4947791164658635,
+      "grad_norm": 0.10360779613256454,
+      "learning_rate": 1.3778739760445552e-05,
+      "loss": 10.327,
+      "step": 154
+    },
+    {
+      "epoch": 0.4979919678714859,
+      "grad_norm": 0.09004613012075424,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 10.3244,
+      "step": 155
+    },
+    {
+      "epoch": 0.5012048192771085,
+      "grad_norm": 0.10969901084899902,
+      "learning_rate": 1.2658926150792322e-05,
+      "loss": 10.3339,
+      "step": 156
+    },
+    {
+      "epoch": 0.5044176706827309,
+      "grad_norm": 0.1162419244647026,
+      "learning_rate": 1.2114256511983274e-05,
+      "loss": 10.3268,
+      "step": 157
+    },
+    {
+      "epoch": 0.5076305220883535,
+      "grad_norm": 0.09138023853302002,
+      "learning_rate": 1.157994445715706e-05,
+      "loss": 10.3338,
+      "step": 158
+    },
+    {
+      "epoch": 0.5108433734939759,
+      "grad_norm": 0.09648050367832184,
+      "learning_rate": 1.1056136061894384e-05,
+      "loss": 10.3271,
+      "step": 159
+    },
+    {
+      "epoch": 0.5140562248995983,
+      "grad_norm": 0.08033082634210587,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 10.328,
+      "step": 160
+    },
+    {
+      "epoch": 0.5172690763052209,
+      "grad_norm": 0.10192538797855377,
+      "learning_rate": 1.0040600155253765e-05,
+      "loss": 10.3286,
+      "step": 161
+    },
+    {
+      "epoch": 0.5204819277108433,
+      "grad_norm": 0.1149694174528122,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 10.3251,
+      "step": 162
+    },
+    {
+      "epoch": 0.5236947791164659,
+      "grad_norm": 0.11814501136541367,
+      "learning_rate": 9.068759265665384e-06,
+      "loss": 10.32,
+      "step": 163
+    },
+    {
+      "epoch": 0.5269076305220883,
+      "grad_norm": 0.09197898954153061,
+      "learning_rate": 8.599558442598998e-06,
+      "loss": 10.3235,
+      "step": 164
+    },
+    {
+      "epoch": 0.5301204819277109,
+      "grad_norm": 0.07296612858772278,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 10.3401,
+      "step": 165
+    },
+    {
+      "epoch": 0.5333333333333333,
+      "grad_norm": 0.09049750119447708,
+      "learning_rate": 7.695237378953223e-06,
+      "loss": 10.3267,
+      "step": 166
+    },
+    {
+      "epoch": 0.5365461847389559,
+      "grad_norm": 0.11494322121143341,
+      "learning_rate": 7.260364370723044e-06,
+      "loss": 10.3268,
+      "step": 167
+    },
+    {
+      "epoch": 0.5397590361445783,
+      "grad_norm": 0.10997875779867172,
+      "learning_rate": 6.837175952121306e-06,
+      "loss": 10.3252,
+      "step": 168
+    },
+    {
+      "epoch": 0.5429718875502008,
+      "grad_norm": 0.10923228412866592,
+      "learning_rate": 6.425787818636131e-06,
+      "loss": 10.3276,
+      "step": 169
+    },
+    {
+      "epoch": 0.5461847389558233,
+      "grad_norm": 0.09875035285949707,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 10.3228,
+      "step": 170
+    },
+    {
+      "epoch": 0.5493975903614458,
+      "grad_norm": 0.12667155265808105,
+      "learning_rate": 5.6388590278194096e-06,
+      "loss": 10.3328,
+      "step": 171
+    },
+    {
+      "epoch": 0.5526104417670683,
+      "grad_norm": 0.10388634353876114,
+      "learning_rate": 5.263533508961827e-06,
+      "loss": 10.3298,
+      "step": 172
+    },
+    {
+      "epoch": 0.5558232931726907,
+      "grad_norm": 0.12183015048503876,
+      "learning_rate": 4.900438493352055e-06,
+      "loss": 10.331,
+      "step": 173
+    },
+    {
+      "epoch": 0.5590361445783133,
+      "grad_norm": 0.12482339143753052,
+      "learning_rate": 4.549673247541875e-06,
+      "loss": 10.3244,
+      "step": 174
+    },
+    {
+      "epoch": 0.5622489959839357,
+      "grad_norm": 0.08300051093101501,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 10.3323,
+      "step": 175
+    },
+    {
+      "epoch": 0.5654618473895582,
+      "grad_norm": 0.11860600858926773,
+      "learning_rate": 3.885512251130763e-06,
+      "loss": 10.3321,
+      "step": 176
+    },
+    {
+      "epoch": 0.5686746987951807,
+      "grad_norm": 0.11088336259126663,
+      "learning_rate": 3.5722980755146517e-06,
+      "loss": 10.3327,
+      "step": 177
+    },
+    {
+      "epoch": 0.5718875502008032,
+      "grad_norm": 0.08796709775924683,
+      "learning_rate": 3.271776770026963e-06,
+      "loss": 10.3308,
+      "step": 178
+    },
+    {
+      "epoch": 0.5751004016064257,
+      "grad_norm": 0.10811839252710342,
+      "learning_rate": 2.9840304941919415e-06,
+      "loss": 10.3314,
+      "step": 179
+    },
+    {
+      "epoch": 0.5783132530120482,
+      "grad_norm": 0.11419740319252014,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 10.334,
+      "step": 180
+    },
+    {
+      "epoch": 0.5815261044176707,
+      "grad_norm": 0.09244905412197113,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 10.325,
+      "step": 181
+    },
+    {
+      "epoch": 0.5847389558232932,
+      "grad_norm": 0.11072293668985367,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 10.3272,
+      "step": 182
+    },
+    {
+      "epoch": 0.5879518072289157,
+      "grad_norm": 0.10999740660190582,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 10.3254,
+      "step": 183
+    },
+    {
+      "epoch": 0.5911646586345382,
+      "grad_norm": 0.09968981891870499,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 10.3236,
+      "step": 184
+    },
+    {
+      "epoch": 0.5943775100401606,
+      "grad_norm": 0.12061024457216263,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 10.3159,
+      "step": 185
+    },
+    {
+      "epoch": 0.5975903614457831,
+      "grad_norm": 0.07807968556880951,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 10.3207,
+      "step": 186
+    },
+    {
+      "epoch": 0.6008032128514056,
+      "grad_norm": 0.12807731330394745,
+      "learning_rate": 1.1506584608200367e-06,
+      "loss": 10.3216,
+      "step": 187
+    },
+    {
+      "epoch": 0.6040160642570281,
+      "grad_norm": 0.12024622410535812,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 10.3303,
+      "step": 188
+    },
+    {
+      "epoch": 0.6072289156626506,
+      "grad_norm": 0.12788936495780945,
+      "learning_rate": 8.247462563808817e-07,
+      "loss": 10.3188,
+      "step": 189
+    },
+    {
+      "epoch": 0.6104417670682731,
+      "grad_norm": 0.19885775446891785,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 10.3076,
+      "step": 190
+    },
+    {
+      "epoch": 0.6136546184738956,
+      "grad_norm": 0.11481358110904694,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 10.3277,
+      "step": 191
+    },
+    {
+      "epoch": 0.6168674698795181,
+      "grad_norm": 0.12053002417087555,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 10.3274,
+      "step": 192
+    },
+    {
+      "epoch": 0.6200803212851406,
+      "grad_norm": 0.10069674998521805,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 10.3178,
+      "step": 193
+    },
+    {
+      "epoch": 0.623293172690763,
+      "grad_norm": 0.08938242495059967,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 10.3322,
+      "step": 194
+    },
+    {
+      "epoch": 0.6265060240963856,
+      "grad_norm": 0.13311991095542908,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 10.3274,
+      "step": 195
+    },
+    {
+      "epoch": 0.629718875502008,
+      "grad_norm": 0.08758800476789474,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 10.3239,
+      "step": 196
+    },
+    {
+      "epoch": 0.6329317269076306,
+      "grad_norm": 0.09972859919071198,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 10.3199,
+      "step": 197
+    },
+    {
+      "epoch": 0.636144578313253,
+      "grad_norm": 0.14015185832977295,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 10.3094,
+      "step": 198
+    },
+    {
+      "epoch": 0.6393574297188755,
+      "grad_norm": 0.10132836550474167,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 10.3234,
+      "step": 199
+    },
+    {
+      "epoch": 0.642570281124498,
+      "grad_norm": 0.19862784445285797,
+      "learning_rate": 0.0,
+      "loss": 10.3101,
+      "step": 200
+    },
+    {
+      "epoch": 0.642570281124498,
+      "eval_loss": 10.32220458984375,
+      "eval_runtime": 1.154,
+      "eval_samples_per_second": 454.088,
+      "eval_steps_per_second": 113.522,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 22477458112512.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null