Training in progress, step 191, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +291 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5a5419011e6ef6d5ab8598b457cecb232830383d194b80e74f11065f2c4feec2
 size 645975704

 version https://git-lfs.github.com/spec/v1
+oid sha256:e50fb513373efc8d7be1aa405acd51b656eda59fe3bb2a6828a040bc40f028fa
 size 645975704

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ab7e18ec589656e24262d136c2ddde6a438c46368036dd498155bfaeb8c0cf5
 size 328468404

 version https://git-lfs.github.com/spec/v1
+oid sha256:02eed8f1d6b9be5f98130b2d24376288bd62c4b4ae15100b5bd79cc9a256674f
 size 328468404

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:817d34fc6c0281281cd59ced9605c773f0e7d52a8665482ccde9ca429dacf2bd
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2c0b3eb76ec3598af64c71a4844be44e294f3ae7cc9e4a726a8760027806eef5
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8f6e7a88cf230a068e3f82e88b624952b386262ca206f11ec8211ed4b5c33414
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d8781b403f6e79d31199d91dc5482eb96689fd318472e93e4c83efb756ecf166
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.8613990545272827,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 2.362204724409449,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,293 @@
       "eval_samples_per_second": 6.976,
       "eval_steps_per_second": 1.76,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1399,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.1839074566209536e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.8613990545272827,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 3.0078740157480315,
   "eval_steps": 50,
+  "global_step": 191,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 6.976,
       "eval_steps_per_second": 1.76,
       "step": 150
+    },
+    {
+      "epoch": 2.377952755905512,
+      "grad_norm": 0.23553703725337982,
+      "learning_rate": 1.1574098862709992e-05,
+      "loss": 1.7086,
+      "step": 151
+    },
+    {
+      "epoch": 2.393700787401575,
+      "grad_norm": 0.2348308116197586,
+      "learning_rate": 1.1024644157551206e-05,
+      "loss": 1.7374,
+      "step": 152
+    },
+    {
+      "epoch": 2.409448818897638,
+      "grad_norm": 0.24118292331695557,
+      "learning_rate": 1.0486930903705094e-05,
+      "loss": 1.7819,
+      "step": 153
+    },
+    {
+      "epoch": 2.425196850393701,
+      "grad_norm": 0.22775617241859436,
+      "learning_rate": 9.961121089018932e-06,
+      "loss": 1.7897,
+      "step": 154
+    },
+    {
+      "epoch": 2.440944881889764,
+      "grad_norm": 0.22295954823493958,
+      "learning_rate": 9.4473731153907e-06,
+      "loss": 1.7652,
+      "step": 155
+    },
+    {
+      "epoch": 2.456692913385827,
+      "grad_norm": 0.23308737576007843,
+      "learning_rate": 8.945841751049915e-06,
+      "loss": 1.7224,
+      "step": 156
+    },
+    {
+      "epoch": 2.47244094488189,
+      "grad_norm": 0.25666847825050354,
+      "learning_rate": 8.456678083933289e-06,
+      "loss": 1.7704,
+      "step": 157
+    },
+    {
+      "epoch": 2.4881889763779528,
+      "grad_norm": 0.24374538660049438,
+      "learning_rate": 7.980029476168944e-06,
+      "loss": 1.958,
+      "step": 158
+    },
+    {
+      "epoch": 2.5039370078740157,
+      "grad_norm": 0.2336760014295578,
+      "learning_rate": 7.5160395196831046e-06,
+      "loss": 1.6695,
+      "step": 159
+    },
+    {
+      "epoch": 2.5196850393700787,
+      "grad_norm": 0.23040024936199188,
+      "learning_rate": 7.064847992942614e-06,
+      "loss": 1.748,
+      "step": 160
+    },
+    {
+      "epoch": 2.5354330708661417,
+      "grad_norm": 0.2296517789363861,
+      "learning_rate": 6.626590818846162e-06,
+      "loss": 1.7122,
+      "step": 161
+    },
+    {
+      "epoch": 2.5511811023622046,
+      "grad_norm": 0.2303856462240219,
+      "learning_rate": 6.201400023777104e-06,
+      "loss": 1.7648,
+      "step": 162
+    },
+    {
+      "epoch": 2.5669291338582676,
+      "grad_norm": 0.2246444672346115,
+      "learning_rate": 5.7894036978301035e-06,
+      "loss": 1.6923,
+      "step": 163
+    },
+    {
+      "epoch": 2.5826771653543306,
+      "grad_norm": 0.23633837699890137,
+      "learning_rate": 5.39072595622353e-06,
+      "loss": 1.7337,
+      "step": 164
+    },
+    {
+      "epoch": 2.5984251968503935,
+      "grad_norm": 0.24737262725830078,
+      "learning_rate": 5.005486901909428e-06,
+      "loss": 1.7537,
+      "step": 165
+    },
+    {
+      "epoch": 2.6141732283464565,
+      "grad_norm": 0.24044926464557648,
+      "learning_rate": 4.6338025893920166e-06,
+      "loss": 1.8285,
+      "step": 166
+    },
+    {
+      "epoch": 2.6299212598425195,
+      "grad_norm": 0.2352256029844284,
+      "learning_rate": 4.275784989765985e-06,
+      "loss": 1.7291,
+      "step": 167
+    },
+    {
+      "epoch": 2.6456692913385824,
+      "grad_norm": 0.23979999125003815,
+      "learning_rate": 3.93154195698478e-06,
+      "loss": 1.7735,
+      "step": 168
+    },
+    {
+      "epoch": 2.661417322834646,
+      "grad_norm": 0.23789139091968536,
+      "learning_rate": 3.601177195369304e-06,
+      "loss": 1.8119,
+      "step": 169
+    },
+    {
+      "epoch": 2.677165354330709,
+      "grad_norm": 0.2280394583940506,
+      "learning_rate": 3.2847902283666022e-06,
+      "loss": 1.7269,
+      "step": 170
+    },
+    {
+      "epoch": 2.6929133858267718,
+      "grad_norm": 0.22925442457199097,
+      "learning_rate": 2.9824763685681766e-06,
+      "loss": 1.7427,
+      "step": 171
+    },
+    {
+      "epoch": 2.7086614173228347,
+      "grad_norm": 0.25694581866264343,
+      "learning_rate": 2.694326688996662e-06,
+      "loss": 1.6969,
+      "step": 172
+    },
+    {
+      "epoch": 2.7244094488188977,
+      "grad_norm": 0.24270634353160858,
+      "learning_rate": 2.4204279956698995e-06,
+      "loss": 1.8398,
+      "step": 173
+    },
+    {
+      "epoch": 2.7401574803149606,
+      "grad_norm": 0.23506343364715576,
+      "learning_rate": 2.1608628014502365e-06,
+      "loss": 1.6692,
+      "step": 174
+    },
+    {
+      "epoch": 2.7559055118110236,
+      "grad_norm": 0.2271362543106079,
+      "learning_rate": 1.915709301187335e-06,
+      "loss": 1.6801,
+      "step": 175
+    },
+    {
+      "epoch": 2.7716535433070866,
+      "grad_norm": 0.22768786549568176,
+      "learning_rate": 1.6850413481616868e-06,
+      "loss": 1.5849,
+      "step": 176
+    },
+    {
+      "epoch": 2.7874015748031495,
+      "grad_norm": 0.24064046144485474,
+      "learning_rate": 1.4689284318360918e-06,
+      "loss": 1.7468,
+      "step": 177
+    },
+    {
+      "epoch": 2.8031496062992125,
+      "grad_norm": 0.2282429337501526,
+      "learning_rate": 1.2674356569217282e-06,
+      "loss": 1.7783,
+      "step": 178
+    },
+    {
+      "epoch": 2.8188976377952755,
+      "grad_norm": 0.2347092181444168,
+      "learning_rate": 1.080623723765134e-06,
+      "loss": 1.7443,
+      "step": 179
+    },
+    {
+      "epoch": 2.8346456692913384,
+      "grad_norm": 0.23791402578353882,
+      "learning_rate": 9.085489100620737e-07,
+      "loss": 1.8306,
+      "step": 180
+    },
+    {
+      "epoch": 2.850393700787402,
+      "grad_norm": 0.23669809103012085,
+      "learning_rate": 7.512630539036502e-07,
+      "loss": 1.7913,
+      "step": 181
+    },
+    {
+      "epoch": 2.866141732283465,
+      "grad_norm": 0.24042251706123352,
+      "learning_rate": 6.088135381599414e-07,
+      "loss": 1.7201,
+      "step": 182
+    },
+    {
+      "epoch": 2.8818897637795278,
+      "grad_norm": 0.23241114616394043,
+      "learning_rate": 4.812432762057673e-07,
+      "loss": 1.7816,
+      "step": 183
+    },
+    {
+      "epoch": 2.8976377952755907,
+      "grad_norm": 0.24429729580879211,
+      "learning_rate": 3.685906989928656e-07,
+      "loss": 1.8598,
+      "step": 184
+    },
+    {
+      "epoch": 2.9133858267716537,
+      "grad_norm": 0.23608095943927765,
+      "learning_rate": 2.7088974347246887e-07,
+      "loss": 1.7586,
+      "step": 185
+    },
+    {
+      "epoch": 2.9291338582677167,
+      "grad_norm": 0.2519053816795349,
+      "learning_rate": 1.8816984237169376e-07,
+      "loss": 1.7577,
+      "step": 186
+    },
+    {
+      "epoch": 2.9448818897637796,
+      "grad_norm": 0.27554014325141907,
+      "learning_rate": 1.2045591532681145e-07,
+      "loss": 1.7541,
+      "step": 187
+    },
+    {
+      "epoch": 2.9606299212598426,
+      "grad_norm": 0.24321898818016052,
+      "learning_rate": 6.776836137615262e-08,
+      "loss": 1.7011,
+      "step": 188
+    },
+    {
+      "epoch": 2.9763779527559056,
+      "grad_norm": 0.24500428140163422,
+      "learning_rate": 3.0123052814812206e-08,
+      "loss": 1.7679,
+      "step": 189
+    },
+    {
+      "epoch": 2.9921259842519685,
+      "grad_norm": 0.2374401092529297,
+      "learning_rate": 7.53133041307974e-09,
+      "loss": 1.7637,
+      "step": 190
+    },
+    {
+      "epoch": 3.0078740157480315,
+      "grad_norm": 0.47424909472465515,
+      "learning_rate": 0.0,
+      "loss": 2.8832,
+      "step": 191
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.319641044038451e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null