Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7cbf0b88948acf20436e1ed5cf952ebe57b6cf87a1b2077625597f51d4f9e864
 size 838906392

 version https://git-lfs.github.com/spec/v1
+oid sha256:243b82a15109a05e0e798bd2271481838b67a5ae11f49509b2ba3babb16a7ed5
 size 838906392

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0616660c886263d5cc89a61ab47bdfe77de4183f25ba0a2d9e1e92d65e01d99f
 size 426360596

 version https://git-lfs.github.com/spec/v1
+oid sha256:e9e7fb6435fbc5a295f56efcf6d98dc1cbd8273f6e90d925bd0656167b3e0b07
 size 426360596

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48603112d48bcbe3d9fb653a945895262ab4ac75418ab0006d891ced24525686
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2a400852cd5035566eed522ddc79acf445f6dd5b06b840301227afbf09fdd508
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8ce05761f46e7cf72fb17a02e3a0ca15c9d25ce3babf590eeb40568923b8bac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.4153937101364136,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 1.7150997150997151,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 9.643,
       "eval_steps_per_second": 2.411,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.81890674556928e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.4082176685333252,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 2.2905982905982905,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.643,
       "eval_steps_per_second": 2.411,
       "step": 150
+    },
+    {
+      "epoch": 1.7264957264957266,
+      "grad_norm": 4.301281452178955,
+      "learning_rate": 1.553232954407171e-05,
+      "loss": 4.8175,
+      "step": 151
+    },
+    {
+      "epoch": 1.7378917378917378,
+      "grad_norm": 2.5174670219421387,
+      "learning_rate": 1.4938160786375572e-05,
+      "loss": 4.131,
+      "step": 152
+    },
+    {
+      "epoch": 1.7492877492877494,
+      "grad_norm": 3.3506662845611572,
+      "learning_rate": 1.435357758543015e-05,
+      "loss": 3.9204,
+      "step": 153
+    },
+    {
+      "epoch": 1.7606837606837606,
+      "grad_norm": 3.023916006088257,
+      "learning_rate": 1.3778739760445552e-05,
+      "loss": 3.7734,
+      "step": 154
+    },
+    {
+      "epoch": 1.772079772079772,
+      "grad_norm": 3.474066972732544,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 4.1911,
+      "step": 155
+    },
+    {
+      "epoch": 1.7834757834757835,
+      "grad_norm": 3.365908145904541,
+      "learning_rate": 1.2658926150792322e-05,
+      "loss": 3.8576,
+      "step": 156
+    },
+    {
+      "epoch": 1.7948717948717947,
+      "grad_norm": 3.97501802444458,
+      "learning_rate": 1.2114256511983274e-05,
+      "loss": 4.1777,
+      "step": 157
+    },
+    {
+      "epoch": 1.8062678062678064,
+      "grad_norm": 3.7351319789886475,
+      "learning_rate": 1.157994445715706e-05,
+      "loss": 4.0174,
+      "step": 158
+    },
+    {
+      "epoch": 1.8176638176638176,
+      "grad_norm": 3.822105884552002,
+      "learning_rate": 1.1056136061894384e-05,
+      "loss": 4.3782,
+      "step": 159
+    },
+    {
+      "epoch": 1.8290598290598292,
+      "grad_norm": 4.0253472328186035,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 4.0832,
+      "step": 160
+    },
+    {
+      "epoch": 1.8404558404558404,
+      "grad_norm": 4.397227764129639,
+      "learning_rate": 1.0040600155253765e-05,
+      "loss": 4.3355,
+      "step": 161
+    },
+    {
+      "epoch": 1.8518518518518519,
+      "grad_norm": 4.70242166519165,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 4.2251,
+      "step": 162
+    },
+    {
+      "epoch": 1.8632478632478633,
+      "grad_norm": 4.877957344055176,
+      "learning_rate": 9.068759265665384e-06,
+      "loss": 4.2689,
+      "step": 163
+    },
+    {
+      "epoch": 1.8746438746438745,
+      "grad_norm": 4.879999160766602,
+      "learning_rate": 8.599558442598998e-06,
+      "loss": 3.8663,
+      "step": 164
+    },
+    {
+      "epoch": 1.8860398860398861,
+      "grad_norm": 5.327471733093262,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 4.4942,
+      "step": 165
+    },
+    {
+      "epoch": 1.8974358974358974,
+      "grad_norm": 5.141540050506592,
+      "learning_rate": 7.695237378953223e-06,
+      "loss": 3.9602,
+      "step": 166
+    },
+    {
+      "epoch": 1.9088319088319088,
+      "grad_norm": 5.326440334320068,
+      "learning_rate": 7.260364370723044e-06,
+      "loss": 4.3685,
+      "step": 167
+    },
+    {
+      "epoch": 1.9202279202279202,
+      "grad_norm": 5.746788501739502,
+      "learning_rate": 6.837175952121306e-06,
+      "loss": 4.2441,
+      "step": 168
+    },
+    {
+      "epoch": 1.9316239316239316,
+      "grad_norm": 5.605677604675293,
+      "learning_rate": 6.425787818636131e-06,
+      "loss": 3.9436,
+      "step": 169
+    },
+    {
+      "epoch": 1.943019943019943,
+      "grad_norm": 6.427508354187012,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 4.3544,
+      "step": 170
+    },
+    {
+      "epoch": 1.9544159544159543,
+      "grad_norm": 6.7359843254089355,
+      "learning_rate": 5.6388590278194096e-06,
+      "loss": 4.0768,
+      "step": 171
+    },
+    {
+      "epoch": 1.965811965811966,
+      "grad_norm": 4.663614749908447,
+      "learning_rate": 5.263533508961827e-06,
+      "loss": 4.441,
+      "step": 172
+    },
+    {
+      "epoch": 1.9772079772079771,
+      "grad_norm": 3.506089210510254,
+      "learning_rate": 4.900438493352055e-06,
+      "loss": 3.7087,
+      "step": 173
+    },
+    {
+      "epoch": 1.9886039886039886,
+      "grad_norm": 4.7643513679504395,
+      "learning_rate": 4.549673247541875e-06,
+      "loss": 4.2193,
+      "step": 174
+    },
+    {
+      "epoch": 2.005698005698006,
+      "grad_norm": 3.6457362174987793,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 4.0813,
+      "step": 175
+    },
+    {
+      "epoch": 2.017094017094017,
+      "grad_norm": 2.253894567489624,
+      "learning_rate": 3.885512251130763e-06,
+      "loss": 3.8981,
+      "step": 176
+    },
+    {
+      "epoch": 2.0284900284900287,
+      "grad_norm": 2.6464829444885254,
+      "learning_rate": 3.5722980755146517e-06,
+      "loss": 3.5035,
+      "step": 177
+    },
+    {
+      "epoch": 2.03988603988604,
+      "grad_norm": 2.602922201156616,
+      "learning_rate": 3.271776770026963e-06,
+      "loss": 3.0522,
+      "step": 178
+    },
+    {
+      "epoch": 2.051282051282051,
+      "grad_norm": 2.8964452743530273,
+      "learning_rate": 2.9840304941919415e-06,
+      "loss": 3.0494,
+      "step": 179
+    },
+    {
+      "epoch": 2.0626780626780628,
+      "grad_norm": 2.9482107162475586,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 3.3924,
+      "step": 180
+    },
+    {
+      "epoch": 2.074074074074074,
+      "grad_norm": 3.2104787826538086,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 3.4876,
+      "step": 181
+    },
+    {
+      "epoch": 2.0854700854700856,
+      "grad_norm": 3.4505696296691895,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 3.5312,
+      "step": 182
+    },
+    {
+      "epoch": 2.096866096866097,
+      "grad_norm": 3.403191328048706,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 3.7081,
+      "step": 183
+    },
+    {
+      "epoch": 2.1082621082621085,
+      "grad_norm": 3.5163111686706543,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 3.5965,
+      "step": 184
+    },
+    {
+      "epoch": 2.1196581196581197,
+      "grad_norm": 3.776791572570801,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 3.852,
+      "step": 185
+    },
+    {
+      "epoch": 2.131054131054131,
+      "grad_norm": 3.408745288848877,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 3.4362,
+      "step": 186
+    },
+    {
+      "epoch": 2.1424501424501425,
+      "grad_norm": 3.794450044631958,
+      "learning_rate": 1.1506584608200367e-06,
+      "loss": 3.5493,
+      "step": 187
+    },
+    {
+      "epoch": 2.1538461538461537,
+      "grad_norm": 4.029599666595459,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 3.5246,
+      "step": 188
+    },
+    {
+      "epoch": 2.1652421652421654,
+      "grad_norm": 4.378782749176025,
+      "learning_rate": 8.247462563808817e-07,
+      "loss": 3.3886,
+      "step": 189
+    },
+    {
+      "epoch": 2.1766381766381766,
+      "grad_norm": 4.163429260253906,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 3.2662,
+      "step": 190
+    },
+    {
+      "epoch": 2.1880341880341883,
+      "grad_norm": 4.658184051513672,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 3.5128,
+      "step": 191
+    },
+    {
+      "epoch": 2.1994301994301995,
+      "grad_norm": 5.1661505699157715,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 3.7874,
+      "step": 192
+    },
+    {
+      "epoch": 2.2108262108262107,
+      "grad_norm": 5.285357475280762,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 3.6908,
+      "step": 193
+    },
+    {
+      "epoch": 2.2222222222222223,
+      "grad_norm": 5.300405025482178,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 3.6607,
+      "step": 194
+    },
+    {
+      "epoch": 2.2336182336182335,
+      "grad_norm": 5.986500263214111,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 3.5121,
+      "step": 195
+    },
+    {
+      "epoch": 2.245014245014245,
+      "grad_norm": 5.598491191864014,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 4.3549,
+      "step": 196
+    },
+    {
+      "epoch": 2.2564102564102564,
+      "grad_norm": 2.6448614597320557,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 3.5252,
+      "step": 197
+    },
+    {
+      "epoch": 2.267806267806268,
+      "grad_norm": 3.212853193283081,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 3.9089,
+      "step": 198
+    },
+    {
+      "epoch": 2.2792022792022792,
+      "grad_norm": 3.1535897254943848,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 3.603,
+      "step": 199
+    },
+    {
+      "epoch": 2.2905982905982905,
+      "grad_norm": 3.1747045516967773,
+      "learning_rate": 0.0,
+      "loss": 3.3873,
+      "step": 200
+    },
+    {
+      "epoch": 2.2905982905982905,
+      "eval_loss": 1.4082176685333252,
+      "eval_runtime": 15.3234,
+      "eval_samples_per_second": 9.658,
+      "eval_steps_per_second": 2.415,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.09187566075904e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null