Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfc3066dcf11c74f659f8e2489593f651df89605a02276ac084b6b563eba4c47
 size 402688040

 version https://git-lfs.github.com/spec/v1
+oid sha256:ef7906fda5b0b9051fa1da5896a968534c1f17ceae7571f6f661631ffea73a94
 size 402688040

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b90b778a0cc8c5b59172c8b82d192ac8710fcd67139b423564d2a3380931bd8
 size 204773716

 version https://git-lfs.github.com/spec/v1
+oid sha256:93c9507b9ac220d02666636f61ae275df279fce99808a93f880b16eb94dbe761
 size 204773716

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4f93d775008047d80cc712deca47a0bf530eb1f0d82d95fd64d7c8644b8c2ec6
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2bce5d2f658e5c9fc45ee821d419c15e9f7a0a8ba464d2f04bc361bd5f62b962
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8ce05761f46e7cf72fb17a02e3a0ca15c9d25ce3babf590eeb40568923b8bac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.2779566049575806,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.15337423312883436,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 21.42,
       "eval_steps_per_second": 5.355,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.1312784155344896e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.2678008079528809,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.20449897750511248,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 21.42,
       "eval_steps_per_second": 5.355,
       "step": 150
+    },
+    {
+      "epoch": 0.15439672801635992,
+      "grad_norm": 1.3365451097488403,
+      "learning_rate": 1.553232954407171e-05,
+      "loss": 5.0941,
+      "step": 151
+    },
+    {
+      "epoch": 0.1554192229038855,
+      "grad_norm": 1.2911239862442017,
+      "learning_rate": 1.4938160786375572e-05,
+      "loss": 5.1447,
+      "step": 152
+    },
+    {
+      "epoch": 0.15644171779141106,
+      "grad_norm": 1.4097652435302734,
+      "learning_rate": 1.435357758543015e-05,
+      "loss": 5.0452,
+      "step": 153
+    },
+    {
+      "epoch": 0.1574642126789366,
+      "grad_norm": 1.4519466161727905,
+      "learning_rate": 1.3778739760445552e-05,
+      "loss": 5.0879,
+      "step": 154
+    },
+    {
+      "epoch": 0.15848670756646216,
+      "grad_norm": 1.3766026496887207,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 5.1179,
+      "step": 155
+    },
+    {
+      "epoch": 0.15950920245398773,
+      "grad_norm": 1.528366208076477,
+      "learning_rate": 1.2658926150792322e-05,
+      "loss": 5.2507,
+      "step": 156
+    },
+    {
+      "epoch": 0.1605316973415133,
+      "grad_norm": 1.5099884271621704,
+      "learning_rate": 1.2114256511983274e-05,
+      "loss": 4.9263,
+      "step": 157
+    },
+    {
+      "epoch": 0.16155419222903886,
+      "grad_norm": 1.5873007774353027,
+      "learning_rate": 1.157994445715706e-05,
+      "loss": 5.208,
+      "step": 158
+    },
+    {
+      "epoch": 0.16257668711656442,
+      "grad_norm": 1.6648768186569214,
+      "learning_rate": 1.1056136061894384e-05,
+      "loss": 5.2683,
+      "step": 159
+    },
+    {
+      "epoch": 0.16359918200409,
+      "grad_norm": 1.6657906770706177,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 4.9056,
+      "step": 160
+    },
+    {
+      "epoch": 0.16462167689161555,
+      "grad_norm": 1.6533324718475342,
+      "learning_rate": 1.0040600155253765e-05,
+      "loss": 5.0893,
+      "step": 161
+    },
+    {
+      "epoch": 0.1656441717791411,
+      "grad_norm": 1.5915066003799438,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 5.0327,
+      "step": 162
+    },
+    {
+      "epoch": 0.16666666666666666,
+      "grad_norm": 1.6049374341964722,
+      "learning_rate": 9.068759265665384e-06,
+      "loss": 5.2182,
+      "step": 163
+    },
+    {
+      "epoch": 0.16768916155419222,
+      "grad_norm": 1.6870073080062866,
+      "learning_rate": 8.599558442598998e-06,
+      "loss": 5.1544,
+      "step": 164
+    },
+    {
+      "epoch": 0.1687116564417178,
+      "grad_norm": 1.659881353378296,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 5.2233,
+      "step": 165
+    },
+    {
+      "epoch": 0.16973415132924335,
+      "grad_norm": 1.7923842668533325,
+      "learning_rate": 7.695237378953223e-06,
+      "loss": 5.0128,
+      "step": 166
+    },
+    {
+      "epoch": 0.17075664621676892,
+      "grad_norm": 1.7901921272277832,
+      "learning_rate": 7.260364370723044e-06,
+      "loss": 5.0846,
+      "step": 167
+    },
+    {
+      "epoch": 0.17177914110429449,
+      "grad_norm": 1.7227494716644287,
+      "learning_rate": 6.837175952121306e-06,
+      "loss": 4.6846,
+      "step": 168
+    },
+    {
+      "epoch": 0.17280163599182005,
+      "grad_norm": 1.754355549812317,
+      "learning_rate": 6.425787818636131e-06,
+      "loss": 4.6024,
+      "step": 169
+    },
+    {
+      "epoch": 0.1738241308793456,
+      "grad_norm": 1.9260191917419434,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 4.621,
+      "step": 170
+    },
+    {
+      "epoch": 0.17484662576687116,
+      "grad_norm": 1.820291519165039,
+      "learning_rate": 5.6388590278194096e-06,
+      "loss": 5.1781,
+      "step": 171
+    },
+    {
+      "epoch": 0.17586912065439672,
+      "grad_norm": 1.795501708984375,
+      "learning_rate": 5.263533508961827e-06,
+      "loss": 5.028,
+      "step": 172
+    },
+    {
+      "epoch": 0.1768916155419223,
+      "grad_norm": 1.7812637090682983,
+      "learning_rate": 4.900438493352055e-06,
+      "loss": 4.8874,
+      "step": 173
+    },
+    {
+      "epoch": 0.17791411042944785,
+      "grad_norm": 1.9762341976165771,
+      "learning_rate": 4.549673247541875e-06,
+      "loss": 4.9367,
+      "step": 174
+    },
+    {
+      "epoch": 0.17893660531697342,
+      "grad_norm": 1.8496299982070923,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 4.6698,
+      "step": 175
+    },
+    {
+      "epoch": 0.17995910020449898,
+      "grad_norm": 1.8580443859100342,
+      "learning_rate": 3.885512251130763e-06,
+      "loss": 4.4089,
+      "step": 176
+    },
+    {
+      "epoch": 0.18098159509202455,
+      "grad_norm": 1.8747975826263428,
+      "learning_rate": 3.5722980755146517e-06,
+      "loss": 4.7028,
+      "step": 177
+    },
+    {
+      "epoch": 0.18200408997955012,
+      "grad_norm": 2.0866196155548096,
+      "learning_rate": 3.271776770026963e-06,
+      "loss": 5.2959,
+      "step": 178
+    },
+    {
+      "epoch": 0.18302658486707565,
+      "grad_norm": 2.302617073059082,
+      "learning_rate": 2.9840304941919415e-06,
+      "loss": 4.9475,
+      "step": 179
+    },
+    {
+      "epoch": 0.18404907975460122,
+      "grad_norm": 2.020359992980957,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 4.8951,
+      "step": 180
+    },
+    {
+      "epoch": 0.18507157464212678,
+      "grad_norm": 2.0235023498535156,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 4.9788,
+      "step": 181
+    },
+    {
+      "epoch": 0.18609406952965235,
+      "grad_norm": 2.0184152126312256,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 4.5104,
+      "step": 182
+    },
+    {
+      "epoch": 0.18711656441717792,
+      "grad_norm": 2.1669201850891113,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 5.0589,
+      "step": 183
+    },
+    {
+      "epoch": 0.18813905930470348,
+      "grad_norm": 2.0412745475769043,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 4.7311,
+      "step": 184
+    },
+    {
+      "epoch": 0.18916155419222905,
+      "grad_norm": 2.16290545463562,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 4.8338,
+      "step": 185
+    },
+    {
+      "epoch": 0.1901840490797546,
+      "grad_norm": 2.34346079826355,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 5.3892,
+      "step": 186
+    },
+    {
+      "epoch": 0.19120654396728015,
+      "grad_norm": 2.2262260913848877,
+      "learning_rate": 1.1506584608200367e-06,
+      "loss": 4.7967,
+      "step": 187
+    },
+    {
+      "epoch": 0.19222903885480572,
+      "grad_norm": 2.3977932929992676,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 5.253,
+      "step": 188
+    },
+    {
+      "epoch": 0.19325153374233128,
+      "grad_norm": 2.1948745250701904,
+      "learning_rate": 8.247462563808817e-07,
+      "loss": 4.4234,
+      "step": 189
+    },
+    {
+      "epoch": 0.19427402862985685,
+      "grad_norm": 2.480764389038086,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 4.8305,
+      "step": 190
+    },
+    {
+      "epoch": 0.19529652351738241,
+      "grad_norm": 2.3602654933929443,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 4.9307,
+      "step": 191
+    },
+    {
+      "epoch": 0.19631901840490798,
+      "grad_norm": 2.5799152851104736,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 5.291,
+      "step": 192
+    },
+    {
+      "epoch": 0.19734151329243355,
+      "grad_norm": 2.59062123298645,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 5.582,
+      "step": 193
+    },
+    {
+      "epoch": 0.1983640081799591,
+      "grad_norm": 2.99045991897583,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 5.2994,
+      "step": 194
+    },
+    {
+      "epoch": 0.19938650306748465,
+      "grad_norm": 2.8544366359710693,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 5.2658,
+      "step": 195
+    },
+    {
+      "epoch": 0.20040899795501022,
+      "grad_norm": 2.7591915130615234,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 4.7309,
+      "step": 196
+    },
+    {
+      "epoch": 0.20143149284253578,
+      "grad_norm": 3.4523959159851074,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 4.8548,
+      "step": 197
+    },
+    {
+      "epoch": 0.20245398773006135,
+      "grad_norm": 3.9901299476623535,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 5.2126,
+      "step": 198
+    },
+    {
+      "epoch": 0.2034764826175869,
+      "grad_norm": 4.212128639221191,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 4.7981,
+      "step": 199
+    },
+    {
+      "epoch": 0.20449897750511248,
+      "grad_norm": 7.657096862792969,
+      "learning_rate": 0.0,
+      "loss": 6.1696,
+      "step": 200
+    },
+    {
+      "epoch": 0.20449897750511248,
+      "eval_loss": 1.2678008079528809,
+      "eval_runtime": 76.9402,
+      "eval_samples_per_second": 21.419,
+      "eval_steps_per_second": 5.355,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.5089976207212544e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null