Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f7d6677ebfb4913ed082e19bc9ba657ecc0abd91cc965d3dfb9e93c6807fad9
 size 1001465824

 version https://git-lfs.github.com/spec/v1
+oid sha256:ae9ec60f16edfd89f42f68519072b89ba19fabb31d731736f32b67c6bcd94fb3
 size 1001465824

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f584c5a33394cab79ad5573713e1a2dda233cc7e450320c9836fe41ac869109
 size 509176980

 version https://git-lfs.github.com/spec/v1
+oid sha256:c3c0e2d12fb3923641bfd867761ce1a654edd2c4355e624db6926c058d9f1af1
 size 509176980

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ab34d808034faa9b0c67b5282efa7338052c98ac1cba6e714c082e6dcea76aa
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:fee0353af67f64bbe6ef9ed7ab40fb3815f458ff8ea74533429be88b75734378
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:01c5525f1d8420ca8a81a7fd2ec397a508131d03210dfd36c7ac5758b0e6313b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a3e88ef6a2716260516e17223973d6a3b0a4c88bf12c72ed47e80e6f2a6782fd
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.005204594228416681,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.005692923695846063,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 6.06,
       "eval_steps_per_second": 1.515,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 9.692377698533376e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.00452503003180027,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.007590564927794751,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 6.06,
       "eval_steps_per_second": 1.515,
       "step": 150
+    },
+    {
+      "epoch": 0.005730876520485037,
+      "grad_norm": 0.1738223284482956,
+      "learning_rate": 2.589263157894737e-05,
+      "loss": 0.0216,
+      "step": 151
+    },
+    {
+      "epoch": 0.005768829345124011,
+      "grad_norm": 0.07341416925191879,
+      "learning_rate": 2.536421052631579e-05,
+      "loss": 0.0026,
+      "step": 152
+    },
+    {
+      "epoch": 0.005806782169762985,
+      "grad_norm": 0.18592970073223114,
+      "learning_rate": 2.483578947368421e-05,
+      "loss": 0.0166,
+      "step": 153
+    },
+    {
+      "epoch": 0.005844734994401958,
+      "grad_norm": 0.22769445180892944,
+      "learning_rate": 2.430736842105263e-05,
+      "loss": 0.002,
+      "step": 154
+    },
+    {
+      "epoch": 0.0058826878190409325,
+      "grad_norm": 0.15929220616817474,
+      "learning_rate": 2.3778947368421052e-05,
+      "loss": 0.0081,
+      "step": 155
+    },
+    {
+      "epoch": 0.005920640643679906,
+      "grad_norm": 0.6884347796440125,
+      "learning_rate": 2.3250526315789473e-05,
+      "loss": 0.0261,
+      "step": 156
+    },
+    {
+      "epoch": 0.005958593468318879,
+      "grad_norm": 0.01688176579773426,
+      "learning_rate": 2.2722105263157894e-05,
+      "loss": 0.0008,
+      "step": 157
+    },
+    {
+      "epoch": 0.0059965462929578535,
+      "grad_norm": 0.12140548229217529,
+      "learning_rate": 2.2193684210526316e-05,
+      "loss": 0.0023,
+      "step": 158
+    },
+    {
+      "epoch": 0.006034499117596827,
+      "grad_norm": 0.393767386674881,
+      "learning_rate": 2.1665263157894737e-05,
+      "loss": 0.0198,
+      "step": 159
+    },
+    {
+      "epoch": 0.006072451942235801,
+      "grad_norm": 0.03978809714317322,
+      "learning_rate": 2.1136842105263158e-05,
+      "loss": 0.0023,
+      "step": 160
+    },
+    {
+      "epoch": 0.0061104047668747745,
+      "grad_norm": 0.023572852835059166,
+      "learning_rate": 2.060842105263158e-05,
+      "loss": 0.0008,
+      "step": 161
+    },
+    {
+      "epoch": 0.006148357591513749,
+      "grad_norm": 0.04517120495438576,
+      "learning_rate": 2.008e-05,
+      "loss": 0.0022,
+      "step": 162
+    },
+    {
+      "epoch": 0.006186310416152722,
+      "grad_norm": 0.24617835879325867,
+      "learning_rate": 1.9551578947368422e-05,
+      "loss": 0.0219,
+      "step": 163
+    },
+    {
+      "epoch": 0.006224263240791696,
+      "grad_norm": 0.12453620135784149,
+      "learning_rate": 1.9023157894736843e-05,
+      "loss": 0.0077,
+      "step": 164
+    },
+    {
+      "epoch": 0.00626221606543067,
+      "grad_norm": 0.0945534035563469,
+      "learning_rate": 1.849473684210526e-05,
+      "loss": 0.0041,
+      "step": 165
+    },
+    {
+      "epoch": 0.006300168890069643,
+      "grad_norm": 0.20276235044002533,
+      "learning_rate": 1.7966315789473686e-05,
+      "loss": 0.0096,
+      "step": 166
+    },
+    {
+      "epoch": 0.006338121714708617,
+      "grad_norm": 0.46055176854133606,
+      "learning_rate": 1.7437894736842107e-05,
+      "loss": 0.048,
+      "step": 167
+    },
+    {
+      "epoch": 0.006376074539347591,
+      "grad_norm": 0.09639254957437515,
+      "learning_rate": 1.6909473684210525e-05,
+      "loss": 0.006,
+      "step": 168
+    },
+    {
+      "epoch": 0.006414027363986565,
+      "grad_norm": 0.6072278022766113,
+      "learning_rate": 1.638105263157895e-05,
+      "loss": 0.0378,
+      "step": 169
+    },
+    {
+      "epoch": 0.006451980188625538,
+      "grad_norm": 0.13296294212341309,
+      "learning_rate": 1.5852631578947368e-05,
+      "loss": 0.0027,
+      "step": 170
+    },
+    {
+      "epoch": 0.0064899330132645126,
+      "grad_norm": 0.012549227103590965,
+      "learning_rate": 1.532421052631579e-05,
+      "loss": 0.0006,
+      "step": 171
+    },
+    {
+      "epoch": 0.006527885837903486,
+      "grad_norm": 0.021954134106636047,
+      "learning_rate": 1.4795789473684209e-05,
+      "loss": 0.0011,
+      "step": 172
+    },
+    {
+      "epoch": 0.006565838662542459,
+      "grad_norm": 0.29627010226249695,
+      "learning_rate": 1.4267368421052632e-05,
+      "loss": 0.0105,
+      "step": 173
+    },
+    {
+      "epoch": 0.0066037914871814336,
+      "grad_norm": 0.005537001416087151,
+      "learning_rate": 1.3738947368421053e-05,
+      "loss": 0.0003,
+      "step": 174
+    },
+    {
+      "epoch": 0.006641744311820407,
+      "grad_norm": 0.19363458454608917,
+      "learning_rate": 1.3210526315789473e-05,
+      "loss": 0.008,
+      "step": 175
+    },
+    {
+      "epoch": 0.006679697136459381,
+      "grad_norm": 0.0236789733171463,
+      "learning_rate": 1.2682105263157896e-05,
+      "loss": 0.001,
+      "step": 176
+    },
+    {
+      "epoch": 0.0067176499610983546,
+      "grad_norm": 0.020757243037223816,
+      "learning_rate": 1.2153684210526315e-05,
+      "loss": 0.0011,
+      "step": 177
+    },
+    {
+      "epoch": 0.006755602785737329,
+      "grad_norm": 0.07720403373241425,
+      "learning_rate": 1.1625263157894737e-05,
+      "loss": 0.0023,
+      "step": 178
+    },
+    {
+      "epoch": 0.006793555610376302,
+      "grad_norm": 0.01530242059379816,
+      "learning_rate": 1.1096842105263158e-05,
+      "loss": 0.0009,
+      "step": 179
+    },
+    {
+      "epoch": 0.006831508435015276,
+      "grad_norm": 0.012970831245183945,
+      "learning_rate": 1.0568421052631579e-05,
+      "loss": 0.0005,
+      "step": 180
+    },
+    {
+      "epoch": 0.00686946125965425,
+      "grad_norm": 0.13305489718914032,
+      "learning_rate": 1.004e-05,
+      "loss": 0.004,
+      "step": 181
+    },
+    {
+      "epoch": 0.006907414084293223,
+      "grad_norm": 0.012722056359052658,
+      "learning_rate": 9.511578947368422e-06,
+      "loss": 0.0006,
+      "step": 182
+    },
+    {
+      "epoch": 0.006945366908932197,
+      "grad_norm": 0.008336643688380718,
+      "learning_rate": 8.983157894736843e-06,
+      "loss": 0.0005,
+      "step": 183
+    },
+    {
+      "epoch": 0.006983319733571171,
+      "grad_norm": 0.036989495158195496,
+      "learning_rate": 8.454736842105263e-06,
+      "loss": 0.0012,
+      "step": 184
+    },
+    {
+      "epoch": 0.007021272558210145,
+      "grad_norm": 0.22104616463184357,
+      "learning_rate": 7.926315789473684e-06,
+      "loss": 0.0103,
+      "step": 185
+    },
+    {
+      "epoch": 0.007059225382849118,
+      "grad_norm": 0.027394304051995277,
+      "learning_rate": 7.397894736842104e-06,
+      "loss": 0.001,
+      "step": 186
+    },
+    {
+      "epoch": 0.007097178207488093,
+      "grad_norm": 0.08365759998559952,
+      "learning_rate": 6.8694736842105265e-06,
+      "loss": 0.0024,
+      "step": 187
+    },
+    {
+      "epoch": 0.007135131032127066,
+      "grad_norm": 0.035681914538145065,
+      "learning_rate": 6.341052631578948e-06,
+      "loss": 0.0019,
+      "step": 188
+    },
+    {
+      "epoch": 0.007173083856766039,
+      "grad_norm": 0.02156599797308445,
+      "learning_rate": 5.812631578947368e-06,
+      "loss": 0.0008,
+      "step": 189
+    },
+    {
+      "epoch": 0.007211036681405014,
+      "grad_norm": 0.01591232419013977,
+      "learning_rate": 5.2842105263157896e-06,
+      "loss": 0.0006,
+      "step": 190
+    },
+    {
+      "epoch": 0.007248989506043987,
+      "grad_norm": 0.04397885501384735,
+      "learning_rate": 4.755789473684211e-06,
+      "loss": 0.0013,
+      "step": 191
+    },
+    {
+      "epoch": 0.007286942330682961,
+      "grad_norm": 0.024483874440193176,
+      "learning_rate": 4.227368421052631e-06,
+      "loss": 0.001,
+      "step": 192
+    },
+    {
+      "epoch": 0.007324895155321935,
+      "grad_norm": 0.002727488288655877,
+      "learning_rate": 3.698947368421052e-06,
+      "loss": 0.0001,
+      "step": 193
+    },
+    {
+      "epoch": 0.007362847979960909,
+      "grad_norm": 0.014970963820815086,
+      "learning_rate": 3.170526315789474e-06,
+      "loss": 0.0006,
+      "step": 194
+    },
+    {
+      "epoch": 0.007400800804599882,
+      "grad_norm": 0.04240657016634941,
+      "learning_rate": 2.6421052631578948e-06,
+      "loss": 0.0015,
+      "step": 195
+    },
+    {
+      "epoch": 0.007438753629238856,
+      "grad_norm": 0.8572260737419128,
+      "learning_rate": 2.1136842105263157e-06,
+      "loss": 0.1317,
+      "step": 196
+    },
+    {
+      "epoch": 0.00747670645387783,
+      "grad_norm": 0.14317569136619568,
+      "learning_rate": 1.585263157894737e-06,
+      "loss": 0.0045,
+      "step": 197
+    },
+    {
+      "epoch": 0.007514659278516803,
+      "grad_norm": 0.20292750000953674,
+      "learning_rate": 1.0568421052631578e-06,
+      "loss": 0.0066,
+      "step": 198
+    },
+    {
+      "epoch": 0.0075526121031557775,
+      "grad_norm": 0.05312797799706459,
+      "learning_rate": 5.284210526315789e-07,
+      "loss": 0.0017,
+      "step": 199
+    },
+    {
+      "epoch": 0.007590564927794751,
+      "grad_norm": 0.11282480508089066,
+      "learning_rate": 0.0,
+      "loss": 0.003,
+      "step": 200
+    },
+    {
+      "epoch": 0.007590564927794751,
+      "eval_loss": 0.00452503003180027,
+      "eval_runtime": 1815.3286,
+      "eval_samples_per_second": 6.112,
+      "eval_steps_per_second": 1.528,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.2912436734590976e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null