Training in progress, step 50, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:518e2a59a672ebfdce25172ded580f4439db7c91c524a9045fa14876481c3a94
 size 191968

 version https://git-lfs.github.com/spec/v1
+oid sha256:e26038e87d8aeb13c2d2d09623bbe22bb49c2482161d6eb3d482ef43db2b843a
 size 191968

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:21b34790e22caaa0802ce3e48b112cc3e4875429c7787ef5ef3bc8e3e784d6a3
 size 253144

 version https://git-lfs.github.com/spec/v1
+oid sha256:c54e90fed14c551a28052384395c4fbd5aefd8aabca9a72fcbcbc32bf025ca0f
 size 253144

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f6aaadfc3a2995ea3bff4c91194faec2c49a54ff830cb39f07c142f67055ad8
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2d1201220196b0eea01cc2e9383598f0df0fda082156bad64ee5af51ec629e09
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:25dcb720489a6dcacdf3d1cc18c27188f9ede55d75d21b0750e7d6b79b7371e9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e69e2b49ea642509f0c688c16fb190b7cf27dac0a18903a5e2d1467d0343d8b8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.1631912964641886,
   "eval_steps": 5,
-  "global_step": 45,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -192,6 +192,21 @@
       "eval_samples_per_second": 283.743,
       "eval_steps_per_second": 35.613,
       "step": 45
     }
   ],
   "logging_steps": 3,
@@ -206,12 +221,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 10039559454720.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.1813236627379873,
   "eval_steps": 5,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 283.743,
       "eval_steps_per_second": 35.613,
       "step": 45
+    },
+    {
+      "epoch": 0.17407071622846781,
+      "grad_norm": 0.037530913949012756,
+      "learning_rate": 2.1852399266194314e-06,
+      "loss": 10.3563,
+      "step": 48
+    },
+    {
+      "epoch": 0.1813236627379873,
+      "eval_loss": 10.355225563049316,
+      "eval_runtime": 3.4386,
+      "eval_samples_per_second": 285.001,
+      "eval_steps_per_second": 35.77,
+      "step": 50
     }
   ],
   "logging_steps": 3,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 11155066060800.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null