Training in progress, step 27, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +66 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4067b0eaec418c2f628fb51d5a070c4a712a9f1b4002e70aad6ba6e83016b712
 size 100966336

 version https://git-lfs.github.com/spec/v1
+oid sha256:91270bfed96898597e98a0dc329e749214ec79fc85295ed43e1076f41803c84a
 size 100966336

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b2306a49040dac8712d96023099d7ac11c0252b0a8f841164afd269598ad2c9
 size 202110330

 version https://git-lfs.github.com/spec/v1
+oid sha256:22e6c4ca044bb438e090f58ead16e15cbf7d5f67d276507a6456b85d0f6b1746
 size 202110330

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96541d88a7cf43aafce1f16ea1e16556284f71e18b276294cc3dc81a783e005f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:04b005073ce3487d1a9dc5d2831d6dd25abdac86b29562322c49da0afe478a92
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4b679dfcad285902b67d18379c2cc1ca0aebfc7646cb33fa82a3fb8ed15c820
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f1f9e237c4e244cd6a21b3069d52ab1ce3e784c965dcb77abb8266616185916c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.24252631578947367,
   "eval_steps": 50,
-  "global_step": 18,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -141,6 +141,69 @@
       "learning_rate": 0.000135,
       "loss": 1.6469,
       "step": 18
     }
   ],
   "logging_steps": 1,
@@ -160,7 +223,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.5001495057465344e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.36378947368421055,
   "eval_steps": 50,
+  "global_step": 27,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.000135,
       "loss": 1.6469,
       "step": 18
+    },
+    {
+      "epoch": 0.256,
+      "grad_norm": 1.5747898817062378,
+      "learning_rate": 0.0001425,
+      "loss": 1.6895,
+      "step": 19
+    },
+    {
+      "epoch": 0.2694736842105263,
+      "grad_norm": 0.9997685551643372,
+      "learning_rate": 0.00015,
+      "loss": 1.5248,
+      "step": 20
+    },
+    {
+      "epoch": 0.2829473684210526,
+      "grad_norm": 1.195119857788086,
+      "learning_rate": 0.00014994217771805422,
+      "loss": 1.5649,
+      "step": 21
+    },
+    {
+      "epoch": 0.296421052631579,
+      "grad_norm": 0.8751718401908875,
+      "learning_rate": 0.00014976880002998458,
+      "loss": 1.5405,
+      "step": 22
+    },
+    {
+      "epoch": 0.3098947368421053,
+      "grad_norm": 0.8566117882728577,
+      "learning_rate": 0.00014948013427161947,
+      "loss": 1.5504,
+      "step": 23
+    },
+    {
+      "epoch": 0.3233684210526316,
+      "grad_norm": 0.7322584390640259,
+      "learning_rate": 0.00014907662554463532,
+      "loss": 1.5034,
+      "step": 24
+    },
+    {
+      "epoch": 0.3368421052631579,
+      "grad_norm": 0.9539948105812073,
+      "learning_rate": 0.00014855889603024227,
+      "loss": 1.4513,
+      "step": 25
+    },
+    {
+      "epoch": 0.3503157894736842,
+      "grad_norm": 0.7042058110237122,
+      "learning_rate": 0.00014792774402982574,
+      "loss": 1.5281,
+      "step": 26
+    },
+    {
+      "epoch": 0.36378947368421055,
+      "grad_norm": 0.6478146910667419,
+      "learning_rate": 0.0001471841427340235,
+      "loss": 1.5117,
+      "step": 27
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.2502242586198016e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null