Training in progress, step 72, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +66 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:198b80ec28ce5ce0b3d455500f182b56ea364aa1528ced46756d7be144210032
 size 100966336

 version https://git-lfs.github.com/spec/v1
+oid sha256:b6e702b14829d11f50416692b7314645eed8edb9ef55004a134630ab89f21564
 size 100966336

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce776e8b16137f8ecdb7f2407056410613034570db744a54ed5a4598b6eed8d5
 size 202110330

 version https://git-lfs.github.com/spec/v1
+oid sha256:51a6dc0f4490063fd77dbc7abd15e069a4ceba0555b0127116aa94a284c08b7f
 size 202110330

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa8e202d1e724bd48d211480607ebff7d50e6294a8b7441e06c81ba075040699
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:9df98b952a993c028712d01917c33ffa810e4469add5fd029bc7022e9ce56793
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d85eac2daddfe8e3f78a5d6ef1e9ba13c04651694635a9ed76369a20726389db
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:8d43bd90ad476e419738deb9472ad85fd5991005a147e1627aa99867bdfc5655
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8488421052631578,
   "eval_steps": 50,
-  "global_step": 63,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -464,6 +464,69 @@
       "learning_rate": 6.618469519066217e-05,
       "loss": 1.4667,
       "step": 63
     }
   ],
   "logging_steps": 1,
@@ -483,7 +546,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.25052327011287e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9701052631578947,
   "eval_steps": 50,
+  "global_step": 72,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 6.618469519066217e-05,
       "loss": 1.4667,
       "step": 63
+    },
+    {
+      "epoch": 0.8623157894736843,
+      "grad_norm": 0.5462284684181213,
+      "learning_rate": 6.326741512198266e-05,
+      "loss": 1.4505,
+      "step": 64
+    },
+    {
+      "epoch": 0.8757894736842106,
+      "grad_norm": 0.6210593581199646,
+      "learning_rate": 6.036822584879038e-05,
+      "loss": 1.2947,
+      "step": 65
+    },
+    {
+      "epoch": 0.8892631578947369,
+      "grad_norm": 0.6453770399093628,
+      "learning_rate": 5.7491597710807114e-05,
+      "loss": 1.3575,
+      "step": 66
+    },
+    {
+      "epoch": 0.9027368421052632,
+      "grad_norm": 0.6173303127288818,
+      "learning_rate": 5.464196626011943e-05,
+      "loss": 1.3685,
+      "step": 67
+    },
+    {
+      "epoch": 0.9162105263157895,
+      "grad_norm": 0.6161783933639526,
+      "learning_rate": 5.182372542187895e-05,
+      "loss": 1.5084,
+      "step": 68
+    },
+    {
+      "epoch": 0.9296842105263158,
+      "grad_norm": 0.5926702618598938,
+      "learning_rate": 4.904122071918801e-05,
+      "loss": 1.5106,
+      "step": 69
+    },
+    {
+      "epoch": 0.9431578947368421,
+      "grad_norm": 0.7311588525772095,
+      "learning_rate": 4.6298742572618266e-05,
+      "loss": 1.3789,
+      "step": 70
+    },
+    {
+      "epoch": 0.9566315789473684,
+      "grad_norm": 0.5569392442703247,
+      "learning_rate": 4.360051968469291e-05,
+      "loss": 1.2037,
+      "step": 71
+    },
+    {
+      "epoch": 0.9701052631578947,
+      "grad_norm": 0.49740126729011536,
+      "learning_rate": 4.095071251953399e-05,
+      "loss": 1.3472,
+      "step": 72
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 6.000598022986138e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null