Upload checkpoint 4918
- README.md +4 -4
- adapter_config.json +1 -1
- loss.png +2 -2
- optimizer.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +130 -4
README.md
CHANGED
@@ -2,7 +2,7 @@
 base_model: Qwen/Qwen2.5-7B-Instruct
 library_name: peft
 ---
-# Gradience T1 7B (Step
+# Gradience T1 7B (Step 4918 Checkpoint)
 
 > [!NOTE]
 > Training in progress...
@@ -15,11 +15,11 @@ library_name: peft
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
-<div style="height: 30px; width:
-
+<div style="height: 30px; width: 100.00%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
+100.0%
 </div>
 </div>
-<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress:
+<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 4918 out of 4918 steps</p>
 </body>
 </html>
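The README's progress bar is static HTML whose width, label, and caption are all derived from the current step and the total number of steps. A minimal sketch of how such a bar could be regenerated at each checkpoint; the render_progress_bar helper is illustrative and not part of this repository:

```python
# Minimal sketch: regenerate the README progress bar from the step counters.
# render_progress_bar and its call site are illustrative, not from this repo.
def render_progress_bar(global_step: int, max_steps: int) -> str:
    pct = 100.0 * global_step / max_steps
    return (
        '<div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; '
        'overflow: hidden; margin: 20px 0;">\n'
        f'  <div style="height: 30px; width: {pct:.2f}%; background-color: #44965a; '
        'text-align: center; line-height: 30px; color: white; '
        'border-radius: 25px 0 0 25px;">\n'
        f'    {pct:.1f}%\n'
        '  </div>\n'
        '</div>\n'
        '<p style="font-family: Arial, sans-serif; font-size: 16px;">'
        f'Progress: {global_step} out of {max_steps} steps</p>'
    )

print(render_progress_bar(4918, 4918))  # width: 100.00%, label: 100.0%
```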
adapter_config.json
CHANGED
@@ -1,7 +1,7 @@
 {
 "alpha_pattern": {},
 "auto_mapping": null,
-"base_model_name_or_path": "
+"base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
 "bias": "none",
 "eva_config": null,
 "exclude_modules": null,
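With base_model_name_or_path now pointing at the Hub id, the adapter can be attached to its base model through peft in the usual way. A minimal sketch, assuming the checkpoint files sit in a local directory named checkpoint-4918 (the path is illustrative):

```python
# Minimal sketch: load this LoRA checkpoint on top of its base model with peft.
# The local checkpoint path is illustrative; adapter_config.json and the adapter
# weights are expected to live inside it.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "Qwen/Qwen2.5-7B-Instruct"  # matches base_model_name_or_path above
tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(
    base_id, torch_dtype="auto", device_map="auto"
)

# Attach the adapter; merge_and_unload() would bake the LoRA weights into the base.
model = PeftModel.from_pretrained(base_model, "./checkpoint-4918")
```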
loss.png
CHANGED
Git LFS pointer updated (old and new oid/size not shown)
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5111f467e847f2750566ffd2cef8bd631d8c7221a6c0019c0c1320c4118e2b98
 size 82461044
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:506cf36063a4621b49ee486a38867162e37a2f0bf6058c24c0b4f12fa1181aa8
 size 1064
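Both optimizer.pt and scheduler.pt are stored as Git LFS pointers, so the sha256 oid in each pointer can be used to verify a downloaded file. A minimal sketch using Python's hashlib; the local file path is illustrative:

```python
# Minimal sketch: check a downloaded file against the sha256 oid recorded in
# its Git LFS pointer. The local path "optimizer.pt" is illustrative.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "5111f467e847f2750566ffd2cef8bd631d8c7221a6c0019c0c1320c4118e2b98"
assert sha256_of("optimizer.pt") == expected, "optimizer.pt does not match its LFS pointer"
```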
trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
 "best_global_step": null,
 "best_metric": null,
 "best_model_checkpoint": null,
-"epoch": 1.
+"epoch": 1.999390119943078,
 "eval_steps": 500,
-"global_step":
+"global_step": 4918,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -34308,6 +34308,132 @@
 "learning_rate": 7.734581721962141e-07,
 "loss": 0.937,
 "step": 4900
+},
+{
+"epoch": 1.9924781459646268,
+"grad_norm": 0.10515905171632767,
+"learning_rate": 7.327498473437818e-07,
+"loss": 0.9686,
+"step": 4901
+},
+{
+"epoch": 1.9928847326692418,
+"grad_norm": 0.1109880730509758,
+"learning_rate": 6.920415224913496e-07,
+"loss": 0.9375,
+"step": 4902
+},
+{
+"epoch": 1.9932913193738564,
+"grad_norm": 0.10059867799282074,
+"learning_rate": 6.513331976389172e-07,
+"loss": 0.9148,
+"step": 4903
+},
+{
+"epoch": 1.9936979060784712,
+"grad_norm": 0.1153227686882019,
+"learning_rate": 6.106248727864849e-07,
+"loss": 1.065,
+"step": 4904
+},
+{
+"epoch": 1.994104492783086,
+"grad_norm": 0.10817611962556839,
+"learning_rate": 5.699165479340526e-07,
+"loss": 0.9162,
+"step": 4905
+},
+{
+"epoch": 1.9945110794877008,
+"grad_norm": 0.09951157122850418,
+"learning_rate": 5.292082230816202e-07,
+"loss": 0.885,
+"step": 4906
+},
+{
+"epoch": 1.9949176661923156,
+"grad_norm": 0.1026596650481224,
+"learning_rate": 4.884998982291879e-07,
+"loss": 0.9054,
+"step": 4907
+},
+{
+"epoch": 1.9953242528969302,
+"grad_norm": 0.10928881913423538,
+"learning_rate": 4.4779157337675555e-07,
+"loss": 0.9206,
+"step": 4908
+},
+{
+"epoch": 1.9957308396015452,
+"grad_norm": 0.1039741113781929,
+"learning_rate": 4.070832485243233e-07,
+"loss": 0.9762,
+"step": 4909
+},
+{
+"epoch": 1.9961374263061598,
+"grad_norm": 0.10720765590667725,
+"learning_rate": 3.663749236718909e-07,
+"loss": 0.9376,
+"step": 4910
+},
+{
+"epoch": 1.9965440130107746,
+"grad_norm": 0.11087562888860703,
+"learning_rate": 3.256665988194586e-07,
+"loss": 1.0135,
+"step": 4911
+},
+{
+"epoch": 1.9969505997153894,
+"grad_norm": 0.11333035677671432,
+"learning_rate": 2.849582739670263e-07,
+"loss": 0.9378,
+"step": 4912
+},
+{
+"epoch": 1.997357186420004,
+"grad_norm": 0.10567180067300797,
+"learning_rate": 2.4424994911459393e-07,
+"loss": 0.8727,
+"step": 4913
+},
+{
+"epoch": 1.997763773124619,
+"grad_norm": 0.09908761829137802,
+"learning_rate": 2.0354162426216164e-07,
+"loss": 0.8175,
+"step": 4914
+},
+{
+"epoch": 1.9981703598292335,
+"grad_norm": 0.1148877665400505,
+"learning_rate": 1.628332994097293e-07,
+"loss": 0.9689,
+"step": 4915
+},
+{
+"epoch": 1.9985769465338483,
+"grad_norm": 0.1073300689458847,
+"learning_rate": 1.2212497455729696e-07,
+"loss": 0.9064,
+"step": 4916
+},
+{
+"epoch": 1.9989835332384631,
+"grad_norm": 0.10753702372312546,
+"learning_rate": 8.141664970486465e-08,
+"loss": 0.9366,
+"step": 4917
+},
+{
+"epoch": 1.999390119943078,
+"grad_norm": 0.10542717576026917,
+"learning_rate": 4.0708324852432326e-08,
+"loss": 0.8963,
+"step": 4918
 }
 ],
 "logging_steps": 1,
@@ -34322,12 +34448,12 @@
 "should_evaluate": false,
 "should_log": false,
 "should_save": true,
-"should_training_stop":
+"should_training_stop": true
 },
 "attributes": {}
 }
 },
-"total_flos": 1.
+"total_flos": 1.57185946392996e+19,
 "train_batch_size": 16,
 "trial_name": null,
 "trial_params": null
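In the appended log entries the learning rate falls by a constant ≈4.07e-8 per step and reaches exactly that value at the final step, which is consistent with a linear decay schedule such as transformers' get_linear_schedule_with_warmup. A minimal sketch that reproduces the tail values under assumed hyperparameters (peak LR 2e-4, 4918 training steps, 5 warmup steps; none of these are stated in the diff):

```python
# Minimal sketch: reproduce the tail of the logged learning-rate schedule.
# Peak LR, total steps, and warmup steps are assumptions inferred from the
# logged values, not read from the actual training configuration.
def linear_lr(current_step: int,
              base_lr: float = 2e-4,
              num_training_steps: int = 4918,
              num_warmup_steps: int = 5) -> float:
    # Mirrors transformers.get_linear_schedule_with_warmup.
    if current_step < num_warmup_steps:
        return base_lr * current_step / max(1, num_warmup_steps)
    return base_lr * max(
        0.0,
        (num_training_steps - current_step) / (num_training_steps - num_warmup_steps),
    )

# The LR logged for optimizer step s is the scheduler value at step s - 1.
print(linear_lr(4899))  # ~7.7346e-07, matches the entry for "step": 4900
print(linear_lr(4917))  # ~4.0708e-08, matches the entry for "step": 4918
```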