Upload checkpoint 4918
- README.md +4 -4
- adapter_config.json +1 -1
- loss.png +2 -2
- optimizer.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +130 -4
README.md
CHANGED
@@ -2,7 +2,7 @@
 base_model: Qwen/Qwen2.5-7B-Instruct
 library_name: peft
 ---
-# Gradience T1 7B (Step
+# Gradience T1 7B (Step 4918 Checkpoint)
 
 > [!NOTE]
 > Training in progress...
@@ -15,11 +15,11 @@ library_name: peft
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
-<div style="height: 30px; width:
-
+<div style="height: 30px; width: 100.00%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
+100.0%
 </div>
 </div>
-<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress:
+<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 4918 out of 4918 steps</p>
 </body>
 </html>
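The README's progress bar is static HTML whose width, label, and caption are all derived from the current step and the total number of steps. A minimal sketch of how such a bar could be regenerated at each checkpoint; the render_progress_bar helper is illustrative and not part of this repository:

```python
# Minimal sketch: regenerate the README progress bar from the step counters.
# render_progress_bar and its call site are illustrative, not from this repo.
def render_progress_bar(global_step: int, max_steps: int) -> str:
    pct = 100.0 * global_step / max_steps
    return (
        '<div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; '
        'overflow: hidden; margin: 20px 0;">\n'
        f'  <div style="height: 30px; width: {pct:.2f}%; background-color: #44965a; '
        'text-align: center; line-height: 30px; color: white; '
        'border-radius: 25px 0 0 25px;">\n'
        f'    {pct:.1f}%\n'
        '  </div>\n'
        '</div>\n'
        '<p style="font-family: Arial, sans-serif; font-size: 16px;">'
        f'Progress: {global_step} out of {max_steps} steps</p>'
    )

print(render_progress_bar(4918, 4918))  # width: 100.00%, label: 100.0%
```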
adapter_config.json
CHANGED
@@ -1,7 +1,7 @@
 {
 "alpha_pattern": {},
 "auto_mapping": null,
-"base_model_name_or_path": "
+"base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
 "bias": "none",
 "eva_config": null,
 "exclude_modules": null,
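With base_model_name_or_path now pointing at the Hub id, the adapter can be attached to its base model through peft in the usual way. A minimal sketch, assuming the checkpoint files sit in a local directory named checkpoint-4918 (the path is illustrative):

```python
# Minimal sketch: load this LoRA checkpoint on top of its base model with peft.
# The local checkpoint path is illustrative; adapter_config.json and the adapter
# weights are expected to live inside it.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "Qwen/Qwen2.5-7B-Instruct"  # matches base_model_name_or_path above
tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(
    base_id, torch_dtype="auto", device_map="auto"
)

# Attach the adapter; merge_and_unload() would bake the LoRA weights into the base.
model = PeftModel.from_pretrained(base_model, "./checkpoint-4918")
```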
loss.png
CHANGED
Git LFS pointer updated (old and new oid/size not shown)
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5111f467e847f2750566ffd2cef8bd631d8c7221a6c0019c0c1320c4118e2b98
 size 82461044
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:506cf36063a4621b49ee486a38867162e37a2f0bf6058c24c0b4f12fa1181aa8
 size 1064
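Both optimizer.pt and scheduler.pt are stored as Git LFS pointers, so the sha256 oid in each pointer can be used to verify a downloaded file. A minimal sketch using Python's hashlib; the local file path is illustrative:

```python
# Minimal sketch: check a downloaded file against the sha256 oid recorded in
# its Git LFS pointer. The local path "optimizer.pt" is illustrative.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "5111f467e847f2750566ffd2cef8bd631d8c7221a6c0019c0c1320c4118e2b98"
assert sha256_of("optimizer.pt") == expected, "optimizer.pt does not match its LFS pointer"
```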
trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
 "best_global_step": null,
 "best_metric": null,
 "best_model_checkpoint": null,
-"epoch": 1.
+"epoch": 1.999390119943078,
 "eval_steps": 500,
-"global_step":
+"global_step": 4918,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -34308,6 +34308,132 @@
 "learning_rate": 7.734581721962141e-07,
 "loss": 0.937,
 "step": 4900
+},
+{
+"epoch": 1.9924781459646268,
+"grad_norm": 0.10515905171632767,
+"learning_rate": 7.327498473437818e-07,
+"loss": 0.9686,
+"step": 4901
+},
+{
+"epoch": 1.9928847326692418,
+"grad_norm": 0.1109880730509758,
+"learning_rate": 6.920415224913496e-07,
+"loss": 0.9375,
+"step": 4902
+},
+{
+"epoch": 1.9932913193738564,
+"grad_norm": 0.10059867799282074,
+"learning_rate": 6.513331976389172e-07,
+"loss": 0.9148,
+"step": 4903
+},
+{
+"epoch": 1.9936979060784712,
+"grad_norm": 0.1153227686882019,
+"learning_rate": 6.106248727864849e-07,
+"loss": 1.065,
+"step": 4904
+},
+{
+"epoch": 1.994104492783086,
+"grad_norm": 0.10817611962556839,
+"learning_rate": 5.699165479340526e-07,
+"loss": 0.9162,
+"step": 4905
+},
+{
+"epoch": 1.9945110794877008,
+"grad_norm": 0.09951157122850418,
+"learning_rate": 5.292082230816202e-07,
+"loss": 0.885,
+"step": 4906
+},
+{
+"epoch": 1.9949176661923156,
+"grad_norm": 0.1026596650481224,
+"learning_rate": 4.884998982291879e-07,
+"loss": 0.9054,
+"step": 4907
+},
+{
+"epoch": 1.9953242528969302,
+"grad_norm": 0.10928881913423538,
+"learning_rate": 4.4779157337675555e-07,
+"loss": 0.9206,
+"step": 4908
+},
+{
+"epoch": 1.9957308396015452,
+"grad_norm": 0.1039741113781929,
+"learning_rate": 4.070832485243233e-07,
+"loss": 0.9762,
+"step": 4909
+},
+{
+"epoch": 1.9961374263061598,
+"grad_norm": 0.10720765590667725,
+"learning_rate": 3.663749236718909e-07,
+"loss": 0.9376,
+"step": 4910
+},
+{
+"epoch": 1.9965440130107746,
+"grad_norm": 0.11087562888860703,
+"learning_rate": 3.256665988194586e-07,
+"loss": 1.0135,
+"step": 4911
+},
+{
+"epoch": 1.9969505997153894,
+"grad_norm": 0.11333035677671432,
+"learning_rate": 2.849582739670263e-07,
+"loss": 0.9378,
+"step": 4912
+},
+{
+"epoch": 1.997357186420004,
+"grad_norm": 0.10567180067300797,
+"learning_rate": 2.4424994911459393e-07,
+"loss": 0.8727,
+"step": 4913
+},
+{
+"epoch": 1.997763773124619,
+"grad_norm": 0.09908761829137802,
+"learning_rate": 2.0354162426216164e-07,
+"loss": 0.8175,
+"step": 4914
+},
+{
+"epoch": 1.9981703598292335,
+"grad_norm": 0.1148877665400505,
+"learning_rate": 1.628332994097293e-07,
+"loss": 0.9689,
+"step": 4915
+},
+{
+"epoch": 1.9985769465338483,
+"grad_norm": 0.1073300689458847,
+"learning_rate": 1.2212497455729696e-07,
+"loss": 0.9064,
+"step": 4916
+},
+{
+"epoch": 1.9989835332384631,
+"grad_norm": 0.10753702372312546,
+"learning_rate": 8.141664970486465e-08,
+"loss": 0.9366,
+"step": 4917
+},
+{
+"epoch": 1.999390119943078,
+"grad_norm": 0.10542717576026917,
+"learning_rate": 4.0708324852432326e-08,
+"loss": 0.8963,
+"step": 4918
 }
 ],
 "logging_steps": 1,
@@ -34322,12 +34448,12 @@
 "should_evaluate": false,
 "should_log": false,
 "should_save": true,
-"should_training_stop":
+"should_training_stop": true
 },
 "attributes": {}
 }
 },
-"total_flos": 1.
+"total_flos": 1.57185946392996e+19,
 "train_batch_size": 16,
 "trial_name": null,
 "trial_params": null
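In the appended log entries the learning rate falls by a constant ≈4.07e-8 per step and reaches exactly that value at the final step, which is consistent with a linear decay schedule such as transformers' get_linear_schedule_with_warmup. A minimal sketch that reproduces the tail values under assumed hyperparameters (peak LR 2e-4, 4918 training steps, 5 warmup steps; none of these are stated in the diff):

```python
# Minimal sketch: reproduce the tail of the logged learning-rate schedule.
# Peak LR, total steps, and warmup steps are assumptions inferred from the
# logged values, not read from the actual training configuration.
def linear_lr(current_step: int,
              base_lr: float = 2e-4,
              num_training_steps: int = 4918,
              num_warmup_steps: int = 5) -> float:
    # Mirrors transformers.get_linear_schedule_with_warmup.
    if current_step < num_warmup_steps:
        return base_lr * current_step / max(1, num_warmup_steps)
    return base_lr * max(
        0.0,
        (num_training_steps - current_step) / (num_training_steps - num_warmup_steps),
    )

# The LR logged for optimizer step s is the scheduler value at step s - 1.
print(linear_lr(4899))  # ~7.7346e-07, matches the entry for "step": 4900
print(linear_lr(4917))  # ~4.0708e-08, matches the entry for "step": 4918
```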