Training in progress, step 2850, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 39131224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e09c2a5dcf2e7fc1395c6f500fd04f6e9ed509fdd50cd012a60a68626bc519e
|
| 3 |
size 39131224
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 78510334
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:489e57bb05f8b7b131b825404e7f5532f91305b731eb0b5d4894936cbda854c0
|
| 3 |
size 78510334
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1c26ee950e9a69b9213b7e74235a077ae09cbcb03643e4b3877a05d223c05e8
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f76a102ac755d5bd19e6f0e55ad50e49a03a307409b32eeac7fb5a2a13752ba
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7818862795829773,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-2700",
|
| 4 |
-
"epoch": 4.
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -537,6 +537,35 @@
|
|
| 537 |
"eval_samples_per_second": 40.417,
|
| 538 |
"eval_steps_per_second": 20.209,
|
| 539 |
"step": 2700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 540 |
}
|
| 541 |
],
|
| 542 |
"logging_steps": 50,
|
|
@@ -551,7 +580,7 @@
|
|
| 551 |
"early_stopping_threshold": 0.0
|
| 552 |
},
|
| 553 |
"attributes": {
|
| 554 |
-
"early_stopping_patience_counter":
|
| 555 |
}
|
| 556 |
},
|
| 557 |
"TrainerControl": {
|
|
@@ -565,7 +594,7 @@
|
|
| 565 |
"attributes": {}
|
| 566 |
}
|
| 567 |
},
|
| 568 |
-
"total_flos": 3.
|
| 569 |
"train_batch_size": 2,
|
| 570 |
"trial_name": null,
|
| 571 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7818862795829773,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-2700",
|
| 4 |
+
"epoch": 4.230012984603969,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 2850,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 537 |
"eval_samples_per_second": 40.417,
|
| 538 |
"eval_steps_per_second": 20.209,
|
| 539 |
"step": 2700
|
| 540 |
+
},
|
| 541 |
+
{
|
| 542 |
+
"epoch": 4.081617510665924,
|
| 543 |
+
"grad_norm": 0.18808983266353607,
|
| 544 |
+
"learning_rate": 3.5232131185484076e-06,
|
| 545 |
+
"loss": 0.7197,
|
| 546 |
+
"step": 2750
|
| 547 |
+
},
|
| 548 |
+
{
|
| 549 |
+
"epoch": 4.155815247634947,
|
| 550 |
+
"grad_norm": 0.1956692934036255,
|
| 551 |
+
"learning_rate": 2.259661018213333e-06,
|
| 552 |
+
"loss": 0.7152,
|
| 553 |
+
"step": 2800
|
| 554 |
+
},
|
| 555 |
+
{
|
| 556 |
+
"epoch": 4.230012984603969,
|
| 557 |
+
"grad_norm": 0.1999446451663971,
|
| 558 |
+
"learning_rate": 1.2731645278655445e-06,
|
| 559 |
+
"loss": 0.7127,
|
| 560 |
+
"step": 2850
|
| 561 |
+
},
|
| 562 |
+
{
|
| 563 |
+
"epoch": 4.230012984603969,
|
| 564 |
+
"eval_loss": 0.7822389006614685,
|
| 565 |
+
"eval_runtime": 14.2177,
|
| 566 |
+
"eval_samples_per_second": 39.95,
|
| 567 |
+
"eval_steps_per_second": 19.975,
|
| 568 |
+
"step": 2850
|
| 569 |
}
|
| 570 |
],
|
| 571 |
"logging_steps": 50,
|
|
|
|
| 580 |
"early_stopping_threshold": 0.0
|
| 581 |
},
|
| 582 |
"attributes": {
|
| 583 |
+
"early_stopping_patience_counter": 1
|
| 584 |
}
|
| 585 |
},
|
| 586 |
"TrainerControl": {
|
|
|
|
| 594 |
"attributes": {}
|
| 595 |
}
|
| 596 |
},
|
| 597 |
+
"total_flos": 3.24915742900224e+16,
|
| 598 |
"train_batch_size": 2,
|
| 599 |
"trial_name": null,
|
| 600 |
"trial_params": null
|