Training in progress, step 81, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 100966336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:703111afd8de5da31434bbffcf1751ae8c1729bfa301e3e14ad6aaec1d7b1465
|
| 3 |
size 100966336
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 202110330
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03db6c7c5ee7f73fa9f854f673d04717d0ae9a0bf00b4dbd1c0ad0b07dbff2a0
|
| 3 |
size 202110330
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09fbf705d0b0068815c843aaa33a8af71fae90cfe1af355c27f9a01b44f5b835
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09ab64d5ace66796aa9c2fa2fc4e0206d69a1eb4ef03f574f85ee8eb16a64b71
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 50,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -527,6 +527,69 @@
|
|
| 527 |
"learning_rate": 4.095071251953399e-05,
|
| 528 |
"loss": 1.3472,
|
| 529 |
"step": 72
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 530 |
}
|
| 531 |
],
|
| 532 |
"logging_steps": 1,
|
|
@@ -546,7 +609,7 @@
|
|
| 546 |
"attributes": {}
|
| 547 |
}
|
| 548 |
},
|
| 549 |
-
"total_flos": 6.
|
| 550 |
"train_batch_size": 2,
|
| 551 |
"trial_name": null,
|
| 552 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0913684210526315,
|
| 5 |
"eval_steps": 50,
|
| 6 |
+
"global_step": 81,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 527 |
"learning_rate": 4.095071251953399e-05,
|
| 528 |
"loss": 1.3472,
|
| 529 |
"step": 72
|
| 530 |
+
},
|
| 531 |
+
{
|
| 532 |
+
"epoch": 0.983578947368421,
|
| 533 |
+
"grad_norm": 0.5706843733787537,
|
| 534 |
+
"learning_rate": 3.83534068877284e-05,
|
| 535 |
+
"loss": 1.4041,
|
| 536 |
+
"step": 73
|
| 537 |
+
},
|
| 538 |
+
{
|
| 539 |
+
"epoch": 0.9970526315789474,
|
| 540 |
+
"grad_norm": 0.5967234373092651,
|
| 541 |
+
"learning_rate": 3.5812607646303834e-05,
|
| 542 |
+
"loss": 1.31,
|
| 543 |
+
"step": 74
|
| 544 |
+
},
|
| 545 |
+
{
|
| 546 |
+
"epoch": 1.0105263157894737,
|
| 547 |
+
"grad_norm": 1.052331566810608,
|
| 548 |
+
"learning_rate": 3.333223252352985e-05,
|
| 549 |
+
"loss": 2.0664,
|
| 550 |
+
"step": 75
|
| 551 |
+
},
|
| 552 |
+
{
|
| 553 |
+
"epoch": 1.024,
|
| 554 |
+
"grad_norm": 0.6153193712234497,
|
| 555 |
+
"learning_rate": 3.091610607806452e-05,
|
| 556 |
+
"loss": 1.516,
|
| 557 |
+
"step": 76
|
| 558 |
+
},
|
| 559 |
+
{
|
| 560 |
+
"epoch": 1.0374736842105263,
|
| 561 |
+
"grad_norm": 0.5821354389190674,
|
| 562 |
+
"learning_rate": 2.856795380176244e-05,
|
| 563 |
+
"loss": 1.2732,
|
| 564 |
+
"step": 77
|
| 565 |
+
},
|
| 566 |
+
{
|
| 567 |
+
"epoch": 1.0509473684210526,
|
| 568 |
+
"grad_norm": 0.6261878609657288,
|
| 569 |
+
"learning_rate": 2.6291396375236232e-05,
|
| 570 |
+
"loss": 1.2817,
|
| 571 |
+
"step": 78
|
| 572 |
+
},
|
| 573 |
+
{
|
| 574 |
+
"epoch": 1.064421052631579,
|
| 575 |
+
"grad_norm": 0.5795064568519592,
|
| 576 |
+
"learning_rate": 2.4089944085029363e-05,
|
| 577 |
+
"loss": 1.3216,
|
| 578 |
+
"step": 79
|
| 579 |
+
},
|
| 580 |
+
{
|
| 581 |
+
"epoch": 1.0778947368421052,
|
| 582 |
+
"grad_norm": 0.5134410262107849,
|
| 583 |
+
"learning_rate": 2.1966991411008938e-05,
|
| 584 |
+
"loss": 1.2917,
|
| 585 |
+
"step": 80
|
| 586 |
+
},
|
| 587 |
+
{
|
| 588 |
+
"epoch": 1.0913684210526315,
|
| 589 |
+
"grad_norm": 0.6312588453292847,
|
| 590 |
+
"learning_rate": 1.99258117923236e-05,
|
| 591 |
+
"loss": 1.1945,
|
| 592 |
+
"step": 81
|
| 593 |
}
|
| 594 |
],
|
| 595 |
"logging_steps": 1,
|
|
|
|
| 609 |
"attributes": {}
|
| 610 |
}
|
| 611 |
},
|
| 612 |
+
"total_flos": 6.750672775859405e+16,
|
| 613 |
"train_batch_size": 2,
|
| 614 |
"trial_name": null,
|
| 615 |
"trial_params": null
|