Upload checkpoint 4900
Files changed:
- README.md +4 -4
- adapter_config.json +1 -1
- loss.png +2 -2
- optimizer.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +703 -3
README.md (CHANGED)

@@ -2,7 +2,7 @@
 base_model: Qwen/Qwen2.5-7B-Instruct
 library_name: peft
 ---
-# Gradience T1 7B (Step …
+# Gradience T1 7B (Step 4900 Checkpoint)

 > [!NOTE]
 > Training in progress...
@@ -15,11 +15,11 @@ library_name: peft
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
-<div style="height: 30px; width: …
-…
+<div style="height: 30px; width: 99.63%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
+99.6%
 </div>
 </div>
-<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: …
+<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 4900 out of 4918 steps</p>
 </body>
 </html>
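The README's progress bar is plain HTML whose width is derived from the step count (100 × 4900 / 4918 ≈ 99.63%). A minimal Python sketch of how such a fragment could be regenerated at each checkpoint upload; the function name and rounding choices are assumptions, not the author's actual script, and only reproduce the arithmetic visible in the diff:

```python
# Hypothetical helper for regenerating the README progress bar.
# Only the arithmetic (4900/4918 -> 99.63% width, 99.6% label) is
# taken from the diff above; everything else is an assumption.

def progress_bar_html(step: int, total_steps: int = 4918) -> str:
    pct = 100 * step / total_steps   # e.g. 4900/4918 -> 99.6339...
    width = round(pct, 2)            # bar width: 99.63
    label = round(pct, 1)            # text label: 99.6
    return (
        '<div style="width: 100%; background-color: #e0e0e0; '
        'border-radius: 25px; overflow: hidden; margin: 20px 0;">\n'
        f'<div style="height: 30px; width: {width}%; background-color: #44965a; '
        'text-align: center; line-height: 30px; color: white; '
        'border-radius: 25px 0 0 25px;">\n'
        f'{label}%\n</div>\n</div>\n'
        '<p style="font-family: Arial, sans-serif; font-size: 16px;">'
        f'Progress: {step} out of {total_steps} steps</p>'
    )

print(progress_bar_html(4900))
```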
adapter_config.json (CHANGED)

@@ -1,7 +1,7 @@
 {
 "alpha_pattern": {},
 "auto_mapping": null,
-"base_model_name_or_path": "…
+"base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
 "bias": "none",
 "eva_config": null,
 "exclude_modules": null,
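adapter_config.json identifies this checkpoint as a PEFT adapter on top of Qwen/Qwen2.5-7B-Instruct (library_name: peft in the README metadata). A minimal sketch of attaching it to the base model with the peft library; the local directory name "checkpoint-4900" is a placeholder for wherever the files are downloaded:

```python
# Minimal sketch: load the base model, then attach this adapter checkpoint.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-7B-Instruct")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")

# PeftModel reads adapter_config.json from the checkpoint directory,
# which is why base_model_name_or_path must point at the right base model.
model = PeftModel.from_pretrained(base, "checkpoint-4900")  # placeholder path
```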
loss.png (CHANGED)

Binary file tracked with Git LFS; pointer details for the old and new versions are not shown in this view.
optimizer.pt (CHANGED)

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:8692dc55cf936e814593059ddc130c86529b75ce648f04fffb2c51d8a817cd80
 size 82461044
scheduler.pt (CHANGED)

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:cfaa254ae737802dd7cd5e65a69fbb58067ebe2f88a794ce9ee8b1c2a69498b4
 size 1064
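optimizer.pt and scheduler.pt are Git LFS pointer files: the repository stores only the spec version, a sha256 oid, and the byte size, and the new oids above reflect the updated optimizer and scheduler state at step 4900. A small sketch for checking a downloaded object against its pointer; the file path is a placeholder:

```python
# Verify a downloaded LFS object against the oid and size from its pointer file.
import hashlib
import os

def verify_lfs(path: str, expected_oid: str, expected_size: int) -> bool:
    if os.path.getsize(path) != expected_size:
        return False
    h = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in 1 MiB chunks to avoid loading the whole file into memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == expected_oid

# Values copied from the scheduler.pt pointer in this commit:
print(verify_lfs(
    "scheduler.pt",  # placeholder path to the downloaded file
    "cfaa254ae737802dd7cd5e65a69fbb58067ebe2f88a794ce9ee8b1c2a69498b4",
    1064,
))
```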
trainer_state.json (CHANGED)

@@ -2,9 +2,9 @@
 "best_global_step": null,
 "best_metric": null,
 "best_model_checkpoint": null,
-"epoch": 1.…
+"epoch": 1.9920715592600122,
 "eval_steps": 500,
-"global_step": …
+"global_step": 4900,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -33608,6 +33608,706 @@
 "learning_rate": 4.8442906574394464e-06,
 "loss": 0.746,
 "step": 4800
+},
+{
+"epoch": 1.951819475503151,
+"grad_norm": 0.10632304102182388,
+"learning_rate": 4.803582332587014e-06,
+"loss": 0.9029,
+"step": 4801
+},
+{
+"epoch": 1.9522260622077658,
+"grad_norm": 0.11486592143774033,
+"learning_rate": 4.7628740077345826e-06,
+"loss": 1.0341,
+"step": 4802
+},
+{
+"epoch": 1.9526326489123806,
+"grad_norm": 0.10841212421655655,
+"learning_rate": 4.722165682882149e-06,
+"loss": 0.9374,
+"step": 4803
+},
+{
+"epoch": 1.9530392356169952,
+"grad_norm": 0.11145360767841339,
+"learning_rate": 4.681457358029717e-06,
+"loss": 0.9147,
+"step": 4804
+},
+{
+"epoch": 1.9534458223216102,
+"grad_norm": 0.11122753471136093,
+"learning_rate": 4.640749033177285e-06,
+"loss": 0.9332,
+"step": 4805
+},
+{
+"epoch": 1.9538524090262248,
+"grad_norm": 0.10207870602607727,
+"learning_rate": 4.600040708324853e-06,
+"loss": 0.937,
+"step": 4806
+},
+{
+"epoch": 1.9542589957308396,
+"grad_norm": 0.11454325169324875,
+"learning_rate": 4.559332383472421e-06,
+"loss": 1.0435,
+"step": 4807
+},
+{
+"epoch": 1.9546655824354544,
+"grad_norm": 0.10648126155138016,
+"learning_rate": 4.5186240586199875e-06,
+"loss": 0.927,
+"step": 4808
+},
+{
+"epoch": 1.955072169140069,
+"grad_norm": 0.10996894538402557,
+"learning_rate": 4.477915733767556e-06,
+"loss": 0.9693,
+"step": 4809
+},
+{
+"epoch": 1.955478755844684,
+"grad_norm": 0.10057996213436127,
+"learning_rate": 4.437207408915124e-06,
+"loss": 0.9325,
+"step": 4810
+},
+{
+"epoch": 1.9558853425492986,
+"grad_norm": 0.10628996044397354,
+"learning_rate": 4.396499084062691e-06,
+"loss": 0.891,
+"step": 4811
+},
+{
+"epoch": 1.9562919292539134,
+"grad_norm": 0.10557537525892258,
+"learning_rate": 4.355790759210258e-06,
+"loss": 0.8736,
+"step": 4812
+},
+{
+"epoch": 1.9566985159585282,
+"grad_norm": 0.10447331517934799,
+"learning_rate": 4.3150824343578265e-06,
+"loss": 0.9717,
+"step": 4813
+},
+{
+"epoch": 1.957105102663143,
+"grad_norm": 0.10446681082248688,
+"learning_rate": 4.274374109505394e-06,
+"loss": 1.0103,
+"step": 4814
+},
+{
+"epoch": 1.9575116893677578,
+"grad_norm": 0.10121920704841614,
+"learning_rate": 4.233665784652962e-06,
+"loss": 0.875,
+"step": 4815
+},
+{
+"epoch": 1.9579182760723723,
+"grad_norm": 0.10913816094398499,
+"learning_rate": 4.1929574598005294e-06,
+"loss": 1.0491,
+"step": 4816
+},
+{
+"epoch": 1.9583248627769871,
+"grad_norm": 0.11767001450061798,
+"learning_rate": 4.152249134948097e-06,
+"loss": 1.025,
+"step": 4817
+},
+{
+"epoch": 1.958731449481602,
+"grad_norm": 0.10180991888046265,
+"learning_rate": 4.111540810095665e-06,
+"loss": 0.892,
+"step": 4818
+},
+{
+"epoch": 1.9591380361862167,
+"grad_norm": 0.11216012388467789,
+"learning_rate": 4.070832485243232e-06,
+"loss": 0.9754,
+"step": 4819
+},
+{
+"epoch": 1.9595446228908315,
+"grad_norm": 0.1098812147974968,
+"learning_rate": 4.0301241603908e-06,
+"loss": 0.9805,
+"step": 4820
+},
+{
+"epoch": 1.959951209595446,
+"grad_norm": 0.10524158924818039,
+"learning_rate": 3.989415835538368e-06,
+"loss": 0.9045,
+"step": 4821
+},
+{
+"epoch": 1.9603577963000611,
+"grad_norm": 0.09650178253650665,
+"learning_rate": 3.948707510685935e-06,
+"loss": 0.7913,
+"step": 4822
+},
+{
+"epoch": 1.9607643830046757,
+"grad_norm": 0.11418919265270233,
+"learning_rate": 3.907999185833503e-06,
+"loss": 0.9991,
+"step": 4823
+},
+{
+"epoch": 1.9611709697092905,
+"grad_norm": 0.11137097328901291,
+"learning_rate": 3.867290860981071e-06,
+"loss": 0.978,
+"step": 4824
+},
+{
+"epoch": 1.9615775564139053,
+"grad_norm": 0.1029028594493866,
+"learning_rate": 3.826582536128639e-06,
+"loss": 0.8791,
+"step": 4825
+},
+{
+"epoch": 1.9619841431185199,
+"grad_norm": 0.10152295976877213,
+"learning_rate": 3.7858742112762058e-06,
+"loss": 0.8855,
+"step": 4826
+},
+{
+"epoch": 1.962390729823135,
+"grad_norm": 0.11157593131065369,
+"learning_rate": 3.745165886423774e-06,
+"loss": 1.0097,
+"step": 4827
+},
+{
+"epoch": 1.9627973165277495,
+"grad_norm": 0.10975543409585953,
+"learning_rate": 3.7044575615713415e-06,
+"loss": 1.0269,
+"step": 4828
+},
+{
+"epoch": 1.9632039032323643,
+"grad_norm": 0.10318556427955627,
+"learning_rate": 3.6637492367189095e-06,
+"loss": 0.9094,
+"step": 4829
+},
+{
+"epoch": 1.963610489936979,
+"grad_norm": 0.09540821611881256,
+"learning_rate": 3.6230409118664767e-06,
+"loss": 0.7923,
+"step": 4830
+},
+{
+"epoch": 1.9640170766415939,
+"grad_norm": 0.11185004562139511,
+"learning_rate": 3.5823325870140444e-06,
+"loss": 0.9945,
+"step": 4831
+},
+{
+"epoch": 1.9644236633462087,
+"grad_norm": 0.1030164510011673,
+"learning_rate": 3.541624262161612e-06,
+"loss": 0.8952,
+"step": 4832
+},
+{
+"epoch": 1.9648302500508232,
+"grad_norm": 0.10606315732002258,
+"learning_rate": 3.50091593730918e-06,
+"loss": 0.8872,
+"step": 4833
+},
+{
+"epoch": 1.9652368367554383,
+"grad_norm": 0.10676340013742447,
+"learning_rate": 3.4602076124567477e-06,
+"loss": 0.9616,
+"step": 4834
+},
+{
+"epoch": 1.9656434234600528,
+"grad_norm": 0.11374758929014206,
+"learning_rate": 3.419499287604315e-06,
+"loss": 1.0619,
+"step": 4835
+},
+{
+"epoch": 1.9660500101646676,
+"grad_norm": 0.10142536461353302,
+"learning_rate": 3.378790962751883e-06,
+"loss": 0.8787,
+"step": 4836
+},
+{
+"epoch": 1.9664565968692824,
+"grad_norm": 0.1088085025548935,
+"learning_rate": 3.3380826378994506e-06,
+"loss": 1.0706,
+"step": 4837
+},
+{
+"epoch": 1.966863183573897,
+"grad_norm": 0.11617989093065262,
+"learning_rate": 3.2973743130470187e-06,
+"loss": 1.0758,
+"step": 4838
+},
+{
+"epoch": 1.967269770278512,
+"grad_norm": 0.10999471694231033,
+"learning_rate": 3.2566659881945863e-06,
+"loss": 0.8955,
+"step": 4839
+},
+{
+"epoch": 1.9676763569831266,
+"grad_norm": 0.10413683950901031,
+"learning_rate": 3.2159576633421535e-06,
+"loss": 0.8774,
+"step": 4840
+},
+{
+"epoch": 1.9680829436877414,
+"grad_norm": 0.10912149399518967,
+"learning_rate": 3.175249338489721e-06,
+"loss": 0.9151,
+"step": 4841
+},
+{
+"epoch": 1.9684895303923562,
+"grad_norm": 0.10065335780382156,
+"learning_rate": 3.134541013637289e-06,
+"loss": 0.8947,
+"step": 4842
+},
+{
+"epoch": 1.9688961170969708,
+"grad_norm": 0.10842598974704742,
+"learning_rate": 3.0938326887848564e-06,
+"loss": 0.9149,
+"step": 4843
+},
+{
+"epoch": 1.9693027038015858,
+"grad_norm": 0.09546621143817902,
+"learning_rate": 3.0531243639324245e-06,
+"loss": 0.8106,
+"step": 4844
+},
+{
+"epoch": 1.9697092905062004,
+"grad_norm": 0.10605739057064056,
+"learning_rate": 3.0124160390799917e-06,
+"loss": 0.8663,
+"step": 4845
+},
+{
+"epoch": 1.9701158772108152,
+"grad_norm": 0.11531540751457214,
+"learning_rate": 2.9717077142275597e-06,
+"loss": 0.9487,
+"step": 4846
+},
+{
+"epoch": 1.97052246391543,
+"grad_norm": 0.112498939037323,
+"learning_rate": 2.9309993893751274e-06,
+"loss": 0.9846,
+"step": 4847
+},
+{
+"epoch": 1.9709290506200448,
+"grad_norm": 0.10680878907442093,
+"learning_rate": 2.890291064522695e-06,
+"loss": 0.9092,
+"step": 4848
+},
+{
+"epoch": 1.9713356373246596,
+"grad_norm": 0.11008645594120026,
+"learning_rate": 2.8495827396702626e-06,
+"loss": 0.918,
+"step": 4849
+},
+{
+"epoch": 1.9717422240292741,
+"grad_norm": 0.1180918887257576,
+"learning_rate": 2.8088744148178303e-06,
+"loss": 1.1026,
+"step": 4850
+},
+{
+"epoch": 1.9721488107338891,
+"grad_norm": 0.10788023471832275,
+"learning_rate": 2.7681660899653983e-06,
+"loss": 0.9422,
+"step": 4851
+},
+{
+"epoch": 1.9725553974385037,
+"grad_norm": 0.11532583087682724,
+"learning_rate": 2.7274577651129655e-06,
+"loss": 0.9619,
+"step": 4852
+},
+{
+"epoch": 1.9729619841431185,
+"grad_norm": 0.1164373904466629,
+"learning_rate": 2.6867494402605336e-06,
+"loss": 1.0735,
+"step": 4853
+},
+{
+"epoch": 1.9733685708477333,
+"grad_norm": 0.10352805256843567,
+"learning_rate": 2.646041115408101e-06,
+"loss": 0.9302,
+"step": 4854
+},
+{
+"epoch": 1.973775157552348,
+"grad_norm": 0.09697481989860535,
+"learning_rate": 2.605332790555669e-06,
+"loss": 0.8169,
+"step": 4855
+},
+{
+"epoch": 1.974181744256963,
+"grad_norm": 0.10641641169786453,
+"learning_rate": 2.5646244657032365e-06,
+"loss": 0.9379,
+"step": 4856
+},
+{
+"epoch": 1.9745883309615775,
+"grad_norm": 0.12247955799102783,
+"learning_rate": 2.523916140850804e-06,
+"loss": 1.1005,
+"step": 4857
+},
+{
+"epoch": 1.9749949176661923,
+"grad_norm": 0.11470235139131546,
+"learning_rate": 2.4832078159983718e-06,
+"loss": 1.0682,
+"step": 4858
+},
+{
+"epoch": 1.975401504370807,
+"grad_norm": 0.10415980964899063,
+"learning_rate": 2.4424994911459394e-06,
+"loss": 0.9184,
+"step": 4859
+},
+{
+"epoch": 1.9758080910754219,
+"grad_norm": 0.10580716282129288,
+"learning_rate": 2.401791166293507e-06,
+"loss": 0.9137,
+"step": 4860
+},
+{
+"epoch": 1.9762146777800367,
+"grad_norm": 0.10806702822446823,
+"learning_rate": 2.3610828414410747e-06,
+"loss": 1.0023,
+"step": 4861
+},
+{
+"epoch": 1.9766212644846513,
+"grad_norm": 0.10730385035276413,
+"learning_rate": 2.3203745165886423e-06,
+"loss": 0.9394,
+"step": 4862
+},
+{
+"epoch": 1.9770278511892663,
+"grad_norm": 0.11646751314401627,
+"learning_rate": 2.2796661917362104e-06,
+"loss": 1.0452,
+"step": 4863
+},
+{
+"epoch": 1.9774344378938808,
+"grad_norm": 0.11328614503145218,
+"learning_rate": 2.238957866883778e-06,
+"loss": 1.0363,
+"step": 4864
+},
+{
+"epoch": 1.9778410245984956,
+"grad_norm": 0.10477136820554733,
+"learning_rate": 2.1982495420313456e-06,
+"loss": 0.8967,
+"step": 4865
+},
+{
+"epoch": 1.9782476113031104,
+"grad_norm": 0.1011333018541336,
+"learning_rate": 2.1575412171789133e-06,
+"loss": 0.9051,
+"step": 4866
+},
+{
+"epoch": 1.978654198007725,
+"grad_norm": 0.10585794597864151,
+"learning_rate": 2.116832892326481e-06,
+"loss": 0.9641,
+"step": 4867
+},
+{
+"epoch": 1.97906078471234,
+"grad_norm": 0.10518283396959305,
+"learning_rate": 2.0761245674740485e-06,
+"loss": 0.9738,
+"step": 4868
+},
+{
+"epoch": 1.9794673714169546,
+"grad_norm": 0.10781599581241608,
+"learning_rate": 2.035416242621616e-06,
+"loss": 0.9535,
+"step": 4869
+},
+{
+"epoch": 1.9798739581215694,
+"grad_norm": 0.10149887949228287,
+"learning_rate": 1.994707917769184e-06,
+"loss": 0.7832,
+"step": 4870
+},
+{
+"epoch": 1.9802805448261842,
+"grad_norm": 0.10625772923231125,
+"learning_rate": 1.9539995929167514e-06,
+"loss": 0.8969,
+"step": 4871
+},
+{
+"epoch": 1.9806871315307988,
+"grad_norm": 0.100648894906044,
+"learning_rate": 1.9132912680643195e-06,
+"loss": 0.8592,
+"step": 4872
+},
+{
+"epoch": 1.9810937182354138,
+"grad_norm": 0.10639602690935135,
+"learning_rate": 1.872582943211887e-06,
+"loss": 0.9377,
+"step": 4873
+},
+{
+"epoch": 1.9815003049400284,
+"grad_norm": 0.10608502477407455,
+"learning_rate": 1.8318746183594548e-06,
+"loss": 0.8221,
+"step": 4874
+},
+{
+"epoch": 1.9819068916446432,
+"grad_norm": 0.1076526865363121,
+"learning_rate": 1.7911662935070222e-06,
+"loss": 1.0001,
+"step": 4875
+},
+{
+"epoch": 1.982313478349258,
+"grad_norm": 0.10484609007835388,
+"learning_rate": 1.75045796865459e-06,
+"loss": 0.9281,
+"step": 4876
+},
+{
+"epoch": 1.9827200650538728,
+"grad_norm": 0.11033840477466583,
+"learning_rate": 1.7097496438021575e-06,
+"loss": 1.012,
+"step": 4877
+},
+{
+"epoch": 1.9831266517584876,
+"grad_norm": 0.10178755968809128,
+"learning_rate": 1.6690413189497253e-06,
+"loss": 0.8751,
+"step": 4878
+},
+{
+"epoch": 1.9835332384631021,
+"grad_norm": 0.09968069940805435,
+"learning_rate": 1.6283329940972931e-06,
+"loss": 0.8481,
+"step": 4879
+},
+{
+"epoch": 1.9839398251677172,
+"grad_norm": 0.11199220269918442,
+"learning_rate": 1.5876246692448606e-06,
+"loss": 1.0553,
+"step": 4880
+},
+{
+"epoch": 1.9843464118723317,
+"grad_norm": 0.10771384090185165,
+"learning_rate": 1.5469163443924282e-06,
+"loss": 0.9871,
+"step": 4881
+},
+{
+"epoch": 1.9847529985769465,
+"grad_norm": 0.1033516600728035,
+"learning_rate": 1.5062080195399958e-06,
+"loss": 0.8731,
+"step": 4882
+},
+{
+"epoch": 1.9851595852815613,
+"grad_norm": 0.10771310329437256,
+"learning_rate": 1.4654996946875637e-06,
+"loss": 1.0152,
+"step": 4883
+},
+{
+"epoch": 1.985566171986176,
+"grad_norm": 0.10385514050722122,
+"learning_rate": 1.4247913698351313e-06,
+"loss": 0.8569,
+"step": 4884
+},
+{
+"epoch": 1.985972758690791,
+"grad_norm": 0.10435989499092102,
+"learning_rate": 1.3840830449826992e-06,
+"loss": 0.8999,
+"step": 4885
+},
+{
+"epoch": 1.9863793453954055,
+"grad_norm": 0.10604739189147949,
+"learning_rate": 1.3433747201302668e-06,
+"loss": 0.8837,
+"step": 4886
+},
+{
+"epoch": 1.9867859321000203,
+"grad_norm": 0.11071362346410751,
+"learning_rate": 1.3026663952778344e-06,
+"loss": 0.9995,
+"step": 4887
+},
+{
+"epoch": 1.987192518804635,
+"grad_norm": 0.11492349952459335,
+"learning_rate": 1.261958070425402e-06,
+"loss": 1.0693,
+"step": 4888
+},
+{
+"epoch": 1.98759910550925,
+"grad_norm": 0.11402280628681183,
+"learning_rate": 1.2212497455729697e-06,
+"loss": 1.0973,
+"step": 4889
+},
+{
+"epoch": 1.9880056922138647,
+"grad_norm": 0.10784902423620224,
+"learning_rate": 1.1805414207205373e-06,
+"loss": 0.9591,
+"step": 4890
+},
+{
+"epoch": 1.9884122789184793,
+"grad_norm": 0.10509707778692245,
+"learning_rate": 1.1398330958681052e-06,
+"loss": 0.9233,
+"step": 4891
+},
+{
+"epoch": 1.9888188656230943,
+"grad_norm": 0.10772809386253357,
+"learning_rate": 1.0991247710156728e-06,
+"loss": 0.9239,
+"step": 4892
+},
+{
+"epoch": 1.9892254523277089,
+"grad_norm": 0.10139593482017517,
+"learning_rate": 1.0584164461632405e-06,
+"loss": 0.8991,
+"step": 4893
+},
+{
+"epoch": 1.9896320390323237,
+"grad_norm": 0.11088011413812637,
+"learning_rate": 1.017708121310808e-06,
+"loss": 0.9746,
+"step": 4894
+},
+{
+"epoch": 1.9900386257369385,
+"grad_norm": 0.1069415956735611,
+"learning_rate": 9.769997964583757e-07,
+"loss": 0.9667,
+"step": 4895
+},
+{
+"epoch": 1.990445212441553,
+"grad_norm": 0.11252355575561523,
+"learning_rate": 9.362914716059435e-07,
+"loss": 0.9521,
+"step": 4896
+},
+{
+"epoch": 1.990851799146168,
+"grad_norm": 0.11555030941963196,
+"learning_rate": 8.955831467535111e-07,
+"loss": 0.9464,
+"step": 4897
+},
+{
+"epoch": 1.9912583858507826,
+"grad_norm": 0.10089296847581863,
+"learning_rate": 8.548748219010787e-07,
+"loss": 0.9118,
+"step": 4898
+},
+{
+"epoch": 1.9916649725553974,
+"grad_norm": 0.10483364015817642,
+"learning_rate": 8.141664970486466e-07,
+"loss": 0.9561,
+"step": 4899
+},
+{
+"epoch": 1.9920715592600122,
+"grad_norm": 0.10259924083948135,
+"learning_rate": 7.734581721962141e-07,
+"loss": 0.937,
+"step": 4900
 }
 ],
 "logging_steps": 1,
@@ -33627,7 +34327,7 @@
 "attributes": {}
 }
 },
-"total_flos": 1.…
+"total_flos": 1.5662724108266447e+19,
 "train_batch_size": 16,
 "trial_name": null,
 "trial_params": null
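trainer_state.json appends one log_history entry per optimizer step (logging_steps: 1), which is presumably the data behind the updated loss.png. A sketch of recovering the loss curve from this checkpoint; the directory path is a placeholder, and matplotlib is an assumption about how the plot was made:

```python
# Rebuild a training-loss curve from trainer_state.json's log_history.
import json
import matplotlib.pyplot as plt

with open("checkpoint-4900/trainer_state.json") as f:  # placeholder path
    state = json.load(f)

# Keep only entries that carry a loss (skips eval/summary records, if any).
steps = [e["step"] for e in state["log_history"] if "loss" in e]
losses = [e["loss"] for e in state["log_history"] if "loss" in e]

plt.plot(steps, losses, linewidth=0.5)
plt.xlabel("step")
plt.ylabel("training loss")
plt.title(f"Gradience T1 7B, global_step={state['global_step']}")
plt.savefig("loss.png")
```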