Upload checkpoint 900

Files changed:

- .gitattributes +1 -0
- README.md +7 -4
- adapter_model.safetensors +1 -1
- loss.png +3 -0
- optimizer.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +703 -3
.gitattributes
CHANGED

@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text
+loss.png filter=lfs diff=lfs merge=lfs -text
README.md
CHANGED

@@ -2,7 +2,7 @@
 base_model: Qwen/Qwen2.5-3B-Instruct
 library_name: peft
 ---
-# Gradience T1 3B (Step
+# Gradience T1 3B (Step 900 Checkpoint)
 
 > [!NOTE]
 > Training in progress...

@@ -38,10 +38,13 @@ library_name: peft
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
-<div style="height: 30px; width:
+<div style="height: 30px; width: 9.15%; background-color: #76c7c0; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
 <!-- 3.75% -->
 </div>
 </div>
-<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress:
+<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 900 out of 9838 steps</p>
 </body>
-</html>
+</html>
+
+## Training Loss
+
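The updated bar width checks out against the label: 900 / 9838 ≈ 9.148%, which rounds to the 9.15% now written into the bar (the `<!-- 3.75% -->` comment looks like a stale leftover from an earlier checkpoint). A minimal sketch, using nothing repo-specific, of how the two updated values could be regenerated for any checkpoint:

```python
# Recompute the README progress-bar width and label for a given step.
def progress(step: int, total_steps: int = 9838) -> tuple[str, str]:
    width = f"{100 * step / total_steps:.2f}%"   # bar width, e.g. "9.15%"
    label = f"Progress: {step} out of {total_steps} steps"
    return width, label

print(progress(900))  # ('9.15%', 'Progress: 900 out of 9838 steps')
```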
adapter_model.safetensors
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3a5dbdeb8341e7e3d9596846ed4c7684398e8c99e694357ed0f13b00d742ac19
 size 119801528
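Given the README front matter (`library_name: peft`, `base_model: Qwen/Qwen2.5-3B-Instruct`), the refreshed adapter_model.safetensors should load as a standard PEFT adapter. A minimal sketch; the local checkpoint directory name is an assumption, not something this commit specifies:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-3B-Instruct")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B-Instruct")

# "./checkpoint-900" is a hypothetical local copy of this commit's files.
model = PeftModel.from_pretrained(base, "./checkpoint-900")
model.eval()
```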
loss.png
ADDED

Binary image (the training-loss plot referenced from the README), stored via Git LFS.
optimizer.pt
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2cd3e3db9bee3174e708cac4d94d11870d49ad54e90400463a5983f1eea959c4
 size 61392692
scheduler.pt
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:20ac70d593c855ffef93a9e2aba01508c369253c427365a830e36f7f000a094f
 size 1064
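The three updated files above are Git LFS pointers in the standard format: a version line, `oid sha256:<digest>`, and `size <bytes>`. A small sketch of how a downloaded blob could be verified against its pointer; the file paths are placeholders:

```python
import hashlib

def verify_lfs_pointer(pointer_path: str, blob_path: str) -> bool:
    """Check a blob's SHA-256 digest and byte size against its LFS pointer."""
    fields = dict(line.strip().split(" ", 1) for line in open(pointer_path))
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    digest, size = hashlib.sha256(), 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

# verify_lfs_pointer("scheduler.pt.pointer", "scheduler.pt")  # hypothetical paths
```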
trainer_state.json
CHANGED

@@ -2,9 +2,9 @@
 "best_global_step": null,
 "best_metric": null,
 "best_model_checkpoint": null,
-"epoch": 0.
+"epoch": 0.1829640170766416,
 "eval_steps": 500,
-"global_step":
+"global_step": 900,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,

@@ -5608,6 +5608,706 @@
 "learning_rate": 0.00018385030001016986,
 "loss": 1.1788,
 "step": 800
+},
+{
+"epoch": 0.16283797519821103,
+"grad_norm": 0.11467185616493225,
+"learning_rate": 0.00018382996033763859,
+"loss": 0.9527,
+"step": 801
+},
+{
+"epoch": 0.1630412685505184,
+"grad_norm": 0.129184752702713,
+"learning_rate": 0.00018380962066510728,
+"loss": 1.1758,
+"step": 802
+},
+{
+"epoch": 0.16324456190282577,
+"grad_norm": 0.11696959286928177,
+"learning_rate": 0.000183789280992576,
+"loss": 1.03,
+"step": 803
+},
+{
+"epoch": 0.16344785525513317,
+"grad_norm": 0.13689257204532623,
+"learning_rate": 0.00018376894132004476,
+"loss": 1.2516,
+"step": 804
+},
+{
+"epoch": 0.16365114860744054,
+"grad_norm": 0.11370982229709625,
+"learning_rate": 0.00018374860164751348,
+"loss": 1.0484,
+"step": 805
+},
+{
+"epoch": 0.1638544419597479,
+"grad_norm": 0.13201859593391418,
+"learning_rate": 0.0001837282619749822,
+"loss": 1.0903,
+"step": 806
+},
+{
+"epoch": 0.1640577353120553,
+"grad_norm": 0.10468725860118866,
+"learning_rate": 0.00018370792230245093,
+"loss": 0.9548,
+"step": 807
+},
+{
+"epoch": 0.16426102866436268,
+"grad_norm": 0.14737223088741302,
+"learning_rate": 0.00018368758262991968,
+"loss": 1.1607,
+"step": 808
+},
+{
+"epoch": 0.16446432201667005,
+"grad_norm": 0.11500222235918045,
+"learning_rate": 0.0001836672429573884,
+"loss": 1.1032,
+"step": 809
+},
+{
+"epoch": 0.16466761536897742,
+"grad_norm": 0.12849587202072144,
+"learning_rate": 0.00018364690328485713,
+"loss": 1.255,
+"step": 810
+},
+{
+"epoch": 0.16487090872128482,
+"grad_norm": 0.10878688842058182,
+"learning_rate": 0.00018362656361232583,
+"loss": 1.1075,
+"step": 811
+},
+{
+"epoch": 0.1650742020735922,
+"grad_norm": 0.10878860950469971,
+"learning_rate": 0.00018360622393979458,
+"loss": 1.0629,
+"step": 812
+},
+{
+"epoch": 0.16527749542589956,
+"grad_norm": 0.1280430108308792,
+"learning_rate": 0.0001835858842672633,
+"loss": 1.1377,
+"step": 813
+},
+{
+"epoch": 0.16548078877820696,
+"grad_norm": 0.11831233650445938,
+"learning_rate": 0.00018356554459473203,
+"loss": 1.0786,
+"step": 814
+},
+{
+"epoch": 0.16568408213051433,
+"grad_norm": 0.11453156918287277,
+"learning_rate": 0.00018354520492220075,
+"loss": 1.0477,
+"step": 815
+},
+{
+"epoch": 0.1658873754828217,
+"grad_norm": 0.13597573339939117,
+"learning_rate": 0.0001835248652496695,
+"loss": 1.1807,
+"step": 816
+},
+{
+"epoch": 0.1660906688351291,
+"grad_norm": 0.12008185684680939,
+"learning_rate": 0.00018350452557713823,
+"loss": 1.0676,
+"step": 817
+},
+{
+"epoch": 0.16629396218743647,
+"grad_norm": 0.1363888829946518,
+"learning_rate": 0.00018348418590460696,
+"loss": 1.1582,
+"step": 818
+},
+{
+"epoch": 0.16649725553974384,
+"grad_norm": 0.11310733109712601,
+"learning_rate": 0.00018346384623207565,
+"loss": 1.0931,
+"step": 819
+},
+{
+"epoch": 0.16670054889205124,
+"grad_norm": 0.13503344357013702,
+"learning_rate": 0.0001834435065595444,
+"loss": 1.1465,
+"step": 820
+},
+{
+"epoch": 0.1669038422443586,
+"grad_norm": 0.12744784355163574,
+"learning_rate": 0.00018342316688701313,
+"loss": 1.1662,
+"step": 821
+},
+{
+"epoch": 0.16710713559666598,
+"grad_norm": 0.13695518672466278,
+"learning_rate": 0.00018340282721448185,
+"loss": 1.1846,
+"step": 822
+},
+{
+"epoch": 0.16731042894897336,
+"grad_norm": 0.12580302357673645,
+"learning_rate": 0.00018338248754195058,
+"loss": 0.93,
+"step": 823
+},
+{
+"epoch": 0.16751372230128075,
+"grad_norm": 0.12266777455806732,
+"learning_rate": 0.00018336214786941933,
+"loss": 1.1033,
+"step": 824
+},
+{
+"epoch": 0.16771701565358813,
+"grad_norm": 0.1129806861281395,
+"learning_rate": 0.00018334180819688805,
+"loss": 1.0517,
+"step": 825
+},
+{
+"epoch": 0.1679203090058955,
+"grad_norm": 0.12590476870536804,
+"learning_rate": 0.00018332146852435678,
+"loss": 1.0374,
+"step": 826
+},
+{
+"epoch": 0.1681236023582029,
+"grad_norm": 0.12631377577781677,
+"learning_rate": 0.00018330112885182548,
+"loss": 1.1898,
+"step": 827
+},
+{
+"epoch": 0.16832689571051027,
+"grad_norm": 0.13719779253005981,
+"learning_rate": 0.00018328078917929423,
+"loss": 1.1108,
+"step": 828
+},
+{
+"epoch": 0.16853018906281764,
+"grad_norm": 0.12414206564426422,
+"learning_rate": 0.00018326044950676295,
+"loss": 1.1654,
+"step": 829
+},
+{
+"epoch": 0.16873348241512504,
+"grad_norm": 0.12075278162956238,
+"learning_rate": 0.00018324010983423168,
+"loss": 1.0255,
+"step": 830
+},
+{
+"epoch": 0.1689367757674324,
+"grad_norm": 0.11906860023736954,
+"learning_rate": 0.0001832197701617004,
+"loss": 1.0433,
+"step": 831
+},
+{
+"epoch": 0.16914006911973978,
+"grad_norm": 0.11960665136575699,
+"learning_rate": 0.00018319943048916915,
+"loss": 0.9501,
+"step": 832
+},
+{
+"epoch": 0.16934336247204718,
+"grad_norm": 0.1228812113404274,
+"learning_rate": 0.00018317909081663788,
+"loss": 1.002,
+"step": 833
+},
+{
+"epoch": 0.16954665582435455,
+"grad_norm": 0.12420972436666489,
+"learning_rate": 0.0001831587511441066,
+"loss": 1.062,
+"step": 834
+},
+{
+"epoch": 0.16974994917666192,
+"grad_norm": 0.11490360647439957,
+"learning_rate": 0.0001831384114715753,
+"loss": 0.9708,
+"step": 835
+},
+{
+"epoch": 0.1699532425289693,
+"grad_norm": 0.11945214867591858,
+"learning_rate": 0.00018311807179904402,
+"loss": 1.1042,
+"step": 836
+},
+{
+"epoch": 0.1701565358812767,
+"grad_norm": 0.1234474778175354,
+"learning_rate": 0.00018309773212651277,
+"loss": 1.0258,
+"step": 837
+},
+{
+"epoch": 0.17035982923358406,
+"grad_norm": 0.12447863817214966,
+"learning_rate": 0.0001830773924539815,
+"loss": 1.1132,
+"step": 838
+},
+{
+"epoch": 0.17056312258589143,
+"grad_norm": 0.1321963667869568,
+"learning_rate": 0.00018305705278145022,
+"loss": 1.1835,
+"step": 839
+},
+{
+"epoch": 0.17076641593819883,
+"grad_norm": 0.12708254158496857,
+"learning_rate": 0.00018303671310891895,
+"loss": 1.1787,
+"step": 840
+},
+{
+"epoch": 0.1709697092905062,
+"grad_norm": 0.11481820046901703,
+"learning_rate": 0.0001830163734363877,
+"loss": 0.8837,
+"step": 841
+},
+{
+"epoch": 0.17117300264281357,
+"grad_norm": 0.11851567029953003,
+"learning_rate": 0.00018299603376385642,
+"loss": 0.9516,
+"step": 842
+},
+{
+"epoch": 0.17137629599512097,
+"grad_norm": 0.13182471692562103,
+"learning_rate": 0.00018297569409132512,
+"loss": 1.1809,
+"step": 843
+},
+{
+"epoch": 0.17157958934742834,
+"grad_norm": 0.12840509414672852,
+"learning_rate": 0.00018295535441879385,
+"loss": 1.0557,
+"step": 844
+},
+{
+"epoch": 0.1717828826997357,
+"grad_norm": 0.11280561983585358,
+"learning_rate": 0.0001829350147462626,
+"loss": 1.0737,
+"step": 845
+},
+{
+"epoch": 0.1719861760520431,
+"grad_norm": 0.13144554197788239,
+"learning_rate": 0.00018291467507373132,
+"loss": 1.0275,
+"step": 846
+},
+{
+"epoch": 0.17218946940435048,
+"grad_norm": 0.1224883422255516,
+"learning_rate": 0.00018289433540120005,
+"loss": 1.1558,
+"step": 847
+},
+{
+"epoch": 0.17239276275665785,
+"grad_norm": 0.1263243854045868,
+"learning_rate": 0.00018287399572866877,
+"loss": 0.9381,
+"step": 848
+},
+{
+"epoch": 0.17259605610896522,
+"grad_norm": 0.13391436636447906,
+"learning_rate": 0.00018285365605613752,
+"loss": 1.2548,
+"step": 849
+},
+{
+"epoch": 0.17279934946127262,
+"grad_norm": 0.12166419625282288,
+"learning_rate": 0.00018283331638360625,
+"loss": 1.0981,
+"step": 850
+},
+{
+"epoch": 0.17300264281358,
+"grad_norm": 0.13190463185310364,
+"learning_rate": 0.00018281297671107494,
+"loss": 1.1847,
+"step": 851
+},
+{
+"epoch": 0.17320593616588736,
+"grad_norm": 0.11678186804056168,
+"learning_rate": 0.00018279263703854367,
+"loss": 1.0303,
+"step": 852
+},
+{
+"epoch": 0.17340922951819476,
+"grad_norm": 0.11716858297586441,
+"learning_rate": 0.00018277229736601242,
+"loss": 0.9274,
+"step": 853
+},
+{
+"epoch": 0.17361252287050213,
+"grad_norm": 0.1340217888355255,
+"learning_rate": 0.00018275195769348114,
+"loss": 1.0179,
+"step": 854
+},
+{
+"epoch": 0.1738158162228095,
+"grad_norm": 0.12650153040885925,
+"learning_rate": 0.00018273161802094987,
+"loss": 1.0234,
+"step": 855
+},
+{
+"epoch": 0.1740191095751169,
+"grad_norm": 0.1294967234134674,
+"learning_rate": 0.0001827112783484186,
+"loss": 1.2539,
+"step": 856
+},
+{
+"epoch": 0.17422240292742427,
+"grad_norm": 0.13714881241321564,
+"learning_rate": 0.00018269093867588734,
+"loss": 1.0106,
+"step": 857
+},
+{
+"epoch": 0.17442569627973165,
+"grad_norm": 0.12365014851093292,
+"learning_rate": 0.00018267059900335607,
+"loss": 1.1184,
+"step": 858
+},
+{
+"epoch": 0.17462898963203904,
+"grad_norm": 0.11030489951372147,
+"learning_rate": 0.00018265025933082477,
+"loss": 0.9478,
+"step": 859
+},
+{
+"epoch": 0.17483228298434642,
+"grad_norm": 0.1181483343243599,
+"learning_rate": 0.0001826299196582935,
+"loss": 1.0861,
+"step": 860
+},
+{
+"epoch": 0.1750355763366538,
+"grad_norm": 0.12873612344264984,
+"learning_rate": 0.00018260957998576224,
+"loss": 0.9811,
+"step": 861
+},
+{
+"epoch": 0.17523886968896116,
+"grad_norm": 0.11688394844532013,
+"learning_rate": 0.00018258924031323097,
+"loss": 1.1643,
+"step": 862
+},
+{
+"epoch": 0.17544216304126856,
+"grad_norm": 0.12729796767234802,
+"learning_rate": 0.0001825689006406997,
+"loss": 1.0692,
+"step": 863
+},
+{
+"epoch": 0.17564545639357593,
+"grad_norm": 0.12474660575389862,
+"learning_rate": 0.00018254856096816842,
+"loss": 1.2838,
+"step": 864
+},
+{
+"epoch": 0.1758487497458833,
+"grad_norm": 0.12324024736881256,
+"learning_rate": 0.00018252822129563717,
+"loss": 1.0029,
+"step": 865
+},
+{
+"epoch": 0.1760520430981907,
+"grad_norm": 0.13511407375335693,
+"learning_rate": 0.0001825078816231059,
+"loss": 1.1398,
+"step": 866
+},
+{
+"epoch": 0.17625533645049807,
+"grad_norm": 0.13292032480239868,
+"learning_rate": 0.0001824875419505746,
+"loss": 1.3107,
+"step": 867
+},
+{
+"epoch": 0.17645862980280544,
+"grad_norm": 0.12073294073343277,
+"learning_rate": 0.00018246720227804331,
+"loss": 1.1293,
+"step": 868
+},
+{
+"epoch": 0.17666192315511284,
+"grad_norm": 0.11789250373840332,
+"learning_rate": 0.00018244686260551207,
+"loss": 1.0462,
+"step": 869
+},
+{
+"epoch": 0.1768652165074202,
+"grad_norm": 0.1194562166929245,
+"learning_rate": 0.0001824265229329808,
+"loss": 1.0017,
+"step": 870
+},
+{
+"epoch": 0.17706850985972758,
+"grad_norm": 0.10480080544948578,
+"learning_rate": 0.00018240618326044951,
+"loss": 0.8659,
+"step": 871
+},
+{
+"epoch": 0.17727180321203498,
+"grad_norm": 0.1207701787352562,
+"learning_rate": 0.00018238584358791824,
+"loss": 0.9937,
+"step": 872
+},
+{
+"epoch": 0.17747509656434235,
+"grad_norm": 0.1190091222524643,
+"learning_rate": 0.000182365503915387,
+"loss": 1.0437,
+"step": 873
+},
+{
+"epoch": 0.17767838991664972,
+"grad_norm": 0.1277458369731903,
+"learning_rate": 0.00018234516424285572,
+"loss": 1.2392,
+"step": 874
+},
+{
+"epoch": 0.1778816832689571,
+"grad_norm": 0.12237963080406189,
+"learning_rate": 0.00018232482457032444,
+"loss": 1.1032,
+"step": 875
+},
+{
+"epoch": 0.1780849766212645,
+"grad_norm": 0.1319531798362732,
+"learning_rate": 0.00018230448489779314,
+"loss": 1.2012,
+"step": 876
+},
+{
+"epoch": 0.17828826997357186,
+"grad_norm": 0.11914216727018356,
+"learning_rate": 0.0001822841452252619,
+"loss": 1.0272,
+"step": 877
+},
+{
+"epoch": 0.17849156332587923,
+"grad_norm": 0.14588242769241333,
+"learning_rate": 0.0001822638055527306,
+"loss": 1.357,
+"step": 878
+},
+{
+"epoch": 0.17869485667818663,
+"grad_norm": 0.11982700973749161,
+"learning_rate": 0.00018224346588019934,
+"loss": 1.049,
+"step": 879
+},
+{
+"epoch": 0.178898150030494,
+"grad_norm": 0.12529560923576355,
+"learning_rate": 0.00018222312620766806,
+"loss": 1.0713,
+"step": 880
+},
+{
+"epoch": 0.17910144338280137,
+"grad_norm": 0.1316487044095993,
+"learning_rate": 0.00018220278653513679,
+"loss": 1.1749,
+"step": 881
+},
+{
+"epoch": 0.17930473673510877,
+"grad_norm": 0.12096232175827026,
+"learning_rate": 0.00018218244686260554,
+"loss": 1.2104,
+"step": 882
+},
+{
+"epoch": 0.17950803008741614,
+"grad_norm": 0.1313014030456543,
+"learning_rate": 0.00018216210719007426,
+"loss": 1.0554,
+"step": 883
+},
+{
+"epoch": 0.1797113234397235,
+"grad_norm": 0.1309378743171692,
+"learning_rate": 0.00018214176751754296,
+"loss": 1.2152,
+"step": 884
+},
+{
+"epoch": 0.1799146167920309,
+"grad_norm": 0.1286410242319107,
+"learning_rate": 0.00018212142784501168,
+"loss": 1.0922,
+"step": 885
+},
+{
+"epoch": 0.18011791014433828,
+"grad_norm": 0.12893226742744446,
+"learning_rate": 0.00018210108817248044,
+"loss": 1.1969,
+"step": 886
+},
+{
+"epoch": 0.18032120349664565,
+"grad_norm": 0.11664584279060364,
+"learning_rate": 0.00018208074849994916,
+"loss": 1.0085,
+"step": 887
+},
+{
+"epoch": 0.18052449684895303,
+"grad_norm": 0.10973158478736877,
+"learning_rate": 0.00018206040882741788,
+"loss": 0.9548,
+"step": 888
+},
+{
+"epoch": 0.18072779020126042,
+"grad_norm": 0.11281079053878784,
+"learning_rate": 0.0001820400691548866,
+"loss": 0.8521,
+"step": 889
+},
+{
+"epoch": 0.1809310835535678,
+"grad_norm": 0.12198197096586227,
+"learning_rate": 0.00018201972948235536,
+"loss": 1.0537,
+"step": 890
+},
+{
+"epoch": 0.18113437690587517,
+"grad_norm": 0.09405733644962311,
+"learning_rate": 0.00018199938980982409,
+"loss": 0.7193,
+"step": 891
+},
+{
+"epoch": 0.18133767025818257,
+"grad_norm": 0.13503974676132202,
+"learning_rate": 0.00018197905013729278,
+"loss": 1.1564,
+"step": 892
+},
+{
+"epoch": 0.18154096361048994,
+"grad_norm": 0.1322106271982193,
+"learning_rate": 0.0001819587104647615,
+"loss": 1.0733,
+"step": 893
+},
+{
+"epoch": 0.1817442569627973,
+"grad_norm": 0.12791374325752258,
+"learning_rate": 0.00018193837079223026,
+"loss": 1.0701,
+"step": 894
+},
+{
+"epoch": 0.1819475503151047,
+"grad_norm": 0.12342046946287155,
+"learning_rate": 0.00018191803111969898,
+"loss": 1.1255,
+"step": 895
+},
+{
+"epoch": 0.18215084366741208,
+"grad_norm": 0.12089495360851288,
+"learning_rate": 0.0001818976914471677,
+"loss": 1.0177,
+"step": 896
+},
+{
+"epoch": 0.18235413701971945,
+"grad_norm": 0.12383720278739929,
+"learning_rate": 0.00018187735177463643,
+"loss": 1.0188,
+"step": 897
+},
+{
+"epoch": 0.18255743037202685,
+"grad_norm": 0.12089379876852036,
+"learning_rate": 0.00018185701210210518,
+"loss": 1.1106,
+"step": 898
+},
+{
+"epoch": 0.18276072372433422,
+"grad_norm": 0.12939763069152832,
+"learning_rate": 0.0001818366724295739,
+"loss": 1.1939,
+"step": 899
+},
+{
+"epoch": 0.1829640170766416,
+"grad_norm": 0.14534543454647064,
+"learning_rate": 0.0001818163327570426,
+"loss": 1.252,
+"step": 900
 }
 ],
 "logging_steps": 1,

@@ -5627,7 +6327,7 @@
 "attributes": {}
 }
 },
-"total_flos":
+"total_flos": 5.0020743561314304e+17,
 "train_batch_size": 8,
 "trial_name": null,
 "trial_params": null
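The 700 added lines are the per-step records for steps 801 through 900 in the state file's log_history array (the standard field transformers' Trainer writes these logs to), and they are presumably what the new loss.png was plotted from. As a consistency check, 900 / 0.1829640170766416 ≈ 4919 steps per epoch, so the README's 9838-step budget corresponds to two epochs. A minimal sketch of recomputing summary statistics from this file; the local path is an assumption:

```python
import json

# Hypothetical local copy of the trainer_state.json updated in this commit.
with open("checkpoint-900/trainer_state.json") as f:
    state = json.load(f)

# Each log_history record carries epoch, grad_norm, learning_rate, loss, step.
logs = [rec for rec in state["log_history"] if "loss" in rec]
recent = logs[-100:]  # the records this commit added (steps 801-900)

mean_loss = sum(rec["loss"] for rec in recent) / len(recent)
print(f"steps {recent[0]['step']}-{recent[-1]['step']}: mean loss {mean_loss:.4f}")
```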