Upload checkpoint 1900

Changed files:
- README.md +4 -4
- adapter_config.json +1 -1
- loss.png +2 -2
- optimizer.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +703 -3
README.md CHANGED

@@ -2,7 +2,7 @@
 base_model: Qwen/Qwen2.5-7B-Instruct
 library_name: peft
 ---
-# Gradience T1 7B (Step
+# Gradience T1 7B (Step 1900 Checkpoint)
 
 > [!NOTE]
 > Training in progress...

@@ -15,11 +15,11 @@ library_name: peft
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
-<div style="height: 30px; width:
-
+<div style="height: 30px; width: 38.63%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
+38.6%
 </div>
 </div>
-<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress:
+<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 1900 out of 4918 steps</p>
 </body>
 </html>
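All of the updated numbers in the bar derive from the checkpoint step: 1900 / 4918 ≈ 38.63%. A minimal Python sketch of how a fragment like this could be regenerated at each checkpoint; the helper name and rounding choices are assumptions for illustration, not a script this repository ships:

```python
# Hypothetical helper: rebuild the README progress bar for a checkpoint.
# Illustrates where 38.63% (bar width), "38.6%" (label), and the step
# text in the diff above come from.
def progress_bar_html(step: int, total_steps: int) -> str:
    pct = 100.0 * step / total_steps  # 1900 / 4918 -> 38.6335...
    return (
        f'<div style="height: 30px; width: {pct:.2f}%; background-color: #44965a; '
        f'text-align: center; line-height: 30px; color: white; '
        f'border-radius: 25px 0 0 25px;">\n{pct:.1f}%\n</div>\n'
        f'<p style="font-family: Arial, sans-serif; font-size: 16px;">'
        f'Progress: {step} out of {total_steps} steps</p>'
    )

print(progress_bar_html(1900, 4918))  # matches the added lines above
```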
adapter_config.json CHANGED

@@ -1,7 +1,7 @@
 {
 "alpha_pattern": {},
 "auto_mapping": null,
-"base_model_name_or_path": "
+"base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
 "bias": "none",
 "eva_config": null,
 "exclude_modules": null,
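The filled-in `base_model_name_or_path` is what PEFT reads to resolve the base weights when the adapter is loaded. A minimal sketch using the standard transformers/peft loading pattern, assuming the checkpoint has been downloaded to a local `checkpoint-1900` directory (the path is hypothetical):

```python
# Minimal sketch: attach the step-1900 LoRA adapter to its base model.
# Assumes the standard transformers + peft APIs; "checkpoint-1900" is a
# hypothetical local path to this repository's files.
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-7B-Instruct")
model = PeftModel.from_pretrained(base, "checkpoint-1900")
```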
loss.png CHANGED

[Binary change: loss.png (training-loss plot) replaced; the file is stored via Git LFS, so only pointer details appear in the diff.]
optimizer.pt CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:aed3b5ff30e3e07ae751c0198bf9476de1a89a755bb4dc5a8149b83298170716
 size 82461044
scheduler.pt CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ec8ba207deb0762ece0e7f805dcaf1e7c6d96da2ae9e39aab68cb2700888f6ea
 size 1064
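Both `.pt` entries are Git LFS pointer files: the repository itself stores only the spec version, the SHA-256 of the payload, and its size in bytes, so each diff above is a pointer swap rather than a binary diff. A minimal sketch of checking a downloaded object against its pointer; the local filename is an assumption:

```python
# Minimal sketch: verify a resolved file against its Git LFS pointer.
# oid and size are taken from the scheduler.pt pointer above; the local
# path is hypothetical.
import hashlib
from pathlib import Path

def verify_lfs(path: str, oid: str, size: int) -> bool:
    data = Path(path).read_bytes()
    return len(data) == size and hashlib.sha256(data).hexdigest() == oid

print(verify_lfs(
    "scheduler.pt",
    "ec8ba207deb0762ece0e7f805dcaf1e7c6d96da2ae9e39aab68cb2700888f6ea",
    1064,
))
```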
trainer_state.json CHANGED

@@ -2,9 +2,9 @@
 "best_global_step": null,
 "best_metric": null,
 "best_model_checkpoint": null,
-"epoch": 0.
+"epoch": 0.7725147387680423,
 "eval_steps": 500,
-"global_step":
+"global_step": 1900,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,

@@ -12608,6 +12608,706 @@
 "learning_rate": 0.00012696926521473643,
 "loss": 0.9162,
 "step": 1800
+},
+{
+"epoch": 0.7322626550111812,
+"grad_norm": 0.09074793756008148,
+"learning_rate": 0.00012692855688988398,
+"loss": 0.9388,
+"step": 1801
+},
+{
+"epoch": 0.7326692417157958,
+"grad_norm": 0.10199327766895294,
+"learning_rate": 0.00012688784856503156,
+"loss": 0.9585,
+"step": 1802
+},
+{
+"epoch": 0.7330758284204106,
+"grad_norm": 0.10722784698009491,
+"learning_rate": 0.00012684714024017912,
+"loss": 1.0226,
+"step": 1803
+},
+{
+"epoch": 0.7334824151250254,
+"grad_norm": 0.10113389045000076,
+"learning_rate": 0.0001268064319153267,
+"loss": 1.0593,
+"step": 1804
+},
+{
+"epoch": 0.7338890018296401,
+"grad_norm": 0.1125817522406578,
+"learning_rate": 0.00012676572359047425,
+"loss": 0.8962,
+"step": 1805
+},
+{
+"epoch": 0.7342955885342549,
+"grad_norm": 0.10177897661924362,
+"learning_rate": 0.0001267250152656218,
+"loss": 1.0323,
+"step": 1806
+},
+{
+"epoch": 0.7347021752388697,
+"grad_norm": 0.10272479057312012,
+"learning_rate": 0.00012668430694076941,
+"loss": 0.9947,
+"step": 1807
+},
+{
+"epoch": 0.7351087619434844,
+"grad_norm": 0.11395642906427383,
+"learning_rate": 0.00012664359861591697,
+"loss": 1.0144,
+"step": 1808
+},
+{
+"epoch": 0.7355153486480992,
+"grad_norm": 0.09565427899360657,
+"learning_rate": 0.00012660289029106452,
+"loss": 1.0052,
+"step": 1809
+},
+{
+"epoch": 0.735921935352714,
+"grad_norm": 0.09244798123836517,
+"learning_rate": 0.0001265621819662121,
+"loss": 0.8411,
+"step": 1810
+},
+{
+"epoch": 0.7363285220573287,
+"grad_norm": 0.08985315263271332,
+"learning_rate": 0.00012652147364135966,
+"loss": 1.0301,
+"step": 1811
+},
+{
+"epoch": 0.7367351087619435,
+"grad_norm": 0.09606938809156418,
+"learning_rate": 0.00012648076531650724,
+"loss": 1.0053,
+"step": 1812
+},
+{
+"epoch": 0.7371416954665583,
+"grad_norm": 0.10566183179616928,
+"learning_rate": 0.0001264400569916548,
+"loss": 0.9527,
+"step": 1813
+},
+{
+"epoch": 0.737548282171173,
+"grad_norm": 0.10999652743339539,
+"learning_rate": 0.00012639934866680237,
+"loss": 1.0756,
+"step": 1814
+},
+{
+"epoch": 0.7379548688757878,
+"grad_norm": 0.09473931044340134,
+"learning_rate": 0.00012635864034194993,
+"loss": 0.94,
+"step": 1815
+},
+{
+"epoch": 0.7383614555804026,
+"grad_norm": 0.09815262258052826,
+"learning_rate": 0.0001263179320170975,
+"loss": 1.0436,
+"step": 1816
+},
+{
+"epoch": 0.7387680422850172,
+"grad_norm": 0.08889912813901901,
+"learning_rate": 0.00012627722369224506,
+"loss": 0.9368,
+"step": 1817
+},
+{
+"epoch": 0.739174628989632,
+"grad_norm": 0.09337257593870163,
+"learning_rate": 0.00012623651536739262,
+"loss": 1.0949,
+"step": 1818
+},
+{
+"epoch": 0.7395812156942468,
+"grad_norm": 0.09112720191478729,
+"learning_rate": 0.00012619580704254023,
+"loss": 1.0239,
+"step": 1819
+},
+{
+"epoch": 0.7399878023988615,
+"grad_norm": 0.0988708958029747,
+"learning_rate": 0.00012615509871768778,
+"loss": 1.0648,
+"step": 1820
+},
+{
+"epoch": 0.7403943891034763,
+"grad_norm": 0.09849932789802551,
+"learning_rate": 0.00012611439039283533,
+"loss": 0.9867,
+"step": 1821
+},
+{
+"epoch": 0.7408009758080911,
+"grad_norm": 0.09254156798124313,
+"learning_rate": 0.00012607368206798291,
+"loss": 0.9903,
+"step": 1822
+},
+{
+"epoch": 0.7412075625127058,
+"grad_norm": 0.0954776182770729,
+"learning_rate": 0.00012603297374313047,
+"loss": 1.0081,
+"step": 1823
+},
+{
+"epoch": 0.7416141492173206,
+"grad_norm": 0.08610807359218597,
+"learning_rate": 0.00012599226541827805,
+"loss": 0.9229,
+"step": 1824
+},
+{
+"epoch": 0.7420207359219354,
+"grad_norm": 0.0977591797709465,
+"learning_rate": 0.0001259515570934256,
+"loss": 0.9076,
+"step": 1825
+},
+{
+"epoch": 0.7424273226265501,
+"grad_norm": 0.0858481377363205,
+"learning_rate": 0.00012591084876857319,
+"loss": 0.8604,
+"step": 1826
+},
+{
+"epoch": 0.7428339093311649,
+"grad_norm": 0.09642601758241653,
+"learning_rate": 0.00012587014044372074,
+"loss": 1.0476,
+"step": 1827
+},
+{
+"epoch": 0.7432404960357797,
+"grad_norm": 0.08871784061193466,
+"learning_rate": 0.0001258294321188683,
+"loss": 0.9597,
+"step": 1828
+},
+{
+"epoch": 0.7436470827403944,
+"grad_norm": 0.10808097571134567,
+"learning_rate": 0.00012578872379401587,
+"loss": 1.1415,
+"step": 1829
+},
+{
+"epoch": 0.7440536694450092,
+"grad_norm": 0.09339917451143265,
+"learning_rate": 0.00012574801546916346,
+"loss": 0.9437,
+"step": 1830
+},
+{
+"epoch": 0.7444602561496239,
+"grad_norm": 0.08945673704147339,
+"learning_rate": 0.00012570730714431104,
+"loss": 0.9714,
+"step": 1831
+},
+{
+"epoch": 0.7448668428542387,
+"grad_norm": 0.0939527079463005,
+"learning_rate": 0.0001256665988194586,
+"loss": 0.9868,
+"step": 1832
+},
+{
+"epoch": 0.7452734295588535,
+"grad_norm": 0.09327416867017746,
+"learning_rate": 0.00012562589049460615,
+"loss": 1.0001,
+"step": 1833
+},
+{
+"epoch": 0.7456800162634681,
+"grad_norm": 0.10278622061014175,
+"learning_rate": 0.00012558518216975373,
+"loss": 1.0724,
+"step": 1834
+},
+{
+"epoch": 0.7460866029680829,
+"grad_norm": 0.09421471506357193,
+"learning_rate": 0.00012554447384490128,
+"loss": 1.0088,
+"step": 1835
+},
+{
+"epoch": 0.7464931896726977,
+"grad_norm": 0.1009073331952095,
+"learning_rate": 0.00012550376552004886,
+"loss": 1.0485,
+"step": 1836
+},
+{
+"epoch": 0.7468997763773124,
+"grad_norm": 0.09199651330709457,
+"learning_rate": 0.00012546305719519642,
+"loss": 0.9765,
+"step": 1837
+},
+{
+"epoch": 0.7473063630819272,
+"grad_norm": 0.09672168642282486,
+"learning_rate": 0.000125422348870344,
+"loss": 1.018,
+"step": 1838
+},
+{
+"epoch": 0.747712949786542,
+"grad_norm": 0.09036868065595627,
+"learning_rate": 0.00012538164054549155,
+"loss": 0.9067,
+"step": 1839
+},
+{
+"epoch": 0.7481195364911567,
+"grad_norm": 0.09706352651119232,
+"learning_rate": 0.0001253409322206391,
+"loss": 1.0439,
+"step": 1840
+},
+{
+"epoch": 0.7485261231957715,
+"grad_norm": 0.09940480440855026,
+"learning_rate": 0.00012530022389578669,
+"loss": 1.0936,
+"step": 1841
+},
+{
+"epoch": 0.7489327099003863,
+"grad_norm": 0.09489309787750244,
+"learning_rate": 0.00012525951557093427,
+"loss": 1.0606,
+"step": 1842
+},
+{
+"epoch": 0.749339296605001,
+"grad_norm": 0.07897097617387772,
+"learning_rate": 0.00012521880724608185,
+"loss": 0.8109,
+"step": 1843
+},
+{
+"epoch": 0.7497458833096158,
+"grad_norm": 0.09423919022083282,
+"learning_rate": 0.0001251780989212294,
+"loss": 1.0703,
+"step": 1844
+},
+{
+"epoch": 0.7501524700142306,
+"grad_norm": 0.09601794928312302,
+"learning_rate": 0.00012513739059637696,
+"loss": 0.9692,
+"step": 1845
+},
+{
+"epoch": 0.7505590567188453,
+"grad_norm": 0.09051002562046051,
+"learning_rate": 0.00012509668227152454,
+"loss": 0.9727,
+"step": 1846
+},
+{
+"epoch": 0.7509656434234601,
+"grad_norm": 0.09665656834840775,
+"learning_rate": 0.0001250559739466721,
+"loss": 1.0701,
+"step": 1847
+},
+{
+"epoch": 0.7513722301280749,
+"grad_norm": 0.08956587314605713,
+"learning_rate": 0.00012501526562181967,
+"loss": 0.9863,
+"step": 1848
+},
+{
+"epoch": 0.7517788168326895,
+"grad_norm": 0.09464751929044724,
+"learning_rate": 0.00012497455729696723,
+"loss": 1.043,
+"step": 1849
+},
+{
+"epoch": 0.7521854035373043,
+"grad_norm": 0.09246315807104111,
+"learning_rate": 0.0001249338489721148,
+"loss": 1.0306,
+"step": 1850
+},
+{
+"epoch": 0.7525919902419191,
+"grad_norm": 0.0943431407213211,
+"learning_rate": 0.00012489314064726236,
+"loss": 0.9251,
+"step": 1851
+},
+{
+"epoch": 0.7529985769465338,
+"grad_norm": 0.08852697908878326,
+"learning_rate": 0.00012485243232240992,
+"loss": 0.919,
+"step": 1852
+},
+{
+"epoch": 0.7534051636511486,
+"grad_norm": 0.08856131881475449,
+"learning_rate": 0.00012481172399755752,
+"loss": 0.9874,
+"step": 1853
+},
+{
+"epoch": 0.7538117503557634,
+"grad_norm": 0.08715582638978958,
+"learning_rate": 0.00012477101567270508,
+"loss": 0.9569,
+"step": 1854
+},
+{
+"epoch": 0.7542183370603781,
+"grad_norm": 0.1005750522017479,
+"learning_rate": 0.00012473030734785266,
+"loss": 1.118,
+"step": 1855
+},
+{
+"epoch": 0.7546249237649929,
+"grad_norm": 0.0848010703921318,
+"learning_rate": 0.00012468959902300021,
+"loss": 0.8808,
+"step": 1856
+},
+{
+"epoch": 0.7550315104696076,
+"grad_norm": 0.10509838908910751,
+"learning_rate": 0.00012464889069814777,
+"loss": 1.0019,
+"step": 1857
+},
+{
+"epoch": 0.7554380971742224,
+"grad_norm": 0.09729699045419693,
+"learning_rate": 0.00012460818237329535,
+"loss": 0.9275,
+"step": 1858
+},
+{
+"epoch": 0.7558446838788372,
+"grad_norm": 0.0901610478758812,
+"learning_rate": 0.0001245674740484429,
+"loss": 1.0285,
+"step": 1859
+},
+{
+"epoch": 0.7562512705834519,
+"grad_norm": 0.08691520988941193,
+"learning_rate": 0.00012452676572359048,
+"loss": 0.9524,
+"step": 1860
+},
+{
+"epoch": 0.7566578572880667,
+"grad_norm": 0.09559500962495804,
+"learning_rate": 0.00012448605739873804,
+"loss": 1.0781,
+"step": 1861
+},
+{
+"epoch": 0.7570644439926815,
+"grad_norm": 0.09581112861633301,
+"learning_rate": 0.00012444534907388562,
+"loss": 1.068,
+"step": 1862
+},
+{
+"epoch": 0.7574710306972962,
+"grad_norm": 0.10235914587974548,
+"learning_rate": 0.00012440464074903317,
+"loss": 1.078,
+"step": 1863
+},
+{
+"epoch": 0.757877617401911,
+"grad_norm": 0.09794023633003235,
+"learning_rate": 0.00012436393242418073,
+"loss": 1.0951,
+"step": 1864
+},
+{
+"epoch": 0.7582842041065257,
+"grad_norm": 0.08910951763391495,
+"learning_rate": 0.00012432322409932834,
+"loss": 1.002,
+"step": 1865
+},
+{
+"epoch": 0.7586907908111404,
+"grad_norm": 0.08909524232149124,
+"learning_rate": 0.0001242825157744759,
+"loss": 0.9027,
+"step": 1866
+},
+{
+"epoch": 0.7590973775157552,
+"grad_norm": 0.09639742970466614,
+"learning_rate": 0.00012424180744962347,
+"loss": 1.1356,
+"step": 1867
+},
+{
+"epoch": 0.75950396422037,
+"grad_norm": 0.08606995642185211,
+"learning_rate": 0.00012420109912477103,
+"loss": 0.8974,
+"step": 1868
+},
+{
+"epoch": 0.7599105509249847,
+"grad_norm": 0.09715355932712555,
+"learning_rate": 0.00012416039079991858,
+"loss": 1.078,
+"step": 1869
+},
+{
+"epoch": 0.7603171376295995,
+"grad_norm": 0.08933407068252563,
+"learning_rate": 0.00012411968247506616,
+"loss": 0.9177,
+"step": 1870
+},
+{
+"epoch": 0.7607237243342143,
+"grad_norm": 0.0859113335609436,
+"learning_rate": 0.00012407897415021372,
+"loss": 0.9703,
+"step": 1871
+},
+{
+"epoch": 0.761130311038829,
+"grad_norm": 0.09086931496858597,
+"learning_rate": 0.0001240382658253613,
+"loss": 1.0298,
+"step": 1872
+},
+{
+"epoch": 0.7615368977434438,
+"grad_norm": 0.09112663567066193,
+"learning_rate": 0.00012399755750050885,
+"loss": 0.9918,
+"step": 1873
+},
+{
+"epoch": 0.7619434844480586,
+"grad_norm": 0.09044841676950455,
+"learning_rate": 0.00012395684917565643,
+"loss": 0.9469,
+"step": 1874
+},
+{
+"epoch": 0.7623500711526733,
+"grad_norm": 0.08345028758049011,
+"learning_rate": 0.00012391614085080399,
+"loss": 0.879,
+"step": 1875
+},
+{
+"epoch": 0.7627566578572881,
+"grad_norm": 0.10249708592891693,
+"learning_rate": 0.00012387543252595157,
+"loss": 1.0247,
+"step": 1876
+},
+{
+"epoch": 0.7631632445619029,
+"grad_norm": 0.0914909839630127,
+"learning_rate": 0.00012383472420109915,
+"loss": 0.9341,
+"step": 1877
+},
+{
+"epoch": 0.7635698312665176,
+"grad_norm": 0.08616846054792404,
+"learning_rate": 0.0001237940158762467,
+"loss": 0.918,
+"step": 1878
+},
+{
+"epoch": 0.7639764179711324,
+"grad_norm": 0.0853181779384613,
+"learning_rate": 0.00012375330755139428,
+"loss": 0.8903,
+"step": 1879
+},
+{
+"epoch": 0.7643830046757472,
+"grad_norm": 0.0943385511636734,
+"learning_rate": 0.00012371259922654184,
+"loss": 1.0437,
+"step": 1880
+},
+{
+"epoch": 0.7647895913803618,
+"grad_norm": 0.08487629890441895,
+"learning_rate": 0.0001236718909016894,
+"loss": 0.9655,
+"step": 1881
+},
+{
+"epoch": 0.7651961780849766,
+"grad_norm": 0.09635015577077866,
+"learning_rate": 0.00012363118257683697,
+"loss": 1.0047,
+"step": 1882
+},
+{
+"epoch": 0.7656027647895913,
+"grad_norm": 0.09787151217460632,
+"learning_rate": 0.00012359047425198453,
+"loss": 1.1058,
+"step": 1883
+},
+{
+"epoch": 0.7660093514942061,
+"grad_norm": 0.10217342525720596,
+"learning_rate": 0.0001235497659271321,
+"loss": 1.1407,
+"step": 1884
+},
+{
+"epoch": 0.7664159381988209,
+"grad_norm": 0.08770392835140228,
+"learning_rate": 0.00012350905760227966,
+"loss": 0.8851,
+"step": 1885
+},
+{
+"epoch": 0.7668225249034356,
+"grad_norm": 0.08978156745433807,
+"learning_rate": 0.00012346834927742724,
+"loss": 1.0138,
+"step": 1886
+},
+{
+"epoch": 0.7672291116080504,
+"grad_norm": 0.09110313653945923,
+"learning_rate": 0.0001234276409525748,
+"loss": 0.8872,
+"step": 1887
+},
+{
+"epoch": 0.7676356983126652,
+"grad_norm": 0.0905870720744133,
+"learning_rate": 0.00012338693262772238,
+"loss": 0.9819,
+"step": 1888
+},
+{
+"epoch": 0.7680422850172799,
+"grad_norm": 0.09418340027332306,
+"learning_rate": 0.00012334622430286996,
+"loss": 1.0486,
+"step": 1889
+},
+{
+"epoch": 0.7684488717218947,
+"grad_norm": 0.09140585362911224,
+"learning_rate": 0.00012330551597801751,
+"loss": 0.9463,
+"step": 1890
+},
+{
+"epoch": 0.7688554584265095,
+"grad_norm": 0.08720141649246216,
+"learning_rate": 0.0001232648076531651,
+"loss": 0.9833,
+"step": 1891
+},
+{
+"epoch": 0.7692620451311242,
+"grad_norm": 0.09206419438123703,
+"learning_rate": 0.00012322409932831265,
+"loss": 0.9554,
+"step": 1892
+},
+{
+"epoch": 0.769668631835739,
+"grad_norm": 0.09324870258569717,
+"learning_rate": 0.0001231833910034602,
+"loss": 1.0703,
+"step": 1893
+},
+{
+"epoch": 0.7700752185403538,
+"grad_norm": 0.0868481770157814,
+"learning_rate": 0.00012314268267860778,
+"loss": 0.9374,
+"step": 1894
+},
+{
+"epoch": 0.7704818052449685,
+"grad_norm": 0.0907289981842041,
+"learning_rate": 0.00012310197435375534,
+"loss": 1.0148,
+"step": 1895
+},
+{
+"epoch": 0.7708883919495833,
+"grad_norm": 0.09804967790842056,
+"learning_rate": 0.00012306126602890292,
+"loss": 1.0541,
+"step": 1896
+},
+{
+"epoch": 0.771294978654198,
+"grad_norm": 0.09168083965778351,
+"learning_rate": 0.00012302055770405047,
+"loss": 0.9363,
+"step": 1897
+},
+{
+"epoch": 0.7717015653588127,
+"grad_norm": 0.09078045189380646,
+"learning_rate": 0.00012297984937919805,
+"loss": 1.0683,
+"step": 1898
+},
+{
+"epoch": 0.7721081520634275,
+"grad_norm": 0.08930620551109314,
+"learning_rate": 0.00012293914105434564,
+"loss": 0.9659,
+"step": 1899
+},
+{
+"epoch": 0.7725147387680423,
+"grad_norm": 0.09990911930799484,
+"learning_rate": 0.0001228984327294932,
+"loss": 1.1301,
+"step": 1900
 }
 ],
 "logging_steps": 1,

@@ -12627,7 +13327,7 @@
 "attributes": {}
 }
 },
-"total_flos":
+"total_flos": 6.085340035177267e+18,
 "train_batch_size": 16,
 "trial_name": null,
 "trial_params": null
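Two regularities run through the appended log entries: the epoch advances by a fixed ≈0.00040659 per step (about 2459.5 steps per epoch, so the 4918-step run works out to 2 epochs), and the learning rate falls by a fixed ≈4.07e-8 per step, consistent with a linear decay that reaches zero near the final step. A minimal sketch for recovering both from the file; the local path is hypothetical:

```python
# Minimal sketch: check the step/epoch and learning-rate arithmetic in
# trainer_state.json. The path is a hypothetical download location.
import json

with open("checkpoint-1900/trainer_state.json") as f:
    state = json.load(f)
log = [e for e in state["log_history"] if "loss" in e]

d_epoch = log[-1]["epoch"] - log[-2]["epoch"]                 # ~0.00040659
print("steps per epoch ~", 1 / d_epoch)                       # ~2459.5

d_lr = log[-2]["learning_rate"] - log[-1]["learning_rate"]    # ~4.07e-8
print("LR reaches zero near step",
      log[-1]["step"] + log[-1]["learning_rate"] / d_lr)      # ~4919
```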