Upload checkpoint 2500

Files changed:
- README.md (+4 -4)
- adapter_config.json (+1 -1)
- loss.png (+2 -2)
- optimizer.pt (+1 -1)
- rng_state.pth (+1 -1)
- scheduler.pt (+1 -1)
- trainer_state.json (+703 -3)
README.md
CHANGED
@@ -2,7 +2,7 @@
 base_model: Qwen/Qwen2.5-7B-Instruct
 library_name: peft
 ---
-# Gradience T1 7B (Step
+# Gradience T1 7B (Step 2500 Checkpoint)
 
 > [!NOTE]
 > Training in progress...
@@ -15,11 +15,11 @@ library_name: peft
 </head>
 <body>
   <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
-    <div style="height: 30px; width:
-
+    <div style="height: 30px; width: 50.83%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
+      50.8%
     </div>
   </div>
-  <p style="font-family: Arial, sans-serif; font-size: 16px;">Progress:
+  <p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 2500 out of 4918 steps</p>
 </body>
 </html>
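The new bar width and label follow directly from the step count in the progress line. A minimal sketch of the arithmetic (plain Python, illustrative only, not part of the repository):

```python
# Progress-bar arithmetic implied by the README diff above.
current_step = 2500
total_steps = 4918

fraction = current_step / total_steps
width_pct = round(fraction * 100, 2)  # 50.83 -> CSS width "50.83%"
label_pct = round(fraction * 100, 1)  # 50.8  -> visible label "50.8%"

print(f"width: {width_pct}%  label: {label_pct}%")
```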
adapter_config.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "
+  "base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
   "bias": "none",
   "eva_config": null,
   "exclude_modules": null,
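With base_model_name_or_path now pointing at the base model, a checkpoint like this can be loaded as a PEFT adapter on top of it. A minimal sketch, assuming the uploaded files sit in a local "checkpoint-2500" directory (path is an assumption, not part of this commit):

```python
# Hypothetical usage sketch: attach the LoRA adapter from this checkpoint
# to the base model named in adapter_config.json.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-7B-Instruct")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")

# "checkpoint-2500" is an assumed local path to this upload's files.
model = PeftModel.from_pretrained(base, "checkpoint-2500")
```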
loss.png
CHANGED
(binary image tracked with Git LFS; both the previous and updated versions are stored as LFS objects)
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4ec5d1bb42b806432b0649c962fdaffdecdc557c94c8c01e010c06d455e651d3
 size 82461044
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e4ae31f3bd6abd5e088309ad57fa2e995bc6dd61c02221bc158a3d63e6ad1f06
 size 14244
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9f30623027facf9b238397ebc799b819df1f5ae1bf4da593c1b0199dcd6b102f
 size 1064
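Each of these .pt/.pth diffs only swaps the sha256 in the Git LFS pointer; the size lines are unchanged context. A small sketch, assuming a locally downloaded copy of one artifact, of checking a file against its pointer:

```python
# Verify a downloaded artifact against the oid/size in its LFS pointer.
import hashlib
from pathlib import Path

def lfs_digest(path: str) -> tuple[str, int]:
    # Fine for small files like scheduler.pt (1064 bytes);
    # stream in chunks for multi-GB artifacts.
    data = Path(path).read_bytes()
    return hashlib.sha256(data).hexdigest(), len(data)

# Values copied from the scheduler.pt pointer above; local path is assumed.
oid, size = lfs_digest("scheduler.pt")
assert size == 1064
assert oid == "9f30623027facf9b238397ebc799b819df1f5ae1bf4da593c1b0199dcd6b102f"
```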
trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
+  "epoch": 1.0162634681845903,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 2500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -16808,6 +16808,706 @@
       "learning_rate": 0.00010254427030327702,
       "loss": 0.8787,
       "step": 2400
+    },
+    {
+      "epoch": 0.9762146777800366,
+      "grad_norm": 0.09117105603218079,
+      "learning_rate": 0.00010250356197842459,
+      "loss": 0.9053,
+      "step": 2401
+    },
+    {
+      "epoch": 0.9766212644846514,
+      "grad_norm": 0.09840644896030426,
+      "learning_rate": 0.00010246285365357216,
+      "loss": 1.0462,
+      "step": 2402
+    },
+    {
+      "epoch": 0.9770278511892662,
+      "grad_norm": 0.09379451721906662,
+      "learning_rate": 0.00010242214532871972,
+      "loss": 0.9617,
+      "step": 2403
+    },
+    {
+      "epoch": 0.9774344378938808,
+      "grad_norm": 0.09142056852579117,
+      "learning_rate": 0.00010238143700386729,
+      "loss": 1.0022,
+      "step": 2404
+    },
+    {
+      "epoch": 0.9778410245984956,
+      "grad_norm": 0.09325367957353592,
+      "learning_rate": 0.00010234072867901487,
+      "loss": 0.9356,
+      "step": 2405
+    },
+    {
+      "epoch": 0.9782476113031104,
+      "grad_norm": 0.09714538604021072,
+      "learning_rate": 0.00010230002035416244,
+      "loss": 1.0685,
+      "step": 2406
+    },
+    {
+      "epoch": 0.9786541980077251,
+      "grad_norm": 0.09502388536930084,
+      "learning_rate": 0.00010225931202931001,
+      "loss": 1.0158,
+      "step": 2407
+    },
+    {
+      "epoch": 0.9790607847123399,
+      "grad_norm": 0.09626177698373795,
+      "learning_rate": 0.00010221860370445758,
+      "loss": 1.0249,
+      "step": 2408
+    },
+    {
+      "epoch": 0.9794673714169546,
+      "grad_norm": 0.09790710359811783,
+      "learning_rate": 0.00010217789537960514,
+      "loss": 0.9974,
+      "step": 2409
+    },
+    {
+      "epoch": 0.9798739581215694,
+      "grad_norm": 0.0907469391822815,
+      "learning_rate": 0.0001021371870547527,
+      "loss": 0.994,
+      "step": 2410
+    },
+    {
+      "epoch": 0.9802805448261842,
+      "grad_norm": 0.10248905420303345,
+      "learning_rate": 0.00010209647872990026,
+      "loss": 1.0214,
+      "step": 2411
+    },
+    {
+      "epoch": 0.9806871315307989,
+      "grad_norm": 0.09504317492246628,
+      "learning_rate": 0.00010205577040504783,
+      "loss": 1.0642,
+      "step": 2412
+    },
+    {
+      "epoch": 0.9810937182354137,
+      "grad_norm": 0.09868543595075607,
+      "learning_rate": 0.0001020150620801954,
+      "loss": 1.0595,
+      "step": 2413
+    },
+    {
+      "epoch": 0.9815003049400285,
+      "grad_norm": 0.08648547530174255,
+      "learning_rate": 0.00010197435375534297,
+      "loss": 0.9273,
+      "step": 2414
+    },
+    {
+      "epoch": 0.9819068916446432,
+      "grad_norm": 0.0870203897356987,
+      "learning_rate": 0.00010193364543049054,
+      "loss": 0.8661,
+      "step": 2415
+    },
+    {
+      "epoch": 0.982313478349258,
+      "grad_norm": 0.09689280390739441,
+      "learning_rate": 0.0001018929371056381,
+      "loss": 1.0179,
+      "step": 2416
+    },
+    {
+      "epoch": 0.9827200650538728,
+      "grad_norm": 0.09497373551130295,
+      "learning_rate": 0.00010185222878078568,
+      "loss": 0.9292,
+      "step": 2417
+    },
+    {
+      "epoch": 0.9831266517584875,
+      "grad_norm": 0.09194166213274002,
+      "learning_rate": 0.00010181152045593325,
+      "loss": 0.969,
+      "step": 2418
+    },
+    {
+      "epoch": 0.9835332384631023,
+      "grad_norm": 0.08828569948673248,
+      "learning_rate": 0.00010177081213108082,
+      "loss": 0.8936,
+      "step": 2419
+    },
+    {
+      "epoch": 0.983939825167717,
+      "grad_norm": 0.095185786485672,
+      "learning_rate": 0.00010173010380622839,
+      "loss": 0.9859,
+      "step": 2420
+    },
+    {
+      "epoch": 0.9843464118723317,
+      "grad_norm": 0.09699594974517822,
+      "learning_rate": 0.00010168939548137595,
+      "loss": 1.0568,
+      "step": 2421
+    },
+    {
+      "epoch": 0.9847529985769465,
+      "grad_norm": 0.09333425760269165,
+      "learning_rate": 0.00010164868715652351,
+      "loss": 0.9503,
+      "step": 2422
+    },
+    {
+      "epoch": 0.9851595852815613,
+      "grad_norm": 0.0883539542555809,
+      "learning_rate": 0.00010160797883167108,
+      "loss": 0.9711,
+      "step": 2423
+    },
+    {
+      "epoch": 0.985566171986176,
+      "grad_norm": 0.09544458985328674,
+      "learning_rate": 0.00010156727050681864,
+      "loss": 0.8668,
+      "step": 2424
+    },
+    {
+      "epoch": 0.9859727586907908,
+      "grad_norm": 0.0979728177189827,
+      "learning_rate": 0.00010152656218196621,
+      "loss": 1.0685,
+      "step": 2425
+    },
+    {
+      "epoch": 0.9863793453954056,
+      "grad_norm": 0.08907411992549896,
+      "learning_rate": 0.00010148585385711378,
+      "loss": 0.8947,
+      "step": 2426
+    },
+    {
+      "epoch": 0.9867859321000203,
+      "grad_norm": 0.09532100707292557,
+      "learning_rate": 0.00010144514553226135,
+      "loss": 1.0793,
+      "step": 2427
+    },
+    {
+      "epoch": 0.9871925188046351,
+      "grad_norm": 0.0916009321808815,
+      "learning_rate": 0.00010140443720740893,
+      "loss": 0.9604,
+      "step": 2428
+    },
+    {
+      "epoch": 0.9875991055092499,
+      "grad_norm": 0.0960593968629837,
+      "learning_rate": 0.0001013637288825565,
+      "loss": 1.0012,
+      "step": 2429
+    },
+    {
+      "epoch": 0.9880056922138646,
+      "grad_norm": 0.0948946550488472,
+      "learning_rate": 0.00010132302055770406,
+      "loss": 0.9555,
+      "step": 2430
+    },
+    {
+      "epoch": 0.9884122789184794,
+      "grad_norm": 0.08670156449079514,
+      "learning_rate": 0.00010128231223285163,
+      "loss": 0.8863,
+      "step": 2431
+    },
+    {
+      "epoch": 0.9888188656230942,
+      "grad_norm": 0.0870981365442276,
+      "learning_rate": 0.0001012416039079992,
+      "loss": 0.949,
+      "step": 2432
+    },
+    {
+      "epoch": 0.9892254523277089,
+      "grad_norm": 0.09065506607294083,
+      "learning_rate": 0.00010120089558314677,
+      "loss": 1.0791,
+      "step": 2433
+    },
+    {
+      "epoch": 0.9896320390323237,
+      "grad_norm": 0.08753534406423569,
+      "learning_rate": 0.00010116018725829432,
+      "loss": 0.8656,
+      "step": 2434
+    },
+    {
+      "epoch": 0.9900386257369383,
+      "grad_norm": 0.08939878642559052,
+      "learning_rate": 0.00010111947893344189,
+      "loss": 0.8983,
+      "step": 2435
+    },
+    {
+      "epoch": 0.9904452124415531,
+      "grad_norm": 0.09110575914382935,
+      "learning_rate": 0.00010107877060858946,
+      "loss": 0.8971,
+      "step": 2436
+    },
+    {
+      "epoch": 0.9908517991461679,
+      "grad_norm": 0.08614566922187805,
+      "learning_rate": 0.00010103806228373702,
+      "loss": 0.9746,
+      "step": 2437
+    },
+    {
+      "epoch": 0.9912583858507826,
+      "grad_norm": 0.09685923904180527,
+      "learning_rate": 0.00010099735395888459,
+      "loss": 0.9638,
+      "step": 2438
+    },
+    {
+      "epoch": 0.9916649725553974,
+      "grad_norm": 0.10014784336090088,
+      "learning_rate": 0.00010095664563403216,
+      "loss": 1.0335,
+      "step": 2439
+    },
+    {
+      "epoch": 0.9920715592600122,
+      "grad_norm": 0.09917939454317093,
+      "learning_rate": 0.00010091593730917974,
+      "loss": 1.0288,
+      "step": 2440
+    },
+    {
+      "epoch": 0.9924781459646269,
+      "grad_norm": 0.09158805757761002,
+      "learning_rate": 0.00010087522898432731,
+      "loss": 0.9372,
+      "step": 2441
+    },
+    {
+      "epoch": 0.9928847326692417,
+      "grad_norm": 0.09151756763458252,
+      "learning_rate": 0.00010083452065947488,
+      "loss": 1.0042,
+      "step": 2442
+    },
+    {
+      "epoch": 0.9932913193738565,
+      "grad_norm": 0.09201864898204803,
+      "learning_rate": 0.00010079381233462244,
+      "loss": 0.937,
+      "step": 2443
+    },
+    {
+      "epoch": 0.9936979060784712,
+      "grad_norm": 0.10031972825527191,
+      "learning_rate": 0.00010075310400977001,
+      "loss": 0.989,
+      "step": 2444
+    },
+    {
+      "epoch": 0.994104492783086,
+      "grad_norm": 0.09593512862920761,
+      "learning_rate": 0.00010071239568491756,
+      "loss": 0.9259,
+      "step": 2445
+    },
+    {
+      "epoch": 0.9945110794877008,
+      "grad_norm": 0.10088519006967545,
+      "learning_rate": 0.00010067168736006513,
+      "loss": 1.0888,
+      "step": 2446
+    },
+    {
+      "epoch": 0.9949176661923155,
+      "grad_norm": 0.09052947163581848,
+      "learning_rate": 0.0001006309790352127,
+      "loss": 0.9643,
+      "step": 2447
+    },
+    {
+      "epoch": 0.9953242528969303,
+      "grad_norm": 0.0943833664059639,
+      "learning_rate": 0.00010059027071036027,
+      "loss": 1.0308,
+      "step": 2448
+    },
+    {
+      "epoch": 0.9957308396015451,
+      "grad_norm": 0.0929458737373352,
+      "learning_rate": 0.00010054956238550783,
+      "loss": 0.8993,
+      "step": 2449
+    },
+    {
+      "epoch": 0.9961374263061598,
+      "grad_norm": 0.09643827378749847,
+      "learning_rate": 0.0001005088540606554,
+      "loss": 0.9708,
+      "step": 2450
+    },
+    {
+      "epoch": 0.9965440130107746,
+      "grad_norm": 0.08925779908895493,
+      "learning_rate": 0.00010046814573580298,
+      "loss": 0.9209,
+      "step": 2451
+    },
+    {
+      "epoch": 0.9969505997153894,
+      "grad_norm": 0.08630047738552094,
+      "learning_rate": 0.00010042743741095055,
+      "loss": 0.9324,
+      "step": 2452
+    },
+    {
+      "epoch": 0.997357186420004,
+      "grad_norm": 0.10127938538789749,
+      "learning_rate": 0.00010038672908609812,
+      "loss": 0.9926,
+      "step": 2453
+    },
+    {
+      "epoch": 0.9977637731246188,
+      "grad_norm": 0.09573110938072205,
+      "learning_rate": 0.00010034602076124569,
+      "loss": 0.9801,
+      "step": 2454
+    },
+    {
+      "epoch": 0.9981703598292336,
+      "grad_norm": 0.0963260605931282,
+      "learning_rate": 0.00010030531243639325,
+      "loss": 0.98,
+      "step": 2455
+    },
+    {
+      "epoch": 0.9985769465338483,
+      "grad_norm": 0.08414101600646973,
+      "learning_rate": 0.00010026460411154082,
+      "loss": 0.8676,
+      "step": 2456
+    },
+    {
+      "epoch": 0.9989835332384631,
+      "grad_norm": 0.09320447593927383,
+      "learning_rate": 0.00010022389578668838,
+      "loss": 0.998,
+      "step": 2457
+    },
+    {
+      "epoch": 0.9993901199430779,
+      "grad_norm": 0.09721797704696655,
+      "learning_rate": 0.00010018318746183594,
+      "loss": 1.0123,
+      "step": 2458
+    },
+    {
+      "epoch": 0.9997967066476926,
+      "grad_norm": 0.08773447573184967,
+      "learning_rate": 0.00010014247913698351,
+      "loss": 0.9673,
+      "step": 2459
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.15718789398670197,
+      "learning_rate": 0.00010010177081213108,
+      "loss": 1.1286,
+      "step": 2460
+    },
+    {
+      "epoch": 1.0004065867046148,
+      "grad_norm": 0.09029074758291245,
+      "learning_rate": 0.00010006106248727865,
+      "loss": 0.9905,
+      "step": 2461
+    },
+    {
+      "epoch": 1.0008131734092296,
+      "grad_norm": 0.09984813630580902,
+      "learning_rate": 0.00010002035416242621,
+      "loss": 0.9981,
+      "step": 2462
+    },
+    {
+      "epoch": 1.0012197601138442,
+      "grad_norm": 0.09808840602636337,
+      "learning_rate": 9.997964583757378e-05,
+      "loss": 1.0156,
+      "step": 2463
+    },
+    {
+      "epoch": 1.001626346818459,
+      "grad_norm": 0.08917602896690369,
+      "learning_rate": 9.993893751272135e-05,
+      "loss": 0.944,
+      "step": 2464
+    },
+    {
+      "epoch": 1.0020329335230738,
+      "grad_norm": 0.0943906158208847,
+      "learning_rate": 9.989822918786892e-05,
+      "loss": 0.9294,
+      "step": 2465
+    },
+    {
+      "epoch": 1.0024395202276886,
+      "grad_norm": 0.09091315418481827,
+      "learning_rate": 9.98575208630165e-05,
+      "loss": 0.9707,
+      "step": 2466
+    },
+    {
+      "epoch": 1.0028461069323034,
+      "grad_norm": 0.09035106003284454,
+      "learning_rate": 9.981681253816407e-05,
+      "loss": 0.9562,
+      "step": 2467
+    },
+    {
+      "epoch": 1.0032526936369182,
+      "grad_norm": 0.09709779173135757,
+      "learning_rate": 9.977610421331163e-05,
+      "loss": 0.9287,
+      "step": 2468
+    },
+    {
+      "epoch": 1.0036592803415327,
+      "grad_norm": 0.09063035994768143,
+      "learning_rate": 9.973539588845919e-05,
+      "loss": 0.9138,
+      "step": 2469
+    },
+    {
+      "epoch": 1.0040658670461475,
+      "grad_norm": 0.09490003436803818,
+      "learning_rate": 9.969468756360676e-05,
+      "loss": 0.9475,
+      "step": 2470
+    },
+    {
+      "epoch": 1.0044724537507623,
+      "grad_norm": 0.10134010761976242,
+      "learning_rate": 9.965397923875432e-05,
+      "loss": 1.0092,
+      "step": 2471
+    },
+    {
+      "epoch": 1.0048790404553771,
+      "grad_norm": 0.09728873521089554,
+      "learning_rate": 9.96132709139019e-05,
+      "loss": 0.9498,
+      "step": 2472
+    },
+    {
+      "epoch": 1.005285627159992,
+      "grad_norm": 0.09160648286342621,
+      "learning_rate": 9.957256258904947e-05,
+      "loss": 0.8707,
+      "step": 2473
+    },
+    {
+      "epoch": 1.0056922138646067,
+      "grad_norm": 0.0939764603972435,
+      "learning_rate": 9.953185426419704e-05,
+      "loss": 0.9619,
+      "step": 2474
+    },
+    {
+      "epoch": 1.0060988005692213,
+      "grad_norm": 0.08643637597560883,
+      "learning_rate": 9.94911459393446e-05,
+      "loss": 0.9377,
+      "step": 2475
+    },
+    {
+      "epoch": 1.006505387273836,
+      "grad_norm": 0.09141729027032852,
+      "learning_rate": 9.945043761449216e-05,
+      "loss": 0.8859,
+      "step": 2476
+    },
+    {
+      "epoch": 1.006911973978451,
+      "grad_norm": 0.09555509686470032,
+      "learning_rate": 9.940972928963974e-05,
+      "loss": 0.933,
+      "step": 2477
+    },
+    {
+      "epoch": 1.0073185606830657,
+      "grad_norm": 0.0935022309422493,
+      "learning_rate": 9.936902096478731e-05,
+      "loss": 0.9368,
+      "step": 2478
+    },
+    {
+      "epoch": 1.0077251473876805,
+      "grad_norm": 0.09959034621715546,
+      "learning_rate": 9.932831263993488e-05,
+      "loss": 0.974,
+      "step": 2479
+    },
+    {
+      "epoch": 1.0081317340922953,
+      "grad_norm": 0.09246455878019333,
+      "learning_rate": 9.928760431508245e-05,
+      "loss": 0.9248,
+      "step": 2480
+    },
+    {
+      "epoch": 1.0085383207969099,
+      "grad_norm": 0.10091500729322433,
+      "learning_rate": 9.924689599023e-05,
+      "loss": 1.122,
+      "step": 2481
+    },
+    {
+      "epoch": 1.0089449075015247,
+      "grad_norm": 0.10083048790693283,
+      "learning_rate": 9.920618766537757e-05,
+      "loss": 1.0199,
+      "step": 2482
+    },
+    {
+      "epoch": 1.0093514942061395,
+      "grad_norm": 0.09641805291175842,
+      "learning_rate": 9.916547934052515e-05,
+      "loss": 0.9971,
+      "step": 2483
+    },
+    {
+      "epoch": 1.0097580809107543,
+      "grad_norm": 0.10362432897090912,
+      "learning_rate": 9.912477101567272e-05,
+      "loss": 0.9596,
+      "step": 2484
+    },
+    {
+      "epoch": 1.010164667615369,
+      "grad_norm": 0.09050238877534866,
+      "learning_rate": 9.908406269082028e-05,
+      "loss": 0.9423,
+      "step": 2485
+    },
+    {
+      "epoch": 1.0105712543199838,
+      "grad_norm": 0.10209590941667557,
+      "learning_rate": 9.904335436596785e-05,
+      "loss": 0.9366,
+      "step": 2486
+    },
+    {
+      "epoch": 1.0109778410245984,
+      "grad_norm": 0.104631707072258,
+      "learning_rate": 9.90026460411154e-05,
+      "loss": 1.0476,
+      "step": 2487
+    },
+    {
+      "epoch": 1.0113844277292132,
+      "grad_norm": 0.09572993963956833,
+      "learning_rate": 9.896193771626297e-05,
+      "loss": 1.0523,
+      "step": 2488
+    },
+    {
+      "epoch": 1.011791014433828,
+      "grad_norm": 0.10640837252140045,
+      "learning_rate": 9.892122939141055e-05,
+      "loss": 1.1238,
+      "step": 2489
+    },
+    {
+      "epoch": 1.0121976011384428,
+      "grad_norm": 0.09798834472894669,
+      "learning_rate": 9.888052106655812e-05,
+      "loss": 0.9597,
+      "step": 2490
+    },
+    {
+      "epoch": 1.0126041878430576,
+      "grad_norm": 0.08913593739271164,
+      "learning_rate": 9.883981274170569e-05,
+      "loss": 0.9258,
+      "step": 2491
+    },
+    {
+      "epoch": 1.0130107745476722,
+      "grad_norm": 0.09719277173280716,
+      "learning_rate": 9.879910441685324e-05,
+      "loss": 0.9812,
+      "step": 2492
+    },
+    {
+      "epoch": 1.013417361252287,
+      "grad_norm": 0.09699688851833344,
+      "learning_rate": 9.875839609200081e-05,
+      "loss": 0.8946,
+      "step": 2493
+    },
+    {
+      "epoch": 1.0138239479569018,
+      "grad_norm": 0.09061427414417267,
+      "learning_rate": 9.871768776714838e-05,
+      "loss": 0.9075,
+      "step": 2494
+    },
+    {
+      "epoch": 1.0142305346615166,
+      "grad_norm": 0.08979996293783188,
+      "learning_rate": 9.867697944229596e-05,
+      "loss": 0.933,
+      "step": 2495
+    },
+    {
+      "epoch": 1.0146371213661314,
+      "grad_norm": 0.09325064718723297,
+      "learning_rate": 9.863627111744353e-05,
+      "loss": 0.9604,
+      "step": 2496
+    },
+    {
+      "epoch": 1.0150437080707462,
+      "grad_norm": 0.09821408241987228,
+      "learning_rate": 9.85955627925911e-05,
+      "loss": 1.0871,
+      "step": 2497
+    },
+    {
+      "epoch": 1.0154502947753608,
+      "grad_norm": 0.09746625274419785,
+      "learning_rate": 9.855485446773865e-05,
+      "loss": 0.9304,
+      "step": 2498
+    },
+    {
+      "epoch": 1.0158568814799755,
+      "grad_norm": 0.09508597105741501,
+      "learning_rate": 9.851414614288622e-05,
+      "loss": 0.9469,
+      "step": 2499
+    },
+    {
+      "epoch": 1.0162634681845903,
+      "grad_norm": 0.10357919335365295,
+      "learning_rate": 9.84734378180338e-05,
+      "loss": 1.0272,
+      "step": 2500
     }
   ],
   "logging_steps": 1,
@@ -16827,7 +17527,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.
+  "total_flos": 7.991705590388761e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
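The appended log_history entries are the kind of data a plot like loss.png is typically drawn from. A short sketch (local file layout assumed) that pulls the loss curve back out of this state file:

```python
# Read the per-step training losses recorded in trainer_state.json.
import json

with open("trainer_state.json") as f:  # assumed local copy of this file
    state = json.load(f)

entries = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in entries]
losses = [e["loss"] for e in entries]

# Last logged point in this commit: step 2500, loss 1.0272.
print(steps[-1], losses[-1])
```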