Upload checkpoint 9400

Files changed:
- README.md (+4 -4)
- adapter_config.json (+1 -1)
- loss.png (+2 -2)
- optimizer.pt (+1 -1)
- scheduler.pt (+1 -1)
- trainer_state.json (+703 -3)

README.md
CHANGED
@@ -2,7 +2,7 @@
 base_model: Qwen/Qwen2.5-3B-Instruct
 library_name: peft
 ---
-# Gradience T1 3B (Step
+# Gradience T1 3B (Step 9400 Checkpoint)

 > [!NOTE]
 > Training in progress...

@@ -38,11 +38,11 @@ library_name: peft
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
-<div style="height: 30px; width:
-<!--
+<div style="height: 30px; width: 95.55%; background-color: #76c7c0; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
+<!-- 95.55% -->
 </div>
 </div>
-<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress:
+<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 9400 out of 9838 steps</p>
 </body>
 </html>
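The 95.55% width and the step caption in the new README follow from the checkpoint arithmetic: 9400 / 9838 ≈ 0.9555. A minimal sketch of how such a bar could be regenerated, assuming the renderer takes `global_step` and a fixed total of 9838 steps (the helper name is hypothetical):

```python
# Hypothetical helper for regenerating the README progress bar.
# Assumes max_steps = 9838, matching the caption in this checkpoint's README.
def render_progress_bar(global_step: int, max_steps: int = 9838) -> str:
    pct = round(100 * global_step / max_steps, 2)  # 9400 / 9838 -> 95.55
    return (
        f'<div style="height: 30px; width: {pct}%; background-color: #76c7c0; '
        f'text-align: center; line-height: 30px; color: white; '
        f'border-radius: 25px 0 0 25px;">\n'
        f"<!-- {pct}% -->\n"
        f"</div>"
    )

print(render_progress_bar(9400))  # emits the div with width: 95.55%
```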
adapter_config.json
CHANGED

@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "
+  "base_model_name_or_path": "Qwen/Qwen2.5-3B-Instruct",
   "bias": "none",
   "eva_config": null,
   "exclude_modules": null,
loss.png
CHANGED

(loss curve image; binary stored via Git LFS, pointer updated)
optimizer.pt
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:792627c97be0a2e0187572be80a240954ee12f398646440d185d8dd31f3d03c0
 size 61392692

scheduler.pt
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:907302e63b2cfc53962ac5b5a9bc923220b8816caf109fa095b223d639e4695a
 size 1064
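The updated pointer files record the SHA-256 digests of the uploaded blobs, so a local copy can be verified against them. A minimal sketch, assuming both files have been pulled into the current directory via `git lfs pull`:

```python
import hashlib

# Expected digests, taken from the updated LFS pointer files above.
EXPECTED = {
    "optimizer.pt": "792627c97be0a2e0187572be80a240954ee12f398646440d185d8dd31f3d03c0",
    "scheduler.pt": "907302e63b2cfc53962ac5b5a9bc923220b8816caf109fa095b223d639e4695a",
}

for name, want in EXPECTED.items():
    h = hashlib.sha256()
    with open(name, "rb") as f:  # assumes the file sits in the working directory
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    print(name, "OK" if h.hexdigest() == want else "MISMATCH")
```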
trainer_state.json
CHANGED

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.
+  "epoch": 1.9109575116893678,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 9400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,

@@ -65108,6 +65108,706 @@
       "learning_rate": 1.0963083494355742e-05,
       "loss": 1.0495,
       "step": 9300
+    },
+    {
+      "epoch": 1.8908314698109372,
+      "grad_norm": 0.16537079215049744,
+      "learning_rate": 1.094274382182447e-05,
+      "loss": 1.0986,
+      "step": 9301
+    },
+    {
+      "epoch": 1.8910347631632445,
+      "grad_norm": 0.15517854690551758,
+      "learning_rate": 1.0922404149293197e-05,
+      "loss": 0.8952,
+      "step": 9302
+    },
+    {
+      "epoch": 1.891238056515552,
+      "grad_norm": 0.20096749067306519,
+      "learning_rate": 1.0902064476761925e-05,
+      "loss": 1.212,
+      "step": 9303
+    },
+    {
+      "epoch": 1.8914413498678593,
+      "grad_norm": 0.1688317060470581,
+      "learning_rate": 1.0881724804230653e-05,
+      "loss": 1.0571,
+      "step": 9304
+    },
+    {
+      "epoch": 1.8916446432201668,
+      "grad_norm": 0.15583118796348572,
+      "learning_rate": 1.086138513169938e-05,
+      "loss": 0.9346,
+      "step": 9305
+    },
+    {
+      "epoch": 1.891847936572474,
+      "grad_norm": 0.17731614410877228,
+      "learning_rate": 1.0841045459168107e-05,
+      "loss": 1.0109,
+      "step": 9306
+    },
+    {
+      "epoch": 1.8920512299247814,
+      "grad_norm": 0.16740213334560394,
+      "learning_rate": 1.0820705786636835e-05,
+      "loss": 0.9507,
+      "step": 9307
+    },
+    {
+      "epoch": 1.8922545232770889,
+      "grad_norm": 0.15723784267902374,
+      "learning_rate": 1.0800366114105563e-05,
+      "loss": 0.9974,
+      "step": 9308
+    },
+    {
+      "epoch": 1.8924578166293964,
+      "grad_norm": 0.1717313826084137,
+      "learning_rate": 1.078002644157429e-05,
+      "loss": 1.1462,
+      "step": 9309
+    },
+    {
+      "epoch": 1.8926611099817037,
+      "grad_norm": 0.16550962626934052,
+      "learning_rate": 1.0759686769043019e-05,
+      "loss": 0.9711,
+      "step": 9310
+    },
+    {
+      "epoch": 1.892864403334011,
+      "grad_norm": 0.15991832315921783,
+      "learning_rate": 1.0739347096511746e-05,
+      "loss": 0.9214,
+      "step": 9311
+    },
+    {
+      "epoch": 1.8930676966863182,
+      "grad_norm": 0.16738741099834442,
+      "learning_rate": 1.0719007423980474e-05,
+      "loss": 0.9806,
+      "step": 9312
+    },
+    {
+      "epoch": 1.8932709900386258,
+      "grad_norm": 0.1509847342967987,
+      "learning_rate": 1.0698667751449202e-05,
+      "loss": 0.8114,
+      "step": 9313
+    },
+    {
+      "epoch": 1.8934742833909333,
+      "grad_norm": 0.16016948223114014,
+      "learning_rate": 1.067832807891793e-05,
+      "loss": 1.0437,
+      "step": 9314
+    },
+    {
+      "epoch": 1.8936775767432406,
+      "grad_norm": 0.16906878352165222,
+      "learning_rate": 1.0657988406386658e-05,
+      "loss": 1.1174,
+      "step": 9315
+    },
+    {
+      "epoch": 1.8938808700955478,
+      "grad_norm": 0.14439010620117188,
+      "learning_rate": 1.0637648733855386e-05,
+      "loss": 0.9612,
+      "step": 9316
+    },
+    {
+      "epoch": 1.8940841634478551,
+      "grad_norm": 0.14976871013641357,
+      "learning_rate": 1.0617309061324113e-05,
+      "loss": 0.9424,
+      "step": 9317
+    },
+    {
+      "epoch": 1.8942874568001626,
+      "grad_norm": 0.14871583878993988,
+      "learning_rate": 1.0596969388792841e-05,
+      "loss": 1.0182,
+      "step": 9318
+    },
+    {
+      "epoch": 1.8944907501524701,
+      "grad_norm": 0.15636609494686127,
+      "learning_rate": 1.057662971626157e-05,
+      "loss": 1.0271,
+      "step": 9319
+    },
+    {
+      "epoch": 1.8946940435047774,
+      "grad_norm": 0.15208925306797028,
+      "learning_rate": 1.0556290043730297e-05,
+      "loss": 0.9909,
+      "step": 9320
+    },
+    {
+      "epoch": 1.8948973368570847,
+      "grad_norm": 0.16356107592582703,
+      "learning_rate": 1.0535950371199025e-05,
+      "loss": 1.0277,
+      "step": 9321
+    },
+    {
+      "epoch": 1.895100630209392,
+      "grad_norm": 0.1584353744983673,
+      "learning_rate": 1.0515610698667753e-05,
+      "loss": 1.061,
+      "step": 9322
+    },
+    {
+      "epoch": 1.8953039235616995,
+      "grad_norm": 0.17385679483413696,
+      "learning_rate": 1.049527102613648e-05,
+      "loss": 1.0001,
+      "step": 9323
+    },
+    {
+      "epoch": 1.895507216914007,
+      "grad_norm": 0.14875008165836334,
+      "learning_rate": 1.0474931353605208e-05,
+      "loss": 0.8962,
+      "step": 9324
+    },
+    {
+      "epoch": 1.8957105102663143,
+      "grad_norm": 0.1576494425535202,
+      "learning_rate": 1.0454591681073936e-05,
+      "loss": 0.9676,
+      "step": 9325
+    },
+    {
+      "epoch": 1.8959138036186216,
+      "grad_norm": 0.17531967163085938,
+      "learning_rate": 1.0434252008542662e-05,
+      "loss": 1.0986,
+      "step": 9326
+    },
+    {
+      "epoch": 1.896117096970929,
+      "grad_norm": 0.17487825453281403,
+      "learning_rate": 1.041391233601139e-05,
+      "loss": 1.004,
+      "step": 9327
+    },
+    {
+      "epoch": 1.8963203903232364,
+      "grad_norm": 0.1605103313922882,
+      "learning_rate": 1.0393572663480118e-05,
+      "loss": 1.0181,
+      "step": 9328
+    },
+    {
+      "epoch": 1.896523683675544,
+      "grad_norm": 0.1699770838022232,
+      "learning_rate": 1.0373232990948846e-05,
+      "loss": 1.0835,
+      "step": 9329
+    },
+    {
+      "epoch": 1.8967269770278512,
+      "grad_norm": 0.15781551599502563,
+      "learning_rate": 1.0352893318417574e-05,
+      "loss": 0.9709,
+      "step": 9330
+    },
+    {
+      "epoch": 1.8969302703801585,
+      "grad_norm": 0.15262846648693085,
+      "learning_rate": 1.0332553645886302e-05,
+      "loss": 1.0025,
+      "step": 9331
+    },
+    {
+      "epoch": 1.897133563732466,
+      "grad_norm": 0.16675053536891937,
+      "learning_rate": 1.031221397335503e-05,
+      "loss": 1.058,
+      "step": 9332
+    },
+    {
+      "epoch": 1.8973368570847733,
+      "grad_norm": 0.15862716734409332,
+      "learning_rate": 1.0291874300823757e-05,
+      "loss": 1.0868,
+      "step": 9333
+    },
+    {
+      "epoch": 1.8975401504370808,
+      "grad_norm": 0.15670160949230194,
+      "learning_rate": 1.0271534628292485e-05,
+      "loss": 0.9464,
+      "step": 9334
+    },
+    {
+      "epoch": 1.897743443789388,
+      "grad_norm": 0.16652365028858185,
+      "learning_rate": 1.0251194955761213e-05,
+      "loss": 1.0129,
+      "step": 9335
+    },
+    {
+      "epoch": 1.8979467371416954,
+      "grad_norm": 0.13543561100959778,
+      "learning_rate": 1.023085528322994e-05,
+      "loss": 0.865,
+      "step": 9336
+    },
+    {
+      "epoch": 1.8981500304940029,
+      "grad_norm": 0.17539720237255096,
+      "learning_rate": 1.0210515610698667e-05,
+      "loss": 1.1737,
+      "step": 9337
+    },
+    {
+      "epoch": 1.8983533238463104,
+      "grad_norm": 0.16995757818222046,
+      "learning_rate": 1.0190175938167395e-05,
+      "loss": 1.0107,
+      "step": 9338
+    },
+    {
+      "epoch": 1.8985566171986177,
+      "grad_norm": 0.16076253354549408,
+      "learning_rate": 1.0169836265636123e-05,
+      "loss": 0.9791,
+      "step": 9339
+    },
+    {
+      "epoch": 1.898759910550925,
+      "grad_norm": 0.15653160214424133,
+      "learning_rate": 1.014949659310485e-05,
+      "loss": 1.0362,
+      "step": 9340
+    },
+    {
+      "epoch": 1.8989632039032323,
+      "grad_norm": 0.17451439797878265,
+      "learning_rate": 1.0129156920573578e-05,
+      "loss": 1.1921,
+      "step": 9341
+    },
+    {
+      "epoch": 1.8991664972555398,
+      "grad_norm": 0.15730206668376923,
+      "learning_rate": 1.0108817248042306e-05,
+      "loss": 1.0043,
+      "step": 9342
+    },
+    {
+      "epoch": 1.8993697906078473,
+      "grad_norm": 0.1547120213508606,
+      "learning_rate": 1.0088477575511034e-05,
+      "loss": 0.9677,
+      "step": 9343
+    },
+    {
+      "epoch": 1.8995730839601546,
+      "grad_norm": 0.14792628586292267,
+      "learning_rate": 1.0068137902979764e-05,
+      "loss": 0.9495,
+      "step": 9344
+    },
+    {
+      "epoch": 1.8997763773124619,
+      "grad_norm": 0.16715767979621887,
+      "learning_rate": 1.0047798230448492e-05,
+      "loss": 1.0871,
+      "step": 9345
+    },
+    {
+      "epoch": 1.8999796706647691,
+      "grad_norm": 0.16810470819473267,
+      "learning_rate": 1.002745855791722e-05,
+      "loss": 0.9926,
+      "step": 9346
+    },
+    {
+      "epoch": 1.9001829640170766,
+      "grad_norm": 0.16060957312583923,
+      "learning_rate": 1.0007118885385946e-05,
+      "loss": 0.9888,
+      "step": 9347
+    },
+    {
+      "epoch": 1.9003862573693842,
+      "grad_norm": 0.16978204250335693,
+      "learning_rate": 9.986779212854673e-06,
+      "loss": 1.1782,
+      "step": 9348
+    },
+    {
+      "epoch": 1.9005895507216914,
+      "grad_norm": 0.1680404543876648,
+      "learning_rate": 9.966439540323401e-06,
+      "loss": 1.0525,
+      "step": 9349
+    },
+    {
+      "epoch": 1.9007928440739987,
+      "grad_norm": 0.1583416610956192,
+      "learning_rate": 9.946099867792129e-06,
+      "loss": 0.9938,
+      "step": 9350
+    },
+    {
+      "epoch": 1.900996137426306,
+      "grad_norm": 0.16274034976959229,
+      "learning_rate": 9.925760195260857e-06,
+      "loss": 1.0139,
+      "step": 9351
+    },
+    {
+      "epoch": 1.9011994307786135,
+      "grad_norm": 0.1580599546432495,
+      "learning_rate": 9.905420522729585e-06,
+      "loss": 0.9478,
+      "step": 9352
+    },
+    {
+      "epoch": 1.901402724130921,
+      "grad_norm": 0.167547807097435,
+      "learning_rate": 9.885080850198313e-06,
+      "loss": 0.9575,
+      "step": 9353
+    },
+    {
+      "epoch": 1.9016060174832283,
+      "grad_norm": 0.17265664041042328,
+      "learning_rate": 9.86474117766704e-06,
+      "loss": 1.2307,
+      "step": 9354
+    },
+    {
+      "epoch": 1.9018093108355356,
+      "grad_norm": 0.15563230216503143,
+      "learning_rate": 9.844401505135768e-06,
+      "loss": 0.8814,
+      "step": 9355
+    },
+    {
+      "epoch": 1.902012604187843,
+      "grad_norm": 0.17064541578292847,
+      "learning_rate": 9.824061832604496e-06,
+      "loss": 1.1182,
+      "step": 9356
+    },
+    {
+      "epoch": 1.9022158975401504,
+      "grad_norm": 0.14311741292476654,
+      "learning_rate": 9.803722160073222e-06,
+      "loss": 0.8657,
+      "step": 9357
+    },
+    {
+      "epoch": 1.902419190892458,
+      "grad_norm": 0.17543523013591766,
+      "learning_rate": 9.78338248754195e-06,
+      "loss": 1.0129,
+      "step": 9358
+    },
+    {
+      "epoch": 1.9026224842447652,
+      "grad_norm": 0.1519622951745987,
+      "learning_rate": 9.763042815010678e-06,
+      "loss": 0.8612,
+      "step": 9359
+    },
+    {
+      "epoch": 1.9028257775970725,
+      "grad_norm": 0.1456150859594345,
+      "learning_rate": 9.742703142479406e-06,
+      "loss": 0.9171,
+      "step": 9360
+    },
+    {
+      "epoch": 1.90302907094938,
+      "grad_norm": 0.1435515433549881,
+      "learning_rate": 9.722363469948134e-06,
+      "loss": 0.9186,
+      "step": 9361
+    },
+    {
+      "epoch": 1.9032323643016873,
+      "grad_norm": 0.14047978818416595,
+      "learning_rate": 9.702023797416862e-06,
+      "loss": 1.0005,
+      "step": 9362
+    },
+    {
+      "epoch": 1.9034356576539948,
+      "grad_norm": 0.13906230032444,
+      "learning_rate": 9.68168412488559e-06,
+      "loss": 0.9253,
+      "step": 9363
+    },
+    {
+      "epoch": 1.903638951006302,
+      "grad_norm": 0.1536070853471756,
+      "learning_rate": 9.661344452354317e-06,
+      "loss": 0.9884,
+      "step": 9364
+    },
+    {
+      "epoch": 1.9038422443586094,
+      "grad_norm": 0.16738420724868774,
+      "learning_rate": 9.641004779823045e-06,
+      "loss": 1.0671,
+      "step": 9365
+    },
+    {
+      "epoch": 1.904045537710917,
+      "grad_norm": 0.14910069108009338,
+      "learning_rate": 9.620665107291773e-06,
+      "loss": 1.0673,
+      "step": 9366
+    },
+    {
+      "epoch": 1.9042488310632242,
+      "grad_norm": 0.17034853994846344,
+      "learning_rate": 9.600325434760501e-06,
+      "loss": 0.9899,
+      "step": 9367
+    },
+    {
+      "epoch": 1.9044521244155317,
+      "grad_norm": 0.14666365087032318,
+      "learning_rate": 9.579985762229229e-06,
+      "loss": 0.9937,
+      "step": 9368
+    },
+    {
+      "epoch": 1.904655417767839,
+      "grad_norm": 0.18429192900657654,
+      "learning_rate": 9.559646089697957e-06,
+      "loss": 0.9585,
+      "step": 9369
+    },
+    {
+      "epoch": 1.9048587111201463,
+      "grad_norm": 0.1640387326478958,
+      "learning_rate": 9.539306417166684e-06,
+      "loss": 0.8624,
+      "step": 9370
+    },
+    {
+      "epoch": 1.9050620044724538,
+      "grad_norm": 0.17350825667381287,
+      "learning_rate": 9.518966744635412e-06,
+      "loss": 1.1242,
+      "step": 9371
+    },
+    {
+      "epoch": 1.9052652978247613,
+      "grad_norm": 0.1487387865781784,
+      "learning_rate": 9.49862707210414e-06,
+      "loss": 0.8233,
+      "step": 9372
+    },
+    {
+      "epoch": 1.9054685911770686,
+      "grad_norm": 0.16694356501102448,
+      "learning_rate": 9.478287399572868e-06,
+      "loss": 0.9576,
+      "step": 9373
+    },
+    {
+      "epoch": 1.9056718845293759,
+      "grad_norm": 0.16773739457130432,
+      "learning_rate": 9.457947727041596e-06,
+      "loss": 1.1013,
+      "step": 9374
+    },
+    {
+      "epoch": 1.9058751778816831,
+      "grad_norm": 0.15956096351146698,
+      "learning_rate": 9.437608054510324e-06,
+      "loss": 0.9642,
+      "step": 9375
+    },
+    {
+      "epoch": 1.9060784712339907,
+      "grad_norm": 0.1490715593099594,
+      "learning_rate": 9.417268381979051e-06,
+      "loss": 0.8804,
+      "step": 9376
+    },
+    {
+      "epoch": 1.9062817645862982,
+      "grad_norm": 0.18091818690299988,
+      "learning_rate": 9.39692870944778e-06,
+      "loss": 1.2239,
+      "step": 9377
+    },
+    {
+      "epoch": 1.9064850579386055,
+      "grad_norm": 0.15678752958774567,
+      "learning_rate": 9.376589036916506e-06,
+      "loss": 0.9122,
+      "step": 9378
+    },
+    {
+      "epoch": 1.9066883512909127,
+      "grad_norm": 0.18459545075893402,
+      "learning_rate": 9.356249364385233e-06,
+      "loss": 1.252,
+      "step": 9379
+    },
+    {
+      "epoch": 1.90689164464322,
+      "grad_norm": 0.1801295429468155,
+      "learning_rate": 9.335909691853961e-06,
+      "loss": 1.2248,
+      "step": 9380
+    },
+    {
+      "epoch": 1.9070949379955275,
+      "grad_norm": 0.16494908928871155,
+      "learning_rate": 9.315570019322689e-06,
+      "loss": 1.0764,
+      "step": 9381
+    },
+    {
+      "epoch": 1.907298231347835,
+      "grad_norm": 0.1461213231086731,
+      "learning_rate": 9.295230346791417e-06,
+      "loss": 0.9669,
+      "step": 9382
+    },
+    {
+      "epoch": 1.9075015247001423,
+      "grad_norm": 0.1623806357383728,
+      "learning_rate": 9.274890674260145e-06,
+      "loss": 1.044,
+      "step": 9383
+    },
+    {
+      "epoch": 1.9077048180524496,
+      "grad_norm": 0.1630796492099762,
+      "learning_rate": 9.254551001728873e-06,
+      "loss": 1.1293,
+      "step": 9384
+    },
+    {
+      "epoch": 1.907908111404757,
+      "grad_norm": 0.15207818150520325,
+      "learning_rate": 9.2342113291976e-06,
+      "loss": 1.0695,
+      "step": 9385
+    },
+    {
+      "epoch": 1.9081114047570644,
+      "grad_norm": 0.18577249348163605,
+      "learning_rate": 9.213871656666328e-06,
+      "loss": 1.286,
+      "step": 9386
+    },
+    {
+      "epoch": 1.908314698109372,
+      "grad_norm": 0.1684713214635849,
+      "learning_rate": 9.193531984135056e-06,
+      "loss": 1.0222,
+      "step": 9387
+    },
+    {
+      "epoch": 1.9085179914616792,
+      "grad_norm": 0.1702156364917755,
+      "learning_rate": 9.173192311603782e-06,
+      "loss": 1.0436,
+      "step": 9388
+    },
+    {
+      "epoch": 1.9087212848139865,
+      "grad_norm": 0.16724956035614014,
+      "learning_rate": 9.15285263907251e-06,
+      "loss": 1.0927,
+      "step": 9389
+    },
+    {
+      "epoch": 1.908924578166294,
+      "grad_norm": 0.15122951567173004,
+      "learning_rate": 9.132512966541238e-06,
+      "loss": 0.9047,
+      "step": 9390
+    },
+    {
+      "epoch": 1.9091278715186013,
+      "grad_norm": 0.159726083278656,
+      "learning_rate": 9.112173294009966e-06,
+      "loss": 0.9561,
+      "step": 9391
+    },
+    {
+      "epoch": 1.9093311648709088,
+      "grad_norm": 0.16831693053245544,
+      "learning_rate": 9.091833621478694e-06,
+      "loss": 1.0801,
+      "step": 9392
+    },
+    {
+      "epoch": 1.909534458223216,
+      "grad_norm": 0.1692102700471878,
+      "learning_rate": 9.071493948947422e-06,
+      "loss": 1.1376,
+      "step": 9393
+    },
+    {
+      "epoch": 1.9097377515755234,
+      "grad_norm": 0.17488181591033936,
+      "learning_rate": 9.051154276416151e-06,
+      "loss": 1.1418,
+      "step": 9394
+    },
+    {
+      "epoch": 1.909941044927831,
+      "grad_norm": 0.16990408301353455,
+      "learning_rate": 9.030814603884879e-06,
+      "loss": 1.1285,
+      "step": 9395
+    },
+    {
+      "epoch": 1.9101443382801382,
+      "grad_norm": 0.16019228100776672,
+      "learning_rate": 9.010474931353607e-06,
+      "loss": 1.0915,
+      "step": 9396
+    },
+    {
+      "epoch": 1.9103476316324457,
+      "grad_norm": 0.18147097527980804,
+      "learning_rate": 8.990135258822335e-06,
+      "loss": 1.2002,
+      "step": 9397
+    },
+    {
+      "epoch": 1.910550924984753,
+      "grad_norm": 0.16949224472045898,
+      "learning_rate": 8.969795586291062e-06,
+      "loss": 1.0624,
+      "step": 9398
+    },
+    {
+      "epoch": 1.9107542183370603,
+      "grad_norm": 0.17006736993789673,
+      "learning_rate": 8.949455913759789e-06,
+      "loss": 1.2127,
+      "step": 9399
+    },
+    {
+      "epoch": 1.9109575116893678,
+      "grad_norm": 0.16276562213897705,
+      "learning_rate": 8.929116241228516e-06,
+      "loss": 0.9774,
+      "step": 9400
     }
   ],
   "logging_steps": 1,

@@ -65127,7 +65827,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.
+  "total_flos": 5.263015631335834e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
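The new trainer_state.json values are internally consistent: epoch 1.9109575116893678 equals 2 × (9400 / 9838), matching a two-epoch schedule over 9838 total steps. A minimal sketch for rebuilding a loss curve such as loss.png from the logged history, assuming matplotlib is installed and the file sits in the working directory:

```python
import json

import matplotlib.pyplot as plt

# Load the checkpoint's training log (local path is an assumption).
with open("trainer_state.json") as f:
    state = json.load(f)

# Each log_history entry carries "loss" and "step", as in the diff above.
steps = [e["step"] for e in state["log_history"] if "loss" in e]
losses = [e["loss"] for e in state["log_history"] if "loss" in e]

plt.plot(steps, losses, linewidth=0.5)
plt.xlabel("step")
plt.ylabel("training loss")
plt.title(f"Gradience T1 3B, step {state['global_step']}")
plt.savefig("loss.png")
```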