Upload checkpoint 5000
- README.md +3 -3
- adapter_model.safetensors +1 -1
- loss.png +2 -2
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +703 -3
README.md CHANGED
@@ -2,7 +2,7 @@
 base_model: Qwen/Qwen2.5-3B-Instruct
 library_name: peft
 ---
-# Gradience T1 3B (Step
+# Gradience T1 3B (Step 5000 Checkpoint)
 
 > [!NOTE]
 > Training in progress...
@@ -38,11 +38,11 @@ library_name: peft
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
-<div style="height: 30px; width:
+<div style="height: 30px; width: 50.82%; background-color: #76c7c0; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
 <!-- 3.75% -->
 </div>
 </div>
-<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress:
+<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 5000 out of 9838 steps</p>
 </body>
 </html>
 
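Note: the new bar width is just the checkpoint step as a share of total steps. A minimal sketch of that arithmetic (plain Python; no repo code assumed):

```python
# Sketch: reproduce the progress-bar width used in the README above.
# 5000 and 9838 are the step counts shown in the diff.
step, total_steps = 5000, 9838

width_pct = round(step / total_steps * 100, 2)
print(f"width: {width_pct}%")  # -> width: 50.82%, the new div width
print(f"Progress: {step} out of {total_steps} steps")
```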
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ca8115148a13abf4d6296ac258a6403048e57695e02f379de7637b3c2102d4f0
 size 119801528
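Each binary artifact in this commit is tracked as a Git LFS pointer: a three-line stub holding the spec version, the SHA-256 of the real blob, and its size in bytes. A hedged sketch for checking a downloaded file against the pointer above (standard library only; the local filename is an assumption):

```python
import hashlib

# Expected values from the adapter_model.safetensors LFS pointer in this commit.
EXPECTED_OID = "ca8115148a13abf4d6296ac258a6403048e57695e02f379de7637b3c2102d4f0"
EXPECTED_SIZE = 119801528

h = hashlib.sha256()
size = 0
with open("adapter_model.safetensors", "rb") as f:  # assumed local path
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)
        size += len(chunk)

assert size == EXPECTED_SIZE, f"size mismatch: {size} != {EXPECTED_SIZE}"
assert h.hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("file matches its LFS pointer")
```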
loss.png CHANGED
(Git LFS image: updated training-loss plot)
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2259d4339b6b613899a205d51304b186acc86b02e966dbec354e705f1fb840ba
 size 61392692
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e4ae31f3bd6abd5e088309ad57fa2e995bc6dd61c02221bc158a3d63e6ad1f06
 size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cc0f4be7d4d8334212cb48351775916c4614df567ed15f716c0301ee17f90467
 size 1064
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
+  "epoch": 1.0164667615368979,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 5000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -34308,6 +34308,706 @@
       "learning_rate": 0.00010045764263195364,
       "loss": 1.0605,
       "step": 4900
+    },
+    {
+      "epoch": 0.9963407196584672,
+      "grad_norm": 0.1430116593837738,
+      "learning_rate": 0.00010043730295942235,
+      "loss": 1.107,
+      "step": 4901
+    },
+    {
+      "epoch": 0.9965440130107746,
+      "grad_norm": 0.11865589022636414,
+      "learning_rate": 0.00010041696328689107,
+      "loss": 0.8887,
+      "step": 4902
+    },
+    {
+      "epoch": 0.996747306363082,
+      "grad_norm": 0.11495467275381088,
+      "learning_rate": 0.00010039662361435982,
+      "loss": 0.8365,
+      "step": 4903
+    },
+    {
+      "epoch": 0.9969505997153894,
+      "grad_norm": 0.1354401409626007,
+      "learning_rate": 0.00010037628394182855,
+      "loss": 1.1705,
+      "step": 4904
+    },
+    {
+      "epoch": 0.9971538930676966,
+      "grad_norm": 0.13998205959796906,
+      "learning_rate": 0.00010035594426929726,
+      "loss": 1.0365,
+      "step": 4905
+    },
+    {
+      "epoch": 0.997357186420004,
+      "grad_norm": 0.15044035017490387,
+      "learning_rate": 0.00010033560459676598,
+      "loss": 1.1061,
+      "step": 4906
+    },
+    {
+      "epoch": 0.9975604797723114,
+      "grad_norm": 0.1416459083557129,
+      "learning_rate": 0.00010031526492423473,
+      "loss": 1.1155,
+      "step": 4907
+    },
+    {
+      "epoch": 0.9977637731246188,
+      "grad_norm": 0.13485343754291534,
+      "learning_rate": 0.00010029492525170346,
+      "loss": 0.9937,
+      "step": 4908
+    },
+    {
+      "epoch": 0.9979670664769262,
+      "grad_norm": 0.14948885142803192,
+      "learning_rate": 0.00010027458557917217,
+      "loss": 1.1689,
+      "step": 4909
+    },
+    {
+      "epoch": 0.9981703598292336,
+      "grad_norm": 0.1309768706560135,
+      "learning_rate": 0.0001002542459066409,
+      "loss": 0.9428,
+      "step": 4910
+    },
+    {
+      "epoch": 0.9983736531815409,
+      "grad_norm": 0.11928943544626236,
+      "learning_rate": 0.00010023390623410965,
+      "loss": 0.8238,
+      "step": 4911
+    },
+    {
+      "epoch": 0.9985769465338483,
+      "grad_norm": 0.1389857530593872,
+      "learning_rate": 0.00010021356656157837,
+      "loss": 1.0459,
+      "step": 4912
+    },
+    {
+      "epoch": 0.9987802398861557,
+      "grad_norm": 0.14047744870185852,
+      "learning_rate": 0.00010019322688904708,
+      "loss": 0.9594,
+      "step": 4913
+    },
+    {
+      "epoch": 0.9989835332384631,
+      "grad_norm": 0.1307019144296646,
+      "learning_rate": 0.0001001728872165158,
+      "loss": 1.1549,
+      "step": 4914
+    },
+    {
+      "epoch": 0.9991868265907705,
+      "grad_norm": 0.13652239739894867,
+      "learning_rate": 0.00010015254754398456,
+      "loss": 1.142,
+      "step": 4915
+    },
+    {
+      "epoch": 0.9993901199430779,
+      "grad_norm": 0.1404002457857132,
+      "learning_rate": 0.00010013220787145328,
+      "loss": 1.0275,
+      "step": 4916
+    },
+    {
+      "epoch": 0.9995934132953852,
+      "grad_norm": 0.14137892425060272,
+      "learning_rate": 0.00010011186819892199,
+      "loss": 1.1169,
+      "step": 4917
+    },
+    {
+      "epoch": 0.9997967066476926,
+      "grad_norm": 0.12362517416477203,
+      "learning_rate": 0.00010009152852639072,
+      "loss": 0.9733,
+      "step": 4918
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.16257604956626892,
+      "learning_rate": 0.00010007118885385947,
+      "loss": 1.214,
+      "step": 4919
+    },
+    {
+      "epoch": 1.0002032933523073,
+      "grad_norm": 0.13455824553966522,
+      "learning_rate": 0.0001000508491813282,
+      "loss": 1.1717,
+      "step": 4920
+    },
+    {
+      "epoch": 1.0004065867046148,
+      "grad_norm": 0.1244397908449173,
+      "learning_rate": 0.0001000305095087969,
+      "loss": 0.9873,
+      "step": 4921
+    },
+    {
+      "epoch": 1.000609880056922,
+      "grad_norm": 0.13148358464241028,
+      "learning_rate": 0.00010001016983626563,
+      "loss": 1.0512,
+      "step": 4922
+    },
+    {
+      "epoch": 1.0008131734092296,
+      "grad_norm": 0.14207464456558228,
+      "learning_rate": 9.998983016373437e-05,
+      "loss": 1.1071,
+      "step": 4923
+    },
+    {
+      "epoch": 1.0010164667615369,
+      "grad_norm": 0.1350506693124771,
+      "learning_rate": 9.99694904912031e-05,
+      "loss": 1.1134,
+      "step": 4924
+    },
+    {
+      "epoch": 1.0012197601138442,
+      "grad_norm": 0.14575833082199097,
+      "learning_rate": 9.994915081867182e-05,
+      "loss": 1.0793,
+      "step": 4925
+    },
+    {
+      "epoch": 1.0014230534661517,
+      "grad_norm": 0.13254649937152863,
+      "learning_rate": 9.992881114614055e-05,
+      "loss": 0.9843,
+      "step": 4926
+    },
+    {
+      "epoch": 1.001626346818459,
+      "grad_norm": 0.13385853171348572,
+      "learning_rate": 9.990847147360928e-05,
+      "loss": 1.0446,
+      "step": 4927
+    },
+    {
+      "epoch": 1.0018296401707665,
+      "grad_norm": 0.13908478617668152,
+      "learning_rate": 9.988813180107802e-05,
+      "loss": 0.9968,
+      "step": 4928
+    },
+    {
+      "epoch": 1.0020329335230738,
+      "grad_norm": 0.13923251628875732,
+      "learning_rate": 9.986779212854673e-05,
+      "loss": 1.0023,
+      "step": 4929
+    },
+    {
+      "epoch": 1.0022362268753813,
+      "grad_norm": 0.1373911201953888,
+      "learning_rate": 9.984745245601547e-05,
+      "loss": 1.1753,
+      "step": 4930
+    },
+    {
+      "epoch": 1.0024395202276886,
+      "grad_norm": 0.13491371273994446,
+      "learning_rate": 9.982711278348419e-05,
+      "loss": 0.893,
+      "step": 4931
+    },
+    {
+      "epoch": 1.0026428135799959,
+      "grad_norm": 0.12279137223958969,
+      "learning_rate": 9.980677311095293e-05,
+      "loss": 0.8334,
+      "step": 4932
+    },
+    {
+      "epoch": 1.0028461069323034,
+      "grad_norm": 0.1489049643278122,
+      "learning_rate": 9.978643343842164e-05,
+      "loss": 1.2196,
+      "step": 4933
+    },
+    {
+      "epoch": 1.0030494002846106,
+      "grad_norm": 0.15800416469573975,
+      "learning_rate": 9.976609376589038e-05,
+      "loss": 1.1065,
+      "step": 4934
+    },
+    {
+      "epoch": 1.0032526936369182,
+      "grad_norm": 0.12695717811584473,
+      "learning_rate": 9.97457540933591e-05,
+      "loss": 0.8969,
+      "step": 4935
+    },
+    {
+      "epoch": 1.0034559869892254,
+      "grad_norm": 0.12970462441444397,
+      "learning_rate": 9.972541442082784e-05,
+      "loss": 0.9748,
+      "step": 4936
+    },
+    {
+      "epoch": 1.0036592803415327,
+      "grad_norm": 0.13583384454250336,
+      "learning_rate": 9.970507474829655e-05,
+      "loss": 0.9943,
+      "step": 4937
+    },
+    {
+      "epoch": 1.0038625736938402,
+      "grad_norm": 0.13171210885047913,
+      "learning_rate": 9.968473507576529e-05,
+      "loss": 1.0066,
+      "step": 4938
+    },
+    {
+      "epoch": 1.0040658670461475,
+      "grad_norm": 0.140077605843544,
+      "learning_rate": 9.966439540323401e-05,
+      "loss": 1.0276,
+      "step": 4939
+    },
+    {
+      "epoch": 1.004269160398455,
+      "grad_norm": 0.13248348236083984,
+      "learning_rate": 9.964405573070275e-05,
+      "loss": 0.9836,
+      "step": 4940
+    },
+    {
+      "epoch": 1.0044724537507623,
+      "grad_norm": 0.1502828449010849,
+      "learning_rate": 9.962371605817146e-05,
+      "loss": 1.175,
+      "step": 4941
+    },
+    {
+      "epoch": 1.0046757471030698,
+      "grad_norm": 0.14695493876934052,
+      "learning_rate": 9.96033763856402e-05,
+      "loss": 0.963,
+      "step": 4942
+    },
+    {
+      "epoch": 1.0048790404553771,
+      "grad_norm": 0.14214938879013062,
+      "learning_rate": 9.958303671310892e-05,
+      "loss": 1.0651,
+      "step": 4943
+    },
+    {
+      "epoch": 1.0050823338076844,
+      "grad_norm": 0.14761728048324585,
+      "learning_rate": 9.956269704057765e-05,
+      "loss": 0.9907,
+      "step": 4944
+    },
+    {
+      "epoch": 1.005285627159992,
+      "grad_norm": 0.13151785731315613,
+      "learning_rate": 9.954235736804637e-05,
+      "loss": 0.8793,
+      "step": 4945
+    },
+    {
+      "epoch": 1.0054889205122992,
+      "grad_norm": 0.1452670693397522,
+      "learning_rate": 9.95220176955151e-05,
+      "loss": 1.0906,
+      "step": 4946
+    },
+    {
+      "epoch": 1.0056922138646067,
+      "grad_norm": 0.13930079340934753,
+      "learning_rate": 9.950167802298384e-05,
+      "loss": 0.9598,
+      "step": 4947
+    },
+    {
+      "epoch": 1.005895507216914,
+      "grad_norm": 0.12317246198654175,
+      "learning_rate": 9.948133835045256e-05,
+      "loss": 0.9429,
+      "step": 4948
+    },
+    {
+      "epoch": 1.0060988005692213,
+      "grad_norm": 0.13415516912937164,
+      "learning_rate": 9.946099867792128e-05,
+      "loss": 1.0848,
+      "step": 4949
+    },
+    {
+      "epoch": 1.0063020939215288,
+      "grad_norm": 0.13976556062698364,
+      "learning_rate": 9.944065900539001e-05,
+      "loss": 0.934,
+      "step": 4950
+    },
+    {
+      "epoch": 1.006505387273836,
+      "grad_norm": 0.13384398818016052,
+      "learning_rate": 9.942031933285875e-05,
+      "loss": 0.955,
+      "step": 4951
+    },
+    {
+      "epoch": 1.0067086806261436,
+      "grad_norm": 0.14308519661426544,
+      "learning_rate": 9.939997966032747e-05,
+      "loss": 0.9543,
+      "step": 4952
+    },
+    {
+      "epoch": 1.006911973978451,
+      "grad_norm": 0.14340607821941376,
+      "learning_rate": 9.937963998779621e-05,
+      "loss": 1.047,
+      "step": 4953
+    },
+    {
+      "epoch": 1.0071152673307582,
+      "grad_norm": 0.14457905292510986,
+      "learning_rate": 9.935930031526492e-05,
+      "loss": 0.9937,
+      "step": 4954
+    },
+    {
+      "epoch": 1.0073185606830657,
+      "grad_norm": 0.13555844128131866,
+      "learning_rate": 9.933896064273366e-05,
+      "loss": 1.0211,
+      "step": 4955
+    },
+    {
+      "epoch": 1.007521854035373,
+      "grad_norm": 0.1536429524421692,
+      "learning_rate": 9.931862097020238e-05,
+      "loss": 1.188,
+      "step": 4956
+    },
+    {
+      "epoch": 1.0077251473876805,
+      "grad_norm": 0.13193362951278687,
+      "learning_rate": 9.929828129767112e-05,
+      "loss": 0.9143,
+      "step": 4957
+    },
+    {
+      "epoch": 1.0079284407399878,
+      "grad_norm": 0.14066417515277863,
+      "learning_rate": 9.927794162513983e-05,
+      "loss": 1.0662,
+      "step": 4958
+    },
+    {
+      "epoch": 1.0081317340922953,
+      "grad_norm": 0.13579119741916656,
+      "learning_rate": 9.925760195260857e-05,
+      "loss": 0.8999,
+      "step": 4959
+    },
+    {
+      "epoch": 1.0083350274446026,
+      "grad_norm": 0.14911122620105743,
+      "learning_rate": 9.92372622800773e-05,
+      "loss": 1.3171,
+      "step": 4960
+    },
+    {
+      "epoch": 1.0085383207969099,
+      "grad_norm": 0.1447262316942215,
+      "learning_rate": 9.921692260754603e-05,
+      "loss": 1.0899,
+      "step": 4961
+    },
+    {
+      "epoch": 1.0087416141492174,
+      "grad_norm": 0.1513487845659256,
+      "learning_rate": 9.919658293501474e-05,
+      "loss": 1.0844,
+      "step": 4962
+    },
+    {
+      "epoch": 1.0089449075015247,
+      "grad_norm": 0.1470583975315094,
+      "learning_rate": 9.917624326248348e-05,
+      "loss": 1.1176,
+      "step": 4963
+    },
+    {
+      "epoch": 1.0091482008538322,
+      "grad_norm": 0.13596630096435547,
+      "learning_rate": 9.91559035899522e-05,
+      "loss": 1.0829,
+      "step": 4964
+    },
+    {
+      "epoch": 1.0093514942061395,
+      "grad_norm": 0.1411203145980835,
+      "learning_rate": 9.913556391742094e-05,
+      "loss": 1.0523,
+      "step": 4965
+    },
+    {
+      "epoch": 1.0095547875584467,
+      "grad_norm": 0.14842981100082397,
+      "learning_rate": 9.911522424488965e-05,
+      "loss": 1.0513,
+      "step": 4966
+    },
+    {
+      "epoch": 1.0097580809107543,
+      "grad_norm": 0.1505335569381714,
+      "learning_rate": 9.909488457235839e-05,
+      "loss": 0.9964,
+      "step": 4967
+    },
+    {
+      "epoch": 1.0099613742630615,
+      "grad_norm": 0.12677620351314545,
+      "learning_rate": 9.907454489982712e-05,
+      "loss": 0.9546,
+      "step": 4968
+    },
+    {
+      "epoch": 1.010164667615369,
+      "grad_norm": 0.13651777803897858,
+      "learning_rate": 9.905420522729585e-05,
+      "loss": 1.0823,
+      "step": 4969
+    },
+    {
+      "epoch": 1.0103679609676763,
+      "grad_norm": 0.1392572969198227,
+      "learning_rate": 9.903386555476457e-05,
+      "loss": 0.9032,
+      "step": 4970
+    },
+    {
+      "epoch": 1.0105712543199838,
+      "grad_norm": 0.16775289177894592,
+      "learning_rate": 9.90135258822333e-05,
+      "loss": 1.1434,
+      "step": 4971
+    },
+    {
+      "epoch": 1.0107745476722911,
+      "grad_norm": 0.1534387320280075,
+      "learning_rate": 9.899318620970203e-05,
+      "loss": 1.166,
+      "step": 4972
+    },
+    {
+      "epoch": 1.0109778410245984,
+      "grad_norm": 0.14180676639080048,
+      "learning_rate": 9.897284653717077e-05,
+      "loss": 1.0688,
+      "step": 4973
+    },
+    {
+      "epoch": 1.011181134376906,
+      "grad_norm": 0.13633224368095398,
+      "learning_rate": 9.895250686463948e-05,
+      "loss": 1.0413,
+      "step": 4974
+    },
+    {
+      "epoch": 1.0113844277292132,
+      "grad_norm": 0.15582099556922913,
+      "learning_rate": 9.893216719210822e-05,
+      "loss": 1.256,
+      "step": 4975
+    },
+    {
+      "epoch": 1.0115877210815207,
+      "grad_norm": 0.16052106022834778,
+      "learning_rate": 9.891182751957694e-05,
+      "loss": 1.3048,
+      "step": 4976
+    },
+    {
+      "epoch": 1.011791014433828,
+      "grad_norm": 0.15733475983142853,
+      "learning_rate": 9.889148784704568e-05,
+      "loss": 1.1024,
+      "step": 4977
+    },
+    {
+      "epoch": 1.0119943077861353,
+      "grad_norm": 0.1398230642080307,
+      "learning_rate": 9.887114817451439e-05,
+      "loss": 1.0691,
+      "step": 4978
+    },
+    {
+      "epoch": 1.0121976011384428,
+      "grad_norm": 0.15575705468654633,
+      "learning_rate": 9.885080850198313e-05,
+      "loss": 1.0019,
+      "step": 4979
+    },
+    {
+      "epoch": 1.01240089449075,
+      "grad_norm": 0.13900624215602875,
+      "learning_rate": 9.883046882945185e-05,
+      "loss": 1.0318,
+      "step": 4980
+    },
+    {
+      "epoch": 1.0126041878430576,
+      "grad_norm": 0.1266520619392395,
+      "learning_rate": 9.881012915692059e-05,
+      "loss": 0.9455,
+      "step": 4981
+    },
+    {
+      "epoch": 1.012807481195365,
+      "grad_norm": 0.14327497780323029,
+      "learning_rate": 9.87897894843893e-05,
+      "loss": 1.1133,
+      "step": 4982
+    },
+    {
+      "epoch": 1.0130107745476722,
+      "grad_norm": 0.14177127182483673,
+      "learning_rate": 9.876944981185804e-05,
+      "loss": 0.9969,
+      "step": 4983
+    },
+    {
+      "epoch": 1.0132140678999797,
+      "grad_norm": 0.14066456258296967,
+      "learning_rate": 9.874911013932676e-05,
+      "loss": 0.9261,
+      "step": 4984
+    },
+    {
+      "epoch": 1.013417361252287,
+      "grad_norm": 0.14441144466400146,
+      "learning_rate": 9.872877046679549e-05,
+      "loss": 1.0065,
+      "step": 4985
+    },
+    {
+      "epoch": 1.0136206546045945,
+      "grad_norm": 0.12858086824417114,
+      "learning_rate": 9.870843079426421e-05,
+      "loss": 0.9306,
+      "step": 4986
+    },
+    {
+      "epoch": 1.0138239479569018,
+      "grad_norm": 0.1305333971977234,
+      "learning_rate": 9.868809112173294e-05,
+      "loss": 1.0058,
+      "step": 4987
+    },
+    {
+      "epoch": 1.0140272413092093,
+      "grad_norm": 0.1652311384677887,
+      "learning_rate": 9.866775144920167e-05,
+      "loss": 1.1992,
+      "step": 4988
+    },
+    {
+      "epoch": 1.0142305346615166,
+      "grad_norm": 0.1123913899064064,
+      "learning_rate": 9.86474117766704e-05,
+      "loss": 0.8779,
+      "step": 4989
+    },
+    {
+      "epoch": 1.0144338280138239,
+      "grad_norm": 0.15201310813426971,
+      "learning_rate": 9.862707210413912e-05,
+      "loss": 1.1553,
+      "step": 4990
+    },
+    {
+      "epoch": 1.0146371213661314,
+      "grad_norm": 0.13241463899612427,
+      "learning_rate": 9.860673243160785e-05,
+      "loss": 0.9276,
+      "step": 4991
+    },
+    {
+      "epoch": 1.0148404147184387,
+      "grad_norm": 0.15238632261753082,
+      "learning_rate": 9.858639275907659e-05,
+      "loss": 1.1528,
+      "step": 4992
+    },
+    {
+      "epoch": 1.0150437080707462,
+      "grad_norm": 0.13771474361419678,
+      "learning_rate": 9.856605308654531e-05,
+      "loss": 1.1871,
+      "step": 4993
+    },
+    {
+      "epoch": 1.0152470014230535,
+      "grad_norm": 0.135041743516922,
+      "learning_rate": 9.854571341401403e-05,
+      "loss": 0.9718,
+      "step": 4994
+    },
+    {
+      "epoch": 1.0154502947753608,
+      "grad_norm": 0.14199897646903992,
+      "learning_rate": 9.852537374148276e-05,
+      "loss": 1.0454,
+      "step": 4995
+    },
+    {
+      "epoch": 1.0156535881276683,
+      "grad_norm": 0.14556720852851868,
+      "learning_rate": 9.85050340689515e-05,
+      "loss": 1.102,
+      "step": 4996
+    },
+    {
+      "epoch": 1.0158568814799755,
+      "grad_norm": 0.1287354975938797,
+      "learning_rate": 9.848469439642022e-05,
+      "loss": 0.929,
+      "step": 4997
+    },
+    {
+      "epoch": 1.016060174832283,
+      "grad_norm": 0.15297791361808777,
+      "learning_rate": 9.846435472388895e-05,
+      "loss": 1.0234,
+      "step": 4998
+    },
+    {
+      "epoch": 1.0162634681845903,
+      "grad_norm": 0.1549387276172638,
+      "learning_rate": 9.844401505135767e-05,
+      "loss": 1.1666,
+      "step": 4999
+    },
+    {
+      "epoch": 1.0164667615368979,
+      "grad_norm": 0.15455321967601776,
+      "learning_rate": 9.842367537882641e-05,
+      "loss": 1.0845,
+      "step": 5000
     }
   ],
   "logging_steps": 1,
@@ -34327,7 +35027,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.
+  "total_flos": 2.801020652405637e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
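The updated "epoch" value is consistent with the log: epoch reaches exactly 1.0 at step 4919, so the state tracks epoch = global_step / 4919. A small consistency check against the checkpoint file (the steps-per-epoch value is inferred from the log above, not stored in the file; the local path is an assumption):

```python
import json

STEPS_PER_EPOCH = 4919  # inferred: the log above hits "epoch": 1.0 at step 4919

with open("trainer_state.json") as f:  # assumed path to this checkpoint's file
    state = json.load(f)

assert state["global_step"] == 5000
assert abs(state["epoch"] - state["global_step"] / STEPS_PER_EPOCH) < 1e-9

last = state["log_history"][-1]
print(last["step"], last["loss"])  # -> 5000 1.0845
```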