Training in progress, step 13145, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 98447936
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f883b4f265b6b8064b67009ea7bfe448f26ae43c44e65e959b3de74a0b473f8
|
| 3 |
size 98447936
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 196978810
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15f89a104fdd62216eac28fe99f857669937aa8b4fed6aa1a2b53d8e286f81f9
|
| 3 |
size 196978810
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:19e7284245a01704e7547fac857930f61337e742c5987dcc39fdf9c642d77c9e
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d5928a4230aa28b0327c505e24d3c41016e7b6d560dcb0f1569e7770b3922423
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -90349,6 +90349,1679 @@
|
|
| 90349 |
"learning_rate": 3.0262150381350494e-07,
|
| 90350 |
"loss": 0.0439,
|
| 90351 |
"step": 12906
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90352 |
}
|
| 90353 |
],
|
| 90354 |
"logging_steps": 1,
|
|
@@ -90368,7 +92041,7 @@
|
|
| 90368 |
"attributes": {}
|
| 90369 |
}
|
| 90370 |
},
|
| 90371 |
-
"total_flos":
|
| 90372 |
"train_batch_size": 4,
|
| 90373 |
"trial_name": null,
|
| 90374 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.9830609879220731,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 13145,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 90349 |
"learning_rate": 3.0262150381350494e-07,
|
| 90350 |
"loss": 0.0439,
|
| 90351 |
"step": 12906
|
| 90352 |
+
},
|
| 90353 |
+
{
|
| 90354 |
+
"epoch": 0.9652619377033242,
|
| 90355 |
+
"grad_norm": 0.4368337094783783,
|
| 90356 |
+
"learning_rate": 3.0132261302588395e-07,
|
| 90357 |
+
"loss": 0.0399,
|
| 90358 |
+
"step": 12907
|
| 90359 |
+
},
|
| 90360 |
+
{
|
| 90361 |
+
"epoch": 0.9653367236286131,
|
| 90362 |
+
"grad_norm": 0.6395098567008972,
|
| 90363 |
+
"learning_rate": 3.000265073157038e-07,
|
| 90364 |
+
"loss": 0.0591,
|
| 90365 |
+
"step": 12908
|
| 90366 |
+
},
|
| 90367 |
+
{
|
| 90368 |
+
"epoch": 0.965411509553902,
|
| 90369 |
+
"grad_norm": 0.43370211124420166,
|
| 90370 |
+
"learning_rate": 2.987331867556009e-07,
|
| 90371 |
+
"loss": 0.0339,
|
| 90372 |
+
"step": 12909
|
| 90373 |
+
},
|
| 90374 |
+
{
|
| 90375 |
+
"epoch": 0.9654862954791908,
|
| 90376 |
+
"grad_norm": 0.5491053462028503,
|
| 90377 |
+
"learning_rate": 2.974426514180506e-07,
|
| 90378 |
+
"loss": 0.0459,
|
| 90379 |
+
"step": 12910
|
| 90380 |
+
},
|
| 90381 |
+
{
|
| 90382 |
+
"epoch": 0.9655610814044797,
|
| 90383 |
+
"grad_norm": 0.48986148834228516,
|
| 90384 |
+
"learning_rate": 2.961549013753784e-07,
|
| 90385 |
+
"loss": 0.04,
|
| 90386 |
+
"step": 12911
|
| 90387 |
+
},
|
| 90388 |
+
{
|
| 90389 |
+
"epoch": 0.9656358673297686,
|
| 90390 |
+
"grad_norm": 0.8874034881591797,
|
| 90391 |
+
"learning_rate": 2.9486993669974315e-07,
|
| 90392 |
+
"loss": 0.0708,
|
| 90393 |
+
"step": 12912
|
| 90394 |
+
},
|
| 90395 |
+
{
|
| 90396 |
+
"epoch": 0.9657106532550574,
|
| 90397 |
+
"grad_norm": 0.7711852192878723,
|
| 90398 |
+
"learning_rate": 2.9358775746315956e-07,
|
| 90399 |
+
"loss": 0.101,
|
| 90400 |
+
"step": 12913
|
| 90401 |
+
},
|
| 90402 |
+
{
|
| 90403 |
+
"epoch": 0.9657854391803462,
|
| 90404 |
+
"grad_norm": 0.5177712440490723,
|
| 90405 |
+
"learning_rate": 2.9230836373747574e-07,
|
| 90406 |
+
"loss": 0.0518,
|
| 90407 |
+
"step": 12914
|
| 90408 |
+
},
|
| 90409 |
+
{
|
| 90410 |
+
"epoch": 0.9658602251056351,
|
| 90411 |
+
"grad_norm": 0.8569120168685913,
|
| 90412 |
+
"learning_rate": 2.910317555943898e-07,
|
| 90413 |
+
"loss": 0.085,
|
| 90414 |
+
"step": 12915
|
| 90415 |
+
},
|
| 90416 |
+
{
|
| 90417 |
+
"epoch": 0.9659350110309239,
|
| 90418 |
+
"grad_norm": 0.6278529167175293,
|
| 90419 |
+
"learning_rate": 2.8975793310543897e-07,
|
| 90420 |
+
"loss": 0.0411,
|
| 90421 |
+
"step": 12916
|
| 90422 |
+
},
|
| 90423 |
+
{
|
| 90424 |
+
"epoch": 0.9660097969562128,
|
| 90425 |
+
"grad_norm": 1.1338576078414917,
|
| 90426 |
+
"learning_rate": 2.884868963420162e-07,
|
| 90427 |
+
"loss": 0.0452,
|
| 90428 |
+
"step": 12917
|
| 90429 |
+
},
|
| 90430 |
+
{
|
| 90431 |
+
"epoch": 0.9660845828815017,
|
| 90432 |
+
"grad_norm": 0.792792022228241,
|
| 90433 |
+
"learning_rate": 2.8721864537533116e-07,
|
| 90434 |
+
"loss": 0.0578,
|
| 90435 |
+
"step": 12918
|
| 90436 |
+
},
|
| 90437 |
+
{
|
| 90438 |
+
"epoch": 0.9661593688067905,
|
| 90439 |
+
"grad_norm": 0.4867582321166992,
|
| 90440 |
+
"learning_rate": 2.85953180276477e-07,
|
| 90441 |
+
"loss": 0.034,
|
| 90442 |
+
"step": 12919
|
| 90443 |
+
},
|
| 90444 |
+
{
|
| 90445 |
+
"epoch": 0.9662341547320794,
|
| 90446 |
+
"grad_norm": 0.5390796661376953,
|
| 90447 |
+
"learning_rate": 2.8469050111635253e-07,
|
| 90448 |
+
"loss": 0.0326,
|
| 90449 |
+
"step": 12920
|
| 90450 |
+
},
|
| 90451 |
+
{
|
| 90452 |
+
"epoch": 0.9663089406573683,
|
| 90453 |
+
"grad_norm": 0.4971838593482971,
|
| 90454 |
+
"learning_rate": 2.8343060796572897e-07,
|
| 90455 |
+
"loss": 0.0411,
|
| 90456 |
+
"step": 12921
|
| 90457 |
+
},
|
| 90458 |
+
{
|
| 90459 |
+
"epoch": 0.9663837265826571,
|
| 90460 |
+
"grad_norm": 0.8017480969429016,
|
| 90461 |
+
"learning_rate": 2.8217350089520533e-07,
|
| 90462 |
+
"loss": 0.0638,
|
| 90463 |
+
"step": 12922
|
| 90464 |
+
},
|
| 90465 |
+
{
|
| 90466 |
+
"epoch": 0.966458512507946,
|
| 90467 |
+
"grad_norm": 0.6261386275291443,
|
| 90468 |
+
"learning_rate": 2.809191799752309e-07,
|
| 90469 |
+
"loss": 0.0423,
|
| 90470 |
+
"step": 12923
|
| 90471 |
+
},
|
| 90472 |
+
{
|
| 90473 |
+
"epoch": 0.9665332984332349,
|
| 90474 |
+
"grad_norm": 0.5139853358268738,
|
| 90475 |
+
"learning_rate": 2.7966764527608827e-07,
|
| 90476 |
+
"loss": 0.0428,
|
| 90477 |
+
"step": 12924
|
| 90478 |
+
},
|
| 90479 |
+
{
|
| 90480 |
+
"epoch": 0.9666080843585237,
|
| 90481 |
+
"grad_norm": 0.8327328562736511,
|
| 90482 |
+
"learning_rate": 2.7841889686792686e-07,
|
| 90483 |
+
"loss": 0.0712,
|
| 90484 |
+
"step": 12925
|
| 90485 |
+
},
|
| 90486 |
+
{
|
| 90487 |
+
"epoch": 0.9666828702838126,
|
| 90488 |
+
"grad_norm": 0.9425419569015503,
|
| 90489 |
+
"learning_rate": 2.7717293482071303e-07,
|
| 90490 |
+
"loss": 0.0712,
|
| 90491 |
+
"step": 12926
|
| 90492 |
+
},
|
| 90493 |
+
{
|
| 90494 |
+
"epoch": 0.9667576562091015,
|
| 90495 |
+
"grad_norm": 0.999728262424469,
|
| 90496 |
+
"learning_rate": 2.759297592042742e-07,
|
| 90497 |
+
"loss": 0.1424,
|
| 90498 |
+
"step": 12927
|
| 90499 |
+
},
|
| 90500 |
+
{
|
| 90501 |
+
"epoch": 0.9668324421343903,
|
| 90502 |
+
"grad_norm": 0.6338492035865784,
|
| 90503 |
+
"learning_rate": 2.7468937008828246e-07,
|
| 90504 |
+
"loss": 0.0392,
|
| 90505 |
+
"step": 12928
|
| 90506 |
+
},
|
| 90507 |
+
{
|
| 90508 |
+
"epoch": 0.9669072280596792,
|
| 90509 |
+
"grad_norm": 0.7793694734573364,
|
| 90510 |
+
"learning_rate": 2.734517675422377e-07,
|
| 90511 |
+
"loss": 0.0659,
|
| 90512 |
+
"step": 12929
|
| 90513 |
+
},
|
| 90514 |
+
{
|
| 90515 |
+
"epoch": 0.9669820139849681,
|
| 90516 |
+
"grad_norm": 0.9696511030197144,
|
| 90517 |
+
"learning_rate": 2.7221695163550107e-07,
|
| 90518 |
+
"loss": 0.1005,
|
| 90519 |
+
"step": 12930
|
| 90520 |
+
},
|
| 90521 |
+
{
|
| 90522 |
+
"epoch": 0.9670567999102568,
|
| 90523 |
+
"grad_norm": 1.540802001953125,
|
| 90524 |
+
"learning_rate": 2.709849224372729e-07,
|
| 90525 |
+
"loss": 0.1015,
|
| 90526 |
+
"step": 12931
|
| 90527 |
+
},
|
| 90528 |
+
{
|
| 90529 |
+
"epoch": 0.9671315858355457,
|
| 90530 |
+
"grad_norm": 0.5976976156234741,
|
| 90531 |
+
"learning_rate": 2.697556800165868e-07,
|
| 90532 |
+
"loss": 0.0303,
|
| 90533 |
+
"step": 12932
|
| 90534 |
+
},
|
| 90535 |
+
{
|
| 90536 |
+
"epoch": 0.9672063717608346,
|
| 90537 |
+
"grad_norm": 0.8872543573379517,
|
| 90538 |
+
"learning_rate": 2.685292244423376e-07,
|
| 90539 |
+
"loss": 0.0686,
|
| 90540 |
+
"step": 12933
|
| 90541 |
+
},
|
| 90542 |
+
{
|
| 90543 |
+
"epoch": 0.9672811576861234,
|
| 90544 |
+
"grad_norm": 0.710059642791748,
|
| 90545 |
+
"learning_rate": 2.673055557832538e-07,
|
| 90546 |
+
"loss": 0.054,
|
| 90547 |
+
"step": 12934
|
| 90548 |
+
},
|
| 90549 |
+
{
|
| 90550 |
+
"epoch": 0.9673559436114123,
|
| 90551 |
+
"grad_norm": 1.1238468885421753,
|
| 90552 |
+
"learning_rate": 2.660846741079026e-07,
|
| 90553 |
+
"loss": 0.1244,
|
| 90554 |
+
"step": 12935
|
| 90555 |
+
},
|
| 90556 |
+
{
|
| 90557 |
+
"epoch": 0.9674307295367012,
|
| 90558 |
+
"grad_norm": 0.9026004672050476,
|
| 90559 |
+
"learning_rate": 2.6486657948470163e-07,
|
| 90560 |
+
"loss": 0.0706,
|
| 90561 |
+
"step": 12936
|
| 90562 |
+
},
|
| 90563 |
+
{
|
| 90564 |
+
"epoch": 0.96750551546199,
|
| 90565 |
+
"grad_norm": 0.9717168807983398,
|
| 90566 |
+
"learning_rate": 2.636512719819184e-07,
|
| 90567 |
+
"loss": 0.1016,
|
| 90568 |
+
"step": 12937
|
| 90569 |
+
},
|
| 90570 |
+
{
|
| 90571 |
+
"epoch": 0.9675803013872789,
|
| 90572 |
+
"grad_norm": 0.7005811929702759,
|
| 90573 |
+
"learning_rate": 2.6243875166765406e-07,
|
| 90574 |
+
"loss": 0.0552,
|
| 90575 |
+
"step": 12938
|
| 90576 |
+
},
|
| 90577 |
+
{
|
| 90578 |
+
"epoch": 0.9676550873125678,
|
| 90579 |
+
"grad_norm": 0.8264267444610596,
|
| 90580 |
+
"learning_rate": 2.6122901860985425e-07,
|
| 90581 |
+
"loss": 0.0389,
|
| 90582 |
+
"step": 12939
|
| 90583 |
+
},
|
| 90584 |
+
{
|
| 90585 |
+
"epoch": 0.9677298732378566,
|
| 90586 |
+
"grad_norm": 0.7877691984176636,
|
| 90587 |
+
"learning_rate": 2.600220728763203e-07,
|
| 90588 |
+
"loss": 0.0707,
|
| 90589 |
+
"step": 12940
|
| 90590 |
+
},
|
| 90591 |
+
{
|
| 90592 |
+
"epoch": 0.9678046591631455,
|
| 90593 |
+
"grad_norm": 0.8559983968734741,
|
| 90594 |
+
"learning_rate": 2.588179145346814e-07,
|
| 90595 |
+
"loss": 0.0521,
|
| 90596 |
+
"step": 12941
|
| 90597 |
+
},
|
| 90598 |
+
{
|
| 90599 |
+
"epoch": 0.9678794450884344,
|
| 90600 |
+
"grad_norm": 1.7342501878738403,
|
| 90601 |
+
"learning_rate": 2.576165436524225e-07,
|
| 90602 |
+
"loss": 0.1444,
|
| 90603 |
+
"step": 12942
|
| 90604 |
+
},
|
| 90605 |
+
{
|
| 90606 |
+
"epoch": 0.9679542310137232,
|
| 90607 |
+
"grad_norm": 1.3305034637451172,
|
| 90608 |
+
"learning_rate": 2.5641796029686194e-07,
|
| 90609 |
+
"loss": 0.0944,
|
| 90610 |
+
"step": 12943
|
| 90611 |
+
},
|
| 90612 |
+
{
|
| 90613 |
+
"epoch": 0.9680290169390121,
|
| 90614 |
+
"grad_norm": 0.9448521137237549,
|
| 90615 |
+
"learning_rate": 2.552221645351738e-07,
|
| 90616 |
+
"loss": 0.0652,
|
| 90617 |
+
"step": 12944
|
| 90618 |
+
},
|
| 90619 |
+
{
|
| 90620 |
+
"epoch": 0.968103802864301,
|
| 90621 |
+
"grad_norm": 1.5735996961593628,
|
| 90622 |
+
"learning_rate": 2.5402915643436e-07,
|
| 90623 |
+
"loss": 0.1673,
|
| 90624 |
+
"step": 12945
|
| 90625 |
+
},
|
| 90626 |
+
{
|
| 90627 |
+
"epoch": 0.9681785887895898,
|
| 90628 |
+
"grad_norm": 0.888843834400177,
|
| 90629 |
+
"learning_rate": 2.528389360612837e-07,
|
| 90630 |
+
"loss": 0.1212,
|
| 90631 |
+
"step": 12946
|
| 90632 |
+
},
|
| 90633 |
+
{
|
| 90634 |
+
"epoch": 0.9682533747148787,
|
| 90635 |
+
"grad_norm": 0.892540454864502,
|
| 90636 |
+
"learning_rate": 2.516515034826472e-07,
|
| 90637 |
+
"loss": 0.0598,
|
| 90638 |
+
"step": 12947
|
| 90639 |
+
},
|
| 90640 |
+
{
|
| 90641 |
+
"epoch": 0.9683281606401675,
|
| 90642 |
+
"grad_norm": 0.6834811568260193,
|
| 90643 |
+
"learning_rate": 2.504668587649805e-07,
|
| 90644 |
+
"loss": 0.0483,
|
| 90645 |
+
"step": 12948
|
| 90646 |
+
},
|
| 90647 |
+
{
|
| 90648 |
+
"epoch": 0.9684029465654563,
|
| 90649 |
+
"grad_norm": 0.7159473299980164,
|
| 90650 |
+
"learning_rate": 2.4928500197468616e-07,
|
| 90651 |
+
"loss": 0.0455,
|
| 90652 |
+
"step": 12949
|
| 90653 |
+
},
|
| 90654 |
+
{
|
| 90655 |
+
"epoch": 0.9684777324907452,
|
| 90656 |
+
"grad_norm": 1.5077083110809326,
|
| 90657 |
+
"learning_rate": 2.4810593317798336e-07,
|
| 90658 |
+
"loss": 0.1202,
|
| 90659 |
+
"step": 12950
|
| 90660 |
+
},
|
| 90661 |
+
{
|
| 90662 |
+
"epoch": 0.9685525184160341,
|
| 90663 |
+
"grad_norm": 0.5121647715568542,
|
| 90664 |
+
"learning_rate": 2.46929652440947e-07,
|
| 90665 |
+
"loss": 0.067,
|
| 90666 |
+
"step": 12951
|
| 90667 |
+
},
|
| 90668 |
+
{
|
| 90669 |
+
"epoch": 0.9686273043413229,
|
| 90670 |
+
"grad_norm": 0.5928946137428284,
|
| 90671 |
+
"learning_rate": 2.457561598294966e-07,
|
| 90672 |
+
"loss": 0.0578,
|
| 90673 |
+
"step": 12952
|
| 90674 |
+
},
|
| 90675 |
+
{
|
| 90676 |
+
"epoch": 0.9687020902666118,
|
| 90677 |
+
"grad_norm": 0.5399084687232971,
|
| 90678 |
+
"learning_rate": 2.445854554094018e-07,
|
| 90679 |
+
"loss": 0.0413,
|
| 90680 |
+
"step": 12953
|
| 90681 |
+
},
|
| 90682 |
+
{
|
| 90683 |
+
"epoch": 0.9687768761919007,
|
| 90684 |
+
"grad_norm": 0.8208631277084351,
|
| 90685 |
+
"learning_rate": 2.4341753924625457e-07,
|
| 90686 |
+
"loss": 0.0741,
|
| 90687 |
+
"step": 12954
|
| 90688 |
+
},
|
| 90689 |
+
{
|
| 90690 |
+
"epoch": 0.9688516621171895,
|
| 90691 |
+
"grad_norm": 0.7083703279495239,
|
| 90692 |
+
"learning_rate": 2.422524114055136e-07,
|
| 90693 |
+
"loss": 0.0987,
|
| 90694 |
+
"step": 12955
|
| 90695 |
+
},
|
| 90696 |
+
{
|
| 90697 |
+
"epoch": 0.9689264480424784,
|
| 90698 |
+
"grad_norm": 0.6513927578926086,
|
| 90699 |
+
"learning_rate": 2.4109007195246556e-07,
|
| 90700 |
+
"loss": 0.0743,
|
| 90701 |
+
"step": 12956
|
| 90702 |
+
},
|
| 90703 |
+
{
|
| 90704 |
+
"epoch": 0.9690012339677673,
|
| 90705 |
+
"grad_norm": 0.8623580932617188,
|
| 90706 |
+
"learning_rate": 2.399305209522529e-07,
|
| 90707 |
+
"loss": 0.0642,
|
| 90708 |
+
"step": 12957
|
| 90709 |
+
},
|
| 90710 |
+
{
|
| 90711 |
+
"epoch": 0.9690760198930561,
|
| 90712 |
+
"grad_norm": 0.591946005821228,
|
| 90713 |
+
"learning_rate": 2.387737584698513e-07,
|
| 90714 |
+
"loss": 0.0366,
|
| 90715 |
+
"step": 12958
|
| 90716 |
+
},
|
| 90717 |
+
{
|
| 90718 |
+
"epoch": 0.969150805818345,
|
| 90719 |
+
"grad_norm": 0.7606440186500549,
|
| 90720 |
+
"learning_rate": 2.376197845700867e-07,
|
| 90721 |
+
"loss": 0.0733,
|
| 90722 |
+
"step": 12959
|
| 90723 |
+
},
|
| 90724 |
+
{
|
| 90725 |
+
"epoch": 0.9692255917436339,
|
| 90726 |
+
"grad_norm": 0.3158939480781555,
|
| 90727 |
+
"learning_rate": 2.3646859931762965e-07,
|
| 90728 |
+
"loss": 0.0171,
|
| 90729 |
+
"step": 12960
|
| 90730 |
+
},
|
| 90731 |
+
{
|
| 90732 |
+
"epoch": 0.9693003776689227,
|
| 90733 |
+
"grad_norm": 0.47718873620033264,
|
| 90734 |
+
"learning_rate": 2.3532020277698407e-07,
|
| 90735 |
+
"loss": 0.0427,
|
| 90736 |
+
"step": 12961
|
| 90737 |
+
},
|
| 90738 |
+
{
|
| 90739 |
+
"epoch": 0.9693751635942116,
|
| 90740 |
+
"grad_norm": 0.6722490787506104,
|
| 90741 |
+
"learning_rate": 2.3417459501251515e-07,
|
| 90742 |
+
"loss": 0.0535,
|
| 90743 |
+
"step": 12962
|
| 90744 |
+
},
|
| 90745 |
+
{
|
| 90746 |
+
"epoch": 0.9694499495195005,
|
| 90747 |
+
"grad_norm": 0.55564284324646,
|
| 90748 |
+
"learning_rate": 2.3303177608841597e-07,
|
| 90749 |
+
"loss": 0.0618,
|
| 90750 |
+
"step": 12963
|
| 90751 |
+
},
|
| 90752 |
+
{
|
| 90753 |
+
"epoch": 0.9695247354447893,
|
| 90754 |
+
"grad_norm": 0.6552045345306396,
|
| 90755 |
+
"learning_rate": 2.3189174606872976e-07,
|
| 90756 |
+
"loss": 0.0825,
|
| 90757 |
+
"step": 12964
|
| 90758 |
+
},
|
| 90759 |
+
{
|
| 90760 |
+
"epoch": 0.9695995213700781,
|
| 90761 |
+
"grad_norm": 0.6629922986030579,
|
| 90762 |
+
"learning_rate": 2.307545050173443e-07,
|
| 90763 |
+
"loss": 0.0459,
|
| 90764 |
+
"step": 12965
|
| 90765 |
+
},
|
| 90766 |
+
{
|
| 90767 |
+
"epoch": 0.969674307295367,
|
| 90768 |
+
"grad_norm": 0.851646363735199,
|
| 90769 |
+
"learning_rate": 2.2962005299798639e-07,
|
| 90770 |
+
"loss": 0.0874,
|
| 90771 |
+
"step": 12966
|
| 90772 |
+
},
|
| 90773 |
+
{
|
| 90774 |
+
"epoch": 0.9697490932206558,
|
| 90775 |
+
"grad_norm": 0.9766066670417786,
|
| 90776 |
+
"learning_rate": 2.2848839007423295e-07,
|
| 90777 |
+
"loss": 0.1098,
|
| 90778 |
+
"step": 12967
|
| 90779 |
+
},
|
| 90780 |
+
{
|
| 90781 |
+
"epoch": 0.9698238791459447,
|
| 90782 |
+
"grad_norm": 1.0904663801193237,
|
| 90783 |
+
"learning_rate": 2.2735951630950548e-07,
|
| 90784 |
+
"loss": 0.1515,
|
| 90785 |
+
"step": 12968
|
| 90786 |
+
},
|
| 90787 |
+
{
|
| 90788 |
+
"epoch": 0.9698986650712336,
|
| 90789 |
+
"grad_norm": 0.7357199192047119,
|
| 90790 |
+
"learning_rate": 2.2623343176705892e-07,
|
| 90791 |
+
"loss": 0.062,
|
| 90792 |
+
"step": 12969
|
| 90793 |
+
},
|
| 90794 |
+
{
|
| 90795 |
+
"epoch": 0.9699734509965224,
|
| 90796 |
+
"grad_norm": 0.513715386390686,
|
| 90797 |
+
"learning_rate": 2.2511013650999835e-07,
|
| 90798 |
+
"loss": 0.0408,
|
| 90799 |
+
"step": 12970
|
| 90800 |
+
},
|
| 90801 |
+
{
|
| 90802 |
+
"epoch": 0.9700482369218113,
|
| 90803 |
+
"grad_norm": 0.8974568247795105,
|
| 90804 |
+
"learning_rate": 2.2398963060127342e-07,
|
| 90805 |
+
"loss": 0.0687,
|
| 90806 |
+
"step": 12971
|
| 90807 |
+
},
|
| 90808 |
+
{
|
| 90809 |
+
"epoch": 0.9701230228471002,
|
| 90810 |
+
"grad_norm": 0.4329313039779663,
|
| 90811 |
+
"learning_rate": 2.2287191410367836e-07,
|
| 90812 |
+
"loss": 0.0332,
|
| 90813 |
+
"step": 12972
|
| 90814 |
+
},
|
| 90815 |
+
{
|
| 90816 |
+
"epoch": 0.970197808772389,
|
| 90817 |
+
"grad_norm": 0.751781165599823,
|
| 90818 |
+
"learning_rate": 2.2175698707984638e-07,
|
| 90819 |
+
"loss": 0.0691,
|
| 90820 |
+
"step": 12973
|
| 90821 |
+
},
|
| 90822 |
+
{
|
| 90823 |
+
"epoch": 0.9702725946976779,
|
| 90824 |
+
"grad_norm": 0.5423529148101807,
|
| 90825 |
+
"learning_rate": 2.2064484959226083e-07,
|
| 90826 |
+
"loss": 0.0449,
|
| 90827 |
+
"step": 12974
|
| 90828 |
+
},
|
| 90829 |
+
{
|
| 90830 |
+
"epoch": 0.9703473806229668,
|
| 90831 |
+
"grad_norm": 0.8365032076835632,
|
| 90832 |
+
"learning_rate": 2.195355017032441e-07,
|
| 90833 |
+
"loss": 0.0968,
|
| 90834 |
+
"step": 12975
|
| 90835 |
+
},
|
| 90836 |
+
{
|
| 90837 |
+
"epoch": 0.9704221665482556,
|
| 90838 |
+
"grad_norm": 0.9210798144340515,
|
| 90839 |
+
"learning_rate": 2.1842894347496312e-07,
|
| 90840 |
+
"loss": 0.1159,
|
| 90841 |
+
"step": 12976
|
| 90842 |
+
},
|
| 90843 |
+
{
|
| 90844 |
+
"epoch": 0.9704969524735445,
|
| 90845 |
+
"grad_norm": 0.7737799286842346,
|
| 90846 |
+
"learning_rate": 2.1732517496942383e-07,
|
| 90847 |
+
"loss": 0.0661,
|
| 90848 |
+
"step": 12977
|
| 90849 |
+
},
|
| 90850 |
+
{
|
| 90851 |
+
"epoch": 0.9705717383988334,
|
| 90852 |
+
"grad_norm": 0.7045473456382751,
|
| 90853 |
+
"learning_rate": 2.1622419624848788e-07,
|
| 90854 |
+
"loss": 0.0383,
|
| 90855 |
+
"step": 12978
|
| 90856 |
+
},
|
| 90857 |
+
{
|
| 90858 |
+
"epoch": 0.9706465243241222,
|
| 90859 |
+
"grad_norm": 1.0195949077606201,
|
| 90860 |
+
"learning_rate": 2.1512600737385037e-07,
|
| 90861 |
+
"loss": 0.0988,
|
| 90862 |
+
"step": 12979
|
| 90863 |
+
},
|
| 90864 |
+
{
|
| 90865 |
+
"epoch": 0.9707213102494111,
|
| 90866 |
+
"grad_norm": 0.8737218379974365,
|
| 90867 |
+
"learning_rate": 2.1403060840704536e-07,
|
| 90868 |
+
"loss": 0.0793,
|
| 90869 |
+
"step": 12980
|
| 90870 |
+
},
|
| 90871 |
+
{
|
| 90872 |
+
"epoch": 0.9707960961747,
|
| 90873 |
+
"grad_norm": 0.531203031539917,
|
| 90874 |
+
"learning_rate": 2.129379994094738e-07,
|
| 90875 |
+
"loss": 0.0303,
|
| 90876 |
+
"step": 12981
|
| 90877 |
+
},
|
| 90878 |
+
{
|
| 90879 |
+
"epoch": 0.9708708820999887,
|
| 90880 |
+
"grad_norm": 1.0328282117843628,
|
| 90881 |
+
"learning_rate": 2.1184818044235332e-07,
|
| 90882 |
+
"loss": 0.0792,
|
| 90883 |
+
"step": 12982
|
| 90884 |
+
},
|
| 90885 |
+
{
|
| 90886 |
+
"epoch": 0.9709456680252776,
|
| 90887 |
+
"grad_norm": 0.734826922416687,
|
| 90888 |
+
"learning_rate": 2.1076115156675735e-07,
|
| 90889 |
+
"loss": 0.0536,
|
| 90890 |
+
"step": 12983
|
| 90891 |
+
},
|
| 90892 |
+
{
|
| 90893 |
+
"epoch": 0.9710204539505665,
|
| 90894 |
+
"grad_norm": 0.5005146861076355,
|
| 90895 |
+
"learning_rate": 2.0967691284360935e-07,
|
| 90896 |
+
"loss": 0.0325,
|
| 90897 |
+
"step": 12984
|
| 90898 |
+
},
|
| 90899 |
+
{
|
| 90900 |
+
"epoch": 0.9710952398758553,
|
| 90901 |
+
"grad_norm": 1.020372748374939,
|
| 90902 |
+
"learning_rate": 2.0859546433366072e-07,
|
| 90903 |
+
"loss": 0.1083,
|
| 90904 |
+
"step": 12985
|
| 90905 |
+
},
|
| 90906 |
+
{
|
| 90907 |
+
"epoch": 0.9711700258011442,
|
| 90908 |
+
"grad_norm": 0.5662137866020203,
|
| 90909 |
+
"learning_rate": 2.0751680609751856e-07,
|
| 90910 |
+
"loss": 0.0426,
|
| 90911 |
+
"step": 12986
|
| 90912 |
+
},
|
| 90913 |
+
{
|
| 90914 |
+
"epoch": 0.9712448117264331,
|
| 90915 |
+
"grad_norm": 0.6404293179512024,
|
| 90916 |
+
"learning_rate": 2.064409381956345e-07,
|
| 90917 |
+
"loss": 0.0384,
|
| 90918 |
+
"step": 12987
|
| 90919 |
+
},
|
| 90920 |
+
{
|
| 90921 |
+
"epoch": 0.9713195976517219,
|
| 90922 |
+
"grad_norm": 0.6235434412956238,
|
| 90923 |
+
"learning_rate": 2.0536786068828805e-07,
|
| 90924 |
+
"loss": 0.0293,
|
| 90925 |
+
"step": 12988
|
| 90926 |
+
},
|
| 90927 |
+
{
|
| 90928 |
+
"epoch": 0.9713943835770108,
|
| 90929 |
+
"grad_norm": 1.5746077299118042,
|
| 90930 |
+
"learning_rate": 2.0429757363562563e-07,
|
| 90931 |
+
"loss": 0.1573,
|
| 90932 |
+
"step": 12989
|
| 90933 |
+
},
|
| 90934 |
+
{
|
| 90935 |
+
"epoch": 0.9714691695022997,
|
| 90936 |
+
"grad_norm": 0.35104164481163025,
|
| 90937 |
+
"learning_rate": 2.0323007709762144e-07,
|
| 90938 |
+
"loss": 0.0212,
|
| 90939 |
+
"step": 12990
|
| 90940 |
+
},
|
| 90941 |
+
{
|
| 90942 |
+
"epoch": 0.9715439554275885,
|
| 90943 |
+
"grad_norm": 0.8248312473297119,
|
| 90944 |
+
"learning_rate": 2.021653711340943e-07,
|
| 90945 |
+
"loss": 0.1061,
|
| 90946 |
+
"step": 12991
|
| 90947 |
+
},
|
| 90948 |
+
{
|
| 90949 |
+
"epoch": 0.9716187413528774,
|
| 90950 |
+
"grad_norm": 1.168135404586792,
|
| 90951 |
+
"learning_rate": 2.0110345580470756e-07,
|
| 90952 |
+
"loss": 0.1196,
|
| 90953 |
+
"step": 12992
|
| 90954 |
+
},
|
| 90955 |
+
{
|
| 90956 |
+
"epoch": 0.9716935272781663,
|
| 90957 |
+
"grad_norm": 0.6298313736915588,
|
| 90958 |
+
"learning_rate": 2.0004433116898037e-07,
|
| 90959 |
+
"loss": 0.039,
|
| 90960 |
+
"step": 12993
|
| 90961 |
+
},
|
| 90962 |
+
{
|
| 90963 |
+
"epoch": 0.9717683132034551,
|
| 90964 |
+
"grad_norm": 2.1503021717071533,
|
| 90965 |
+
"learning_rate": 1.9898799728625407e-07,
|
| 90966 |
+
"loss": 0.3274,
|
| 90967 |
+
"step": 12994
|
| 90968 |
+
},
|
| 90969 |
+
{
|
| 90970 |
+
"epoch": 0.971843099128744,
|
| 90971 |
+
"grad_norm": 2.1687769889831543,
|
| 90972 |
+
"learning_rate": 1.979344542157313e-07,
|
| 90973 |
+
"loss": 0.0948,
|
| 90974 |
+
"step": 12995
|
| 90975 |
+
},
|
| 90976 |
+
{
|
| 90977 |
+
"epoch": 0.9719178850540329,
|
| 90978 |
+
"grad_norm": 1.4140548706054688,
|
| 90979 |
+
"learning_rate": 1.9688370201644822e-07,
|
| 90980 |
+
"loss": 0.1153,
|
| 90981 |
+
"step": 12996
|
| 90982 |
+
},
|
| 90983 |
+
{
|
| 90984 |
+
"epoch": 0.9719926709793217,
|
| 90985 |
+
"grad_norm": 1.139455795288086,
|
| 90986 |
+
"learning_rate": 1.958357407472966e-07,
|
| 90987 |
+
"loss": 0.0983,
|
| 90988 |
+
"step": 12997
|
| 90989 |
+
},
|
| 90990 |
+
{
|
| 90991 |
+
"epoch": 0.9720674569046106,
|
| 90992 |
+
"grad_norm": 2.854573965072632,
|
| 90993 |
+
"learning_rate": 1.9479057046699056e-07,
|
| 90994 |
+
"loss": 0.6571,
|
| 90995 |
+
"step": 12998
|
| 90996 |
+
},
|
| 90997 |
+
{
|
| 90998 |
+
"epoch": 0.9721422428298994,
|
| 90999 |
+
"grad_norm": 1.1940664052963257,
|
| 91000 |
+
"learning_rate": 1.9374819123411104e-07,
|
| 91001 |
+
"loss": 0.0986,
|
| 91002 |
+
"step": 12999
|
| 91003 |
+
},
|
| 91004 |
+
{
|
| 91005 |
+
"epoch": 0.9722170287551882,
|
| 91006 |
+
"grad_norm": 1.5254565477371216,
|
| 91007 |
+
"learning_rate": 1.927086031070724e-07,
|
| 91008 |
+
"loss": 0.1172,
|
| 91009 |
+
"step": 13000
|
| 91010 |
+
},
|
| 91011 |
+
{
|
| 91012 |
+
"epoch": 0.9722918146804771,
|
| 91013 |
+
"grad_norm": 0.5440202355384827,
|
| 91014 |
+
"learning_rate": 1.9167180614411695e-07,
|
| 91015 |
+
"loss": 0.0491,
|
| 91016 |
+
"step": 13001
|
| 91017 |
+
},
|
| 91018 |
+
{
|
| 91019 |
+
"epoch": 0.972366600605766,
|
| 91020 |
+
"grad_norm": 0.7353929281234741,
|
| 91021 |
+
"learning_rate": 1.9063780040336488e-07,
|
| 91022 |
+
"loss": 0.0792,
|
| 91023 |
+
"step": 13002
|
| 91024 |
+
},
|
| 91025 |
+
{
|
| 91026 |
+
"epoch": 0.9724413865310548,
|
| 91027 |
+
"grad_norm": 0.67140793800354,
|
| 91028 |
+
"learning_rate": 1.8960658594275316e-07,
|
| 91029 |
+
"loss": 0.0683,
|
| 91030 |
+
"step": 13003
|
| 91031 |
+
},
|
| 91032 |
+
{
|
| 91033 |
+
"epoch": 0.9725161724563437,
|
| 91034 |
+
"grad_norm": 0.8194896578788757,
|
| 91035 |
+
"learning_rate": 1.885781628200689e-07,
|
| 91036 |
+
"loss": 0.0734,
|
| 91037 |
+
"step": 13004
|
| 91038 |
+
},
|
| 91039 |
+
{
|
| 91040 |
+
"epoch": 0.9725909583816326,
|
| 91041 |
+
"grad_norm": 0.714746356010437,
|
| 91042 |
+
"learning_rate": 1.8755253109294935e-07,
|
| 91043 |
+
"loss": 0.0611,
|
| 91044 |
+
"step": 13005
|
| 91045 |
+
},
|
| 91046 |
+
{
|
| 91047 |
+
"epoch": 0.9726657443069214,
|
| 91048 |
+
"grad_norm": 0.7146202921867371,
|
| 91049 |
+
"learning_rate": 1.865296908188652e-07,
|
| 91050 |
+
"loss": 0.0714,
|
| 91051 |
+
"step": 13006
|
| 91052 |
+
},
|
| 91053 |
+
{
|
| 91054 |
+
"epoch": 0.9727405302322103,
|
| 91055 |
+
"grad_norm": 0.4795819818973541,
|
| 91056 |
+
"learning_rate": 1.855096420551372e-07,
|
| 91057 |
+
"loss": 0.0331,
|
| 91058 |
+
"step": 13007
|
| 91059 |
+
},
|
| 91060 |
+
{
|
| 91061 |
+
"epoch": 0.9728153161574992,
|
| 91062 |
+
"grad_norm": 0.5267304182052612,
|
| 91063 |
+
"learning_rate": 1.844923848589253e-07,
|
| 91064 |
+
"loss": 0.0694,
|
| 91065 |
+
"step": 13008
|
| 91066 |
+
},
|
| 91067 |
+
{
|
| 91068 |
+
"epoch": 0.972890102082788,
|
| 91069 |
+
"grad_norm": 0.9844954013824463,
|
| 91070 |
+
"learning_rate": 1.8347791928724488e-07,
|
| 91071 |
+
"loss": 0.0921,
|
| 91072 |
+
"step": 13009
|
| 91073 |
+
},
|
| 91074 |
+
{
|
| 91075 |
+
"epoch": 0.9729648880080769,
|
| 91076 |
+
"grad_norm": 0.6365022659301758,
|
| 91077 |
+
"learning_rate": 1.824662453969339e-07,
|
| 91078 |
+
"loss": 0.068,
|
| 91079 |
+
"step": 13010
|
| 91080 |
+
},
|
| 91081 |
+
{
|
| 91082 |
+
"epoch": 0.9730396739333658,
|
| 91083 |
+
"grad_norm": 0.43110954761505127,
|
| 91084 |
+
"learning_rate": 1.8145736324469697e-07,
|
| 91085 |
+
"loss": 0.026,
|
| 91086 |
+
"step": 13011
|
| 91087 |
+
},
|
| 91088 |
+
{
|
| 91089 |
+
"epoch": 0.9731144598586546,
|
| 91090 |
+
"grad_norm": 0.550247073173523,
|
| 91091 |
+
"learning_rate": 1.804512728870611e-07,
|
| 91092 |
+
"loss": 0.0515,
|
| 91093 |
+
"step": 13012
|
| 91094 |
+
},
|
| 91095 |
+
{
|
| 91096 |
+
"epoch": 0.9731892457839435,
|
| 91097 |
+
"grad_norm": 0.4430747330188751,
|
| 91098 |
+
"learning_rate": 1.794479743804145e-07,
|
| 91099 |
+
"loss": 0.0328,
|
| 91100 |
+
"step": 13013
|
| 91101 |
+
},
|
| 91102 |
+
{
|
| 91103 |
+
"epoch": 0.9732640317092324,
|
| 91104 |
+
"grad_norm": 0.5010110139846802,
|
| 91105 |
+
"learning_rate": 1.7844746778097887e-07,
|
| 91106 |
+
"loss": 0.0228,
|
| 91107 |
+
"step": 13014
|
| 91108 |
+
},
|
| 91109 |
+
{
|
| 91110 |
+
"epoch": 0.9733388176345212,
|
| 91111 |
+
"grad_norm": 0.831089973449707,
|
| 91112 |
+
"learning_rate": 1.7744975314482047e-07,
|
| 91113 |
+
"loss": 0.0817,
|
| 91114 |
+
"step": 13015
|
| 91115 |
+
},
|
| 91116 |
+
{
|
| 91117 |
+
"epoch": 0.97341360355981,
|
| 91118 |
+
"grad_norm": 0.4925210773944855,
|
| 91119 |
+
"learning_rate": 1.7645483052785017e-07,
|
| 91120 |
+
"loss": 0.0318,
|
| 91121 |
+
"step": 13016
|
| 91122 |
+
},
|
| 91123 |
+
{
|
| 91124 |
+
"epoch": 0.9734883894850989,
|
| 91125 |
+
"grad_norm": 0.5358295440673828,
|
| 91126 |
+
"learning_rate": 1.7546269998582333e-07,
|
| 91127 |
+
"loss": 0.0459,
|
| 91128 |
+
"step": 13017
|
| 91129 |
+
},
|
| 91130 |
+
{
|
| 91131 |
+
"epoch": 0.9735631754103877,
|
| 91132 |
+
"grad_norm": 0.6370837688446045,
|
| 91133 |
+
"learning_rate": 1.7447336157434545e-07,
|
| 91134 |
+
"loss": 0.0613,
|
| 91135 |
+
"step": 13018
|
| 91136 |
+
},
|
| 91137 |
+
{
|
| 91138 |
+
"epoch": 0.9736379613356766,
|
| 91139 |
+
"grad_norm": 0.5662913918495178,
|
| 91140 |
+
"learning_rate": 1.7348681534884447e-07,
|
| 91141 |
+
"loss": 0.0432,
|
| 91142 |
+
"step": 13019
|
| 91143 |
+
},
|
| 91144 |
+
{
|
| 91145 |
+
"epoch": 0.9737127472609655,
|
| 91146 |
+
"grad_norm": 0.6075406074523926,
|
| 91147 |
+
"learning_rate": 1.7250306136461502e-07,
|
| 91148 |
+
"loss": 0.0367,
|
| 91149 |
+
"step": 13020
|
| 91150 |
+
},
|
| 91151 |
+
{
|
| 91152 |
+
"epoch": 0.9737875331862543,
|
| 91153 |
+
"grad_norm": 0.4553796947002411,
|
| 91154 |
+
"learning_rate": 1.7152209967678523e-07,
|
| 91155 |
+
"loss": 0.0303,
|
| 91156 |
+
"step": 13021
|
| 91157 |
+
},
|
| 91158 |
+
{
|
| 91159 |
+
"epoch": 0.9738623191115432,
|
| 91160 |
+
"grad_norm": 1.0038201808929443,
|
| 91161 |
+
"learning_rate": 1.7054393034032779e-07,
|
| 91162 |
+
"loss": 0.0675,
|
| 91163 |
+
"step": 13022
|
| 91164 |
+
},
|
| 91165 |
+
{
|
| 91166 |
+
"epoch": 0.9739371050368321,
|
| 91167 |
+
"grad_norm": 0.8288964629173279,
|
| 91168 |
+
"learning_rate": 1.695685534100544e-07,
|
| 91169 |
+
"loss": 0.0465,
|
| 91170 |
+
"step": 13023
|
| 91171 |
+
},
|
| 91172 |
+
{
|
| 91173 |
+
"epoch": 0.9740118909621209,
|
| 91174 |
+
"grad_norm": 0.7483683824539185,
|
| 91175 |
+
"learning_rate": 1.685959689406269e-07,
|
| 91176 |
+
"loss": 0.0743,
|
| 91177 |
+
"step": 13024
|
| 91178 |
+
},
|
| 91179 |
+
{
|
| 91180 |
+
"epoch": 0.9740866768874098,
|
| 91181 |
+
"grad_norm": 0.5443363189697266,
|
| 91182 |
+
"learning_rate": 1.6762617698654615e-07,
|
| 91183 |
+
"loss": 0.0478,
|
| 91184 |
+
"step": 13025
|
| 91185 |
+
},
|
| 91186 |
+
{
|
| 91187 |
+
"epoch": 0.9741614628126987,
|
| 91188 |
+
"grad_norm": 0.699834406375885,
|
| 91189 |
+
"learning_rate": 1.666591776021631e-07,
|
| 91190 |
+
"loss": 0.0734,
|
| 91191 |
+
"step": 13026
|
| 91192 |
+
},
|
| 91193 |
+
{
|
| 91194 |
+
"epoch": 0.9742362487379875,
|
| 91195 |
+
"grad_norm": 0.8187717795372009,
|
| 91196 |
+
"learning_rate": 1.6569497084166773e-07,
|
| 91197 |
+
"loss": 0.0655,
|
| 91198 |
+
"step": 13027
|
| 91199 |
+
},
|
| 91200 |
+
{
|
| 91201 |
+
"epoch": 0.9743110346632764,
|
| 91202 |
+
"grad_norm": 0.4898703992366791,
|
| 91203 |
+
"learning_rate": 1.647335567590891e-07,
|
| 91204 |
+
"loss": 0.0679,
|
| 91205 |
+
"step": 13028
|
| 91206 |
+
},
|
| 91207 |
+
{
|
| 91208 |
+
"epoch": 0.9743858205885653,
|
| 91209 |
+
"grad_norm": 0.5239977836608887,
|
| 91210 |
+
"learning_rate": 1.6377493540830625e-07,
|
| 91211 |
+
"loss": 0.0406,
|
| 91212 |
+
"step": 13029
|
| 91213 |
+
},
|
| 91214 |
+
{
|
| 91215 |
+
"epoch": 0.9744606065138541,
|
| 91216 |
+
"grad_norm": 0.7604244351387024,
|
| 91217 |
+
"learning_rate": 1.6281910684303737e-07,
|
| 91218 |
+
"loss": 0.0753,
|
| 91219 |
+
"step": 13030
|
| 91220 |
+
},
|
| 91221 |
+
{
|
| 91222 |
+
"epoch": 0.974535392439143,
|
| 91223 |
+
"grad_norm": 0.842095136642456,
|
| 91224 |
+
"learning_rate": 1.618660711168507e-07,
|
| 91225 |
+
"loss": 0.0494,
|
| 91226 |
+
"step": 13031
|
| 91227 |
+
},
|
| 91228 |
+
{
|
| 91229 |
+
"epoch": 0.9746101783644319,
|
| 91230 |
+
"grad_norm": 0.8381233215332031,
|
| 91231 |
+
"learning_rate": 1.6091582828315355e-07,
|
| 91232 |
+
"loss": 0.0549,
|
| 91233 |
+
"step": 13032
|
| 91234 |
+
},
|
| 91235 |
+
{
|
| 91236 |
+
"epoch": 0.9746849642897206,
|
| 91237 |
+
"grad_norm": 0.5621647238731384,
|
| 91238 |
+
"learning_rate": 1.5996837839519218e-07,
|
| 91239 |
+
"loss": 0.0483,
|
| 91240 |
+
"step": 13033
|
| 91241 |
+
},
|
| 91242 |
+
{
|
| 91243 |
+
"epoch": 0.9747597502150095,
|
| 91244 |
+
"grad_norm": 0.987895667552948,
|
| 91245 |
+
"learning_rate": 1.5902372150606304e-07,
|
| 91246 |
+
"loss": 0.0699,
|
| 91247 |
+
"step": 13034
|
| 91248 |
+
},
|
| 91249 |
+
{
|
| 91250 |
+
"epoch": 0.9748345361402984,
|
| 91251 |
+
"grad_norm": 0.7235648036003113,
|
| 91252 |
+
"learning_rate": 1.5808185766870153e-07,
|
| 91253 |
+
"loss": 0.0347,
|
| 91254 |
+
"step": 13035
|
| 91255 |
+
},
|
| 91256 |
+
{
|
| 91257 |
+
"epoch": 0.9749093220655872,
|
| 91258 |
+
"grad_norm": 0.6615068316459656,
|
| 91259 |
+
"learning_rate": 1.5714278693589878e-07,
|
| 91260 |
+
"loss": 0.0481,
|
| 91261 |
+
"step": 13036
|
| 91262 |
+
},
|
| 91263 |
+
{
|
| 91264 |
+
"epoch": 0.9749841079908761,
|
| 91265 |
+
"grad_norm": 0.6945041418075562,
|
| 91266 |
+
"learning_rate": 1.562065093602738e-07,
|
| 91267 |
+
"loss": 0.0366,
|
| 91268 |
+
"step": 13037
|
| 91269 |
+
},
|
| 91270 |
+
{
|
| 91271 |
+
"epoch": 0.975058893916165,
|
| 91272 |
+
"grad_norm": 0.8700776100158691,
|
| 91273 |
+
"learning_rate": 1.5527302499428465e-07,
|
| 91274 |
+
"loss": 0.0893,
|
| 91275 |
+
"step": 13038
|
| 91276 |
+
},
|
| 91277 |
+
{
|
| 91278 |
+
"epoch": 0.9751336798414538,
|
| 91279 |
+
"grad_norm": 0.7300289273262024,
|
| 91280 |
+
"learning_rate": 1.5434233389026165e-07,
|
| 91281 |
+
"loss": 0.0467,
|
| 91282 |
+
"step": 13039
|
| 91283 |
+
},
|
| 91284 |
+
{
|
| 91285 |
+
"epoch": 0.9752084657667427,
|
| 91286 |
+
"grad_norm": 0.6579194664955139,
|
| 91287 |
+
"learning_rate": 1.5341443610034645e-07,
|
| 91288 |
+
"loss": 0.0633,
|
| 91289 |
+
"step": 13040
|
| 91290 |
+
},
|
| 91291 |
+
{
|
| 91292 |
+
"epoch": 0.9752832516920316,
|
| 91293 |
+
"grad_norm": 1.026484727859497,
|
| 91294 |
+
"learning_rate": 1.5248933167654188e-07,
|
| 91295 |
+
"loss": 0.0938,
|
| 91296 |
+
"step": 13041
|
| 91297 |
+
},
|
| 91298 |
+
{
|
| 91299 |
+
"epoch": 0.9753580376173204,
|
| 91300 |
+
"grad_norm": 0.8569180369377136,
|
| 91301 |
+
"learning_rate": 1.515670206706954e-07,
|
| 91302 |
+
"loss": 0.0779,
|
| 91303 |
+
"step": 13042
|
| 91304 |
+
},
|
| 91305 |
+
{
|
| 91306 |
+
"epoch": 0.9754328235426093,
|
| 91307 |
+
"grad_norm": 1.164454698562622,
|
| 91308 |
+
"learning_rate": 1.506475031344823e-07,
|
| 91309 |
+
"loss": 0.1158,
|
| 91310 |
+
"step": 13043
|
| 91311 |
+
},
|
| 91312 |
+
{
|
| 91313 |
+
"epoch": 0.9755076094678982,
|
| 91314 |
+
"grad_norm": 0.6989597082138062,
|
| 91315 |
+
"learning_rate": 1.497307791194391e-07,
|
| 91316 |
+
"loss": 0.057,
|
| 91317 |
+
"step": 13044
|
| 91318 |
+
},
|
| 91319 |
+
{
|
| 91320 |
+
"epoch": 0.975582395393187,
|
| 91321 |
+
"grad_norm": 0.9949011206626892,
|
| 91322 |
+
"learning_rate": 1.488168486769359e-07,
|
| 91323 |
+
"loss": 0.0714,
|
| 91324 |
+
"step": 13045
|
| 91325 |
+
},
|
| 91326 |
+
{
|
| 91327 |
+
"epoch": 0.9756571813184759,
|
| 91328 |
+
"grad_norm": 1.5810455083847046,
|
| 91329 |
+
"learning_rate": 1.4790571185818724e-07,
|
| 91330 |
+
"loss": 0.1696,
|
| 91331 |
+
"step": 13046
|
| 91332 |
+
},
|
| 91333 |
+
{
|
| 91334 |
+
"epoch": 0.9757319672437648,
|
| 91335 |
+
"grad_norm": 1.3241075277328491,
|
| 91336 |
+
"learning_rate": 1.469973687142523e-07,
|
| 91337 |
+
"loss": 0.1038,
|
| 91338 |
+
"step": 13047
|
| 91339 |
+
},
|
| 91340 |
+
{
|
| 91341 |
+
"epoch": 0.9758067531690536,
|
| 91342 |
+
"grad_norm": 0.8140578269958496,
|
| 91343 |
+
"learning_rate": 1.4609181929604033e-07,
|
| 91344 |
+
"loss": 0.0522,
|
| 91345 |
+
"step": 13048
|
| 91346 |
+
},
|
| 91347 |
+
{
|
| 91348 |
+
"epoch": 0.9758815390943425,
|
| 91349 |
+
"grad_norm": 1.0757066011428833,
|
| 91350 |
+
"learning_rate": 1.4518906365429408e-07,
|
| 91351 |
+
"loss": 0.0717,
|
| 91352 |
+
"step": 13049
|
| 91353 |
+
},
|
| 91354 |
+
{
|
| 91355 |
+
"epoch": 0.9759563250196313,
|
| 91356 |
+
"grad_norm": 1.9058736562728882,
|
| 91357 |
+
"learning_rate": 1.4428910183960087e-07,
|
| 91358 |
+
"loss": 0.1612,
|
| 91359 |
+
"step": 13050
|
| 91360 |
+
},
|
| 91361 |
+
{
|
| 91362 |
+
"epoch": 0.9760311109449201,
|
| 91363 |
+
"grad_norm": 0.6265395879745483,
|
| 91364 |
+
"learning_rate": 1.4339193390239813e-07,
|
| 91365 |
+
"loss": 0.0873,
|
| 91366 |
+
"step": 13051
|
| 91367 |
+
},
|
| 91368 |
+
{
|
| 91369 |
+
"epoch": 0.976105896870209,
|
| 91370 |
+
"grad_norm": 1.08207106590271,
|
| 91371 |
+
"learning_rate": 1.4249755989295677e-07,
|
| 91372 |
+
"loss": 0.0839,
|
| 91373 |
+
"step": 13052
|
| 91374 |
+
},
|
| 91375 |
+
{
|
| 91376 |
+
"epoch": 0.9761806827954979,
|
| 91377 |
+
"grad_norm": 0.6696078777313232,
|
| 91378 |
+
"learning_rate": 1.4160597986140335e-07,
|
| 91379 |
+
"loss": 0.0655,
|
| 91380 |
+
"step": 13053
|
| 91381 |
+
},
|
| 91382 |
+
{
|
| 91383 |
+
"epoch": 0.9762554687207867,
|
| 91384 |
+
"grad_norm": 0.5708719491958618,
|
| 91385 |
+
"learning_rate": 1.4071719385769234e-07,
|
| 91386 |
+
"loss": 0.075,
|
| 91387 |
+
"step": 13054
|
| 91388 |
+
},
|
| 91389 |
+
{
|
| 91390 |
+
"epoch": 0.9763302546460756,
|
| 91391 |
+
"grad_norm": 0.39571017026901245,
|
| 91392 |
+
"learning_rate": 1.3983120193163946e-07,
|
| 91393 |
+
"loss": 0.0286,
|
| 91394 |
+
"step": 13055
|
| 91395 |
+
},
|
| 91396 |
+
{
|
| 91397 |
+
"epoch": 0.9764050405713645,
|
| 91398 |
+
"grad_norm": 1.0295779705047607,
|
| 91399 |
+
"learning_rate": 1.3894800413289388e-07,
|
| 91400 |
+
"loss": 0.1031,
|
| 91401 |
+
"step": 13056
|
| 91402 |
+
},
|
| 91403 |
+
{
|
| 91404 |
+
"epoch": 0.9764798264966533,
|
| 91405 |
+
"grad_norm": 0.42557287216186523,
|
| 91406 |
+
"learning_rate": 1.3806760051094935e-07,
|
| 91407 |
+
"loss": 0.0271,
|
| 91408 |
+
"step": 13057
|
| 91409 |
+
},
|
| 91410 |
+
{
|
| 91411 |
+
"epoch": 0.9765546124219422,
|
| 91412 |
+
"grad_norm": 0.8260351419448853,
|
| 91413 |
+
"learning_rate": 1.3718999111513308e-07,
|
| 91414 |
+
"loss": 0.0546,
|
| 91415 |
+
"step": 13058
|
| 91416 |
+
},
|
| 91417 |
+
{
|
| 91418 |
+
"epoch": 0.9766293983472311,
|
| 91419 |
+
"grad_norm": 0.7049921154975891,
|
| 91420 |
+
"learning_rate": 1.3631517599463907e-07,
|
| 91421 |
+
"loss": 0.0798,
|
| 91422 |
+
"step": 13059
|
| 91423 |
+
},
|
| 91424 |
+
{
|
| 91425 |
+
"epoch": 0.9767041842725199,
|
| 91426 |
+
"grad_norm": 0.5924288034439087,
|
| 91427 |
+
"learning_rate": 1.3544315519848916e-07,
|
| 91428 |
+
"loss": 0.0518,
|
| 91429 |
+
"step": 13060
|
| 91430 |
+
},
|
| 91431 |
+
{
|
| 91432 |
+
"epoch": 0.9767789701978088,
|
| 91433 |
+
"grad_norm": 1.14959716796875,
|
| 91434 |
+
"learning_rate": 1.3457392877553876e-07,
|
| 91435 |
+
"loss": 0.0876,
|
| 91436 |
+
"step": 13061
|
| 91437 |
+
},
|
| 91438 |
+
{
|
| 91439 |
+
"epoch": 0.9768537561230977,
|
| 91440 |
+
"grad_norm": 0.4802398085594177,
|
| 91441 |
+
"learning_rate": 1.3370749677451554e-07,
|
| 91442 |
+
"loss": 0.0389,
|
| 91443 |
+
"step": 13062
|
| 91444 |
+
},
|
| 91445 |
+
{
|
| 91446 |
+
"epoch": 0.9769285420483865,
|
| 91447 |
+
"grad_norm": 0.6689320802688599,
|
| 91448 |
+
"learning_rate": 1.3284385924396403e-07,
|
| 91449 |
+
"loss": 0.0361,
|
| 91450 |
+
"step": 13063
|
| 91451 |
+
},
|
| 91452 |
+
{
|
| 91453 |
+
"epoch": 0.9770033279736754,
|
| 91454 |
+
"grad_norm": 0.6408694386482239,
|
| 91455 |
+
"learning_rate": 1.3198301623227883e-07,
|
| 91456 |
+
"loss": 0.0544,
|
| 91457 |
+
"step": 13064
|
| 91458 |
+
},
|
| 91459 |
+
{
|
| 91460 |
+
"epoch": 0.9770781138989643,
|
| 91461 |
+
"grad_norm": 0.6966913342475891,
|
| 91462 |
+
"learning_rate": 1.3112496778771022e-07,
|
| 91463 |
+
"loss": 0.0607,
|
| 91464 |
+
"step": 13065
|
| 91465 |
+
},
|
| 91466 |
+
{
|
| 91467 |
+
"epoch": 0.9771528998242531,
|
| 91468 |
+
"grad_norm": 0.8057048916816711,
|
| 91469 |
+
"learning_rate": 1.3026971395833642e-07,
|
| 91470 |
+
"loss": 0.0675,
|
| 91471 |
+
"step": 13066
|
| 91472 |
+
},
|
| 91473 |
+
{
|
| 91474 |
+
"epoch": 0.9772276857495419,
|
| 91475 |
+
"grad_norm": 0.645700216293335,
|
| 91476 |
+
"learning_rate": 1.2941725479208578e-07,
|
| 91477 |
+
"loss": 0.0397,
|
| 91478 |
+
"step": 13067
|
| 91479 |
+
},
|
| 91480 |
+
{
|
| 91481 |
+
"epoch": 0.9773024716748308,
|
| 91482 |
+
"grad_norm": 0.812199056148529,
|
| 91483 |
+
"learning_rate": 1.2856759033673116e-07,
|
| 91484 |
+
"loss": 0.0548,
|
| 91485 |
+
"step": 13068
|
| 91486 |
+
},
|
| 91487 |
+
{
|
| 91488 |
+
"epoch": 0.9773772576001196,
|
| 91489 |
+
"grad_norm": 1.3543890714645386,
|
| 91490 |
+
"learning_rate": 1.2772072063989003e-07,
|
| 91491 |
+
"loss": 0.0872,
|
| 91492 |
+
"step": 13069
|
| 91493 |
+
},
|
| 91494 |
+
{
|
| 91495 |
+
"epoch": 0.9774520435254085,
|
| 91496 |
+
"grad_norm": 0.48335230350494385,
|
| 91497 |
+
"learning_rate": 1.2687664574901336e-07,
|
| 91498 |
+
"loss": 0.0335,
|
| 91499 |
+
"step": 13070
|
| 91500 |
+
},
|
| 91501 |
+
{
|
| 91502 |
+
"epoch": 0.9775268294506974,
|
| 91503 |
+
"grad_norm": 0.7605381011962891,
|
| 91504 |
+
"learning_rate": 1.2603536571140773e-07,
|
| 91505 |
+
"loss": 0.0757,
|
| 91506 |
+
"step": 13071
|
| 91507 |
+
},
|
| 91508 |
+
{
|
| 91509 |
+
"epoch": 0.9776016153759862,
|
| 91510 |
+
"grad_norm": 0.8594883680343628,
|
| 91511 |
+
"learning_rate": 1.2519688057421318e-07,
|
| 91512 |
+
"loss": 0.0716,
|
| 91513 |
+
"step": 13072
|
| 91514 |
+
},
|
| 91515 |
+
{
|
| 91516 |
+
"epoch": 0.9776764013012751,
|
| 91517 |
+
"grad_norm": 0.8642313480377197,
|
| 91518 |
+
"learning_rate": 1.2436119038441996e-07,
|
| 91519 |
+
"loss": 0.0716,
|
| 91520 |
+
"step": 13073
|
| 91521 |
+
},
|
| 91522 |
+
{
|
| 91523 |
+
"epoch": 0.977751187226564,
|
| 91524 |
+
"grad_norm": 0.7008442282676697,
|
| 91525 |
+
"learning_rate": 1.2352829518886831e-07,
|
| 91526 |
+
"loss": 0.0606,
|
| 91527 |
+
"step": 13074
|
| 91528 |
+
},
|
| 91529 |
+
{
|
| 91530 |
+
"epoch": 0.9778259731518528,
|
| 91531 |
+
"grad_norm": 0.6041141748428345,
|
| 91532 |
+
"learning_rate": 1.2269819503421542e-07,
|
| 91533 |
+
"loss": 0.032,
|
| 91534 |
+
"step": 13075
|
| 91535 |
+
},
|
| 91536 |
+
{
|
| 91537 |
+
"epoch": 0.9779007590771417,
|
| 91538 |
+
"grad_norm": 0.73857182264328,
|
| 91539 |
+
"learning_rate": 1.2187088996699625e-07,
|
| 91540 |
+
"loss": 0.0572,
|
| 91541 |
+
"step": 13076
|
| 91542 |
+
},
|
| 91543 |
+
{
|
| 91544 |
+
"epoch": 0.9779755450024306,
|
| 91545 |
+
"grad_norm": 0.7224127650260925,
|
| 91546 |
+
"learning_rate": 1.2104638003355705e-07,
|
| 91547 |
+
"loss": 0.0564,
|
| 91548 |
+
"step": 13077
|
| 91549 |
+
},
|
| 91550 |
+
{
|
| 91551 |
+
"epoch": 0.9780503309277194,
|
| 91552 |
+
"grad_norm": 0.49023354053497314,
|
| 91553 |
+
"learning_rate": 1.2022466528011645e-07,
|
| 91554 |
+
"loss": 0.0395,
|
| 91555 |
+
"step": 13078
|
| 91556 |
+
},
|
| 91557 |
+
{
|
| 91558 |
+
"epoch": 0.9781251168530083,
|
| 91559 |
+
"grad_norm": 0.6345604658126831,
|
| 91560 |
+
"learning_rate": 1.194057457527209e-07,
|
| 91561 |
+
"loss": 0.0457,
|
| 91562 |
+
"step": 13079
|
| 91563 |
+
},
|
| 91564 |
+
{
|
| 91565 |
+
"epoch": 0.9781999027782972,
|
| 91566 |
+
"grad_norm": 0.7459271550178528,
|
| 91567 |
+
"learning_rate": 1.185896214972504e-07,
|
| 91568 |
+
"loss": 0.0605,
|
| 91569 |
+
"step": 13080
|
| 91570 |
+
},
|
| 91571 |
+
{
|
| 91572 |
+
"epoch": 0.978274688703586,
|
| 91573 |
+
"grad_norm": 0.9838876724243164,
|
| 91574 |
+
"learning_rate": 1.1777629255945167e-07,
|
| 91575 |
+
"loss": 0.0737,
|
| 91576 |
+
"step": 13081
|
| 91577 |
+
},
|
| 91578 |
+
{
|
| 91579 |
+
"epoch": 0.9783494746288749,
|
| 91580 |
+
"grad_norm": 0.6713008880615234,
|
| 91581 |
+
"learning_rate": 1.1696575898489936e-07,
|
| 91582 |
+
"loss": 0.0688,
|
| 91583 |
+
"step": 13082
|
| 91584 |
+
},
|
| 91585 |
+
{
|
| 91586 |
+
"epoch": 0.9784242605541638,
|
| 91587 |
+
"grad_norm": 0.8351396918296814,
|
| 91588 |
+
"learning_rate": 1.1615802081901272e-07,
|
| 91589 |
+
"loss": 0.0405,
|
| 91590 |
+
"step": 13083
|
| 91591 |
+
},
|
| 91592 |
+
{
|
| 91593 |
+
"epoch": 0.9784990464794525,
|
| 91594 |
+
"grad_norm": 0.7137233018875122,
|
| 91595 |
+
"learning_rate": 1.1535307810706108e-07,
|
| 91596 |
+
"loss": 0.0456,
|
| 91597 |
+
"step": 13084
|
| 91598 |
+
},
|
| 91599 |
+
{
|
| 91600 |
+
"epoch": 0.9785738324047414,
|
| 91601 |
+
"grad_norm": 0.57951420545578,
|
| 91602 |
+
"learning_rate": 1.1455093089415281e-07,
|
| 91603 |
+
"loss": 0.031,
|
| 91604 |
+
"step": 13085
|
| 91605 |
+
},
|
| 91606 |
+
{
|
| 91607 |
+
"epoch": 0.9786486183300303,
|
| 91608 |
+
"grad_norm": 0.938587486743927,
|
| 91609 |
+
"learning_rate": 1.1375157922523527e-07,
|
| 91610 |
+
"loss": 0.1027,
|
| 91611 |
+
"step": 13086
|
| 91612 |
+
},
|
| 91613 |
+
{
|
| 91614 |
+
"epoch": 0.9787234042553191,
|
| 91615 |
+
"grad_norm": 0.7072553038597107,
|
| 91616 |
+
"learning_rate": 1.1295502314510598e-07,
|
| 91617 |
+
"loss": 0.0458,
|
| 91618 |
+
"step": 13087
|
| 91619 |
+
},
|
| 91620 |
+
{
|
| 91621 |
+
"epoch": 0.978798190180608,
|
| 91622 |
+
"grad_norm": 0.5360969305038452,
|
| 91623 |
+
"learning_rate": 1.1216126269840699e-07,
|
| 91624 |
+
"loss": 0.0417,
|
| 91625 |
+
"step": 13088
|
| 91626 |
+
},
|
| 91627 |
+
{
|
| 91628 |
+
"epoch": 0.9788729761058969,
|
| 91629 |
+
"grad_norm": 0.8541834950447083,
|
| 91630 |
+
"learning_rate": 1.1137029792961384e-07,
|
| 91631 |
+
"loss": 0.0572,
|
| 91632 |
+
"step": 13089
|
| 91633 |
+
},
|
| 91634 |
+
{
|
| 91635 |
+
"epoch": 0.9789477620311857,
|
| 91636 |
+
"grad_norm": 0.8962938189506531,
|
| 91637 |
+
"learning_rate": 1.105821288830522e-07,
|
| 91638 |
+
"loss": 0.0871,
|
| 91639 |
+
"step": 13090
|
| 91640 |
+
},
|
| 91641 |
+
{
|
| 91642 |
+
"epoch": 0.9790225479564746,
|
| 91643 |
+
"grad_norm": 1.4392637014389038,
|
| 91644 |
+
"learning_rate": 1.0979675560289781e-07,
|
| 91645 |
+
"loss": 0.1042,
|
| 91646 |
+
"step": 13091
|
| 91647 |
+
},
|
| 91648 |
+
{
|
| 91649 |
+
"epoch": 0.9790973338817635,
|
| 91650 |
+
"grad_norm": 1.2927100658416748,
|
| 91651 |
+
"learning_rate": 1.090141781331544e-07,
|
| 91652 |
+
"loss": 0.1091,
|
| 91653 |
+
"step": 13092
|
| 91654 |
+
},
|
| 91655 |
+
{
|
| 91656 |
+
"epoch": 0.9791721198070523,
|
| 91657 |
+
"grad_norm": 1.6105395555496216,
|
| 91658 |
+
"learning_rate": 1.0823439651767575e-07,
|
| 91659 |
+
"loss": 0.1397,
|
| 91660 |
+
"step": 13093
|
| 91661 |
+
},
|
| 91662 |
+
{
|
| 91663 |
+
"epoch": 0.9792469057323412,
|
| 91664 |
+
"grad_norm": 1.2756192684173584,
|
| 91665 |
+
"learning_rate": 1.0745741080017135e-07,
|
| 91666 |
+
"loss": 0.0866,
|
| 91667 |
+
"step": 13094
|
| 91668 |
+
},
|
| 91669 |
+
{
|
| 91670 |
+
"epoch": 0.9793216916576301,
|
| 91671 |
+
"grad_norm": 1.3140145540237427,
|
| 91672 |
+
"learning_rate": 1.0668322102417305e-07,
|
| 91673 |
+
"loss": 0.1108,
|
| 91674 |
+
"step": 13095
|
| 91675 |
+
},
|
| 91676 |
+
{
|
| 91677 |
+
"epoch": 0.9793964775829189,
|
| 91678 |
+
"grad_norm": 1.489345908164978,
|
| 91679 |
+
"learning_rate": 1.0591182723306836e-07,
|
| 91680 |
+
"loss": 0.1239,
|
| 91681 |
+
"step": 13096
|
| 91682 |
+
},
|
| 91683 |
+
{
|
| 91684 |
+
"epoch": 0.9794712635082078,
|
| 91685 |
+
"grad_norm": 0.8814570903778076,
|
| 91686 |
+
"learning_rate": 1.0514322947008382e-07,
|
| 91687 |
+
"loss": 0.0919,
|
| 91688 |
+
"step": 13097
|
| 91689 |
+
},
|
| 91690 |
+
{
|
| 91691 |
+
"epoch": 0.9795460494334967,
|
| 91692 |
+
"grad_norm": 1.595694661140442,
|
| 91693 |
+
"learning_rate": 1.0437742777829606e-07,
|
| 91694 |
+
"loss": 0.1223,
|
| 91695 |
+
"step": 13098
|
| 91696 |
+
},
|
| 91697 |
+
{
|
| 91698 |
+
"epoch": 0.9796208353587855,
|
| 91699 |
+
"grad_norm": 1.1279810667037964,
|
| 91700 |
+
"learning_rate": 1.0361442220061524e-07,
|
| 91701 |
+
"loss": 0.0767,
|
| 91702 |
+
"step": 13099
|
| 91703 |
+
},
|
| 91704 |
+
{
|
| 91705 |
+
"epoch": 0.9796956212840744,
|
| 91706 |
+
"grad_norm": 2.32450270652771,
|
| 91707 |
+
"learning_rate": 1.0285421277980156e-07,
|
| 91708 |
+
"loss": 0.2051,
|
| 91709 |
+
"step": 13100
|
| 91710 |
+
},
|
| 91711 |
+
{
|
| 91712 |
+
"epoch": 0.9797704072093631,
|
| 91713 |
+
"grad_norm": 0.7000548243522644,
|
| 91714 |
+
"learning_rate": 1.0209679955845985e-07,
|
| 91715 |
+
"loss": 0.0823,
|
| 91716 |
+
"step": 13101
|
| 91717 |
+
},
|
| 91718 |
+
{
|
| 91719 |
+
"epoch": 0.979845193134652,
|
| 91720 |
+
"grad_norm": 0.5720806121826172,
|
| 91721 |
+
"learning_rate": 1.013421825790284e-07,
|
| 91722 |
+
"loss": 0.0701,
|
| 91723 |
+
"step": 13102
|
| 91724 |
+
},
|
| 91725 |
+
{
|
| 91726 |
+
"epoch": 0.9799199790599409,
|
| 91727 |
+
"grad_norm": 0.9151096343994141,
|
| 91728 |
+
"learning_rate": 1.0059036188380111e-07,
|
| 91729 |
+
"loss": 0.0914,
|
| 91730 |
+
"step": 13103
|
| 91731 |
+
},
|
| 91732 |
+
{
|
| 91733 |
+
"epoch": 0.9799947649852297,
|
| 91734 |
+
"grad_norm": 0.5501378774642944,
|
| 91735 |
+
"learning_rate": 9.984133751490543e-08,
|
| 91736 |
+
"loss": 0.05,
|
| 91737 |
+
"step": 13104
|
| 91738 |
+
},
|
| 91739 |
+
{
|
| 91740 |
+
"epoch": 0.9800695509105186,
|
| 91741 |
+
"grad_norm": 0.6816980838775635,
|
| 91742 |
+
"learning_rate": 9.909510951431888e-08,
|
| 91743 |
+
"loss": 0.041,
|
| 91744 |
+
"step": 13105
|
| 91745 |
+
},
|
| 91746 |
+
{
|
| 91747 |
+
"epoch": 0.9801443368358075,
|
| 91748 |
+
"grad_norm": 0.47302597761154175,
|
| 91749 |
+
"learning_rate": 9.835167792385803e-08,
|
| 91750 |
+
"loss": 0.0259,
|
| 91751 |
+
"step": 13106
|
| 91752 |
+
},
|
| 91753 |
+
{
|
| 91754 |
+
"epoch": 0.9802191227610964,
|
| 91755 |
+
"grad_norm": 0.5860419273376465,
|
| 91756 |
+
"learning_rate": 9.761104278518396e-08,
|
| 91757 |
+
"loss": 0.0405,
|
| 91758 |
+
"step": 13107
|
| 91759 |
+
},
|
| 91760 |
+
{
|
| 91761 |
+
"epoch": 0.9802939086863852,
|
| 91762 |
+
"grad_norm": 0.7843829989433289,
|
| 91763 |
+
"learning_rate": 9.687320413980239e-08,
|
| 91764 |
+
"loss": 0.0765,
|
| 91765 |
+
"step": 13108
|
| 91766 |
+
},
|
| 91767 |
+
{
|
| 91768 |
+
"epoch": 0.9803686946116741,
|
| 91769 |
+
"grad_norm": 0.48027125000953674,
|
| 91770 |
+
"learning_rate": 9.613816202906356e-08,
|
| 91771 |
+
"loss": 0.0365,
|
| 91772 |
+
"step": 13109
|
| 91773 |
+
},
|
| 91774 |
+
{
|
| 91775 |
+
"epoch": 0.980443480536963,
|
| 91776 |
+
"grad_norm": 1.1443341970443726,
|
| 91777 |
+
"learning_rate": 9.54059164941512e-08,
|
| 91778 |
+
"loss": 0.08,
|
| 91779 |
+
"step": 13110
|
| 91780 |
+
},
|
| 91781 |
+
{
|
| 91782 |
+
"epoch": 0.9805182664622518,
|
| 91783 |
+
"grad_norm": 0.6189633011817932,
|
| 91784 |
+
"learning_rate": 9.467646757611026e-08,
|
| 91785 |
+
"loss": 0.0579,
|
| 91786 |
+
"step": 13111
|
| 91787 |
+
},
|
| 91788 |
+
{
|
| 91789 |
+
"epoch": 0.9805930523875407,
|
| 91790 |
+
"grad_norm": 0.576843798160553,
|
| 91791 |
+
"learning_rate": 9.394981531580804e-08,
|
| 91792 |
+
"loss": 0.0628,
|
| 91793 |
+
"step": 13112
|
| 91794 |
+
},
|
| 91795 |
+
{
|
| 91796 |
+
"epoch": 0.9806678383128296,
|
| 91797 |
+
"grad_norm": 0.6445755362510681,
|
| 91798 |
+
"learning_rate": 9.322595975397308e-08,
|
| 91799 |
+
"loss": 0.1089,
|
| 91800 |
+
"step": 13113
|
| 91801 |
+
},
|
| 91802 |
+
{
|
| 91803 |
+
"epoch": 0.9807426242381184,
|
| 91804 |
+
"grad_norm": 0.6944069862365723,
|
| 91805 |
+
"learning_rate": 9.250490093116737e-08,
|
| 91806 |
+
"loss": 0.0606,
|
| 91807 |
+
"step": 13114
|
| 91808 |
+
},
|
| 91809 |
+
{
|
| 91810 |
+
"epoch": 0.9808174101634073,
|
| 91811 |
+
"grad_norm": 0.5240738987922668,
|
| 91812 |
+
"learning_rate": 9.178663888779748e-08,
|
| 91813 |
+
"loss": 0.0372,
|
| 91814 |
+
"step": 13115
|
| 91815 |
+
},
|
| 91816 |
+
{
|
| 91817 |
+
"epoch": 0.9808921960886962,
|
| 91818 |
+
"grad_norm": 0.5983749628067017,
|
| 91819 |
+
"learning_rate": 9.107117366411456e-08,
|
| 91820 |
+
"loss": 0.0414,
|
| 91821 |
+
"step": 13116
|
| 91822 |
+
},
|
| 91823 |
+
{
|
| 91824 |
+
"epoch": 0.980966982013985,
|
| 91825 |
+
"grad_norm": 0.6702715158462524,
|
| 91826 |
+
"learning_rate": 9.035850530021428e-08,
|
| 91827 |
+
"loss": 0.0489,
|
| 91828 |
+
"step": 13117
|
| 91829 |
+
},
|
| 91830 |
+
{
|
| 91831 |
+
"epoch": 0.9810417679392738,
|
| 91832 |
+
"grad_norm": 0.44082406163215637,
|
| 91833 |
+
"learning_rate": 8.96486338360314e-08,
|
| 91834 |
+
"loss": 0.0266,
|
| 91835 |
+
"step": 13118
|
| 91836 |
+
},
|
| 91837 |
+
{
|
| 91838 |
+
"epoch": 0.9811165538645626,
|
| 91839 |
+
"grad_norm": 0.904138445854187,
|
| 91840 |
+
"learning_rate": 8.894155931135073e-08,
|
| 91841 |
+
"loss": 0.0778,
|
| 91842 |
+
"step": 13119
|
| 91843 |
+
},
|
| 91844 |
+
{
|
| 91845 |
+
"epoch": 0.9811913397898515,
|
| 91846 |
+
"grad_norm": 0.7605939507484436,
|
| 91847 |
+
"learning_rate": 8.82372817657906e-08,
|
| 91848 |
+
"loss": 0.1153,
|
| 91849 |
+
"step": 13120
|
| 91850 |
+
},
|
| 91851 |
+
{
|
| 91852 |
+
"epoch": 0.9812661257151404,
|
| 91853 |
+
"grad_norm": 0.9398657083511353,
|
| 91854 |
+
"learning_rate": 8.753580123882499e-08,
|
| 91855 |
+
"loss": 0.0912,
|
| 91856 |
+
"step": 13121
|
| 91857 |
+
},
|
| 91858 |
+
{
|
| 91859 |
+
"epoch": 0.9813409116404292,
|
| 91860 |
+
"grad_norm": 0.44631102681159973,
|
| 91861 |
+
"learning_rate": 8.683711776976133e-08,
|
| 91862 |
+
"loss": 0.0276,
|
| 91863 |
+
"step": 13122
|
| 91864 |
+
},
|
| 91865 |
+
{
|
| 91866 |
+
"epoch": 0.9814156975657181,
|
| 91867 |
+
"grad_norm": 0.7926178574562073,
|
| 91868 |
+
"learning_rate": 8.61412313977461e-08,
|
| 91869 |
+
"loss": 0.0831,
|
| 91870 |
+
"step": 13123
|
| 91871 |
+
},
|
| 91872 |
+
{
|
| 91873 |
+
"epoch": 0.981490483491007,
|
| 91874 |
+
"grad_norm": 0.8973842859268188,
|
| 91875 |
+
"learning_rate": 8.544814216179253e-08,
|
| 91876 |
+
"loss": 0.0874,
|
| 91877 |
+
"step": 13124
|
| 91878 |
+
},
|
| 91879 |
+
{
|
| 91880 |
+
"epoch": 0.9815652694162958,
|
| 91881 |
+
"grad_norm": 0.8152311444282532,
|
| 91882 |
+
"learning_rate": 8.475785010072512e-08,
|
| 91883 |
+
"loss": 0.0522,
|
| 91884 |
+
"step": 13125
|
| 91885 |
+
},
|
| 91886 |
+
{
|
| 91887 |
+
"epoch": 0.9816400553415847,
|
| 91888 |
+
"grad_norm": 0.8099016547203064,
|
| 91889 |
+
"learning_rate": 8.40703552532296e-08,
|
| 91890 |
+
"loss": 0.0697,
|
| 91891 |
+
"step": 13126
|
| 91892 |
+
},
|
| 91893 |
+
{
|
| 91894 |
+
"epoch": 0.9817148412668736,
|
| 91895 |
+
"grad_norm": 1.021374225616455,
|
| 91896 |
+
"learning_rate": 8.338565765784179e-08,
|
| 91897 |
+
"loss": 0.1113,
|
| 91898 |
+
"step": 13127
|
| 91899 |
+
},
|
| 91900 |
+
{
|
| 91901 |
+
"epoch": 0.9817896271921625,
|
| 91902 |
+
"grad_norm": 1.1013946533203125,
|
| 91903 |
+
"learning_rate": 8.270375735292547e-08,
|
| 91904 |
+
"loss": 0.1259,
|
| 91905 |
+
"step": 13128
|
| 91906 |
+
},
|
| 91907 |
+
{
|
| 91908 |
+
"epoch": 0.9818644131174513,
|
| 91909 |
+
"grad_norm": 0.6119487881660461,
|
| 91910 |
+
"learning_rate": 8.202465437669449e-08,
|
| 91911 |
+
"loss": 0.0635,
|
| 91912 |
+
"step": 13129
|
| 91913 |
+
},
|
| 91914 |
+
{
|
| 91915 |
+
"epoch": 0.9819391990427402,
|
| 91916 |
+
"grad_norm": 0.6561717391014099,
|
| 91917 |
+
"learning_rate": 8.134834876720732e-08,
|
| 91918 |
+
"loss": 0.0358,
|
| 91919 |
+
"step": 13130
|
| 91920 |
+
},
|
| 91921 |
+
{
|
| 91922 |
+
"epoch": 0.982013984968029,
|
| 91923 |
+
"grad_norm": 0.8759846687316895,
|
| 91924 |
+
"learning_rate": 8.067484056235586e-08,
|
| 91925 |
+
"loss": 0.0563,
|
| 91926 |
+
"step": 13131
|
| 91927 |
+
},
|
| 91928 |
+
{
|
| 91929 |
+
"epoch": 0.9820887708933179,
|
| 91930 |
+
"grad_norm": 0.6102621555328369,
|
| 91931 |
+
"learning_rate": 8.000412979989325e-08,
|
| 91932 |
+
"loss": 0.0394,
|
| 91933 |
+
"step": 13132
|
| 91934 |
+
},
|
| 91935 |
+
{
|
| 91936 |
+
"epoch": 0.9821635568186068,
|
| 91937 |
+
"grad_norm": 0.9649103283882141,
|
| 91938 |
+
"learning_rate": 7.933621651740053e-08,
|
| 91939 |
+
"loss": 0.1023,
|
| 91940 |
+
"step": 13133
|
| 91941 |
+
},
|
| 91942 |
+
{
|
| 91943 |
+
"epoch": 0.9822383427438957,
|
| 91944 |
+
"grad_norm": 1.011826753616333,
|
| 91945 |
+
"learning_rate": 7.867110075231444e-08,
|
| 91946 |
+
"loss": 0.0957,
|
| 91947 |
+
"step": 13134
|
| 91948 |
+
},
|
| 91949 |
+
{
|
| 91950 |
+
"epoch": 0.9823131286691844,
|
| 91951 |
+
"grad_norm": 0.8232734799385071,
|
| 91952 |
+
"learning_rate": 7.800878254188848e-08,
|
| 91953 |
+
"loss": 0.0801,
|
| 91954 |
+
"step": 13135
|
| 91955 |
+
},
|
| 91956 |
+
{
|
| 91957 |
+
"epoch": 0.9823879145944733,
|
| 91958 |
+
"grad_norm": 0.6169044971466064,
|
| 91959 |
+
"learning_rate": 7.734926192325965e-08,
|
| 91960 |
+
"loss": 0.0352,
|
| 91961 |
+
"step": 13136
|
| 91962 |
+
},
|
| 91963 |
+
{
|
| 91964 |
+
"epoch": 0.9824627005197621,
|
| 91965 |
+
"grad_norm": 0.6818606853485107,
|
| 91966 |
+
"learning_rate": 7.669253893337614e-08,
|
| 91967 |
+
"loss": 0.0494,
|
| 91968 |
+
"step": 13137
|
| 91969 |
+
},
|
| 91970 |
+
{
|
| 91971 |
+
"epoch": 0.982537486445051,
|
| 91972 |
+
"grad_norm": 1.4228134155273438,
|
| 91973 |
+
"learning_rate": 7.603861360904186e-08,
|
| 91974 |
+
"loss": 0.3075,
|
| 91975 |
+
"step": 13138
|
| 91976 |
+
},
|
| 91977 |
+
{
|
| 91978 |
+
"epoch": 0.9826122723703399,
|
| 91979 |
+
"grad_norm": 0.8413154482841492,
|
| 91980 |
+
"learning_rate": 7.538748598690526e-08,
|
| 91981 |
+
"loss": 0.0662,
|
| 91982 |
+
"step": 13139
|
| 91983 |
+
},
|
| 91984 |
+
{
|
| 91985 |
+
"epoch": 0.9826870582956287,
|
| 91986 |
+
"grad_norm": 0.8035567402839661,
|
| 91987 |
+
"learning_rate": 7.473915610344828e-08,
|
| 91988 |
+
"loss": 0.0769,
|
| 91989 |
+
"step": 13140
|
| 91990 |
+
},
|
| 91991 |
+
{
|
| 91992 |
+
"epoch": 0.9827618442209176,
|
| 91993 |
+
"grad_norm": 0.9257685542106628,
|
| 91994 |
+
"learning_rate": 7.409362399501407e-08,
|
| 91995 |
+
"loss": 0.0857,
|
| 91996 |
+
"step": 13141
|
| 91997 |
+
},
|
| 91998 |
+
{
|
| 91999 |
+
"epoch": 0.9828366301462065,
|
| 92000 |
+
"grad_norm": 2.2359378337860107,
|
| 92001 |
+
"learning_rate": 7.345088969776259e-08,
|
| 92002 |
+
"loss": 0.2031,
|
| 92003 |
+
"step": 13142
|
| 92004 |
+
},
|
| 92005 |
+
{
|
| 92006 |
+
"epoch": 0.9829114160714953,
|
| 92007 |
+
"grad_norm": 0.49772417545318604,
|
| 92008 |
+
"learning_rate": 7.281095324772613e-08,
|
| 92009 |
+
"loss": 0.0257,
|
| 92010 |
+
"step": 13143
|
| 92011 |
+
},
|
| 92012 |
+
{
|
| 92013 |
+
"epoch": 0.9829862019967842,
|
| 92014 |
+
"grad_norm": 0.7169822454452515,
|
| 92015 |
+
"learning_rate": 7.217381468075934e-08,
|
| 92016 |
+
"loss": 0.0399,
|
| 92017 |
+
"step": 13144
|
| 92018 |
+
},
|
| 92019 |
+
{
|
| 92020 |
+
"epoch": 0.9830609879220731,
|
| 92021 |
+
"grad_norm": 1.068713903427124,
|
| 92022 |
+
"learning_rate": 7.1539474032567e-08,
|
| 92023 |
+
"loss": 0.0722,
|
| 92024 |
+
"step": 13145
|
| 92025 |
}
|
| 92026 |
],
|
| 92027 |
"logging_steps": 1,
|
|
|
|
| 92041 |
"attributes": {}
|
| 92042 |
}
|
| 92043 |
},
|
| 92044 |
+
"total_flos": 9.048973682147328e+16,
|
| 92045 |
"train_batch_size": 4,
|
| 92046 |
"trial_name": null,
|
| 92047 |
"trial_params": null
|