Upload checkpoint 7600

Files changed:
- README.md +3 -3
- adapter_model.safetensors +1 -1
- loss.png +2 -2
- optimizer.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +703 -3
README.md
CHANGED

@@ -2,7 +2,7 @@
 base_model: Qwen/Qwen2.5-3B-Instruct
 library_name: peft
 ---
-# Gradience T1 3B (Step
+# Gradience T1 3B (Step 7600 Checkpoint)
 
 > [!NOTE]
 > Training in progress...

@@ -38,11 +38,11 @@ library_name: peft
 </head>
 <body>
 <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
-<div style="height: 30px; width:
+<div style="height: 30px; width: 77.25%; background-color: #76c7c0; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
 <!-- 3.75% -->
 </div>
 </div>
-<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress:
+<p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 7600 out of 9838 steps</p>
 </body>
 </html>
 
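The new bar width is consistent with the step counts in the progress line: 7600 / 9838 ≈ 77.25%. A minimal sketch of how such a bar could be regenerated from the step counts (the helper below is hypothetical, not part of this repo):

```python
# Hypothetical helper: render the README progress-bar div from step counts.
# round(100 * 7600 / 9838, 2) == 77.25, matching the width in the diff above.
def progress_bar_html(step: int, total: int) -> str:
    pct = round(100 * step / total, 2)
    return (
        '<div style="height: 30px; width: {pct}%; background-color: #76c7c0; '
        'text-align: center; line-height: 30px; color: white; '
        'border-radius: 25px 0 0 25px;"></div>'
    ).format(pct=pct)

print(progress_bar_html(7600, 9838))
```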
adapter_model.safetensors
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1fa1a8b2084abf4c0aa143ad82d5673ad22c47fc43f7f35a295ebd5f949602b1
 size 119801528
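Each binary file in this commit is stored as a Git LFS pointer: a short text stub recording the spec version, the SHA-256 of the blob, and its size in bytes. A minimal sketch of verifying a downloaded blob against its pointer (the file path is illustrative; the oid and size come from the diff above):

```python
import hashlib

def verify_lfs_blob(path: str, oid: str, size: int) -> bool:
    # Stream the file so large checkpoints don't need to fit in memory.
    h = hashlib.sha256()
    n = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
            n += len(chunk)
    return n == size and h.hexdigest() == oid

ok = verify_lfs_blob(
    "adapter_model.safetensors",
    "1fa1a8b2084abf4c0aa143ad82d5673ad22c47fc43f7f35a295ebd5f949602b1",
    119801528,
)
```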
loss.png
CHANGED

(Binary image stored with Git LFS; old and new versions not rendered here.)
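The loss curve can plausibly be regenerated from the per-step entries in trainer_state.json (diffed below), since each log entry carries a "loss" and "step" field. A minimal sketch, assuming matplotlib is available:

```python
import json
import matplotlib.pyplot as plt

# Plot training loss from the log_history entries in trainer_state.json.
with open("trainer_state.json") as f:
    state = json.load(f)

entries = [e for e in state["log_history"] if "loss" in e]
plt.plot([e["step"] for e in entries], [e["loss"] for e in entries])
plt.xlabel("step")
plt.ylabel("loss")
plt.savefig("loss.png")
```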
optimizer.pt
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d724f96203373dcec88adb5dfa4341c8156f923a7472825798252a2a46750bd3
 size 61392692
scheduler.pt
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5559d18ced05b328f0925fbaa730c7e7e3e59068bfb889a48ac53bc0cce0a8c7
 size 1064
trainer_state.json
CHANGED

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.
+  "epoch": 1.5450294775360844,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 7600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,

@@ -52508,6 +52508,706 @@
       "learning_rate": 4.757449405064579e-05,
       "loss": 0.9683,
       "step": 7500
+    },
+    {
+      "epoch": 1.524903435657654,
+      "grad_norm": 0.16842851042747498,
+      "learning_rate": 4.755415437811451e-05,
+      "loss": 1.1311,
+      "step": 7501
+    },
+    {
+      "epoch": 1.5251067290099614,
+      "grad_norm": 0.1710801124572754,
+      "learning_rate": 4.753381470558324e-05,
+      "loss": 1.051,
+      "step": 7502
+    },
+    {
+      "epoch": 1.5253100223622686,
+      "grad_norm": 0.14032766222953796,
+      "learning_rate": 4.751347503305197e-05,
+      "loss": 0.886,
+      "step": 7503
+    },
+    {
+      "epoch": 1.5255133157145762,
+      "grad_norm": 0.16021350026130676,
+      "learning_rate": 4.74931353605207e-05,
+      "loss": 1.1057,
+      "step": 7504
+    },
+    {
+      "epoch": 1.5257166090668837,
+      "grad_norm": 0.13777956366539001,
+      "learning_rate": 4.747279568798942e-05,
+      "loss": 0.8405,
+      "step": 7505
+    },
+    {
+      "epoch": 1.525919902419191,
+      "grad_norm": 0.15827859938144684,
+      "learning_rate": 4.7452456015458155e-05,
+      "loss": 0.9807,
+      "step": 7506
+    },
+    {
+      "epoch": 1.5261231957714982,
+      "grad_norm": 0.14875048398971558,
+      "learning_rate": 4.743211634292688e-05,
+      "loss": 0.9142,
+      "step": 7507
+    },
+    {
+      "epoch": 1.5263264891238055,
+      "grad_norm": 0.1471138298511505,
+      "learning_rate": 4.741177667039561e-05,
+      "loss": 0.9305,
+      "step": 7508
+    },
+    {
+      "epoch": 1.526529782476113,
+      "grad_norm": 0.15858818590641022,
+      "learning_rate": 4.7391436997864335e-05,
+      "loss": 1.0326,
+      "step": 7509
+    },
+    {
+      "epoch": 1.5267330758284205,
+      "grad_norm": 0.14757820963859558,
+      "learning_rate": 4.7371097325333066e-05,
+      "loss": 0.9423,
+      "step": 7510
+    },
+    {
+      "epoch": 1.5269363691807278,
+      "grad_norm": 0.14671318233013153,
+      "learning_rate": 4.735075765280179e-05,
+      "loss": 0.9953,
+      "step": 7511
+    },
+    {
+      "epoch": 1.5271396625330351,
+      "grad_norm": 0.16315753757953644,
+      "learning_rate": 4.733041798027052e-05,
+      "loss": 1.0674,
+      "step": 7512
+    },
+    {
+      "epoch": 1.5273429558853424,
+      "grad_norm": 0.15158745646476746,
+      "learning_rate": 4.7310078307739246e-05,
+      "loss": 0.9416,
+      "step": 7513
+    },
+    {
+      "epoch": 1.52754624923765,
+      "grad_norm": 0.1569458246231079,
+      "learning_rate": 4.728973863520798e-05,
+      "loss": 1.1336,
+      "step": 7514
+    },
+    {
+      "epoch": 1.5277495425899574,
+      "grad_norm": 0.15651223063468933,
+      "learning_rate": 4.72693989626767e-05,
+      "loss": 0.9724,
+      "step": 7515
+    },
+    {
+      "epoch": 1.5279528359422647,
+      "grad_norm": 0.1413620412349701,
+      "learning_rate": 4.724905929014543e-05,
+      "loss": 0.9164,
+      "step": 7516
+    },
+    {
+      "epoch": 1.528156129294572,
+      "grad_norm": 0.14873231947422028,
+      "learning_rate": 4.722871961761416e-05,
+      "loss": 0.9904,
+      "step": 7517
+    },
+    {
+      "epoch": 1.5283594226468793,
+      "grad_norm": 0.16632919013500214,
+      "learning_rate": 4.720837994508289e-05,
+      "loss": 1.0332,
+      "step": 7518
+    },
+    {
+      "epoch": 1.5285627159991868,
+      "grad_norm": 0.1570500135421753,
+      "learning_rate": 4.718804027255161e-05,
+      "loss": 1.0491,
+      "step": 7519
+    },
+    {
+      "epoch": 1.5287660093514943,
+      "grad_norm": 0.17738598585128784,
+      "learning_rate": 4.7167700600020344e-05,
+      "loss": 1.049,
+      "step": 7520
+    },
+    {
+      "epoch": 1.5289693027038016,
+      "grad_norm": 0.1635134369134903,
+      "learning_rate": 4.714736092748907e-05,
+      "loss": 1.1412,
+      "step": 7521
+    },
+    {
+      "epoch": 1.529172596056109,
+      "grad_norm": 0.16301599144935608,
+      "learning_rate": 4.71270212549578e-05,
+      "loss": 1.0967,
+      "step": 7522
+    },
+    {
+      "epoch": 1.5293758894084164,
+      "grad_norm": 0.14440634846687317,
+      "learning_rate": 4.7106681582426525e-05,
+      "loss": 0.8357,
+      "step": 7523
+    },
+    {
+      "epoch": 1.5295791827607237,
+      "grad_norm": 0.144694983959198,
+      "learning_rate": 4.7086341909895256e-05,
+      "loss": 0.8722,
+      "step": 7524
+    },
+    {
+      "epoch": 1.5297824761130312,
+      "grad_norm": 0.14646680653095245,
+      "learning_rate": 4.706600223736398e-05,
+      "loss": 0.8988,
+      "step": 7525
+    },
+    {
+      "epoch": 1.5299857694653385,
+      "grad_norm": 0.1644057333469391,
+      "learning_rate": 4.704566256483271e-05,
+      "loss": 1.1197,
+      "step": 7526
+    },
+    {
+      "epoch": 1.5301890628176458,
+      "grad_norm": 0.1693752557039261,
+      "learning_rate": 4.7025322892301436e-05,
+      "loss": 1.0486,
+      "step": 7527
+    },
+    {
+      "epoch": 1.5303923561699533,
+      "grad_norm": 0.1716986894607544,
+      "learning_rate": 4.700498321977017e-05,
+      "loss": 1.1087,
+      "step": 7528
+    },
+    {
+      "epoch": 1.5305956495222606,
+      "grad_norm": 0.1740422248840332,
+      "learning_rate": 4.698464354723889e-05,
+      "loss": 1.0909,
+      "step": 7529
+    },
+    {
+      "epoch": 1.530798942874568,
+      "grad_norm": 0.15906310081481934,
+      "learning_rate": 4.696430387470762e-05,
+      "loss": 1.0841,
+      "step": 7530
+    },
+    {
+      "epoch": 1.5310022362268754,
+      "grad_norm": 0.14159859716892242,
+      "learning_rate": 4.694396420217635e-05,
+      "loss": 0.8766,
+      "step": 7531
+    },
+    {
+      "epoch": 1.5312055295791827,
+      "grad_norm": 0.17096573114395142,
+      "learning_rate": 4.692362452964508e-05,
+      "loss": 1.1308,
+      "step": 7532
+    },
+    {
+      "epoch": 1.5314088229314902,
+      "grad_norm": 0.16331720352172852,
+      "learning_rate": 4.69032848571138e-05,
+      "loss": 0.9884,
+      "step": 7533
+    },
+    {
+      "epoch": 1.5316121162837977,
+      "grad_norm": 0.15612895786762238,
+      "learning_rate": 4.6882945184582534e-05,
+      "loss": 0.9737,
+      "step": 7534
+    },
+    {
+      "epoch": 1.531815409636105,
+      "grad_norm": 0.1716272234916687,
+      "learning_rate": 4.686260551205126e-05,
+      "loss": 1.2049,
+      "step": 7535
+    },
+    {
+      "epoch": 1.5320187029884123,
+      "grad_norm": 0.15378396213054657,
+      "learning_rate": 4.684226583951999e-05,
+      "loss": 1.0315,
+      "step": 7536
+    },
+    {
+      "epoch": 1.5322219963407195,
+      "grad_norm": 0.16745533049106598,
+      "learning_rate": 4.682192616698871e-05,
+      "loss": 1.1749,
+      "step": 7537
+    },
+    {
+      "epoch": 1.532425289693027,
+      "grad_norm": 0.16122505068778992,
+      "learning_rate": 4.680158649445744e-05,
+      "loss": 1.1481,
+      "step": 7538
+    },
+    {
+      "epoch": 1.5326285830453346,
+      "grad_norm": 0.15753133594989777,
+      "learning_rate": 4.678124682192617e-05,
+      "loss": 1.0085,
+      "step": 7539
+    },
+    {
+      "epoch": 1.5328318763976418,
+      "grad_norm": 0.15394344925880432,
+      "learning_rate": 4.6760907149394895e-05,
+      "loss": 0.9611,
+      "step": 7540
+    },
+    {
+      "epoch": 1.5330351697499491,
+      "grad_norm": 0.1620665341615677,
+      "learning_rate": 4.6740567476863626e-05,
+      "loss": 0.9435,
+      "step": 7541
+    },
+    {
+      "epoch": 1.5332384631022564,
+      "grad_norm": 0.15785206854343414,
+      "learning_rate": 4.672022780433235e-05,
+      "loss": 1.0311,
+      "step": 7542
+    },
+    {
+      "epoch": 1.533441756454564,
+      "grad_norm": 0.15812784433364868,
+      "learning_rate": 4.669988813180108e-05,
+      "loss": 0.9304,
+      "step": 7543
+    },
+    {
+      "epoch": 1.5336450498068714,
+      "grad_norm": 0.14829683303833008,
+      "learning_rate": 4.6679548459269806e-05,
+      "loss": 0.9735,
+      "step": 7544
+    },
+    {
+      "epoch": 1.5338483431591787,
+      "grad_norm": 0.15306831896305084,
+      "learning_rate": 4.665920878673854e-05,
+      "loss": 0.9459,
+      "step": 7545
+    },
+    {
+      "epoch": 1.534051636511486,
+      "grad_norm": 0.1524849534034729,
+      "learning_rate": 4.663886911420726e-05,
+      "loss": 0.9989,
+      "step": 7546
+    },
+    {
+      "epoch": 1.5342549298637933,
+      "grad_norm": 0.1524866819381714,
+      "learning_rate": 4.661852944167599e-05,
+      "loss": 0.9516,
+      "step": 7547
+    },
+    {
+      "epoch": 1.5344582232161008,
+      "grad_norm": 0.1561049073934555,
+      "learning_rate": 4.659818976914472e-05,
+      "loss": 0.9629,
+      "step": 7548
+    },
+    {
+      "epoch": 1.5346615165684083,
+      "grad_norm": 0.15052708983421326,
+      "learning_rate": 4.657785009661345e-05,
+      "loss": 0.9709,
+      "step": 7549
+    },
+    {
+      "epoch": 1.5348648099207156,
+      "grad_norm": 0.16317294538021088,
+      "learning_rate": 4.655751042408217e-05,
+      "loss": 1.0431,
+      "step": 7550
+    },
+    {
+      "epoch": 1.535068103273023,
+      "grad_norm": 0.1577170193195343,
+      "learning_rate": 4.6537170751550904e-05,
+      "loss": 1.0794,
+      "step": 7551
+    },
+    {
+      "epoch": 1.5352713966253302,
+      "grad_norm": 0.16741138696670532,
+      "learning_rate": 4.651683107901963e-05,
+      "loss": 1.2215,
+      "step": 7552
+    },
+    {
+      "epoch": 1.5354746899776377,
+      "grad_norm": 0.1500609666109085,
+      "learning_rate": 4.649649140648836e-05,
+      "loss": 0.9439,
+      "step": 7553
+    },
+    {
+      "epoch": 1.5356779833299452,
+      "grad_norm": 0.15758995711803436,
+      "learning_rate": 4.6476151733957085e-05,
+      "loss": 0.8848,
+      "step": 7554
+    },
+    {
+      "epoch": 1.5358812766822525,
+      "grad_norm": 0.14967188239097595,
+      "learning_rate": 4.6455812061425816e-05,
+      "loss": 0.9408,
+      "step": 7555
+    },
+    {
+      "epoch": 1.5360845700345598,
+      "grad_norm": 0.13587024807929993,
+      "learning_rate": 4.643547238889454e-05,
+      "loss": 0.9078,
+      "step": 7556
+    },
+    {
+      "epoch": 1.5362878633868673,
+      "grad_norm": 0.1551710069179535,
+      "learning_rate": 4.641513271636327e-05,
+      "loss": 1.0213,
+      "step": 7557
+    },
+    {
+      "epoch": 1.5364911567391746,
+      "grad_norm": 0.15696901082992554,
+      "learning_rate": 4.6394793043831996e-05,
+      "loss": 0.9854,
+      "step": 7558
+    },
+    {
+      "epoch": 1.536694450091482,
+      "grad_norm": 0.14111942052841187,
+      "learning_rate": 4.637445337130073e-05,
+      "loss": 0.8104,
+      "step": 7559
+    },
+    {
+      "epoch": 1.5368977434437894,
+      "grad_norm": 0.16344057023525238,
+      "learning_rate": 4.635411369876945e-05,
+      "loss": 1.0198,
+      "step": 7560
+    },
+    {
+      "epoch": 1.5371010367960967,
+      "grad_norm": 0.15371447801589966,
+      "learning_rate": 4.633377402623818e-05,
+      "loss": 1.1088,
+      "step": 7561
+    },
+    {
+      "epoch": 1.5373043301484042,
+      "grad_norm": 0.16405069828033447,
+      "learning_rate": 4.631343435370691e-05,
+      "loss": 1.1632,
+      "step": 7562
+    },
+    {
+      "epoch": 1.5375076235007117,
+      "grad_norm": 0.17231358587741852,
+      "learning_rate": 4.629309468117564e-05,
+      "loss": 1.0729,
+      "step": 7563
+    },
+    {
+      "epoch": 1.537710916853019,
+      "grad_norm": 0.16849292814731598,
+      "learning_rate": 4.627275500864436e-05,
+      "loss": 1.1287,
+      "step": 7564
+    },
+    {
+      "epoch": 1.5379142102053263,
+      "grad_norm": 0.14124159514904022,
+      "learning_rate": 4.6252415336113094e-05,
+      "loss": 0.9726,
+      "step": 7565
+    },
+    {
+      "epoch": 1.5381175035576335,
+      "grad_norm": 0.16582997143268585,
+      "learning_rate": 4.623207566358182e-05,
+      "loss": 1.048,
+      "step": 7566
+    },
+    {
+      "epoch": 1.538320796909941,
+      "grad_norm": 0.15703178942203522,
+      "learning_rate": 4.621173599105055e-05,
+      "loss": 0.9917,
+      "step": 7567
+    },
+    {
+      "epoch": 1.5385240902622486,
+      "grad_norm": 0.1521129459142685,
+      "learning_rate": 4.6191396318519274e-05,
+      "loss": 0.9314,
+      "step": 7568
+    },
+    {
+      "epoch": 1.5387273836145559,
+      "grad_norm": 0.18239177763462067,
+      "learning_rate": 4.6171056645988006e-05,
+      "loss": 1.1309,
+      "step": 7569
+    },
+    {
+      "epoch": 1.5389306769668631,
+      "grad_norm": 0.15608282387256622,
+      "learning_rate": 4.615071697345673e-05,
+      "loss": 0.9907,
+      "step": 7570
+    },
+    {
+      "epoch": 1.5391339703191704,
+      "grad_norm": 0.14907321333885193,
+      "learning_rate": 4.613037730092546e-05,
+      "loss": 0.948,
+      "step": 7571
+    },
+    {
+      "epoch": 1.539337263671478,
+      "grad_norm": 0.15870921313762665,
+      "learning_rate": 4.6110037628394186e-05,
+      "loss": 1.0293,
+      "step": 7572
+    },
+    {
+      "epoch": 1.5395405570237854,
+      "grad_norm": 0.1471608281135559,
+      "learning_rate": 4.608969795586292e-05,
+      "loss": 0.9045,
+      "step": 7573
+    },
+    {
+      "epoch": 1.5397438503760927,
+      "grad_norm": 0.1473323255777359,
+      "learning_rate": 4.606935828333164e-05,
+      "loss": 0.9773,
+      "step": 7574
+    },
+    {
+      "epoch": 1.5399471437284,
+      "grad_norm": 0.15672756731510162,
+      "learning_rate": 4.604901861080037e-05,
+      "loss": 0.9564,
+      "step": 7575
+    },
+    {
+      "epoch": 1.5401504370807073,
+      "grad_norm": 0.13355454802513123,
+      "learning_rate": 4.60286789382691e-05,
+      "loss": 0.9043,
+      "step": 7576
+    },
+    {
+      "epoch": 1.5403537304330148,
+      "grad_norm": 0.16888266801834106,
+      "learning_rate": 4.600833926573783e-05,
+      "loss": 1.049,
+      "step": 7577
+    },
+    {
+      "epoch": 1.5405570237853223,
+      "grad_norm": 0.14586526155471802,
+      "learning_rate": 4.5987999593206546e-05,
+      "loss": 0.991,
+      "step": 7578
+    },
+    {
+      "epoch": 1.5407603171376296,
+      "grad_norm": 0.15697935223579407,
+      "learning_rate": 4.596765992067528e-05,
+      "loss": 0.9276,
+      "step": 7579
+    },
+    {
+      "epoch": 1.540963610489937,
+      "grad_norm": 0.1606079787015915,
+      "learning_rate": 4.5947320248144e-05,
+      "loss": 1.1167,
+      "step": 7580
+    },
+    {
+      "epoch": 1.5411669038422442,
+      "grad_norm": 0.14496320486068726,
+      "learning_rate": 4.592698057561273e-05,
+      "loss": 0.978,
+      "step": 7581
+    },
+    {
+      "epoch": 1.5413701971945517,
+      "grad_norm": 0.1540028601884842,
+      "learning_rate": 4.590664090308146e-05,
+      "loss": 0.9584,
+      "step": 7582
+    },
+    {
+      "epoch": 1.5415734905468592,
+      "grad_norm": 0.14273619651794434,
+      "learning_rate": 4.588630123055019e-05,
+      "loss": 0.9559,
+      "step": 7583
+    },
+    {
+      "epoch": 1.5417767838991665,
+      "grad_norm": 0.15364350378513336,
+      "learning_rate": 4.586596155801891e-05,
+      "loss": 1.067,
+      "step": 7584
+    },
+    {
+      "epoch": 1.5419800772514738,
+      "grad_norm": 0.15916843712329865,
+      "learning_rate": 4.5845621885487644e-05,
+      "loss": 0.9734,
+      "step": 7585
+    },
+    {
+      "epoch": 1.5421833706037813,
+      "grad_norm": 0.16618654131889343,
+      "learning_rate": 4.582528221295637e-05,
+      "loss": 1.0467,
+      "step": 7586
+    },
+    {
+      "epoch": 1.5423866639560886,
+      "grad_norm": 0.1487346738576889,
+      "learning_rate": 4.58049425404251e-05,
+      "loss": 0.9555,
+      "step": 7587
+    },
+    {
+      "epoch": 1.542589957308396,
+      "grad_norm": 0.1543288677930832,
+      "learning_rate": 4.5784602867893825e-05,
+      "loss": 1.0203,
+      "step": 7588
+    },
+    {
+      "epoch": 1.5427932506607034,
+      "grad_norm": 0.15385927259922028,
+      "learning_rate": 4.5764263195362556e-05,
+      "loss": 1.0728,
+      "step": 7589
+    },
+    {
+      "epoch": 1.5429965440130107,
+      "grad_norm": 0.18972186744213104,
+      "learning_rate": 4.574392352283128e-05,
+      "loss": 1.1087,
+      "step": 7590
+    },
+    {
+      "epoch": 1.5431998373653182,
+      "grad_norm": 0.17217358946800232,
+      "learning_rate": 4.572358385030001e-05,
+      "loss": 1.0384,
+      "step": 7591
+    },
+    {
+      "epoch": 1.5434031307176257,
+      "grad_norm": 0.15717031061649323,
+      "learning_rate": 4.5703244177768736e-05,
+      "loss": 0.8593,
+      "step": 7592
+    },
+    {
+      "epoch": 1.543606424069933,
+      "grad_norm": 0.17360135912895203,
+      "learning_rate": 4.568290450523747e-05,
+      "loss": 1.1914,
+      "step": 7593
+    },
+    {
+      "epoch": 1.5438097174222403,
+      "grad_norm": 0.15492455661296844,
+      "learning_rate": 4.566256483270619e-05,
+      "loss": 0.9039,
+      "step": 7594
+    },
+    {
+      "epoch": 1.5440130107745476,
+      "grad_norm": 0.15058903396129608,
+      "learning_rate": 4.564222516017492e-05,
+      "loss": 0.9224,
+      "step": 7595
+    },
+    {
+      "epoch": 1.544216304126855,
+      "grad_norm": 0.16502228379249573,
+      "learning_rate": 4.562188548764365e-05,
+      "loss": 0.9956,
+      "step": 7596
+    },
+    {
+      "epoch": 1.5444195974791626,
+      "grad_norm": 0.15759393572807312,
+      "learning_rate": 4.560154581511238e-05,
+      "loss": 1.0067,
+      "step": 7597
+    },
+    {
+      "epoch": 1.5446228908314699,
+      "grad_norm": 0.1422048658132553,
+      "learning_rate": 4.558120614258111e-05,
+      "loss": 0.9564,
+      "step": 7598
+    },
+    {
+      "epoch": 1.5448261841837772,
+      "grad_norm": 0.13447371125221252,
+      "learning_rate": 4.5560866470049834e-05,
+      "loss": 0.9567,
+      "step": 7599
+    },
+    {
+      "epoch": 1.5450294775360844,
+      "grad_norm": 0.1465720683336258,
+      "learning_rate": 4.5540526797518566e-05,
+      "loss": 0.945,
+      "step": 7600
     }
   ],
   "logging_steps": 1,

@@ -52527,7 +53227,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.
+  "total_flos": 4.261864221527556e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
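The added learning rates fall on a straight line, decreasing by about 2.034e-08 per step; that slope is consistent with a linear decay schedule that reaches zero right around step 9838, the total step count shown in the README progress line. A quick arithmetic check, assuming linear decay (values taken from the diff above):

```python
# Learning rates logged at steps 7500 and 7600 in trainer_state.json.
lr_7500 = 4.757449405064579e-05
lr_7600 = 4.5540526797518566e-05

slope = (lr_7500 - lr_7600) / 100   # ~2.034e-08 decrease per step
zero_at = 7600 + lr_7600 / slope    # ~9839, i.e. the end of the 9838-step run
print(slope, zero_at)
```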