Training in progress, step 1125, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 981512984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e416f0d868f250fb4fe48bc32a34264e5934e660b3cc400e50eedcffe33124f7
|
| 3 |
size 981512984
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 8.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -27308,11 +27308,1961 @@
|
|
| 27308 |
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27309 |
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27310 |
"step": 1050
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27311 |
}
|
| 27312 |
],
|
| 27313 |
"logging_steps": 1,
|
| 27314 |
"max_steps": 1200,
|
| 27315 |
-
"num_input_tokens_seen":
|
| 27316 |
"num_train_epochs": 10,
|
| 27317 |
"save_steps": 75,
|
| 27318 |
"stateful_callbacks": {
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 8.923076923076923,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 1125,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 27308 |
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27309 |
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27310 |
"step": 1050
|
| 27311 |
+
},
|
| 27312 |
+
{
|
| 27313 |
+
"clip_ratio/high_max": 0.0,
|
| 27314 |
+
"clip_ratio/high_mean": 0.0,
|
| 27315 |
+
"clip_ratio/low_mean": 0.0,
|
| 27316 |
+
"clip_ratio/low_min": 0.0,
|
| 27317 |
+
"clip_ratio/region_mean": 0.0,
|
| 27318 |
+
"completions/clipped_ratio": 0.0,
|
| 27319 |
+
"completions/max_length": 1.0,
|
| 27320 |
+
"completions/max_terminated_length": 1.0,
|
| 27321 |
+
"completions/mean_length": 1.0,
|
| 27322 |
+
"completions/mean_terminated_length": 1.0,
|
| 27323 |
+
"completions/min_length": 1.0,
|
| 27324 |
+
"completions/min_terminated_length": 1.0,
|
| 27325 |
+
"epoch": 8.339250493096648,
|
| 27326 |
+
"frac_reward_zero_std": 1.0,
|
| 27327 |
+
"grad_norm": 0.00013638069503940642,
|
| 27328 |
+
"kl": 4.820525646209717e-05,
|
| 27329 |
+
"learning_rate": 2e-06,
|
| 27330 |
+
"loss": 0.0,
|
| 27331 |
+
"num_tokens": 583217.0,
|
| 27332 |
+
"reward": 0.0009184567024931312,
|
| 27333 |
+
"reward_std": 0.0,
|
| 27334 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27335 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27336 |
+
"step": 1051
|
| 27337 |
+
},
|
| 27338 |
+
{
|
| 27339 |
+
"clip_ratio/high_max": 0.0,
|
| 27340 |
+
"clip_ratio/high_mean": 0.0,
|
| 27341 |
+
"clip_ratio/low_mean": 0.0,
|
| 27342 |
+
"clip_ratio/low_min": 0.0,
|
| 27343 |
+
"clip_ratio/region_mean": 0.0,
|
| 27344 |
+
"completions/clipped_ratio": 0.0625,
|
| 27345 |
+
"completions/max_length": 128.0,
|
| 27346 |
+
"completions/max_terminated_length": 1.0,
|
| 27347 |
+
"completions/mean_length": 8.9375,
|
| 27348 |
+
"completions/mean_terminated_length": 1.0,
|
| 27349 |
+
"completions/min_length": 1.0,
|
| 27350 |
+
"completions/min_terminated_length": 1.0,
|
| 27351 |
+
"epoch": 8.347140039447732,
|
| 27352 |
+
"frac_reward_zero_std": 0.9375,
|
| 27353 |
+
"grad_norm": 0.04354625940322876,
|
| 27354 |
+
"kl": 0.001021722739096731,
|
| 27355 |
+
"learning_rate": 2e-06,
|
| 27356 |
+
"loss": 0.0001,
|
| 27357 |
+
"num_tokens": 584039.0,
|
| 27358 |
+
"reward": 0.000905577908270061,
|
| 27359 |
+
"reward_std": 5.816980319650611e-07,
|
| 27360 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001801048929337412,
|
| 27361 |
+
"rewards/reward_high_identity_attack_score/std": 1.0084442692459561e-05,
|
| 27362 |
+
"step": 1052
|
| 27363 |
+
},
|
| 27364 |
+
{
|
| 27365 |
+
"clip_ratio/high_max": 0.0,
|
| 27366 |
+
"clip_ratio/high_mean": 0.0,
|
| 27367 |
+
"clip_ratio/low_mean": 0.0,
|
| 27368 |
+
"clip_ratio/low_min": 0.0,
|
| 27369 |
+
"clip_ratio/region_mean": 0.0,
|
| 27370 |
+
"completions/clipped_ratio": 0.0,
|
| 27371 |
+
"completions/max_length": 1.0,
|
| 27372 |
+
"completions/max_terminated_length": 1.0,
|
| 27373 |
+
"completions/mean_length": 1.0,
|
| 27374 |
+
"completions/mean_terminated_length": 1.0,
|
| 27375 |
+
"completions/min_length": 1.0,
|
| 27376 |
+
"completions/min_terminated_length": 1.0,
|
| 27377 |
+
"epoch": 8.355029585798816,
|
| 27378 |
+
"frac_reward_zero_std": 1.0,
|
| 27379 |
+
"grad_norm": 4.138869553571567e-05,
|
| 27380 |
+
"kl": 2.4419277906417847e-05,
|
| 27381 |
+
"learning_rate": 2e-06,
|
| 27382 |
+
"loss": 0.0,
|
| 27383 |
+
"num_tokens": 584549.0,
|
| 27384 |
+
"reward": 0.0009184567024931312,
|
| 27385 |
+
"reward_std": 0.0,
|
| 27386 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27387 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27388 |
+
"step": 1053
|
| 27389 |
+
},
|
| 27390 |
+
{
|
| 27391 |
+
"clip_ratio/high_max": 0.0,
|
| 27392 |
+
"clip_ratio/high_mean": 0.0,
|
| 27393 |
+
"clip_ratio/low_mean": 0.0,
|
| 27394 |
+
"clip_ratio/low_min": 0.0,
|
| 27395 |
+
"clip_ratio/region_mean": 0.0,
|
| 27396 |
+
"completions/clipped_ratio": 0.0,
|
| 27397 |
+
"completions/max_length": 1.0,
|
| 27398 |
+
"completions/max_terminated_length": 1.0,
|
| 27399 |
+
"completions/mean_length": 1.0,
|
| 27400 |
+
"completions/mean_terminated_length": 1.0,
|
| 27401 |
+
"completions/min_length": 1.0,
|
| 27402 |
+
"completions/min_terminated_length": 1.0,
|
| 27403 |
+
"epoch": 8.362919132149901,
|
| 27404 |
+
"frac_reward_zero_std": 1.0,
|
| 27405 |
+
"grad_norm": 0.00010592924081720412,
|
| 27406 |
+
"kl": 5.028769373893738e-05,
|
| 27407 |
+
"learning_rate": 2e-06,
|
| 27408 |
+
"loss": 0.0,
|
| 27409 |
+
"num_tokens": 585049.0,
|
| 27410 |
+
"reward": 0.0009184567024931312,
|
| 27411 |
+
"reward_std": 0.0,
|
| 27412 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27413 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27414 |
+
"step": 1054
|
| 27415 |
+
},
|
| 27416 |
+
{
|
| 27417 |
+
"clip_ratio/high_max": 0.0,
|
| 27418 |
+
"clip_ratio/high_mean": 0.0,
|
| 27419 |
+
"clip_ratio/low_mean": 0.0,
|
| 27420 |
+
"clip_ratio/low_min": 0.0,
|
| 27421 |
+
"clip_ratio/region_mean": 0.0,
|
| 27422 |
+
"completions/clipped_ratio": 0.0,
|
| 27423 |
+
"completions/max_length": 1.0,
|
| 27424 |
+
"completions/max_terminated_length": 1.0,
|
| 27425 |
+
"completions/mean_length": 1.0,
|
| 27426 |
+
"completions/mean_terminated_length": 1.0,
|
| 27427 |
+
"completions/min_length": 1.0,
|
| 27428 |
+
"completions/min_terminated_length": 1.0,
|
| 27429 |
+
"epoch": 8.370808678500985,
|
| 27430 |
+
"frac_reward_zero_std": 1.0,
|
| 27431 |
+
"grad_norm": 0.0005381772061809897,
|
| 27432 |
+
"kl": 0.00029714033007621765,
|
| 27433 |
+
"learning_rate": 2e-06,
|
| 27434 |
+
"loss": 0.0,
|
| 27435 |
+
"num_tokens": 585497.0,
|
| 27436 |
+
"reward": 0.0009184567024931312,
|
| 27437 |
+
"reward_std": 0.0,
|
| 27438 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27439 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27440 |
+
"step": 1055
|
| 27441 |
+
},
|
| 27442 |
+
{
|
| 27443 |
+
"clip_ratio/high_max": 0.0,
|
| 27444 |
+
"clip_ratio/high_mean": 0.0,
|
| 27445 |
+
"clip_ratio/low_mean": 0.0,
|
| 27446 |
+
"clip_ratio/low_min": 0.0,
|
| 27447 |
+
"clip_ratio/region_mean": 0.0,
|
| 27448 |
+
"completions/clipped_ratio": 0.0,
|
| 27449 |
+
"completions/max_length": 1.0,
|
| 27450 |
+
"completions/max_terminated_length": 1.0,
|
| 27451 |
+
"completions/mean_length": 1.0,
|
| 27452 |
+
"completions/mean_terminated_length": 1.0,
|
| 27453 |
+
"completions/min_length": 1.0,
|
| 27454 |
+
"completions/min_terminated_length": 1.0,
|
| 27455 |
+
"epoch": 8.378698224852071,
|
| 27456 |
+
"frac_reward_zero_std": 1.0,
|
| 27457 |
+
"grad_norm": 0.0006338380044326186,
|
| 27458 |
+
"kl": 0.00025733932852745056,
|
| 27459 |
+
"learning_rate": 2e-06,
|
| 27460 |
+
"loss": 0.0,
|
| 27461 |
+
"num_tokens": 585949.0,
|
| 27462 |
+
"reward": 0.0009184567024931312,
|
| 27463 |
+
"reward_std": 0.0,
|
| 27464 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27465 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27466 |
+
"step": 1056
|
| 27467 |
+
},
|
| 27468 |
+
{
|
| 27469 |
+
"clip_ratio/high_max": 0.0,
|
| 27470 |
+
"clip_ratio/high_mean": 0.0,
|
| 27471 |
+
"clip_ratio/low_mean": 0.0,
|
| 27472 |
+
"clip_ratio/low_min": 0.0,
|
| 27473 |
+
"clip_ratio/region_mean": 0.0,
|
| 27474 |
+
"completions/clipped_ratio": 0.0,
|
| 27475 |
+
"completions/max_length": 1.0,
|
| 27476 |
+
"completions/max_terminated_length": 1.0,
|
| 27477 |
+
"completions/mean_length": 1.0,
|
| 27478 |
+
"completions/mean_terminated_length": 1.0,
|
| 27479 |
+
"completions/min_length": 1.0,
|
| 27480 |
+
"completions/min_terminated_length": 1.0,
|
| 27481 |
+
"epoch": 8.386587771203155,
|
| 27482 |
+
"frac_reward_zero_std": 1.0,
|
| 27483 |
+
"grad_norm": 0.009605144150555134,
|
| 27484 |
+
"kl": 0.003907311707735062,
|
| 27485 |
+
"learning_rate": 2e-06,
|
| 27486 |
+
"loss": 0.0002,
|
| 27487 |
+
"num_tokens": 586467.0,
|
| 27488 |
+
"reward": 0.0009184567024931312,
|
| 27489 |
+
"reward_std": 0.0,
|
| 27490 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27491 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27492 |
+
"step": 1057
|
| 27493 |
+
},
|
| 27494 |
+
{
|
| 27495 |
+
"clip_ratio/high_max": 0.0,
|
| 27496 |
+
"clip_ratio/high_mean": 0.0,
|
| 27497 |
+
"clip_ratio/low_mean": 0.0,
|
| 27498 |
+
"clip_ratio/low_min": 0.0,
|
| 27499 |
+
"clip_ratio/region_mean": 0.0,
|
| 27500 |
+
"completions/clipped_ratio": 0.0,
|
| 27501 |
+
"completions/max_length": 1.0,
|
| 27502 |
+
"completions/max_terminated_length": 1.0,
|
| 27503 |
+
"completions/mean_length": 1.0,
|
| 27504 |
+
"completions/mean_terminated_length": 1.0,
|
| 27505 |
+
"completions/min_length": 1.0,
|
| 27506 |
+
"completions/min_terminated_length": 1.0,
|
| 27507 |
+
"epoch": 8.39447731755424,
|
| 27508 |
+
"frac_reward_zero_std": 1.0,
|
| 27509 |
+
"grad_norm": 0.0017512802733108401,
|
| 27510 |
+
"kl": 0.0007003918290138245,
|
| 27511 |
+
"learning_rate": 2e-06,
|
| 27512 |
+
"loss": 0.0,
|
| 27513 |
+
"num_tokens": 586991.0,
|
| 27514 |
+
"reward": 0.0009184567024931312,
|
| 27515 |
+
"reward_std": 0.0,
|
| 27516 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27517 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27518 |
+
"step": 1058
|
| 27519 |
+
},
|
| 27520 |
+
{
|
| 27521 |
+
"clip_ratio/high_max": 0.0,
|
| 27522 |
+
"clip_ratio/high_mean": 0.0,
|
| 27523 |
+
"clip_ratio/low_mean": 0.0,
|
| 27524 |
+
"clip_ratio/low_min": 0.0,
|
| 27525 |
+
"clip_ratio/region_mean": 0.0,
|
| 27526 |
+
"completions/clipped_ratio": 0.0,
|
| 27527 |
+
"completions/max_length": 1.0,
|
| 27528 |
+
"completions/max_terminated_length": 1.0,
|
| 27529 |
+
"completions/mean_length": 1.0,
|
| 27530 |
+
"completions/mean_terminated_length": 1.0,
|
| 27531 |
+
"completions/min_length": 1.0,
|
| 27532 |
+
"completions/min_terminated_length": 1.0,
|
| 27533 |
+
"epoch": 8.402366863905325,
|
| 27534 |
+
"frac_reward_zero_std": 1.0,
|
| 27535 |
+
"grad_norm": 0.00031084357760846615,
|
| 27536 |
+
"kl": 0.00015052035450935364,
|
| 27537 |
+
"learning_rate": 2e-06,
|
| 27538 |
+
"loss": 0.0,
|
| 27539 |
+
"num_tokens": 587573.0,
|
| 27540 |
+
"reward": 0.0009184567024931312,
|
| 27541 |
+
"reward_std": 0.0,
|
| 27542 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27543 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27544 |
+
"step": 1059
|
| 27545 |
+
},
|
| 27546 |
+
{
|
| 27547 |
+
"clip_ratio/high_max": 0.0,
|
| 27548 |
+
"clip_ratio/high_mean": 0.0,
|
| 27549 |
+
"clip_ratio/low_mean": 0.0,
|
| 27550 |
+
"clip_ratio/low_min": 0.0,
|
| 27551 |
+
"clip_ratio/region_mean": 0.0,
|
| 27552 |
+
"completions/clipped_ratio": 0.0,
|
| 27553 |
+
"completions/max_length": 1.0,
|
| 27554 |
+
"completions/max_terminated_length": 1.0,
|
| 27555 |
+
"completions/mean_length": 1.0,
|
| 27556 |
+
"completions/mean_terminated_length": 1.0,
|
| 27557 |
+
"completions/min_length": 1.0,
|
| 27558 |
+
"completions/min_terminated_length": 1.0,
|
| 27559 |
+
"epoch": 8.41025641025641,
|
| 27560 |
+
"frac_reward_zero_std": 1.0,
|
| 27561 |
+
"grad_norm": 0.0005003889673389494,
|
| 27562 |
+
"kl": 0.00018950924277305603,
|
| 27563 |
+
"learning_rate": 2e-06,
|
| 27564 |
+
"loss": 0.0,
|
| 27565 |
+
"num_tokens": 588023.0,
|
| 27566 |
+
"reward": 0.0009184567024931312,
|
| 27567 |
+
"reward_std": 0.0,
|
| 27568 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27569 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27570 |
+
"step": 1060
|
| 27571 |
+
},
|
| 27572 |
+
{
|
| 27573 |
+
"clip_ratio/high_max": 0.0,
|
| 27574 |
+
"clip_ratio/high_mean": 0.0,
|
| 27575 |
+
"clip_ratio/low_mean": 0.0,
|
| 27576 |
+
"clip_ratio/low_min": 0.0,
|
| 27577 |
+
"clip_ratio/region_mean": 0.0,
|
| 27578 |
+
"completions/clipped_ratio": 0.0,
|
| 27579 |
+
"completions/max_length": 1.0,
|
| 27580 |
+
"completions/max_terminated_length": 1.0,
|
| 27581 |
+
"completions/mean_length": 1.0,
|
| 27582 |
+
"completions/mean_terminated_length": 1.0,
|
| 27583 |
+
"completions/min_length": 1.0,
|
| 27584 |
+
"completions/min_terminated_length": 1.0,
|
| 27585 |
+
"epoch": 8.418145956607495,
|
| 27586 |
+
"frac_reward_zero_std": 1.0,
|
| 27587 |
+
"grad_norm": 0.00017963761638384312,
|
| 27588 |
+
"kl": 0.0001412220299243927,
|
| 27589 |
+
"learning_rate": 2e-06,
|
| 27590 |
+
"loss": 0.0,
|
| 27591 |
+
"num_tokens": 588537.0,
|
| 27592 |
+
"reward": 0.0009184567024931312,
|
| 27593 |
+
"reward_std": 0.0,
|
| 27594 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27595 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27596 |
+
"step": 1061
|
| 27597 |
+
},
|
| 27598 |
+
{
|
| 27599 |
+
"clip_ratio/high_max": 0.0,
|
| 27600 |
+
"clip_ratio/high_mean": 0.0,
|
| 27601 |
+
"clip_ratio/low_mean": 0.0,
|
| 27602 |
+
"clip_ratio/low_min": 0.0,
|
| 27603 |
+
"clip_ratio/region_mean": 0.0,
|
| 27604 |
+
"completions/clipped_ratio": 0.0,
|
| 27605 |
+
"completions/max_length": 1.0,
|
| 27606 |
+
"completions/max_terminated_length": 1.0,
|
| 27607 |
+
"completions/mean_length": 1.0,
|
| 27608 |
+
"completions/mean_terminated_length": 1.0,
|
| 27609 |
+
"completions/min_length": 1.0,
|
| 27610 |
+
"completions/min_terminated_length": 1.0,
|
| 27611 |
+
"epoch": 8.42603550295858,
|
| 27612 |
+
"frac_reward_zero_std": 1.0,
|
| 27613 |
+
"grad_norm": 3.474459663266316e-05,
|
| 27614 |
+
"kl": 1.461803913116455e-05,
|
| 27615 |
+
"learning_rate": 2e-06,
|
| 27616 |
+
"loss": 0.0,
|
| 27617 |
+
"num_tokens": 589085.0,
|
| 27618 |
+
"reward": 0.0009184567024931312,
|
| 27619 |
+
"reward_std": 0.0,
|
| 27620 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27621 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27622 |
+
"step": 1062
|
| 27623 |
+
},
|
| 27624 |
+
{
|
| 27625 |
+
"clip_ratio/high_max": 0.0,
|
| 27626 |
+
"clip_ratio/high_mean": 0.0,
|
| 27627 |
+
"clip_ratio/low_mean": 0.0,
|
| 27628 |
+
"clip_ratio/low_min": 0.0,
|
| 27629 |
+
"clip_ratio/region_mean": 0.0,
|
| 27630 |
+
"completions/clipped_ratio": 0.0,
|
| 27631 |
+
"completions/max_length": 1.0,
|
| 27632 |
+
"completions/max_terminated_length": 1.0,
|
| 27633 |
+
"completions/mean_length": 1.0,
|
| 27634 |
+
"completions/mean_terminated_length": 1.0,
|
| 27635 |
+
"completions/min_length": 1.0,
|
| 27636 |
+
"completions/min_terminated_length": 1.0,
|
| 27637 |
+
"epoch": 8.433925049309664,
|
| 27638 |
+
"frac_reward_zero_std": 1.0,
|
| 27639 |
+
"grad_norm": 0.0010317455744370818,
|
| 27640 |
+
"kl": 0.00035093724727630615,
|
| 27641 |
+
"learning_rate": 2e-06,
|
| 27642 |
+
"loss": 0.0,
|
| 27643 |
+
"num_tokens": 589591.0,
|
| 27644 |
+
"reward": 0.0009184567024931312,
|
| 27645 |
+
"reward_std": 0.0,
|
| 27646 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27647 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27648 |
+
"step": 1063
|
| 27649 |
+
},
|
| 27650 |
+
{
|
| 27651 |
+
"clip_ratio/high_max": 0.0,
|
| 27652 |
+
"clip_ratio/high_mean": 0.0,
|
| 27653 |
+
"clip_ratio/low_mean": 0.0,
|
| 27654 |
+
"clip_ratio/low_min": 0.0,
|
| 27655 |
+
"clip_ratio/region_mean": 0.0,
|
| 27656 |
+
"completions/clipped_ratio": 0.0,
|
| 27657 |
+
"completions/max_length": 1.0,
|
| 27658 |
+
"completions/max_terminated_length": 1.0,
|
| 27659 |
+
"completions/mean_length": 1.0,
|
| 27660 |
+
"completions/mean_terminated_length": 1.0,
|
| 27661 |
+
"completions/min_length": 1.0,
|
| 27662 |
+
"completions/min_terminated_length": 1.0,
|
| 27663 |
+
"epoch": 8.44181459566075,
|
| 27664 |
+
"frac_reward_zero_std": 1.0,
|
| 27665 |
+
"grad_norm": 7.65090953791514e-05,
|
| 27666 |
+
"kl": 3.806501626968384e-05,
|
| 27667 |
+
"learning_rate": 2e-06,
|
| 27668 |
+
"loss": 0.0,
|
| 27669 |
+
"num_tokens": 590115.0,
|
| 27670 |
+
"reward": 0.0009184567024931312,
|
| 27671 |
+
"reward_std": 0.0,
|
| 27672 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27673 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27674 |
+
"step": 1064
|
| 27675 |
+
},
|
| 27676 |
+
{
|
| 27677 |
+
"clip_ratio/high_max": 0.0,
|
| 27678 |
+
"clip_ratio/high_mean": 0.0,
|
| 27679 |
+
"clip_ratio/low_mean": 0.0,
|
| 27680 |
+
"clip_ratio/low_min": 0.0,
|
| 27681 |
+
"clip_ratio/region_mean": 0.0,
|
| 27682 |
+
"completions/clipped_ratio": 0.0,
|
| 27683 |
+
"completions/max_length": 1.0,
|
| 27684 |
+
"completions/max_terminated_length": 1.0,
|
| 27685 |
+
"completions/mean_length": 1.0,
|
| 27686 |
+
"completions/mean_terminated_length": 1.0,
|
| 27687 |
+
"completions/min_length": 1.0,
|
| 27688 |
+
"completions/min_terminated_length": 1.0,
|
| 27689 |
+
"epoch": 8.449704142011834,
|
| 27690 |
+
"frac_reward_zero_std": 1.0,
|
| 27691 |
+
"grad_norm": 2.855510319932364e-05,
|
| 27692 |
+
"kl": 1.4398247003555298e-05,
|
| 27693 |
+
"learning_rate": 2e-06,
|
| 27694 |
+
"loss": 0.0,
|
| 27695 |
+
"num_tokens": 590643.0,
|
| 27696 |
+
"reward": 0.0009184567024931312,
|
| 27697 |
+
"reward_std": 0.0,
|
| 27698 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27699 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27700 |
+
"step": 1065
|
| 27701 |
+
},
|
| 27702 |
+
{
|
| 27703 |
+
"clip_ratio/high_max": 0.0,
|
| 27704 |
+
"clip_ratio/high_mean": 0.0,
|
| 27705 |
+
"clip_ratio/low_mean": 0.0,
|
| 27706 |
+
"clip_ratio/low_min": 0.0,
|
| 27707 |
+
"clip_ratio/region_mean": 0.0,
|
| 27708 |
+
"completions/clipped_ratio": 0.0,
|
| 27709 |
+
"completions/max_length": 1.0,
|
| 27710 |
+
"completions/max_terminated_length": 1.0,
|
| 27711 |
+
"completions/mean_length": 1.0,
|
| 27712 |
+
"completions/mean_terminated_length": 1.0,
|
| 27713 |
+
"completions/min_length": 1.0,
|
| 27714 |
+
"completions/min_terminated_length": 1.0,
|
| 27715 |
+
"epoch": 8.45759368836292,
|
| 27716 |
+
"frac_reward_zero_std": 1.0,
|
| 27717 |
+
"grad_norm": 0.0002876239304896444,
|
| 27718 |
+
"kl": 0.00012814253568649292,
|
| 27719 |
+
"learning_rate": 2e-06,
|
| 27720 |
+
"loss": 0.0,
|
| 27721 |
+
"num_tokens": 591191.0,
|
| 27722 |
+
"reward": 0.0009184567024931312,
|
| 27723 |
+
"reward_std": 0.0,
|
| 27724 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27725 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27726 |
+
"step": 1066
|
| 27727 |
+
},
|
| 27728 |
+
{
|
| 27729 |
+
"clip_ratio/high_max": 0.0,
|
| 27730 |
+
"clip_ratio/high_mean": 0.0,
|
| 27731 |
+
"clip_ratio/low_mean": 0.0,
|
| 27732 |
+
"clip_ratio/low_min": 0.0,
|
| 27733 |
+
"clip_ratio/region_mean": 0.0,
|
| 27734 |
+
"completions/clipped_ratio": 0.0,
|
| 27735 |
+
"completions/max_length": 1.0,
|
| 27736 |
+
"completions/max_terminated_length": 1.0,
|
| 27737 |
+
"completions/mean_length": 1.0,
|
| 27738 |
+
"completions/mean_terminated_length": 1.0,
|
| 27739 |
+
"completions/min_length": 1.0,
|
| 27740 |
+
"completions/min_terminated_length": 1.0,
|
| 27741 |
+
"epoch": 8.465483234714004,
|
| 27742 |
+
"frac_reward_zero_std": 1.0,
|
| 27743 |
+
"grad_norm": 0.0001350129023194313,
|
| 27744 |
+
"kl": 6.249174475669861e-05,
|
| 27745 |
+
"learning_rate": 2e-06,
|
| 27746 |
+
"loss": 0.0,
|
| 27747 |
+
"num_tokens": 591745.0,
|
| 27748 |
+
"reward": 0.0009184567024931312,
|
| 27749 |
+
"reward_std": 0.0,
|
| 27750 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27751 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27752 |
+
"step": 1067
|
| 27753 |
+
},
|
| 27754 |
+
{
|
| 27755 |
+
"clip_ratio/high_max": 0.0,
|
| 27756 |
+
"clip_ratio/high_mean": 0.0,
|
| 27757 |
+
"clip_ratio/low_mean": 0.0,
|
| 27758 |
+
"clip_ratio/low_min": 0.0,
|
| 27759 |
+
"clip_ratio/region_mean": 0.0,
|
| 27760 |
+
"completions/clipped_ratio": 0.0,
|
| 27761 |
+
"completions/max_length": 1.0,
|
| 27762 |
+
"completions/max_terminated_length": 1.0,
|
| 27763 |
+
"completions/mean_length": 1.0,
|
| 27764 |
+
"completions/mean_terminated_length": 1.0,
|
| 27765 |
+
"completions/min_length": 1.0,
|
| 27766 |
+
"completions/min_terminated_length": 1.0,
|
| 27767 |
+
"epoch": 8.47337278106509,
|
| 27768 |
+
"frac_reward_zero_std": 1.0,
|
| 27769 |
+
"grad_norm": 0.0002275337028549984,
|
| 27770 |
+
"kl": 0.00011239573359489441,
|
| 27771 |
+
"learning_rate": 2e-06,
|
| 27772 |
+
"loss": 0.0,
|
| 27773 |
+
"num_tokens": 592273.0,
|
| 27774 |
+
"reward": 0.0009184567024931312,
|
| 27775 |
+
"reward_std": 0.0,
|
| 27776 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27777 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27778 |
+
"step": 1068
|
| 27779 |
+
},
|
| 27780 |
+
{
|
| 27781 |
+
"clip_ratio/high_max": 0.0,
|
| 27782 |
+
"clip_ratio/high_mean": 0.0,
|
| 27783 |
+
"clip_ratio/low_mean": 0.0,
|
| 27784 |
+
"clip_ratio/low_min": 0.0,
|
| 27785 |
+
"clip_ratio/region_mean": 0.0,
|
| 27786 |
+
"completions/clipped_ratio": 0.0,
|
| 27787 |
+
"completions/max_length": 1.0,
|
| 27788 |
+
"completions/max_terminated_length": 1.0,
|
| 27789 |
+
"completions/mean_length": 1.0,
|
| 27790 |
+
"completions/mean_terminated_length": 1.0,
|
| 27791 |
+
"completions/min_length": 1.0,
|
| 27792 |
+
"completions/min_terminated_length": 1.0,
|
| 27793 |
+
"epoch": 8.481262327416173,
|
| 27794 |
+
"frac_reward_zero_std": 1.0,
|
| 27795 |
+
"grad_norm": 0.03512755408883095,
|
| 27796 |
+
"kl": 0.0069213807582855225,
|
| 27797 |
+
"learning_rate": 2e-06,
|
| 27798 |
+
"loss": 0.0003,
|
| 27799 |
+
"num_tokens": 592793.0,
|
| 27800 |
+
"reward": 0.0009184567024931312,
|
| 27801 |
+
"reward_std": 0.0,
|
| 27802 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27803 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27804 |
+
"step": 1069
|
| 27805 |
+
},
|
| 27806 |
+
{
|
| 27807 |
+
"clip_ratio/high_max": 0.0,
|
| 27808 |
+
"clip_ratio/high_mean": 0.0,
|
| 27809 |
+
"clip_ratio/low_mean": 0.0,
|
| 27810 |
+
"clip_ratio/low_min": 0.0,
|
| 27811 |
+
"clip_ratio/region_mean": 0.0,
|
| 27812 |
+
"completions/clipped_ratio": 0.0,
|
| 27813 |
+
"completions/max_length": 1.0,
|
| 27814 |
+
"completions/max_terminated_length": 1.0,
|
| 27815 |
+
"completions/mean_length": 1.0,
|
| 27816 |
+
"completions/mean_terminated_length": 1.0,
|
| 27817 |
+
"completions/min_length": 1.0,
|
| 27818 |
+
"completions/min_terminated_length": 1.0,
|
| 27819 |
+
"epoch": 8.489151873767259,
|
| 27820 |
+
"frac_reward_zero_std": 1.0,
|
| 27821 |
+
"grad_norm": 0.00013964457320980728,
|
| 27822 |
+
"kl": 5.913153290748596e-05,
|
| 27823 |
+
"learning_rate": 2e-06,
|
| 27824 |
+
"loss": 0.0,
|
| 27825 |
+
"num_tokens": 593343.0,
|
| 27826 |
+
"reward": 0.0009184567024931312,
|
| 27827 |
+
"reward_std": 0.0,
|
| 27828 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27829 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27830 |
+
"step": 1070
|
| 27831 |
+
},
|
| 27832 |
+
{
|
| 27833 |
+
"clip_ratio/high_max": 0.0,
|
| 27834 |
+
"clip_ratio/high_mean": 0.0,
|
| 27835 |
+
"clip_ratio/low_mean": 0.0,
|
| 27836 |
+
"clip_ratio/low_min": 0.0,
|
| 27837 |
+
"clip_ratio/region_mean": 0.0,
|
| 27838 |
+
"completions/clipped_ratio": 0.0,
|
| 27839 |
+
"completions/max_length": 1.0,
|
| 27840 |
+
"completions/max_terminated_length": 1.0,
|
| 27841 |
+
"completions/mean_length": 1.0,
|
| 27842 |
+
"completions/mean_terminated_length": 1.0,
|
| 27843 |
+
"completions/min_length": 1.0,
|
| 27844 |
+
"completions/min_terminated_length": 1.0,
|
| 27845 |
+
"epoch": 8.497041420118343,
|
| 27846 |
+
"frac_reward_zero_std": 1.0,
|
| 27847 |
+
"grad_norm": 0.00017951025802176446,
|
| 27848 |
+
"kl": 0.0001367628574371338,
|
| 27849 |
+
"learning_rate": 2e-06,
|
| 27850 |
+
"loss": 0.0,
|
| 27851 |
+
"num_tokens": 593903.0,
|
| 27852 |
+
"reward": 0.0009184567024931312,
|
| 27853 |
+
"reward_std": 0.0,
|
| 27854 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27855 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27856 |
+
"step": 1071
|
| 27857 |
+
},
|
| 27858 |
+
{
|
| 27859 |
+
"clip_ratio/high_max": 0.0,
|
| 27860 |
+
"clip_ratio/high_mean": 0.0,
|
| 27861 |
+
"clip_ratio/low_mean": 0.0,
|
| 27862 |
+
"clip_ratio/low_min": 0.0,
|
| 27863 |
+
"clip_ratio/region_mean": 0.0,
|
| 27864 |
+
"completions/clipped_ratio": 0.0,
|
| 27865 |
+
"completions/max_length": 1.0,
|
| 27866 |
+
"completions/max_terminated_length": 1.0,
|
| 27867 |
+
"completions/mean_length": 1.0,
|
| 27868 |
+
"completions/mean_terminated_length": 1.0,
|
| 27869 |
+
"completions/min_length": 1.0,
|
| 27870 |
+
"completions/min_terminated_length": 1.0,
|
| 27871 |
+
"epoch": 8.504930966469429,
|
| 27872 |
+
"frac_reward_zero_std": 1.0,
|
| 27873 |
+
"grad_norm": 0.00011877052747877315,
|
| 27874 |
+
"kl": 4.143267869949341e-05,
|
| 27875 |
+
"learning_rate": 2e-06,
|
| 27876 |
+
"loss": 0.0,
|
| 27877 |
+
"num_tokens": 594477.0,
|
| 27878 |
+
"reward": 0.0009184567024931312,
|
| 27879 |
+
"reward_std": 0.0,
|
| 27880 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27881 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27882 |
+
"step": 1072
|
| 27883 |
+
},
|
| 27884 |
+
{
|
| 27885 |
+
"clip_ratio/high_max": 0.0,
|
| 27886 |
+
"clip_ratio/high_mean": 0.0,
|
| 27887 |
+
"clip_ratio/low_mean": 0.0,
|
| 27888 |
+
"clip_ratio/low_min": 0.0,
|
| 27889 |
+
"clip_ratio/region_mean": 0.0,
|
| 27890 |
+
"completions/clipped_ratio": 0.0,
|
| 27891 |
+
"completions/max_length": 1.0,
|
| 27892 |
+
"completions/max_terminated_length": 1.0,
|
| 27893 |
+
"completions/mean_length": 1.0,
|
| 27894 |
+
"completions/mean_terminated_length": 1.0,
|
| 27895 |
+
"completions/min_length": 1.0,
|
| 27896 |
+
"completions/min_terminated_length": 1.0,
|
| 27897 |
+
"epoch": 8.512820512820513,
|
| 27898 |
+
"frac_reward_zero_std": 1.0,
|
| 27899 |
+
"grad_norm": 0.0001373274135403335,
|
| 27900 |
+
"kl": 5.389004945755005e-05,
|
| 27901 |
+
"learning_rate": 2e-06,
|
| 27902 |
+
"loss": 0.0,
|
| 27903 |
+
"num_tokens": 595039.0,
|
| 27904 |
+
"reward": 0.0009184567024931312,
|
| 27905 |
+
"reward_std": 0.0,
|
| 27906 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27907 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27908 |
+
"step": 1073
|
| 27909 |
+
},
|
| 27910 |
+
{
|
| 27911 |
+
"clip_ratio/high_max": 0.0,
|
| 27912 |
+
"clip_ratio/high_mean": 0.0,
|
| 27913 |
+
"clip_ratio/low_mean": 0.0,
|
| 27914 |
+
"clip_ratio/low_min": 0.0,
|
| 27915 |
+
"clip_ratio/region_mean": 0.0,
|
| 27916 |
+
"completions/clipped_ratio": 0.0,
|
| 27917 |
+
"completions/max_length": 1.0,
|
| 27918 |
+
"completions/max_terminated_length": 1.0,
|
| 27919 |
+
"completions/mean_length": 1.0,
|
| 27920 |
+
"completions/mean_terminated_length": 1.0,
|
| 27921 |
+
"completions/min_length": 1.0,
|
| 27922 |
+
"completions/min_terminated_length": 1.0,
|
| 27923 |
+
"epoch": 8.520710059171599,
|
| 27924 |
+
"frac_reward_zero_std": 1.0,
|
| 27925 |
+
"grad_norm": 5.3857456805417314e-05,
|
| 27926 |
+
"kl": 2.3078173398971558e-05,
|
| 27927 |
+
"learning_rate": 2e-06,
|
| 27928 |
+
"loss": 0.0,
|
| 27929 |
+
"num_tokens": 595549.0,
|
| 27930 |
+
"reward": 0.0009184567024931312,
|
| 27931 |
+
"reward_std": 0.0,
|
| 27932 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27933 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27934 |
+
"step": 1074
|
| 27935 |
+
},
|
| 27936 |
+
{
|
| 27937 |
+
"clip_ratio/high_max": 0.0,
|
| 27938 |
+
"clip_ratio/high_mean": 0.0,
|
| 27939 |
+
"clip_ratio/low_mean": 0.0,
|
| 27940 |
+
"clip_ratio/low_min": 0.0,
|
| 27941 |
+
"clip_ratio/region_mean": 0.0,
|
| 27942 |
+
"completions/clipped_ratio": 0.0,
|
| 27943 |
+
"completions/max_length": 2.0,
|
| 27944 |
+
"completions/max_terminated_length": 2.0,
|
| 27945 |
+
"completions/mean_length": 1.0625,
|
| 27946 |
+
"completions/mean_terminated_length": 1.0625,
|
| 27947 |
+
"completions/min_length": 1.0,
|
| 27948 |
+
"completions/min_terminated_length": 1.0,
|
| 27949 |
+
"epoch": 8.528599605522682,
|
| 27950 |
+
"frac_reward_zero_std": 1.0,
|
| 27951 |
+
"grad_norm": 0.02848130278289318,
|
| 27952 |
+
"kl": 0.005706154741346836,
|
| 27953 |
+
"learning_rate": 2e-06,
|
| 27954 |
+
"loss": 0.0003,
|
| 27955 |
+
"num_tokens": 596063.0,
|
| 27956 |
+
"reward": 0.0009184567024931312,
|
| 27957 |
+
"reward_std": 0.0,
|
| 27958 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27959 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27960 |
+
"step": 1075
|
| 27961 |
+
},
|
| 27962 |
+
{
|
| 27963 |
+
"clip_ratio/high_max": 0.0,
|
| 27964 |
+
"clip_ratio/high_mean": 0.0,
|
| 27965 |
+
"clip_ratio/low_mean": 0.0,
|
| 27966 |
+
"clip_ratio/low_min": 0.0,
|
| 27967 |
+
"clip_ratio/region_mean": 0.0,
|
| 27968 |
+
"completions/clipped_ratio": 0.0,
|
| 27969 |
+
"completions/max_length": 1.0,
|
| 27970 |
+
"completions/max_terminated_length": 1.0,
|
| 27971 |
+
"completions/mean_length": 1.0,
|
| 27972 |
+
"completions/mean_terminated_length": 1.0,
|
| 27973 |
+
"completions/min_length": 1.0,
|
| 27974 |
+
"completions/min_terminated_length": 1.0,
|
| 27975 |
+
"epoch": 8.536489151873766,
|
| 27976 |
+
"frac_reward_zero_std": 1.0,
|
| 27977 |
+
"grad_norm": 0.005397849250584841,
|
| 27978 |
+
"kl": 0.002629391849040985,
|
| 27979 |
+
"learning_rate": 2e-06,
|
| 27980 |
+
"loss": 0.0001,
|
| 27981 |
+
"num_tokens": 596561.0,
|
| 27982 |
+
"reward": 0.0009184567024931312,
|
| 27983 |
+
"reward_std": 0.0,
|
| 27984 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 27985 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 27986 |
+
"step": 1076
|
| 27987 |
+
},
|
| 27988 |
+
{
|
| 27989 |
+
"clip_ratio/high_max": 0.0,
|
| 27990 |
+
"clip_ratio/high_mean": 0.0,
|
| 27991 |
+
"clip_ratio/low_mean": 0.0,
|
| 27992 |
+
"clip_ratio/low_min": 0.0,
|
| 27993 |
+
"clip_ratio/region_mean": 0.0,
|
| 27994 |
+
"completions/clipped_ratio": 0.0,
|
| 27995 |
+
"completions/max_length": 1.0,
|
| 27996 |
+
"completions/max_terminated_length": 1.0,
|
| 27997 |
+
"completions/mean_length": 1.0,
|
| 27998 |
+
"completions/mean_terminated_length": 1.0,
|
| 27999 |
+
"completions/min_length": 1.0,
|
| 28000 |
+
"completions/min_terminated_length": 1.0,
|
| 28001 |
+
"epoch": 8.544378698224852,
|
| 28002 |
+
"frac_reward_zero_std": 1.0,
|
| 28003 |
+
"grad_norm": 8.72955861268565e-05,
|
| 28004 |
+
"kl": 2.822279930114746e-05,
|
| 28005 |
+
"learning_rate": 2e-06,
|
| 28006 |
+
"loss": 0.0,
|
| 28007 |
+
"num_tokens": 597085.0,
|
| 28008 |
+
"reward": 0.0009184567024931312,
|
| 28009 |
+
"reward_std": 0.0,
|
| 28010 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28011 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28012 |
+
"step": 1077
|
| 28013 |
+
},
|
| 28014 |
+
{
|
| 28015 |
+
"clip_ratio/high_max": 0.0,
|
| 28016 |
+
"clip_ratio/high_mean": 0.0,
|
| 28017 |
+
"clip_ratio/low_mean": 0.0,
|
| 28018 |
+
"clip_ratio/low_min": 0.0,
|
| 28019 |
+
"clip_ratio/region_mean": 0.0,
|
| 28020 |
+
"completions/clipped_ratio": 0.0,
|
| 28021 |
+
"completions/max_length": 1.0,
|
| 28022 |
+
"completions/max_terminated_length": 1.0,
|
| 28023 |
+
"completions/mean_length": 1.0,
|
| 28024 |
+
"completions/mean_terminated_length": 1.0,
|
| 28025 |
+
"completions/min_length": 1.0,
|
| 28026 |
+
"completions/min_terminated_length": 1.0,
|
| 28027 |
+
"epoch": 8.552268244575936,
|
| 28028 |
+
"frac_reward_zero_std": 1.0,
|
| 28029 |
+
"grad_norm": 0.00018880993593484163,
|
| 28030 |
+
"kl": 9.92119312286377e-05,
|
| 28031 |
+
"learning_rate": 2e-06,
|
| 28032 |
+
"loss": 0.0,
|
| 28033 |
+
"num_tokens": 597599.0,
|
| 28034 |
+
"reward": 0.0009184567024931312,
|
| 28035 |
+
"reward_std": 0.0,
|
| 28036 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28037 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28038 |
+
"step": 1078
|
| 28039 |
+
},
|
| 28040 |
+
{
|
| 28041 |
+
"clip_ratio/high_max": 0.0,
|
| 28042 |
+
"clip_ratio/high_mean": 0.0,
|
| 28043 |
+
"clip_ratio/low_mean": 0.0,
|
| 28044 |
+
"clip_ratio/low_min": 0.0,
|
| 28045 |
+
"clip_ratio/region_mean": 0.0,
|
| 28046 |
+
"completions/clipped_ratio": 0.0,
|
| 28047 |
+
"completions/max_length": 1.0,
|
| 28048 |
+
"completions/max_terminated_length": 1.0,
|
| 28049 |
+
"completions/mean_length": 1.0,
|
| 28050 |
+
"completions/mean_terminated_length": 1.0,
|
| 28051 |
+
"completions/min_length": 1.0,
|
| 28052 |
+
"completions/min_terminated_length": 1.0,
|
| 28053 |
+
"epoch": 8.560157790927022,
|
| 28054 |
+
"frac_reward_zero_std": 1.0,
|
| 28055 |
+
"grad_norm": 7.275133248185739e-05,
|
| 28056 |
+
"kl": 3.4924596548080444e-05,
|
| 28057 |
+
"learning_rate": 2e-06,
|
| 28058 |
+
"loss": 0.0,
|
| 28059 |
+
"num_tokens": 598133.0,
|
| 28060 |
+
"reward": 0.0009184567024931312,
|
| 28061 |
+
"reward_std": 0.0,
|
| 28062 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28063 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28064 |
+
"step": 1079
|
| 28065 |
+
},
|
| 28066 |
+
{
|
| 28067 |
+
"clip_ratio/high_max": 0.0,
|
| 28068 |
+
"clip_ratio/high_mean": 0.0,
|
| 28069 |
+
"clip_ratio/low_mean": 0.0,
|
| 28070 |
+
"clip_ratio/low_min": 0.0,
|
| 28071 |
+
"clip_ratio/region_mean": 0.0,
|
| 28072 |
+
"completions/clipped_ratio": 0.0,
|
| 28073 |
+
"completions/max_length": 1.0,
|
| 28074 |
+
"completions/max_terminated_length": 1.0,
|
| 28075 |
+
"completions/mean_length": 1.0,
|
| 28076 |
+
"completions/mean_terminated_length": 1.0,
|
| 28077 |
+
"completions/min_length": 1.0,
|
| 28078 |
+
"completions/min_terminated_length": 1.0,
|
| 28079 |
+
"epoch": 8.568047337278106,
|
| 28080 |
+
"frac_reward_zero_std": 1.0,
|
| 28081 |
+
"grad_norm": 0.00011954068031627685,
|
| 28082 |
+
"kl": 4.998594522476196e-05,
|
| 28083 |
+
"learning_rate": 2e-06,
|
| 28084 |
+
"loss": 0.0,
|
| 28085 |
+
"num_tokens": 598653.0,
|
| 28086 |
+
"reward": 0.0009184567024931312,
|
| 28087 |
+
"reward_std": 0.0,
|
| 28088 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28089 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28090 |
+
"step": 1080
|
| 28091 |
+
},
|
| 28092 |
+
{
|
| 28093 |
+
"clip_ratio/high_max": 0.0,
|
| 28094 |
+
"clip_ratio/high_mean": 0.0,
|
| 28095 |
+
"clip_ratio/low_mean": 0.0,
|
| 28096 |
+
"clip_ratio/low_min": 0.0,
|
| 28097 |
+
"clip_ratio/region_mean": 0.0,
|
| 28098 |
+
"completions/clipped_ratio": 0.0,
|
| 28099 |
+
"completions/max_length": 1.0,
|
| 28100 |
+
"completions/max_terminated_length": 1.0,
|
| 28101 |
+
"completions/mean_length": 1.0,
|
| 28102 |
+
"completions/mean_terminated_length": 1.0,
|
| 28103 |
+
"completions/min_length": 1.0,
|
| 28104 |
+
"completions/min_terminated_length": 1.0,
|
| 28105 |
+
"epoch": 8.575936883629192,
|
| 28106 |
+
"frac_reward_zero_std": 1.0,
|
| 28107 |
+
"grad_norm": 7.469802949344739e-05,
|
| 28108 |
+
"kl": 4.254281520843506e-05,
|
| 28109 |
+
"learning_rate": 2e-06,
|
| 28110 |
+
"loss": 0.0,
|
| 28111 |
+
"num_tokens": 599185.0,
|
| 28112 |
+
"reward": 0.0009184567024931312,
|
| 28113 |
+
"reward_std": 0.0,
|
| 28114 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28115 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28116 |
+
"step": 1081
|
| 28117 |
+
},
|
| 28118 |
+
{
|
| 28119 |
+
"clip_ratio/high_max": 0.0,
|
| 28120 |
+
"clip_ratio/high_mean": 0.0,
|
| 28121 |
+
"clip_ratio/low_mean": 0.0,
|
| 28122 |
+
"clip_ratio/low_min": 0.0,
|
| 28123 |
+
"clip_ratio/region_mean": 0.0,
|
| 28124 |
+
"completions/clipped_ratio": 0.0,
|
| 28125 |
+
"completions/max_length": 1.0,
|
| 28126 |
+
"completions/max_terminated_length": 1.0,
|
| 28127 |
+
"completions/mean_length": 1.0,
|
| 28128 |
+
"completions/mean_terminated_length": 1.0,
|
| 28129 |
+
"completions/min_length": 1.0,
|
| 28130 |
+
"completions/min_terminated_length": 1.0,
|
| 28131 |
+
"epoch": 8.583826429980276,
|
| 28132 |
+
"frac_reward_zero_std": 1.0,
|
| 28133 |
+
"grad_norm": 0.00014623381139244884,
|
| 28134 |
+
"kl": 5.622208118438721e-05,
|
| 28135 |
+
"learning_rate": 2e-06,
|
| 28136 |
+
"loss": 0.0,
|
| 28137 |
+
"num_tokens": 599753.0,
|
| 28138 |
+
"reward": 0.0009184567024931312,
|
| 28139 |
+
"reward_std": 0.0,
|
| 28140 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28141 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28142 |
+
"step": 1082
|
| 28143 |
+
},
|
| 28144 |
+
{
|
| 28145 |
+
"clip_ratio/high_max": 0.0,
|
| 28146 |
+
"clip_ratio/high_mean": 0.0,
|
| 28147 |
+
"clip_ratio/low_mean": 0.0,
|
| 28148 |
+
"clip_ratio/low_min": 0.0,
|
| 28149 |
+
"clip_ratio/region_mean": 0.0,
|
| 28150 |
+
"completions/clipped_ratio": 0.0,
|
| 28151 |
+
"completions/max_length": 1.0,
|
| 28152 |
+
"completions/max_terminated_length": 1.0,
|
| 28153 |
+
"completions/mean_length": 1.0,
|
| 28154 |
+
"completions/mean_terminated_length": 1.0,
|
| 28155 |
+
"completions/min_length": 1.0,
|
| 28156 |
+
"completions/min_terminated_length": 1.0,
|
| 28157 |
+
"epoch": 8.591715976331361,
|
| 28158 |
+
"frac_reward_zero_std": 1.0,
|
| 28159 |
+
"grad_norm": 8.390660514123738e-05,
|
| 28160 |
+
"kl": 3.372877836227417e-05,
|
| 28161 |
+
"learning_rate": 2e-06,
|
| 28162 |
+
"loss": 0.0,
|
| 28163 |
+
"num_tokens": 600273.0,
|
| 28164 |
+
"reward": 0.0009184567024931312,
|
| 28165 |
+
"reward_std": 0.0,
|
| 28166 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28167 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28168 |
+
"step": 1083
|
| 28169 |
+
},
|
| 28170 |
+
{
|
| 28171 |
+
"clip_ratio/high_max": 0.0,
|
| 28172 |
+
"clip_ratio/high_mean": 0.0,
|
| 28173 |
+
"clip_ratio/low_mean": 0.0,
|
| 28174 |
+
"clip_ratio/low_min": 0.0,
|
| 28175 |
+
"clip_ratio/region_mean": 0.0,
|
| 28176 |
+
"completions/clipped_ratio": 0.0,
|
| 28177 |
+
"completions/max_length": 1.0,
|
| 28178 |
+
"completions/max_terminated_length": 1.0,
|
| 28179 |
+
"completions/mean_length": 1.0,
|
| 28180 |
+
"completions/mean_terminated_length": 1.0,
|
| 28181 |
+
"completions/min_length": 1.0,
|
| 28182 |
+
"completions/min_terminated_length": 1.0,
|
| 28183 |
+
"epoch": 8.599605522682445,
|
| 28184 |
+
"frac_reward_zero_std": 1.0,
|
| 28185 |
+
"grad_norm": 0.0006358494283631444,
|
| 28186 |
+
"kl": 0.00034865736961364746,
|
| 28187 |
+
"learning_rate": 2e-06,
|
| 28188 |
+
"loss": 0.0,
|
| 28189 |
+
"num_tokens": 600887.0,
|
| 28190 |
+
"reward": 0.0009184567024931312,
|
| 28191 |
+
"reward_std": 0.0,
|
| 28192 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28193 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28194 |
+
"step": 1084
|
| 28195 |
+
},
|
| 28196 |
+
{
|
| 28197 |
+
"clip_ratio/high_max": 0.0,
|
| 28198 |
+
"clip_ratio/high_mean": 0.0,
|
| 28199 |
+
"clip_ratio/low_mean": 0.0,
|
| 28200 |
+
"clip_ratio/low_min": 0.0,
|
| 28201 |
+
"clip_ratio/region_mean": 0.0,
|
| 28202 |
+
"completions/clipped_ratio": 0.0,
|
| 28203 |
+
"completions/max_length": 1.0,
|
| 28204 |
+
"completions/max_terminated_length": 1.0,
|
| 28205 |
+
"completions/mean_length": 1.0,
|
| 28206 |
+
"completions/mean_terminated_length": 1.0,
|
| 28207 |
+
"completions/min_length": 1.0,
|
| 28208 |
+
"completions/min_terminated_length": 1.0,
|
| 28209 |
+
"epoch": 8.607495069033531,
|
| 28210 |
+
"frac_reward_zero_std": 1.0,
|
| 28211 |
+
"grad_norm": 0.0005096627282910049,
|
| 28212 |
+
"kl": 0.00020300596952438354,
|
| 28213 |
+
"learning_rate": 2e-06,
|
| 28214 |
+
"loss": 0.0,
|
| 28215 |
+
"num_tokens": 601421.0,
|
| 28216 |
+
"reward": 0.0009184567024931312,
|
| 28217 |
+
"reward_std": 0.0,
|
| 28218 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28219 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28220 |
+
"step": 1085
|
| 28221 |
+
},
|
| 28222 |
+
{
|
| 28223 |
+
"clip_ratio/high_max": 0.0,
|
| 28224 |
+
"clip_ratio/high_mean": 0.0,
|
| 28225 |
+
"clip_ratio/low_mean": 0.0,
|
| 28226 |
+
"clip_ratio/low_min": 0.0,
|
| 28227 |
+
"clip_ratio/region_mean": 0.0,
|
| 28228 |
+
"completions/clipped_ratio": 0.0,
|
| 28229 |
+
"completions/max_length": 1.0,
|
| 28230 |
+
"completions/max_terminated_length": 1.0,
|
| 28231 |
+
"completions/mean_length": 1.0,
|
| 28232 |
+
"completions/mean_terminated_length": 1.0,
|
| 28233 |
+
"completions/min_length": 1.0,
|
| 28234 |
+
"completions/min_terminated_length": 1.0,
|
| 28235 |
+
"epoch": 8.615384615384615,
|
| 28236 |
+
"frac_reward_zero_std": 1.0,
|
| 28237 |
+
"grad_norm": 0.00031507035600952804,
|
| 28238 |
+
"kl": 0.00016472488641738892,
|
| 28239 |
+
"learning_rate": 2e-06,
|
| 28240 |
+
"loss": 0.0,
|
| 28241 |
+
"num_tokens": 601917.0,
|
| 28242 |
+
"reward": 0.0009184567024931312,
|
| 28243 |
+
"reward_std": 0.0,
|
| 28244 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28245 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28246 |
+
"step": 1086
|
| 28247 |
+
},
|
| 28248 |
+
{
|
| 28249 |
+
"clip_ratio/high_max": 0.0,
|
| 28250 |
+
"clip_ratio/high_mean": 0.0,
|
| 28251 |
+
"clip_ratio/low_mean": 0.0,
|
| 28252 |
+
"clip_ratio/low_min": 0.0,
|
| 28253 |
+
"clip_ratio/region_mean": 0.0,
|
| 28254 |
+
"completions/clipped_ratio": 0.0,
|
| 28255 |
+
"completions/max_length": 1.0,
|
| 28256 |
+
"completions/max_terminated_length": 1.0,
|
| 28257 |
+
"completions/mean_length": 1.0,
|
| 28258 |
+
"completions/mean_terminated_length": 1.0,
|
| 28259 |
+
"completions/min_length": 1.0,
|
| 28260 |
+
"completions/min_terminated_length": 1.0,
|
| 28261 |
+
"epoch": 8.6232741617357,
|
| 28262 |
+
"frac_reward_zero_std": 1.0,
|
| 28263 |
+
"grad_norm": 0.0010786071652546525,
|
| 28264 |
+
"kl": 0.0004771128296852112,
|
| 28265 |
+
"learning_rate": 2e-06,
|
| 28266 |
+
"loss": 0.0,
|
| 28267 |
+
"num_tokens": 602437.0,
|
| 28268 |
+
"reward": 0.0009184567024931312,
|
| 28269 |
+
"reward_std": 0.0,
|
| 28270 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28271 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28272 |
+
"step": 1087
|
| 28273 |
+
},
|
| 28274 |
+
{
|
| 28275 |
+
"clip_ratio/high_max": 0.0,
|
| 28276 |
+
"clip_ratio/high_mean": 0.0,
|
| 28277 |
+
"clip_ratio/low_mean": 0.0,
|
| 28278 |
+
"clip_ratio/low_min": 0.0,
|
| 28279 |
+
"clip_ratio/region_mean": 0.0,
|
| 28280 |
+
"completions/clipped_ratio": 0.0,
|
| 28281 |
+
"completions/max_length": 1.0,
|
| 28282 |
+
"completions/max_terminated_length": 1.0,
|
| 28283 |
+
"completions/mean_length": 1.0,
|
| 28284 |
+
"completions/mean_terminated_length": 1.0,
|
| 28285 |
+
"completions/min_length": 1.0,
|
| 28286 |
+
"completions/min_terminated_length": 1.0,
|
| 28287 |
+
"epoch": 8.631163708086785,
|
| 28288 |
+
"frac_reward_zero_std": 1.0,
|
| 28289 |
+
"grad_norm": 0.0001308958017034456,
|
| 28290 |
+
"kl": 3.4771859645843506e-05,
|
| 28291 |
+
"learning_rate": 2e-06,
|
| 28292 |
+
"loss": 0.0,
|
| 28293 |
+
"num_tokens": 602983.0,
|
| 28294 |
+
"reward": 0.0009184567024931312,
|
| 28295 |
+
"reward_std": 0.0,
|
| 28296 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28297 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28298 |
+
"step": 1088
|
| 28299 |
+
},
|
| 28300 |
+
{
|
| 28301 |
+
"clip_ratio/high_max": 0.0,
|
| 28302 |
+
"clip_ratio/high_mean": 0.0,
|
| 28303 |
+
"clip_ratio/low_mean": 0.0,
|
| 28304 |
+
"clip_ratio/low_min": 0.0,
|
| 28305 |
+
"clip_ratio/region_mean": 0.0,
|
| 28306 |
+
"completions/clipped_ratio": 0.0,
|
| 28307 |
+
"completions/max_length": 1.0,
|
| 28308 |
+
"completions/max_terminated_length": 1.0,
|
| 28309 |
+
"completions/mean_length": 1.0,
|
| 28310 |
+
"completions/mean_terminated_length": 1.0,
|
| 28311 |
+
"completions/min_length": 1.0,
|
| 28312 |
+
"completions/min_terminated_length": 1.0,
|
| 28313 |
+
"epoch": 8.63905325443787,
|
| 28314 |
+
"frac_reward_zero_std": 1.0,
|
| 28315 |
+
"grad_norm": 0.001695003011263907,
|
| 28316 |
+
"kl": 0.0014914311468601227,
|
| 28317 |
+
"learning_rate": 2e-06,
|
| 28318 |
+
"loss": 0.0001,
|
| 28319 |
+
"num_tokens": 603559.0,
|
| 28320 |
+
"reward": 0.0009184567024931312,
|
| 28321 |
+
"reward_std": 0.0,
|
| 28322 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28323 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28324 |
+
"step": 1089
|
| 28325 |
+
},
|
| 28326 |
+
{
|
| 28327 |
+
"clip_ratio/high_max": 0.0,
|
| 28328 |
+
"clip_ratio/high_mean": 0.0,
|
| 28329 |
+
"clip_ratio/low_mean": 0.0,
|
| 28330 |
+
"clip_ratio/low_min": 0.0,
|
| 28331 |
+
"clip_ratio/region_mean": 0.0,
|
| 28332 |
+
"completions/clipped_ratio": 0.0,
|
| 28333 |
+
"completions/max_length": 1.0,
|
| 28334 |
+
"completions/max_terminated_length": 1.0,
|
| 28335 |
+
"completions/mean_length": 1.0,
|
| 28336 |
+
"completions/mean_terminated_length": 1.0,
|
| 28337 |
+
"completions/min_length": 1.0,
|
| 28338 |
+
"completions/min_terminated_length": 1.0,
|
| 28339 |
+
"epoch": 8.646942800788954,
|
| 28340 |
+
"frac_reward_zero_std": 1.0,
|
| 28341 |
+
"grad_norm": 0.000481656810734421,
|
| 28342 |
+
"kl": 0.00014894083142280579,
|
| 28343 |
+
"learning_rate": 2e-06,
|
| 28344 |
+
"loss": 0.0,
|
| 28345 |
+
"num_tokens": 604165.0,
|
| 28346 |
+
"reward": 0.0009184567024931312,
|
| 28347 |
+
"reward_std": 0.0,
|
| 28348 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28349 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28350 |
+
"step": 1090
|
| 28351 |
+
},
|
| 28352 |
+
{
|
| 28353 |
+
"clip_ratio/high_max": 0.0,
|
| 28354 |
+
"clip_ratio/high_mean": 0.0,
|
| 28355 |
+
"clip_ratio/low_mean": 0.0,
|
| 28356 |
+
"clip_ratio/low_min": 0.0,
|
| 28357 |
+
"clip_ratio/region_mean": 0.0,
|
| 28358 |
+
"completions/clipped_ratio": 0.0,
|
| 28359 |
+
"completions/max_length": 1.0,
|
| 28360 |
+
"completions/max_terminated_length": 1.0,
|
| 28361 |
+
"completions/mean_length": 1.0,
|
| 28362 |
+
"completions/mean_terminated_length": 1.0,
|
| 28363 |
+
"completions/min_length": 1.0,
|
| 28364 |
+
"completions/min_terminated_length": 1.0,
|
| 28365 |
+
"epoch": 8.65483234714004,
|
| 28366 |
+
"frac_reward_zero_std": 1.0,
|
| 28367 |
+
"grad_norm": 8.332054130733013e-05,
|
| 28368 |
+
"kl": 2.9999762773513794e-05,
|
| 28369 |
+
"learning_rate": 2e-06,
|
| 28370 |
+
"loss": 0.0,
|
| 28371 |
+
"num_tokens": 604713.0,
|
| 28372 |
+
"reward": 0.0009184567024931312,
|
| 28373 |
+
"reward_std": 0.0,
|
| 28374 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28375 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28376 |
+
"step": 1091
|
| 28377 |
+
},
|
| 28378 |
+
{
|
| 28379 |
+
"clip_ratio/high_max": 0.0,
|
| 28380 |
+
"clip_ratio/high_mean": 0.0,
|
| 28381 |
+
"clip_ratio/low_mean": 0.0,
|
| 28382 |
+
"clip_ratio/low_min": 0.0,
|
| 28383 |
+
"clip_ratio/region_mean": 0.0,
|
| 28384 |
+
"completions/clipped_ratio": 0.03125,
|
| 28385 |
+
"completions/max_length": 128.0,
|
| 28386 |
+
"completions/max_terminated_length": 1.0,
|
| 28387 |
+
"completions/mean_length": 4.96875,
|
| 28388 |
+
"completions/mean_terminated_length": 1.0,
|
| 28389 |
+
"completions/min_length": 1.0,
|
| 28390 |
+
"completions/min_terminated_length": 1.0,
|
| 28391 |
+
"epoch": 8.662721893491124,
|
| 28392 |
+
"frac_reward_zero_std": 0.9375,
|
| 28393 |
+
"grad_norm": 0.3093504309654236,
|
| 28394 |
+
"kl": 0.0011588901397772133,
|
| 28395 |
+
"learning_rate": 2e-06,
|
| 28396 |
+
"loss": 0.0664,
|
| 28397 |
+
"num_tokens": 605368.0,
|
| 28398 |
+
"reward": 0.0009147582459263504,
|
| 28399 |
+
"reward_std": 5.230402166489512e-06,
|
| 28400 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001819307217374444,
|
| 28401 |
+
"rewards/reward_high_identity_attack_score/std": 4.160971911915112e-06,
|
| 28402 |
+
"step": 1092
|
| 28403 |
+
},
|
| 28404 |
+
{
|
| 28405 |
+
"clip_ratio/high_max": 0.0,
|
| 28406 |
+
"clip_ratio/high_mean": 0.0,
|
| 28407 |
+
"clip_ratio/low_mean": 0.0,
|
| 28408 |
+
"clip_ratio/low_min": 0.0,
|
| 28409 |
+
"clip_ratio/region_mean": 0.0,
|
| 28410 |
+
"completions/clipped_ratio": 0.0,
|
| 28411 |
+
"completions/max_length": 1.0,
|
| 28412 |
+
"completions/max_terminated_length": 1.0,
|
| 28413 |
+
"completions/mean_length": 1.0,
|
| 28414 |
+
"completions/mean_terminated_length": 1.0,
|
| 28415 |
+
"completions/min_length": 1.0,
|
| 28416 |
+
"completions/min_terminated_length": 1.0,
|
| 28417 |
+
"epoch": 8.67061143984221,
|
| 28418 |
+
"frac_reward_zero_std": 1.0,
|
| 28419 |
+
"grad_norm": 9.948049410013482e-05,
|
| 28420 |
+
"kl": 3.5762786865234375e-05,
|
| 28421 |
+
"learning_rate": 2e-06,
|
| 28422 |
+
"loss": 0.0,
|
| 28423 |
+
"num_tokens": 605900.0,
|
| 28424 |
+
"reward": 0.0009184567024931312,
|
| 28425 |
+
"reward_std": 0.0,
|
| 28426 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28427 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28428 |
+
"step": 1093
|
| 28429 |
+
},
|
| 28430 |
+
{
|
| 28431 |
+
"clip_ratio/high_max": 0.0,
|
| 28432 |
+
"clip_ratio/high_mean": 0.0,
|
| 28433 |
+
"clip_ratio/low_mean": 0.0,
|
| 28434 |
+
"clip_ratio/low_min": 0.0,
|
| 28435 |
+
"clip_ratio/region_mean": 0.0,
|
| 28436 |
+
"completions/clipped_ratio": 0.0,
|
| 28437 |
+
"completions/max_length": 1.0,
|
| 28438 |
+
"completions/max_terminated_length": 1.0,
|
| 28439 |
+
"completions/mean_length": 1.0,
|
| 28440 |
+
"completions/mean_terminated_length": 1.0,
|
| 28441 |
+
"completions/min_length": 1.0,
|
| 28442 |
+
"completions/min_terminated_length": 1.0,
|
| 28443 |
+
"epoch": 8.678500986193294,
|
| 28444 |
+
"frac_reward_zero_std": 1.0,
|
| 28445 |
+
"grad_norm": 4.30829131801147e-05,
|
| 28446 |
+
"kl": 2.0891427993774414e-05,
|
| 28447 |
+
"learning_rate": 2e-06,
|
| 28448 |
+
"loss": 0.0,
|
| 28449 |
+
"num_tokens": 606484.0,
|
| 28450 |
+
"reward": 0.0009184567024931312,
|
| 28451 |
+
"reward_std": 0.0,
|
| 28452 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28453 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28454 |
+
"step": 1094
|
| 28455 |
+
},
|
| 28456 |
+
{
|
| 28457 |
+
"clip_ratio/high_max": 0.0,
|
| 28458 |
+
"clip_ratio/high_mean": 0.0,
|
| 28459 |
+
"clip_ratio/low_mean": 0.0,
|
| 28460 |
+
"clip_ratio/low_min": 0.0,
|
| 28461 |
+
"clip_ratio/region_mean": 0.0,
|
| 28462 |
+
"completions/clipped_ratio": 0.0,
|
| 28463 |
+
"completions/max_length": 1.0,
|
| 28464 |
+
"completions/max_terminated_length": 1.0,
|
| 28465 |
+
"completions/mean_length": 1.0,
|
| 28466 |
+
"completions/mean_terminated_length": 1.0,
|
| 28467 |
+
"completions/min_length": 1.0,
|
| 28468 |
+
"completions/min_terminated_length": 1.0,
|
| 28469 |
+
"epoch": 8.68639053254438,
|
| 28470 |
+
"frac_reward_zero_std": 1.0,
|
| 28471 |
+
"grad_norm": 0.007058305200189352,
|
| 28472 |
+
"kl": 0.003930628299713135,
|
| 28473 |
+
"learning_rate": 2e-06,
|
| 28474 |
+
"loss": 0.0002,
|
| 28475 |
+
"num_tokens": 607062.0,
|
| 28476 |
+
"reward": 0.0009184567024931312,
|
| 28477 |
+
"reward_std": 0.0,
|
| 28478 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28479 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28480 |
+
"step": 1095
|
| 28481 |
+
},
|
| 28482 |
+
{
|
| 28483 |
+
"clip_ratio/high_max": 0.0,
|
| 28484 |
+
"clip_ratio/high_mean": 0.0,
|
| 28485 |
+
"clip_ratio/low_mean": 0.0,
|
| 28486 |
+
"clip_ratio/low_min": 0.0,
|
| 28487 |
+
"clip_ratio/region_mean": 0.0,
|
| 28488 |
+
"completions/clipped_ratio": 0.0625,
|
| 28489 |
+
"completions/max_length": 128.0,
|
| 28490 |
+
"completions/max_terminated_length": 1.0,
|
| 28491 |
+
"completions/mean_length": 8.9375,
|
| 28492 |
+
"completions/mean_terminated_length": 1.0,
|
| 28493 |
+
"completions/min_length": 1.0,
|
| 28494 |
+
"completions/min_terminated_length": 1.0,
|
| 28495 |
+
"epoch": 8.694280078895464,
|
| 28496 |
+
"frac_reward_zero_std": 0.9375,
|
| 28497 |
+
"grad_norm": 0.33532580733299255,
|
| 28498 |
+
"kl": 0.00036744270619237795,
|
| 28499 |
+
"learning_rate": 2e-06,
|
| 28500 |
+
"loss": 0.0,
|
| 28501 |
+
"num_tokens": 607814.0,
|
| 28502 |
+
"reward": 0.0009117487934418023,
|
| 28503 |
+
"reward_std": 9.967894584406167e-06,
|
| 28504 |
+
"rewards/reward_high_identity_attack_score/mean": 0.00018133220146410167,
|
| 28505 |
+
"rewards/reward_high_identity_attack_score/std": 7.746834853605833e-06,
|
| 28506 |
+
"step": 1096
|
| 28507 |
+
},
|
| 28508 |
+
{
|
| 28509 |
+
"clip_ratio/high_max": 0.0,
|
| 28510 |
+
"clip_ratio/high_mean": 0.0,
|
| 28511 |
+
"clip_ratio/low_mean": 0.0,
|
| 28512 |
+
"clip_ratio/low_min": 0.0,
|
| 28513 |
+
"clip_ratio/region_mean": 0.0,
|
| 28514 |
+
"completions/clipped_ratio": 0.0,
|
| 28515 |
+
"completions/max_length": 1.0,
|
| 28516 |
+
"completions/max_terminated_length": 1.0,
|
| 28517 |
+
"completions/mean_length": 1.0,
|
| 28518 |
+
"completions/mean_terminated_length": 1.0,
|
| 28519 |
+
"completions/min_length": 1.0,
|
| 28520 |
+
"completions/min_terminated_length": 1.0,
|
| 28521 |
+
"epoch": 8.702169625246547,
|
| 28522 |
+
"frac_reward_zero_std": 1.0,
|
| 28523 |
+
"grad_norm": 8.960484410636127e-05,
|
| 28524 |
+
"kl": 3.2573938369750977e-05,
|
| 28525 |
+
"learning_rate": 2e-06,
|
| 28526 |
+
"loss": 0.0,
|
| 28527 |
+
"num_tokens": 608344.0,
|
| 28528 |
+
"reward": 0.0009184567024931312,
|
| 28529 |
+
"reward_std": 0.0,
|
| 28530 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28531 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28532 |
+
"step": 1097
|
| 28533 |
+
},
|
| 28534 |
+
{
|
| 28535 |
+
"clip_ratio/high_max": 0.0,
|
| 28536 |
+
"clip_ratio/high_mean": 0.0,
|
| 28537 |
+
"clip_ratio/low_mean": 0.0,
|
| 28538 |
+
"clip_ratio/low_min": 0.0,
|
| 28539 |
+
"clip_ratio/region_mean": 0.0,
|
| 28540 |
+
"completions/clipped_ratio": 0.0,
|
| 28541 |
+
"completions/max_length": 1.0,
|
| 28542 |
+
"completions/max_terminated_length": 1.0,
|
| 28543 |
+
"completions/mean_length": 1.0,
|
| 28544 |
+
"completions/mean_terminated_length": 1.0,
|
| 28545 |
+
"completions/min_length": 1.0,
|
| 28546 |
+
"completions/min_terminated_length": 1.0,
|
| 28547 |
+
"epoch": 8.710059171597633,
|
| 28548 |
+
"frac_reward_zero_std": 1.0,
|
| 28549 |
+
"grad_norm": 4.2808584112208337e-05,
|
| 28550 |
+
"kl": 2.2396445274353027e-05,
|
| 28551 |
+
"learning_rate": 2e-06,
|
| 28552 |
+
"loss": 0.0,
|
| 28553 |
+
"num_tokens": 608850.0,
|
| 28554 |
+
"reward": 0.0009184567024931312,
|
| 28555 |
+
"reward_std": 0.0,
|
| 28556 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28557 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28558 |
+
"step": 1098
|
| 28559 |
+
},
|
| 28560 |
+
{
|
| 28561 |
+
"clip_ratio/high_max": 0.0,
|
| 28562 |
+
"clip_ratio/high_mean": 0.0,
|
| 28563 |
+
"clip_ratio/low_mean": 0.0,
|
| 28564 |
+
"clip_ratio/low_min": 0.0,
|
| 28565 |
+
"clip_ratio/region_mean": 0.0,
|
| 28566 |
+
"completions/clipped_ratio": 0.0,
|
| 28567 |
+
"completions/max_length": 1.0,
|
| 28568 |
+
"completions/max_terminated_length": 1.0,
|
| 28569 |
+
"completions/mean_length": 1.0,
|
| 28570 |
+
"completions/mean_terminated_length": 1.0,
|
| 28571 |
+
"completions/min_length": 1.0,
|
| 28572 |
+
"completions/min_terminated_length": 1.0,
|
| 28573 |
+
"epoch": 8.717948717948717,
|
| 28574 |
+
"frac_reward_zero_std": 1.0,
|
| 28575 |
+
"grad_norm": 5.318546391208656e-05,
|
| 28576 |
+
"kl": 1.4428049325942993e-05,
|
| 28577 |
+
"learning_rate": 2e-06,
|
| 28578 |
+
"loss": 0.0,
|
| 28579 |
+
"num_tokens": 609464.0,
|
| 28580 |
+
"reward": 0.0009184567024931312,
|
| 28581 |
+
"reward_std": 0.0,
|
| 28582 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28583 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28584 |
+
"step": 1099
|
| 28585 |
+
},
|
| 28586 |
+
{
|
| 28587 |
+
"clip_ratio/high_max": 0.0,
|
| 28588 |
+
"clip_ratio/high_mean": 0.0,
|
| 28589 |
+
"clip_ratio/low_mean": 0.0,
|
| 28590 |
+
"clip_ratio/low_min": 0.0,
|
| 28591 |
+
"clip_ratio/region_mean": 0.0,
|
| 28592 |
+
"completions/clipped_ratio": 0.0,
|
| 28593 |
+
"completions/max_length": 1.0,
|
| 28594 |
+
"completions/max_terminated_length": 1.0,
|
| 28595 |
+
"completions/mean_length": 1.0,
|
| 28596 |
+
"completions/mean_terminated_length": 1.0,
|
| 28597 |
+
"completions/min_length": 1.0,
|
| 28598 |
+
"completions/min_terminated_length": 1.0,
|
| 28599 |
+
"epoch": 8.725838264299803,
|
| 28600 |
+
"frac_reward_zero_std": 1.0,
|
| 28601 |
+
"grad_norm": 0.00010975827171932906,
|
| 28602 |
+
"kl": 5.116313695907593e-05,
|
| 28603 |
+
"learning_rate": 2e-06,
|
| 28604 |
+
"loss": 0.0,
|
| 28605 |
+
"num_tokens": 610020.0,
|
| 28606 |
+
"reward": 0.0009184567024931312,
|
| 28607 |
+
"reward_std": 0.0,
|
| 28608 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28609 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28610 |
+
"step": 1100
|
| 28611 |
+
},
|
| 28612 |
+
{
|
| 28613 |
+
"clip_ratio/high_max": 0.0,
|
| 28614 |
+
"clip_ratio/high_mean": 0.0,
|
| 28615 |
+
"clip_ratio/low_mean": 0.0,
|
| 28616 |
+
"clip_ratio/low_min": 0.0,
|
| 28617 |
+
"clip_ratio/region_mean": 0.0,
|
| 28618 |
+
"completions/clipped_ratio": 0.0,
|
| 28619 |
+
"completions/max_length": 1.0,
|
| 28620 |
+
"completions/max_terminated_length": 1.0,
|
| 28621 |
+
"completions/mean_length": 1.0,
|
| 28622 |
+
"completions/mean_terminated_length": 1.0,
|
| 28623 |
+
"completions/min_length": 1.0,
|
| 28624 |
+
"completions/min_terminated_length": 1.0,
|
| 28625 |
+
"epoch": 8.733727810650887,
|
| 28626 |
+
"frac_reward_zero_std": 1.0,
|
| 28627 |
+
"grad_norm": 0.00043700210517272353,
|
| 28628 |
+
"kl": 0.0002166740596294403,
|
| 28629 |
+
"learning_rate": 2e-06,
|
| 28630 |
+
"loss": 0.0,
|
| 28631 |
+
"num_tokens": 610506.0,
|
| 28632 |
+
"reward": 0.0009184567024931312,
|
| 28633 |
+
"reward_std": 0.0,
|
| 28634 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28635 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28636 |
+
"step": 1101
|
| 28637 |
+
},
|
| 28638 |
+
{
|
| 28639 |
+
"clip_ratio/high_max": 0.0,
|
| 28640 |
+
"clip_ratio/high_mean": 0.0,
|
| 28641 |
+
"clip_ratio/low_mean": 0.0,
|
| 28642 |
+
"clip_ratio/low_min": 0.0,
|
| 28643 |
+
"clip_ratio/region_mean": 0.0,
|
| 28644 |
+
"completions/clipped_ratio": 0.0,
|
| 28645 |
+
"completions/max_length": 1.0,
|
| 28646 |
+
"completions/max_terminated_length": 1.0,
|
| 28647 |
+
"completions/mean_length": 1.0,
|
| 28648 |
+
"completions/mean_terminated_length": 1.0,
|
| 28649 |
+
"completions/min_length": 1.0,
|
| 28650 |
+
"completions/min_terminated_length": 1.0,
|
| 28651 |
+
"epoch": 8.741617357001973,
|
| 28652 |
+
"frac_reward_zero_std": 1.0,
|
| 28653 |
+
"grad_norm": 8.66984628373757e-05,
|
| 28654 |
+
"kl": 4.193931818008423e-05,
|
| 28655 |
+
"learning_rate": 2e-06,
|
| 28656 |
+
"loss": 0.0,
|
| 28657 |
+
"num_tokens": 611050.0,
|
| 28658 |
+
"reward": 0.0009184567024931312,
|
| 28659 |
+
"reward_std": 0.0,
|
| 28660 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28661 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28662 |
+
"step": 1102
|
| 28663 |
+
},
|
| 28664 |
+
{
|
| 28665 |
+
"clip_ratio/high_max": 0.0,
|
| 28666 |
+
"clip_ratio/high_mean": 0.0,
|
| 28667 |
+
"clip_ratio/low_mean": 0.0,
|
| 28668 |
+
"clip_ratio/low_min": 0.0,
|
| 28669 |
+
"clip_ratio/region_mean": 0.0,
|
| 28670 |
+
"completions/clipped_ratio": 0.0,
|
| 28671 |
+
"completions/max_length": 1.0,
|
| 28672 |
+
"completions/max_terminated_length": 1.0,
|
| 28673 |
+
"completions/mean_length": 1.0,
|
| 28674 |
+
"completions/mean_terminated_length": 1.0,
|
| 28675 |
+
"completions/min_length": 1.0,
|
| 28676 |
+
"completions/min_terminated_length": 1.0,
|
| 28677 |
+
"epoch": 8.749506903353057,
|
| 28678 |
+
"frac_reward_zero_std": 1.0,
|
| 28679 |
+
"grad_norm": 0.0009264847612939775,
|
| 28680 |
+
"kl": 0.00016462430357933044,
|
| 28681 |
+
"learning_rate": 2e-06,
|
| 28682 |
+
"loss": 0.0,
|
| 28683 |
+
"num_tokens": 611526.0,
|
| 28684 |
+
"reward": 0.0009184567024931312,
|
| 28685 |
+
"reward_std": 0.0,
|
| 28686 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28687 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28688 |
+
"step": 1103
|
| 28689 |
+
},
|
| 28690 |
+
{
|
| 28691 |
+
"clip_ratio/high_max": 0.0,
|
| 28692 |
+
"clip_ratio/high_mean": 0.0,
|
| 28693 |
+
"clip_ratio/low_mean": 0.0,
|
| 28694 |
+
"clip_ratio/low_min": 0.0,
|
| 28695 |
+
"clip_ratio/region_mean": 0.0,
|
| 28696 |
+
"completions/clipped_ratio": 0.0,
|
| 28697 |
+
"completions/max_length": 1.0,
|
| 28698 |
+
"completions/max_terminated_length": 1.0,
|
| 28699 |
+
"completions/mean_length": 1.0,
|
| 28700 |
+
"completions/mean_terminated_length": 1.0,
|
| 28701 |
+
"completions/min_length": 1.0,
|
| 28702 |
+
"completions/min_terminated_length": 1.0,
|
| 28703 |
+
"epoch": 8.757396449704142,
|
| 28704 |
+
"frac_reward_zero_std": 1.0,
|
| 28705 |
+
"grad_norm": 0.00021662222570739686,
|
| 28706 |
+
"kl": 0.00013697892427444458,
|
| 28707 |
+
"learning_rate": 2e-06,
|
| 28708 |
+
"loss": 0.0,
|
| 28709 |
+
"num_tokens": 612030.0,
|
| 28710 |
+
"reward": 0.0009184567024931312,
|
| 28711 |
+
"reward_std": 0.0,
|
| 28712 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28713 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28714 |
+
"step": 1104
|
| 28715 |
+
},
|
| 28716 |
+
{
|
| 28717 |
+
"clip_ratio/high_max": 0.0,
|
| 28718 |
+
"clip_ratio/high_mean": 0.0,
|
| 28719 |
+
"clip_ratio/low_mean": 0.0,
|
| 28720 |
+
"clip_ratio/low_min": 0.0,
|
| 28721 |
+
"clip_ratio/region_mean": 0.0,
|
| 28722 |
+
"completions/clipped_ratio": 0.0,
|
| 28723 |
+
"completions/max_length": 1.0,
|
| 28724 |
+
"completions/max_terminated_length": 1.0,
|
| 28725 |
+
"completions/mean_length": 1.0,
|
| 28726 |
+
"completions/mean_terminated_length": 1.0,
|
| 28727 |
+
"completions/min_length": 1.0,
|
| 28728 |
+
"completions/min_terminated_length": 1.0,
|
| 28729 |
+
"epoch": 8.765285996055226,
|
| 28730 |
+
"frac_reward_zero_std": 1.0,
|
| 28731 |
+
"grad_norm": 0.09041354060173035,
|
| 28732 |
+
"kl": 0.008402518928050995,
|
| 28733 |
+
"learning_rate": 2e-06,
|
| 28734 |
+
"loss": 0.0004,
|
| 28735 |
+
"num_tokens": 612550.0,
|
| 28736 |
+
"reward": 0.0009184567024931312,
|
| 28737 |
+
"reward_std": 0.0,
|
| 28738 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28739 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28740 |
+
"step": 1105
|
| 28741 |
+
},
|
| 28742 |
+
{
|
| 28743 |
+
"clip_ratio/high_max": 0.0,
|
| 28744 |
+
"clip_ratio/high_mean": 0.0,
|
| 28745 |
+
"clip_ratio/low_mean": 0.0,
|
| 28746 |
+
"clip_ratio/low_min": 0.0,
|
| 28747 |
+
"clip_ratio/region_mean": 0.0,
|
| 28748 |
+
"completions/clipped_ratio": 0.0,
|
| 28749 |
+
"completions/max_length": 1.0,
|
| 28750 |
+
"completions/max_terminated_length": 1.0,
|
| 28751 |
+
"completions/mean_length": 1.0,
|
| 28752 |
+
"completions/mean_terminated_length": 1.0,
|
| 28753 |
+
"completions/min_length": 1.0,
|
| 28754 |
+
"completions/min_terminated_length": 1.0,
|
| 28755 |
+
"epoch": 8.773175542406312,
|
| 28756 |
+
"frac_reward_zero_std": 1.0,
|
| 28757 |
+
"grad_norm": 0.0002531117352191359,
|
| 28758 |
+
"kl": 0.0001450851559638977,
|
| 28759 |
+
"learning_rate": 2e-06,
|
| 28760 |
+
"loss": 0.0,
|
| 28761 |
+
"num_tokens": 613012.0,
|
| 28762 |
+
"reward": 0.0009184567024931312,
|
| 28763 |
+
"reward_std": 0.0,
|
| 28764 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28765 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28766 |
+
"step": 1106
|
| 28767 |
+
},
|
| 28768 |
+
{
|
| 28769 |
+
"clip_ratio/high_max": 0.0,
|
| 28770 |
+
"clip_ratio/high_mean": 0.0,
|
| 28771 |
+
"clip_ratio/low_mean": 0.0,
|
| 28772 |
+
"clip_ratio/low_min": 0.0,
|
| 28773 |
+
"clip_ratio/region_mean": 0.0,
|
| 28774 |
+
"completions/clipped_ratio": 0.0,
|
| 28775 |
+
"completions/max_length": 1.0,
|
| 28776 |
+
"completions/max_terminated_length": 1.0,
|
| 28777 |
+
"completions/mean_length": 1.0,
|
| 28778 |
+
"completions/mean_terminated_length": 1.0,
|
| 28779 |
+
"completions/min_length": 1.0,
|
| 28780 |
+
"completions/min_terminated_length": 1.0,
|
| 28781 |
+
"epoch": 8.781065088757396,
|
| 28782 |
+
"frac_reward_zero_std": 1.0,
|
| 28783 |
+
"grad_norm": 0.0003755576617550105,
|
| 28784 |
+
"kl": 0.0002874433994293213,
|
| 28785 |
+
"learning_rate": 2e-06,
|
| 28786 |
+
"loss": 0.0,
|
| 28787 |
+
"num_tokens": 613528.0,
|
| 28788 |
+
"reward": 0.0009184567024931312,
|
| 28789 |
+
"reward_std": 0.0,
|
| 28790 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28791 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28792 |
+
"step": 1107
|
| 28793 |
+
},
|
| 28794 |
+
{
|
| 28795 |
+
"clip_ratio/high_max": 0.0,
|
| 28796 |
+
"clip_ratio/high_mean": 0.0,
|
| 28797 |
+
"clip_ratio/low_mean": 0.0,
|
| 28798 |
+
"clip_ratio/low_min": 0.0,
|
| 28799 |
+
"clip_ratio/region_mean": 0.0,
|
| 28800 |
+
"completions/clipped_ratio": 0.0,
|
| 28801 |
+
"completions/max_length": 1.0,
|
| 28802 |
+
"completions/max_terminated_length": 1.0,
|
| 28803 |
+
"completions/mean_length": 1.0,
|
| 28804 |
+
"completions/mean_terminated_length": 1.0,
|
| 28805 |
+
"completions/min_length": 1.0,
|
| 28806 |
+
"completions/min_terminated_length": 1.0,
|
| 28807 |
+
"epoch": 8.788954635108482,
|
| 28808 |
+
"frac_reward_zero_std": 1.0,
|
| 28809 |
+
"grad_norm": 0.00029925949638709426,
|
| 28810 |
+
"kl": 0.0001658126711845398,
|
| 28811 |
+
"learning_rate": 2e-06,
|
| 28812 |
+
"loss": 0.0,
|
| 28813 |
+
"num_tokens": 614084.0,
|
| 28814 |
+
"reward": 0.0009184567024931312,
|
| 28815 |
+
"reward_std": 0.0,
|
| 28816 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28817 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28818 |
+
"step": 1108
|
| 28819 |
+
},
|
| 28820 |
+
{
|
| 28821 |
+
"clip_ratio/high_max": 0.0,
|
| 28822 |
+
"clip_ratio/high_mean": 0.0,
|
| 28823 |
+
"clip_ratio/low_mean": 0.0,
|
| 28824 |
+
"clip_ratio/low_min": 0.0,
|
| 28825 |
+
"clip_ratio/region_mean": 0.0,
|
| 28826 |
+
"completions/clipped_ratio": 0.0,
|
| 28827 |
+
"completions/max_length": 1.0,
|
| 28828 |
+
"completions/max_terminated_length": 1.0,
|
| 28829 |
+
"completions/mean_length": 1.0,
|
| 28830 |
+
"completions/mean_terminated_length": 1.0,
|
| 28831 |
+
"completions/min_length": 1.0,
|
| 28832 |
+
"completions/min_terminated_length": 1.0,
|
| 28833 |
+
"epoch": 8.796844181459566,
|
| 28834 |
+
"frac_reward_zero_std": 1.0,
|
| 28835 |
+
"grad_norm": 0.009911871515214443,
|
| 28836 |
+
"kl": 0.0037598907947540283,
|
| 28837 |
+
"learning_rate": 2e-06,
|
| 28838 |
+
"loss": 0.0002,
|
| 28839 |
+
"num_tokens": 614606.0,
|
| 28840 |
+
"reward": 0.0009184567024931312,
|
| 28841 |
+
"reward_std": 0.0,
|
| 28842 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28843 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28844 |
+
"step": 1109
|
| 28845 |
+
},
|
| 28846 |
+
{
|
| 28847 |
+
"clip_ratio/high_max": 0.0,
|
| 28848 |
+
"clip_ratio/high_mean": 0.0,
|
| 28849 |
+
"clip_ratio/low_mean": 0.0,
|
| 28850 |
+
"clip_ratio/low_min": 0.0,
|
| 28851 |
+
"clip_ratio/region_mean": 0.0,
|
| 28852 |
+
"completions/clipped_ratio": 0.0,
|
| 28853 |
+
"completions/max_length": 1.0,
|
| 28854 |
+
"completions/max_terminated_length": 1.0,
|
| 28855 |
+
"completions/mean_length": 1.0,
|
| 28856 |
+
"completions/mean_terminated_length": 1.0,
|
| 28857 |
+
"completions/min_length": 1.0,
|
| 28858 |
+
"completions/min_terminated_length": 1.0,
|
| 28859 |
+
"epoch": 8.804733727810651,
|
| 28860 |
+
"frac_reward_zero_std": 1.0,
|
| 28861 |
+
"grad_norm": 0.005583275109529495,
|
| 28862 |
+
"kl": 0.0005717538297176361,
|
| 28863 |
+
"learning_rate": 2e-06,
|
| 28864 |
+
"loss": 0.0,
|
| 28865 |
+
"num_tokens": 615150.0,
|
| 28866 |
+
"reward": 0.0009184567024931312,
|
| 28867 |
+
"reward_std": 0.0,
|
| 28868 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28869 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28870 |
+
"step": 1110
|
| 28871 |
+
},
|
| 28872 |
+
{
|
| 28873 |
+
"clip_ratio/high_max": 0.0,
|
| 28874 |
+
"clip_ratio/high_mean": 0.0,
|
| 28875 |
+
"clip_ratio/low_mean": 0.0,
|
| 28876 |
+
"clip_ratio/low_min": 0.0,
|
| 28877 |
+
"clip_ratio/region_mean": 0.0,
|
| 28878 |
+
"completions/clipped_ratio": 0.0,
|
| 28879 |
+
"completions/max_length": 1.0,
|
| 28880 |
+
"completions/max_terminated_length": 1.0,
|
| 28881 |
+
"completions/mean_length": 1.0,
|
| 28882 |
+
"completions/mean_terminated_length": 1.0,
|
| 28883 |
+
"completions/min_length": 1.0,
|
| 28884 |
+
"completions/min_terminated_length": 1.0,
|
| 28885 |
+
"epoch": 8.812623274161735,
|
| 28886 |
+
"frac_reward_zero_std": 1.0,
|
| 28887 |
+
"grad_norm": 0.0004297482082620263,
|
| 28888 |
+
"kl": 0.00023529306054115295,
|
| 28889 |
+
"learning_rate": 2e-06,
|
| 28890 |
+
"loss": 0.0,
|
| 28891 |
+
"num_tokens": 615738.0,
|
| 28892 |
+
"reward": 0.0009184567024931312,
|
| 28893 |
+
"reward_std": 0.0,
|
| 28894 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28895 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28896 |
+
"step": 1111
|
| 28897 |
+
},
|
| 28898 |
+
{
|
| 28899 |
+
"clip_ratio/high_max": 0.0,
|
| 28900 |
+
"clip_ratio/high_mean": 0.0,
|
| 28901 |
+
"clip_ratio/low_mean": 0.0,
|
| 28902 |
+
"clip_ratio/low_min": 0.0,
|
| 28903 |
+
"clip_ratio/region_mean": 0.0,
|
| 28904 |
+
"completions/clipped_ratio": 0.0,
|
| 28905 |
+
"completions/max_length": 1.0,
|
| 28906 |
+
"completions/max_terminated_length": 1.0,
|
| 28907 |
+
"completions/mean_length": 1.0,
|
| 28908 |
+
"completions/mean_terminated_length": 1.0,
|
| 28909 |
+
"completions/min_length": 1.0,
|
| 28910 |
+
"completions/min_terminated_length": 1.0,
|
| 28911 |
+
"epoch": 8.820512820512821,
|
| 28912 |
+
"frac_reward_zero_std": 1.0,
|
| 28913 |
+
"grad_norm": 0.0001023484073812142,
|
| 28914 |
+
"kl": 5.105137825012207e-05,
|
| 28915 |
+
"learning_rate": 2e-06,
|
| 28916 |
+
"loss": 0.0,
|
| 28917 |
+
"num_tokens": 616332.0,
|
| 28918 |
+
"reward": 0.0009184567024931312,
|
| 28919 |
+
"reward_std": 0.0,
|
| 28920 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28921 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28922 |
+
"step": 1112
|
| 28923 |
+
},
|
| 28924 |
+
{
|
| 28925 |
+
"clip_ratio/high_max": 0.0,
|
| 28926 |
+
"clip_ratio/high_mean": 0.0,
|
| 28927 |
+
"clip_ratio/low_mean": 0.0,
|
| 28928 |
+
"clip_ratio/low_min": 0.0,
|
| 28929 |
+
"clip_ratio/region_mean": 0.0,
|
| 28930 |
+
"completions/clipped_ratio": 0.0,
|
| 28931 |
+
"completions/max_length": 1.0,
|
| 28932 |
+
"completions/max_terminated_length": 1.0,
|
| 28933 |
+
"completions/mean_length": 1.0,
|
| 28934 |
+
"completions/mean_terminated_length": 1.0,
|
| 28935 |
+
"completions/min_length": 1.0,
|
| 28936 |
+
"completions/min_terminated_length": 1.0,
|
| 28937 |
+
"epoch": 8.828402366863905,
|
| 28938 |
+
"frac_reward_zero_std": 1.0,
|
| 28939 |
+
"grad_norm": 0.0028757709078490734,
|
| 28940 |
+
"kl": 0.0007946379482746124,
|
| 28941 |
+
"learning_rate": 2e-06,
|
| 28942 |
+
"loss": 0.0,
|
| 28943 |
+
"num_tokens": 616878.0,
|
| 28944 |
+
"reward": 0.0009184567024931312,
|
| 28945 |
+
"reward_std": 0.0,
|
| 28946 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28947 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28948 |
+
"step": 1113
|
| 28949 |
+
},
|
| 28950 |
+
{
|
| 28951 |
+
"clip_ratio/high_max": 0.0,
|
| 28952 |
+
"clip_ratio/high_mean": 0.0,
|
| 28953 |
+
"clip_ratio/low_mean": 0.0,
|
| 28954 |
+
"clip_ratio/low_min": 0.0,
|
| 28955 |
+
"clip_ratio/region_mean": 0.0,
|
| 28956 |
+
"completions/clipped_ratio": 0.0,
|
| 28957 |
+
"completions/max_length": 1.0,
|
| 28958 |
+
"completions/max_terminated_length": 1.0,
|
| 28959 |
+
"completions/mean_length": 1.0,
|
| 28960 |
+
"completions/mean_terminated_length": 1.0,
|
| 28961 |
+
"completions/min_length": 1.0,
|
| 28962 |
+
"completions/min_terminated_length": 1.0,
|
| 28963 |
+
"epoch": 8.83629191321499,
|
| 28964 |
+
"frac_reward_zero_std": 1.0,
|
| 28965 |
+
"grad_norm": 5.8135072322329506e-05,
|
| 28966 |
+
"kl": 1.959875226020813e-05,
|
| 28967 |
+
"learning_rate": 2e-06,
|
| 28968 |
+
"loss": 0.0,
|
| 28969 |
+
"num_tokens": 617402.0,
|
| 28970 |
+
"reward": 0.0009184567024931312,
|
| 28971 |
+
"reward_std": 0.0,
|
| 28972 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28973 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 28974 |
+
"step": 1114
|
| 28975 |
+
},
|
| 28976 |
+
{
|
| 28977 |
+
"clip_ratio/high_max": 0.0,
|
| 28978 |
+
"clip_ratio/high_mean": 0.0,
|
| 28979 |
+
"clip_ratio/low_mean": 0.0,
|
| 28980 |
+
"clip_ratio/low_min": 0.0,
|
| 28981 |
+
"clip_ratio/region_mean": 0.0,
|
| 28982 |
+
"completions/clipped_ratio": 0.0,
|
| 28983 |
+
"completions/max_length": 1.0,
|
| 28984 |
+
"completions/max_terminated_length": 1.0,
|
| 28985 |
+
"completions/mean_length": 1.0,
|
| 28986 |
+
"completions/mean_terminated_length": 1.0,
|
| 28987 |
+
"completions/min_length": 1.0,
|
| 28988 |
+
"completions/min_terminated_length": 1.0,
|
| 28989 |
+
"epoch": 8.844181459566075,
|
| 28990 |
+
"frac_reward_zero_std": 1.0,
|
| 28991 |
+
"grad_norm": 0.00013063322694506496,
|
| 28992 |
+
"kl": 6.257742643356323e-05,
|
| 28993 |
+
"learning_rate": 2e-06,
|
| 28994 |
+
"loss": 0.0,
|
| 28995 |
+
"num_tokens": 618036.0,
|
| 28996 |
+
"reward": 0.0009184567024931312,
|
| 28997 |
+
"reward_std": 0.0,
|
| 28998 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 28999 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 29000 |
+
"step": 1115
|
| 29001 |
+
},
|
| 29002 |
+
{
|
| 29003 |
+
"clip_ratio/high_max": 0.0,
|
| 29004 |
+
"clip_ratio/high_mean": 0.0,
|
| 29005 |
+
"clip_ratio/low_mean": 0.0,
|
| 29006 |
+
"clip_ratio/low_min": 0.0,
|
| 29007 |
+
"clip_ratio/region_mean": 0.0,
|
| 29008 |
+
"completions/clipped_ratio": 0.0,
|
| 29009 |
+
"completions/max_length": 1.0,
|
| 29010 |
+
"completions/max_terminated_length": 1.0,
|
| 29011 |
+
"completions/mean_length": 1.0,
|
| 29012 |
+
"completions/mean_terminated_length": 1.0,
|
| 29013 |
+
"completions/min_length": 1.0,
|
| 29014 |
+
"completions/min_terminated_length": 1.0,
|
| 29015 |
+
"epoch": 8.85207100591716,
|
| 29016 |
+
"frac_reward_zero_std": 1.0,
|
| 29017 |
+
"grad_norm": 0.0002437905059196055,
|
| 29018 |
+
"kl": 0.0001697726547718048,
|
| 29019 |
+
"learning_rate": 2e-06,
|
| 29020 |
+
"loss": 0.0,
|
| 29021 |
+
"num_tokens": 618518.0,
|
| 29022 |
+
"reward": 0.0009184567024931312,
|
| 29023 |
+
"reward_std": 0.0,
|
| 29024 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 29025 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 29026 |
+
"step": 1116
|
| 29027 |
+
},
|
| 29028 |
+
{
|
| 29029 |
+
"clip_ratio/high_max": 0.0,
|
| 29030 |
+
"clip_ratio/high_mean": 0.0,
|
| 29031 |
+
"clip_ratio/low_mean": 0.0,
|
| 29032 |
+
"clip_ratio/low_min": 0.0,
|
| 29033 |
+
"clip_ratio/region_mean": 0.0,
|
| 29034 |
+
"completions/clipped_ratio": 0.0,
|
| 29035 |
+
"completions/max_length": 1.0,
|
| 29036 |
+
"completions/max_terminated_length": 1.0,
|
| 29037 |
+
"completions/mean_length": 1.0,
|
| 29038 |
+
"completions/mean_terminated_length": 1.0,
|
| 29039 |
+
"completions/min_length": 1.0,
|
| 29040 |
+
"completions/min_terminated_length": 1.0,
|
| 29041 |
+
"epoch": 8.859960552268245,
|
| 29042 |
+
"frac_reward_zero_std": 1.0,
|
| 29043 |
+
"grad_norm": 0.08250457793474197,
|
| 29044 |
+
"kl": 0.1591484621167183,
|
| 29045 |
+
"learning_rate": 2e-06,
|
| 29046 |
+
"loss": 0.008,
|
| 29047 |
+
"num_tokens": 619054.0,
|
| 29048 |
+
"reward": 0.0009184567024931312,
|
| 29049 |
+
"reward_std": 0.0,
|
| 29050 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 29051 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 29052 |
+
"step": 1117
|
| 29053 |
+
},
|
| 29054 |
+
{
|
| 29055 |
+
"clip_ratio/high_max": 0.0,
|
| 29056 |
+
"clip_ratio/high_mean": 0.0,
|
| 29057 |
+
"clip_ratio/low_mean": 0.0,
|
| 29058 |
+
"clip_ratio/low_min": 0.0,
|
| 29059 |
+
"clip_ratio/region_mean": 0.0,
|
| 29060 |
+
"completions/clipped_ratio": 0.0,
|
| 29061 |
+
"completions/max_length": 1.0,
|
| 29062 |
+
"completions/max_terminated_length": 1.0,
|
| 29063 |
+
"completions/mean_length": 1.0,
|
| 29064 |
+
"completions/mean_terminated_length": 1.0,
|
| 29065 |
+
"completions/min_length": 1.0,
|
| 29066 |
+
"completions/min_terminated_length": 1.0,
|
| 29067 |
+
"epoch": 8.867850098619328,
|
| 29068 |
+
"frac_reward_zero_std": 1.0,
|
| 29069 |
+
"grad_norm": 0.0001296801638090983,
|
| 29070 |
+
"kl": 8.263811469078064e-05,
|
| 29071 |
+
"learning_rate": 2e-06,
|
| 29072 |
+
"loss": 0.0,
|
| 29073 |
+
"num_tokens": 619612.0,
|
| 29074 |
+
"reward": 0.0009184567024931312,
|
| 29075 |
+
"reward_std": 0.0,
|
| 29076 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 29077 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 29078 |
+
"step": 1118
|
| 29079 |
+
},
|
| 29080 |
+
{
|
| 29081 |
+
"clip_ratio/high_max": 0.0,
|
| 29082 |
+
"clip_ratio/high_mean": 0.0,
|
| 29083 |
+
"clip_ratio/low_mean": 0.0,
|
| 29084 |
+
"clip_ratio/low_min": 0.0,
|
| 29085 |
+
"clip_ratio/region_mean": 0.0,
|
| 29086 |
+
"completions/clipped_ratio": 0.0,
|
| 29087 |
+
"completions/max_length": 1.0,
|
| 29088 |
+
"completions/max_terminated_length": 1.0,
|
| 29089 |
+
"completions/mean_length": 1.0,
|
| 29090 |
+
"completions/mean_terminated_length": 1.0,
|
| 29091 |
+
"completions/min_length": 1.0,
|
| 29092 |
+
"completions/min_terminated_length": 1.0,
|
| 29093 |
+
"epoch": 8.875739644970414,
|
| 29094 |
+
"frac_reward_zero_std": 1.0,
|
| 29095 |
+
"grad_norm": 6.869369826745242e-05,
|
| 29096 |
+
"kl": 3.816187381744385e-05,
|
| 29097 |
+
"learning_rate": 2e-06,
|
| 29098 |
+
"loss": 0.0,
|
| 29099 |
+
"num_tokens": 620232.0,
|
| 29100 |
+
"reward": 0.0009184567024931312,
|
| 29101 |
+
"reward_std": 0.0,
|
| 29102 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 29103 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 29104 |
+
"step": 1119
|
| 29105 |
+
},
|
| 29106 |
+
{
|
| 29107 |
+
"clip_ratio/high_max": 0.0,
|
| 29108 |
+
"clip_ratio/high_mean": 0.0,
|
| 29109 |
+
"clip_ratio/low_mean": 0.0,
|
| 29110 |
+
"clip_ratio/low_min": 0.0,
|
| 29111 |
+
"clip_ratio/region_mean": 0.0,
|
| 29112 |
+
"completions/clipped_ratio": 0.0,
|
| 29113 |
+
"completions/max_length": 1.0,
|
| 29114 |
+
"completions/max_terminated_length": 1.0,
|
| 29115 |
+
"completions/mean_length": 1.0,
|
| 29116 |
+
"completions/mean_terminated_length": 1.0,
|
| 29117 |
+
"completions/min_length": 1.0,
|
| 29118 |
+
"completions/min_terminated_length": 1.0,
|
| 29119 |
+
"epoch": 8.883629191321498,
|
| 29120 |
+
"frac_reward_zero_std": 1.0,
|
| 29121 |
+
"grad_norm": 0.00018871534848585725,
|
| 29122 |
+
"kl": 9.661540389060974e-05,
|
| 29123 |
+
"learning_rate": 2e-06,
|
| 29124 |
+
"loss": 0.0,
|
| 29125 |
+
"num_tokens": 620756.0,
|
| 29126 |
+
"reward": 0.0009184567024931312,
|
| 29127 |
+
"reward_std": 0.0,
|
| 29128 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 29129 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 29130 |
+
"step": 1120
|
| 29131 |
+
},
|
| 29132 |
+
{
|
| 29133 |
+
"clip_ratio/high_max": 0.0,
|
| 29134 |
+
"clip_ratio/high_mean": 0.0,
|
| 29135 |
+
"clip_ratio/low_mean": 0.0,
|
| 29136 |
+
"clip_ratio/low_min": 0.0,
|
| 29137 |
+
"clip_ratio/region_mean": 0.0,
|
| 29138 |
+
"completions/clipped_ratio": 0.0,
|
| 29139 |
+
"completions/max_length": 1.0,
|
| 29140 |
+
"completions/max_terminated_length": 1.0,
|
| 29141 |
+
"completions/mean_length": 1.0,
|
| 29142 |
+
"completions/mean_terminated_length": 1.0,
|
| 29143 |
+
"completions/min_length": 1.0,
|
| 29144 |
+
"completions/min_terminated_length": 1.0,
|
| 29145 |
+
"epoch": 8.891518737672584,
|
| 29146 |
+
"frac_reward_zero_std": 1.0,
|
| 29147 |
+
"grad_norm": 0.0008106045424938202,
|
| 29148 |
+
"kl": 0.00044995173811912537,
|
| 29149 |
+
"learning_rate": 2e-06,
|
| 29150 |
+
"loss": 0.0,
|
| 29151 |
+
"num_tokens": 621268.0,
|
| 29152 |
+
"reward": 0.0009184567024931312,
|
| 29153 |
+
"reward_std": 0.0,
|
| 29154 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 29155 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 29156 |
+
"step": 1121
|
| 29157 |
+
},
|
| 29158 |
+
{
|
| 29159 |
+
"clip_ratio/high_max": 0.0,
|
| 29160 |
+
"clip_ratio/high_mean": 0.0,
|
| 29161 |
+
"clip_ratio/low_mean": 0.0,
|
| 29162 |
+
"clip_ratio/low_min": 0.0,
|
| 29163 |
+
"clip_ratio/region_mean": 0.0,
|
| 29164 |
+
"completions/clipped_ratio": 0.0,
|
| 29165 |
+
"completions/max_length": 1.0,
|
| 29166 |
+
"completions/max_terminated_length": 1.0,
|
| 29167 |
+
"completions/mean_length": 1.0,
|
| 29168 |
+
"completions/mean_terminated_length": 1.0,
|
| 29169 |
+
"completions/min_length": 1.0,
|
| 29170 |
+
"completions/min_terminated_length": 1.0,
|
| 29171 |
+
"epoch": 8.899408284023668,
|
| 29172 |
+
"frac_reward_zero_std": 1.0,
|
| 29173 |
+
"grad_norm": 0.003337966976687312,
|
| 29174 |
+
"kl": 0.0018642731010913849,
|
| 29175 |
+
"learning_rate": 2e-06,
|
| 29176 |
+
"loss": 0.0001,
|
| 29177 |
+
"num_tokens": 621860.0,
|
| 29178 |
+
"reward": 0.0009184567024931312,
|
| 29179 |
+
"reward_std": 0.0,
|
| 29180 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 29181 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 29182 |
+
"step": 1122
|
| 29183 |
+
},
|
| 29184 |
+
{
|
| 29185 |
+
"clip_ratio/high_max": 0.0,
|
| 29186 |
+
"clip_ratio/high_mean": 0.0,
|
| 29187 |
+
"clip_ratio/low_mean": 0.0,
|
| 29188 |
+
"clip_ratio/low_min": 0.0,
|
| 29189 |
+
"clip_ratio/region_mean": 0.0,
|
| 29190 |
+
"completions/clipped_ratio": 0.0,
|
| 29191 |
+
"completions/max_length": 1.0,
|
| 29192 |
+
"completions/max_terminated_length": 1.0,
|
| 29193 |
+
"completions/mean_length": 1.0,
|
| 29194 |
+
"completions/mean_terminated_length": 1.0,
|
| 29195 |
+
"completions/min_length": 1.0,
|
| 29196 |
+
"completions/min_terminated_length": 1.0,
|
| 29197 |
+
"epoch": 8.907297830374754,
|
| 29198 |
+
"frac_reward_zero_std": 1.0,
|
| 29199 |
+
"grad_norm": 0.00014351322897709906,
|
| 29200 |
+
"kl": 5.163252353668213e-05,
|
| 29201 |
+
"learning_rate": 2e-06,
|
| 29202 |
+
"loss": 0.0,
|
| 29203 |
+
"num_tokens": 622386.0,
|
| 29204 |
+
"reward": 0.0009184567024931312,
|
| 29205 |
+
"reward_std": 0.0,
|
| 29206 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 29207 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 29208 |
+
"step": 1123
|
| 29209 |
+
},
|
| 29210 |
+
{
|
| 29211 |
+
"clip_ratio/high_max": 0.0,
|
| 29212 |
+
"clip_ratio/high_mean": 0.0,
|
| 29213 |
+
"clip_ratio/low_mean": 0.0,
|
| 29214 |
+
"clip_ratio/low_min": 0.0,
|
| 29215 |
+
"clip_ratio/region_mean": 0.0,
|
| 29216 |
+
"completions/clipped_ratio": 0.0,
|
| 29217 |
+
"completions/max_length": 1.0,
|
| 29218 |
+
"completions/max_terminated_length": 1.0,
|
| 29219 |
+
"completions/mean_length": 1.0,
|
| 29220 |
+
"completions/mean_terminated_length": 1.0,
|
| 29221 |
+
"completions/min_length": 1.0,
|
| 29222 |
+
"completions/min_terminated_length": 1.0,
|
| 29223 |
+
"epoch": 8.915187376725838,
|
| 29224 |
+
"frac_reward_zero_std": 1.0,
|
| 29225 |
+
"grad_norm": 0.00017613630916457623,
|
| 29226 |
+
"kl": 0.00010449811816215515,
|
| 29227 |
+
"learning_rate": 2e-06,
|
| 29228 |
+
"loss": 0.0,
|
| 29229 |
+
"num_tokens": 623028.0,
|
| 29230 |
+
"reward": 0.0009184567024931312,
|
| 29231 |
+
"reward_std": 0.0,
|
| 29232 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 29233 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 29234 |
+
"step": 1124
|
| 29235 |
+
},
|
| 29236 |
+
{
|
| 29237 |
+
"clip_ratio/high_max": 0.0,
|
| 29238 |
+
"clip_ratio/high_mean": 0.0,
|
| 29239 |
+
"clip_ratio/low_mean": 0.0,
|
| 29240 |
+
"clip_ratio/low_min": 0.0,
|
| 29241 |
+
"clip_ratio/region_mean": 0.0,
|
| 29242 |
+
"completions/clipped_ratio": 0.0,
|
| 29243 |
+
"completions/max_length": 1.0,
|
| 29244 |
+
"completions/max_terminated_length": 1.0,
|
| 29245 |
+
"completions/mean_length": 1.0,
|
| 29246 |
+
"completions/mean_terminated_length": 1.0,
|
| 29247 |
+
"completions/min_length": 1.0,
|
| 29248 |
+
"completions/min_terminated_length": 1.0,
|
| 29249 |
+
"epoch": 8.923076923076923,
|
| 29250 |
+
"frac_reward_zero_std": 1.0,
|
| 29251 |
+
"grad_norm": 0.0006031371303834021,
|
| 29252 |
+
"kl": 0.00023985281586647034,
|
| 29253 |
+
"learning_rate": 2e-06,
|
| 29254 |
+
"loss": 0.0,
|
| 29255 |
+
"num_tokens": 623634.0,
|
| 29256 |
+
"reward": 0.0009184567024931312,
|
| 29257 |
+
"reward_std": 0.0,
|
| 29258 |
+
"rewards/reward_high_identity_attack_score/mean": 0.0001826662919484079,
|
| 29259 |
+
"rewards/reward_high_identity_attack_score/std": 0.0,
|
| 29260 |
+
"step": 1125
|
| 29261 |
}
|
| 29262 |
],
|
| 29263 |
"logging_steps": 1,
|
| 29264 |
"max_steps": 1200,
|
| 29265 |
+
"num_input_tokens_seen": 623634,
|
| 29266 |
"num_train_epochs": 10,
|
| 29267 |
"save_steps": 75,
|
| 29268 |
"stateful_callbacks": {
|