| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.05555555555555555, | |
| "eval_steps": 500, | |
| "global_step": 250, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "completion_length": 309.04, | |
| "epoch": 0.011111111111111112, | |
| "grad_norm": NaN, | |
| "kl": 222.66988746643065, | |
| "learning_rate": 5.444444444444444e-07, | |
| "loss": 8.9068, | |
| "reward": -18.06266725540161, | |
| "reward_std": 6.391496688127518, | |
| "rewards/check_first_pass": -9.93666666984558, | |
| "rewards/check_solution": -7.600000243186951, | |
| "rewards/check_solution_words": -6.068000079095364, | |
| "rewards/check_word_guesses": 5.54200014591217, | |
| "step": 50 | |
| }, | |
| { | |
| "completion_length": 368.64, | |
| "epoch": 0.022222222222222223, | |
| "grad_norm": NaN, | |
| "kl": 557.3866543316841, | |
| "learning_rate": 1.1e-06, | |
| "loss": 22.2955, | |
| "reward": -17.431167125701904, | |
| "reward_std": 5.4497878611087796, | |
| "rewards/check_first_pass": -9.859833374023438, | |
| "rewards/check_solution": -7.2583335638046265, | |
| "rewards/check_solution_words": -5.878333521187305, | |
| "rewards/check_word_guesses": 5.565333509445191, | |
| "step": 100 | |
| }, | |
| { | |
| "completion_length": 346.92, | |
| "epoch": 0.03333333333333333, | |
| "grad_norm": NaN, | |
| "kl": 4737.8455329227445, | |
| "learning_rate": 1.6555555555555559e-06, | |
| "loss": 189.5138, | |
| "reward": -18.070500688552855, | |
| "reward_std": 7.8515861177444455, | |
| "rewards/check_first_pass": -9.786166725158692, | |
| "rewards/check_solution": -7.325000324249268, | |
| "rewards/check_solution_words": -7.050333592891693, | |
| "rewards/check_word_guesses": 6.091000156402588, | |
| "step": 150 | |
| }, | |
| { | |
| "completion_length": 322.2, | |
| "epoch": 0.044444444444444446, | |
| "grad_norm": NaN, | |
| "kl": 32057.38775477886, | |
| "learning_rate": 2.2111111111111113e-06, | |
| "loss": 1282.2956, | |
| "reward": -15.816333751678467, | |
| "reward_std": 6.191992573738098, | |
| "rewards/check_first_pass": -9.895000038146973, | |
| "rewards/check_solution": -7.100000200271606, | |
| "rewards/check_solution_words": -4.8800000631809235, | |
| "rewards/check_word_guesses": 6.058666839599609, | |
| "step": 200 | |
| }, | |
| { | |
| "completion_length": 349.9, | |
| "epoch": 0.05555555555555555, | |
| "grad_norm": NaN, | |
| "kl": 5074.338300862312, | |
| "learning_rate": 2.766666666666667e-06, | |
| "loss": 202.9736, | |
| "reward": -17.724167308807374, | |
| "reward_std": 6.207637655735016, | |
| "rewards/check_first_pass": -9.912833366394043, | |
| "rewards/check_solution": -7.358333556652069, | |
| "rewards/check_solution_words": -6.180666843354702, | |
| "rewards/check_word_guesses": 5.727666816711426, | |
| "step": 250 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 4500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 12, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |