phi-rebus-grpo-test / checkpoint-250 /trainer_state.json
gsarti's picture
Upload 1170 files
3cdd523 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.05555555555555555,
"eval_steps": 500,
"global_step": 250,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"completion_length": 309.04,
"epoch": 0.011111111111111112,
"grad_norm": NaN,
"kl": 222.66988746643065,
"learning_rate": 5.444444444444444e-07,
"loss": 8.9068,
"reward": -18.06266725540161,
"reward_std": 6.391496688127518,
"rewards/check_first_pass": -9.93666666984558,
"rewards/check_solution": -7.600000243186951,
"rewards/check_solution_words": -6.068000079095364,
"rewards/check_word_guesses": 5.54200014591217,
"step": 50
},
{
"completion_length": 368.64,
"epoch": 0.022222222222222223,
"grad_norm": NaN,
"kl": 557.3866543316841,
"learning_rate": 1.1e-06,
"loss": 22.2955,
"reward": -17.431167125701904,
"reward_std": 5.4497878611087796,
"rewards/check_first_pass": -9.859833374023438,
"rewards/check_solution": -7.2583335638046265,
"rewards/check_solution_words": -5.878333521187305,
"rewards/check_word_guesses": 5.565333509445191,
"step": 100
},
{
"completion_length": 346.92,
"epoch": 0.03333333333333333,
"grad_norm": NaN,
"kl": 4737.8455329227445,
"learning_rate": 1.6555555555555559e-06,
"loss": 189.5138,
"reward": -18.070500688552855,
"reward_std": 7.8515861177444455,
"rewards/check_first_pass": -9.786166725158692,
"rewards/check_solution": -7.325000324249268,
"rewards/check_solution_words": -7.050333592891693,
"rewards/check_word_guesses": 6.091000156402588,
"step": 150
},
{
"completion_length": 322.2,
"epoch": 0.044444444444444446,
"grad_norm": NaN,
"kl": 32057.38775477886,
"learning_rate": 2.2111111111111113e-06,
"loss": 1282.2956,
"reward": -15.816333751678467,
"reward_std": 6.191992573738098,
"rewards/check_first_pass": -9.895000038146973,
"rewards/check_solution": -7.100000200271606,
"rewards/check_solution_words": -4.8800000631809235,
"rewards/check_word_guesses": 6.058666839599609,
"step": 200
},
{
"completion_length": 349.9,
"epoch": 0.05555555555555555,
"grad_norm": NaN,
"kl": 5074.338300862312,
"learning_rate": 2.766666666666667e-06,
"loss": 202.9736,
"reward": -17.724167308807374,
"reward_std": 6.207637655735016,
"rewards/check_first_pass": -9.912833366394043,
"rewards/check_solution": -7.358333556652069,
"rewards/check_solution_words": -6.180666843354702,
"rewards/check_word_guesses": 5.727666816711426,
"step": 250
}
],
"logging_steps": 50,
"max_steps": 4500,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 12,
"trial_name": null,
"trial_params": null
}