gsarti's picture
Upload 1170 files
3cdd523 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.45555555555555555,
"eval_steps": 500,
"global_step": 2050,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"completion_length": 309.04,
"epoch": 0.011111111111111112,
"grad_norm": NaN,
"kl": 222.66988746643065,
"learning_rate": 5.444444444444444e-07,
"loss": 8.9068,
"reward": -18.06266725540161,
"reward_std": 6.391496688127518,
"rewards/check_first_pass": -9.93666666984558,
"rewards/check_solution": -7.600000243186951,
"rewards/check_solution_words": -6.068000079095364,
"rewards/check_word_guesses": 5.54200014591217,
"step": 50
},
{
"completion_length": 368.64,
"epoch": 0.022222222222222223,
"grad_norm": NaN,
"kl": 557.3866543316841,
"learning_rate": 1.1e-06,
"loss": 22.2955,
"reward": -17.431167125701904,
"reward_std": 5.4497878611087796,
"rewards/check_first_pass": -9.859833374023438,
"rewards/check_solution": -7.2583335638046265,
"rewards/check_solution_words": -5.878333521187305,
"rewards/check_word_guesses": 5.565333509445191,
"step": 100
},
{
"completion_length": 346.92,
"epoch": 0.03333333333333333,
"grad_norm": NaN,
"kl": 4737.8455329227445,
"learning_rate": 1.6555555555555559e-06,
"loss": 189.5138,
"reward": -18.070500688552855,
"reward_std": 7.8515861177444455,
"rewards/check_first_pass": -9.786166725158692,
"rewards/check_solution": -7.325000324249268,
"rewards/check_solution_words": -7.050333592891693,
"rewards/check_word_guesses": 6.091000156402588,
"step": 150
},
{
"completion_length": 322.2,
"epoch": 0.044444444444444446,
"grad_norm": NaN,
"kl": 32057.38775477886,
"learning_rate": 2.2111111111111113e-06,
"loss": 1282.2956,
"reward": -15.816333751678467,
"reward_std": 6.191992573738098,
"rewards/check_first_pass": -9.895000038146973,
"rewards/check_solution": -7.100000200271606,
"rewards/check_solution_words": -4.8800000631809235,
"rewards/check_word_guesses": 6.058666839599609,
"step": 200
},
{
"completion_length": 349.9,
"epoch": 0.05555555555555555,
"grad_norm": NaN,
"kl": 5074.338300862312,
"learning_rate": 2.766666666666667e-06,
"loss": 202.9736,
"reward": -17.724167308807374,
"reward_std": 6.207637655735016,
"rewards/check_first_pass": -9.912833366394043,
"rewards/check_solution": -7.358333556652069,
"rewards/check_solution_words": -6.180666843354702,
"rewards/check_word_guesses": 5.727666816711426,
"step": 250
},
{
"completion_length": 336.42,
"epoch": 0.06666666666666667,
"grad_norm": NaN,
"kl": 315.6221669435501,
"learning_rate": 3.322222222222222e-06,
"loss": 12.6249,
"reward": -16.775000438690185,
"reward_std": 5.353409328460693,
"rewards/check_first_pass": -9.81633337020874,
"rewards/check_solution": -7.341666927337647,
"rewards/check_solution_words": -5.623000101844471,
"rewards/check_word_guesses": 6.006000165939331,
"step": 300
},
{
"completion_length": 307.04,
"epoch": 0.07777777777777778,
"grad_norm": NaN,
"kl": 6570.5719665384295,
"learning_rate": 3.877777777777778e-06,
"loss": 262.8229,
"reward": -17.077000389099123,
"reward_std": 5.669408960938454,
"rewards/check_first_pass": -9.886666717529296,
"rewards/check_solution": -7.250000200271606,
"rewards/check_solution_words": -5.695666807889938,
"rewards/check_word_guesses": 5.755333452224732,
"step": 350
},
{
"completion_length": 313.08,
"epoch": 0.08888888888888889,
"grad_norm": NaN,
"kl": 1532.4928638124466,
"learning_rate": 4.433333333333334e-06,
"loss": 61.2997,
"reward": -17.507167091369627,
"reward_std": 5.527194731235504,
"rewards/check_first_pass": -9.908166694641114,
"rewards/check_solution": -7.30833353638649,
"rewards/check_solution_words": -6.251000165343284,
"rewards/check_word_guesses": 5.9603334903717045,
"step": 400
},
{
"completion_length": 329.37666687011716,
"epoch": 0.1,
"grad_norm": NaN,
"kl": 1601.70994805336,
"learning_rate": 4.988888888888889e-06,
"loss": 64.0684,
"reward": -17.980167026519776,
"reward_std": 6.458992264270782,
"rewards/check_first_pass": -9.801500053405762,
"rewards/check_solution": -7.2666668963432315,
"rewards/check_solution_words": -6.554666934013366,
"rewards/check_word_guesses": 5.64266683101654,
"step": 450
},
{
"completion_length": 307.52,
"epoch": 0.1111111111111111,
"grad_norm": NaN,
"kl": 702.5347912788391,
"learning_rate": 4.998194324998843e-06,
"loss": 28.1014,
"reward": -16.74250042915344,
"reward_std": 6.445133271217347,
"rewards/check_first_pass": -9.824500045776368,
"rewards/check_solution": -7.308333573341369,
"rewards/check_solution_words": -5.524333542585373,
"rewards/check_word_guesses": 5.914666795730591,
"step": 500
},
{
"completion_length": 335.9,
"epoch": 0.12222222222222222,
"grad_norm": NaN,
"kl": 19601.83191286087,
"learning_rate": 4.992631880567301e-06,
"loss": 784.0733,
"reward": -17.86000030517578,
"reward_std": 7.05341215133667,
"rewards/check_first_pass": -9.785000047683717,
"rewards/check_solution": -7.49166690826416,
"rewards/check_solution_words": -6.301333554983139,
"rewards/check_word_guesses": 5.71800015449524,
"step": 550
},
{
"completion_length": 298.2,
"epoch": 0.13333333333333333,
"grad_norm": NaN,
"kl": 1115.117756202221,
"learning_rate": 4.983320281008445e-06,
"loss": 44.6047,
"reward": -16.99700037956238,
"reward_std": 5.631768324375153,
"rewards/check_first_pass": -9.813000040054321,
"rewards/check_solution": -7.041666898727417,
"rewards/check_solution_words": -6.250666889995337,
"rewards/check_word_guesses": 6.108333473205566,
"step": 600
},
{
"completion_length": 318.48,
"epoch": 0.14444444444444443,
"grad_norm": NaN,
"kl": 3946.661036362648,
"learning_rate": 4.970273531852536e-06,
"loss": 157.8665,
"reward": -17.999333934783934,
"reward_std": 6.210418889522552,
"rewards/check_first_pass": -9.89133337020874,
"rewards/check_solution": -7.458333578109741,
"rewards/check_solution_words": -6.459333531856537,
"rewards/check_word_guesses": 5.809666805267334,
"step": 650
},
{
"completion_length": 351.9,
"epoch": 0.15555555555555556,
"grad_norm": NaN,
"kl": 2870.44579018116,
"learning_rate": 4.953511256649632e-06,
"loss": 114.8178,
"reward": -17.553834075927735,
"reward_std": 5.835132333040238,
"rewards/check_first_pass": -9.929833374023438,
"rewards/check_solution": -7.383333520889282,
"rewards/check_solution_words": -6.055666868388653,
"rewards/check_word_guesses": 5.815000147819519,
"step": 700
},
{
"completion_length": 308.34,
"epoch": 0.16666666666666666,
"grad_norm": NaN,
"kl": 164.13174985408784,
"learning_rate": 4.933058667453916e-06,
"loss": 6.5653,
"reward": -16.56966731071472,
"reward_std": 6.621588716208935,
"rewards/check_first_pass": -9.908333358764649,
"rewards/check_solution": -7.291666874885559,
"rewards/check_solution_words": -5.485666743516922,
"rewards/check_word_guesses": 6.116000127792359,
"step": 750
},
{
"completion_length": 342.34,
"epoch": 0.17777777777777778,
"grad_norm": NaN,
"kl": 1447.0631847190857,
"learning_rate": 4.9089465269023596e-06,
"loss": 57.8825,
"reward": -17.248333780765535,
"reward_std": 6.114709348678589,
"rewards/check_first_pass": -9.830000019073486,
"rewards/check_solution": -7.2333335685729985,
"rewards/check_solution_words": -6.300666825771332,
"rewards/check_word_guesses": 6.115666842460632,
"step": 800
},
{
"completion_length": 354.18,
"epoch": 0.18888888888888888,
"grad_norm": NaN,
"kl": 23526.59426044941,
"learning_rate": 4.881211101944802e-06,
"loss": 941.0638,
"reward": -17.54183391571045,
"reward_std": 6.4859533834457395,
"rewards/check_first_pass": -9.808833379745483,
"rewards/check_solution": -7.708333535194397,
"rewards/check_solution_words": -5.9636668264865875,
"rewards/check_word_guesses": 5.939000129699707,
"step": 850
},
{
"completion_length": 308.18,
"epoch": 0.2,
"grad_norm": NaN,
"kl": 138.43031896591185,
"learning_rate": 4.84989410929501e-06,
"loss": 5.5372,
"reward": -17.896833839416505,
"reward_std": 5.668911509513855,
"rewards/check_first_pass": -9.863166694641114,
"rewards/check_solution": -7.233333587646484,
"rewards/check_solution_words": -6.624666909873485,
"rewards/check_word_guesses": 5.824333515167236,
"step": 900
},
{
"completion_length": 314.82,
"epoch": 0.2111111111111111,
"grad_norm": NaN,
"kl": 1218.171366314888,
"learning_rate": 4.815042652684779e-06,
"loss": 48.7269,
"reward": -16.533334035873413,
"reward_std": 7.360376672744751,
"rewards/check_first_pass": -9.612000093460082,
"rewards/check_solution": -7.158333578109741,
"rewards/check_solution_words": -5.995000202357769,
"rewards/check_word_guesses": 6.232000198364258,
"step": 950
},
{
"completion_length": 339.66,
"epoch": 0.2222222222222222,
"grad_norm": NaN,
"kl": 174.28998464107514,
"learning_rate": 4.776709152015443e-06,
"loss": 6.9716,
"reward": -17.22483383178711,
"reward_std": 6.013938563764095,
"rewards/check_first_pass": -9.816166725158691,
"rewards/check_solution": -7.008333616256714,
"rewards/check_solution_words": -6.318000204563141,
"rewards/check_word_guesses": 5.9176667785644534,
"step": 1000
},
{
"completion_length": 299.26,
"epoch": 0.23333333333333334,
"grad_norm": NaN,
"kl": 240.47271874427796,
"learning_rate": 4.734951264513368e-06,
"loss": 9.6189,
"reward": -16.81516722679138,
"reward_std": 5.74999471783638,
"rewards/check_first_pass": -9.819500045776367,
"rewards/check_solution": -7.191666922569275,
"rewards/check_solution_words": -5.3703335279226305,
"rewards/check_word_guesses": 5.566333475112915,
"step": 1050
},
{
"completion_length": 290.3,
"epoch": 0.24444444444444444,
"grad_norm": NaN,
"kl": 5930.2736493730545,
"learning_rate": 4.689831798008002e-06,
"loss": 237.2109,
"reward": -16.739000663757324,
"reward_std": 6.145890753269196,
"rewards/check_first_pass": -9.928000049591065,
"rewards/check_solution": -7.291666860580444,
"rewards/check_solution_words": -5.206000205874443,
"rewards/check_word_guesses": 5.686666803359985,
"step": 1100
},
{
"completion_length": 313.16,
"epoch": 0.25555555555555554,
"grad_norm": NaN,
"kl": 790.292287569046,
"learning_rate": 4.641418616462938e-06,
"loss": 31.6117,
"reward": -18.431500701904298,
"reward_std": 5.68714599609375,
"rewards/check_first_pass": -9.87116668701172,
"rewards/check_solution": -7.8250002098083495,
"rewards/check_solution_words": -6.450666869878769,
"rewards/check_word_guesses": 5.7153334808349605,
"step": 1150
},
{
"completion_length": 288.62,
"epoch": 0.26666666666666666,
"grad_norm": NaN,
"kl": 299.9416353178024,
"learning_rate": 4.589784537902062e-06,
"loss": 11.9977,
"reward": -17.612167091369628,
"reward_std": 5.032542688846588,
"rewards/check_first_pass": -9.781500082015992,
"rewards/check_solution": -7.425000247955322,
"rewards/check_solution_words": -6.234666793346405,
"rewards/check_word_guesses": 5.829000115394592,
"step": 1200
},
{
"completion_length": 322.98,
"epoch": 0.2777777777777778,
"grad_norm": NaN,
"kl": 2793.8524017858504,
"learning_rate": 4.53500722488433e-06,
"loss": 111.7541,
"reward": -17.664333724975585,
"reward_std": 5.747455310821533,
"rewards/check_first_pass": -9.923000030517578,
"rewards/check_solution": -7.4000002384185795,
"rewards/check_solution_words": -6.01900016926229,
"rewards/check_word_guesses": 5.6776668119430544,
"step": 1250
},
{
"completion_length": 339.3,
"epoch": 0.28888888888888886,
"grad_norm": NaN,
"kl": 3813.7429452037813,
"learning_rate": 4.477169067691902e-06,
"loss": 152.5497,
"reward": -17.690833921432496,
"reward_std": 5.353043854236603,
"rewards/check_first_pass": -9.892833366394044,
"rewards/check_solution": -7.600000247955323,
"rewards/check_solution_words": -5.904333523511887,
"rewards/check_word_guesses": 5.706333441734314,
"step": 1300
},
{
"completion_length": 310.96,
"epoch": 0.3,
"grad_norm": NaN,
"kl": 304.47424302577974,
"learning_rate": 4.416357060407332e-06,
"loss": 12.179,
"reward": -17.26916711807251,
"reward_std": 5.263389755487442,
"rewards/check_first_pass": -9.854500017166139,
"rewards/check_solution": -7.250000243186951,
"rewards/check_solution_words": -5.872333557605743,
"rewards/check_word_guesses": 5.707666797637939,
"step": 1350
},
{
"completion_length": 315.5,
"epoch": 0.3111111111111111,
"grad_norm": NaN,
"kl": 13639.832137713433,
"learning_rate": 4.3526626700662e-06,
"loss": 545.5934,
"reward": -18.18800064086914,
"reward_std": 6.097169952392578,
"rewards/check_first_pass": -9.844333381652833,
"rewards/check_solution": -7.566666946411133,
"rewards/check_solution_words": -6.689333482980728,
"rewards/check_word_guesses": 5.91233346939087,
"step": 1400
},
{
"completion_length": 351.12,
"epoch": 0.32222222222222224,
"grad_norm": NaN,
"kl": 2326.0249900770186,
"learning_rate": 4.286181699082008e-06,
"loss": 93.041,
"reward": -18.623000659942626,
"reward_std": 6.812479295730591,
"rewards/check_first_pass": -9.90633337020874,
"rewards/check_solution": -7.350000295639038,
"rewards/check_solution_words": -7.338333506584167,
"rewards/check_word_guesses": 5.971666851043701,
"step": 1450
},
{
"completion_length": 366.32,
"epoch": 0.3333333333333333,
"grad_norm": NaN,
"kl": 16549.004248199464,
"learning_rate": 4.217014141150248e-06,
"loss": 661.9602,
"reward": -18.263500604629517,
"reward_std": 6.059882239103318,
"rewards/check_first_pass": -9.878166694641113,
"rewards/check_solution": -7.3666668796539305,
"rewards/check_solution_words": -6.7500001257658,
"rewards/check_word_guesses": 5.731333417892456,
"step": 1500
},
{
"completion_length": 320.5,
"epoch": 0.34444444444444444,
"grad_norm": NaN,
"kl": 8347.091685709953,
"learning_rate": 4.145264030848381e-06,
"loss": 333.8836,
"reward": -17.722667150497436,
"reward_std": 5.240287501811981,
"rewards/check_first_pass": -9.976666679382324,
"rewards/check_solution": -7.566666932106018,
"rewards/check_solution_words": -5.952333456873894,
"rewards/check_word_guesses": 5.773000164031982,
"step": 1550
},
{
"completion_length": 313.42,
"epoch": 0.35555555555555557,
"grad_norm": NaN,
"kl": 864.0743899011612,
"learning_rate": 4.071039287157953e-06,
"loss": 34.563,
"reward": -17.983000602722168,
"reward_std": 5.850111997127533,
"rewards/check_first_pass": -9.918333358764649,
"rewards/check_solution": -7.208333625793457,
"rewards/check_solution_words": -6.453333538174629,
"rewards/check_word_guesses": 5.597000193595886,
"step": 1600
},
{
"completion_length": 329.72,
"epoch": 0.36666666666666664,
"grad_norm": NaN,
"kl": 396.7952742242813,
"learning_rate": 3.9944515511441995e-06,
"loss": 15.8718,
"reward": -16.43366714477539,
"reward_std": 7.244253120422363,
"rewards/check_first_pass": -9.88666669845581,
"rewards/check_solution": -6.900000200271607,
"rewards/check_solution_words": -5.581333435922861,
"rewards/check_word_guesses": 5.93433349609375,
"step": 1650
},
{
"completion_length": 295.6,
"epoch": 0.37777777777777777,
"grad_norm": NaN,
"kl": 2458.04032143116,
"learning_rate": 3.915616018037271e-06,
"loss": 98.3216,
"reward": -16.582167387008667,
"reward_std": 6.116619675159455,
"rewards/check_first_pass": -9.845500040054322,
"rewards/check_solution": -7.29166687965393,
"rewards/check_solution_words": -5.515666830142339,
"rewards/check_word_guesses": 6.0706668472290035,
"step": 1700
},
{
"completion_length": 305.44,
"epoch": 0.3888888888888889,
"grad_norm": NaN,
"kl": 6297.921968564987,
"learning_rate": 3.834651263967667e-06,
"loss": 251.9169,
"reward": -17.544833850860595,
"reward_std": 6.234307850599289,
"rewards/check_first_pass": -9.903166675567627,
"rewards/check_solution": -7.258333530426025,
"rewards/check_solution_words": -6.216000239551067,
"rewards/check_word_guesses": 5.832666802406311,
"step": 1750
},
{
"completion_length": 293.7,
"epoch": 0.4,
"grad_norm": NaN,
"kl": 4064.312862081528,
"learning_rate": 3.7516790676164795e-06,
"loss": 162.5725,
"reward": -17.36033399581909,
"reward_std": 5.197294096946717,
"rewards/check_first_pass": -9.983333339691162,
"rewards/check_solution": -7.516666932106018,
"rewards/check_solution_words": -5.423000110387802,
"rewards/check_word_guesses": 5.562666816711426,
"step": 1800
},
{
"completion_length": 320.76,
"epoch": 0.4111111111111111,
"grad_norm": NaN,
"kl": 4698.212507400513,
"learning_rate": 3.6668242270486736e-06,
"loss": 187.9285,
"reward": -17.611333808898927,
"reward_std": 5.37955255150795,
"rewards/check_first_pass": -9.976666679382324,
"rewards/check_solution": -7.441666889190674,
"rewards/check_solution_words": -6.205666851997376,
"rewards/check_word_guesses": 6.012666845321656,
"step": 1850
},
{
"completion_length": 318.02,
"epoch": 0.4222222222222222,
"grad_norm": NaN,
"kl": 467.2588349723816,
"learning_rate": 3.5802143720049565e-06,
"loss": 18.6904,
"reward": -18.40666706085205,
"reward_std": 5.797463660240173,
"rewards/check_first_pass": -9.95166669845581,
"rewards/check_solution": -7.400000267028808,
"rewards/check_solution_words": -6.630000138878822,
"rewards/check_word_guesses": 5.575000162124634,
"step": 1900
},
{
"completion_length": 317.28,
"epoch": 0.43333333333333335,
"grad_norm": NaN,
"kl": 889510.5962282228,
"learning_rate": 3.4919797719345172e-06,
"loss": 35580.425,
"reward": -17.448667163848878,
"reward_std": 4.699667553901673,
"rewards/check_first_pass": -9.881666679382324,
"rewards/check_solution": -7.475000233650207,
"rewards/check_solution_words": -5.838000079877674,
"rewards/check_word_guesses": 5.746000151634217,
"step": 1950
},
{
"completion_length": 302.48,
"epoch": 0.4444444444444444,
"grad_norm": NaN,
"kl": 5604.918187556267,
"learning_rate": 3.402253140057402e-06,
"loss": 224.1967,
"reward": -16.404333744049072,
"reward_std": 5.4958923101425174,
"rewards/check_first_pass": -9.890000019073486,
"rewards/check_solution": -7.033333592414856,
"rewards/check_solution_words": -5.368000164031982,
"rewards/check_word_guesses": 5.887000150680542,
"step": 2000
},
{
"completion_length": 321.74,
"epoch": 0.45555555555555555,
"grad_norm": NaN,
"kl": 7411.243695282936,
"learning_rate": 3.311169433751226e-06,
"loss": 296.4498,
"reward": -16.623500537872314,
"reward_std": 6.51646169424057,
"rewards/check_first_pass": -9.827833366394042,
"rewards/check_solution": -6.908333498239517,
"rewards/check_solution_words": -5.6363335295766595,
"rewards/check_word_guesses": 5.749000191688538,
"step": 2050
}
],
"logging_steps": 50,
"max_steps": 4500,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 12,
"trial_name": null,
"trial_params": null
}