| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.45555555555555555, | |
| "eval_steps": 500, | |
| "global_step": 2050, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "completion_length": 309.04, | |
| "epoch": 0.011111111111111112, | |
| "grad_norm": NaN, | |
| "kl": 222.66988746643065, | |
| "learning_rate": 5.444444444444444e-07, | |
| "loss": 8.9068, | |
| "reward": -18.06266725540161, | |
| "reward_std": 6.391496688127518, | |
| "rewards/check_first_pass": -9.93666666984558, | |
| "rewards/check_solution": -7.600000243186951, | |
| "rewards/check_solution_words": -6.068000079095364, | |
| "rewards/check_word_guesses": 5.54200014591217, | |
| "step": 50 | |
| }, | |
| { | |
| "completion_length": 368.64, | |
| "epoch": 0.022222222222222223, | |
| "grad_norm": NaN, | |
| "kl": 557.3866543316841, | |
| "learning_rate": 1.1e-06, | |
| "loss": 22.2955, | |
| "reward": -17.431167125701904, | |
| "reward_std": 5.4497878611087796, | |
| "rewards/check_first_pass": -9.859833374023438, | |
| "rewards/check_solution": -7.2583335638046265, | |
| "rewards/check_solution_words": -5.878333521187305, | |
| "rewards/check_word_guesses": 5.565333509445191, | |
| "step": 100 | |
| }, | |
| { | |
| "completion_length": 346.92, | |
| "epoch": 0.03333333333333333, | |
| "grad_norm": NaN, | |
| "kl": 4737.8455329227445, | |
| "learning_rate": 1.6555555555555559e-06, | |
| "loss": 189.5138, | |
| "reward": -18.070500688552855, | |
| "reward_std": 7.8515861177444455, | |
| "rewards/check_first_pass": -9.786166725158692, | |
| "rewards/check_solution": -7.325000324249268, | |
| "rewards/check_solution_words": -7.050333592891693, | |
| "rewards/check_word_guesses": 6.091000156402588, | |
| "step": 150 | |
| }, | |
| { | |
| "completion_length": 322.2, | |
| "epoch": 0.044444444444444446, | |
| "grad_norm": NaN, | |
| "kl": 32057.38775477886, | |
| "learning_rate": 2.2111111111111113e-06, | |
| "loss": 1282.2956, | |
| "reward": -15.816333751678467, | |
| "reward_std": 6.191992573738098, | |
| "rewards/check_first_pass": -9.895000038146973, | |
| "rewards/check_solution": -7.100000200271606, | |
| "rewards/check_solution_words": -4.8800000631809235, | |
| "rewards/check_word_guesses": 6.058666839599609, | |
| "step": 200 | |
| }, | |
| { | |
| "completion_length": 349.9, | |
| "epoch": 0.05555555555555555, | |
| "grad_norm": NaN, | |
| "kl": 5074.338300862312, | |
| "learning_rate": 2.766666666666667e-06, | |
| "loss": 202.9736, | |
| "reward": -17.724167308807374, | |
| "reward_std": 6.207637655735016, | |
| "rewards/check_first_pass": -9.912833366394043, | |
| "rewards/check_solution": -7.358333556652069, | |
| "rewards/check_solution_words": -6.180666843354702, | |
| "rewards/check_word_guesses": 5.727666816711426, | |
| "step": 250 | |
| }, | |
| { | |
| "completion_length": 336.42, | |
| "epoch": 0.06666666666666667, | |
| "grad_norm": NaN, | |
| "kl": 315.6221669435501, | |
| "learning_rate": 3.322222222222222e-06, | |
| "loss": 12.6249, | |
| "reward": -16.775000438690185, | |
| "reward_std": 5.353409328460693, | |
| "rewards/check_first_pass": -9.81633337020874, | |
| "rewards/check_solution": -7.341666927337647, | |
| "rewards/check_solution_words": -5.623000101844471, | |
| "rewards/check_word_guesses": 6.006000165939331, | |
| "step": 300 | |
| }, | |
| { | |
| "completion_length": 307.04, | |
| "epoch": 0.07777777777777778, | |
| "grad_norm": NaN, | |
| "kl": 6570.5719665384295, | |
| "learning_rate": 3.877777777777778e-06, | |
| "loss": 262.8229, | |
| "reward": -17.077000389099123, | |
| "reward_std": 5.669408960938454, | |
| "rewards/check_first_pass": -9.886666717529296, | |
| "rewards/check_solution": -7.250000200271606, | |
| "rewards/check_solution_words": -5.695666807889938, | |
| "rewards/check_word_guesses": 5.755333452224732, | |
| "step": 350 | |
| }, | |
| { | |
| "completion_length": 313.08, | |
| "epoch": 0.08888888888888889, | |
| "grad_norm": NaN, | |
| "kl": 1532.4928638124466, | |
| "learning_rate": 4.433333333333334e-06, | |
| "loss": 61.2997, | |
| "reward": -17.507167091369627, | |
| "reward_std": 5.527194731235504, | |
| "rewards/check_first_pass": -9.908166694641114, | |
| "rewards/check_solution": -7.30833353638649, | |
| "rewards/check_solution_words": -6.251000165343284, | |
| "rewards/check_word_guesses": 5.9603334903717045, | |
| "step": 400 | |
| }, | |
| { | |
| "completion_length": 329.37666687011716, | |
| "epoch": 0.1, | |
| "grad_norm": NaN, | |
| "kl": 1601.70994805336, | |
| "learning_rate": 4.988888888888889e-06, | |
| "loss": 64.0684, | |
| "reward": -17.980167026519776, | |
| "reward_std": 6.458992264270782, | |
| "rewards/check_first_pass": -9.801500053405762, | |
| "rewards/check_solution": -7.2666668963432315, | |
| "rewards/check_solution_words": -6.554666934013366, | |
| "rewards/check_word_guesses": 5.64266683101654, | |
| "step": 450 | |
| }, | |
| { | |
| "completion_length": 307.52, | |
| "epoch": 0.1111111111111111, | |
| "grad_norm": NaN, | |
| "kl": 702.5347912788391, | |
| "learning_rate": 4.998194324998843e-06, | |
| "loss": 28.1014, | |
| "reward": -16.74250042915344, | |
| "reward_std": 6.445133271217347, | |
| "rewards/check_first_pass": -9.824500045776368, | |
| "rewards/check_solution": -7.308333573341369, | |
| "rewards/check_solution_words": -5.524333542585373, | |
| "rewards/check_word_guesses": 5.914666795730591, | |
| "step": 500 | |
| }, | |
| { | |
| "completion_length": 335.9, | |
| "epoch": 0.12222222222222222, | |
| "grad_norm": NaN, | |
| "kl": 19601.83191286087, | |
| "learning_rate": 4.992631880567301e-06, | |
| "loss": 784.0733, | |
| "reward": -17.86000030517578, | |
| "reward_std": 7.05341215133667, | |
| "rewards/check_first_pass": -9.785000047683717, | |
| "rewards/check_solution": -7.49166690826416, | |
| "rewards/check_solution_words": -6.301333554983139, | |
| "rewards/check_word_guesses": 5.71800015449524, | |
| "step": 550 | |
| }, | |
| { | |
| "completion_length": 298.2, | |
| "epoch": 0.13333333333333333, | |
| "grad_norm": NaN, | |
| "kl": 1115.117756202221, | |
| "learning_rate": 4.983320281008445e-06, | |
| "loss": 44.6047, | |
| "reward": -16.99700037956238, | |
| "reward_std": 5.631768324375153, | |
| "rewards/check_first_pass": -9.813000040054321, | |
| "rewards/check_solution": -7.041666898727417, | |
| "rewards/check_solution_words": -6.250666889995337, | |
| "rewards/check_word_guesses": 6.108333473205566, | |
| "step": 600 | |
| }, | |
| { | |
| "completion_length": 318.48, | |
| "epoch": 0.14444444444444443, | |
| "grad_norm": NaN, | |
| "kl": 3946.661036362648, | |
| "learning_rate": 4.970273531852536e-06, | |
| "loss": 157.8665, | |
| "reward": -17.999333934783934, | |
| "reward_std": 6.210418889522552, | |
| "rewards/check_first_pass": -9.89133337020874, | |
| "rewards/check_solution": -7.458333578109741, | |
| "rewards/check_solution_words": -6.459333531856537, | |
| "rewards/check_word_guesses": 5.809666805267334, | |
| "step": 650 | |
| }, | |
| { | |
| "completion_length": 351.9, | |
| "epoch": 0.15555555555555556, | |
| "grad_norm": NaN, | |
| "kl": 2870.44579018116, | |
| "learning_rate": 4.953511256649632e-06, | |
| "loss": 114.8178, | |
| "reward": -17.553834075927735, | |
| "reward_std": 5.835132333040238, | |
| "rewards/check_first_pass": -9.929833374023438, | |
| "rewards/check_solution": -7.383333520889282, | |
| "rewards/check_solution_words": -6.055666868388653, | |
| "rewards/check_word_guesses": 5.815000147819519, | |
| "step": 700 | |
| }, | |
| { | |
| "completion_length": 308.34, | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": NaN, | |
| "kl": 164.13174985408784, | |
| "learning_rate": 4.933058667453916e-06, | |
| "loss": 6.5653, | |
| "reward": -16.56966731071472, | |
| "reward_std": 6.621588716208935, | |
| "rewards/check_first_pass": -9.908333358764649, | |
| "rewards/check_solution": -7.291666874885559, | |
| "rewards/check_solution_words": -5.485666743516922, | |
| "rewards/check_word_guesses": 6.116000127792359, | |
| "step": 750 | |
| }, | |
| { | |
| "completion_length": 342.34, | |
| "epoch": 0.17777777777777778, | |
| "grad_norm": NaN, | |
| "kl": 1447.0631847190857, | |
| "learning_rate": 4.9089465269023596e-06, | |
| "loss": 57.8825, | |
| "reward": -17.248333780765535, | |
| "reward_std": 6.114709348678589, | |
| "rewards/check_first_pass": -9.830000019073486, | |
| "rewards/check_solution": -7.2333335685729985, | |
| "rewards/check_solution_words": -6.300666825771332, | |
| "rewards/check_word_guesses": 6.115666842460632, | |
| "step": 800 | |
| }, | |
| { | |
| "completion_length": 354.18, | |
| "epoch": 0.18888888888888888, | |
| "grad_norm": NaN, | |
| "kl": 23526.59426044941, | |
| "learning_rate": 4.881211101944802e-06, | |
| "loss": 941.0638, | |
| "reward": -17.54183391571045, | |
| "reward_std": 6.4859533834457395, | |
| "rewards/check_first_pass": -9.808833379745483, | |
| "rewards/check_solution": -7.708333535194397, | |
| "rewards/check_solution_words": -5.9636668264865875, | |
| "rewards/check_word_guesses": 5.939000129699707, | |
| "step": 850 | |
| }, | |
| { | |
| "completion_length": 308.18, | |
| "epoch": 0.2, | |
| "grad_norm": NaN, | |
| "kl": 138.43031896591185, | |
| "learning_rate": 4.84989410929501e-06, | |
| "loss": 5.5372, | |
| "reward": -17.896833839416505, | |
| "reward_std": 5.668911509513855, | |
| "rewards/check_first_pass": -9.863166694641114, | |
| "rewards/check_solution": -7.233333587646484, | |
| "rewards/check_solution_words": -6.624666909873485, | |
| "rewards/check_word_guesses": 5.824333515167236, | |
| "step": 900 | |
| }, | |
| { | |
| "completion_length": 314.82, | |
| "epoch": 0.2111111111111111, | |
| "grad_norm": NaN, | |
| "kl": 1218.171366314888, | |
| "learning_rate": 4.815042652684779e-06, | |
| "loss": 48.7269, | |
| "reward": -16.533334035873413, | |
| "reward_std": 7.360376672744751, | |
| "rewards/check_first_pass": -9.612000093460082, | |
| "rewards/check_solution": -7.158333578109741, | |
| "rewards/check_solution_words": -5.995000202357769, | |
| "rewards/check_word_guesses": 6.232000198364258, | |
| "step": 950 | |
| }, | |
| { | |
| "completion_length": 339.66, | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": NaN, | |
| "kl": 174.28998464107514, | |
| "learning_rate": 4.776709152015443e-06, | |
| "loss": 6.9716, | |
| "reward": -17.22483383178711, | |
| "reward_std": 6.013938563764095, | |
| "rewards/check_first_pass": -9.816166725158691, | |
| "rewards/check_solution": -7.008333616256714, | |
| "rewards/check_solution_words": -6.318000204563141, | |
| "rewards/check_word_guesses": 5.9176667785644534, | |
| "step": 1000 | |
| }, | |
| { | |
| "completion_length": 299.26, | |
| "epoch": 0.23333333333333334, | |
| "grad_norm": NaN, | |
| "kl": 240.47271874427796, | |
| "learning_rate": 4.734951264513368e-06, | |
| "loss": 9.6189, | |
| "reward": -16.81516722679138, | |
| "reward_std": 5.74999471783638, | |
| "rewards/check_first_pass": -9.819500045776367, | |
| "rewards/check_solution": -7.191666922569275, | |
| "rewards/check_solution_words": -5.3703335279226305, | |
| "rewards/check_word_guesses": 5.566333475112915, | |
| "step": 1050 | |
| }, | |
| { | |
| "completion_length": 290.3, | |
| "epoch": 0.24444444444444444, | |
| "grad_norm": NaN, | |
| "kl": 5930.2736493730545, | |
| "learning_rate": 4.689831798008002e-06, | |
| "loss": 237.2109, | |
| "reward": -16.739000663757324, | |
| "reward_std": 6.145890753269196, | |
| "rewards/check_first_pass": -9.928000049591065, | |
| "rewards/check_solution": -7.291666860580444, | |
| "rewards/check_solution_words": -5.206000205874443, | |
| "rewards/check_word_guesses": 5.686666803359985, | |
| "step": 1100 | |
| }, | |
| { | |
| "completion_length": 313.16, | |
| "epoch": 0.25555555555555554, | |
| "grad_norm": NaN, | |
| "kl": 790.292287569046, | |
| "learning_rate": 4.641418616462938e-06, | |
| "loss": 31.6117, | |
| "reward": -18.431500701904298, | |
| "reward_std": 5.68714599609375, | |
| "rewards/check_first_pass": -9.87116668701172, | |
| "rewards/check_solution": -7.8250002098083495, | |
| "rewards/check_solution_words": -6.450666869878769, | |
| "rewards/check_word_guesses": 5.7153334808349605, | |
| "step": 1150 | |
| }, | |
| { | |
| "completion_length": 288.62, | |
| "epoch": 0.26666666666666666, | |
| "grad_norm": NaN, | |
| "kl": 299.9416353178024, | |
| "learning_rate": 4.589784537902062e-06, | |
| "loss": 11.9977, | |
| "reward": -17.612167091369628, | |
| "reward_std": 5.032542688846588, | |
| "rewards/check_first_pass": -9.781500082015992, | |
| "rewards/check_solution": -7.425000247955322, | |
| "rewards/check_solution_words": -6.234666793346405, | |
| "rewards/check_word_guesses": 5.829000115394592, | |
| "step": 1200 | |
| }, | |
| { | |
| "completion_length": 322.98, | |
| "epoch": 0.2777777777777778, | |
| "grad_norm": NaN, | |
| "kl": 2793.8524017858504, | |
| "learning_rate": 4.53500722488433e-06, | |
| "loss": 111.7541, | |
| "reward": -17.664333724975585, | |
| "reward_std": 5.747455310821533, | |
| "rewards/check_first_pass": -9.923000030517578, | |
| "rewards/check_solution": -7.4000002384185795, | |
| "rewards/check_solution_words": -6.01900016926229, | |
| "rewards/check_word_guesses": 5.6776668119430544, | |
| "step": 1250 | |
| }, | |
| { | |
| "completion_length": 339.3, | |
| "epoch": 0.28888888888888886, | |
| "grad_norm": NaN, | |
| "kl": 3813.7429452037813, | |
| "learning_rate": 4.477169067691902e-06, | |
| "loss": 152.5497, | |
| "reward": -17.690833921432496, | |
| "reward_std": 5.353043854236603, | |
| "rewards/check_first_pass": -9.892833366394044, | |
| "rewards/check_solution": -7.600000247955323, | |
| "rewards/check_solution_words": -5.904333523511887, | |
| "rewards/check_word_guesses": 5.706333441734314, | |
| "step": 1300 | |
| }, | |
| { | |
| "completion_length": 310.96, | |
| "epoch": 0.3, | |
| "grad_norm": NaN, | |
| "kl": 304.47424302577974, | |
| "learning_rate": 4.416357060407332e-06, | |
| "loss": 12.179, | |
| "reward": -17.26916711807251, | |
| "reward_std": 5.263389755487442, | |
| "rewards/check_first_pass": -9.854500017166139, | |
| "rewards/check_solution": -7.250000243186951, | |
| "rewards/check_solution_words": -5.872333557605743, | |
| "rewards/check_word_guesses": 5.707666797637939, | |
| "step": 1350 | |
| }, | |
| { | |
| "completion_length": 315.5, | |
| "epoch": 0.3111111111111111, | |
| "grad_norm": NaN, | |
| "kl": 13639.832137713433, | |
| "learning_rate": 4.3526626700662e-06, | |
| "loss": 545.5934, | |
| "reward": -18.18800064086914, | |
| "reward_std": 6.097169952392578, | |
| "rewards/check_first_pass": -9.844333381652833, | |
| "rewards/check_solution": -7.566666946411133, | |
| "rewards/check_solution_words": -6.689333482980728, | |
| "rewards/check_word_guesses": 5.91233346939087, | |
| "step": 1400 | |
| }, | |
| { | |
| "completion_length": 351.12, | |
| "epoch": 0.32222222222222224, | |
| "grad_norm": NaN, | |
| "kl": 2326.0249900770186, | |
| "learning_rate": 4.286181699082008e-06, | |
| "loss": 93.041, | |
| "reward": -18.623000659942626, | |
| "reward_std": 6.812479295730591, | |
| "rewards/check_first_pass": -9.90633337020874, | |
| "rewards/check_solution": -7.350000295639038, | |
| "rewards/check_solution_words": -7.338333506584167, | |
| "rewards/check_word_guesses": 5.971666851043701, | |
| "step": 1450 | |
| }, | |
| { | |
| "completion_length": 366.32, | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": NaN, | |
| "kl": 16549.004248199464, | |
| "learning_rate": 4.217014141150248e-06, | |
| "loss": 661.9602, | |
| "reward": -18.263500604629517, | |
| "reward_std": 6.059882239103318, | |
| "rewards/check_first_pass": -9.878166694641113, | |
| "rewards/check_solution": -7.3666668796539305, | |
| "rewards/check_solution_words": -6.7500001257658, | |
| "rewards/check_word_guesses": 5.731333417892456, | |
| "step": 1500 | |
| }, | |
| { | |
| "completion_length": 320.5, | |
| "epoch": 0.34444444444444444, | |
| "grad_norm": NaN, | |
| "kl": 8347.091685709953, | |
| "learning_rate": 4.145264030848381e-06, | |
| "loss": 333.8836, | |
| "reward": -17.722667150497436, | |
| "reward_std": 5.240287501811981, | |
| "rewards/check_first_pass": -9.976666679382324, | |
| "rewards/check_solution": -7.566666932106018, | |
| "rewards/check_solution_words": -5.952333456873894, | |
| "rewards/check_word_guesses": 5.773000164031982, | |
| "step": 1550 | |
| }, | |
| { | |
| "completion_length": 313.42, | |
| "epoch": 0.35555555555555557, | |
| "grad_norm": NaN, | |
| "kl": 864.0743899011612, | |
| "learning_rate": 4.071039287157953e-06, | |
| "loss": 34.563, | |
| "reward": -17.983000602722168, | |
| "reward_std": 5.850111997127533, | |
| "rewards/check_first_pass": -9.918333358764649, | |
| "rewards/check_solution": -7.208333625793457, | |
| "rewards/check_solution_words": -6.453333538174629, | |
| "rewards/check_word_guesses": 5.597000193595886, | |
| "step": 1600 | |
| }, | |
| { | |
| "completion_length": 329.72, | |
| "epoch": 0.36666666666666664, | |
| "grad_norm": NaN, | |
| "kl": 396.7952742242813, | |
| "learning_rate": 3.9944515511441995e-06, | |
| "loss": 15.8718, | |
| "reward": -16.43366714477539, | |
| "reward_std": 7.244253120422363, | |
| "rewards/check_first_pass": -9.88666669845581, | |
| "rewards/check_solution": -6.900000200271607, | |
| "rewards/check_solution_words": -5.581333435922861, | |
| "rewards/check_word_guesses": 5.93433349609375, | |
| "step": 1650 | |
| }, | |
| { | |
| "completion_length": 295.6, | |
| "epoch": 0.37777777777777777, | |
| "grad_norm": NaN, | |
| "kl": 2458.04032143116, | |
| "learning_rate": 3.915616018037271e-06, | |
| "loss": 98.3216, | |
| "reward": -16.582167387008667, | |
| "reward_std": 6.116619675159455, | |
| "rewards/check_first_pass": -9.845500040054322, | |
| "rewards/check_solution": -7.29166687965393, | |
| "rewards/check_solution_words": -5.515666830142339, | |
| "rewards/check_word_guesses": 6.0706668472290035, | |
| "step": 1700 | |
| }, | |
| { | |
| "completion_length": 305.44, | |
| "epoch": 0.3888888888888889, | |
| "grad_norm": NaN, | |
| "kl": 6297.921968564987, | |
| "learning_rate": 3.834651263967667e-06, | |
| "loss": 251.9169, | |
| "reward": -17.544833850860595, | |
| "reward_std": 6.234307850599289, | |
| "rewards/check_first_pass": -9.903166675567627, | |
| "rewards/check_solution": -7.258333530426025, | |
| "rewards/check_solution_words": -6.216000239551067, | |
| "rewards/check_word_guesses": 5.832666802406311, | |
| "step": 1750 | |
| }, | |
| { | |
| "completion_length": 293.7, | |
| "epoch": 0.4, | |
| "grad_norm": NaN, | |
| "kl": 4064.312862081528, | |
| "learning_rate": 3.7516790676164795e-06, | |
| "loss": 162.5725, | |
| "reward": -17.36033399581909, | |
| "reward_std": 5.197294096946717, | |
| "rewards/check_first_pass": -9.983333339691162, | |
| "rewards/check_solution": -7.516666932106018, | |
| "rewards/check_solution_words": -5.423000110387802, | |
| "rewards/check_word_guesses": 5.562666816711426, | |
| "step": 1800 | |
| }, | |
| { | |
| "completion_length": 320.76, | |
| "epoch": 0.4111111111111111, | |
| "grad_norm": NaN, | |
| "kl": 4698.212507400513, | |
| "learning_rate": 3.6668242270486736e-06, | |
| "loss": 187.9285, | |
| "reward": -17.611333808898927, | |
| "reward_std": 5.37955255150795, | |
| "rewards/check_first_pass": -9.976666679382324, | |
| "rewards/check_solution": -7.441666889190674, | |
| "rewards/check_solution_words": -6.205666851997376, | |
| "rewards/check_word_guesses": 6.012666845321656, | |
| "step": 1850 | |
| }, | |
| { | |
| "completion_length": 318.02, | |
| "epoch": 0.4222222222222222, | |
| "grad_norm": NaN, | |
| "kl": 467.2588349723816, | |
| "learning_rate": 3.5802143720049565e-06, | |
| "loss": 18.6904, | |
| "reward": -18.40666706085205, | |
| "reward_std": 5.797463660240173, | |
| "rewards/check_first_pass": -9.95166669845581, | |
| "rewards/check_solution": -7.400000267028808, | |
| "rewards/check_solution_words": -6.630000138878822, | |
| "rewards/check_word_guesses": 5.575000162124634, | |
| "step": 1900 | |
| }, | |
| { | |
| "completion_length": 317.28, | |
| "epoch": 0.43333333333333335, | |
| "grad_norm": NaN, | |
| "kl": 889510.5962282228, | |
| "learning_rate": 3.4919797719345172e-06, | |
| "loss": 35580.425, | |
| "reward": -17.448667163848878, | |
| "reward_std": 4.699667553901673, | |
| "rewards/check_first_pass": -9.881666679382324, | |
| "rewards/check_solution": -7.475000233650207, | |
| "rewards/check_solution_words": -5.838000079877674, | |
| "rewards/check_word_guesses": 5.746000151634217, | |
| "step": 1950 | |
| }, | |
| { | |
| "completion_length": 302.48, | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": NaN, | |
| "kl": 5604.918187556267, | |
| "learning_rate": 3.402253140057402e-06, | |
| "loss": 224.1967, | |
| "reward": -16.404333744049072, | |
| "reward_std": 5.4958923101425174, | |
| "rewards/check_first_pass": -9.890000019073486, | |
| "rewards/check_solution": -7.033333592414856, | |
| "rewards/check_solution_words": -5.368000164031982, | |
| "rewards/check_word_guesses": 5.887000150680542, | |
| "step": 2000 | |
| }, | |
| { | |
| "completion_length": 321.74, | |
| "epoch": 0.45555555555555555, | |
| "grad_norm": NaN, | |
| "kl": 7411.243695282936, | |
| "learning_rate": 3.311169433751226e-06, | |
| "loss": 296.4498, | |
| "reward": -16.623500537872314, | |
| "reward_std": 6.51646169424057, | |
| "rewards/check_first_pass": -9.827833366394042, | |
| "rewards/check_solution": -6.908333498239517, | |
| "rewards/check_solution_words": -5.6363335295766595, | |
| "rewards/check_word_guesses": 5.749000191688538, | |
| "step": 2050 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 4500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 12, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |