{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.3473684210526315,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.013473684210526317,
      "grad_norm": 10.973633766174316,
      "learning_rate": 7.499999999999999e-06,
      "loss": 2.1479,
      "step": 1
    },
    {
      "epoch": 0.013473684210526317,
      "eval_loss": 2.5371525287628174,
      "eval_runtime": 8.4173,
      "eval_samples_per_second": 29.701,
      "eval_steps_per_second": 14.85,
      "step": 1
    },
    {
      "epoch": 0.026947368421052633,
      "grad_norm": 9.272090911865234,
      "learning_rate": 1.4999999999999999e-05,
      "loss": 2.0187,
      "step": 2
    },
    {
      "epoch": 0.04042105263157895,
      "grad_norm": 7.878329753875732,
      "learning_rate": 2.2499999999999998e-05,
      "loss": 2.1563,
      "step": 3
    },
    {
      "epoch": 0.053894736842105266,
      "grad_norm": 7.982365608215332,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 2.282,
      "step": 4
    },
    {
      "epoch": 0.06736842105263158,
      "grad_norm": 2.2053937911987305,
      "learning_rate": 3.75e-05,
      "loss": 2.1851,
      "step": 5
    },
    {
      "epoch": 0.0808421052631579,
      "grad_norm": 1.8837133646011353,
      "learning_rate": 4.4999999999999996e-05,
      "loss": 2.112,
      "step": 6
    },
    {
      "epoch": 0.09431578947368421,
      "grad_norm": 1.4526352882385254,
      "learning_rate": 5.2499999999999995e-05,
      "loss": 2.0328,
      "step": 7
    },
    {
      "epoch": 0.10778947368421053,
      "grad_norm": 1.4846237897872925,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 1.8783,
      "step": 8
    },
    {
      "epoch": 0.12126315789473684,
      "grad_norm": 1.9902074337005615,
      "learning_rate": 6.75e-05,
      "loss": 1.9695,
      "step": 9
    },
    {
      "epoch": 0.13473684210526315,
      "grad_norm": 1.404717206954956,
      "learning_rate": 7.5e-05,
      "loss": 1.9514,
      "step": 10
    },
    {
      "epoch": 0.1482105263157895,
      "grad_norm": 1.4216639995574951,
      "learning_rate": 8.25e-05,
      "loss": 1.8557,
      "step": 11
    },
    {
      "epoch": 0.1616842105263158,
      "grad_norm": 1.4013230800628662,
      "learning_rate": 8.999999999999999e-05,
      "loss": 1.8424,
      "step": 12
    },
    {
      "epoch": 0.1751578947368421,
      "grad_norm": 1.1717782020568848,
      "learning_rate": 9.75e-05,
      "loss": 1.8625,
      "step": 13
    },
    {
      "epoch": 0.18863157894736843,
      "grad_norm": 1.7480112314224243,
      "learning_rate": 0.00010499999999999999,
      "loss": 1.9016,
      "step": 14
    },
    {
      "epoch": 0.20210526315789473,
      "grad_norm": 1.1352505683898926,
      "learning_rate": 0.0001125,
      "loss": 1.8889,
      "step": 15
    },
    {
      "epoch": 0.21557894736842106,
      "grad_norm": 1.1477811336517334,
      "learning_rate": 0.00011999999999999999,
      "loss": 1.5794,
      "step": 16
    },
    {
      "epoch": 0.22905263157894737,
      "grad_norm": 1.3586668968200684,
      "learning_rate": 0.00012749999999999998,
      "loss": 1.5011,
      "step": 17
    },
    {
      "epoch": 0.24252631578947367,
      "grad_norm": 0.8325414657592773,
      "learning_rate": 0.000135,
      "loss": 1.6469,
      "step": 18
    },
    {
      "epoch": 0.256,
      "grad_norm": 1.5747898817062378,
      "learning_rate": 0.0001425,
      "loss": 1.6895,
      "step": 19
    },
    {
      "epoch": 0.2694736842105263,
      "grad_norm": 0.9997685551643372,
      "learning_rate": 0.00015,
      "loss": 1.5248,
      "step": 20
    },
    {
      "epoch": 0.2829473684210526,
      "grad_norm": 1.195119857788086,
      "learning_rate": 0.00014994217771805422,
      "loss": 1.5649,
      "step": 21
    },
    {
      "epoch": 0.296421052631579,
      "grad_norm": 0.8751718401908875,
      "learning_rate": 0.00014976880002998458,
      "loss": 1.5405,
      "step": 22
    },
    {
      "epoch": 0.3098947368421053,
      "grad_norm": 0.8566117882728577,
      "learning_rate": 0.00014948013427161947,
      "loss": 1.5504,
      "step": 23
    },
    {
      "epoch": 0.3233684210526316,
      "grad_norm": 0.7322584390640259,
      "learning_rate": 0.00014907662554463532,
      "loss": 1.5034,
      "step": 24
    },
    {
      "epoch": 0.3368421052631579,
      "grad_norm": 0.9539948105812073,
      "learning_rate": 0.00014855889603024227,
      "loss": 1.4513,
      "step": 25
    },
    {
      "epoch": 0.3503157894736842,
      "grad_norm": 0.7042058110237122,
      "learning_rate": 0.00014792774402982574,
      "loss": 1.5281,
      "step": 26
    },
    {
      "epoch": 0.36378947368421055,
      "grad_norm": 0.6478146910667419,
      "learning_rate": 0.0001471841427340235,
      "loss": 1.5117,
      "step": 27
    },
    {
      "epoch": 0.37726315789473686,
      "grad_norm": 0.6267299652099609,
      "learning_rate": 0.00014632923872213652,
      "loss": 1.383,
      "step": 28
    },
    {
      "epoch": 0.39073684210526316,
      "grad_norm": 0.7713648676872253,
      "learning_rate": 0.0001453643501941863,
      "loss": 1.4844,
      "step": 29
    },
    {
      "epoch": 0.40421052631578946,
      "grad_norm": 0.6838952898979187,
      "learning_rate": 0.0001442909649383465,
      "loss": 1.4825,
      "step": 30
    },
    {
      "epoch": 0.41768421052631577,
      "grad_norm": 0.71690434217453,
      "learning_rate": 0.0001431107380368811,
      "loss": 1.4357,
      "step": 31
    },
    {
      "epoch": 0.43115789473684213,
      "grad_norm": 0.6745509505271912,
      "learning_rate": 0.00014182548931412757,
      "loss": 1.4733,
      "step": 32
    },
    {
      "epoch": 0.44463157894736843,
      "grad_norm": 0.7103040814399719,
      "learning_rate": 0.0001404372005304598,
      "loss": 1.3857,
      "step": 33
    },
    {
      "epoch": 0.45810526315789474,
      "grad_norm": 0.6221896409988403,
      "learning_rate": 0.0001389480123265569,
      "loss": 1.2527,
      "step": 34
    },
    {
      "epoch": 0.47157894736842104,
      "grad_norm": 0.562971293926239,
      "learning_rate": 0.0001373602209226909,
      "loss": 1.4486,
      "step": 35
    },
    {
      "epoch": 0.48505263157894735,
      "grad_norm": 0.5778741240501404,
      "learning_rate": 0.00013567627457812106,
      "loss": 1.4134,
      "step": 36
    },
    {
      "epoch": 0.4985263157894737,
      "grad_norm": 0.5704385042190552,
      "learning_rate": 0.00013389876981605584,
      "loss": 1.354,
      "step": 37
    },
    {
      "epoch": 0.512,
      "grad_norm": 0.6227774024009705,
      "learning_rate": 0.00013203044742000233,
      "loss": 1.5718,
      "step": 38
    },
    {
      "epoch": 0.5254736842105263,
      "grad_norm": 0.6505720615386963,
      "learning_rate": 0.0001300741882076764,
      "loss": 1.4278,
      "step": 39
    },
    {
      "epoch": 0.5389473684210526,
      "grad_norm": 0.5715638995170593,
      "learning_rate": 0.00012803300858899104,
      "loss": 1.478,
      "step": 40
    },
    {
      "epoch": 0.5524210526315789,
      "grad_norm": 0.6021521091461182,
      "learning_rate": 0.00012591005591497064,
      "loss": 1.3556,
      "step": 41
    },
    {
      "epoch": 0.5658947368421052,
      "grad_norm": 0.6821895837783813,
      "learning_rate": 0.00012370860362476374,
      "loss": 1.4947,
      "step": 42
    },
    {
      "epoch": 0.5793684210526315,
      "grad_norm": 0.5453934073448181,
      "learning_rate": 0.00012143204619823755,
      "loss": 1.2477,
      "step": 43
    },
    {
      "epoch": 0.592842105263158,
      "grad_norm": 0.6702715754508972,
      "learning_rate": 0.00011908389392193547,
      "loss": 1.4835,
      "step": 44
    },
    {
      "epoch": 0.6063157894736843,
      "grad_norm": 0.6350681185722351,
      "learning_rate": 0.00011666776747647015,
      "loss": 1.3993,
      "step": 45
    },
    {
      "epoch": 0.6197894736842106,
      "grad_norm": 0.6066803336143494,
      "learning_rate": 0.00011418739235369615,
      "loss": 1.3974,
      "step": 46
    },
    {
      "epoch": 0.6332631578947369,
      "grad_norm": 0.5659217238426208,
      "learning_rate": 0.00011164659311227163,
      "loss": 1.3268,
      "step": 47
    },
    {
      "epoch": 0.6467368421052632,
      "grad_norm": 0.5807419419288635,
      "learning_rate": 0.00010904928748046599,
      "loss": 1.3408,
      "step": 48
    },
    {
      "epoch": 0.6602105263157895,
      "grad_norm": 0.6375626921653748,
      "learning_rate": 0.0001063994803153071,
      "loss": 1.4854,
      "step": 49
    },
    {
      "epoch": 0.6736842105263158,
      "grad_norm": 0.6418893933296204,
      "learning_rate": 0.00010370125742738173,
      "loss": 1.3679,
      "step": 50
    },
    {
      "epoch": 0.6736842105263158,
      "eval_loss": 1.499872088432312,
      "eval_runtime": 8.4304,
      "eval_samples_per_second": 29.654,
      "eval_steps_per_second": 14.827,
      "step": 50
    },
    {
      "epoch": 0.6871578947368421,
      "grad_norm": 0.7283876538276672,
      "learning_rate": 0.00010095877928081196,
      "loss": 1.4891,
      "step": 51
    },
    {
      "epoch": 0.7006315789473684,
      "grad_norm": 0.5404418706893921,
      "learning_rate": 9.817627457812105e-05,
      "loss": 1.0949,
      "step": 52
    },
    {
      "epoch": 0.7141052631578947,
      "grad_norm": 0.6369166970252991,
      "learning_rate": 9.535803373988056e-05,
      "loss": 1.5966,
      "step": 53
    },
    {
      "epoch": 0.7275789473684211,
      "grad_norm": 0.8547583818435669,
      "learning_rate": 9.25084022891929e-05,
      "loss": 1.3016,
      "step": 54
    },
    {
      "epoch": 0.7410526315789474,
      "grad_norm": 0.6199703812599182,
      "learning_rate": 8.963177415120962e-05,
      "loss": 1.5274,
      "step": 55
    },
    {
      "epoch": 0.7545263157894737,
      "grad_norm": 0.6474433541297913,
      "learning_rate": 8.673258487801731e-05,
      "loss": 1.3812,
      "step": 56
    },
    {
      "epoch": 0.768,
      "grad_norm": 0.5689646601676941,
      "learning_rate": 8.381530480933783e-05,
      "loss": 1.4005,
      "step": 57
    },
    {
      "epoch": 0.7814736842105263,
      "grad_norm": 0.5995835661888123,
      "learning_rate": 8.088443217958837e-05,
      "loss": 1.2694,
      "step": 58
    },
    {
      "epoch": 0.7949473684210526,
      "grad_norm": 0.5566097497940063,
      "learning_rate": 7.794448618193015e-05,
      "loss": 1.4341,
      "step": 59
    },
    {
      "epoch": 0.8084210526315789,
      "grad_norm": 0.6118280291557312,
      "learning_rate": 7.5e-05,
      "loss": 1.378,
      "step": 60
    },
    {
      "epoch": 0.8218947368421052,
      "grad_norm": 0.5662732124328613,
      "learning_rate": 7.205551381806987e-05,
      "loss": 1.4138,
      "step": 61
    },
    {
      "epoch": 0.8353684210526315,
      "grad_norm": 0.5312877893447876,
      "learning_rate": 6.911556782041163e-05,
      "loss": 1.4356,
      "step": 62
    },
    {
      "epoch": 0.8488421052631578,
      "grad_norm": 0.5712493062019348,
      "learning_rate": 6.618469519066217e-05,
      "loss": 1.4667,
      "step": 63
    },
    {
      "epoch": 0.8623157894736843,
      "grad_norm": 0.5462284684181213,
      "learning_rate": 6.326741512198266e-05,
      "loss": 1.4505,
      "step": 64
    },
    {
      "epoch": 0.8757894736842106,
      "grad_norm": 0.6210593581199646,
      "learning_rate": 6.036822584879038e-05,
      "loss": 1.2947,
      "step": 65
    },
    {
      "epoch": 0.8892631578947369,
      "grad_norm": 0.6453770399093628,
      "learning_rate": 5.7491597710807114e-05,
      "loss": 1.3575,
      "step": 66
    },
    {
      "epoch": 0.9027368421052632,
      "grad_norm": 0.6173303127288818,
      "learning_rate": 5.464196626011943e-05,
      "loss": 1.3685,
      "step": 67
    },
    {
      "epoch": 0.9162105263157895,
      "grad_norm": 0.6161783933639526,
      "learning_rate": 5.182372542187895e-05,
      "loss": 1.5084,
      "step": 68
    },
    {
      "epoch": 0.9296842105263158,
      "grad_norm": 0.5926702618598938,
      "learning_rate": 4.904122071918801e-05,
      "loss": 1.5106,
      "step": 69
    },
    {
      "epoch": 0.9431578947368421,
      "grad_norm": 0.7311588525772095,
      "learning_rate": 4.6298742572618266e-05,
      "loss": 1.3789,
      "step": 70
    },
    {
      "epoch": 0.9566315789473684,
      "grad_norm": 0.5569392442703247,
      "learning_rate": 4.360051968469291e-05,
      "loss": 1.2037,
      "step": 71
    },
    {
      "epoch": 0.9701052631578947,
      "grad_norm": 0.49740126729011536,
      "learning_rate": 4.095071251953399e-05,
      "loss": 1.3472,
      "step": 72
    },
    {
      "epoch": 0.983578947368421,
      "grad_norm": 0.5706843733787537,
      "learning_rate": 3.83534068877284e-05,
      "loss": 1.4041,
      "step": 73
    },
    {
      "epoch": 0.9970526315789474,
      "grad_norm": 0.5967234373092651,
      "learning_rate": 3.5812607646303834e-05,
      "loss": 1.31,
      "step": 74
    },
    {
      "epoch": 1.0105263157894737,
      "grad_norm": 1.052331566810608,
      "learning_rate": 3.333223252352985e-05,
      "loss": 2.0664,
      "step": 75
    },
    {
      "epoch": 1.024,
      "grad_norm": 0.6153193712234497,
      "learning_rate": 3.091610607806452e-05,
      "loss": 1.516,
      "step": 76
    },
    {
      "epoch": 1.0374736842105263,
      "grad_norm": 0.5821354389190674,
      "learning_rate": 2.856795380176244e-05,
      "loss": 1.2732,
      "step": 77
    },
    {
      "epoch": 1.0509473684210526,
      "grad_norm": 0.6261878609657288,
      "learning_rate": 2.6291396375236232e-05,
      "loss": 1.2817,
      "step": 78
    },
    {
      "epoch": 1.064421052631579,
      "grad_norm": 0.5795064568519592,
      "learning_rate": 2.4089944085029363e-05,
      "loss": 1.3216,
      "step": 79
    },
    {
      "epoch": 1.0778947368421052,
      "grad_norm": 0.5134410262107849,
      "learning_rate": 2.1966991411008938e-05,
      "loss": 1.2917,
      "step": 80
    },
    {
      "epoch": 1.0913684210526315,
      "grad_norm": 0.6312588453292847,
      "learning_rate": 1.99258117923236e-05,
      "loss": 1.1945,
      "step": 81
    },
    {
      "epoch": 1.1048421052631578,
      "grad_norm": 0.5317590832710266,
      "learning_rate": 1.796955257999768e-05,
      "loss": 1.2838,
      "step": 82
    },
    {
      "epoch": 1.1183157894736842,
      "grad_norm": 0.6899747252464294,
      "learning_rate": 1.6101230183944144e-05,
      "loss": 1.3589,
      "step": 83
    },
    {
      "epoch": 1.1317894736842105,
      "grad_norm": 0.5604771971702576,
      "learning_rate": 1.4323725421878949e-05,
      "loss": 1.1972,
      "step": 84
    },
    {
      "epoch": 1.1452631578947368,
      "grad_norm": 0.6752080917358398,
      "learning_rate": 1.2639779077309098e-05,
      "loss": 1.5739,
      "step": 85
    },
    {
      "epoch": 1.158736842105263,
      "grad_norm": 0.6165206432342529,
      "learning_rate": 1.1051987673443085e-05,
      "loss": 1.2631,
      "step": 86
    },
    {
      "epoch": 1.1722105263157894,
      "grad_norm": 0.61894291639328,
      "learning_rate": 9.56279946954021e-06,
      "loss": 1.3167,
      "step": 87
    },
    {
      "epoch": 1.1856842105263157,
      "grad_norm": 0.647057294845581,
      "learning_rate": 8.174510685872415e-06,
      "loss": 1.3185,
      "step": 88
    },
    {
      "epoch": 1.1991578947368422,
      "grad_norm": 0.5210772752761841,
      "learning_rate": 6.889261963118898e-06,
      "loss": 1.2242,
      "step": 89
    },
    {
      "epoch": 1.2126315789473685,
      "grad_norm": 0.5150516033172607,
      "learning_rate": 5.709035061653494e-06,
      "loss": 1.2346,
      "step": 90
    },
    {
      "epoch": 1.2261052631578948,
      "grad_norm": 0.6364325284957886,
      "learning_rate": 4.635649805813696e-06,
      "loss": 1.3149,
      "step": 91
    },
    {
      "epoch": 1.2395789473684211,
      "grad_norm": 0.6105322241783142,
      "learning_rate": 3.670761277863485e-06,
      "loss": 1.2594,
      "step": 92
    },
    {
      "epoch": 1.2530526315789474,
      "grad_norm": 0.6162554621696472,
      "learning_rate": 2.815857265976462e-06,
      "loss": 1.277,
      "step": 93
    },
    {
      "epoch": 1.2665263157894737,
      "grad_norm": 0.5331520438194275,
      "learning_rate": 2.072255970174258e-06,
      "loss": 1.2193,
      "step": 94
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.5716810822486877,
      "learning_rate": 1.4411039697577175e-06,
      "loss": 1.4455,
      "step": 95
    },
    {
      "epoch": 1.2934736842105263,
      "grad_norm": 0.56926029920578,
      "learning_rate": 9.233744553646754e-07,
      "loss": 1.291,
      "step": 96
    },
    {
      "epoch": 1.3069473684210526,
      "grad_norm": 0.5255064964294434,
      "learning_rate": 5.198657283805279e-07,
      "loss": 1.2986,
      "step": 97
    },
    {
      "epoch": 1.320421052631579,
      "grad_norm": 0.5318115949630737,
      "learning_rate": 2.311999700154027e-07,
      "loss": 1.2152,
      "step": 98
    },
    {
      "epoch": 1.3338947368421052,
      "grad_norm": 0.5788165330886841,
      "learning_rate": 5.7822281945782424e-08,
      "loss": 1.4851,
      "step": 99
    },
    {
      "epoch": 1.3473684210526315,
      "grad_norm": 0.5943503379821777,
      "learning_rate": 0.0,
      "loss": 1.1959,
      "step": 100
    },
    {
      "epoch": 1.3473684210526315,
      "eval_loss": 1.457924246788025,
      "eval_runtime": 8.4236,
      "eval_samples_per_second": 29.678,
      "eval_steps_per_second": 14.839,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 9,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8.33416392081408e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}