{
  "best_metric": 4.5732855796813965,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 1.3071895424836601,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.013071895424836602,
      "grad_norm": 7.257218360900879,
      "learning_rate": 5.000000000000001e-07,
      "loss": 2.9113,
      "step": 1
    },
    {
      "epoch": 0.013071895424836602,
      "eval_loss": 8.609292030334473,
      "eval_runtime": 4.5671,
      "eval_samples_per_second": 28.245,
      "eval_steps_per_second": 7.226,
      "step": 1
    },
    {
      "epoch": 0.026143790849673203,
      "grad_norm": 8.341460227966309,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 3.8712,
      "step": 2
    },
    {
      "epoch": 0.0392156862745098,
      "grad_norm": 14.018800735473633,
      "learning_rate": 1.5e-06,
      "loss": 5.3191,
      "step": 3
    },
    {
      "epoch": 0.05228758169934641,
      "grad_norm": 19.952796936035156,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 6.9201,
      "step": 4
    },
    {
      "epoch": 0.06535947712418301,
      "grad_norm": 20.201190948486328,
      "learning_rate": 2.5e-06,
      "loss": 6.9843,
      "step": 5
    },
    {
      "epoch": 0.0784313725490196,
      "grad_norm": 21.60859489440918,
      "learning_rate": 3e-06,
      "loss": 7.7391,
      "step": 6
    },
    {
      "epoch": 0.0915032679738562,
      "grad_norm": 24.700639724731445,
      "learning_rate": 3.5e-06,
      "loss": 8.2004,
      "step": 7
    },
    {
      "epoch": 0.10457516339869281,
      "grad_norm": 22.307209014892578,
      "learning_rate": 4.000000000000001e-06,
      "loss": 7.6429,
      "step": 8
    },
    {
      "epoch": 0.11764705882352941,
      "grad_norm": 25.259756088256836,
      "learning_rate": 4.5e-06,
      "loss": 8.005,
      "step": 9
    },
    {
      "epoch": 0.13071895424836602,
      "grad_norm": 23.363004684448242,
      "learning_rate": 5e-06,
      "loss": 7.4913,
      "step": 10
    },
    {
      "epoch": 0.1437908496732026,
      "grad_norm": 22.28295135498047,
      "learning_rate": 4.99847706754774e-06,
      "loss": 7.4698,
      "step": 11
    },
    {
      "epoch": 0.1568627450980392,
      "grad_norm": 25.160226821899414,
      "learning_rate": 4.993910125649561e-06,
      "loss": 7.8227,
      "step": 12
    },
    {
      "epoch": 0.16993464052287582,
      "grad_norm": 26.892242431640625,
      "learning_rate": 4.986304738420684e-06,
      "loss": 9.0196,
      "step": 13
    },
    {
      "epoch": 0.1830065359477124,
      "grad_norm": 27.60188865661621,
      "learning_rate": 4.975670171853926e-06,
      "loss": 8.9304,
      "step": 14
    },
    {
      "epoch": 0.19607843137254902,
      "grad_norm": 25.04266929626465,
      "learning_rate": 4.962019382530521e-06,
      "loss": 8.8492,
      "step": 15
    },
    {
      "epoch": 0.20915032679738563,
      "grad_norm": 25.570220947265625,
      "learning_rate": 4.9453690018345144e-06,
      "loss": 9.2126,
      "step": 16
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 23.66245460510254,
      "learning_rate": 4.925739315689991e-06,
      "loss": 8.7148,
      "step": 17
    },
    {
      "epoch": 0.23529411764705882,
      "grad_norm": 28.739566802978516,
      "learning_rate": 4.903154239845798e-06,
      "loss": 10.0651,
      "step": 18
    },
    {
      "epoch": 0.24836601307189543,
      "grad_norm": 31.82867431640625,
      "learning_rate": 4.8776412907378845e-06,
      "loss": 11.8835,
      "step": 19
    },
    {
      "epoch": 0.26143790849673204,
      "grad_norm": 7.091309070587158,
      "learning_rate": 4.849231551964771e-06,
      "loss": 2.8035,
      "step": 20
    },
    {
      "epoch": 0.27450980392156865,
      "grad_norm": 9.146745681762695,
      "learning_rate": 4.817959636416969e-06,
      "loss": 4.0063,
      "step": 21
    },
    {
      "epoch": 0.2875816993464052,
      "grad_norm": 14.22527027130127,
      "learning_rate": 4.783863644106502e-06,
      "loss": 5.5576,
      "step": 22
    },
    {
      "epoch": 0.3006535947712418,
      "grad_norm": 18.277191162109375,
      "learning_rate": 4.746985115747918e-06,
      "loss": 6.0938,
      "step": 23
    },
    {
      "epoch": 0.3137254901960784,
      "grad_norm": 19.253273010253906,
      "learning_rate": 4.707368982147318e-06,
      "loss": 6.4934,
      "step": 24
    },
    {
      "epoch": 0.32679738562091504,
      "grad_norm": 22.427473068237305,
      "learning_rate": 4.665063509461098e-06,
      "loss": 6.9725,
      "step": 25
    },
    {
      "epoch": 0.33986928104575165,
      "grad_norm": 21.749441146850586,
      "learning_rate": 4.620120240391065e-06,
      "loss": 6.7396,
      "step": 26
    },
    {
      "epoch": 0.35294117647058826,
      "grad_norm": 22.339990615844727,
      "learning_rate": 4.572593931387604e-06,
      "loss": 6.6181,
      "step": 27
    },
    {
      "epoch": 0.3660130718954248,
      "grad_norm": 25.734874725341797,
      "learning_rate": 4.522542485937369e-06,
      "loss": 6.3757,
      "step": 28
    },
    {
      "epoch": 0.3790849673202614,
      "grad_norm": 23.32753562927246,
      "learning_rate": 4.470026884016805e-06,
      "loss": 6.9307,
      "step": 29
    },
    {
      "epoch": 0.39215686274509803,
      "grad_norm": 22.613615036010742,
      "learning_rate": 4.415111107797445e-06,
      "loss": 7.2312,
      "step": 30
    },
    {
      "epoch": 0.40522875816993464,
      "grad_norm": 23.05530548095703,
      "learning_rate": 4.357862063693486e-06,
      "loss": 6.3992,
      "step": 31
    },
    {
      "epoch": 0.41830065359477125,
      "grad_norm": 24.813129425048828,
      "learning_rate": 4.2983495008466285e-06,
      "loss": 6.8739,
      "step": 32
    },
    {
      "epoch": 0.43137254901960786,
      "grad_norm": 25.304807662963867,
      "learning_rate": 4.236645926147493e-06,
      "loss": 7.928,
      "step": 33
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 25.62441635131836,
      "learning_rate": 4.172826515897146e-06,
      "loss": 7.3664,
      "step": 34
    },
    {
      "epoch": 0.45751633986928103,
      "grad_norm": 24.42110824584961,
      "learning_rate": 4.106969024216348e-06,
      "loss": 7.7416,
      "step": 35
    },
    {
      "epoch": 0.47058823529411764,
      "grad_norm": 25.079715728759766,
      "learning_rate": 4.039153688314146e-06,
      "loss": 8.1472,
      "step": 36
    },
    {
      "epoch": 0.48366013071895425,
      "grad_norm": 30.301733016967773,
      "learning_rate": 3.969463130731183e-06,
      "loss": 9.266,
      "step": 37
    },
    {
      "epoch": 0.49673202614379086,
      "grad_norm": 28.850906372070312,
      "learning_rate": 3.897982258676867e-06,
      "loss": 8.429,
      "step": 38
    },
    {
      "epoch": 0.5098039215686274,
      "grad_norm": 5.361105918884277,
      "learning_rate": 3.824798160583012e-06,
      "loss": 2.6361,
      "step": 39
    },
    {
      "epoch": 0.5228758169934641,
      "grad_norm": 6.960734844207764,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 3.2406,
      "step": 40
    },
    {
      "epoch": 0.5359477124183006,
      "grad_norm": 12.633935928344727,
      "learning_rate": 3.6736789069647273e-06,
      "loss": 5.5256,
      "step": 41
    },
    {
      "epoch": 0.5490196078431373,
      "grad_norm": 15.817687034606934,
      "learning_rate": 3.595927866972694e-06,
      "loss": 5.1769,
      "step": 42
    },
    {
      "epoch": 0.5620915032679739,
      "grad_norm": 19.869218826293945,
      "learning_rate": 3.516841607689501e-06,
      "loss": 5.3096,
      "step": 43
    },
    {
      "epoch": 0.5751633986928104,
      "grad_norm": 16.122995376586914,
      "learning_rate": 3.436516483539781e-06,
      "loss": 4.771,
      "step": 44
    },
    {
      "epoch": 0.5882352941176471,
      "grad_norm": 19.55790138244629,
      "learning_rate": 3.3550503583141726e-06,
      "loss": 4.5633,
      "step": 45
    },
    {
      "epoch": 0.6013071895424836,
      "grad_norm": 20.046592712402344,
      "learning_rate": 3.272542485937369e-06,
      "loss": 5.5604,
      "step": 46
    },
    {
      "epoch": 0.6143790849673203,
      "grad_norm": 19.207847595214844,
      "learning_rate": 3.189093389542498e-06,
      "loss": 5.6587,
      "step": 47
    },
    {
      "epoch": 0.6274509803921569,
      "grad_norm": 18.39757537841797,
      "learning_rate": 3.1048047389991693e-06,
      "loss": 5.0743,
      "step": 48
    },
    {
      "epoch": 0.6405228758169934,
      "grad_norm": 22.641244888305664,
      "learning_rate": 3.019779227044398e-06,
      "loss": 6.5356,
      "step": 49
    },
    {
      "epoch": 0.6535947712418301,
      "grad_norm": 21.692182540893555,
      "learning_rate": 2.9341204441673267e-06,
      "loss": 4.7347,
      "step": 50
    },
    {
      "epoch": 0.6535947712418301,
      "eval_loss": 5.583864212036133,
      "eval_runtime": 4.6346,
      "eval_samples_per_second": 27.834,
      "eval_steps_per_second": 7.12,
      "step": 50
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 20.410966873168945,
      "learning_rate": 2.847932752400164e-06,
      "loss": 5.2445,
      "step": 51
    },
    {
      "epoch": 0.6797385620915033,
      "grad_norm": 22.301136016845703,
      "learning_rate": 2.761321158169134e-06,
      "loss": 6.6047,
      "step": 52
    },
    {
      "epoch": 0.6928104575163399,
      "grad_norm": 26.021312713623047,
      "learning_rate": 2.6743911843603134e-06,
      "loss": 7.225,
      "step": 53
    },
    {
      "epoch": 0.7058823529411765,
      "grad_norm": 25.37548065185547,
      "learning_rate": 2.587248741756253e-06,
      "loss": 7.1886,
      "step": 54
    },
    {
      "epoch": 0.7189542483660131,
      "grad_norm": 24.194847106933594,
      "learning_rate": 2.5e-06,
      "loss": 7.4705,
      "step": 55
    },
    {
      "epoch": 0.7320261437908496,
      "grad_norm": 31.78634262084961,
      "learning_rate": 2.4127512582437486e-06,
      "loss": 7.6195,
      "step": 56
    },
    {
      "epoch": 0.7450980392156863,
      "grad_norm": 33.419776916503906,
      "learning_rate": 2.325608815639687e-06,
      "loss": 8.226,
      "step": 57
    },
    {
      "epoch": 0.7581699346405228,
      "grad_norm": 5.473607063293457,
      "learning_rate": 2.238678841830867e-06,
      "loss": 2.4786,
      "step": 58
    },
    {
      "epoch": 0.7712418300653595,
      "grad_norm": 7.471803665161133,
      "learning_rate": 2.1520672475998374e-06,
      "loss": 3.0997,
      "step": 59
    },
    {
      "epoch": 0.7843137254901961,
      "grad_norm": 11.024681091308594,
      "learning_rate": 2.0658795558326745e-06,
      "loss": 3.7581,
      "step": 60
    },
    {
      "epoch": 0.7973856209150327,
      "grad_norm": 16.229074478149414,
      "learning_rate": 1.9802207729556023e-06,
      "loss": 4.2436,
      "step": 61
    },
    {
      "epoch": 0.8104575163398693,
      "grad_norm": 16.74787139892578,
      "learning_rate": 1.895195261000831e-06,
      "loss": 4.3475,
      "step": 62
    },
    {
      "epoch": 0.8235294117647058,
      "grad_norm": 15.532852172851562,
      "learning_rate": 1.8109066104575023e-06,
      "loss": 4.1585,
      "step": 63
    },
    {
      "epoch": 0.8366013071895425,
      "grad_norm": 17.108413696289062,
      "learning_rate": 1.7274575140626318e-06,
      "loss": 4.5843,
      "step": 64
    },
    {
      "epoch": 0.8496732026143791,
      "grad_norm": 19.03700065612793,
      "learning_rate": 1.6449496416858285e-06,
      "loss": 4.5228,
      "step": 65
    },
    {
      "epoch": 0.8627450980392157,
      "grad_norm": 18.226158142089844,
      "learning_rate": 1.56348351646022e-06,
      "loss": 4.3519,
      "step": 66
    },
    {
      "epoch": 0.8758169934640523,
      "grad_norm": 17.857391357421875,
      "learning_rate": 1.4831583923105e-06,
      "loss": 4.3457,
      "step": 67
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 18.92527198791504,
      "learning_rate": 1.4040721330273063e-06,
      "loss": 4.9343,
      "step": 68
    },
    {
      "epoch": 0.9019607843137255,
      "grad_norm": 21.539527893066406,
      "learning_rate": 1.3263210930352737e-06,
      "loss": 4.5055,
      "step": 69
    },
    {
      "epoch": 0.9150326797385621,
      "grad_norm": 18.269908905029297,
      "learning_rate": 1.2500000000000007e-06,
      "loss": 4.9476,
      "step": 70
    },
    {
      "epoch": 0.9281045751633987,
      "grad_norm": 23.49378204345703,
      "learning_rate": 1.1752018394169882e-06,
      "loss": 5.0107,
      "step": 71
    },
    {
      "epoch": 0.9411764705882353,
      "grad_norm": 25.287702560424805,
      "learning_rate": 1.1020177413231334e-06,
      "loss": 5.5347,
      "step": 72
    },
    {
      "epoch": 0.954248366013072,
      "grad_norm": 21.355749130249023,
      "learning_rate": 1.0305368692688175e-06,
      "loss": 5.5062,
      "step": 73
    },
    {
      "epoch": 0.9673202614379085,
      "grad_norm": 25.698564529418945,
      "learning_rate": 9.608463116858544e-07,
      "loss": 6.4095,
      "step": 74
    },
    {
      "epoch": 0.9803921568627451,
      "grad_norm": 27.62260627746582,
      "learning_rate": 8.930309757836517e-07,
      "loss": 6.5146,
      "step": 75
    },
    {
      "epoch": 0.9934640522875817,
      "grad_norm": 28.68571662902832,
      "learning_rate": 8.271734841028553e-07,
      "loss": 6.5249,
      "step": 76
    },
    {
      "epoch": 1.0065359477124183,
      "grad_norm": 18.36124610900879,
      "learning_rate": 7.633540738525066e-07,
      "loss": 5.1321,
      "step": 77
    },
    {
      "epoch": 1.0196078431372548,
      "grad_norm": 6.567688941955566,
      "learning_rate": 7.016504991533727e-07,
      "loss": 2.5439,
      "step": 78
    },
    {
      "epoch": 1.0326797385620916,
      "grad_norm": 8.866209983825684,
      "learning_rate": 6.421379363065142e-07,
      "loss": 3.0478,
      "step": 79
    },
    {
      "epoch": 1.0457516339869282,
      "grad_norm": 13.741599082946777,
      "learning_rate": 5.848888922025553e-07,
      "loss": 3.9493,
      "step": 80
    },
    {
      "epoch": 1.0588235294117647,
      "grad_norm": 14.087339401245117,
      "learning_rate": 5.299731159831953e-07,
      "loss": 3.115,
      "step": 81
    },
    {
      "epoch": 1.0718954248366013,
      "grad_norm": 18.528522491455078,
      "learning_rate": 4.774575140626317e-07,
      "loss": 4.9909,
      "step": 82
    },
    {
      "epoch": 1.0849673202614378,
      "grad_norm": 17.856454849243164,
      "learning_rate": 4.27406068612396e-07,
      "loss": 4.0747,
      "step": 83
    },
    {
      "epoch": 1.0980392156862746,
      "grad_norm": 19.685808181762695,
      "learning_rate": 3.798797596089351e-07,
      "loss": 4.2566,
      "step": 84
    },
    {
      "epoch": 1.1111111111111112,
      "grad_norm": 18.7196102142334,
      "learning_rate": 3.3493649053890325e-07,
      "loss": 4.0926,
      "step": 85
    },
    {
      "epoch": 1.1241830065359477,
      "grad_norm": 18.23643684387207,
      "learning_rate": 2.9263101785268253e-07,
      "loss": 4.174,
      "step": 86
    },
    {
      "epoch": 1.1372549019607843,
      "grad_norm": 19.65151596069336,
      "learning_rate": 2.53014884252083e-07,
      "loss": 4.9878,
      "step": 87
    },
    {
      "epoch": 1.1503267973856208,
      "grad_norm": 20.80632781982422,
      "learning_rate": 2.1613635589349756e-07,
      "loss": 4.5691,
      "step": 88
    },
    {
      "epoch": 1.1633986928104576,
      "grad_norm": 19.64034652709961,
      "learning_rate": 1.8204036358303173e-07,
      "loss": 4.4191,
      "step": 89
    },
    {
      "epoch": 1.1764705882352942,
      "grad_norm": 20.260753631591797,
      "learning_rate": 1.507684480352292e-07,
      "loss": 4.5841,
      "step": 90
    },
    {
      "epoch": 1.1895424836601307,
      "grad_norm": 23.664470672607422,
      "learning_rate": 1.223587092621162e-07,
      "loss": 5.0259,
      "step": 91
    },
    {
      "epoch": 1.2026143790849673,
      "grad_norm": 23.812976837158203,
      "learning_rate": 9.684576015420277e-08,
      "loss": 5.6728,
      "step": 92
    },
    {
      "epoch": 1.215686274509804,
      "grad_norm": 29.3427791595459,
      "learning_rate": 7.426068431000883e-08,
      "loss": 7.135,
      "step": 93
    },
    {
      "epoch": 1.2287581699346406,
      "grad_norm": 26.24110221862793,
      "learning_rate": 5.463099816548578e-08,
      "loss": 6.2265,
      "step": 94
    },
    {
      "epoch": 1.2418300653594772,
      "grad_norm": 32.70111846923828,
      "learning_rate": 3.798061746947995e-08,
      "loss": 6.5576,
      "step": 95
    },
    {
      "epoch": 1.2549019607843137,
      "grad_norm": 19.617277145385742,
      "learning_rate": 2.4329828146074096e-08,
      "loss": 4.6098,
      "step": 96
    },
    {
      "epoch": 1.2679738562091503,
      "grad_norm": 7.279455661773682,
      "learning_rate": 1.3695261579316776e-08,
      "loss": 2.8293,
      "step": 97
    },
    {
      "epoch": 1.2810457516339868,
      "grad_norm": 9.745218276977539,
      "learning_rate": 6.089874350439507e-09,
      "loss": 3.1096,
      "step": 98
    },
    {
      "epoch": 1.2941176470588236,
      "grad_norm": 16.444101333618164,
      "learning_rate": 1.5229324522605949e-09,
      "loss": 4.1709,
      "step": 99
    },
    {
      "epoch": 1.3071895424836601,
      "grad_norm": 16.58370018005371,
      "learning_rate": 0.0,
      "loss": 4.0866,
      "step": 100
    },
    {
      "epoch": 1.3071895424836601,
      "eval_loss": 4.5732855796813965,
      "eval_runtime": 4.6201,
      "eval_samples_per_second": 27.921,
      "eval_steps_per_second": 7.143,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.931412223963955e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}