{
  "best_metric": 1.1826926469802856,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 1.008816120906801,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.005037783375314861,
      "grad_norm": 1.5144639015197754,
      "learning_rate": 5e-06,
      "loss": 1.4876,
      "step": 1
    },
    {
      "epoch": 0.005037783375314861,
      "eval_loss": 1.4588134288787842,
      "eval_runtime": 19.3448,
      "eval_samples_per_second": 17.317,
      "eval_steps_per_second": 8.685,
      "step": 1
    },
    {
      "epoch": 0.010075566750629723,
      "grad_norm": 1.5605663061141968,
      "learning_rate": 1e-05,
      "loss": 1.4583,
      "step": 2
    },
    {
      "epoch": 0.015113350125944584,
      "grad_norm": 1.5412099361419678,
      "learning_rate": 1.5e-05,
      "loss": 1.4862,
      "step": 3
    },
    {
      "epoch": 0.020151133501259445,
      "grad_norm": 1.592400074005127,
      "learning_rate": 2e-05,
      "loss": 1.491,
      "step": 4
    },
    {
      "epoch": 0.02518891687657431,
      "grad_norm": 1.6345747709274292,
      "learning_rate": 2.5e-05,
      "loss": 1.446,
      "step": 5
    },
    {
      "epoch": 0.030226700251889168,
      "grad_norm": 1.556227684020996,
      "learning_rate": 3e-05,
      "loss": 1.4938,
      "step": 6
    },
    {
      "epoch": 0.03526448362720403,
      "grad_norm": 1.8232054710388184,
      "learning_rate": 3.5e-05,
      "loss": 1.5025,
      "step": 7
    },
    {
      "epoch": 0.04030226700251889,
      "grad_norm": 1.6661618947982788,
      "learning_rate": 4e-05,
      "loss": 1.4873,
      "step": 8
    },
    {
      "epoch": 0.04534005037783375,
      "grad_norm": 1.6999740600585938,
      "learning_rate": 4.5e-05,
      "loss": 1.4715,
      "step": 9
    },
    {
      "epoch": 0.05037783375314862,
      "grad_norm": 2.0362396240234375,
      "learning_rate": 5e-05,
      "loss": 1.457,
      "step": 10
    },
    {
      "epoch": 0.055415617128463476,
      "grad_norm": 1.5522006750106812,
      "learning_rate": 5.500000000000001e-05,
      "loss": 1.4636,
      "step": 11
    },
    {
      "epoch": 0.060453400503778336,
      "grad_norm": 1.7916418313980103,
      "learning_rate": 6e-05,
      "loss": 1.4713,
      "step": 12
    },
    {
      "epoch": 0.0654911838790932,
      "grad_norm": 1.7970147132873535,
      "learning_rate": 6.500000000000001e-05,
      "loss": 1.4752,
      "step": 13
    },
    {
      "epoch": 0.07052896725440806,
      "grad_norm": 1.7493449449539185,
      "learning_rate": 7e-05,
      "loss": 1.4507,
      "step": 14
    },
    {
      "epoch": 0.07556675062972293,
      "grad_norm": 1.8799161911010742,
      "learning_rate": 7.500000000000001e-05,
      "loss": 1.469,
      "step": 15
    },
    {
      "epoch": 0.08060453400503778,
      "grad_norm": 1.8201518058776855,
      "learning_rate": 8e-05,
      "loss": 1.4958,
      "step": 16
    },
    {
      "epoch": 0.08564231738035265,
      "grad_norm": 1.7718278169631958,
      "learning_rate": 8.5e-05,
      "loss": 1.4514,
      "step": 17
    },
    {
      "epoch": 0.0906801007556675,
      "grad_norm": 1.7185187339782715,
      "learning_rate": 9e-05,
      "loss": 1.412,
      "step": 18
    },
    {
      "epoch": 0.09571788413098237,
      "grad_norm": 1.7553571462631226,
      "learning_rate": 9.5e-05,
      "loss": 1.4059,
      "step": 19
    },
    {
      "epoch": 0.10075566750629723,
      "grad_norm": 1.823019027709961,
      "learning_rate": 0.0001,
      "loss": 1.463,
      "step": 20
    },
    {
      "epoch": 0.10579345088161209,
      "grad_norm": 1.7108038663864136,
      "learning_rate": 9.999238475781957e-05,
      "loss": 1.4652,
      "step": 21
    },
    {
      "epoch": 0.11083123425692695,
      "grad_norm": 1.8570573329925537,
      "learning_rate": 9.99695413509548e-05,
      "loss": 1.4293,
      "step": 22
    },
    {
      "epoch": 0.11586901763224182,
      "grad_norm": 1.6678049564361572,
      "learning_rate": 9.99314767377287e-05,
      "loss": 1.4303,
      "step": 23
    },
    {
      "epoch": 0.12090680100755667,
      "grad_norm": 1.732181191444397,
      "learning_rate": 9.987820251299122e-05,
      "loss": 1.4409,
      "step": 24
    },
    {
      "epoch": 0.12594458438287154,
      "grad_norm": 1.7983189821243286,
      "learning_rate": 9.980973490458728e-05,
      "loss": 1.488,
      "step": 25
    },
    {
      "epoch": 0.1309823677581864,
      "grad_norm": 1.9346188306808472,
      "learning_rate": 9.972609476841367e-05,
      "loss": 1.4189,
      "step": 26
    },
    {
      "epoch": 0.13602015113350127,
      "grad_norm": 1.6753783226013184,
      "learning_rate": 9.962730758206611e-05,
      "loss": 1.3829,
      "step": 27
    },
    {
      "epoch": 0.14105793450881612,
      "grad_norm": 1.5368069410324097,
      "learning_rate": 9.951340343707852e-05,
      "loss": 1.4312,
      "step": 28
    },
    {
      "epoch": 0.14609571788413098,
      "grad_norm": 1.6217067241668701,
      "learning_rate": 9.938441702975689e-05,
      "loss": 1.4083,
      "step": 29
    },
    {
      "epoch": 0.15113350125944586,
      "grad_norm": 1.6028562784194946,
      "learning_rate": 9.924038765061042e-05,
      "loss": 1.4271,
      "step": 30
    },
    {
      "epoch": 0.1561712846347607,
      "grad_norm": 1.7340667247772217,
      "learning_rate": 9.908135917238321e-05,
      "loss": 1.4043,
      "step": 31
    },
    {
      "epoch": 0.16120906801007556,
      "grad_norm": 1.5199826955795288,
      "learning_rate": 9.890738003669029e-05,
      "loss": 1.3746,
      "step": 32
    },
    {
      "epoch": 0.16624685138539042,
      "grad_norm": 1.5970052480697632,
      "learning_rate": 9.871850323926177e-05,
      "loss": 1.3754,
      "step": 33
    },
    {
      "epoch": 0.1712846347607053,
      "grad_norm": 1.5285452604293823,
      "learning_rate": 9.851478631379982e-05,
      "loss": 1.3821,
      "step": 34
    },
    {
      "epoch": 0.17632241813602015,
      "grad_norm": 1.4565356969833374,
      "learning_rate": 9.829629131445342e-05,
      "loss": 1.3676,
      "step": 35
    },
    {
      "epoch": 0.181360201511335,
      "grad_norm": 1.4147199392318726,
      "learning_rate": 9.806308479691595e-05,
      "loss": 1.4254,
      "step": 36
    },
    {
      "epoch": 0.18639798488664988,
      "grad_norm": 1.4382061958312988,
      "learning_rate": 9.781523779815179e-05,
      "loss": 1.4457,
      "step": 37
    },
    {
      "epoch": 0.19143576826196473,
      "grad_norm": 1.4405041933059692,
      "learning_rate": 9.755282581475769e-05,
      "loss": 1.431,
      "step": 38
    },
    {
      "epoch": 0.1964735516372796,
      "grad_norm": 1.3351069688796997,
      "learning_rate": 9.727592877996585e-05,
      "loss": 1.4381,
      "step": 39
    },
    {
      "epoch": 0.20151133501259447,
      "grad_norm": 1.310120701789856,
      "learning_rate": 9.698463103929542e-05,
      "loss": 1.3856,
      "step": 40
    },
    {
      "epoch": 0.20654911838790932,
      "grad_norm": 1.2783992290496826,
      "learning_rate": 9.667902132486009e-05,
      "loss": 1.3938,
      "step": 41
    },
    {
      "epoch": 0.21158690176322417,
      "grad_norm": 1.2940365076065063,
      "learning_rate": 9.635919272833938e-05,
      "loss": 1.3899,
      "step": 42
    },
    {
      "epoch": 0.21662468513853905,
      "grad_norm": 1.2611422538757324,
      "learning_rate": 9.602524267262203e-05,
      "loss": 1.3785,
      "step": 43
    },
    {
      "epoch": 0.2216624685138539,
      "grad_norm": 1.2587807178497314,
      "learning_rate": 9.567727288213005e-05,
      "loss": 1.3709,
      "step": 44
    },
    {
      "epoch": 0.22670025188916876,
      "grad_norm": 1.2818981409072876,
      "learning_rate": 9.53153893518325e-05,
      "loss": 1.4439,
      "step": 45
    },
    {
      "epoch": 0.23173803526448364,
      "grad_norm": 3.443629503250122,
      "learning_rate": 9.493970231495835e-05,
      "loss": 1.3471,
      "step": 46
    },
    {
      "epoch": 0.2367758186397985,
      "grad_norm": 1.3402560949325562,
      "learning_rate": 9.45503262094184e-05,
      "loss": 1.3811,
      "step": 47
    },
    {
      "epoch": 0.24181360201511334,
      "grad_norm": 2.3149845600128174,
      "learning_rate": 9.414737964294636e-05,
      "loss": 1.4314,
      "step": 48
    },
    {
      "epoch": 0.24685138539042822,
      "grad_norm": 2.120126485824585,
      "learning_rate": 9.373098535696979e-05,
      "loss": 1.6943,
      "step": 49
    },
    {
      "epoch": 0.2518891687657431,
      "grad_norm": 1.0270239114761353,
      "learning_rate": 9.330127018922194e-05,
      "loss": 1.3175,
      "step": 50
    },
    {
      "epoch": 0.2518891687657431,
      "eval_loss": 1.3126516342163086,
      "eval_runtime": 19.2777,
      "eval_samples_per_second": 17.378,
      "eval_steps_per_second": 8.715,
      "step": 50
    },
    {
      "epoch": 0.25692695214105793,
      "grad_norm": 1.0754070281982422,
      "learning_rate": 9.285836503510562e-05,
      "loss": 1.3649,
      "step": 51
    },
    {
      "epoch": 0.2619647355163728,
      "grad_norm": 0.8903733491897583,
      "learning_rate": 9.24024048078213e-05,
      "loss": 1.3867,
      "step": 52
    },
    {
      "epoch": 0.26700251889168763,
      "grad_norm": 0.7503011226654053,
      "learning_rate": 9.193352839727121e-05,
      "loss": 1.378,
      "step": 53
    },
    {
      "epoch": 0.27204030226700254,
      "grad_norm": 0.7526147961616516,
      "learning_rate": 9.145187862775209e-05,
      "loss": 1.3398,
      "step": 54
    },
    {
      "epoch": 0.2770780856423174,
      "grad_norm": 0.6741443872451782,
      "learning_rate": 9.09576022144496e-05,
      "loss": 1.3456,
      "step": 55
    },
    {
      "epoch": 0.28211586901763225,
      "grad_norm": 0.6993998289108276,
      "learning_rate": 9.045084971874738e-05,
      "loss": 1.3233,
      "step": 56
    },
    {
      "epoch": 0.2871536523929471,
      "grad_norm": 0.6690895557403564,
      "learning_rate": 8.993177550236464e-05,
      "loss": 1.3585,
      "step": 57
    },
    {
      "epoch": 0.29219143576826195,
      "grad_norm": 0.6860454082489014,
      "learning_rate": 8.940053768033609e-05,
      "loss": 1.3368,
      "step": 58
    },
    {
      "epoch": 0.2972292191435768,
      "grad_norm": 0.7595508098602295,
      "learning_rate": 8.885729807284856e-05,
      "loss": 1.3665,
      "step": 59
    },
    {
      "epoch": 0.3022670025188917,
      "grad_norm": 0.8307892680168152,
      "learning_rate": 8.83022221559489e-05,
      "loss": 1.3454,
      "step": 60
    },
    {
      "epoch": 0.30730478589420657,
      "grad_norm": 0.7602563500404358,
      "learning_rate": 8.773547901113862e-05,
      "loss": 1.3492,
      "step": 61
    },
    {
      "epoch": 0.3123425692695214,
      "grad_norm": 0.7564201951026917,
      "learning_rate": 8.715724127386972e-05,
      "loss": 1.3518,
      "step": 62
    },
    {
      "epoch": 0.31738035264483627,
      "grad_norm": 0.7118662595748901,
      "learning_rate": 8.656768508095853e-05,
      "loss": 1.3344,
      "step": 63
    },
    {
      "epoch": 0.3224181360201511,
      "grad_norm": 0.7973348498344421,
      "learning_rate": 8.596699001693255e-05,
      "loss": 1.3479,
      "step": 64
    },
    {
      "epoch": 0.327455919395466,
      "grad_norm": 0.8027709722518921,
      "learning_rate": 8.535533905932738e-05,
      "loss": 1.3022,
      "step": 65
    },
    {
      "epoch": 0.33249370277078083,
      "grad_norm": 0.7900550365447998,
      "learning_rate": 8.473291852294987e-05,
      "loss": 1.3252,
      "step": 66
    },
    {
      "epoch": 0.33753148614609574,
      "grad_norm": 0.792402982711792,
      "learning_rate": 8.409991800312493e-05,
      "loss": 1.3707,
      "step": 67
    },
    {
      "epoch": 0.3425692695214106,
      "grad_norm": 0.6617130637168884,
      "learning_rate": 8.345653031794292e-05,
      "loss": 1.3088,
      "step": 68
    },
    {
      "epoch": 0.34760705289672544,
      "grad_norm": 0.8416574597358704,
      "learning_rate": 8.280295144952536e-05,
      "loss": 1.3632,
      "step": 69
    },
    {
      "epoch": 0.3526448362720403,
      "grad_norm": 0.7452552318572998,
      "learning_rate": 8.213938048432697e-05,
      "loss": 1.3131,
      "step": 70
    },
    {
      "epoch": 0.35768261964735515,
      "grad_norm": 0.9572402238845825,
      "learning_rate": 8.146601955249188e-05,
      "loss": 1.3406,
      "step": 71
    },
    {
      "epoch": 0.36272040302267,
      "grad_norm": 0.8076120018959045,
      "learning_rate": 8.07830737662829e-05,
      "loss": 1.2864,
      "step": 72
    },
    {
      "epoch": 0.3677581863979849,
      "grad_norm": 0.9595227837562561,
      "learning_rate": 8.009075115760243e-05,
      "loss": 1.3108,
      "step": 73
    },
    {
      "epoch": 0.37279596977329976,
      "grad_norm": 0.9767167568206787,
      "learning_rate": 7.938926261462366e-05,
      "loss": 1.346,
      "step": 74
    },
    {
      "epoch": 0.3778337531486146,
      "grad_norm": 0.7328104972839355,
      "learning_rate": 7.86788218175523e-05,
      "loss": 1.3087,
      "step": 75
    },
    {
      "epoch": 0.38287153652392947,
      "grad_norm": 0.8128076791763306,
      "learning_rate": 7.795964517353735e-05,
      "loss": 1.315,
      "step": 76
    },
    {
      "epoch": 0.3879093198992443,
      "grad_norm": 0.8831349611282349,
      "learning_rate": 7.723195175075136e-05,
      "loss": 1.2983,
      "step": 77
    },
    {
      "epoch": 0.3929471032745592,
      "grad_norm": 0.9544963836669922,
      "learning_rate": 7.649596321166024e-05,
      "loss": 1.2994,
      "step": 78
    },
    {
      "epoch": 0.3979848866498741,
      "grad_norm": 1.0878170728683472,
      "learning_rate": 7.575190374550272e-05,
      "loss": 1.2396,
      "step": 79
    },
    {
      "epoch": 0.40302267002518893,
      "grad_norm": 0.982731282711029,
      "learning_rate": 7.500000000000001e-05,
      "loss": 1.2915,
      "step": 80
    },
    {
      "epoch": 0.4080604534005038,
      "grad_norm": 1.0462909936904907,
      "learning_rate": 7.424048101231686e-05,
      "loss": 1.3248,
      "step": 81
    },
    {
      "epoch": 0.41309823677581864,
      "grad_norm": 0.9498928189277649,
      "learning_rate": 7.347357813929454e-05,
      "loss": 1.322,
      "step": 82
    },
    {
      "epoch": 0.4181360201511335,
      "grad_norm": 0.8573350310325623,
      "learning_rate": 7.269952498697734e-05,
      "loss": 1.36,
      "step": 83
    },
    {
      "epoch": 0.42317380352644834,
      "grad_norm": 0.895943284034729,
      "learning_rate": 7.191855733945387e-05,
      "loss": 1.3121,
      "step": 84
    },
    {
      "epoch": 0.4282115869017632,
      "grad_norm": 0.9696531295776367,
      "learning_rate": 7.113091308703498e-05,
      "loss": 1.2962,
      "step": 85
    },
    {
      "epoch": 0.4332493702770781,
      "grad_norm": 0.9790425896644592,
      "learning_rate": 7.033683215379002e-05,
      "loss": 1.2979,
      "step": 86
    },
    {
      "epoch": 0.43828715365239296,
      "grad_norm": 1.000526785850525,
      "learning_rate": 6.953655642446368e-05,
      "loss": 1.2579,
      "step": 87
    },
    {
      "epoch": 0.4433249370277078,
      "grad_norm": 0.9869703650474548,
      "learning_rate": 6.873032967079561e-05,
      "loss": 1.2483,
      "step": 88
    },
    {
      "epoch": 0.44836272040302266,
      "grad_norm": 1.1385756731033325,
      "learning_rate": 6.7918397477265e-05,
      "loss": 1.3313,
      "step": 89
    },
    {
      "epoch": 0.4534005037783375,
      "grad_norm": 1.0134340524673462,
      "learning_rate": 6.710100716628344e-05,
      "loss": 1.2907,
      "step": 90
    },
    {
      "epoch": 0.45843828715365237,
      "grad_norm": 1.240989327430725,
      "learning_rate": 6.627840772285784e-05,
      "loss": 1.3202,
      "step": 91
    },
    {
      "epoch": 0.4634760705289673,
      "grad_norm": 1.3036127090454102,
      "learning_rate": 6.545084971874738e-05,
      "loss": 1.3179,
      "step": 92
    },
    {
      "epoch": 0.46851385390428213,
      "grad_norm": 1.2141302824020386,
      "learning_rate": 6.461858523613684e-05,
      "loss": 1.2917,
      "step": 93
    },
    {
      "epoch": 0.473551637279597,
      "grad_norm": 1.3413684368133545,
      "learning_rate": 6.378186779084995e-05,
      "loss": 1.3128,
      "step": 94
    },
    {
      "epoch": 0.47858942065491183,
      "grad_norm": 1.3085066080093384,
      "learning_rate": 6.294095225512603e-05,
      "loss": 1.3313,
      "step": 95
    },
    {
      "epoch": 0.4836272040302267,
      "grad_norm": 1.555580973625183,
      "learning_rate": 6.209609477998338e-05,
      "loss": 1.3532,
      "step": 96
    },
    {
      "epoch": 0.48866498740554154,
      "grad_norm": 1.8934412002563477,
      "learning_rate": 6.124755271719325e-05,
      "loss": 1.4231,
      "step": 97
    },
    {
      "epoch": 0.49370277078085645,
      "grad_norm": 2.5842156410217285,
      "learning_rate": 6.0395584540887963e-05,
      "loss": 1.4556,
      "step": 98
    },
    {
      "epoch": 0.4987405541561713,
      "grad_norm": 0.8201480507850647,
      "learning_rate": 5.9540449768827246e-05,
      "loss": 1.2804,
      "step": 99
    },
    {
      "epoch": 0.5037783375314862,
      "grad_norm": 1.0748997926712036,
      "learning_rate": 5.868240888334653e-05,
      "loss": 1.3063,
      "step": 100
    },
    {
      "epoch": 0.5037783375314862,
      "eval_loss": 1.226228952407837,
      "eval_runtime": 19.3956,
      "eval_samples_per_second": 17.272,
      "eval_steps_per_second": 8.662,
      "step": 100
    },
    {
      "epoch": 0.5088161209068011,
      "grad_norm": 0.9275010228157043,
      "learning_rate": 5.782172325201155e-05,
      "loss": 1.3129,
      "step": 101
    },
    {
      "epoch": 0.5138539042821159,
      "grad_norm": 0.9533458948135376,
      "learning_rate": 5.695865504800327e-05,
      "loss": 1.2832,
      "step": 102
    },
    {
      "epoch": 0.5188916876574308,
      "grad_norm": 0.7950404286384583,
      "learning_rate": 5.6093467170257374e-05,
      "loss": 1.2729,
      "step": 103
    },
    {
      "epoch": 0.5239294710327456,
      "grad_norm": 0.7813581824302673,
      "learning_rate": 5.522642316338268e-05,
      "loss": 1.2425,
      "step": 104
    },
    {
      "epoch": 0.5289672544080605,
      "grad_norm": 0.9136513471603394,
      "learning_rate": 5.435778713738292e-05,
      "loss": 1.3132,
      "step": 105
    },
    {
      "epoch": 0.5340050377833753,
      "grad_norm": 0.7966837286949158,
      "learning_rate": 5.348782368720626e-05,
      "loss": 1.2751,
      "step": 106
    },
    {
      "epoch": 0.5390428211586902,
      "grad_norm": 0.8216973543167114,
      "learning_rate": 5.26167978121472e-05,
      "loss": 1.2749,
      "step": 107
    },
    {
      "epoch": 0.5440806045340051,
      "grad_norm": 0.951630711555481,
      "learning_rate": 5.174497483512506e-05,
      "loss": 1.308,
      "step": 108
    },
    {
      "epoch": 0.5491183879093199,
      "grad_norm": 0.9326642751693726,
      "learning_rate": 5.0872620321864185e-05,
      "loss": 1.2447,
      "step": 109
    },
    {
      "epoch": 0.5541561712846348,
      "grad_norm": 0.9708240628242493,
      "learning_rate": 5e-05,
      "loss": 1.2911,
      "step": 110
    },
    {
      "epoch": 0.5591939546599496,
      "grad_norm": 0.9061233401298523,
      "learning_rate": 4.912737967813583e-05,
      "loss": 1.275,
      "step": 111
    },
    {
      "epoch": 0.5642317380352645,
      "grad_norm": 0.8922585248947144,
      "learning_rate": 4.825502516487497e-05,
      "loss": 1.2794,
      "step": 112
    },
    {
      "epoch": 0.5692695214105793,
      "grad_norm": 0.8978312611579895,
      "learning_rate": 4.738320218785281e-05,
      "loss": 1.2773,
      "step": 113
    },
    {
      "epoch": 0.5743073047858942,
      "grad_norm": 0.9046211242675781,
      "learning_rate": 4.6512176312793736e-05,
      "loss": 1.2735,
      "step": 114
    },
    {
      "epoch": 0.5793450881612091,
      "grad_norm": 0.8805850744247437,
      "learning_rate": 4.564221286261709e-05,
      "loss": 1.3408,
      "step": 115
    },
    {
      "epoch": 0.5843828715365239,
      "grad_norm": 0.877220869064331,
      "learning_rate": 4.477357683661734e-05,
      "loss": 1.2502,
      "step": 116
    },
    {
      "epoch": 0.5894206549118388,
      "grad_norm": 0.9436619281768799,
      "learning_rate": 4.390653282974264e-05,
      "loss": 1.2536,
      "step": 117
    },
    {
      "epoch": 0.5944584382871536,
      "grad_norm": 0.911101758480072,
      "learning_rate": 4.3041344951996746e-05,
      "loss": 1.2606,
      "step": 118
    },
    {
      "epoch": 0.5994962216624685,
      "grad_norm": 0.8994234800338745,
      "learning_rate": 4.2178276747988446e-05,
      "loss": 1.2354,
      "step": 119
    },
    {
      "epoch": 0.6045340050377834,
      "grad_norm": 1.0473874807357788,
      "learning_rate": 4.131759111665349e-05,
      "loss": 1.2518,
      "step": 120
    },
    {
      "epoch": 0.6095717884130982,
      "grad_norm": 0.9809603691101074,
      "learning_rate": 4.045955023117276e-05,
      "loss": 1.2468,
      "step": 121
    },
    {
      "epoch": 0.6146095717884131,
      "grad_norm": 0.9014551043510437,
      "learning_rate": 3.960441545911204e-05,
      "loss": 1.2181,
      "step": 122
    },
    {
      "epoch": 0.6196473551637279,
      "grad_norm": 3.2279932498931885,
      "learning_rate": 3.875244728280676e-05,
      "loss": 1.2466,
      "step": 123
    },
    {
      "epoch": 0.6246851385390428,
      "grad_norm": 1.0879408121109009,
      "learning_rate": 3.790390522001662e-05,
      "loss": 1.2314,
      "step": 124
    },
    {
      "epoch": 0.6297229219143576,
      "grad_norm": 1.0637506246566772,
      "learning_rate": 3.705904774487396e-05,
      "loss": 1.2564,
      "step": 125
    },
    {
      "epoch": 0.6347607052896725,
      "grad_norm": 1.1840940713882446,
      "learning_rate": 3.6218132209150045e-05,
      "loss": 1.2296,
      "step": 126
    },
    {
      "epoch": 0.6397984886649875,
      "grad_norm": 1.105326771736145,
      "learning_rate": 3.5381414763863166e-05,
      "loss": 1.2549,
      "step": 127
    },
    {
      "epoch": 0.6448362720403022,
      "grad_norm": 1.0958088636398315,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 1.2328,
      "step": 128
    },
    {
      "epoch": 0.6498740554156172,
      "grad_norm": 1.1396890878677368,
      "learning_rate": 3.372159227714218e-05,
      "loss": 1.2363,
      "step": 129
    },
    {
      "epoch": 0.654911838790932,
      "grad_norm": 1.4085423946380615,
      "learning_rate": 3.289899283371657e-05,
      "loss": 1.2242,
      "step": 130
    },
    {
      "epoch": 0.6599496221662469,
      "grad_norm": 1.1341376304626465,
      "learning_rate": 3.2081602522734986e-05,
      "loss": 1.2563,
      "step": 131
    },
    {
      "epoch": 0.6649874055415617,
      "grad_norm": 1.379789113998413,
      "learning_rate": 3.12696703292044e-05,
      "loss": 1.2659,
      "step": 132
    },
    {
      "epoch": 0.6700251889168766,
      "grad_norm": 1.2120745182037354,
      "learning_rate": 3.046344357553632e-05,
      "loss": 1.2329,
      "step": 133
    },
    {
      "epoch": 0.6750629722921915,
      "grad_norm": 1.0146487951278687,
      "learning_rate": 2.9663167846209998e-05,
      "loss": 1.2198,
      "step": 134
    },
    {
      "epoch": 0.6801007556675063,
      "grad_norm": 1.1056286096572876,
      "learning_rate": 2.886908691296504e-05,
      "loss": 1.2652,
      "step": 135
    },
    {
      "epoch": 0.6851385390428212,
      "grad_norm": 1.1778992414474487,
      "learning_rate": 2.8081442660546125e-05,
      "loss": 1.2673,
      "step": 136
    },
    {
      "epoch": 0.690176322418136,
      "grad_norm": 1.3261139392852783,
      "learning_rate": 2.7300475013022663e-05,
      "loss": 1.2619,
      "step": 137
    },
    {
      "epoch": 0.6952141057934509,
      "grad_norm": 1.0408475399017334,
      "learning_rate": 2.6526421860705473e-05,
      "loss": 1.3247,
      "step": 138
    },
    {
      "epoch": 0.7002518891687658,
      "grad_norm": 1.1338088512420654,
      "learning_rate": 2.575951898768315e-05,
      "loss": 1.2155,
      "step": 139
    },
    {
      "epoch": 0.7052896725440806,
      "grad_norm": 1.271579623222351,
      "learning_rate": 2.500000000000001e-05,
      "loss": 1.21,
      "step": 140
    },
    {
      "epoch": 0.7103274559193955,
      "grad_norm": 1.1375788450241089,
      "learning_rate": 2.4248096254497288e-05,
      "loss": 1.2732,
      "step": 141
    },
    {
      "epoch": 0.7153652392947103,
      "grad_norm": 1.446244239807129,
      "learning_rate": 2.350403678833976e-05,
      "loss": 1.2476,
      "step": 142
    },
    {
      "epoch": 0.7204030226700252,
      "grad_norm": 1.0757509469985962,
      "learning_rate": 2.2768048249248648e-05,
      "loss": 1.2661,
      "step": 143
    },
    {
      "epoch": 0.72544080604534,
      "grad_norm": 1.2652767896652222,
      "learning_rate": 2.2040354826462668e-05,
      "loss": 1.3227,
      "step": 144
    },
    {
      "epoch": 0.7304785894206549,
      "grad_norm": 1.3920189142227173,
      "learning_rate": 2.132117818244771e-05,
      "loss": 1.3374,
      "step": 145
    },
    {
      "epoch": 0.7355163727959698,
      "grad_norm": 1.432496428489685,
      "learning_rate": 2.061073738537635e-05,
      "loss": 1.3587,
      "step": 146
    },
    {
      "epoch": 0.7405541561712846,
      "grad_norm": 2.871924638748169,
      "learning_rate": 1.9909248842397584e-05,
      "loss": 1.4344,
      "step": 147
    },
    {
      "epoch": 0.7455919395465995,
      "grad_norm": 0.7997872233390808,
      "learning_rate": 1.9216926233717085e-05,
      "loss": 1.2165,
      "step": 148
    },
    {
      "epoch": 0.7506297229219143,
      "grad_norm": 0.8687345385551453,
      "learning_rate": 1.8533980447508137e-05,
      "loss": 1.2801,
      "step": 149
    },
    {
      "epoch": 0.7556675062972292,
      "grad_norm": 1.1544013023376465,
      "learning_rate": 1.7860619515673033e-05,
      "loss": 1.2143,
      "step": 150
    },
    {
      "epoch": 0.7556675062972292,
      "eval_loss": 1.1899141073226929,
      "eval_runtime": 19.3256,
      "eval_samples_per_second": 17.335,
      "eval_steps_per_second": 8.693,
      "step": 150
    },
    {
      "epoch": 0.760705289672544,
      "grad_norm": 0.9934613704681396,
      "learning_rate": 1.7197048550474643e-05,
      "loss": 1.2325,
      "step": 151
    },
    {
      "epoch": 0.7657430730478589,
      "grad_norm": 0.8965021967887878,
      "learning_rate": 1.6543469682057106e-05,
      "loss": 1.2311,
      "step": 152
    },
    {
      "epoch": 0.7707808564231738,
      "grad_norm": 1.0818604230880737,
      "learning_rate": 1.5900081996875083e-05,
      "loss": 1.2623,
      "step": 153
    },
    {
      "epoch": 0.7758186397984886,
      "grad_norm": 1.038767695426941,
      "learning_rate": 1.526708147705013e-05,
      "loss": 1.2225,
      "step": 154
    },
    {
      "epoch": 0.7808564231738035,
      "grad_norm": 0.9397957921028137,
      "learning_rate": 1.4644660940672627e-05,
      "loss": 1.2607,
      "step": 155
    },
    {
      "epoch": 0.7858942065491183,
      "grad_norm": 0.916916012763977,
      "learning_rate": 1.4033009983067452e-05,
      "loss": 1.2201,
      "step": 156
    },
    {
      "epoch": 0.7909319899244333,
      "grad_norm": 2.7706637382507324,
      "learning_rate": 1.3432314919041478e-05,
      "loss": 1.2303,
      "step": 157
    },
    {
      "epoch": 0.7959697732997482,
      "grad_norm": 0.9059083461761475,
      "learning_rate": 1.2842758726130283e-05,
      "loss": 1.2546,
      "step": 158
    },
    {
      "epoch": 0.801007556675063,
      "grad_norm": 0.893086314201355,
      "learning_rate": 1.22645209888614e-05,
      "loss": 1.2919,
      "step": 159
    },
    {
      "epoch": 0.8060453400503779,
      "grad_norm": 0.8109311461448669,
      "learning_rate": 1.1697777844051105e-05,
      "loss": 1.2152,
      "step": 160
    },
    {
      "epoch": 0.8110831234256927,
      "grad_norm": 0.9078565835952759,
      "learning_rate": 1.1142701927151456e-05,
      "loss": 1.235,
      "step": 161
    },
    {
      "epoch": 0.8161209068010076,
      "grad_norm": 1.0009123086929321,
      "learning_rate": 1.0599462319663905e-05,
      "loss": 1.226,
      "step": 162
    },
    {
      "epoch": 0.8211586901763224,
      "grad_norm": 0.968085765838623,
      "learning_rate": 1.006822449763537e-05,
      "loss": 1.2374,
      "step": 163
    },
    {
      "epoch": 0.8261964735516373,
      "grad_norm": 1.0192447900772095,
      "learning_rate": 9.549150281252633e-06,
      "loss": 1.2487,
      "step": 164
    },
    {
      "epoch": 0.8312342569269522,
      "grad_norm": 1.0350886583328247,
      "learning_rate": 9.042397785550405e-06,
      "loss": 1.2813,
      "step": 165
    },
    {
      "epoch": 0.836272040302267,
      "grad_norm": 1.1261175870895386,
      "learning_rate": 8.548121372247918e-06,
      "loss": 1.2274,
      "step": 166
    },
    {
      "epoch": 0.8413098236775819,
      "grad_norm": 0.9012370109558105,
      "learning_rate": 8.066471602728803e-06,
      "loss": 1.253,
      "step": 167
    },
    {
      "epoch": 0.8463476070528967,
      "grad_norm": 0.9083200097084045,
      "learning_rate": 7.597595192178702e-06,
      "loss": 1.2399,
      "step": 168
    },
    {
      "epoch": 0.8513853904282116,
      "grad_norm": 0.934179961681366,
      "learning_rate": 7.1416349648943894e-06,
      "loss": 1.2241,
      "step": 169
    },
    {
      "epoch": 0.8564231738035264,
      "grad_norm": 1.1478110551834106,
      "learning_rate": 6.698729810778065e-06,
      "loss": 1.2479,
      "step": 170
    },
    {
      "epoch": 0.8614609571788413,
      "grad_norm": 1.0051943063735962,
      "learning_rate": 6.269014643030213e-06,
      "loss": 1.2435,
      "step": 171
    },
    {
      "epoch": 0.8664987405541562,
      "grad_norm": 1.0844242572784424,
      "learning_rate": 5.852620357053651e-06,
      "loss": 1.2382,
      "step": 172
    },
    {
      "epoch": 0.871536523929471,
      "grad_norm": 0.9379656314849854,
      "learning_rate": 5.449673790581611e-06,
      "loss": 1.251,
      "step": 173
    },
    {
      "epoch": 0.8765743073047859,
      "grad_norm": 1.0667510032653809,
      "learning_rate": 5.060297685041659e-06,
      "loss": 1.2665,
      "step": 174
    },
    {
      "epoch": 0.8816120906801007,
      "grad_norm": 1.1613593101501465,
      "learning_rate": 4.684610648167503e-06,
      "loss": 1.2412,
      "step": 175
    },
    {
      "epoch": 0.8866498740554156,
      "grad_norm": 0.9372274875640869,
      "learning_rate": 4.322727117869951e-06,
      "loss": 1.1967,
      "step": 176
    },
    {
      "epoch": 0.8916876574307305,
      "grad_norm": 0.9599543213844299,
      "learning_rate": 3.974757327377981e-06,
      "loss": 1.2442,
      "step": 177
    },
    {
      "epoch": 0.8967254408060453,
      "grad_norm": 1.1632378101348877,
      "learning_rate": 3.6408072716606346e-06,
      "loss": 1.2514,
      "step": 178
    },
    {
      "epoch": 0.9017632241813602,
      "grad_norm": 1.0114705562591553,
      "learning_rate": 3.3209786751399187e-06,
      "loss": 1.1577,
      "step": 179
    },
    {
      "epoch": 0.906801007556675,
      "grad_norm": 1.0662001371383667,
      "learning_rate": 3.0153689607045845e-06,
      "loss": 1.2004,
      "step": 180
    },
    {
      "epoch": 0.9118387909319899,
      "grad_norm": 1.0731958150863647,
      "learning_rate": 2.724071220034158e-06,
      "loss": 1.2546,
      "step": 181
    },
    {
      "epoch": 0.9168765743073047,
      "grad_norm": 1.0280028581619263,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 1.2196,
      "step": 182
    },
    {
      "epoch": 0.9219143576826196,
      "grad_norm": 1.001569390296936,
      "learning_rate": 2.1847622018482283e-06,
      "loss": 1.2036,
      "step": 183
    },
    {
      "epoch": 0.9269521410579346,
      "grad_norm": 1.0607179403305054,
      "learning_rate": 1.9369152030840556e-06,
      "loss": 1.2184,
      "step": 184
    },
    {
      "epoch": 0.9319899244332494,
      "grad_norm": 1.0795390605926514,
      "learning_rate": 1.70370868554659e-06,
      "loss": 1.2137,
      "step": 185
    },
    {
      "epoch": 0.9370277078085643,
      "grad_norm": 1.0558546781539917,
      "learning_rate": 1.4852136862001764e-06,
      "loss": 1.1975,
      "step": 186
    },
    {
      "epoch": 0.9420654911838791,
      "grad_norm": 1.0757189989089966,
      "learning_rate": 1.2814967607382432e-06,
      "loss": 1.2419,
      "step": 187
    },
    {
      "epoch": 0.947103274559194,
      "grad_norm": 0.9995512962341309,
      "learning_rate": 1.0926199633097157e-06,
      "loss": 1.2661,
      "step": 188
    },
    {
      "epoch": 0.9521410579345088,
      "grad_norm": 1.0607733726501465,
      "learning_rate": 9.186408276168013e-07,
      "loss": 1.2109,
      "step": 189
    },
    {
      "epoch": 0.9571788413098237,
      "grad_norm": 1.18296480178833,
      "learning_rate": 7.596123493895991e-07,
      "loss": 1.2535,
      "step": 190
    },
    {
      "epoch": 0.9622166246851386,
      "grad_norm": 1.2574044466018677,
      "learning_rate": 6.15582970243117e-07,
      "loss": 1.2651,
      "step": 191
    },
    {
      "epoch": 0.9672544080604534,
      "grad_norm": 1.2701469659805298,
      "learning_rate": 4.865965629214819e-07,
      "loss": 1.2916,
      "step": 192
    },
    {
      "epoch": 0.9722921914357683,
      "grad_norm": 1.3830208778381348,
      "learning_rate": 3.7269241793390085e-07,
      "loss": 1.284,
      "step": 193
    },
    {
      "epoch": 0.9773299748110831,
      "grad_norm": 1.2639228105545044,
      "learning_rate": 2.7390523158633554e-07,
      "loss": 1.3334,
      "step": 194
    },
    {
      "epoch": 0.982367758186398,
      "grad_norm": 1.8174062967300415,
      "learning_rate": 1.9026509541272275e-07,
      "loss": 1.475,
      "step": 195
    },
    {
      "epoch": 0.9874055415617129,
      "grad_norm": 2.5040292739868164,
      "learning_rate": 1.2179748700879012e-07,
      "loss": 1.3532,
      "step": 196
    },
    {
      "epoch": 0.9924433249370277,
      "grad_norm": 1.002044916152954,
      "learning_rate": 6.852326227130834e-08,
      "loss": 1.2387,
      "step": 197
    },
    {
      "epoch": 0.9974811083123426,
      "grad_norm": 1.0022469758987427,
      "learning_rate": 3.04586490452119e-08,
      "loss": 1.2446,
      "step": 198
    },
    {
      "epoch": 1.0037783375314862,
      "grad_norm": 2.5255768299102783,
      "learning_rate": 7.615242180436522e-09,
      "loss": 2.1604,
      "step": 199
    },
    {
      "epoch": 1.008816120906801,
      "grad_norm": 0.9008597135543823,
      "learning_rate": 0.0,
      "loss": 1.2288,
      "step": 200
    },
    {
      "epoch": 1.008816120906801,
      "eval_loss": 1.1826926469802856,
      "eval_runtime": 19.3371,
      "eval_samples_per_second": 17.324,
      "eval_steps_per_second": 8.688,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1891400404631552e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}