| { | |
| "best_metric": 0.8415273271774395, | |
| "best_model_checkpoint": "results_retain/facebook/hubert-base-ls960/42/checkpoint-30000", | |
| "epoch": 69.20415224913495, | |
| "eval_steps": 1000, | |
| "global_step": 30000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.1534025374855825, | |
| "grad_norm": 2.9732823371887207, | |
| "learning_rate": 8.333333333333333e-05, | |
| "loss": 3.9827, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.306805074971165, | |
| "grad_norm": 3.6868040561676025, | |
| "learning_rate": 0.00016666666666666666, | |
| "loss": 3.3559, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.306805074971165, | |
| "eval_accuracy": 0.33953082106313953, | |
| "eval_f1_macro": 0.12928496278744922, | |
| "eval_loss": 2.5730652809143066, | |
| "eval_runtime": 35.334, | |
| "eval_samples_per_second": 226.807, | |
| "eval_steps_per_second": 7.104, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.4602076124567476, | |
| "grad_norm": 7.014188766479492, | |
| "learning_rate": 0.00025, | |
| "loss": 2.1949, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 4.61361014994233, | |
| "grad_norm": 6.496627330780029, | |
| "learning_rate": 0.0003333333333333333, | |
| "loss": 1.6389, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 4.61361014994233, | |
| "eval_accuracy": 0.6416271524831545, | |
| "eval_f1_macro": 0.4299797469182877, | |
| "eval_loss": 1.4779495000839233, | |
| "eval_runtime": 35.3694, | |
| "eval_samples_per_second": 226.58, | |
| "eval_steps_per_second": 7.097, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 5.767012687427912, | |
| "grad_norm": 7.616945266723633, | |
| "learning_rate": 0.0004166666666666667, | |
| "loss": 1.4162, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 6.920415224913495, | |
| "grad_norm": 8.488947868347168, | |
| "learning_rate": 0.0005, | |
| "loss": 1.3587, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 6.920415224913495, | |
| "eval_accuracy": 0.6574744197654105, | |
| "eval_f1_macro": 0.4595491108495356, | |
| "eval_loss": 1.427338719367981, | |
| "eval_runtime": 72.7464, | |
| "eval_samples_per_second": 110.164, | |
| "eval_steps_per_second": 3.45, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 8.073817762399077, | |
| "grad_norm": 8.156586647033691, | |
| "learning_rate": 0.0004907407407407408, | |
| "loss": 1.3027, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 9.22722029988466, | |
| "grad_norm": 6.934875965118408, | |
| "learning_rate": 0.00048148148148148144, | |
| "loss": 1.1695, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 9.22722029988466, | |
| "eval_accuracy": 0.6961567257299726, | |
| "eval_f1_macro": 0.5353451961331095, | |
| "eval_loss": 1.2435524463653564, | |
| "eval_runtime": 70.2906, | |
| "eval_samples_per_second": 114.012, | |
| "eval_steps_per_second": 3.571, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 10.380622837370241, | |
| "grad_norm": 7.151013374328613, | |
| "learning_rate": 0.00047222222222222224, | |
| "loss": 1.0695, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 11.534025374855824, | |
| "grad_norm": 7.771185874938965, | |
| "learning_rate": 0.000462962962962963, | |
| "loss": 0.9787, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 11.534025374855824, | |
| "eval_accuracy": 0.6901672073870726, | |
| "eval_f1_macro": 0.5134333803516367, | |
| "eval_loss": 1.3313419818878174, | |
| "eval_runtime": 71.0472, | |
| "eval_samples_per_second": 112.798, | |
| "eval_steps_per_second": 3.533, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 12.687427912341407, | |
| "grad_norm": 6.754736423492432, | |
| "learning_rate": 0.0004537037037037037, | |
| "loss": 0.9056, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 13.84083044982699, | |
| "grad_norm": 6.322958946228027, | |
| "learning_rate": 0.0004444444444444444, | |
| "loss": 0.836, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 13.84083044982699, | |
| "eval_accuracy": 0.7138757174943848, | |
| "eval_f1_macro": 0.5729793747297807, | |
| "eval_loss": 1.2415224313735962, | |
| "eval_runtime": 73.4693, | |
| "eval_samples_per_second": 109.08, | |
| "eval_steps_per_second": 3.416, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 14.994232987312571, | |
| "grad_norm": 7.2826619148254395, | |
| "learning_rate": 0.0004351851851851852, | |
| "loss": 0.7867, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 16.147635524798154, | |
| "grad_norm": 5.9969482421875, | |
| "learning_rate": 0.00042592592592592595, | |
| "loss": 0.7135, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 16.147635524798154, | |
| "eval_accuracy": 0.7389568255552783, | |
| "eval_f1_macro": 0.5793519253003285, | |
| "eval_loss": 1.1902633905410767, | |
| "eval_runtime": 73.1754, | |
| "eval_samples_per_second": 109.518, | |
| "eval_steps_per_second": 3.43, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 17.301038062283737, | |
| "grad_norm": 6.838934421539307, | |
| "learning_rate": 0.0004166666666666667, | |
| "loss": 0.6719, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 18.45444059976932, | |
| "grad_norm": 7.784801006317139, | |
| "learning_rate": 0.0004074074074074074, | |
| "loss": 0.6009, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 18.45444059976932, | |
| "eval_accuracy": 0.7414524581981532, | |
| "eval_f1_macro": 0.6081990369390977, | |
| "eval_loss": 1.2159614562988281, | |
| "eval_runtime": 55.5881, | |
| "eval_samples_per_second": 144.168, | |
| "eval_steps_per_second": 4.515, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 19.607843137254903, | |
| "grad_norm": 6.171660423278809, | |
| "learning_rate": 0.0003981481481481481, | |
| "loss": 0.5756, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 20.761245674740483, | |
| "grad_norm": 7.959474563598633, | |
| "learning_rate": 0.0003888888888888889, | |
| "loss": 0.5355, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 20.761245674740483, | |
| "eval_accuracy": 0.7543049663089593, | |
| "eval_f1_macro": 0.5947199785680519, | |
| "eval_loss": 1.1460059881210327, | |
| "eval_runtime": 55.0321, | |
| "eval_samples_per_second": 145.624, | |
| "eval_steps_per_second": 4.561, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 21.914648212226066, | |
| "grad_norm": 7.287164211273193, | |
| "learning_rate": 0.00037962962962962966, | |
| "loss": 0.5046, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 23.06805074971165, | |
| "grad_norm": 6.368403434753418, | |
| "learning_rate": 0.00037037037037037035, | |
| "loss": 0.4737, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 23.06805074971165, | |
| "eval_accuracy": 0.759920139755428, | |
| "eval_f1_macro": 0.6214937740706044, | |
| "eval_loss": 1.1644535064697266, | |
| "eval_runtime": 54.1187, | |
| "eval_samples_per_second": 148.082, | |
| "eval_steps_per_second": 4.638, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 24.22145328719723, | |
| "grad_norm": 9.294144630432129, | |
| "learning_rate": 0.0003611111111111111, | |
| "loss": 0.4349, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 25.374855824682815, | |
| "grad_norm": 7.4235310554504395, | |
| "learning_rate": 0.0003518518518518519, | |
| "loss": 0.4352, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 25.374855824682815, | |
| "eval_accuracy": 0.7545545295732469, | |
| "eval_f1_macro": 0.5917892398903293, | |
| "eval_loss": 1.213472843170166, | |
| "eval_runtime": 54.2317, | |
| "eval_samples_per_second": 147.773, | |
| "eval_steps_per_second": 4.628, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 26.528258362168398, | |
| "grad_norm": 5.937560558319092, | |
| "learning_rate": 0.00034259259259259263, | |
| "loss": 0.4017, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 27.68166089965398, | |
| "grad_norm": 6.036593914031982, | |
| "learning_rate": 0.0003333333333333333, | |
| "loss": 0.3652, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 27.68166089965398, | |
| "eval_accuracy": 0.7732717743948091, | |
| "eval_f1_macro": 0.6375373960767734, | |
| "eval_loss": 1.1644330024719238, | |
| "eval_runtime": 75.4416, | |
| "eval_samples_per_second": 106.228, | |
| "eval_steps_per_second": 3.327, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 28.83506343713956, | |
| "grad_norm": 6.821892738342285, | |
| "learning_rate": 0.00032407407407407406, | |
| "loss": 0.3443, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 29.988465974625143, | |
| "grad_norm": 4.1507463455200195, | |
| "learning_rate": 0.0003148148148148148, | |
| "loss": 0.3246, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 29.988465974625143, | |
| "eval_accuracy": 0.7776391315198403, | |
| "eval_f1_macro": 0.6181477901694947, | |
| "eval_loss": 1.143282175064087, | |
| "eval_runtime": 75.3981, | |
| "eval_samples_per_second": 106.289, | |
| "eval_steps_per_second": 3.329, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 31.141868512110726, | |
| "grad_norm": 7.311563491821289, | |
| "learning_rate": 0.0003055555555555556, | |
| "loss": 0.3082, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 32.29527104959631, | |
| "grad_norm": 2.214399576187134, | |
| "learning_rate": 0.0002962962962962963, | |
| "loss": 0.2876, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 32.29527104959631, | |
| "eval_accuracy": 0.7700274519590716, | |
| "eval_f1_macro": 0.6278465966595438, | |
| "eval_loss": 1.2212963104248047, | |
| "eval_runtime": 73.2438, | |
| "eval_samples_per_second": 109.415, | |
| "eval_steps_per_second": 3.427, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 33.44867358708189, | |
| "grad_norm": 5.876758575439453, | |
| "learning_rate": 0.00028703703703703703, | |
| "loss": 0.2722, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 34.602076124567475, | |
| "grad_norm": 3.34192156791687, | |
| "learning_rate": 0.0002777777777777778, | |
| "loss": 0.2539, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 34.602076124567475, | |
| "eval_accuracy": 0.7858747192413277, | |
| "eval_f1_macro": 0.6310309906248334, | |
| "eval_loss": 1.1600251197814941, | |
| "eval_runtime": 55.43, | |
| "eval_samples_per_second": 144.579, | |
| "eval_steps_per_second": 4.528, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 35.75547866205306, | |
| "grad_norm": 4.611924648284912, | |
| "learning_rate": 0.0002685185185185186, | |
| "loss": 0.2428, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 36.90888119953864, | |
| "grad_norm": 3.3283474445343018, | |
| "learning_rate": 0.00025925925925925926, | |
| "loss": 0.2322, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 36.90888119953864, | |
| "eval_accuracy": 0.7816321437484403, | |
| "eval_f1_macro": 0.6319203799590871, | |
| "eval_loss": 1.135780930519104, | |
| "eval_runtime": 63.9861, | |
| "eval_samples_per_second": 125.246, | |
| "eval_steps_per_second": 3.923, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 38.062283737024224, | |
| "grad_norm": 6.641352653503418, | |
| "learning_rate": 0.00025, | |
| "loss": 0.2146, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 39.21568627450981, | |
| "grad_norm": 6.247890949249268, | |
| "learning_rate": 0.00024074074074074072, | |
| "loss": 0.2003, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 39.21568627450981, | |
| "eval_accuracy": 0.7962315947092587, | |
| "eval_f1_macro": 0.6542244286445125, | |
| "eval_loss": 1.150564432144165, | |
| "eval_runtime": 69.2204, | |
| "eval_samples_per_second": 115.775, | |
| "eval_steps_per_second": 3.626, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 40.36908881199539, | |
| "grad_norm": 4.363713264465332, | |
| "learning_rate": 0.0002314814814814815, | |
| "loss": 0.1947, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 41.522491349480966, | |
| "grad_norm": 3.4260287284851074, | |
| "learning_rate": 0.0002222222222222222, | |
| "loss": 0.1794, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 41.522491349480966, | |
| "eval_accuracy": 0.7979785375592713, | |
| "eval_f1_macro": 0.6796792585107833, | |
| "eval_loss": 1.1864490509033203, | |
| "eval_runtime": 71.1001, | |
| "eval_samples_per_second": 112.714, | |
| "eval_steps_per_second": 3.53, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 42.67589388696655, | |
| "grad_norm": 2.606008291244507, | |
| "learning_rate": 0.00021296296296296298, | |
| "loss": 0.1689, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 43.82929642445213, | |
| "grad_norm": 4.665687084197998, | |
| "learning_rate": 0.0002037037037037037, | |
| "loss": 0.1645, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 43.82929642445213, | |
| "eval_accuracy": 0.80059895183429, | |
| "eval_f1_macro": 0.6667890419585701, | |
| "eval_loss": 1.2014110088348389, | |
| "eval_runtime": 74.7112, | |
| "eval_samples_per_second": 107.266, | |
| "eval_steps_per_second": 3.36, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 44.982698961937714, | |
| "grad_norm": 3.0378100872039795, | |
| "learning_rate": 0.00019444444444444446, | |
| "loss": 0.1602, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 46.1361014994233, | |
| "grad_norm": 5.274627685546875, | |
| "learning_rate": 0.00018518518518518518, | |
| "loss": 0.144, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 46.1361014994233, | |
| "eval_accuracy": 0.7989767906164212, | |
| "eval_f1_macro": 0.6582157335341974, | |
| "eval_loss": 1.1411352157592773, | |
| "eval_runtime": 73.2065, | |
| "eval_samples_per_second": 109.471, | |
| "eval_steps_per_second": 3.429, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 47.28950403690888, | |
| "grad_norm": 2.336925745010376, | |
| "learning_rate": 0.00017592592592592595, | |
| "loss": 0.1368, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 48.44290657439446, | |
| "grad_norm": 2.7309417724609375, | |
| "learning_rate": 0.00016666666666666666, | |
| "loss": 0.1298, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 48.44290657439446, | |
| "eval_accuracy": 0.8064636885450461, | |
| "eval_f1_macro": 0.6782237618476237, | |
| "eval_loss": 1.1389836072921753, | |
| "eval_runtime": 73.3032, | |
| "eval_samples_per_second": 109.327, | |
| "eval_steps_per_second": 3.424, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 49.596309111880046, | |
| "grad_norm": 2.79067325592041, | |
| "learning_rate": 0.0001574074074074074, | |
| "loss": 0.1206, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 50.74971164936563, | |
| "grad_norm": 4.826747417449951, | |
| "learning_rate": 0.00014814814814814815, | |
| "loss": 0.1175, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 50.74971164936563, | |
| "eval_accuracy": 0.8068380334414774, | |
| "eval_f1_macro": 0.6700916407139905, | |
| "eval_loss": 1.2090946435928345, | |
| "eval_runtime": 73.8226, | |
| "eval_samples_per_second": 108.557, | |
| "eval_steps_per_second": 3.4, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 51.90311418685121, | |
| "grad_norm": 3.403858184814453, | |
| "learning_rate": 0.0001388888888888889, | |
| "loss": 0.1021, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 53.056516724336795, | |
| "grad_norm": 5.1802496910095215, | |
| "learning_rate": 0.00012962962962962963, | |
| "loss": 0.0977, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 53.056516724336795, | |
| "eval_accuracy": 0.8149488395308211, | |
| "eval_f1_macro": 0.682806558028361, | |
| "eval_loss": 1.1759377717971802, | |
| "eval_runtime": 73.8391, | |
| "eval_samples_per_second": 108.533, | |
| "eval_steps_per_second": 3.399, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 54.20991926182238, | |
| "grad_norm": 1.122316837310791, | |
| "learning_rate": 0.00012037037037037036, | |
| "loss": 0.0912, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 55.36332179930796, | |
| "grad_norm": 1.1100833415985107, | |
| "learning_rate": 0.0001111111111111111, | |
| "loss": 0.0823, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 55.36332179930796, | |
| "eval_accuracy": 0.8166957823808335, | |
| "eval_f1_macro": 0.7045678569443168, | |
| "eval_loss": 1.2304565906524658, | |
| "eval_runtime": 74.7299, | |
| "eval_samples_per_second": 107.24, | |
| "eval_steps_per_second": 3.359, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 56.516724336793544, | |
| "grad_norm": 4.60992956161499, | |
| "learning_rate": 0.00010185185185185185, | |
| "loss": 0.0873, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 57.67012687427912, | |
| "grad_norm": 5.945472240447998, | |
| "learning_rate": 9.259259259259259e-05, | |
| "loss": 0.0767, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 57.67012687427912, | |
| "eval_accuracy": 0.8238083354130272, | |
| "eval_f1_macro": 0.6889311414034964, | |
| "eval_loss": 1.231188416481018, | |
| "eval_runtime": 72.9471, | |
| "eval_samples_per_second": 109.86, | |
| "eval_steps_per_second": 3.441, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 58.8235294117647, | |
| "grad_norm": 6.51999044418335, | |
| "learning_rate": 8.333333333333333e-05, | |
| "loss": 0.0667, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 59.976931949250286, | |
| "grad_norm": 3.6006715297698975, | |
| "learning_rate": 7.407407407407407e-05, | |
| "loss": 0.066, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 59.976931949250286, | |
| "eval_accuracy": 0.8235587721487397, | |
| "eval_f1_macro": 0.7127282615425515, | |
| "eval_loss": 1.212782621383667, | |
| "eval_runtime": 70.4845, | |
| "eval_samples_per_second": 113.699, | |
| "eval_steps_per_second": 3.561, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 61.13033448673587, | |
| "grad_norm": 1.102469563484192, | |
| "learning_rate": 6.481481481481482e-05, | |
| "loss": 0.0601, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 62.28373702422145, | |
| "grad_norm": 3.476552724838257, | |
| "learning_rate": 5.555555555555555e-05, | |
| "loss": 0.0493, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 62.28373702422145, | |
| "eval_accuracy": 0.8310456700773646, | |
| "eval_f1_macro": 0.7115209260308665, | |
| "eval_loss": 1.15741765499115, | |
| "eval_runtime": 74.8395, | |
| "eval_samples_per_second": 107.083, | |
| "eval_steps_per_second": 3.354, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 63.437139561707035, | |
| "grad_norm": 0.26378124952316284, | |
| "learning_rate": 4.6296296296296294e-05, | |
| "loss": 0.0527, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 64.59054209919262, | |
| "grad_norm": 1.6174193620681763, | |
| "learning_rate": 3.7037037037037037e-05, | |
| "loss": 0.0479, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 64.59054209919262, | |
| "eval_accuracy": 0.836785625155977, | |
| "eval_f1_macro": 0.7171903480510493, | |
| "eval_loss": 1.1416091918945312, | |
| "eval_runtime": 72.9274, | |
| "eval_samples_per_second": 109.89, | |
| "eval_steps_per_second": 3.442, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 65.7439446366782, | |
| "grad_norm": 1.4499250650405884, | |
| "learning_rate": 2.7777777777777776e-05, | |
| "loss": 0.0453, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 66.89734717416378, | |
| "grad_norm": 3.988093614578247, | |
| "learning_rate": 1.8518518518518518e-05, | |
| "loss": 0.0389, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 66.89734717416378, | |
| "eval_accuracy": 0.8370351884202646, | |
| "eval_f1_macro": 0.7212408780468642, | |
| "eval_loss": 1.1253269910812378, | |
| "eval_runtime": 53.9632, | |
| "eval_samples_per_second": 148.509, | |
| "eval_steps_per_second": 4.651, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 68.05074971164936, | |
| "grad_norm": 2.909609317779541, | |
| "learning_rate": 9.259259259259259e-06, | |
| "loss": 0.0433, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 69.20415224913495, | |
| "grad_norm": 2.1668026447296143, | |
| "learning_rate": 0.0, | |
| "loss": 0.0343, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 69.20415224913495, | |
| "eval_accuracy": 0.8415273271774395, | |
| "eval_f1_macro": 0.7162084790077747, | |
| "eval_loss": 1.1328068971633911, | |
| "eval_runtime": 73.3281, | |
| "eval_samples_per_second": 109.29, | |
| "eval_steps_per_second": 3.423, | |
| "step": 30000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 30000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 70, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.7427529644770302e+20, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |