{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.984, "eval_steps": 1, "global_step": 124, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016, "grad_norm": 28.861963272094727, "learning_rate": 2.5e-05, "loss": 1.317, "step": 1 }, { "epoch": 0.016, "eval_exact_match": 0.12244897959183673, "eval_f1_a": 0.536, "eval_f1_m": 0.5144300144300145, "eval_loss": 0.9545605182647705, "eval_runtime": 14.8765, "eval_samples_per_second": 16.805, "eval_steps_per_second": 2.151, "step": 1 }, { "epoch": 0.032, "grad_norm": 23.436485290527344, "learning_rate": 5e-05, "loss": 1.0813, "step": 2 }, { "epoch": 0.032, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5182186234817813, "eval_f1_m": 0.4855184498041642, "eval_loss": 0.879025399684906, "eval_runtime": 14.844, "eval_samples_per_second": 16.842, "eval_steps_per_second": 2.156, "step": 2 }, { "epoch": 0.048, "grad_norm": 23.96354866027832, "learning_rate": 4.959016393442623e-05, "loss": 1.1106, "step": 3 }, { "epoch": 0.048, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.24427480916030533, "eval_f1_m": 0.41115607748260813, "eval_loss": 0.7947519421577454, "eval_runtime": 14.9565, "eval_samples_per_second": 16.715, "eval_steps_per_second": 2.14, "step": 3 }, { "epoch": 0.064, "grad_norm": 17.556169509887695, "learning_rate": 4.918032786885246e-05, "loss": 0.8645, "step": 4 }, { "epoch": 0.064, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.8334511518478394, "eval_runtime": 14.9727, "eval_samples_per_second": 16.697, "eval_steps_per_second": 2.137, "step": 4 }, { "epoch": 0.08, "grad_norm": 26.969839096069336, "learning_rate": 4.8770491803278687e-05, "loss": 1.1064, "step": 5 }, { "epoch": 0.08, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.8471953272819519, "eval_runtime": 14.8399, "eval_samples_per_second": 16.846, "eval_steps_per_second": 2.156, "step": 5 }, { "epoch": 0.096, "grad_norm": 9.782492637634277, "learning_rate": 4.836065573770492e-05, "loss": 0.7013, "step": 6 }, { "epoch": 0.096, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.8111386895179749, "eval_runtime": 14.8404, "eval_samples_per_second": 16.846, "eval_steps_per_second": 2.156, "step": 6 }, { "epoch": 0.112, "grad_norm": 40.01118469238281, "learning_rate": 4.795081967213115e-05, "loss": 1.3504, "step": 7 }, { "epoch": 0.112, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7666797041893005, "eval_runtime": 14.8407, "eval_samples_per_second": 16.846, "eval_steps_per_second": 2.156, "step": 7 }, { "epoch": 0.128, "grad_norm": 19.116071701049805, "learning_rate": 4.754098360655738e-05, "loss": 0.9683, "step": 8 }, { "epoch": 0.128, "eval_exact_match": 0.0, "eval_f1_a": 0.10619469026548671, "eval_f1_m": 0.3700268552309369, "eval_loss": 0.7262851595878601, "eval_runtime": 14.8429, "eval_samples_per_second": 16.843, "eval_steps_per_second": 2.156, "step": 8 }, { "epoch": 0.144, "grad_norm": 6.786198616027832, "learning_rate": 4.713114754098361e-05, "loss": 0.6998, "step": 9 }, { "epoch": 0.144, "eval_exact_match": 0.0, "eval_f1_a": 0.12903225806451613, "eval_f1_m": 0.3618708955443649, "eval_loss": 0.6986015439033508, "eval_runtime": 14.9502, "eval_samples_per_second": 16.722, "eval_steps_per_second": 2.14, "step": 9 }, { "epoch": 0.16, "grad_norm": 8.353687286376953, "learning_rate": 4.672131147540984e-05, "loss": 0.622, "step": 10 }, { "epoch": 0.16, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.3668639053254438, "eval_f1_m": 0.4365885814865408, "eval_loss": 0.6866718530654907, "eval_runtime": 14.9736, "eval_samples_per_second": 16.696, "eval_steps_per_second": 2.137, "step": 10 }, { "epoch": 0.176, "grad_norm": 19.929168701171875, "learning_rate": 4.631147540983607e-05, "loss": 0.7718, "step": 11 }, { "epoch": 0.176, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.4060913705583757, "eval_f1_m": 0.4290875224548694, "eval_loss": 0.6896015405654907, "eval_runtime": 14.8531, "eval_samples_per_second": 16.832, "eval_steps_per_second": 2.154, "step": 11 }, { "epoch": 0.192, "grad_norm": 12.27945613861084, "learning_rate": 4.59016393442623e-05, "loss": 0.4989, "step": 12 }, { "epoch": 0.192, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.5, "eval_f1_m": 0.5085295656724227, "eval_loss": 0.6914140582084656, "eval_runtime": 14.9377, "eval_samples_per_second": 16.736, "eval_steps_per_second": 2.142, "step": 12 }, { "epoch": 0.208, "grad_norm": 14.775979995727539, "learning_rate": 4.549180327868853e-05, "loss": 0.8284, "step": 13 }, { "epoch": 0.208, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5, "eval_f1_m": 0.5017629196200623, "eval_loss": 0.6864062547683716, "eval_runtime": 14.8403, "eval_samples_per_second": 16.846, "eval_steps_per_second": 2.156, "step": 13 }, { "epoch": 0.224, "grad_norm": 7.6953935623168945, "learning_rate": 4.508196721311476e-05, "loss": 0.7036, "step": 14 }, { "epoch": 0.224, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.509433962264151, "eval_f1_m": 0.5210648761669169, "eval_loss": 0.6841718554496765, "eval_runtime": 14.9471, "eval_samples_per_second": 16.726, "eval_steps_per_second": 2.141, "step": 14 }, { "epoch": 0.24, "grad_norm": 9.705180168151855, "learning_rate": 4.467213114754098e-05, "loss": 0.6891, "step": 15 }, { "epoch": 0.24, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.47761194029850745, "eval_f1_m": 0.49961716288246905, "eval_loss": 0.680414080619812, "eval_runtime": 14.9451, "eval_samples_per_second": 16.728, "eval_steps_per_second": 2.141, "step": 15 }, { "epoch": 0.256, "grad_norm": 7.837550640106201, "learning_rate": 4.426229508196721e-05, "loss": 0.6342, "step": 16 }, { "epoch": 0.256, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.36477987421383645, "eval_f1_m": 0.4679963760395931, "eval_loss": 0.6784765720367432, "eval_runtime": 14.9458, "eval_samples_per_second": 16.727, "eval_steps_per_second": 2.141, "step": 16 }, { "epoch": 0.272, "grad_norm": 15.83466625213623, "learning_rate": 4.3852459016393444e-05, "loss": 0.6824, "step": 17 }, { "epoch": 0.272, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.25675675675675674, "eval_f1_m": 0.3939566102831409, "eval_loss": 0.6770390868186951, "eval_runtime": 14.847, "eval_samples_per_second": 16.838, "eval_steps_per_second": 2.155, "step": 17 }, { "epoch": 0.288, "grad_norm": 6.236104965209961, "learning_rate": 4.3442622950819674e-05, "loss": 0.6458, "step": 18 }, { "epoch": 0.288, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.21768707482993196, "eval_f1_m": 0.3807364137796312, "eval_loss": 0.6826757788658142, "eval_runtime": 14.9372, "eval_samples_per_second": 16.737, "eval_steps_per_second": 2.142, "step": 18 }, { "epoch": 0.304, "grad_norm": 16.222063064575195, "learning_rate": 4.3032786885245904e-05, "loss": 0.7531, "step": 19 }, { "epoch": 0.304, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.2, "eval_f1_m": 0.38457067475674933, "eval_loss": 0.6964296698570251, "eval_runtime": 14.8472, "eval_samples_per_second": 16.838, "eval_steps_per_second": 2.155, "step": 19 }, { "epoch": 0.32, "grad_norm": 6.506382942199707, "learning_rate": 4.262295081967213e-05, "loss": 0.6011, "step": 20 }, { "epoch": 0.32, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.19858156028368795, "eval_f1_m": 0.3815094502669533, "eval_loss": 0.7106679677963257, "eval_runtime": 14.9465, "eval_samples_per_second": 16.726, "eval_steps_per_second": 2.141, "step": 20 }, { "epoch": 0.336, "grad_norm": 13.0455961227417, "learning_rate": 4.2213114754098365e-05, "loss": 0.7092, "step": 21 }, { "epoch": 0.336, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.1971830985915493, "eval_f1_m": 0.3764074094506268, "eval_loss": 0.7177265882492065, "eval_runtime": 14.8434, "eval_samples_per_second": 16.842, "eval_steps_per_second": 2.156, "step": 21 }, { "epoch": 0.352, "grad_norm": 11.621403694152832, "learning_rate": 4.1803278688524595e-05, "loss": 0.7849, "step": 22 }, { "epoch": 0.352, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.1971830985915493, "eval_f1_m": 0.3764074094506268, "eval_loss": 0.7185742259025574, "eval_runtime": 14.8398, "eval_samples_per_second": 16.847, "eval_steps_per_second": 2.156, "step": 22 }, { "epoch": 0.368, "grad_norm": 10.119505882263184, "learning_rate": 4.1393442622950826e-05, "loss": 0.7273, "step": 23 }, { "epoch": 0.368, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.19858156028368795, "eval_f1_m": 0.3815094502669533, "eval_loss": 0.723312497138977, "eval_runtime": 14.944, "eval_samples_per_second": 16.729, "eval_steps_per_second": 2.141, "step": 23 }, { "epoch": 0.384, "grad_norm": 12.396435737609863, "learning_rate": 4.098360655737705e-05, "loss": 0.7308, "step": 24 }, { "epoch": 0.384, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.18705035971223022, "eval_f1_m": 0.37541939519730644, "eval_loss": 0.7317734360694885, "eval_runtime": 14.9534, "eval_samples_per_second": 16.719, "eval_steps_per_second": 2.14, "step": 24 }, { "epoch": 0.4, "grad_norm": 10.40471076965332, "learning_rate": 4.057377049180328e-05, "loss": 0.6329, "step": 25 }, { "epoch": 0.4, "eval_exact_match": 0.0, "eval_f1_a": 0.16176470588235292, "eval_f1_m": 0.3606963631273355, "eval_loss": 0.7368066310882568, "eval_runtime": 14.8468, "eval_samples_per_second": 16.839, "eval_steps_per_second": 2.155, "step": 25 }, { "epoch": 0.416, "grad_norm": 10.4788236618042, "learning_rate": 4.016393442622951e-05, "loss": 0.6854, "step": 26 }, { "epoch": 0.416, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.1925925925925926, "eval_f1_m": 0.3871894278857065, "eval_loss": 0.722824215888977, "eval_runtime": 14.8755, "eval_samples_per_second": 16.806, "eval_steps_per_second": 2.151, "step": 26 }, { "epoch": 0.432, "grad_norm": 11.982220649719238, "learning_rate": 3.975409836065574e-05, "loss": 0.6819, "step": 27 }, { "epoch": 0.432, "eval_exact_match": 0.0, "eval_f1_a": 0.15037593984962408, "eval_f1_m": 0.36286086529183775, "eval_loss": 0.7107167840003967, "eval_runtime": 14.8778, "eval_samples_per_second": 16.804, "eval_steps_per_second": 2.151, "step": 27 }, { "epoch": 0.448, "grad_norm": 15.360139846801758, "learning_rate": 3.934426229508197e-05, "loss": 0.6321, "step": 28 }, { "epoch": 0.448, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.05607476635514018, "eval_f1_m": 0.3736604398669225, "eval_loss": 0.6969433426856995, "eval_runtime": 14.8437, "eval_samples_per_second": 16.842, "eval_steps_per_second": 2.156, "step": 28 }, { "epoch": 0.464, "grad_norm": 10.889041900634766, "learning_rate": 3.89344262295082e-05, "loss": 0.6502, "step": 29 }, { "epoch": 0.464, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.6961054801940918, "eval_runtime": 14.8492, "eval_samples_per_second": 16.836, "eval_steps_per_second": 2.155, "step": 29 }, { "epoch": 0.48, "grad_norm": 20.48206329345703, "learning_rate": 3.8524590163934424e-05, "loss": 0.6842, "step": 30 }, { "epoch": 0.48, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7016035318374634, "eval_runtime": 14.8427, "eval_samples_per_second": 16.843, "eval_steps_per_second": 2.156, "step": 30 }, { "epoch": 0.496, "grad_norm": 10.129874229431152, "learning_rate": 3.8114754098360655e-05, "loss": 0.6571, "step": 31 }, { "epoch": 0.496, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7069492340087891, "eval_runtime": 14.9592, "eval_samples_per_second": 16.712, "eval_steps_per_second": 2.139, "step": 31 }, { "epoch": 0.512, "grad_norm": 11.055068969726562, "learning_rate": 3.7704918032786885e-05, "loss": 0.8142, "step": 32 }, { "epoch": 0.512, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7069980502128601, "eval_runtime": 14.9338, "eval_samples_per_second": 16.741, "eval_steps_per_second": 2.143, "step": 32 }, { "epoch": 0.528, "grad_norm": 7.073890209197998, "learning_rate": 3.729508196721312e-05, "loss": 0.7139, "step": 33 }, { "epoch": 0.528, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.6995410323143005, "eval_runtime": 14.938, "eval_samples_per_second": 16.736, "eval_steps_per_second": 2.142, "step": 33 }, { "epoch": 0.544, "grad_norm": 9.653722763061523, "learning_rate": 3.6885245901639346e-05, "loss": 0.739, "step": 34 }, { "epoch": 0.544, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7059023380279541, "eval_runtime": 14.9369, "eval_samples_per_second": 16.737, "eval_steps_per_second": 2.142, "step": 34 }, { "epoch": 0.56, "grad_norm": 14.42663288116455, "learning_rate": 3.6475409836065576e-05, "loss": 0.754, "step": 35 }, { "epoch": 0.56, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.711187481880188, "eval_runtime": 14.9372, "eval_samples_per_second": 16.737, "eval_steps_per_second": 2.142, "step": 35 }, { "epoch": 0.576, "grad_norm": 15.347558975219727, "learning_rate": 3.6065573770491806e-05, "loss": 0.6121, "step": 36 }, { "epoch": 0.576, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7238671779632568, "eval_runtime": 14.8763, "eval_samples_per_second": 16.805, "eval_steps_per_second": 2.151, "step": 36 }, { "epoch": 0.592, "grad_norm": 14.132453918457031, "learning_rate": 3.5655737704918037e-05, "loss": 0.6459, "step": 37 }, { "epoch": 0.592, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7489452958106995, "eval_runtime": 14.8277, "eval_samples_per_second": 16.86, "eval_steps_per_second": 2.158, "step": 37 }, { "epoch": 0.608, "grad_norm": 9.270454406738281, "learning_rate": 3.524590163934427e-05, "loss": 0.8344, "step": 38 }, { "epoch": 0.608, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7667334079742432, "eval_runtime": 14.8307, "eval_samples_per_second": 16.857, "eval_steps_per_second": 2.158, "step": 38 }, { "epoch": 0.624, "grad_norm": 19.537500381469727, "learning_rate": 3.483606557377049e-05, "loss": 0.8093, "step": 39 }, { "epoch": 0.624, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7680966854095459, "eval_runtime": 14.819, "eval_samples_per_second": 16.87, "eval_steps_per_second": 2.159, "step": 39 }, { "epoch": 0.64, "grad_norm": 7.517643451690674, "learning_rate": 3.442622950819672e-05, "loss": 0.479, "step": 40 }, { "epoch": 0.64, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7835771441459656, "eval_runtime": 14.8238, "eval_samples_per_second": 16.865, "eval_steps_per_second": 2.159, "step": 40 }, { "epoch": 0.656, "grad_norm": 17.782522201538086, "learning_rate": 3.401639344262295e-05, "loss": 0.7549, "step": 41 }, { "epoch": 0.656, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7776455283164978, "eval_runtime": 14.9327, "eval_samples_per_second": 16.742, "eval_steps_per_second": 2.143, "step": 41 }, { "epoch": 0.672, "grad_norm": 21.214466094970703, "learning_rate": 3.360655737704918e-05, "loss": 0.8666, "step": 42 }, { "epoch": 0.672, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7636923789978027, "eval_runtime": 14.9379, "eval_samples_per_second": 16.736, "eval_steps_per_second": 2.142, "step": 42 }, { "epoch": 0.688, "grad_norm": 11.387210845947266, "learning_rate": 3.319672131147541e-05, "loss": 0.9136, "step": 43 }, { "epoch": 0.688, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7419248223304749, "eval_runtime": 14.9343, "eval_samples_per_second": 16.74, "eval_steps_per_second": 2.143, "step": 43 }, { "epoch": 0.704, "grad_norm": 20.449684143066406, "learning_rate": 3.2786885245901635e-05, "loss": 0.8711, "step": 44 }, { "epoch": 0.704, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7206465005874634, "eval_runtime": 14.835, "eval_samples_per_second": 16.852, "eval_steps_per_second": 2.157, "step": 44 }, { "epoch": 0.72, "grad_norm": 11.209440231323242, "learning_rate": 3.237704918032787e-05, "loss": 0.6057, "step": 45 }, { "epoch": 0.72, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7194277048110962, "eval_runtime": 14.8379, "eval_samples_per_second": 16.849, "eval_steps_per_second": 2.157, "step": 45 }, { "epoch": 0.736, "grad_norm": 9.76227855682373, "learning_rate": 3.19672131147541e-05, "loss": 0.717, "step": 46 }, { "epoch": 0.736, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7115039229393005, "eval_runtime": 14.9392, "eval_samples_per_second": 16.734, "eval_steps_per_second": 2.142, "step": 46 }, { "epoch": 0.752, "grad_norm": 14.18212890625, "learning_rate": 3.155737704918033e-05, "loss": 0.9202, "step": 47 }, { "epoch": 0.752, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7000234127044678, "eval_runtime": 14.8408, "eval_samples_per_second": 16.845, "eval_steps_per_second": 2.156, "step": 47 }, { "epoch": 0.768, "grad_norm": 15.248029708862305, "learning_rate": 3.114754098360656e-05, "loss": 0.6564, "step": 48 }, { "epoch": 0.768, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7032949328422546, "eval_runtime": 14.8352, "eval_samples_per_second": 16.852, "eval_steps_per_second": 2.157, "step": 48 }, { "epoch": 0.784, "grad_norm": 10.2866849899292, "learning_rate": 3.073770491803279e-05, "loss": 0.6748, "step": 49 }, { "epoch": 0.784, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7020527124404907, "eval_runtime": 14.9486, "eval_samples_per_second": 16.724, "eval_steps_per_second": 2.141, "step": 49 }, { "epoch": 0.8, "grad_norm": 20.127962112426758, "learning_rate": 3.0327868852459017e-05, "loss": 0.6995, "step": 50 }, { "epoch": 0.8, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.6938906311988831, "eval_runtime": 14.8235, "eval_samples_per_second": 16.865, "eval_steps_per_second": 2.159, "step": 50 }, { "epoch": 0.816, "grad_norm": 7.355772018432617, "learning_rate": 2.9918032786885248e-05, "loss": 0.661, "step": 51 }, { "epoch": 0.816, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.6910097599029541, "eval_runtime": 14.9278, "eval_samples_per_second": 16.747, "eval_steps_per_second": 2.144, "step": 51 }, { "epoch": 0.832, "grad_norm": 7.9901123046875, "learning_rate": 2.9508196721311478e-05, "loss": 0.5164, "step": 52 }, { "epoch": 0.832, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.6915390491485596, "eval_runtime": 14.821, "eval_samples_per_second": 16.868, "eval_steps_per_second": 2.159, "step": 52 }, { "epoch": 0.848, "grad_norm": 17.773469924926758, "learning_rate": 2.9098360655737705e-05, "loss": 0.5481, "step": 53 }, { "epoch": 0.848, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7027539014816284, "eval_runtime": 14.9548, "eval_samples_per_second": 16.717, "eval_steps_per_second": 2.14, "step": 53 }, { "epoch": 0.864, "grad_norm": 6.020360469818115, "learning_rate": 2.8688524590163935e-05, "loss": 0.637, "step": 54 }, { "epoch": 0.864, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7196269631385803, "eval_runtime": 14.8795, "eval_samples_per_second": 16.802, "eval_steps_per_second": 2.151, "step": 54 }, { "epoch": 0.88, "grad_norm": 7.110622882843018, "learning_rate": 2.8278688524590162e-05, "loss": 0.6921, "step": 55 }, { "epoch": 0.88, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7422675490379333, "eval_runtime": 14.929, "eval_samples_per_second": 16.746, "eval_steps_per_second": 2.143, "step": 55 }, { "epoch": 0.896, "grad_norm": 8.011894226074219, "learning_rate": 2.7868852459016392e-05, "loss": 0.5475, "step": 56 }, { "epoch": 0.896, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7658622860908508, "eval_runtime": 14.8404, "eval_samples_per_second": 16.846, "eval_steps_per_second": 2.156, "step": 56 }, { "epoch": 0.912, "grad_norm": 11.89902114868164, "learning_rate": 2.7459016393442626e-05, "loss": 0.7004, "step": 57 }, { "epoch": 0.912, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.795605480670929, "eval_runtime": 14.9551, "eval_samples_per_second": 16.717, "eval_steps_per_second": 2.14, "step": 57 }, { "epoch": 0.928, "grad_norm": 17.989233016967773, "learning_rate": 2.7049180327868856e-05, "loss": 0.7371, "step": 58 }, { "epoch": 0.928, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.8144462704658508, "eval_runtime": 14.9635, "eval_samples_per_second": 16.707, "eval_steps_per_second": 2.139, "step": 58 }, { "epoch": 0.944, "grad_norm": 20.869640350341797, "learning_rate": 2.6639344262295087e-05, "loss": 0.7959, "step": 59 }, { "epoch": 0.944, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.8224091529846191, "eval_runtime": 14.8308, "eval_samples_per_second": 16.857, "eval_steps_per_second": 2.158, "step": 59 }, { "epoch": 0.96, "grad_norm": 18.80642318725586, "learning_rate": 2.6229508196721314e-05, "loss": 0.9499, "step": 60 }, { "epoch": 0.96, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.8185468912124634, "eval_runtime": 14.9357, "eval_samples_per_second": 16.738, "eval_steps_per_second": 2.143, "step": 60 }, { "epoch": 0.976, "grad_norm": 13.714020729064941, "learning_rate": 2.5819672131147544e-05, "loss": 0.686, "step": 61 }, { "epoch": 0.976, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.804934561252594, "eval_runtime": 14.9362, "eval_samples_per_second": 16.738, "eval_steps_per_second": 2.142, "step": 61 }, { "epoch": 0.992, "grad_norm": 8.152863502502441, "learning_rate": 2.540983606557377e-05, "loss": 0.586, "step": 62 }, { "epoch": 0.992, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7904736399650574, "eval_runtime": 14.8214, "eval_samples_per_second": 16.867, "eval_steps_per_second": 2.159, "step": 62 }, { "epoch": 1.008, "grad_norm": 15.25986099243164, "learning_rate": 2.5e-05, "loss": 0.8011, "step": 63 }, { "epoch": 1.008, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7669472694396973, "eval_runtime": 14.9396, "eval_samples_per_second": 16.734, "eval_steps_per_second": 2.142, "step": 63 }, { "epoch": 1.024, "grad_norm": 9.822798728942871, "learning_rate": 2.459016393442623e-05, "loss": 0.671, "step": 64 }, { "epoch": 1.024, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7386738061904907, "eval_runtime": 14.9319, "eval_samples_per_second": 16.743, "eval_steps_per_second": 2.143, "step": 64 }, { "epoch": 1.04, "grad_norm": 16.625377655029297, "learning_rate": 2.418032786885246e-05, "loss": 0.804, "step": 65 }, { "epoch": 1.04, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7147324085235596, "eval_runtime": 14.8352, "eval_samples_per_second": 16.852, "eval_steps_per_second": 2.157, "step": 65 }, { "epoch": 1.056, "grad_norm": 18.697471618652344, "learning_rate": 2.377049180327869e-05, "loss": 0.7867, "step": 66 }, { "epoch": 1.056, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.6943925619125366, "eval_runtime": 14.9325, "eval_samples_per_second": 16.742, "eval_steps_per_second": 2.143, "step": 66 }, { "epoch": 1.072, "grad_norm": 9.6091947555542, "learning_rate": 2.336065573770492e-05, "loss": 0.6648, "step": 67 }, { "epoch": 1.072, "eval_exact_match": 0.0, "eval_f1_a": 0.07407407407407407, "eval_f1_m": 0.3703287375556284, "eval_loss": 0.6778144240379333, "eval_runtime": 14.9373, "eval_samples_per_second": 16.737, "eval_steps_per_second": 2.142, "step": 67 }, { "epoch": 1.088, "grad_norm": 14.563115119934082, "learning_rate": 2.295081967213115e-05, "loss": 0.614, "step": 68 }, { "epoch": 1.088, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.2835820895522388, "eval_f1_m": 0.4365348370450412, "eval_loss": 0.671093761920929, "eval_runtime": 14.8252, "eval_samples_per_second": 16.863, "eval_steps_per_second": 2.158, "step": 68 }, { "epoch": 1.104, "grad_norm": 11.200250625610352, "learning_rate": 2.254098360655738e-05, "loss": 0.5507, "step": 69 }, { "epoch": 1.104, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.36601307189542487, "eval_f1_m": 0.4596024157248647, "eval_loss": 0.667160153388977, "eval_runtime": 14.8406, "eval_samples_per_second": 16.846, "eval_steps_per_second": 2.156, "step": 69 }, { "epoch": 1.12, "grad_norm": 8.560547828674316, "learning_rate": 2.2131147540983607e-05, "loss": 0.5892, "step": 70 }, { "epoch": 1.12, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.4022988505747126, "eval_f1_m": 0.45429236296583236, "eval_loss": 0.6715586185455322, "eval_runtime": 14.9417, "eval_samples_per_second": 16.732, "eval_steps_per_second": 2.142, "step": 70 }, { "epoch": 1.1360000000000001, "grad_norm": 8.961625099182129, "learning_rate": 2.1721311475409837e-05, "loss": 0.7664, "step": 71 }, { "epoch": 1.1360000000000001, "eval_exact_match": 0.0, "eval_f1_a": 0.3742690058479532, "eval_f1_m": 0.43382439555908947, "eval_loss": 0.6713827848434448, "eval_runtime": 14.8405, "eval_samples_per_second": 16.846, "eval_steps_per_second": 2.156, "step": 71 }, { "epoch": 1.152, "grad_norm": 7.123610973358154, "learning_rate": 2.1311475409836064e-05, "loss": 0.7411, "step": 72 }, { "epoch": 1.152, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.3977272727272727, "eval_f1_m": 0.44959213122478425, "eval_loss": 0.6721289157867432, "eval_runtime": 14.9381, "eval_samples_per_second": 16.736, "eval_steps_per_second": 2.142, "step": 72 }, { "epoch": 1.168, "grad_norm": 16.729991912841797, "learning_rate": 2.0901639344262298e-05, "loss": 0.8682, "step": 73 }, { "epoch": 1.168, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.393063583815029, "eval_f1_m": 0.45827664399092977, "eval_loss": 0.6705155968666077, "eval_runtime": 14.9797, "eval_samples_per_second": 16.689, "eval_steps_per_second": 2.136, "step": 73 }, { "epoch": 1.184, "grad_norm": 6.746473789215088, "learning_rate": 2.0491803278688525e-05, "loss": 0.6803, "step": 74 }, { "epoch": 1.184, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.3508771929824561, "eval_f1_m": 0.4224165856818919, "eval_loss": 0.6745976805686951, "eval_runtime": 14.9511, "eval_samples_per_second": 16.721, "eval_steps_per_second": 2.14, "step": 74 }, { "epoch": 1.2, "grad_norm": 13.287238121032715, "learning_rate": 2.0081967213114755e-05, "loss": 0.6991, "step": 75 }, { "epoch": 1.2, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.41111111111111115, "eval_f1_m": 0.4558379262460896, "eval_loss": 0.6722421646118164, "eval_runtime": 15.4359, "eval_samples_per_second": 16.196, "eval_steps_per_second": 2.073, "step": 75 }, { "epoch": 1.216, "grad_norm": 20.5496883392334, "learning_rate": 1.9672131147540985e-05, "loss": 0.7513, "step": 76 }, { "epoch": 1.216, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.3953488372093023, "eval_f1_m": 0.45770680566598937, "eval_loss": 0.6721171736717224, "eval_runtime": 15.0052, "eval_samples_per_second": 16.661, "eval_steps_per_second": 2.133, "step": 76 }, { "epoch": 1.232, "grad_norm": 5.193902015686035, "learning_rate": 1.9262295081967212e-05, "loss": 0.6274, "step": 77 }, { "epoch": 1.232, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.3932584269662921, "eval_f1_m": 0.44960334450130357, "eval_loss": 0.6729999780654907, "eval_runtime": 14.876, "eval_samples_per_second": 16.806, "eval_steps_per_second": 2.151, "step": 77 }, { "epoch": 1.248, "grad_norm": 13.002022743225098, "learning_rate": 1.8852459016393442e-05, "loss": 0.6462, "step": 78 }, { "epoch": 1.248, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.384180790960452, "eval_f1_m": 0.44293518273110116, "eval_loss": 0.6707578301429749, "eval_runtime": 14.9856, "eval_samples_per_second": 16.683, "eval_steps_per_second": 2.135, "step": 78 }, { "epoch": 1.264, "grad_norm": 12.032450675964355, "learning_rate": 1.8442622950819673e-05, "loss": 0.5559, "step": 79 }, { "epoch": 1.264, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.40437158469945356, "eval_f1_m": 0.4476750007362253, "eval_loss": 0.674972653388977, "eval_runtime": 14.9758, "eval_samples_per_second": 16.694, "eval_steps_per_second": 2.137, "step": 79 }, { "epoch": 1.28, "grad_norm": 22.6571044921875, "learning_rate": 1.8032786885245903e-05, "loss": 0.8383, "step": 80 }, { "epoch": 1.28, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.37569060773480667, "eval_f1_m": 0.4270836872877689, "eval_loss": 0.6759648323059082, "eval_runtime": 14.8726, "eval_samples_per_second": 16.809, "eval_steps_per_second": 2.152, "step": 80 }, { "epoch": 1.296, "grad_norm": 5.78735876083374, "learning_rate": 1.7622950819672133e-05, "loss": 0.4898, "step": 81 }, { "epoch": 1.296, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.40437158469945356, "eval_f1_m": 0.45088199782077343, "eval_loss": 0.6767851710319519, "eval_runtime": 14.9835, "eval_samples_per_second": 16.685, "eval_steps_per_second": 2.136, "step": 81 }, { "epoch": 1.312, "grad_norm": 9.217117309570312, "learning_rate": 1.721311475409836e-05, "loss": 0.7316, "step": 82 }, { "epoch": 1.312, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.4148936170212766, "eval_f1_m": 0.4551692072100236, "eval_loss": 0.6751718521118164, "eval_runtime": 14.7473, "eval_samples_per_second": 16.952, "eval_steps_per_second": 2.17, "step": 82 }, { "epoch": 1.328, "grad_norm": 20.60079574584961, "learning_rate": 1.680327868852459e-05, "loss": 0.7904, "step": 83 }, { "epoch": 1.328, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.40449438202247184, "eval_f1_m": 0.4541353770945608, "eval_loss": 0.6738359332084656, "eval_runtime": 14.9829, "eval_samples_per_second": 16.686, "eval_steps_per_second": 2.136, "step": 83 }, { "epoch": 1.3439999999999999, "grad_norm": 7.957705497741699, "learning_rate": 1.6393442622950818e-05, "loss": 0.7006, "step": 84 }, { "epoch": 1.3439999999999999, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.38636363636363635, "eval_f1_m": 0.4470978001590247, "eval_loss": 0.6728906035423279, "eval_runtime": 14.9863, "eval_samples_per_second": 16.682, "eval_steps_per_second": 2.135, "step": 84 }, { "epoch": 1.3599999999999999, "grad_norm": 12.297500610351562, "learning_rate": 1.598360655737705e-05, "loss": 0.7209, "step": 85 }, { "epoch": 1.3599999999999999, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.4318181818181818, "eval_f1_m": 0.48761612537122734, "eval_loss": 0.6701562404632568, "eval_runtime": 14.8996, "eval_samples_per_second": 16.779, "eval_steps_per_second": 2.148, "step": 85 }, { "epoch": 1.376, "grad_norm": 11.166065216064453, "learning_rate": 1.557377049180328e-05, "loss": 0.6469, "step": 86 }, { "epoch": 1.376, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.4022988505747126, "eval_f1_m": 0.46504402626851604, "eval_loss": 0.6728047132492065, "eval_runtime": 14.9978, "eval_samples_per_second": 16.669, "eval_steps_per_second": 2.134, "step": 86 }, { "epoch": 1.392, "grad_norm": 5.032862186431885, "learning_rate": 1.5163934426229509e-05, "loss": 0.7119, "step": 87 }, { "epoch": 1.392, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.3950617283950618, "eval_f1_m": 0.4722391554024206, "eval_loss": 0.6710312366485596, "eval_runtime": 14.8702, "eval_samples_per_second": 16.812, "eval_steps_per_second": 2.152, "step": 87 }, { "epoch": 1.408, "grad_norm": 23.827577590942383, "learning_rate": 1.4754098360655739e-05, "loss": 0.7244, "step": 88 }, { "epoch": 1.408, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.3766233766233766, "eval_f1_m": 0.476664258777104, "eval_loss": 0.6692422032356262, "eval_runtime": 14.873, "eval_samples_per_second": 16.809, "eval_steps_per_second": 2.152, "step": 88 }, { "epoch": 1.424, "grad_norm": 17.192716598510742, "learning_rate": 1.4344262295081968e-05, "loss": 0.7095, "step": 89 }, { "epoch": 1.424, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.3088235294117647, "eval_f1_m": 0.439154213133805, "eval_loss": 0.6696093678474426, "eval_runtime": 14.9667, "eval_samples_per_second": 16.704, "eval_steps_per_second": 2.138, "step": 89 }, { "epoch": 1.44, "grad_norm": 11.38125228881836, "learning_rate": 1.3934426229508196e-05, "loss": 0.7006, "step": 90 }, { "epoch": 1.44, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.32352941176470584, "eval_f1_m": 0.44573533042920804, "eval_loss": 0.6697929501533508, "eval_runtime": 14.9754, "eval_samples_per_second": 16.694, "eval_steps_per_second": 2.137, "step": 90 }, { "epoch": 1.456, "grad_norm": 9.435693740844727, "learning_rate": 1.3524590163934428e-05, "loss": 0.6404, "step": 91 }, { "epoch": 1.456, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.2741935483870968, "eval_f1_m": 0.4377146662860949, "eval_loss": 0.6705585718154907, "eval_runtime": 14.9011, "eval_samples_per_second": 16.777, "eval_steps_per_second": 2.147, "step": 91 }, { "epoch": 1.472, "grad_norm": 10.128896713256836, "learning_rate": 1.3114754098360657e-05, "loss": 0.7173, "step": 92 }, { "epoch": 1.472, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.17391304347826086, "eval_f1_m": 0.40892491861879615, "eval_loss": 0.6743906140327454, "eval_runtime": 14.8853, "eval_samples_per_second": 16.795, "eval_steps_per_second": 2.15, "step": 92 }, { "epoch": 1.488, "grad_norm": 17.08680534362793, "learning_rate": 1.2704918032786885e-05, "loss": 0.704, "step": 93 }, { "epoch": 1.488, "eval_exact_match": 0.0, "eval_f1_a": 0.09174311926605504, "eval_f1_m": 0.380698866293104, "eval_loss": 0.6720312237739563, "eval_runtime": 14.9786, "eval_samples_per_second": 16.691, "eval_steps_per_second": 2.136, "step": 93 }, { "epoch": 1.504, "grad_norm": 7.702959060668945, "learning_rate": 1.2295081967213116e-05, "loss": 0.5519, "step": 94 }, { "epoch": 1.504, "eval_exact_match": 0.0, "eval_f1_a": 0.09345794392523366, "eval_f1_m": 0.38281036024133264, "eval_loss": 0.6760703325271606, "eval_runtime": 14.8739, "eval_samples_per_second": 16.808, "eval_steps_per_second": 2.151, "step": 94 }, { "epoch": 1.52, "grad_norm": 14.851384162902832, "learning_rate": 1.1885245901639344e-05, "loss": 0.5096, "step": 95 }, { "epoch": 1.52, "eval_exact_match": 0.0, "eval_f1_a": 0.07619047619047618, "eval_f1_m": 0.3760076391528973, "eval_loss": 0.6754101514816284, "eval_runtime": 14.7313, "eval_samples_per_second": 16.971, "eval_steps_per_second": 2.172, "step": 95 }, { "epoch": 1.536, "grad_norm": 12.864474296569824, "learning_rate": 1.1475409836065575e-05, "loss": 0.5991, "step": 96 }, { "epoch": 1.536, "eval_exact_match": 0.0, "eval_f1_a": 0.038834951456310676, "eval_f1_m": 0.36619522934048754, "eval_loss": 0.6819843649864197, "eval_runtime": 14.9845, "eval_samples_per_second": 16.684, "eval_steps_per_second": 2.136, "step": 96 }, { "epoch": 1.552, "grad_norm": 7.286766529083252, "learning_rate": 1.1065573770491803e-05, "loss": 0.5792, "step": 97 }, { "epoch": 1.552, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.6806367039680481, "eval_runtime": 14.9973, "eval_samples_per_second": 16.67, "eval_steps_per_second": 2.134, "step": 97 }, { "epoch": 1.568, "grad_norm": 17.556123733520508, "learning_rate": 1.0655737704918032e-05, "loss": 0.6297, "step": 98 }, { "epoch": 1.568, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.6815586090087891, "eval_runtime": 15.128, "eval_samples_per_second": 16.526, "eval_steps_per_second": 2.115, "step": 98 }, { "epoch": 1.584, "grad_norm": 9.33618450164795, "learning_rate": 1.0245901639344262e-05, "loss": 0.6484, "step": 99 }, { "epoch": 1.584, "eval_exact_match": 0.0, "eval_f1_a": 0.0196078431372549, "eval_f1_m": 0.35848547877359416, "eval_loss": 0.6851093769073486, "eval_runtime": 14.9937, "eval_samples_per_second": 16.674, "eval_steps_per_second": 2.134, "step": 99 }, { "epoch": 1.6, "grad_norm": 10.44613265991211, "learning_rate": 9.836065573770493e-06, "loss": 0.6437, "step": 100 }, { "epoch": 1.6, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.6876327991485596, "eval_runtime": 14.9142, "eval_samples_per_second": 16.763, "eval_steps_per_second": 2.146, "step": 100 }, { "epoch": 1.616, "grad_norm": 9.857686042785645, "learning_rate": 9.426229508196721e-06, "loss": 0.595, "step": 101 }, { "epoch": 1.616, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.6902187466621399, "eval_runtime": 14.9287, "eval_samples_per_second": 16.746, "eval_steps_per_second": 2.144, "step": 101 }, { "epoch": 1.6320000000000001, "grad_norm": 17.187076568603516, "learning_rate": 9.016393442622952e-06, "loss": 0.6899, "step": 102 }, { "epoch": 1.6320000000000001, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.692144513130188, "eval_runtime": 14.9838, "eval_samples_per_second": 16.685, "eval_steps_per_second": 2.136, "step": 102 }, { "epoch": 1.6480000000000001, "grad_norm": 11.167596817016602, "learning_rate": 8.60655737704918e-06, "loss": 0.6288, "step": 103 }, { "epoch": 1.6480000000000001, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.6949140429496765, "eval_runtime": 14.9759, "eval_samples_per_second": 16.693, "eval_steps_per_second": 2.137, "step": 103 }, { "epoch": 1.6640000000000001, "grad_norm": 12.586868286132812, "learning_rate": 8.196721311475409e-06, "loss": 0.552, "step": 104 }, { "epoch": 1.6640000000000001, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.6984257698059082, "eval_runtime": 14.8729, "eval_samples_per_second": 16.809, "eval_steps_per_second": 2.152, "step": 104 }, { "epoch": 1.6800000000000002, "grad_norm": 9.765538215637207, "learning_rate": 7.78688524590164e-06, "loss": 0.6785, "step": 105 }, { "epoch": 1.6800000000000002, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.704703152179718, "eval_runtime": 14.9801, "eval_samples_per_second": 16.689, "eval_steps_per_second": 2.136, "step": 105 }, { "epoch": 1.696, "grad_norm": 6.867098808288574, "learning_rate": 7.3770491803278695e-06, "loss": 0.6315, "step": 106 }, { "epoch": 1.696, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7083241939544678, "eval_runtime": 14.727, "eval_samples_per_second": 16.976, "eval_steps_per_second": 2.173, "step": 106 }, { "epoch": 1.712, "grad_norm": 8.60350513458252, "learning_rate": 6.967213114754098e-06, "loss": 0.5195, "step": 107 }, { "epoch": 1.712, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7091211080551147, "eval_runtime": 14.874, "eval_samples_per_second": 16.808, "eval_steps_per_second": 2.151, "step": 107 }, { "epoch": 1.728, "grad_norm": 8.559430122375488, "learning_rate": 6.557377049180328e-06, "loss": 0.7978, "step": 108 }, { "epoch": 1.728, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7129042744636536, "eval_runtime": 14.9768, "eval_samples_per_second": 16.693, "eval_steps_per_second": 2.137, "step": 108 }, { "epoch": 1.744, "grad_norm": 5.806375026702881, "learning_rate": 6.147540983606558e-06, "loss": 0.5901, "step": 109 }, { "epoch": 1.744, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.715499997138977, "eval_runtime": 14.8739, "eval_samples_per_second": 16.808, "eval_steps_per_second": 2.151, "step": 109 }, { "epoch": 1.76, "grad_norm": 8.308993339538574, "learning_rate": 5.737704918032787e-06, "loss": 0.7361, "step": 110 }, { "epoch": 1.76, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7156738042831421, "eval_runtime": 14.9831, "eval_samples_per_second": 16.685, "eval_steps_per_second": 2.136, "step": 110 }, { "epoch": 1.776, "grad_norm": 5.917534351348877, "learning_rate": 5.327868852459016e-06, "loss": 0.6439, "step": 111 }, { "epoch": 1.776, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7138105630874634, "eval_runtime": 14.9884, "eval_samples_per_second": 16.68, "eval_steps_per_second": 2.135, "step": 111 }, { "epoch": 1.792, "grad_norm": 12.60551929473877, "learning_rate": 4.918032786885246e-06, "loss": 0.5871, "step": 112 }, { "epoch": 1.792, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.714160144329071, "eval_runtime": 14.9755, "eval_samples_per_second": 16.694, "eval_steps_per_second": 2.137, "step": 112 }, { "epoch": 1.808, "grad_norm": 5.75510311126709, "learning_rate": 4.508196721311476e-06, "loss": 0.5484, "step": 113 }, { "epoch": 1.808, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7168398499488831, "eval_runtime": 14.8968, "eval_samples_per_second": 16.782, "eval_steps_per_second": 2.148, "step": 113 }, { "epoch": 1.8239999999999998, "grad_norm": 17.395551681518555, "learning_rate": 4.098360655737704e-06, "loss": 0.7153, "step": 114 }, { "epoch": 1.8239999999999998, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.712472677230835, "eval_runtime": 14.9892, "eval_samples_per_second": 16.679, "eval_steps_per_second": 2.135, "step": 114 }, { "epoch": 1.8399999999999999, "grad_norm": 13.188994407653809, "learning_rate": 3.6885245901639347e-06, "loss": 0.6075, "step": 115 }, { "epoch": 1.8399999999999999, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7133281230926514, "eval_runtime": 14.9832, "eval_samples_per_second": 16.685, "eval_steps_per_second": 2.136, "step": 115 }, { "epoch": 1.8559999999999999, "grad_norm": 7.989605903625488, "learning_rate": 3.278688524590164e-06, "loss": 0.6436, "step": 116 }, { "epoch": 1.8559999999999999, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.712271511554718, "eval_runtime": 14.8835, "eval_samples_per_second": 16.797, "eval_steps_per_second": 2.15, "step": 116 }, { "epoch": 1.8719999999999999, "grad_norm": 10.555407524108887, "learning_rate": 2.8688524590163937e-06, "loss": 0.6506, "step": 117 }, { "epoch": 1.8719999999999999, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7101816534996033, "eval_runtime": 14.8733, "eval_samples_per_second": 16.809, "eval_steps_per_second": 2.152, "step": 117 }, { "epoch": 1.888, "grad_norm": 6.743683815002441, "learning_rate": 2.459016393442623e-06, "loss": 0.6718, "step": 118 }, { "epoch": 1.888, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7138828039169312, "eval_runtime": 14.9743, "eval_samples_per_second": 16.695, "eval_steps_per_second": 2.137, "step": 118 }, { "epoch": 1.904, "grad_norm": 5.256937026977539, "learning_rate": 2.049180327868852e-06, "loss": 0.6927, "step": 119 }, { "epoch": 1.904, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7110937237739563, "eval_runtime": 14.8825, "eval_samples_per_second": 16.798, "eval_steps_per_second": 2.15, "step": 119 }, { "epoch": 1.92, "grad_norm": 15.087100982666016, "learning_rate": 1.639344262295082e-06, "loss": 0.7999, "step": 120 }, { "epoch": 1.92, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7134785056114197, "eval_runtime": 15.0787, "eval_samples_per_second": 16.58, "eval_steps_per_second": 2.122, "step": 120 }, { "epoch": 1.936, "grad_norm": 10.094852447509766, "learning_rate": 1.2295081967213116e-06, "loss": 0.5977, "step": 121 }, { "epoch": 1.936, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7145839929580688, "eval_runtime": 14.8868, "eval_samples_per_second": 16.793, "eval_steps_per_second": 2.15, "step": 121 }, { "epoch": 1.952, "grad_norm": 11.906929016113281, "learning_rate": 8.19672131147541e-07, "loss": 0.6042, "step": 122 }, { "epoch": 1.952, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7139609456062317, "eval_runtime": 14.9736, "eval_samples_per_second": 16.696, "eval_steps_per_second": 2.137, "step": 122 }, { "epoch": 1.968, "grad_norm": 9.525917053222656, "learning_rate": 4.098360655737705e-07, "loss": 0.6814, "step": 123 }, { "epoch": 1.968, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7123749852180481, "eval_runtime": 14.9772, "eval_samples_per_second": 16.692, "eval_steps_per_second": 2.137, "step": 123 }, { "epoch": 1.984, "grad_norm": 11.106483459472656, "learning_rate": 0.0, "loss": 0.5725, "step": 124 }, { "epoch": 1.984, "eval_exact_match": 0.0, "eval_f1_a": 0.0, "eval_f1_m": 0.35077572820670067, "eval_loss": 0.7136836051940918, "eval_runtime": 15.0058, "eval_samples_per_second": 16.66, "eval_steps_per_second": 2.133, "step": 124 }, { "epoch": 1.984, "step": 124, "total_flos": 3.579246986605363e+16, "train_loss": 0.7063398053569179, "train_runtime": 2392.3346, "train_samples_per_second": 0.836, "train_steps_per_second": 0.052 } ], "logging_steps": 1, "max_steps": 124, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 3.579246986605363e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }