{
  "best_metric": 0.24614077806472778,
  "best_model_checkpoint": "../outputs/deductor-flant5-large/checkpoint-500",
  "epoch": 3.8314176245210727,
  "eval_steps": 50,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1,
      "learning_rate": 4.952107279693487e-05,
      "loss": 0.5028,
      "step": 25
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.904214559386973e-05,
      "loss": 0.306,
      "step": 50
    },
    {
      "epoch": 0.19,
      "eval_gen_len": 11.22110552763819,
      "eval_loss": 0.2959373891353607,
      "eval_rouge1": 89.3028,
      "eval_rouge2": 82.5127,
      "eval_rougeL": 87.4173,
      "eval_rougeLsum": 87.3544,
      "eval_runtime": 58.3773,
      "eval_samples_per_second": 40.906,
      "eval_steps_per_second": 1.285,
      "step": 50
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.85632183908046e-05,
      "loss": 0.2749,
      "step": 75
    },
    {
      "epoch": 0.38,
      "learning_rate": 4.8084291187739464e-05,
      "loss": 0.2774,
      "step": 100
    },
    {
      "epoch": 0.38,
      "eval_gen_len": 11.257118927973199,
      "eval_loss": 0.2716549038887024,
      "eval_rouge1": 90.8414,
      "eval_rouge2": 84.2378,
      "eval_rougeL": 88.9385,
      "eval_rougeLsum": 88.9058,
      "eval_runtime": 58.4865,
      "eval_samples_per_second": 40.83,
      "eval_steps_per_second": 1.282,
      "step": 100
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.760536398467433e-05,
      "loss": 0.2676,
      "step": 125
    },
    {
      "epoch": 0.57,
      "learning_rate": 4.7126436781609195e-05,
      "loss": 0.2366,
      "step": 150
    },
    {
      "epoch": 0.57,
      "eval_gen_len": 11.208123953098827,
      "eval_loss": 0.2612508535385132,
      "eval_rouge1": 91.0152,
      "eval_rouge2": 84.6687,
      "eval_rougeL": 89.2107,
      "eval_rougeLsum": 89.1735,
      "eval_runtime": 58.169,
      "eval_samples_per_second": 41.053,
      "eval_steps_per_second": 1.289,
      "step": 150
    },
    {
      "epoch": 0.67,
      "learning_rate": 4.6647509578544064e-05,
      "loss": 0.274,
      "step": 175
    },
    {
      "epoch": 0.77,
      "learning_rate": 4.616858237547893e-05,
      "loss": 0.2166,
      "step": 200
    },
    {
      "epoch": 0.77,
      "eval_gen_len": 11.280150753768844,
      "eval_loss": 0.25851312279701233,
      "eval_rouge1": 91.5215,
      "eval_rouge2": 85.4308,
      "eval_rougeL": 89.7742,
      "eval_rougeLsum": 89.7422,
      "eval_runtime": 58.2807,
      "eval_samples_per_second": 40.974,
      "eval_steps_per_second": 1.287,
      "step": 200
    },
    {
      "epoch": 0.86,
      "learning_rate": 4.5689655172413794e-05,
      "loss": 0.2063,
      "step": 225
    },
    {
      "epoch": 0.96,
      "learning_rate": 4.5210727969348656e-05,
      "loss": 0.22,
      "step": 250
    },
    {
      "epoch": 0.96,
      "eval_gen_len": 11.265494137353434,
      "eval_loss": 0.25169575214385986,
      "eval_rouge1": 91.5587,
      "eval_rouge2": 85.6107,
      "eval_rougeL": 89.8835,
      "eval_rougeLsum": 89.8621,
      "eval_runtime": 58.1997,
      "eval_samples_per_second": 41.031,
      "eval_steps_per_second": 1.289,
      "step": 250
    },
    {
      "epoch": 1.05,
      "learning_rate": 4.4731800766283525e-05,
      "loss": 0.196,
      "step": 275
    },
    {
      "epoch": 1.15,
      "learning_rate": 4.4252873563218394e-05,
      "loss": 0.1564,
      "step": 300
    },
    {
      "epoch": 1.15,
      "eval_gen_len": 11.20393634840871,
      "eval_loss": 0.26295191049575806,
      "eval_rouge1": 91.999,
      "eval_rouge2": 86.0835,
      "eval_rougeL": 90.3611,
      "eval_rougeLsum": 90.3168,
      "eval_runtime": 58.4367,
      "eval_samples_per_second": 40.865,
      "eval_steps_per_second": 1.283,
      "step": 300
    },
    {
      "epoch": 1.25,
      "learning_rate": 4.3773946360153256e-05,
      "loss": 0.1545,
      "step": 325
    },
    {
      "epoch": 1.34,
      "learning_rate": 4.3295019157088125e-05,
      "loss": 0.1803,
      "step": 350
    },
    {
      "epoch": 1.34,
      "eval_gen_len": 11.246231155778894,
      "eval_loss": 0.2546021044254303,
      "eval_rouge1": 91.5183,
      "eval_rouge2": 85.6214,
      "eval_rougeL": 89.9752,
      "eval_rougeLsum": 89.9323,
      "eval_runtime": 58.1825,
      "eval_samples_per_second": 41.043,
      "eval_steps_per_second": 1.289,
      "step": 350
    },
    {
      "epoch": 1.44,
      "learning_rate": 4.2816091954022994e-05,
      "loss": 0.1793,
      "step": 375
    },
    {
      "epoch": 1.53,
      "learning_rate": 4.2337164750957856e-05,
      "loss": 0.1737,
      "step": 400
    },
    {
      "epoch": 1.53,
      "eval_gen_len": 11.194304857621441,
      "eval_loss": 0.24834655225276947,
      "eval_rouge1": 91.8342,
      "eval_rouge2": 86.0171,
      "eval_rougeL": 90.3042,
      "eval_rougeLsum": 90.2641,
      "eval_runtime": 58.3123,
      "eval_samples_per_second": 40.952,
      "eval_steps_per_second": 1.286,
      "step": 400
    },
    {
      "epoch": 1.63,
      "learning_rate": 4.185823754789272e-05,
      "loss": 0.166,
      "step": 425
    },
    {
      "epoch": 1.72,
      "learning_rate": 4.1379310344827587e-05,
      "loss": 0.157,
      "step": 450
    },
    {
      "epoch": 1.72,
      "eval_gen_len": 10.937604690117253,
      "eval_loss": 0.24926304817199707,
      "eval_rouge1": 91.6585,
      "eval_rouge2": 85.4651,
      "eval_rougeL": 90.0181,
      "eval_rougeLsum": 89.9991,
      "eval_runtime": 57.6625,
      "eval_samples_per_second": 41.413,
      "eval_steps_per_second": 1.301,
      "step": 450
    },
    {
      "epoch": 1.82,
      "learning_rate": 4.0900383141762455e-05,
      "loss": 0.1612,
      "step": 475
    },
    {
      "epoch": 1.92,
      "learning_rate": 4.0421455938697324e-05,
      "loss": 0.1561,
      "step": 500
    },
    {
      "epoch": 1.92,
      "eval_gen_len": 11.20142378559464,
      "eval_loss": 0.24614077806472778,
      "eval_rouge1": 92.1213,
      "eval_rouge2": 86.4281,
      "eval_rougeL": 90.5846,
      "eval_rougeLsum": 90.5294,
      "eval_runtime": 58.4684,
      "eval_samples_per_second": 40.843,
      "eval_steps_per_second": 1.283,
      "step": 500
    },
    {
      "epoch": 2.01,
      "learning_rate": 3.9942528735632186e-05,
      "loss": 0.1472,
      "step": 525
    },
    {
      "epoch": 2.11,
      "learning_rate": 3.9463601532567055e-05,
      "loss": 0.1191,
      "step": 550
    },
    {
      "epoch": 2.11,
      "eval_gen_len": 11.241624790619765,
      "eval_loss": 0.2584824860095978,
      "eval_rouge1": 92.4493,
      "eval_rouge2": 86.6961,
      "eval_rougeL": 90.9293,
      "eval_rougeLsum": 90.8761,
      "eval_runtime": 58.3545,
      "eval_samples_per_second": 40.922,
      "eval_steps_per_second": 1.285,
      "step": 550
    },
    {
      "epoch": 2.2,
      "learning_rate": 3.898467432950192e-05,
      "loss": 0.1252,
      "step": 575
    },
    {
      "epoch": 2.3,
      "learning_rate": 3.850574712643678e-05,
      "loss": 0.1134,
      "step": 600
    },
    {
      "epoch": 2.3,
      "eval_gen_len": 11.16750418760469,
      "eval_loss": 0.2633197009563446,
      "eval_rouge1": 92.4707,
      "eval_rouge2": 86.833,
      "eval_rougeL": 90.9516,
      "eval_rougeLsum": 90.9195,
      "eval_runtime": 58.1972,
      "eval_samples_per_second": 41.033,
      "eval_steps_per_second": 1.289,
      "step": 600
    },
    {
      "epoch": 2.39,
      "learning_rate": 3.802681992337165e-05,
      "loss": 0.1128,
      "step": 625
    },
    {
      "epoch": 2.49,
      "learning_rate": 3.7547892720306517e-05,
      "loss": 0.1227,
      "step": 650
    },
    {
      "epoch": 2.49,
      "eval_gen_len": 11.264237855946398,
      "eval_loss": 0.25923022627830505,
      "eval_rouge1": 92.2738,
      "eval_rouge2": 86.5064,
      "eval_rougeL": 90.7556,
      "eval_rougeLsum": 90.6998,
      "eval_runtime": 58.2266,
      "eval_samples_per_second": 41.012,
      "eval_steps_per_second": 1.288,
      "step": 650
    },
    {
      "epoch": 2.59,
      "learning_rate": 3.7068965517241385e-05,
      "loss": 0.1232,
      "step": 675
    },
    {
      "epoch": 2.68,
      "learning_rate": 3.659003831417625e-05,
      "loss": 0.1175,
      "step": 700
    },
    {
      "epoch": 2.68,
      "eval_gen_len": 11.17001675041876,
      "eval_loss": 0.2656923830509186,
      "eval_rouge1": 92.0861,
      "eval_rouge2": 86.2203,
      "eval_rougeL": 90.6168,
      "eval_rougeLsum": 90.5657,
      "eval_runtime": 58.0131,
      "eval_samples_per_second": 41.163,
      "eval_steps_per_second": 1.293,
      "step": 700
    },
    {
      "epoch": 2.78,
      "learning_rate": 3.611111111111111e-05,
      "loss": 0.1095,
      "step": 725
    },
    {
      "epoch": 2.87,
      "learning_rate": 3.563218390804598e-05,
      "loss": 0.1132,
      "step": 750
    },
    {
      "epoch": 2.87,
      "eval_gen_len": 11.212311557788945,
      "eval_loss": 0.26437509059906006,
      "eval_rouge1": 92.3834,
      "eval_rouge2": 86.7237,
      "eval_rougeL": 90.8761,
      "eval_rougeLsum": 90.8389,
      "eval_runtime": 58.1768,
      "eval_samples_per_second": 41.047,
      "eval_steps_per_second": 1.289,
      "step": 750
    },
    {
      "epoch": 2.97,
      "learning_rate": 3.515325670498085e-05,
      "loss": 0.1124,
      "step": 775
    },
    {
      "epoch": 3.07,
      "learning_rate": 3.467432950191571e-05,
      "loss": 0.1097,
      "step": 800
    },
    {
      "epoch": 3.07,
      "eval_gen_len": 11.1821608040201,
      "eval_loss": 0.2691878378391266,
      "eval_rouge1": 92.3356,
      "eval_rouge2": 86.7021,
      "eval_rougeL": 90.8717,
      "eval_rougeLsum": 90.8185,
      "eval_runtime": 58.2708,
      "eval_samples_per_second": 40.981,
      "eval_steps_per_second": 1.287,
      "step": 800
    },
    {
      "epoch": 3.16,
      "learning_rate": 3.419540229885058e-05,
      "loss": 0.0874,
      "step": 825
    },
    {
      "epoch": 3.26,
      "learning_rate": 3.371647509578545e-05,
      "loss": 0.0949,
      "step": 850
    },
    {
      "epoch": 3.26,
      "eval_gen_len": 11.278475711892797,
      "eval_loss": 0.26897767186164856,
      "eval_rouge1": 92.5746,
      "eval_rouge2": 87.001,
      "eval_rougeL": 91.1734,
      "eval_rougeLsum": 91.1222,
      "eval_runtime": 58.645,
      "eval_samples_per_second": 40.72,
      "eval_steps_per_second": 1.279,
      "step": 850
    },
    {
      "epoch": 3.35,
      "learning_rate": 3.323754789272031e-05,
      "loss": 0.0901,
      "step": 875
    },
    {
      "epoch": 3.45,
      "learning_rate": 3.275862068965517e-05,
      "loss": 0.0813,
      "step": 900
    },
    {
      "epoch": 3.45,
      "eval_gen_len": 11.22571189279732,
      "eval_loss": 0.2874927222728729,
      "eval_rouge1": 92.5641,
      "eval_rouge2": 86.9813,
      "eval_rougeL": 91.0881,
      "eval_rougeLsum": 91.0411,
      "eval_runtime": 58.3729,
      "eval_samples_per_second": 40.909,
      "eval_steps_per_second": 1.285,
      "step": 900
    },
    {
      "epoch": 3.54,
      "learning_rate": 3.227969348659004e-05,
      "loss": 0.1005,
      "step": 925
    },
    {
      "epoch": 3.64,
      "learning_rate": 3.180076628352491e-05,
      "loss": 0.0861,
      "step": 950
    },
    {
      "epoch": 3.64,
      "eval_gen_len": 11.21356783919598,
      "eval_loss": 0.280032217502594,
      "eval_rouge1": 92.4738,
      "eval_rouge2": 86.9379,
      "eval_rougeL": 91.0384,
      "eval_rougeLsum": 90.9995,
      "eval_runtime": 58.3261,
      "eval_samples_per_second": 40.942,
      "eval_steps_per_second": 1.286,
      "step": 950
    },
    {
      "epoch": 3.74,
      "learning_rate": 3.132183908045977e-05,
      "loss": 0.0828,
      "step": 975
    },
    {
      "epoch": 3.83,
      "learning_rate": 3.084291187739464e-05,
      "loss": 0.0879,
      "step": 1000
    },
    {
      "epoch": 3.83,
      "eval_gen_len": 11.23031825795645,
      "eval_loss": 0.27702075242996216,
      "eval_rouge1": 92.6025,
      "eval_rouge2": 87.105,
      "eval_rougeL": 91.1632,
      "eval_rougeLsum": 91.1292,
      "eval_runtime": 58.0228,
      "eval_samples_per_second": 41.156,
      "eval_steps_per_second": 1.293,
      "step": 1000
    },
    {
      "epoch": 3.83,
      "step": 1000,
      "total_flos": 2.492186999051059e+16,
      "train_loss": 0.16499798774719238,
      "train_runtime": 3538.4305,
      "train_samples_per_second": 47.196,
      "train_steps_per_second": 0.738
    }
  ],
  "logging_steps": 25,
  "max_steps": 2610,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 50,
  "total_flos": 2.492186999051059e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}