| { | |
| "best_metric": 1.4579006433486938, | |
| "best_model_checkpoint": "/home/alejandro.vaca/new_checkpoints_xlm_roberta/checkpoint-78800", | |
| "epoch": 0.22749530494715703, | |
| "global_step": 141600, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.5191565642755143e-09, | |
| "loss": 2.6373, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 6.076626257102056e-07, | |
| "loss": 2.2331, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.9753497838974, | |
| "eval_runtime": 146.0905, | |
| "eval_samples_per_second": 136.901, | |
| "eval_steps_per_second": 2.143, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.2153252514204113e-06, | |
| "loss": 2.0391, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.9167841672897339, | |
| "eval_runtime": 146.7121, | |
| "eval_samples_per_second": 136.321, | |
| "eval_steps_per_second": 2.133, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.822987877130617e-06, | |
| "loss": 1.9963, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.8917230367660522, | |
| "eval_runtime": 144.3485, | |
| "eval_samples_per_second": 138.554, | |
| "eval_steps_per_second": 2.168, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2.4306505028408226e-06, | |
| "loss": 1.9736, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.8841623067855835, | |
| "eval_runtime": 142.7005, | |
| "eval_samples_per_second": 140.154, | |
| "eval_steps_per_second": 2.193, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.0383131285510288e-06, | |
| "loss": 1.9579, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.8723009824752808, | |
| "eval_runtime": 144.148, | |
| "eval_samples_per_second": 138.746, | |
| "eval_steps_per_second": 2.171, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.645975754261234e-06, | |
| "loss": 1.9465, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.8523199558258057, | |
| "eval_runtime": 143.784, | |
| "eval_samples_per_second": 139.098, | |
| "eval_steps_per_second": 2.177, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.25363837997144e-06, | |
| "loss": 1.9352, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.843543529510498, | |
| "eval_runtime": 146.189, | |
| "eval_samples_per_second": 136.809, | |
| "eval_steps_per_second": 2.141, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.861301005681645e-06, | |
| "loss": 1.9244, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.8356839418411255, | |
| "eval_runtime": 143.4998, | |
| "eval_samples_per_second": 139.373, | |
| "eval_steps_per_second": 2.181, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.468963631391851e-06, | |
| "loss": 1.9143, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.8183759450912476, | |
| "eval_runtime": 144.8799, | |
| "eval_samples_per_second": 138.045, | |
| "eval_steps_per_second": 2.16, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.0766262571020576e-06, | |
| "loss": 1.9042, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.8169724941253662, | |
| "eval_runtime": 145.0112, | |
| "eval_samples_per_second": 137.92, | |
| "eval_steps_per_second": 2.158, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6.684288882812263e-06, | |
| "loss": 1.8971, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.808371901512146, | |
| "eval_runtime": 147.4613, | |
| "eval_samples_per_second": 135.629, | |
| "eval_steps_per_second": 2.123, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 7.291951508522468e-06, | |
| "loss": 1.888, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.8037678003311157, | |
| "eval_runtime": 150.0493, | |
| "eval_samples_per_second": 133.29, | |
| "eval_steps_per_second": 2.086, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 7.899614134232675e-06, | |
| "loss": 1.8809, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.7955741882324219, | |
| "eval_runtime": 148.2714, | |
| "eval_samples_per_second": 134.888, | |
| "eval_steps_per_second": 2.111, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 8.50727675994288e-06, | |
| "loss": 1.8741, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.7888526916503906, | |
| "eval_runtime": 147.2993, | |
| "eval_samples_per_second": 135.778, | |
| "eval_steps_per_second": 2.125, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 9.114939385653086e-06, | |
| "loss": 1.8685, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.785848617553711, | |
| "eval_runtime": 143.6092, | |
| "eval_samples_per_second": 139.267, | |
| "eval_steps_per_second": 2.18, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.72260201136329e-06, | |
| "loss": 1.8589, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.781029462814331, | |
| "eval_runtime": 147.7751, | |
| "eval_samples_per_second": 135.341, | |
| "eval_steps_per_second": 2.118, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.0330264637073497e-05, | |
| "loss": 1.8544, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.7769867181777954, | |
| "eval_runtime": 147.2523, | |
| "eval_samples_per_second": 135.821, | |
| "eval_steps_per_second": 2.126, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.0937927262783703e-05, | |
| "loss": 1.8481, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.7637484073638916, | |
| "eval_runtime": 143.1769, | |
| "eval_samples_per_second": 139.687, | |
| "eval_steps_per_second": 2.186, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.1545589888493909e-05, | |
| "loss": 1.8428, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.756960391998291, | |
| "eval_runtime": 145.8319, | |
| "eval_samples_per_second": 137.144, | |
| "eval_steps_per_second": 2.146, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.2153252514204115e-05, | |
| "loss": 1.8373, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.7565785646438599, | |
| "eval_runtime": 144.3741, | |
| "eval_samples_per_second": 138.529, | |
| "eval_steps_per_second": 2.168, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.276091513991432e-05, | |
| "loss": 1.8304, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.742794156074524, | |
| "eval_runtime": 146.6168, | |
| "eval_samples_per_second": 136.41, | |
| "eval_steps_per_second": 2.135, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.3368577765624526e-05, | |
| "loss": 1.8259, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.7337759733200073, | |
| "eval_runtime": 145.9226, | |
| "eval_samples_per_second": 137.059, | |
| "eval_steps_per_second": 2.145, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.3976240391334734e-05, | |
| "loss": 1.8219, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_loss": 1.7424650192260742, | |
| "eval_runtime": 145.6453, | |
| "eval_samples_per_second": 137.32, | |
| "eval_steps_per_second": 2.149, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.4583903017044936e-05, | |
| "loss": 1.8162, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_loss": 1.7316113710403442, | |
| "eval_runtime": 145.2248, | |
| "eval_samples_per_second": 137.718, | |
| "eval_steps_per_second": 2.155, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.5191565642755143e-05, | |
| "loss": 1.8112, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_loss": 1.7247357368469238, | |
| "eval_runtime": 146.3969, | |
| "eval_samples_per_second": 136.615, | |
| "eval_steps_per_second": 2.138, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.579922826846535e-05, | |
| "loss": 1.807, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_loss": 1.725953459739685, | |
| "eval_runtime": 144.2953, | |
| "eval_samples_per_second": 138.605, | |
| "eval_steps_per_second": 2.169, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.6406890894175555e-05, | |
| "loss": 1.8034, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_loss": 1.721238136291504, | |
| "eval_runtime": 145.7833, | |
| "eval_samples_per_second": 137.19, | |
| "eval_steps_per_second": 2.147, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.701455351988576e-05, | |
| "loss": 1.7984, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_loss": 1.7204127311706543, | |
| "eval_runtime": 147.5961, | |
| "eval_samples_per_second": 135.505, | |
| "eval_steps_per_second": 2.121, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.7622216145595964e-05, | |
| "loss": 1.7944, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_loss": 1.7186585664749146, | |
| "eval_runtime": 143.1913, | |
| "eval_samples_per_second": 139.673, | |
| "eval_steps_per_second": 2.186, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.8229878771306172e-05, | |
| "loss": 1.7915, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 1.7116312980651855, | |
| "eval_runtime": 149.8115, | |
| "eval_samples_per_second": 133.501, | |
| "eval_steps_per_second": 2.089, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.883754139701638e-05, | |
| "loss": 1.7864, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 1.705054521560669, | |
| "eval_runtime": 147.1783, | |
| "eval_samples_per_second": 135.89, | |
| "eval_steps_per_second": 2.127, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.944520402272658e-05, | |
| "loss": 1.7819, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 1.6974027156829834, | |
| "eval_runtime": 151.8787, | |
| "eval_samples_per_second": 131.684, | |
| "eval_steps_per_second": 2.061, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 2.005286664843679e-05, | |
| "loss": 1.7751, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_loss": 1.7015215158462524, | |
| "eval_runtime": 166.3617, | |
| "eval_samples_per_second": 120.22, | |
| "eval_steps_per_second": 1.881, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 2.0660529274146993e-05, | |
| "loss": 1.774, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_loss": 1.697357177734375, | |
| "eval_runtime": 210.2219, | |
| "eval_samples_per_second": 95.138, | |
| "eval_steps_per_second": 1.489, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 2.12681918998572e-05, | |
| "loss": 1.7685, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_loss": 1.7000294923782349, | |
| "eval_runtime": 388.8288, | |
| "eval_samples_per_second": 51.437, | |
| "eval_steps_per_second": 0.805, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 2.1875854525567406e-05, | |
| "loss": 1.7656, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_loss": 1.6891347169876099, | |
| "eval_runtime": 149.9008, | |
| "eval_samples_per_second": 133.422, | |
| "eval_steps_per_second": 2.088, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 2.248351715127761e-05, | |
| "loss": 1.7601, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_loss": 1.691114902496338, | |
| "eval_runtime": 148.0615, | |
| "eval_samples_per_second": 135.079, | |
| "eval_steps_per_second": 2.114, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 2.3091179776987818e-05, | |
| "loss": 1.7574, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_loss": 1.6803953647613525, | |
| "eval_runtime": 162.6955, | |
| "eval_samples_per_second": 122.929, | |
| "eval_steps_per_second": 1.924, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 2.3698842402698022e-05, | |
| "loss": 1.7528, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_loss": 1.6794207096099854, | |
| "eval_runtime": 152.5086, | |
| "eval_samples_per_second": 131.14, | |
| "eval_steps_per_second": 2.052, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.430650502840823e-05, | |
| "loss": 1.7494, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_loss": 1.6750398874282837, | |
| "eval_runtime": 150.7356, | |
| "eval_samples_per_second": 132.683, | |
| "eval_steps_per_second": 2.076, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.4914167654118435e-05, | |
| "loss": 1.7441, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_loss": 1.6635680198669434, | |
| "eval_runtime": 158.1954, | |
| "eval_samples_per_second": 126.426, | |
| "eval_steps_per_second": 1.979, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.552183027982864e-05, | |
| "loss": 1.7405, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_loss": 1.660568118095398, | |
| "eval_runtime": 144.9472, | |
| "eval_samples_per_second": 137.981, | |
| "eval_steps_per_second": 2.159, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.6129492905538844e-05, | |
| "loss": 1.7373, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_loss": 1.6654884815216064, | |
| "eval_runtime": 161.3267, | |
| "eval_samples_per_second": 123.972, | |
| "eval_steps_per_second": 1.94, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.673715553124905e-05, | |
| "loss": 1.7336, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_loss": 1.6575931310653687, | |
| "eval_runtime": 248.169, | |
| "eval_samples_per_second": 80.59, | |
| "eval_steps_per_second": 1.261, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.7344818156959256e-05, | |
| "loss": 1.7291, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_loss": 1.6604431867599487, | |
| "eval_runtime": 145.1056, | |
| "eval_samples_per_second": 137.831, | |
| "eval_steps_per_second": 2.157, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.7952480782669467e-05, | |
| "loss": 1.7243, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_loss": 1.6801910400390625, | |
| "eval_runtime": 153.7947, | |
| "eval_samples_per_second": 130.043, | |
| "eval_steps_per_second": 2.035, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.856014340837967e-05, | |
| "loss": 1.7214, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_loss": 1.6495254039764404, | |
| "eval_runtime": 148.9903, | |
| "eval_samples_per_second": 134.237, | |
| "eval_steps_per_second": 2.101, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.9167806034089873e-05, | |
| "loss": 1.7178, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_loss": 1.6446107625961304, | |
| "eval_runtime": 149.3174, | |
| "eval_samples_per_second": 133.943, | |
| "eval_steps_per_second": 2.096, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.977546865980008e-05, | |
| "loss": 1.7146, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_loss": 1.641605019569397, | |
| "eval_runtime": 154.456, | |
| "eval_samples_per_second": 129.487, | |
| "eval_steps_per_second": 2.026, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 3.0383131285510285e-05, | |
| "loss": 1.7118, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_loss": 1.6381052732467651, | |
| "eval_runtime": 151.3863, | |
| "eval_samples_per_second": 132.112, | |
| "eval_steps_per_second": 2.068, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 3.099079391122049e-05, | |
| "loss": 1.7083, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_loss": 1.6341092586517334, | |
| "eval_runtime": 238.9093, | |
| "eval_samples_per_second": 83.714, | |
| "eval_steps_per_second": 1.31, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 3.15984565369307e-05, | |
| "loss": 1.7062, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_loss": 1.6292831897735596, | |
| "eval_runtime": 152.4932, | |
| "eval_samples_per_second": 131.153, | |
| "eval_steps_per_second": 2.053, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 3.22061191626409e-05, | |
| "loss": 1.7054, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_loss": 1.6273330450057983, | |
| "eval_runtime": 151.097, | |
| "eval_samples_per_second": 132.365, | |
| "eval_steps_per_second": 2.072, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 3.281378178835111e-05, | |
| "loss": 1.7012, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_loss": 1.6267642974853516, | |
| "eval_runtime": 149.0125, | |
| "eval_samples_per_second": 134.217, | |
| "eval_steps_per_second": 2.1, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 3.3421444414061314e-05, | |
| "loss": 1.6993, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_loss": 1.6256201267242432, | |
| "eval_runtime": 149.4973, | |
| "eval_samples_per_second": 133.782, | |
| "eval_steps_per_second": 2.094, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 3.402910703977152e-05, | |
| "loss": 1.697, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_loss": 1.6158908605575562, | |
| "eval_runtime": 152.4757, | |
| "eval_samples_per_second": 131.168, | |
| "eval_steps_per_second": 2.053, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 3.463676966548173e-05, | |
| "loss": 1.6938, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_loss": 1.6134721040725708, | |
| "eval_runtime": 152.3857, | |
| "eval_samples_per_second": 131.246, | |
| "eval_steps_per_second": 2.054, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 3.524443229119193e-05, | |
| "loss": 1.6923, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_loss": 1.6194721460342407, | |
| "eval_runtime": 149.9763, | |
| "eval_samples_per_second": 133.354, | |
| "eval_steps_per_second": 2.087, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 3.585209491690214e-05, | |
| "loss": 1.6888, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_loss": 1.6149234771728516, | |
| "eval_runtime": 150.7266, | |
| "eval_samples_per_second": 132.691, | |
| "eval_steps_per_second": 2.077, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 3.6459757542612344e-05, | |
| "loss": 1.687, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_loss": 1.6148015260696411, | |
| "eval_runtime": 152.1295, | |
| "eval_samples_per_second": 131.467, | |
| "eval_steps_per_second": 2.057, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 3.706742016832255e-05, | |
| "loss": 1.6886, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_loss": 1.6169975996017456, | |
| "eval_runtime": 152.146, | |
| "eval_samples_per_second": 131.453, | |
| "eval_steps_per_second": 2.057, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 3.767508279403276e-05, | |
| "loss": 1.6865, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_loss": 1.6124180555343628, | |
| "eval_runtime": 174.6369, | |
| "eval_samples_per_second": 114.523, | |
| "eval_steps_per_second": 1.792, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 3.828274541974296e-05, | |
| "loss": 1.6829, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_loss": 1.6170154809951782, | |
| "eval_runtime": 262.8027, | |
| "eval_samples_per_second": 76.103, | |
| "eval_steps_per_second": 1.191, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 3.889040804545316e-05, | |
| "loss": 1.6813, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_loss": 1.6040676832199097, | |
| "eval_runtime": 255.431, | |
| "eval_samples_per_second": 78.299, | |
| "eval_steps_per_second": 1.225, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 3.949807067116337e-05, | |
| "loss": 1.6806, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_loss": 1.6070351600646973, | |
| "eval_runtime": 151.6507, | |
| "eval_samples_per_second": 131.882, | |
| "eval_steps_per_second": 2.064, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.010573329687358e-05, | |
| "loss": 1.6763, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_loss": 1.599661946296692, | |
| "eval_runtime": 150.2287, | |
| "eval_samples_per_second": 133.13, | |
| "eval_steps_per_second": 2.083, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.071339592258379e-05, | |
| "loss": 1.6733, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_loss": 1.6072720289230347, | |
| "eval_runtime": 152.0466, | |
| "eval_samples_per_second": 131.539, | |
| "eval_steps_per_second": 2.059, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.1321058548293986e-05, | |
| "loss": 1.6695, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_loss": 1.6115573644638062, | |
| "eval_runtime": 148.9069, | |
| "eval_samples_per_second": 134.312, | |
| "eval_steps_per_second": 2.102, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.192872117400419e-05, | |
| "loss": 1.6687, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_loss": 1.611473798751831, | |
| "eval_runtime": 158.4393, | |
| "eval_samples_per_second": 126.231, | |
| "eval_steps_per_second": 1.976, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.25363837997144e-05, | |
| "loss": 1.6673, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_loss": 1.606929898262024, | |
| "eval_runtime": 159.9938, | |
| "eval_samples_per_second": 125.005, | |
| "eval_steps_per_second": 1.956, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.3144046425424607e-05, | |
| "loss": 1.6655, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_loss": 1.5869165658950806, | |
| "eval_runtime": 154.5639, | |
| "eval_samples_per_second": 129.396, | |
| "eval_steps_per_second": 2.025, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.375170905113481e-05, | |
| "loss": 1.6622, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_loss": 1.6052591800689697, | |
| "eval_runtime": 368.5722, | |
| "eval_samples_per_second": 54.263, | |
| "eval_steps_per_second": 0.849, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.4359371676845016e-05, | |
| "loss": 1.6598, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_loss": 1.58935546875, | |
| "eval_runtime": 152.6646, | |
| "eval_samples_per_second": 131.006, | |
| "eval_steps_per_second": 2.05, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.496703430255522e-05, | |
| "loss": 1.659, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_loss": 1.5808852910995483, | |
| "eval_runtime": 155.4857, | |
| "eval_samples_per_second": 128.629, | |
| "eval_steps_per_second": 2.013, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.557469692826543e-05, | |
| "loss": 1.6583, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_loss": 1.588645100593567, | |
| "eval_runtime": 277.2998, | |
| "eval_samples_per_second": 72.124, | |
| "eval_steps_per_second": 1.129, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.6182359553975636e-05, | |
| "loss": 1.6555, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_loss": 1.5864471197128296, | |
| "eval_runtime": 161.6086, | |
| "eval_samples_per_second": 123.756, | |
| "eval_steps_per_second": 1.937, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.679002217968584e-05, | |
| "loss": 1.6559, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_loss": 1.583774209022522, | |
| "eval_runtime": 346.1205, | |
| "eval_samples_per_second": 57.783, | |
| "eval_steps_per_second": 0.904, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.7397684805396045e-05, | |
| "loss": 1.6522, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_loss": 1.5791034698486328, | |
| "eval_runtime": 326.2692, | |
| "eval_samples_per_second": 61.299, | |
| "eval_steps_per_second": 0.959, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.800534743110625e-05, | |
| "loss": 1.6499, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_loss": 1.5826290845870972, | |
| "eval_runtime": 175.7208, | |
| "eval_samples_per_second": 113.817, | |
| "eval_steps_per_second": 1.781, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.861301005681646e-05, | |
| "loss": 1.6506, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_loss": 1.5759295225143433, | |
| "eval_runtime": 317.0144, | |
| "eval_samples_per_second": 63.089, | |
| "eval_steps_per_second": 0.987, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.9220672682526665e-05, | |
| "loss": 1.6498, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_loss": 1.5828478336334229, | |
| "eval_runtime": 219.1147, | |
| "eval_samples_per_second": 91.276, | |
| "eval_steps_per_second": 1.428, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.982833530823687e-05, | |
| "loss": 1.6473, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_loss": 1.572839617729187, | |
| "eval_runtime": 188.3492, | |
| "eval_samples_per_second": 106.186, | |
| "eval_steps_per_second": 1.662, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.99405445046135e-05, | |
| "loss": 1.644, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_loss": 1.5747781991958618, | |
| "eval_runtime": 261.2843, | |
| "eval_samples_per_second": 76.545, | |
| "eval_steps_per_second": 1.198, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.9857679702681096e-05, | |
| "loss": 1.6419, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_loss": 1.569125771522522, | |
| "eval_runtime": 238.4252, | |
| "eval_samples_per_second": 83.884, | |
| "eval_steps_per_second": 1.313, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.977481490074868e-05, | |
| "loss": 1.6416, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_loss": 1.5649021863937378, | |
| "eval_runtime": 403.1613, | |
| "eval_samples_per_second": 49.608, | |
| "eval_steps_per_second": 0.776, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.969195009881628e-05, | |
| "loss": 1.6365, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_loss": 1.5665974617004395, | |
| "eval_runtime": 154.8734, | |
| "eval_samples_per_second": 129.138, | |
| "eval_steps_per_second": 2.021, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.9609085296883874e-05, | |
| "loss": 1.6348, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_loss": 1.5668097734451294, | |
| "eval_runtime": 193.4192, | |
| "eval_samples_per_second": 103.402, | |
| "eval_steps_per_second": 1.618, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.9526220494951466e-05, | |
| "loss": 1.6342, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_loss": 1.5644603967666626, | |
| "eval_runtime": 525.4863, | |
| "eval_samples_per_second": 38.06, | |
| "eval_steps_per_second": 0.596, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.944335569301905e-05, | |
| "loss": 1.6319, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_loss": 1.5583738088607788, | |
| "eval_runtime": 152.5499, | |
| "eval_samples_per_second": 131.105, | |
| "eval_steps_per_second": 2.052, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.936049089108665e-05, | |
| "loss": 1.6304, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_loss": 1.5624059438705444, | |
| "eval_runtime": 195.0553, | |
| "eval_samples_per_second": 102.535, | |
| "eval_steps_per_second": 1.605, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.9277626089154245e-05, | |
| "loss": 1.6287, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_loss": 1.5545308589935303, | |
| "eval_runtime": 220.9752, | |
| "eval_samples_per_second": 90.508, | |
| "eval_steps_per_second": 1.416, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.919476128722184e-05, | |
| "loss": 1.6301, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_loss": 1.5592070817947388, | |
| "eval_runtime": 213.6052, | |
| "eval_samples_per_second": 93.631, | |
| "eval_steps_per_second": 1.465, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.911189648528943e-05, | |
| "loss": 1.6272, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_loss": 1.5615522861480713, | |
| "eval_runtime": 263.8607, | |
| "eval_samples_per_second": 75.798, | |
| "eval_steps_per_second": 1.186, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.9029031683357016e-05, | |
| "loss": 1.6267, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_loss": 1.558023452758789, | |
| "eval_runtime": 1163.3408, | |
| "eval_samples_per_second": 17.192, | |
| "eval_steps_per_second": 0.269, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.8946166881424615e-05, | |
| "loss": 1.624, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_loss": 1.550244688987732, | |
| "eval_runtime": 156.4776, | |
| "eval_samples_per_second": 127.814, | |
| "eval_steps_per_second": 2.0, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.886330207949221e-05, | |
| "loss": 1.6238, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_loss": 1.5512545108795166, | |
| "eval_runtime": 178.078, | |
| "eval_samples_per_second": 112.31, | |
| "eval_steps_per_second": 1.758, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.87804372775598e-05, | |
| "loss": 1.623, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_loss": 1.5499157905578613, | |
| "eval_runtime": 411.8408, | |
| "eval_samples_per_second": 48.562, | |
| "eval_steps_per_second": 0.76, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.869757247562739e-05, | |
| "loss": 1.6214, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_loss": 1.554477572441101, | |
| "eval_runtime": 152.8841, | |
| "eval_samples_per_second": 130.818, | |
| "eval_steps_per_second": 2.047, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.8614707673694986e-05, | |
| "loss": 1.6173, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_loss": 1.5494047403335571, | |
| "eval_runtime": 154.2296, | |
| "eval_samples_per_second": 129.677, | |
| "eval_steps_per_second": 2.029, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.853184287176258e-05, | |
| "loss": 1.6159, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_loss": 1.5492123365402222, | |
| "eval_runtime": 358.728, | |
| "eval_samples_per_second": 55.753, | |
| "eval_steps_per_second": 0.873, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.844897806983017e-05, | |
| "loss": 1.6131, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_loss": 1.5435105562210083, | |
| "eval_runtime": 424.1722, | |
| "eval_samples_per_second": 47.151, | |
| "eval_steps_per_second": 0.738, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.8366113267897764e-05, | |
| "loss": 1.6125, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_loss": 1.5407049655914307, | |
| "eval_runtime": 233.4949, | |
| "eval_samples_per_second": 85.655, | |
| "eval_steps_per_second": 1.341, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.828324846596536e-05, | |
| "loss": 1.6129, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_loss": 1.5503147840499878, | |
| "eval_runtime": 154.5487, | |
| "eval_samples_per_second": 129.409, | |
| "eval_steps_per_second": 2.025, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.820038366403295e-05, | |
| "loss": 1.61, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_loss": 1.5319013595581055, | |
| "eval_runtime": 229.3937, | |
| "eval_samples_per_second": 87.186, | |
| "eval_steps_per_second": 1.364, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.811751886210054e-05, | |
| "loss": 1.6083, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_loss": 1.540002465248108, | |
| "eval_runtime": 200.5559, | |
| "eval_samples_per_second": 99.723, | |
| "eval_steps_per_second": 1.561, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.8034654060168135e-05, | |
| "loss": 1.6049, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_loss": 1.5374138355255127, | |
| "eval_runtime": 353.7763, | |
| "eval_samples_per_second": 56.533, | |
| "eval_steps_per_second": 0.885, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.795178925823573e-05, | |
| "loss": 1.6048, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_loss": 1.5372508764266968, | |
| "eval_runtime": 306.8656, | |
| "eval_samples_per_second": 65.175, | |
| "eval_steps_per_second": 1.02, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.786892445630332e-05, | |
| "loss": 1.6036, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_loss": 1.538548469543457, | |
| "eval_runtime": 875.5249, | |
| "eval_samples_per_second": 22.843, | |
| "eval_steps_per_second": 0.357, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.778605965437091e-05, | |
| "loss": 1.6025, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_loss": 1.5447686910629272, | |
| "eval_runtime": 216.6802, | |
| "eval_samples_per_second": 92.302, | |
| "eval_steps_per_second": 1.445, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.7703194852438506e-05, | |
| "loss": 1.5987, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_loss": 1.534464716911316, | |
| "eval_runtime": 527.5707, | |
| "eval_samples_per_second": 37.91, | |
| "eval_steps_per_second": 0.593, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.76203300505061e-05, | |
| "loss": 1.5995, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_loss": 1.537174105644226, | |
| "eval_runtime": 157.1885, | |
| "eval_samples_per_second": 127.236, | |
| "eval_steps_per_second": 1.991, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.753746524857369e-05, | |
| "loss": 1.5995, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_loss": 1.5312557220458984, | |
| "eval_runtime": 590.8847, | |
| "eval_samples_per_second": 33.848, | |
| "eval_steps_per_second": 0.53, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.7454600446641284e-05, | |
| "loss": 1.6002, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_loss": 1.5247910022735596, | |
| "eval_runtime": 197.0178, | |
| "eval_samples_per_second": 101.514, | |
| "eval_steps_per_second": 1.589, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.737173564470888e-05, | |
| "loss": 1.5985, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_loss": 1.5312753915786743, | |
| "eval_runtime": 217.2767, | |
| "eval_samples_per_second": 92.049, | |
| "eval_steps_per_second": 1.441, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.728887084277647e-05, | |
| "loss": 1.5975, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_loss": 1.5283282995224, | |
| "eval_runtime": 247.4783, | |
| "eval_samples_per_second": 80.815, | |
| "eval_steps_per_second": 1.265, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.720600604084406e-05, | |
| "loss": 1.5942, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_loss": 1.5262142419815063, | |
| "eval_runtime": 943.4579, | |
| "eval_samples_per_second": 21.199, | |
| "eval_steps_per_second": 0.332, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.7123141238911655e-05, | |
| "loss": 1.5946, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_loss": 1.5237544775009155, | |
| "eval_runtime": 157.7499, | |
| "eval_samples_per_second": 126.783, | |
| "eval_steps_per_second": 1.984, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.704027643697925e-05, | |
| "loss": 1.592, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_loss": 1.5289279222488403, | |
| "eval_runtime": 380.6257, | |
| "eval_samples_per_second": 52.545, | |
| "eval_steps_per_second": 0.822, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.695741163504685e-05, | |
| "loss": 1.5924, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_loss": 1.523956298828125, | |
| "eval_runtime": 154.9689, | |
| "eval_samples_per_second": 129.058, | |
| "eval_steps_per_second": 2.02, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.687454683311443e-05, | |
| "loss": 1.5901, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_loss": 1.5227019786834717, | |
| "eval_runtime": 728.6822, | |
| "eval_samples_per_second": 27.447, | |
| "eval_steps_per_second": 0.43, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.6791682031182026e-05, | |
| "loss": 1.589, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_loss": 1.5262556076049805, | |
| "eval_runtime": 156.5557, | |
| "eval_samples_per_second": 127.75, | |
| "eval_steps_per_second": 1.999, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.670881722924962e-05, | |
| "loss": 1.5875, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_loss": 1.5185788869857788, | |
| "eval_runtime": 355.4244, | |
| "eval_samples_per_second": 56.271, | |
| "eval_steps_per_second": 0.881, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.662595242731722e-05, | |
| "loss": 1.5867, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_loss": 1.51908278465271, | |
| "eval_runtime": 729.5519, | |
| "eval_samples_per_second": 27.414, | |
| "eval_steps_per_second": 0.429, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.6543087625384804e-05, | |
| "loss": 1.5849, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_loss": 1.5164732933044434, | |
| "eval_runtime": 852.7453, | |
| "eval_samples_per_second": 23.454, | |
| "eval_steps_per_second": 0.367, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.64602228234524e-05, | |
| "loss": 1.5828, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_loss": 1.5202162265777588, | |
| "eval_runtime": 157.0148, | |
| "eval_samples_per_second": 127.377, | |
| "eval_steps_per_second": 1.993, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.637735802151999e-05, | |
| "loss": 1.5816, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_loss": 1.5152881145477295, | |
| "eval_runtime": 211.7581, | |
| "eval_samples_per_second": 94.447, | |
| "eval_steps_per_second": 1.478, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.629449321958758e-05, | |
| "loss": 1.5809, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_loss": 1.5141160488128662, | |
| "eval_runtime": 164.3824, | |
| "eval_samples_per_second": 121.668, | |
| "eval_steps_per_second": 1.904, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.621162841765518e-05, | |
| "loss": 1.5771, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_loss": 1.5138821601867676, | |
| "eval_runtime": 462.6007, | |
| "eval_samples_per_second": 43.234, | |
| "eval_steps_per_second": 0.677, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.612876361572277e-05, | |
| "loss": 1.5775, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_loss": 1.509470820426941, | |
| "eval_runtime": 775.3154, | |
| "eval_samples_per_second": 25.796, | |
| "eval_steps_per_second": 0.404, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.604589881379036e-05, | |
| "loss": 1.5767, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_loss": 1.5092774629592896, | |
| "eval_runtime": 186.3503, | |
| "eval_samples_per_second": 107.325, | |
| "eval_steps_per_second": 1.68, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.596303401185795e-05, | |
| "loss": 1.5757, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_loss": 1.5057079792022705, | |
| "eval_runtime": 159.2417, | |
| "eval_samples_per_second": 125.595, | |
| "eval_steps_per_second": 1.966, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.588016920992555e-05, | |
| "loss": 1.5752, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_loss": 1.5144433975219727, | |
| "eval_runtime": 159.6541, | |
| "eval_samples_per_second": 125.271, | |
| "eval_steps_per_second": 1.96, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.579730440799314e-05, | |
| "loss": 1.5752, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_loss": 1.506042242050171, | |
| "eval_runtime": 406.841, | |
| "eval_samples_per_second": 49.159, | |
| "eval_steps_per_second": 0.769, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.571443960606073e-05, | |
| "loss": 1.5759, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_loss": 1.511734962463379, | |
| "eval_runtime": 956.1026, | |
| "eval_samples_per_second": 20.918, | |
| "eval_steps_per_second": 0.327, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.5631574804128324e-05, | |
| "loss": 1.5749, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_loss": 1.5020769834518433, | |
| "eval_runtime": 261.4557, | |
| "eval_samples_per_second": 76.495, | |
| "eval_steps_per_second": 1.197, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.554871000219592e-05, | |
| "loss": 1.5732, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_loss": 1.536434531211853, | |
| "eval_runtime": 200.7371, | |
| "eval_samples_per_second": 99.633, | |
| "eval_steps_per_second": 1.559, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.5465845200263516e-05, | |
| "loss": 1.5728, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_loss": 1.5178890228271484, | |
| "eval_runtime": 188.6203, | |
| "eval_samples_per_second": 106.033, | |
| "eval_steps_per_second": 1.659, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.53829803983311e-05, | |
| "loss": 1.5742, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_loss": 1.503977656364441, | |
| "eval_runtime": 232.3531, | |
| "eval_samples_per_second": 86.076, | |
| "eval_steps_per_second": 1.347, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.5300115596398695e-05, | |
| "loss": 1.5701, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_loss": 1.5044046640396118, | |
| "eval_runtime": 248.7056, | |
| "eval_samples_per_second": 80.416, | |
| "eval_steps_per_second": 1.259, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.5217250794466294e-05, | |
| "loss": 1.569, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_loss": 1.5002530813217163, | |
| "eval_runtime": 155.8533, | |
| "eval_samples_per_second": 128.326, | |
| "eval_steps_per_second": 2.008, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.513438599253389e-05, | |
| "loss": 1.5671, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_loss": 1.5035356283187866, | |
| "eval_runtime": 591.7888, | |
| "eval_samples_per_second": 33.796, | |
| "eval_steps_per_second": 0.529, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.505152119060147e-05, | |
| "loss": 1.5663, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_loss": 1.5083376169204712, | |
| "eval_runtime": 235.8323, | |
| "eval_samples_per_second": 84.806, | |
| "eval_steps_per_second": 1.327, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.4968656388669065e-05, | |
| "loss": 1.566, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.500092625617981, | |
| "eval_runtime": 122.1502, | |
| "eval_samples_per_second": 163.733, | |
| "eval_steps_per_second": 2.562, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.4885791586736665e-05, | |
| "loss": 1.5667, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.4975863695144653, | |
| "eval_runtime": 122.6954, | |
| "eval_samples_per_second": 163.005, | |
| "eval_steps_per_second": 2.551, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.480292678480426e-05, | |
| "loss": 1.5657, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.4931672811508179, | |
| "eval_runtime": 123.0954, | |
| "eval_samples_per_second": 162.476, | |
| "eval_steps_per_second": 2.543, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.472006198287185e-05, | |
| "loss": 1.5642, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.4972225427627563, | |
| "eval_runtime": 123.364, | |
| "eval_samples_per_second": 162.122, | |
| "eval_steps_per_second": 2.537, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.4637197180939436e-05, | |
| "loss": 1.5622, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.49701988697052, | |
| "eval_runtime": 123.4986, | |
| "eval_samples_per_second": 161.945, | |
| "eval_steps_per_second": 2.534, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.4554332379007036e-05, | |
| "loss": 1.5607, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.4874858856201172, | |
| "eval_runtime": 123.9331, | |
| "eval_samples_per_second": 161.377, | |
| "eval_steps_per_second": 2.526, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.447146757707463e-05, | |
| "loss": 1.5607, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.4898470640182495, | |
| "eval_runtime": 120.8464, | |
| "eval_samples_per_second": 165.499, | |
| "eval_steps_per_second": 2.59, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.438860277514222e-05, | |
| "loss": 1.5586, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.494850754737854, | |
| "eval_runtime": 124.1267, | |
| "eval_samples_per_second": 161.126, | |
| "eval_steps_per_second": 2.522, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.430573797320981e-05, | |
| "loss": 1.5582, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.4933040142059326, | |
| "eval_runtime": 122.7367, | |
| "eval_samples_per_second": 162.95, | |
| "eval_steps_per_second": 2.55, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.4222873171277407e-05, | |
| "loss": 1.5579, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.4987492561340332, | |
| "eval_runtime": 123.7785, | |
| "eval_samples_per_second": 161.579, | |
| "eval_steps_per_second": 2.529, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.4140008369345e-05, | |
| "loss": 1.5577, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.489683747291565, | |
| "eval_runtime": 121.2724, | |
| "eval_samples_per_second": 164.918, | |
| "eval_steps_per_second": 2.581, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.405714356741259e-05, | |
| "loss": 1.5574, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.4959229230880737, | |
| "eval_runtime": 121.5786, | |
| "eval_samples_per_second": 164.503, | |
| "eval_steps_per_second": 2.574, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.397427876548018e-05, | |
| "loss": 1.5551, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.496133804321289, | |
| "eval_runtime": 121.3994, | |
| "eval_samples_per_second": 164.746, | |
| "eval_steps_per_second": 2.578, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.389141396354778e-05, | |
| "loss": 1.5549, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.4901236295700073, | |
| "eval_runtime": 122.3573, | |
| "eval_samples_per_second": 163.456, | |
| "eval_steps_per_second": 2.558, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.380854916161537e-05, | |
| "loss": 1.5535, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.4875001907348633, | |
| "eval_runtime": 105.5644, | |
| "eval_samples_per_second": 189.458, | |
| "eval_steps_per_second": 2.965, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.372568435968296e-05, | |
| "loss": 1.5542, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.4935693740844727, | |
| "eval_runtime": 112.1109, | |
| "eval_samples_per_second": 178.395, | |
| "eval_steps_per_second": 2.792, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.3642819557750556e-05, | |
| "loss": 1.5512, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.4915146827697754, | |
| "eval_runtime": 109.7302, | |
| "eval_samples_per_second": 182.265, | |
| "eval_steps_per_second": 2.852, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.355995475581814e-05, | |
| "loss": 1.5515, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.4876190423965454, | |
| "eval_runtime": 111.5094, | |
| "eval_samples_per_second": 179.357, | |
| "eval_steps_per_second": 2.807, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.347708995388574e-05, | |
| "loss": 1.549, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.4836992025375366, | |
| "eval_runtime": 112.3893, | |
| "eval_samples_per_second": 177.953, | |
| "eval_steps_per_second": 2.785, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.3394225151953334e-05, | |
| "loss": 1.5479, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.4897727966308594, | |
| "eval_runtime": 111.5923, | |
| "eval_samples_per_second": 179.224, | |
| "eval_steps_per_second": 2.805, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.3311360350020926e-05, | |
| "loss": 1.5492, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.484372615814209, | |
| "eval_runtime": 111.9819, | |
| "eval_samples_per_second": 178.6, | |
| "eval_steps_per_second": 2.795, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.322849554808851e-05, | |
| "loss": 1.5468, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.4826014041900635, | |
| "eval_runtime": 112.199, | |
| "eval_samples_per_second": 178.255, | |
| "eval_steps_per_second": 2.79, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.314563074615611e-05, | |
| "loss": 1.5476, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.4857112169265747, | |
| "eval_runtime": 112.9874, | |
| "eval_samples_per_second": 177.011, | |
| "eval_steps_per_second": 2.77, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.3062765944223705e-05, | |
| "loss": 1.5473, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.487414836883545, | |
| "eval_runtime": 114.6833, | |
| "eval_samples_per_second": 174.393, | |
| "eval_steps_per_second": 2.729, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.29799011422913e-05, | |
| "loss": 1.5487, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.4894484281539917, | |
| "eval_runtime": 109.3915, | |
| "eval_samples_per_second": 182.83, | |
| "eval_steps_per_second": 2.861, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.289703634035889e-05, | |
| "loss": 1.5476, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.4839718341827393, | |
| "eval_runtime": 114.1702, | |
| "eval_samples_per_second": 175.177, | |
| "eval_steps_per_second": 2.742, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.281417153842648e-05, | |
| "loss": 1.5459, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.4786709547042847, | |
| "eval_runtime": 112.7446, | |
| "eval_samples_per_second": 177.392, | |
| "eval_steps_per_second": 2.776, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.2731306736494075e-05, | |
| "loss": 1.5431, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.48154616355896, | |
| "eval_runtime": 113.692, | |
| "eval_samples_per_second": 175.914, | |
| "eval_steps_per_second": 2.753, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.264844193456167e-05, | |
| "loss": 1.544, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 1.4801952838897705, | |
| "eval_runtime": 111.6945, | |
| "eval_samples_per_second": 179.06, | |
| "eval_steps_per_second": 2.802, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.256557713262926e-05, | |
| "loss": 1.5436, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.478300929069519, | |
| "eval_runtime": 114.4135, | |
| "eval_samples_per_second": 174.805, | |
| "eval_steps_per_second": 2.736, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.2482712330696853e-05, | |
| "loss": 1.5411, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.484221339225769, | |
| "eval_runtime": 114.3599, | |
| "eval_samples_per_second": 174.886, | |
| "eval_steps_per_second": 2.737, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.2399847528764446e-05, | |
| "loss": 1.5446, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 1.4805113077163696, | |
| "eval_runtime": 115.5225, | |
| "eval_samples_per_second": 173.126, | |
| "eval_steps_per_second": 2.709, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.231698272683204e-05, | |
| "loss": 1.5441, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.4875138998031616, | |
| "eval_runtime": 114.7419, | |
| "eval_samples_per_second": 174.304, | |
| "eval_steps_per_second": 2.728, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.223411792489963e-05, | |
| "loss": 1.5446, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.4801757335662842, | |
| "eval_runtime": 119.231, | |
| "eval_samples_per_second": 167.742, | |
| "eval_steps_per_second": 2.625, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.2151253122967224e-05, | |
| "loss": 1.5443, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.4772462844848633, | |
| "eval_runtime": 115.591, | |
| "eval_samples_per_second": 173.024, | |
| "eval_steps_per_second": 2.708, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.206838832103482e-05, | |
| "loss": 1.5411, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.4795691967010498, | |
| "eval_runtime": 118.6003, | |
| "eval_samples_per_second": 168.634, | |
| "eval_steps_per_second": 2.639, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.198552351910241e-05, | |
| "loss": 1.5413, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_loss": 1.4804329872131348, | |
| "eval_runtime": 119.1285, | |
| "eval_samples_per_second": 167.886, | |
| "eval_steps_per_second": 2.627, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.190265871717e-05, | |
| "loss": 1.5415, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_loss": 1.4793719053268433, | |
| "eval_runtime": 117.4287, | |
| "eval_samples_per_second": 170.316, | |
| "eval_steps_per_second": 2.665, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.1819793915237595e-05, | |
| "loss": 1.5414, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_loss": 1.4818830490112305, | |
| "eval_runtime": 119.5626, | |
| "eval_samples_per_second": 167.276, | |
| "eval_steps_per_second": 2.618, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.173692911330519e-05, | |
| "loss": 1.5423, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_loss": 1.4779819250106812, | |
| "eval_runtime": 118.4406, | |
| "eval_samples_per_second": 168.861, | |
| "eval_steps_per_second": 2.643, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.165406431137278e-05, | |
| "loss": 1.5386, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_loss": 1.4759750366210938, | |
| "eval_runtime": 115.7547, | |
| "eval_samples_per_second": 172.779, | |
| "eval_steps_per_second": 2.704, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.157119950944037e-05, | |
| "loss": 1.5386, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_loss": 1.4726980924606323, | |
| "eval_runtime": 119.2625, | |
| "eval_samples_per_second": 167.697, | |
| "eval_steps_per_second": 2.624, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.148833470750797e-05, | |
| "loss": 1.5375, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_loss": 1.4739803075790405, | |
| "eval_runtime": 118.9815, | |
| "eval_samples_per_second": 168.093, | |
| "eval_steps_per_second": 2.631, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.140546990557556e-05, | |
| "loss": 1.5376, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 1.4678592681884766, | |
| "eval_runtime": 117.6181, | |
| "eval_samples_per_second": 170.042, | |
| "eval_steps_per_second": 2.661, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.132260510364315e-05, | |
| "loss": 1.5365, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 1.4694132804870605, | |
| "eval_runtime": 118.0975, | |
| "eval_samples_per_second": 169.352, | |
| "eval_steps_per_second": 2.65, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.1239740301710744e-05, | |
| "loss": 1.5356, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 1.4689810276031494, | |
| "eval_runtime": 119.48, | |
| "eval_samples_per_second": 167.392, | |
| "eval_steps_per_second": 2.62, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.1156875499778344e-05, | |
| "loss": 1.5353, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_loss": 1.4731059074401855, | |
| "eval_runtime": 117.5581, | |
| "eval_samples_per_second": 170.129, | |
| "eval_steps_per_second": 2.663, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.107401069784593e-05, | |
| "loss": 1.5348, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_loss": 1.466073751449585, | |
| "eval_runtime": 118.6436, | |
| "eval_samples_per_second": 168.572, | |
| "eval_steps_per_second": 2.638, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.099114589591352e-05, | |
| "loss": 1.5336, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_loss": 1.4694697856903076, | |
| "eval_runtime": 117.8705, | |
| "eval_samples_per_second": 169.678, | |
| "eval_steps_per_second": 2.655, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.0908281093981115e-05, | |
| "loss": 1.5331, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_loss": 1.470395803451538, | |
| "eval_runtime": 119.1567, | |
| "eval_samples_per_second": 167.846, | |
| "eval_steps_per_second": 2.627, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.0825416292048714e-05, | |
| "loss": 1.5336, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_loss": 1.4707101583480835, | |
| "eval_runtime": 217.6239, | |
| "eval_samples_per_second": 91.902, | |
| "eval_steps_per_second": 1.438, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.074255149011631e-05, | |
| "loss": 1.5303, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 1.4677211046218872, | |
| "eval_runtime": 111.0323, | |
| "eval_samples_per_second": 180.128, | |
| "eval_steps_per_second": 2.819, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.065968668818389e-05, | |
| "loss": 1.5302, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.4664534330368042, | |
| "eval_runtime": 111.6113, | |
| "eval_samples_per_second": 179.193, | |
| "eval_steps_per_second": 2.804, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.0576821886251486e-05, | |
| "loss": 1.5288, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.4657336473464966, | |
| "eval_runtime": 109.321, | |
| "eval_samples_per_second": 182.947, | |
| "eval_steps_per_second": 2.863, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.049395708431908e-05, | |
| "loss": 1.5284, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.4579006433486938, | |
| "eval_runtime": 109.1836, | |
| "eval_samples_per_second": 183.178, | |
| "eval_steps_per_second": 2.867, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.041109228238668e-05, | |
| "loss": 1.5277, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 1.4642364978790283, | |
| "eval_runtime": 108.8787, | |
| "eval_samples_per_second": 183.691, | |
| "eval_steps_per_second": 2.875, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.0328227480454264e-05, | |
| "loss": 1.5254, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.4699641466140747, | |
| "eval_runtime": 110.6507, | |
| "eval_samples_per_second": 180.749, | |
| "eval_steps_per_second": 2.829, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.024536267852186e-05, | |
| "loss": 1.526, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.4663636684417725, | |
| "eval_runtime": 108.472, | |
| "eval_samples_per_second": 184.379, | |
| "eval_steps_per_second": 2.886, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.016249787658945e-05, | |
| "loss": 1.5242, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 1.4651668071746826, | |
| "eval_runtime": 111.7826, | |
| "eval_samples_per_second": 178.919, | |
| "eval_steps_per_second": 2.8, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.007963307465705e-05, | |
| "loss": 1.523, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 1.4634953737258911, | |
| "eval_runtime": 110.7712, | |
| "eval_samples_per_second": 180.552, | |
| "eval_steps_per_second": 2.826, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 3.9109053272894466e-05, | |
| "loss": 1.524, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.39056134223938, | |
| "eval_runtime": 20.0958, | |
| "eval_samples_per_second": 175.112, | |
| "eval_steps_per_second": 5.474, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.902803602027052e-05, | |
| "loss": 1.5242, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.3666460514068604, | |
| "eval_runtime": 18.7692, | |
| "eval_samples_per_second": 187.488, | |
| "eval_steps_per_second": 5.861, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.894701876764657e-05, | |
| "loss": 1.5237, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.385453939437866, | |
| "eval_runtime": 19.4223, | |
| "eval_samples_per_second": 181.184, | |
| "eval_steps_per_second": 5.664, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.886600151502263e-05, | |
| "loss": 1.5226, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.393972396850586, | |
| "eval_runtime": 19.0938, | |
| "eval_samples_per_second": 184.301, | |
| "eval_steps_per_second": 5.761, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.8784984262398676e-05, | |
| "loss": 1.5218, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.456040620803833, | |
| "eval_runtime": 18.9643, | |
| "eval_samples_per_second": 185.56, | |
| "eval_steps_per_second": 5.8, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.870396700977473e-05, | |
| "loss": 1.5215, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 2.395426034927368, | |
| "eval_runtime": 19.2189, | |
| "eval_samples_per_second": 183.101, | |
| "eval_steps_per_second": 5.724, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.862294975715079e-05, | |
| "loss": 1.521, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 2.4465413093566895, | |
| "eval_runtime": 18.8719, | |
| "eval_samples_per_second": 186.468, | |
| "eval_steps_per_second": 5.829, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.854193250452684e-05, | |
| "loss": 1.5209, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 2.396277904510498, | |
| "eval_runtime": 18.9346, | |
| "eval_samples_per_second": 185.85, | |
| "eval_steps_per_second": 5.809, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 3.846091525190289e-05, | |
| "loss": 1.5188, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.4277689456939697, | |
| "eval_runtime": 20.9367, | |
| "eval_samples_per_second": 168.078, | |
| "eval_steps_per_second": 5.254, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.837989799927895e-05, | |
| "loss": 1.5177, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.378986120223999, | |
| "eval_runtime": 20.2239, | |
| "eval_samples_per_second": 174.002, | |
| "eval_steps_per_second": 5.439, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.8298880746655e-05, | |
| "loss": 1.5184, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.395463705062866, | |
| "eval_runtime": 19.3659, | |
| "eval_samples_per_second": 181.711, | |
| "eval_steps_per_second": 5.68, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.8217863494031056e-05, | |
| "loss": 1.5166, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.421231269836426, | |
| "eval_runtime": 20.4856, | |
| "eval_samples_per_second": 171.779, | |
| "eval_steps_per_second": 5.37, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.813684624140711e-05, | |
| "loss": 1.5158, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.4270944595336914, | |
| "eval_runtime": 19.2825, | |
| "eval_samples_per_second": 182.497, | |
| "eval_steps_per_second": 5.705, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.8055828988783165e-05, | |
| "loss": 1.5157, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 2.4186675548553467, | |
| "eval_runtime": 19.3721, | |
| "eval_samples_per_second": 181.653, | |
| "eval_steps_per_second": 5.678, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.797481173615922e-05, | |
| "loss": 1.5156, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 2.4075629711151123, | |
| "eval_runtime": 19.2183, | |
| "eval_samples_per_second": 183.107, | |
| "eval_steps_per_second": 5.724, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.789379448353527e-05, | |
| "loss": 1.5147, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 2.471975803375244, | |
| "eval_runtime": 20.3949, | |
| "eval_samples_per_second": 172.543, | |
| "eval_steps_per_second": 5.394, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 3.781277723091132e-05, | |
| "loss": 1.5127, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.3385655879974365, | |
| "eval_runtime": 20.157, | |
| "eval_samples_per_second": 174.58, | |
| "eval_steps_per_second": 5.457, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.773175997828738e-05, | |
| "loss": 1.5129, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.381673574447632, | |
| "eval_runtime": 20.5337, | |
| "eval_samples_per_second": 171.377, | |
| "eval_steps_per_second": 5.357, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.765074272566343e-05, | |
| "loss": 1.5123, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.35689377784729, | |
| "eval_runtime": 20.7328, | |
| "eval_samples_per_second": 169.731, | |
| "eval_steps_per_second": 5.306, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.7569725473039484e-05, | |
| "loss": 1.5121, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.3643054962158203, | |
| "eval_runtime": 19.1222, | |
| "eval_samples_per_second": 184.026, | |
| "eval_steps_per_second": 5.752, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.7488708220415545e-05, | |
| "loss": 1.5118, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.334357261657715, | |
| "eval_runtime": 19.3908, | |
| "eval_samples_per_second": 181.478, | |
| "eval_steps_per_second": 5.673, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.740769096779159e-05, | |
| "loss": 1.5102, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 2.401927947998047, | |
| "eval_runtime": 20.1285, | |
| "eval_samples_per_second": 174.827, | |
| "eval_steps_per_second": 5.465, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.7326673715167647e-05, | |
| "loss": 1.5097, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 2.4241695404052734, | |
| "eval_runtime": 20.5668, | |
| "eval_samples_per_second": 171.101, | |
| "eval_steps_per_second": 5.348, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.72456564625437e-05, | |
| "loss": 1.5103, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 2.393686532974243, | |
| "eval_runtime": 19.2168, | |
| "eval_samples_per_second": 183.121, | |
| "eval_steps_per_second": 5.724, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.7164639209919755e-05, | |
| "loss": 1.5112, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 2.3694939613342285, | |
| "eval_runtime": 20.1373, | |
| "eval_samples_per_second": 174.751, | |
| "eval_steps_per_second": 5.463, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.70836219572958e-05, | |
| "loss": 1.5108, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 2.345815420150757, | |
| "eval_runtime": 20.1959, | |
| "eval_samples_per_second": 174.243, | |
| "eval_steps_per_second": 5.447, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.7002604704671864e-05, | |
| "loss": 1.511, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 2.3629839420318604, | |
| "eval_runtime": 19.3875, | |
| "eval_samples_per_second": 181.508, | |
| "eval_steps_per_second": 5.674, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.692158745204792e-05, | |
| "loss": 1.5089, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 2.385115385055542, | |
| "eval_runtime": 20.4471, | |
| "eval_samples_per_second": 172.103, | |
| "eval_steps_per_second": 5.38, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.6840570199423966e-05, | |
| "loss": 1.5095, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 2.319392442703247, | |
| "eval_runtime": 19.8755, | |
| "eval_samples_per_second": 177.052, | |
| "eval_steps_per_second": 5.534, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.6759552946800027e-05, | |
| "loss": 1.5094, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 2.3495166301727295, | |
| "eval_runtime": 19.4501, | |
| "eval_samples_per_second": 180.925, | |
| "eval_steps_per_second": 5.656, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.6678535694176074e-05, | |
| "loss": 1.5101, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 2.365245819091797, | |
| "eval_runtime": 19.578, | |
| "eval_samples_per_second": 179.743, | |
| "eval_steps_per_second": 5.619, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 3.659751844155213e-05, | |
| "loss": 1.5089, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 2.371981143951416, | |
| "eval_runtime": 19.798, | |
| "eval_samples_per_second": 177.745, | |
| "eval_steps_per_second": 5.556, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 3.651650118892818e-05, | |
| "loss": 1.509, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 2.332063913345337, | |
| "eval_runtime": 19.3403, | |
| "eval_samples_per_second": 181.952, | |
| "eval_steps_per_second": 5.688, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 3.643548393630424e-05, | |
| "loss": 1.5096, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 2.404459238052368, | |
| "eval_runtime": 19.2128, | |
| "eval_samples_per_second": 183.159, | |
| "eval_steps_per_second": 5.725, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.635446668368029e-05, | |
| "loss": 1.5089, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 2.3641324043273926, | |
| "eval_runtime": 19.4859, | |
| "eval_samples_per_second": 180.592, | |
| "eval_steps_per_second": 5.645, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.6273449431056346e-05, | |
| "loss": 1.5084, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 2.3842105865478516, | |
| "eval_runtime": 19.764, | |
| "eval_samples_per_second": 178.051, | |
| "eval_steps_per_second": 5.566, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.61924321784324e-05, | |
| "loss": 1.5089, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 2.3656747341156006, | |
| "eval_runtime": 20.585, | |
| "eval_samples_per_second": 170.949, | |
| "eval_steps_per_second": 5.344, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.6111414925808454e-05, | |
| "loss": 1.5097, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 2.374446153640747, | |
| "eval_runtime": 19.4426, | |
| "eval_samples_per_second": 180.994, | |
| "eval_steps_per_second": 5.658, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 3.603039767318451e-05, | |
| "loss": 1.5072, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_loss": 2.385554552078247, | |
| "eval_runtime": 20.3681, | |
| "eval_samples_per_second": 172.771, | |
| "eval_steps_per_second": 5.401, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 3.5949380420560556e-05, | |
| "loss": 1.5041, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.3629019260406494, | |
| "eval_runtime": 18.0818, | |
| "eval_samples_per_second": 194.616, | |
| "eval_steps_per_second": 6.083, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.586836316793662e-05, | |
| "loss": 1.5036, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.3723270893096924, | |
| "eval_runtime": 17.4087, | |
| "eval_samples_per_second": 202.14, | |
| "eval_steps_per_second": 6.319, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.578734591531267e-05, | |
| "loss": 1.504, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.390188217163086, | |
| "eval_runtime": 17.5005, | |
| "eval_samples_per_second": 201.081, | |
| "eval_steps_per_second": 6.286, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.570632866268872e-05, | |
| "loss": 1.5034, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.3117146492004395, | |
| "eval_runtime": 17.3837, | |
| "eval_samples_per_second": 202.431, | |
| "eval_steps_per_second": 6.328, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.562531141006478e-05, | |
| "loss": 1.5021, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.3584558963775635, | |
| "eval_runtime": 18.524, | |
| "eval_samples_per_second": 189.97, | |
| "eval_steps_per_second": 5.938, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.554429415744083e-05, | |
| "loss": 1.501, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 2.2931323051452637, | |
| "eval_runtime": 17.3901, | |
| "eval_samples_per_second": 202.357, | |
| "eval_steps_per_second": 6.325, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.546327690481688e-05, | |
| "loss": 1.501, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 2.3333306312561035, | |
| "eval_runtime": 17.4003, | |
| "eval_samples_per_second": 202.238, | |
| "eval_steps_per_second": 6.322, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.5382259652192936e-05, | |
| "loss": 1.4992, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 2.342263698577881, | |
| "eval_runtime": 17.3606, | |
| "eval_samples_per_second": 202.701, | |
| "eval_steps_per_second": 6.336, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.530124239956899e-05, | |
| "loss": 1.5008, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 2.336986541748047, | |
| "eval_runtime": 17.0114, | |
| "eval_samples_per_second": 206.861, | |
| "eval_steps_per_second": 6.466, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.5220225146945045e-05, | |
| "loss": 1.5002, | |
| "step": 100400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 2.3513643741607666, | |
| "eval_runtime": 17.6104, | |
| "eval_samples_per_second": 199.825, | |
| "eval_steps_per_second": 6.246, | |
| "step": 100400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.51392078943211e-05, | |
| "loss": 1.5016, | |
| "step": 100800 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 2.3241846561431885, | |
| "eval_runtime": 17.6475, | |
| "eval_samples_per_second": 199.405, | |
| "eval_steps_per_second": 6.233, | |
| "step": 100800 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.505819064169715e-05, | |
| "loss": 1.4988, | |
| "step": 101200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 2.359363317489624, | |
| "eval_runtime": 17.067, | |
| "eval_samples_per_second": 206.187, | |
| "eval_steps_per_second": 6.445, | |
| "step": 101200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.49771733890732e-05, | |
| "loss": 1.4992, | |
| "step": 101600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 2.348477363586426, | |
| "eval_runtime": 17.779, | |
| "eval_samples_per_second": 197.93, | |
| "eval_steps_per_second": 6.187, | |
| "step": 101600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.489615613644926e-05, | |
| "loss": 1.5003, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 2.4026684761047363, | |
| "eval_runtime": 17.0398, | |
| "eval_samples_per_second": 206.516, | |
| "eval_steps_per_second": 6.455, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.481513888382531e-05, | |
| "loss": 1.4994, | |
| "step": 102400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 2.365537643432617, | |
| "eval_runtime": 17.5601, | |
| "eval_samples_per_second": 200.397, | |
| "eval_steps_per_second": 6.264, | |
| "step": 102400 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 3.4734121631201364e-05, | |
| "loss": 1.499, | |
| "step": 102800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 2.381800651550293, | |
| "eval_runtime": 16.8498, | |
| "eval_samples_per_second": 208.846, | |
| "eval_steps_per_second": 6.528, | |
| "step": 102800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 3.4653104378577425e-05, | |
| "loss": 1.4996, | |
| "step": 103200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 2.401005506515503, | |
| "eval_runtime": 16.9826, | |
| "eval_samples_per_second": 207.212, | |
| "eval_steps_per_second": 6.477, | |
| "step": 103200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 3.457208712595347e-05, | |
| "loss": 1.4985, | |
| "step": 103600 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 2.399085283279419, | |
| "eval_runtime": 17.0074, | |
| "eval_samples_per_second": 206.91, | |
| "eval_steps_per_second": 6.468, | |
| "step": 103600 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.4491069873329527e-05, | |
| "loss": 1.4984, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 2.3661704063415527, | |
| "eval_runtime": 16.9552, | |
| "eval_samples_per_second": 207.547, | |
| "eval_steps_per_second": 6.488, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.441005262070558e-05, | |
| "loss": 1.4975, | |
| "step": 104400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 2.4111948013305664, | |
| "eval_runtime": 16.975, | |
| "eval_samples_per_second": 207.304, | |
| "eval_steps_per_second": 6.48, | |
| "step": 104400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.4329035368081635e-05, | |
| "loss": 1.4987, | |
| "step": 104800 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 2.3549654483795166, | |
| "eval_runtime": 17.004, | |
| "eval_samples_per_second": 206.951, | |
| "eval_steps_per_second": 6.469, | |
| "step": 104800 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.424801811545769e-05, | |
| "loss": 1.4975, | |
| "step": 105200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 2.3696866035461426, | |
| "eval_runtime": 16.9769, | |
| "eval_samples_per_second": 207.282, | |
| "eval_steps_per_second": 6.479, | |
| "step": 105200 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 3.4167000862833744e-05, | |
| "loss": 1.4978, | |
| "step": 105600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_loss": 2.4747281074523926, | |
| "eval_runtime": 17.0304, | |
| "eval_samples_per_second": 206.63, | |
| "eval_steps_per_second": 6.459, | |
| "step": 105600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 3.40859836102098e-05, | |
| "loss": 1.4985, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_loss": 2.3790531158447266, | |
| "eval_runtime": 16.9847, | |
| "eval_samples_per_second": 207.187, | |
| "eval_steps_per_second": 6.476, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 3.400496635758585e-05, | |
| "loss": 1.4961, | |
| "step": 106400 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_loss": 2.390604019165039, | |
| "eval_runtime": 17.3582, | |
| "eval_samples_per_second": 202.729, | |
| "eval_steps_per_second": 6.337, | |
| "step": 106400 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 3.392394910496191e-05, | |
| "loss": 1.4959, | |
| "step": 106800 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.415346622467041, | |
| "eval_runtime": 20.0907, | |
| "eval_samples_per_second": 175.156, | |
| "eval_steps_per_second": 5.475, | |
| "step": 106800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.3842931852337954e-05, | |
| "loss": 1.4956, | |
| "step": 107200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.4299123287200928, | |
| "eval_runtime": 18.8725, | |
| "eval_samples_per_second": 186.462, | |
| "eval_steps_per_second": 5.829, | |
| "step": 107200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.376191459971401e-05, | |
| "loss": 1.4964, | |
| "step": 107600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.448073625564575, | |
| "eval_runtime": 18.7704, | |
| "eval_samples_per_second": 187.476, | |
| "eval_steps_per_second": 5.86, | |
| "step": 107600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.368089734709006e-05, | |
| "loss": 1.497, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.390690565109253, | |
| "eval_runtime": 18.5096, | |
| "eval_samples_per_second": 190.118, | |
| "eval_steps_per_second": 5.943, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.359988009446612e-05, | |
| "loss": 1.4955, | |
| "step": 108400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.383636713027954, | |
| "eval_runtime": 18.4941, | |
| "eval_samples_per_second": 190.277, | |
| "eval_steps_per_second": 5.948, | |
| "step": 108400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.351886284184217e-05, | |
| "loss": 1.4953, | |
| "step": 108800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 2.400592565536499, | |
| "eval_runtime": 18.4735, | |
| "eval_samples_per_second": 190.489, | |
| "eval_steps_per_second": 5.954, | |
| "step": 108800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.3437845589218226e-05, | |
| "loss": 1.4939, | |
| "step": 109200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 2.349822759628296, | |
| "eval_runtime": 18.6128, | |
| "eval_samples_per_second": 189.063, | |
| "eval_steps_per_second": 5.91, | |
| "step": 109200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.335682833659428e-05, | |
| "loss": 1.4943, | |
| "step": 109600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 2.3708629608154297, | |
| "eval_runtime": 18.5009, | |
| "eval_samples_per_second": 190.207, | |
| "eval_steps_per_second": 5.946, | |
| "step": 109600 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.3275811083970334e-05, | |
| "loss": 1.4942, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 2.338743209838867, | |
| "eval_runtime": 18.4865, | |
| "eval_samples_per_second": 190.355, | |
| "eval_steps_per_second": 5.95, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.319479383134639e-05, | |
| "loss": 1.4923, | |
| "step": 110400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 2.4041731357574463, | |
| "eval_runtime": 18.5038, | |
| "eval_samples_per_second": 190.177, | |
| "eval_steps_per_second": 5.945, | |
| "step": 110400 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 3.3113776578722436e-05, | |
| "loss": 1.4934, | |
| "step": 110800 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "eval_loss": 2.4086883068084717, | |
| "eval_runtime": 17.8895, | |
| "eval_samples_per_second": 196.707, | |
| "eval_steps_per_second": 6.149, | |
| "step": 110800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.30327593260985e-05, | |
| "loss": 1.4917, | |
| "step": 111200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.3683786392211914, | |
| "eval_runtime": 17.4874, | |
| "eval_samples_per_second": 201.23, | |
| "eval_steps_per_second": 6.29, | |
| "step": 111200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.2951742073474545e-05, | |
| "loss": 1.4926, | |
| "step": 111600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.3743233680725098, | |
| "eval_runtime": 17.4669, | |
| "eval_samples_per_second": 201.467, | |
| "eval_steps_per_second": 6.298, | |
| "step": 111600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.28707248208506e-05, | |
| "loss": 1.4913, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.3969030380249023, | |
| "eval_runtime": 17.4406, | |
| "eval_samples_per_second": 201.77, | |
| "eval_steps_per_second": 6.307, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.278970756822666e-05, | |
| "loss": 1.4923, | |
| "step": 112400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.373997688293457, | |
| "eval_runtime": 17.6827, | |
| "eval_samples_per_second": 199.008, | |
| "eval_steps_per_second": 6.221, | |
| "step": 112400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.270869031560271e-05, | |
| "loss": 1.4913, | |
| "step": 112800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 2.3612871170043945, | |
| "eval_runtime": 17.4041, | |
| "eval_samples_per_second": 202.193, | |
| "eval_steps_per_second": 6.32, | |
| "step": 112800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.262767306297876e-05, | |
| "loss": 1.4909, | |
| "step": 113200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 2.3404111862182617, | |
| "eval_runtime": 17.5513, | |
| "eval_samples_per_second": 200.498, | |
| "eval_steps_per_second": 6.267, | |
| "step": 113200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.2546655810354816e-05, | |
| "loss": 1.491, | |
| "step": 113600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 2.2388041019439697, | |
| "eval_runtime": 17.6295, | |
| "eval_samples_per_second": 199.609, | |
| "eval_steps_per_second": 6.24, | |
| "step": 113600 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.246563855773087e-05, | |
| "loss": 1.4896, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 2.3492588996887207, | |
| "eval_runtime": 17.3833, | |
| "eval_samples_per_second": 202.436, | |
| "eval_steps_per_second": 6.328, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.238462130510692e-05, | |
| "loss": 1.4899, | |
| "step": 114400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 2.347364664077759, | |
| "eval_runtime": 17.468, | |
| "eval_samples_per_second": 201.454, | |
| "eval_steps_per_second": 6.297, | |
| "step": 114400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.230360405248298e-05, | |
| "loss": 1.4881, | |
| "step": 114800 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 2.315025568008423, | |
| "eval_runtime": 17.4, | |
| "eval_samples_per_second": 202.242, | |
| "eval_steps_per_second": 6.322, | |
| "step": 114800 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.2222586799859033e-05, | |
| "loss": 1.4905, | |
| "step": 115200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 2.344813346862793, | |
| "eval_runtime": 17.3103, | |
| "eval_samples_per_second": 203.29, | |
| "eval_steps_per_second": 6.355, | |
| "step": 115200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.214156954723508e-05, | |
| "loss": 1.4894, | |
| "step": 115600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 2.350853443145752, | |
| "eval_runtime": 17.3476, | |
| "eval_samples_per_second": 202.852, | |
| "eval_steps_per_second": 6.341, | |
| "step": 115600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.206055229461114e-05, | |
| "loss": 1.4885, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 2.273857355117798, | |
| "eval_runtime": 17.3165, | |
| "eval_samples_per_second": 203.217, | |
| "eval_steps_per_second": 6.352, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.197953504198719e-05, | |
| "loss": 1.4895, | |
| "step": 116400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 2.3339993953704834, | |
| "eval_runtime": 17.3637, | |
| "eval_samples_per_second": 202.664, | |
| "eval_steps_per_second": 6.335, | |
| "step": 116400 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 3.1898517789363244e-05, | |
| "loss": 1.4886, | |
| "step": 116800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 2.3035190105438232, | |
| "eval_runtime": 17.249, | |
| "eval_samples_per_second": 204.011, | |
| "eval_steps_per_second": 6.377, | |
| "step": 116800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 3.18175005367393e-05, | |
| "loss": 1.4867, | |
| "step": 117200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 2.355330467224121, | |
| "eval_runtime": 17.2592, | |
| "eval_samples_per_second": 203.891, | |
| "eval_steps_per_second": 6.373, | |
| "step": 117200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 3.173648328411535e-05, | |
| "loss": 1.4859, | |
| "step": 117600 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 2.3306944370269775, | |
| "eval_runtime": 17.5199, | |
| "eval_samples_per_second": 200.857, | |
| "eval_steps_per_second": 6.279, | |
| "step": 117600 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.165546603149141e-05, | |
| "loss": 1.4879, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 2.3352627754211426, | |
| "eval_runtime": 17.5475, | |
| "eval_samples_per_second": 200.542, | |
| "eval_steps_per_second": 6.269, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.157444877886746e-05, | |
| "loss": 1.4863, | |
| "step": 118400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 2.357405662536621, | |
| "eval_runtime": 17.7502, | |
| "eval_samples_per_second": 198.252, | |
| "eval_steps_per_second": 6.197, | |
| "step": 118400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.1493431526243515e-05, | |
| "loss": 1.4858, | |
| "step": 118800 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 2.3991518020629883, | |
| "eval_runtime": 17.6792, | |
| "eval_samples_per_second": 199.048, | |
| "eval_steps_per_second": 6.222, | |
| "step": 118800 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.141241427361957e-05, | |
| "loss": 1.4855, | |
| "step": 119200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 2.353144884109497, | |
| "eval_runtime": 17.7114, | |
| "eval_samples_per_second": 198.685, | |
| "eval_steps_per_second": 6.211, | |
| "step": 119200 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 3.1331397020995624e-05, | |
| "loss": 1.4856, | |
| "step": 119600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_loss": 2.409151315689087, | |
| "eval_runtime": 17.7645, | |
| "eval_samples_per_second": 198.092, | |
| "eval_steps_per_second": 6.192, | |
| "step": 119600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 3.125037976837167e-05, | |
| "loss": 1.4876, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_loss": 2.3355095386505127, | |
| "eval_runtime": 17.7334, | |
| "eval_samples_per_second": 198.439, | |
| "eval_steps_per_second": 6.203, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 3.116936251574773e-05, | |
| "loss": 1.4874, | |
| "step": 120400 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_loss": 2.3579752445220947, | |
| "eval_runtime": 17.7018, | |
| "eval_samples_per_second": 198.793, | |
| "eval_steps_per_second": 6.214, | |
| "step": 120400 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 3.108834526312379e-05, | |
| "loss": 1.4867, | |
| "step": 120800 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_loss": 2.3405985832214355, | |
| "eval_runtime": 17.7175, | |
| "eval_samples_per_second": 198.617, | |
| "eval_steps_per_second": 6.209, | |
| "step": 120800 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 3.1007328010499834e-05, | |
| "loss": 1.4847, | |
| "step": 121200 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_loss": 2.321049213409424, | |
| "eval_runtime": 17.748, | |
| "eval_samples_per_second": 198.276, | |
| "eval_steps_per_second": 6.198, | |
| "step": 121200 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 3.0926310757875895e-05, | |
| "loss": 1.4842, | |
| "step": 121600 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_loss": 2.3495261669158936, | |
| "eval_runtime": 17.6755, | |
| "eval_samples_per_second": 199.09, | |
| "eval_steps_per_second": 6.223, | |
| "step": 121600 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 3.084529350525194e-05, | |
| "loss": 1.484, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_loss": 2.3278751373291016, | |
| "eval_runtime": 17.6587, | |
| "eval_samples_per_second": 199.278, | |
| "eval_steps_per_second": 6.229, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 3.0764276252628e-05, | |
| "loss": 1.4817, | |
| "step": 122400 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 2.352627754211426, | |
| "eval_runtime": 17.7968, | |
| "eval_samples_per_second": 197.732, | |
| "eval_steps_per_second": 6.181, | |
| "step": 122400 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 3.068325900000405e-05, | |
| "loss": 1.4823, | |
| "step": 122800 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 2.3326263427734375, | |
| "eval_runtime": 17.8301, | |
| "eval_samples_per_second": 197.363, | |
| "eval_steps_per_second": 6.169, | |
| "step": 122800 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 3.0602241747380106e-05, | |
| "loss": 1.4814, | |
| "step": 123200 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 2.4039418697357178, | |
| "eval_runtime": 17.726, | |
| "eval_samples_per_second": 198.522, | |
| "eval_steps_per_second": 6.206, | |
| "step": 123200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 3.052122449475616e-05, | |
| "loss": 1.4802, | |
| "step": 123600 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_loss": 2.3534297943115234, | |
| "eval_runtime": 18.0233, | |
| "eval_samples_per_second": 195.247, | |
| "eval_steps_per_second": 6.103, | |
| "step": 123600 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 3.044020724213221e-05, | |
| "loss": 1.4823, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_loss": 2.3589508533477783, | |
| "eval_runtime": 18.0015, | |
| "eval_samples_per_second": 195.484, | |
| "eval_steps_per_second": 6.111, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 3.035918998950827e-05, | |
| "loss": 1.4806, | |
| "step": 124400 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_loss": 2.3476579189300537, | |
| "eval_runtime": 18.054, | |
| "eval_samples_per_second": 194.916, | |
| "eval_steps_per_second": 6.093, | |
| "step": 124400 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 3.027817273688432e-05, | |
| "loss": 1.481, | |
| "step": 124800 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_loss": 2.3086392879486084, | |
| "eval_runtime": 18.0863, | |
| "eval_samples_per_second": 194.567, | |
| "eval_steps_per_second": 6.082, | |
| "step": 124800 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 3.0197155484260374e-05, | |
| "loss": 1.4798, | |
| "step": 125200 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_loss": 2.331632375717163, | |
| "eval_runtime": 18.0209, | |
| "eval_samples_per_second": 195.274, | |
| "eval_steps_per_second": 6.104, | |
| "step": 125200 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 3.0116138231636425e-05, | |
| "loss": 1.481, | |
| "step": 125600 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_loss": 2.321038246154785, | |
| "eval_runtime": 18.138, | |
| "eval_samples_per_second": 194.012, | |
| "eval_steps_per_second": 6.065, | |
| "step": 125600 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 3.0035120979012482e-05, | |
| "loss": 1.4792, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_loss": 2.3609230518341064, | |
| "eval_runtime": 18.1227, | |
| "eval_samples_per_second": 194.176, | |
| "eval_steps_per_second": 6.07, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.9954103726388537e-05, | |
| "loss": 1.4783, | |
| "step": 126400 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_loss": 2.348484516143799, | |
| "eval_runtime": 18.2068, | |
| "eval_samples_per_second": 193.279, | |
| "eval_steps_per_second": 6.042, | |
| "step": 126400 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.9873086473764588e-05, | |
| "loss": 1.4783, | |
| "step": 126800 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_loss": 2.3550658226013184, | |
| "eval_runtime": 18.1831, | |
| "eval_samples_per_second": 193.532, | |
| "eval_steps_per_second": 6.05, | |
| "step": 126800 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.9792069221140645e-05, | |
| "loss": 1.478, | |
| "step": 127200 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_loss": 2.352349042892456, | |
| "eval_runtime": 18.3773, | |
| "eval_samples_per_second": 191.487, | |
| "eval_steps_per_second": 5.986, | |
| "step": 127200 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.9711051968516696e-05, | |
| "loss": 1.479, | |
| "step": 127600 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_loss": 2.3229057788848877, | |
| "eval_runtime": 18.4727, | |
| "eval_samples_per_second": 190.498, | |
| "eval_steps_per_second": 5.955, | |
| "step": 127600 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.963003471589275e-05, | |
| "loss": 1.4787, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_loss": 2.3134686946868896, | |
| "eval_runtime": 18.5086, | |
| "eval_samples_per_second": 190.128, | |
| "eval_steps_per_second": 5.943, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.95490174632688e-05, | |
| "loss": 1.4775, | |
| "step": 128400 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_loss": 2.27996826171875, | |
| "eval_runtime": 18.3605, | |
| "eval_samples_per_second": 191.661, | |
| "eval_steps_per_second": 5.991, | |
| "step": 128400 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.946800021064486e-05, | |
| "loss": 1.4766, | |
| "step": 128800 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_loss": 2.2963178157806396, | |
| "eval_runtime": 18.3042, | |
| "eval_samples_per_second": 192.251, | |
| "eval_steps_per_second": 6.01, | |
| "step": 128800 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.9386982958020913e-05, | |
| "loss": 1.4762, | |
| "step": 129200 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_loss": 2.3238120079040527, | |
| "eval_runtime": 18.4678, | |
| "eval_samples_per_second": 190.548, | |
| "eval_steps_per_second": 5.956, | |
| "step": 129200 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.9305965705396964e-05, | |
| "loss": 1.4769, | |
| "step": 129600 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_loss": 2.3036534786224365, | |
| "eval_runtime": 18.3198, | |
| "eval_samples_per_second": 192.087, | |
| "eval_steps_per_second": 6.004, | |
| "step": 129600 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.9224948452773022e-05, | |
| "loss": 1.4756, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_loss": 2.3685128688812256, | |
| "eval_runtime": 18.2275, | |
| "eval_samples_per_second": 193.06, | |
| "eval_steps_per_second": 6.035, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.9143931200149073e-05, | |
| "loss": 1.4752, | |
| "step": 130400 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_loss": 2.288372278213501, | |
| "eval_runtime": 18.3274, | |
| "eval_samples_per_second": 192.008, | |
| "eval_steps_per_second": 6.002, | |
| "step": 130400 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.9062913947525127e-05, | |
| "loss": 1.4747, | |
| "step": 130800 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_loss": 2.3392255306243896, | |
| "eval_runtime": 18.2629, | |
| "eval_samples_per_second": 192.686, | |
| "eval_steps_per_second": 6.023, | |
| "step": 130800 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.8981896694901178e-05, | |
| "loss": 1.4738, | |
| "step": 131200 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_loss": 2.3563013076782227, | |
| "eval_runtime": 18.4362, | |
| "eval_samples_per_second": 190.875, | |
| "eval_steps_per_second": 5.967, | |
| "step": 131200 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.8900879442277236e-05, | |
| "loss": 1.4749, | |
| "step": 131600 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_loss": 2.330927610397339, | |
| "eval_runtime": 18.1578, | |
| "eval_samples_per_second": 193.801, | |
| "eval_steps_per_second": 6.058, | |
| "step": 131600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.881986218965329e-05, | |
| "loss": 1.4748, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_loss": 2.33650279045105, | |
| "eval_runtime": 18.3527, | |
| "eval_samples_per_second": 191.743, | |
| "eval_steps_per_second": 5.994, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.873884493702934e-05, | |
| "loss": 1.4737, | |
| "step": 132400 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_loss": 2.3835794925689697, | |
| "eval_runtime": 18.2768, | |
| "eval_samples_per_second": 192.539, | |
| "eval_steps_per_second": 6.019, | |
| "step": 132400 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.86578276844054e-05, | |
| "loss": 1.474, | |
| "step": 132800 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_loss": 2.4150733947753906, | |
| "eval_runtime": 18.2593, | |
| "eval_samples_per_second": 192.724, | |
| "eval_steps_per_second": 6.024, | |
| "step": 132800 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.8576810431781446e-05, | |
| "loss": 1.4743, | |
| "step": 133200 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_loss": 2.36186146736145, | |
| "eval_runtime": 18.123, | |
| "eval_samples_per_second": 194.173, | |
| "eval_steps_per_second": 6.07, | |
| "step": 133200 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.8495793179157504e-05, | |
| "loss": 1.4735, | |
| "step": 133600 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_loss": 2.356795310974121, | |
| "eval_runtime": 18.2043, | |
| "eval_samples_per_second": 193.306, | |
| "eval_steps_per_second": 6.043, | |
| "step": 133600 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.8414775926533555e-05, | |
| "loss": 1.4735, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_loss": 2.3677237033843994, | |
| "eval_runtime": 18.253, | |
| "eval_samples_per_second": 192.791, | |
| "eval_steps_per_second": 6.026, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 2.833375867390961e-05, | |
| "loss": 1.4715, | |
| "step": 134400 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_loss": 2.361776113510132, | |
| "eval_runtime": 18.182, | |
| "eval_samples_per_second": 193.543, | |
| "eval_steps_per_second": 6.05, | |
| "step": 134400 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.8252741421285667e-05, | |
| "loss": 1.4726, | |
| "step": 134800 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_loss": 2.3906137943267822, | |
| "eval_runtime": 18.0913, | |
| "eval_samples_per_second": 194.513, | |
| "eval_steps_per_second": 6.08, | |
| "step": 134800 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.8171724168661718e-05, | |
| "loss": 1.4716, | |
| "step": 135200 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_loss": 2.340426445007324, | |
| "eval_runtime": 18.1553, | |
| "eval_samples_per_second": 193.828, | |
| "eval_steps_per_second": 6.059, | |
| "step": 135200 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 2.8090706916037772e-05, | |
| "loss": 1.4719, | |
| "step": 135600 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_loss": 2.340381383895874, | |
| "eval_runtime": 18.1363, | |
| "eval_samples_per_second": 194.031, | |
| "eval_steps_per_second": 6.065, | |
| "step": 135600 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 2.8009689663413823e-05, | |
| "loss": 1.4725, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_loss": 2.370542526245117, | |
| "eval_runtime": 18.2157, | |
| "eval_samples_per_second": 193.185, | |
| "eval_steps_per_second": 6.039, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 2.792867241078988e-05, | |
| "loss": 1.4713, | |
| "step": 136400 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_loss": 2.360673189163208, | |
| "eval_runtime": 18.2181, | |
| "eval_samples_per_second": 193.159, | |
| "eval_steps_per_second": 6.038, | |
| "step": 136400 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 2.784765515816593e-05, | |
| "loss": 1.4714, | |
| "step": 136800 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_loss": 2.3657426834106445, | |
| "eval_runtime": 18.2301, | |
| "eval_samples_per_second": 193.032, | |
| "eval_steps_per_second": 6.034, | |
| "step": 136800 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 2.7766637905541986e-05, | |
| "loss": 1.4706, | |
| "step": 137200 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_loss": 2.3723626136779785, | |
| "eval_runtime": 18.1723, | |
| "eval_samples_per_second": 193.646, | |
| "eval_steps_per_second": 6.053, | |
| "step": 137200 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 2.7685620652918044e-05, | |
| "loss": 1.47, | |
| "step": 137600 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_loss": 2.3738961219787598, | |
| "eval_runtime": 18.1983, | |
| "eval_samples_per_second": 193.37, | |
| "eval_steps_per_second": 6.045, | |
| "step": 137600 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 2.7604603400294094e-05, | |
| "loss": 1.4686, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_loss": 2.3388829231262207, | |
| "eval_runtime": 18.2362, | |
| "eval_samples_per_second": 192.968, | |
| "eval_steps_per_second": 6.032, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 2.752358614767015e-05, | |
| "loss": 1.469, | |
| "step": 138400 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_loss": 2.3783812522888184, | |
| "eval_runtime": 18.2567, | |
| "eval_samples_per_second": 192.751, | |
| "eval_steps_per_second": 6.025, | |
| "step": 138400 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 2.74425688950462e-05, | |
| "loss": 1.4682, | |
| "step": 138800 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_loss": 2.3429505825042725, | |
| "eval_runtime": 18.2164, | |
| "eval_samples_per_second": 193.177, | |
| "eval_steps_per_second": 6.039, | |
| "step": 138800 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 2.7361551642422257e-05, | |
| "loss": 1.4698, | |
| "step": 139200 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_loss": 2.3579936027526855, | |
| "eval_runtime": 18.1836, | |
| "eval_samples_per_second": 193.526, | |
| "eval_steps_per_second": 6.049, | |
| "step": 139200 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 2.7280534389798308e-05, | |
| "loss": 1.4676, | |
| "step": 139600 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_loss": 2.3819713592529297, | |
| "eval_runtime": 18.2677, | |
| "eval_samples_per_second": 192.635, | |
| "eval_steps_per_second": 6.022, | |
| "step": 139600 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 2.7199517137174363e-05, | |
| "loss": 1.4683, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_loss": 2.426044225692749, | |
| "eval_runtime": 18.2225, | |
| "eval_samples_per_second": 193.113, | |
| "eval_steps_per_second": 6.036, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 2.711849988455042e-05, | |
| "loss": 1.4677, | |
| "step": 140400 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_loss": 2.3789823055267334, | |
| "eval_runtime": 31.3826, | |
| "eval_samples_per_second": 112.132, | |
| "eval_steps_per_second": 3.505, | |
| "step": 140400 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 2.703748263192647e-05, | |
| "loss": 1.4686, | |
| "step": 140800 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_loss": 2.329643487930298, | |
| "eval_runtime": 18.3935, | |
| "eval_samples_per_second": 191.317, | |
| "eval_steps_per_second": 5.98, | |
| "step": 140800 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 2.6956465379302525e-05, | |
| "loss": 1.4679, | |
| "step": 141200 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_loss": 2.4011151790618896, | |
| "eval_runtime": 18.2288, | |
| "eval_samples_per_second": 193.046, | |
| "eval_steps_per_second": 6.034, | |
| "step": 141200 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 2.6875448126678576e-05, | |
| "loss": 1.4676, | |
| "step": 141600 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_loss": 2.377561092376709, | |
| "eval_runtime": 20.2447, | |
| "eval_samples_per_second": 173.823, | |
| "eval_steps_per_second": 5.434, | |
| "step": 141600 | |
| } | |
| ], | |
| "max_steps": 274290, | |
| "num_train_epochs": 2, | |
| "total_flos": 2.641163282310901e+20, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |