{ "best_metric": 1.4579006433486938, "best_model_checkpoint": "/home/alejandro.vaca/new_checkpoints_xlm_roberta/checkpoint-78800", "epoch": 0.22749530494715703, "global_step": 141600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.5191565642755143e-09, "loss": 2.6373, "step": 1 }, { "epoch": 0.0, "learning_rate": 6.076626257102056e-07, "loss": 2.2331, "step": 400 }, { "epoch": 0.0, "eval_loss": 1.9753497838974, "eval_runtime": 146.0905, "eval_samples_per_second": 136.901, "eval_steps_per_second": 2.143, "step": 400 }, { "epoch": 0.01, "learning_rate": 1.2153252514204113e-06, "loss": 2.0391, "step": 800 }, { "epoch": 0.01, "eval_loss": 1.9167841672897339, "eval_runtime": 146.7121, "eval_samples_per_second": 136.321, "eval_steps_per_second": 2.133, "step": 800 }, { "epoch": 0.01, "learning_rate": 1.822987877130617e-06, "loss": 1.9963, "step": 1200 }, { "epoch": 0.01, "eval_loss": 1.8917230367660522, "eval_runtime": 144.3485, "eval_samples_per_second": 138.554, "eval_steps_per_second": 2.168, "step": 1200 }, { "epoch": 0.01, "learning_rate": 2.4306505028408226e-06, "loss": 1.9736, "step": 1600 }, { "epoch": 0.01, "eval_loss": 1.8841623067855835, "eval_runtime": 142.7005, "eval_samples_per_second": 140.154, "eval_steps_per_second": 2.193, "step": 1600 }, { "epoch": 0.01, "learning_rate": 3.0383131285510288e-06, "loss": 1.9579, "step": 2000 }, { "epoch": 0.01, "eval_loss": 1.8723009824752808, "eval_runtime": 144.148, "eval_samples_per_second": 138.746, "eval_steps_per_second": 2.171, "step": 2000 }, { "epoch": 0.02, "learning_rate": 3.645975754261234e-06, "loss": 1.9465, "step": 2400 }, { "epoch": 0.02, "eval_loss": 1.8523199558258057, "eval_runtime": 143.784, "eval_samples_per_second": 139.098, "eval_steps_per_second": 2.177, "step": 2400 }, { "epoch": 0.02, "learning_rate": 4.25363837997144e-06, "loss": 1.9352, "step": 2800 }, { "epoch": 0.02, "eval_loss": 1.843543529510498, "eval_runtime": 146.189, "eval_samples_per_second": 136.809, "eval_steps_per_second": 2.141, "step": 2800 }, { "epoch": 0.02, "learning_rate": 4.861301005681645e-06, "loss": 1.9244, "step": 3200 }, { "epoch": 0.02, "eval_loss": 1.8356839418411255, "eval_runtime": 143.4998, "eval_samples_per_second": 139.373, "eval_steps_per_second": 2.181, "step": 3200 }, { "epoch": 0.03, "learning_rate": 5.468963631391851e-06, "loss": 1.9143, "step": 3600 }, { "epoch": 0.03, "eval_loss": 1.8183759450912476, "eval_runtime": 144.8799, "eval_samples_per_second": 138.045, "eval_steps_per_second": 2.16, "step": 3600 }, { "epoch": 0.03, "learning_rate": 6.0766262571020576e-06, "loss": 1.9042, "step": 4000 }, { "epoch": 0.03, "eval_loss": 1.8169724941253662, "eval_runtime": 145.0112, "eval_samples_per_second": 137.92, "eval_steps_per_second": 2.158, "step": 4000 }, { "epoch": 0.03, "learning_rate": 6.684288882812263e-06, "loss": 1.8971, "step": 4400 }, { "epoch": 0.03, "eval_loss": 1.808371901512146, "eval_runtime": 147.4613, "eval_samples_per_second": 135.629, "eval_steps_per_second": 2.123, "step": 4400 }, { "epoch": 0.04, "learning_rate": 7.291951508522468e-06, "loss": 1.888, "step": 4800 }, { "epoch": 0.04, "eval_loss": 1.8037678003311157, "eval_runtime": 150.0493, "eval_samples_per_second": 133.29, "eval_steps_per_second": 2.086, "step": 4800 }, { "epoch": 0.04, "learning_rate": 7.899614134232675e-06, "loss": 1.8809, "step": 5200 }, { "epoch": 0.04, "eval_loss": 1.7955741882324219, "eval_runtime": 148.2714, "eval_samples_per_second": 134.888, "eval_steps_per_second": 2.111, "step": 5200 }, { "epoch": 0.04, "learning_rate": 8.50727675994288e-06, "loss": 1.8741, "step": 5600 }, { "epoch": 0.04, "eval_loss": 1.7888526916503906, "eval_runtime": 147.2993, "eval_samples_per_second": 135.778, "eval_steps_per_second": 2.125, "step": 5600 }, { "epoch": 0.04, "learning_rate": 9.114939385653086e-06, "loss": 1.8685, "step": 6000 }, { "epoch": 0.04, "eval_loss": 1.785848617553711, "eval_runtime": 143.6092, "eval_samples_per_second": 139.267, "eval_steps_per_second": 2.18, "step": 6000 }, { "epoch": 0.05, "learning_rate": 9.72260201136329e-06, "loss": 1.8589, "step": 6400 }, { "epoch": 0.05, "eval_loss": 1.781029462814331, "eval_runtime": 147.7751, "eval_samples_per_second": 135.341, "eval_steps_per_second": 2.118, "step": 6400 }, { "epoch": 0.05, "learning_rate": 1.0330264637073497e-05, "loss": 1.8544, "step": 6800 }, { "epoch": 0.05, "eval_loss": 1.7769867181777954, "eval_runtime": 147.2523, "eval_samples_per_second": 135.821, "eval_steps_per_second": 2.126, "step": 6800 }, { "epoch": 0.05, "learning_rate": 1.0937927262783703e-05, "loss": 1.8481, "step": 7200 }, { "epoch": 0.05, "eval_loss": 1.7637484073638916, "eval_runtime": 143.1769, "eval_samples_per_second": 139.687, "eval_steps_per_second": 2.186, "step": 7200 }, { "epoch": 0.06, "learning_rate": 1.1545589888493909e-05, "loss": 1.8428, "step": 7600 }, { "epoch": 0.06, "eval_loss": 1.756960391998291, "eval_runtime": 145.8319, "eval_samples_per_second": 137.144, "eval_steps_per_second": 2.146, "step": 7600 }, { "epoch": 0.06, "learning_rate": 1.2153252514204115e-05, "loss": 1.8373, "step": 8000 }, { "epoch": 0.06, "eval_loss": 1.7565785646438599, "eval_runtime": 144.3741, "eval_samples_per_second": 138.529, "eval_steps_per_second": 2.168, "step": 8000 }, { "epoch": 0.06, "learning_rate": 1.276091513991432e-05, "loss": 1.8304, "step": 8400 }, { "epoch": 0.06, "eval_loss": 1.742794156074524, "eval_runtime": 146.6168, "eval_samples_per_second": 136.41, "eval_steps_per_second": 2.135, "step": 8400 }, { "epoch": 0.06, "learning_rate": 1.3368577765624526e-05, "loss": 1.8259, "step": 8800 }, { "epoch": 0.06, "eval_loss": 1.7337759733200073, "eval_runtime": 145.9226, "eval_samples_per_second": 137.059, "eval_steps_per_second": 2.145, "step": 8800 }, { "epoch": 0.07, "learning_rate": 1.3976240391334734e-05, "loss": 1.8219, "step": 9200 }, { "epoch": 0.07, "eval_loss": 1.7424650192260742, "eval_runtime": 145.6453, "eval_samples_per_second": 137.32, "eval_steps_per_second": 2.149, "step": 9200 }, { "epoch": 0.07, "learning_rate": 1.4583903017044936e-05, "loss": 1.8162, "step": 9600 }, { "epoch": 0.07, "eval_loss": 1.7316113710403442, "eval_runtime": 145.2248, "eval_samples_per_second": 137.718, "eval_steps_per_second": 2.155, "step": 9600 }, { "epoch": 0.07, "learning_rate": 1.5191565642755143e-05, "loss": 1.8112, "step": 10000 }, { "epoch": 0.07, "eval_loss": 1.7247357368469238, "eval_runtime": 146.3969, "eval_samples_per_second": 136.615, "eval_steps_per_second": 2.138, "step": 10000 }, { "epoch": 0.08, "learning_rate": 1.579922826846535e-05, "loss": 1.807, "step": 10400 }, { "epoch": 0.08, "eval_loss": 1.725953459739685, "eval_runtime": 144.2953, "eval_samples_per_second": 138.605, "eval_steps_per_second": 2.169, "step": 10400 }, { "epoch": 0.08, "learning_rate": 1.6406890894175555e-05, "loss": 1.8034, "step": 10800 }, { "epoch": 0.08, "eval_loss": 1.721238136291504, "eval_runtime": 145.7833, "eval_samples_per_second": 137.19, "eval_steps_per_second": 2.147, "step": 10800 }, { "epoch": 0.08, "learning_rate": 1.701455351988576e-05, "loss": 1.7984, "step": 11200 }, { "epoch": 0.08, "eval_loss": 1.7204127311706543, "eval_runtime": 147.5961, "eval_samples_per_second": 135.505, "eval_steps_per_second": 2.121, "step": 11200 }, { "epoch": 0.08, "learning_rate": 1.7622216145595964e-05, "loss": 1.7944, "step": 11600 }, { "epoch": 0.08, "eval_loss": 1.7186585664749146, "eval_runtime": 143.1913, "eval_samples_per_second": 139.673, "eval_steps_per_second": 2.186, "step": 11600 }, { "epoch": 0.09, "learning_rate": 1.8229878771306172e-05, "loss": 1.7915, "step": 12000 }, { "epoch": 0.09, "eval_loss": 1.7116312980651855, "eval_runtime": 149.8115, "eval_samples_per_second": 133.501, "eval_steps_per_second": 2.089, "step": 12000 }, { "epoch": 0.09, "learning_rate": 1.883754139701638e-05, "loss": 1.7864, "step": 12400 }, { "epoch": 0.09, "eval_loss": 1.705054521560669, "eval_runtime": 147.1783, "eval_samples_per_second": 135.89, "eval_steps_per_second": 2.127, "step": 12400 }, { "epoch": 0.09, "learning_rate": 1.944520402272658e-05, "loss": 1.7819, "step": 12800 }, { "epoch": 0.09, "eval_loss": 1.6974027156829834, "eval_runtime": 151.8787, "eval_samples_per_second": 131.684, "eval_steps_per_second": 2.061, "step": 12800 }, { "epoch": 0.1, "learning_rate": 2.005286664843679e-05, "loss": 1.7751, "step": 13200 }, { "epoch": 0.1, "eval_loss": 1.7015215158462524, "eval_runtime": 166.3617, "eval_samples_per_second": 120.22, "eval_steps_per_second": 1.881, "step": 13200 }, { "epoch": 0.1, "learning_rate": 2.0660529274146993e-05, "loss": 1.774, "step": 13600 }, { "epoch": 0.1, "eval_loss": 1.697357177734375, "eval_runtime": 210.2219, "eval_samples_per_second": 95.138, "eval_steps_per_second": 1.489, "step": 13600 }, { "epoch": 0.1, "learning_rate": 2.12681918998572e-05, "loss": 1.7685, "step": 14000 }, { "epoch": 0.1, "eval_loss": 1.7000294923782349, "eval_runtime": 388.8288, "eval_samples_per_second": 51.437, "eval_steps_per_second": 0.805, "step": 14000 }, { "epoch": 0.11, "learning_rate": 2.1875854525567406e-05, "loss": 1.7656, "step": 14400 }, { "epoch": 0.11, "eval_loss": 1.6891347169876099, "eval_runtime": 149.9008, "eval_samples_per_second": 133.422, "eval_steps_per_second": 2.088, "step": 14400 }, { "epoch": 0.11, "learning_rate": 2.248351715127761e-05, "loss": 1.7601, "step": 14800 }, { "epoch": 0.11, "eval_loss": 1.691114902496338, "eval_runtime": 148.0615, "eval_samples_per_second": 135.079, "eval_steps_per_second": 2.114, "step": 14800 }, { "epoch": 0.11, "learning_rate": 2.3091179776987818e-05, "loss": 1.7574, "step": 15200 }, { "epoch": 0.11, "eval_loss": 1.6803953647613525, "eval_runtime": 162.6955, "eval_samples_per_second": 122.929, "eval_steps_per_second": 1.924, "step": 15200 }, { "epoch": 0.11, "learning_rate": 2.3698842402698022e-05, "loss": 1.7528, "step": 15600 }, { "epoch": 0.11, "eval_loss": 1.6794207096099854, "eval_runtime": 152.5086, "eval_samples_per_second": 131.14, "eval_steps_per_second": 2.052, "step": 15600 }, { "epoch": 0.12, "learning_rate": 2.430650502840823e-05, "loss": 1.7494, "step": 16000 }, { "epoch": 0.12, "eval_loss": 1.6750398874282837, "eval_runtime": 150.7356, "eval_samples_per_second": 132.683, "eval_steps_per_second": 2.076, "step": 16000 }, { "epoch": 0.12, "learning_rate": 2.4914167654118435e-05, "loss": 1.7441, "step": 16400 }, { "epoch": 0.12, "eval_loss": 1.6635680198669434, "eval_runtime": 158.1954, "eval_samples_per_second": 126.426, "eval_steps_per_second": 1.979, "step": 16400 }, { "epoch": 0.12, "learning_rate": 2.552183027982864e-05, "loss": 1.7405, "step": 16800 }, { "epoch": 0.12, "eval_loss": 1.660568118095398, "eval_runtime": 144.9472, "eval_samples_per_second": 137.981, "eval_steps_per_second": 2.159, "step": 16800 }, { "epoch": 0.13, "learning_rate": 2.6129492905538844e-05, "loss": 1.7373, "step": 17200 }, { "epoch": 0.13, "eval_loss": 1.6654884815216064, "eval_runtime": 161.3267, "eval_samples_per_second": 123.972, "eval_steps_per_second": 1.94, "step": 17200 }, { "epoch": 0.13, "learning_rate": 2.673715553124905e-05, "loss": 1.7336, "step": 17600 }, { "epoch": 0.13, "eval_loss": 1.6575931310653687, "eval_runtime": 248.169, "eval_samples_per_second": 80.59, "eval_steps_per_second": 1.261, "step": 17600 }, { "epoch": 0.13, "learning_rate": 2.7344818156959256e-05, "loss": 1.7291, "step": 18000 }, { "epoch": 0.13, "eval_loss": 1.6604431867599487, "eval_runtime": 145.1056, "eval_samples_per_second": 137.831, "eval_steps_per_second": 2.157, "step": 18000 }, { "epoch": 0.13, "learning_rate": 2.7952480782669467e-05, "loss": 1.7243, "step": 18400 }, { "epoch": 0.13, "eval_loss": 1.6801910400390625, "eval_runtime": 153.7947, "eval_samples_per_second": 130.043, "eval_steps_per_second": 2.035, "step": 18400 }, { "epoch": 0.14, "learning_rate": 2.856014340837967e-05, "loss": 1.7214, "step": 18800 }, { "epoch": 0.14, "eval_loss": 1.6495254039764404, "eval_runtime": 148.9903, "eval_samples_per_second": 134.237, "eval_steps_per_second": 2.101, "step": 18800 }, { "epoch": 0.14, "learning_rate": 2.9167806034089873e-05, "loss": 1.7178, "step": 19200 }, { "epoch": 0.14, "eval_loss": 1.6446107625961304, "eval_runtime": 149.3174, "eval_samples_per_second": 133.943, "eval_steps_per_second": 2.096, "step": 19200 }, { "epoch": 0.14, "learning_rate": 2.977546865980008e-05, "loss": 1.7146, "step": 19600 }, { "epoch": 0.14, "eval_loss": 1.641605019569397, "eval_runtime": 154.456, "eval_samples_per_second": 129.487, "eval_steps_per_second": 2.026, "step": 19600 }, { "epoch": 0.15, "learning_rate": 3.0383131285510285e-05, "loss": 1.7118, "step": 20000 }, { "epoch": 0.15, "eval_loss": 1.6381052732467651, "eval_runtime": 151.3863, "eval_samples_per_second": 132.112, "eval_steps_per_second": 2.068, "step": 20000 }, { "epoch": 0.15, "learning_rate": 3.099079391122049e-05, "loss": 1.7083, "step": 20400 }, { "epoch": 0.15, "eval_loss": 1.6341092586517334, "eval_runtime": 238.9093, "eval_samples_per_second": 83.714, "eval_steps_per_second": 1.31, "step": 20400 }, { "epoch": 0.15, "learning_rate": 3.15984565369307e-05, "loss": 1.7062, "step": 20800 }, { "epoch": 0.15, "eval_loss": 1.6292831897735596, "eval_runtime": 152.4932, "eval_samples_per_second": 131.153, "eval_steps_per_second": 2.053, "step": 20800 }, { "epoch": 0.15, "learning_rate": 3.22061191626409e-05, "loss": 1.7054, "step": 21200 }, { "epoch": 0.15, "eval_loss": 1.6273330450057983, "eval_runtime": 151.097, "eval_samples_per_second": 132.365, "eval_steps_per_second": 2.072, "step": 21200 }, { "epoch": 0.16, "learning_rate": 3.281378178835111e-05, "loss": 1.7012, "step": 21600 }, { "epoch": 0.16, "eval_loss": 1.6267642974853516, "eval_runtime": 149.0125, "eval_samples_per_second": 134.217, "eval_steps_per_second": 2.1, "step": 21600 }, { "epoch": 0.16, "learning_rate": 3.3421444414061314e-05, "loss": 1.6993, "step": 22000 }, { "epoch": 0.16, "eval_loss": 1.6256201267242432, "eval_runtime": 149.4973, "eval_samples_per_second": 133.782, "eval_steps_per_second": 2.094, "step": 22000 }, { "epoch": 0.16, "learning_rate": 3.402910703977152e-05, "loss": 1.697, "step": 22400 }, { "epoch": 0.16, "eval_loss": 1.6158908605575562, "eval_runtime": 152.4757, "eval_samples_per_second": 131.168, "eval_steps_per_second": 2.053, "step": 22400 }, { "epoch": 0.17, "learning_rate": 3.463676966548173e-05, "loss": 1.6938, "step": 22800 }, { "epoch": 0.17, "eval_loss": 1.6134721040725708, "eval_runtime": 152.3857, "eval_samples_per_second": 131.246, "eval_steps_per_second": 2.054, "step": 22800 }, { "epoch": 0.17, "learning_rate": 3.524443229119193e-05, "loss": 1.6923, "step": 23200 }, { "epoch": 0.17, "eval_loss": 1.6194721460342407, "eval_runtime": 149.9763, "eval_samples_per_second": 133.354, "eval_steps_per_second": 2.087, "step": 23200 }, { "epoch": 0.17, "learning_rate": 3.585209491690214e-05, "loss": 1.6888, "step": 23600 }, { "epoch": 0.17, "eval_loss": 1.6149234771728516, "eval_runtime": 150.7266, "eval_samples_per_second": 132.691, "eval_steps_per_second": 2.077, "step": 23600 }, { "epoch": 0.18, "learning_rate": 3.6459757542612344e-05, "loss": 1.687, "step": 24000 }, { "epoch": 0.18, "eval_loss": 1.6148015260696411, "eval_runtime": 152.1295, "eval_samples_per_second": 131.467, "eval_steps_per_second": 2.057, "step": 24000 }, { "epoch": 0.18, "learning_rate": 3.706742016832255e-05, "loss": 1.6886, "step": 24400 }, { "epoch": 0.18, "eval_loss": 1.6169975996017456, "eval_runtime": 152.146, "eval_samples_per_second": 131.453, "eval_steps_per_second": 2.057, "step": 24400 }, { "epoch": 0.18, "learning_rate": 3.767508279403276e-05, "loss": 1.6865, "step": 24800 }, { "epoch": 0.18, "eval_loss": 1.6124180555343628, "eval_runtime": 174.6369, "eval_samples_per_second": 114.523, "eval_steps_per_second": 1.792, "step": 24800 }, { "epoch": 0.18, "learning_rate": 3.828274541974296e-05, "loss": 1.6829, "step": 25200 }, { "epoch": 0.18, "eval_loss": 1.6170154809951782, "eval_runtime": 262.8027, "eval_samples_per_second": 76.103, "eval_steps_per_second": 1.191, "step": 25200 }, { "epoch": 0.19, "learning_rate": 3.889040804545316e-05, "loss": 1.6813, "step": 25600 }, { "epoch": 0.19, "eval_loss": 1.6040676832199097, "eval_runtime": 255.431, "eval_samples_per_second": 78.299, "eval_steps_per_second": 1.225, "step": 25600 }, { "epoch": 0.19, "learning_rate": 3.949807067116337e-05, "loss": 1.6806, "step": 26000 }, { "epoch": 0.19, "eval_loss": 1.6070351600646973, "eval_runtime": 151.6507, "eval_samples_per_second": 131.882, "eval_steps_per_second": 2.064, "step": 26000 }, { "epoch": 0.19, "learning_rate": 4.010573329687358e-05, "loss": 1.6763, "step": 26400 }, { "epoch": 0.19, "eval_loss": 1.599661946296692, "eval_runtime": 150.2287, "eval_samples_per_second": 133.13, "eval_steps_per_second": 2.083, "step": 26400 }, { "epoch": 0.2, "learning_rate": 4.071339592258379e-05, "loss": 1.6733, "step": 26800 }, { "epoch": 0.2, "eval_loss": 1.6072720289230347, "eval_runtime": 152.0466, "eval_samples_per_second": 131.539, "eval_steps_per_second": 2.059, "step": 26800 }, { "epoch": 0.2, "learning_rate": 4.1321058548293986e-05, "loss": 1.6695, "step": 27200 }, { "epoch": 0.2, "eval_loss": 1.6115573644638062, "eval_runtime": 148.9069, "eval_samples_per_second": 134.312, "eval_steps_per_second": 2.102, "step": 27200 }, { "epoch": 0.2, "learning_rate": 4.192872117400419e-05, "loss": 1.6687, "step": 27600 }, { "epoch": 0.2, "eval_loss": 1.611473798751831, "eval_runtime": 158.4393, "eval_samples_per_second": 126.231, "eval_steps_per_second": 1.976, "step": 27600 }, { "epoch": 0.2, "learning_rate": 4.25363837997144e-05, "loss": 1.6673, "step": 28000 }, { "epoch": 0.2, "eval_loss": 1.606929898262024, "eval_runtime": 159.9938, "eval_samples_per_second": 125.005, "eval_steps_per_second": 1.956, "step": 28000 }, { "epoch": 0.21, "learning_rate": 4.3144046425424607e-05, "loss": 1.6655, "step": 28400 }, { "epoch": 0.21, "eval_loss": 1.5869165658950806, "eval_runtime": 154.5639, "eval_samples_per_second": 129.396, "eval_steps_per_second": 2.025, "step": 28400 }, { "epoch": 0.21, "learning_rate": 4.375170905113481e-05, "loss": 1.6622, "step": 28800 }, { "epoch": 0.21, "eval_loss": 1.6052591800689697, "eval_runtime": 368.5722, "eval_samples_per_second": 54.263, "eval_steps_per_second": 0.849, "step": 28800 }, { "epoch": 0.21, "learning_rate": 4.4359371676845016e-05, "loss": 1.6598, "step": 29200 }, { "epoch": 0.21, "eval_loss": 1.58935546875, "eval_runtime": 152.6646, "eval_samples_per_second": 131.006, "eval_steps_per_second": 2.05, "step": 29200 }, { "epoch": 0.22, "learning_rate": 4.496703430255522e-05, "loss": 1.659, "step": 29600 }, { "epoch": 0.22, "eval_loss": 1.5808852910995483, "eval_runtime": 155.4857, "eval_samples_per_second": 128.629, "eval_steps_per_second": 2.013, "step": 29600 }, { "epoch": 0.22, "learning_rate": 4.557469692826543e-05, "loss": 1.6583, "step": 30000 }, { "epoch": 0.22, "eval_loss": 1.588645100593567, "eval_runtime": 277.2998, "eval_samples_per_second": 72.124, "eval_steps_per_second": 1.129, "step": 30000 }, { "epoch": 0.22, "learning_rate": 4.6182359553975636e-05, "loss": 1.6555, "step": 30400 }, { "epoch": 0.22, "eval_loss": 1.5864471197128296, "eval_runtime": 161.6086, "eval_samples_per_second": 123.756, "eval_steps_per_second": 1.937, "step": 30400 }, { "epoch": 0.22, "learning_rate": 4.679002217968584e-05, "loss": 1.6559, "step": 30800 }, { "epoch": 0.22, "eval_loss": 1.583774209022522, "eval_runtime": 346.1205, "eval_samples_per_second": 57.783, "eval_steps_per_second": 0.904, "step": 30800 }, { "epoch": 0.23, "learning_rate": 4.7397684805396045e-05, "loss": 1.6522, "step": 31200 }, { "epoch": 0.23, "eval_loss": 1.5791034698486328, "eval_runtime": 326.2692, "eval_samples_per_second": 61.299, "eval_steps_per_second": 0.959, "step": 31200 }, { "epoch": 0.23, "learning_rate": 4.800534743110625e-05, "loss": 1.6499, "step": 31600 }, { "epoch": 0.23, "eval_loss": 1.5826290845870972, "eval_runtime": 175.7208, "eval_samples_per_second": 113.817, "eval_steps_per_second": 1.781, "step": 31600 }, { "epoch": 0.23, "learning_rate": 4.861301005681646e-05, "loss": 1.6506, "step": 32000 }, { "epoch": 0.23, "eval_loss": 1.5759295225143433, "eval_runtime": 317.0144, "eval_samples_per_second": 63.089, "eval_steps_per_second": 0.987, "step": 32000 }, { "epoch": 0.24, "learning_rate": 4.9220672682526665e-05, "loss": 1.6498, "step": 32400 }, { "epoch": 0.24, "eval_loss": 1.5828478336334229, "eval_runtime": 219.1147, "eval_samples_per_second": 91.276, "eval_steps_per_second": 1.428, "step": 32400 }, { "epoch": 0.24, "learning_rate": 4.982833530823687e-05, "loss": 1.6473, "step": 32800 }, { "epoch": 0.24, "eval_loss": 1.572839617729187, "eval_runtime": 188.3492, "eval_samples_per_second": 106.186, "eval_steps_per_second": 1.662, "step": 32800 }, { "epoch": 0.24, "learning_rate": 4.99405445046135e-05, "loss": 1.644, "step": 33200 }, { "epoch": 0.24, "eval_loss": 1.5747781991958618, "eval_runtime": 261.2843, "eval_samples_per_second": 76.545, "eval_steps_per_second": 1.198, "step": 33200 }, { "epoch": 0.25, "learning_rate": 4.9857679702681096e-05, "loss": 1.6419, "step": 33600 }, { "epoch": 0.25, "eval_loss": 1.569125771522522, "eval_runtime": 238.4252, "eval_samples_per_second": 83.884, "eval_steps_per_second": 1.313, "step": 33600 }, { "epoch": 0.25, "learning_rate": 4.977481490074868e-05, "loss": 1.6416, "step": 34000 }, { "epoch": 0.25, "eval_loss": 1.5649021863937378, "eval_runtime": 403.1613, "eval_samples_per_second": 49.608, "eval_steps_per_second": 0.776, "step": 34000 }, { "epoch": 0.25, "learning_rate": 4.969195009881628e-05, "loss": 1.6365, "step": 34400 }, { "epoch": 0.25, "eval_loss": 1.5665974617004395, "eval_runtime": 154.8734, "eval_samples_per_second": 129.138, "eval_steps_per_second": 2.021, "step": 34400 }, { "epoch": 0.25, "learning_rate": 4.9609085296883874e-05, "loss": 1.6348, "step": 34800 }, { "epoch": 0.25, "eval_loss": 1.5668097734451294, "eval_runtime": 193.4192, "eval_samples_per_second": 103.402, "eval_steps_per_second": 1.618, "step": 34800 }, { "epoch": 0.26, "learning_rate": 4.9526220494951466e-05, "loss": 1.6342, "step": 35200 }, { "epoch": 0.26, "eval_loss": 1.5644603967666626, "eval_runtime": 525.4863, "eval_samples_per_second": 38.06, "eval_steps_per_second": 0.596, "step": 35200 }, { "epoch": 0.26, "learning_rate": 4.944335569301905e-05, "loss": 1.6319, "step": 35600 }, { "epoch": 0.26, "eval_loss": 1.5583738088607788, "eval_runtime": 152.5499, "eval_samples_per_second": 131.105, "eval_steps_per_second": 2.052, "step": 35600 }, { "epoch": 0.26, "learning_rate": 4.936049089108665e-05, "loss": 1.6304, "step": 36000 }, { "epoch": 0.26, "eval_loss": 1.5624059438705444, "eval_runtime": 195.0553, "eval_samples_per_second": 102.535, "eval_steps_per_second": 1.605, "step": 36000 }, { "epoch": 0.27, "learning_rate": 4.9277626089154245e-05, "loss": 1.6287, "step": 36400 }, { "epoch": 0.27, "eval_loss": 1.5545308589935303, "eval_runtime": 220.9752, "eval_samples_per_second": 90.508, "eval_steps_per_second": 1.416, "step": 36400 }, { "epoch": 0.27, "learning_rate": 4.919476128722184e-05, "loss": 1.6301, "step": 36800 }, { "epoch": 0.27, "eval_loss": 1.5592070817947388, "eval_runtime": 213.6052, "eval_samples_per_second": 93.631, "eval_steps_per_second": 1.465, "step": 36800 }, { "epoch": 0.27, "learning_rate": 4.911189648528943e-05, "loss": 1.6272, "step": 37200 }, { "epoch": 0.27, "eval_loss": 1.5615522861480713, "eval_runtime": 263.8607, "eval_samples_per_second": 75.798, "eval_steps_per_second": 1.186, "step": 37200 }, { "epoch": 0.27, "learning_rate": 4.9029031683357016e-05, "loss": 1.6267, "step": 37600 }, { "epoch": 0.27, "eval_loss": 1.558023452758789, "eval_runtime": 1163.3408, "eval_samples_per_second": 17.192, "eval_steps_per_second": 0.269, "step": 37600 }, { "epoch": 0.28, "learning_rate": 4.8946166881424615e-05, "loss": 1.624, "step": 38000 }, { "epoch": 0.28, "eval_loss": 1.550244688987732, "eval_runtime": 156.4776, "eval_samples_per_second": 127.814, "eval_steps_per_second": 2.0, "step": 38000 }, { "epoch": 0.28, "learning_rate": 4.886330207949221e-05, "loss": 1.6238, "step": 38400 }, { "epoch": 0.28, "eval_loss": 1.5512545108795166, "eval_runtime": 178.078, "eval_samples_per_second": 112.31, "eval_steps_per_second": 1.758, "step": 38400 }, { "epoch": 0.28, "learning_rate": 4.87804372775598e-05, "loss": 1.623, "step": 38800 }, { "epoch": 0.28, "eval_loss": 1.5499157905578613, "eval_runtime": 411.8408, "eval_samples_per_second": 48.562, "eval_steps_per_second": 0.76, "step": 38800 }, { "epoch": 0.29, "learning_rate": 4.869757247562739e-05, "loss": 1.6214, "step": 39200 }, { "epoch": 0.29, "eval_loss": 1.554477572441101, "eval_runtime": 152.8841, "eval_samples_per_second": 130.818, "eval_steps_per_second": 2.047, "step": 39200 }, { "epoch": 0.29, "learning_rate": 4.8614707673694986e-05, "loss": 1.6173, "step": 39600 }, { "epoch": 0.29, "eval_loss": 1.5494047403335571, "eval_runtime": 154.2296, "eval_samples_per_second": 129.677, "eval_steps_per_second": 2.029, "step": 39600 }, { "epoch": 0.29, "learning_rate": 4.853184287176258e-05, "loss": 1.6159, "step": 40000 }, { "epoch": 0.29, "eval_loss": 1.5492123365402222, "eval_runtime": 358.728, "eval_samples_per_second": 55.753, "eval_steps_per_second": 0.873, "step": 40000 }, { "epoch": 0.29, "learning_rate": 4.844897806983017e-05, "loss": 1.6131, "step": 40400 }, { "epoch": 0.29, "eval_loss": 1.5435105562210083, "eval_runtime": 424.1722, "eval_samples_per_second": 47.151, "eval_steps_per_second": 0.738, "step": 40400 }, { "epoch": 0.3, "learning_rate": 4.8366113267897764e-05, "loss": 1.6125, "step": 40800 }, { "epoch": 0.3, "eval_loss": 1.5407049655914307, "eval_runtime": 233.4949, "eval_samples_per_second": 85.655, "eval_steps_per_second": 1.341, "step": 40800 }, { "epoch": 0.3, "learning_rate": 4.828324846596536e-05, "loss": 1.6129, "step": 41200 }, { "epoch": 0.3, "eval_loss": 1.5503147840499878, "eval_runtime": 154.5487, "eval_samples_per_second": 129.409, "eval_steps_per_second": 2.025, "step": 41200 }, { "epoch": 0.3, "learning_rate": 4.820038366403295e-05, "loss": 1.61, "step": 41600 }, { "epoch": 0.3, "eval_loss": 1.5319013595581055, "eval_runtime": 229.3937, "eval_samples_per_second": 87.186, "eval_steps_per_second": 1.364, "step": 41600 }, { "epoch": 0.31, "learning_rate": 4.811751886210054e-05, "loss": 1.6083, "step": 42000 }, { "epoch": 0.31, "eval_loss": 1.540002465248108, "eval_runtime": 200.5559, "eval_samples_per_second": 99.723, "eval_steps_per_second": 1.561, "step": 42000 }, { "epoch": 0.31, "learning_rate": 4.8034654060168135e-05, "loss": 1.6049, "step": 42400 }, { "epoch": 0.31, "eval_loss": 1.5374138355255127, "eval_runtime": 353.7763, "eval_samples_per_second": 56.533, "eval_steps_per_second": 0.885, "step": 42400 }, { "epoch": 0.31, "learning_rate": 4.795178925823573e-05, "loss": 1.6048, "step": 42800 }, { "epoch": 0.31, "eval_loss": 1.5372508764266968, "eval_runtime": 306.8656, "eval_samples_per_second": 65.175, "eval_steps_per_second": 1.02, "step": 42800 }, { "epoch": 0.32, "learning_rate": 4.786892445630332e-05, "loss": 1.6036, "step": 43200 }, { "epoch": 0.32, "eval_loss": 1.538548469543457, "eval_runtime": 875.5249, "eval_samples_per_second": 22.843, "eval_steps_per_second": 0.357, "step": 43200 }, { "epoch": 0.32, "learning_rate": 4.778605965437091e-05, "loss": 1.6025, "step": 43600 }, { "epoch": 0.32, "eval_loss": 1.5447686910629272, "eval_runtime": 216.6802, "eval_samples_per_second": 92.302, "eval_steps_per_second": 1.445, "step": 43600 }, { "epoch": 0.32, "learning_rate": 4.7703194852438506e-05, "loss": 1.5987, "step": 44000 }, { "epoch": 0.32, "eval_loss": 1.534464716911316, "eval_runtime": 527.5707, "eval_samples_per_second": 37.91, "eval_steps_per_second": 0.593, "step": 44000 }, { "epoch": 0.32, "learning_rate": 4.76203300505061e-05, "loss": 1.5995, "step": 44400 }, { "epoch": 0.32, "eval_loss": 1.537174105644226, "eval_runtime": 157.1885, "eval_samples_per_second": 127.236, "eval_steps_per_second": 1.991, "step": 44400 }, { "epoch": 0.33, "learning_rate": 4.753746524857369e-05, "loss": 1.5995, "step": 44800 }, { "epoch": 0.33, "eval_loss": 1.5312557220458984, "eval_runtime": 590.8847, "eval_samples_per_second": 33.848, "eval_steps_per_second": 0.53, "step": 44800 }, { "epoch": 0.33, "learning_rate": 4.7454600446641284e-05, "loss": 1.6002, "step": 45200 }, { "epoch": 0.33, "eval_loss": 1.5247910022735596, "eval_runtime": 197.0178, "eval_samples_per_second": 101.514, "eval_steps_per_second": 1.589, "step": 45200 }, { "epoch": 0.33, "learning_rate": 4.737173564470888e-05, "loss": 1.5985, "step": 45600 }, { "epoch": 0.33, "eval_loss": 1.5312753915786743, "eval_runtime": 217.2767, "eval_samples_per_second": 92.049, "eval_steps_per_second": 1.441, "step": 45600 }, { "epoch": 0.34, "learning_rate": 4.728887084277647e-05, "loss": 1.5975, "step": 46000 }, { "epoch": 0.34, "eval_loss": 1.5283282995224, "eval_runtime": 247.4783, "eval_samples_per_second": 80.815, "eval_steps_per_second": 1.265, "step": 46000 }, { "epoch": 0.34, "learning_rate": 4.720600604084406e-05, "loss": 1.5942, "step": 46400 }, { "epoch": 0.34, "eval_loss": 1.5262142419815063, "eval_runtime": 943.4579, "eval_samples_per_second": 21.199, "eval_steps_per_second": 0.332, "step": 46400 }, { "epoch": 0.34, "learning_rate": 4.7123141238911655e-05, "loss": 1.5946, "step": 46800 }, { "epoch": 0.34, "eval_loss": 1.5237544775009155, "eval_runtime": 157.7499, "eval_samples_per_second": 126.783, "eval_steps_per_second": 1.984, "step": 46800 }, { "epoch": 0.34, "learning_rate": 4.704027643697925e-05, "loss": 1.592, "step": 47200 }, { "epoch": 0.34, "eval_loss": 1.5289279222488403, "eval_runtime": 380.6257, "eval_samples_per_second": 52.545, "eval_steps_per_second": 0.822, "step": 47200 }, { "epoch": 0.35, "learning_rate": 4.695741163504685e-05, "loss": 1.5924, "step": 47600 }, { "epoch": 0.35, "eval_loss": 1.523956298828125, "eval_runtime": 154.9689, "eval_samples_per_second": 129.058, "eval_steps_per_second": 2.02, "step": 47600 }, { "epoch": 0.35, "learning_rate": 4.687454683311443e-05, "loss": 1.5901, "step": 48000 }, { "epoch": 0.35, "eval_loss": 1.5227019786834717, "eval_runtime": 728.6822, "eval_samples_per_second": 27.447, "eval_steps_per_second": 0.43, "step": 48000 }, { "epoch": 0.35, "learning_rate": 4.6791682031182026e-05, "loss": 1.589, "step": 48400 }, { "epoch": 0.35, "eval_loss": 1.5262556076049805, "eval_runtime": 156.5557, "eval_samples_per_second": 127.75, "eval_steps_per_second": 1.999, "step": 48400 }, { "epoch": 0.36, "learning_rate": 4.670881722924962e-05, "loss": 1.5875, "step": 48800 }, { "epoch": 0.36, "eval_loss": 1.5185788869857788, "eval_runtime": 355.4244, "eval_samples_per_second": 56.271, "eval_steps_per_second": 0.881, "step": 48800 }, { "epoch": 0.36, "learning_rate": 4.662595242731722e-05, "loss": 1.5867, "step": 49200 }, { "epoch": 0.36, "eval_loss": 1.51908278465271, "eval_runtime": 729.5519, "eval_samples_per_second": 27.414, "eval_steps_per_second": 0.429, "step": 49200 }, { "epoch": 0.36, "learning_rate": 4.6543087625384804e-05, "loss": 1.5849, "step": 49600 }, { "epoch": 0.36, "eval_loss": 1.5164732933044434, "eval_runtime": 852.7453, "eval_samples_per_second": 23.454, "eval_steps_per_second": 0.367, "step": 49600 }, { "epoch": 0.36, "learning_rate": 4.64602228234524e-05, "loss": 1.5828, "step": 50000 }, { "epoch": 0.36, "eval_loss": 1.5202162265777588, "eval_runtime": 157.0148, "eval_samples_per_second": 127.377, "eval_steps_per_second": 1.993, "step": 50000 }, { "epoch": 0.37, "learning_rate": 4.637735802151999e-05, "loss": 1.5816, "step": 50400 }, { "epoch": 0.37, "eval_loss": 1.5152881145477295, "eval_runtime": 211.7581, "eval_samples_per_second": 94.447, "eval_steps_per_second": 1.478, "step": 50400 }, { "epoch": 0.37, "learning_rate": 4.629449321958758e-05, "loss": 1.5809, "step": 50800 }, { "epoch": 0.37, "eval_loss": 1.5141160488128662, "eval_runtime": 164.3824, "eval_samples_per_second": 121.668, "eval_steps_per_second": 1.904, "step": 50800 }, { "epoch": 0.37, "learning_rate": 4.621162841765518e-05, "loss": 1.5771, "step": 51200 }, { "epoch": 0.37, "eval_loss": 1.5138821601867676, "eval_runtime": 462.6007, "eval_samples_per_second": 43.234, "eval_steps_per_second": 0.677, "step": 51200 }, { "epoch": 0.38, "learning_rate": 4.612876361572277e-05, "loss": 1.5775, "step": 51600 }, { "epoch": 0.38, "eval_loss": 1.509470820426941, "eval_runtime": 775.3154, "eval_samples_per_second": 25.796, "eval_steps_per_second": 0.404, "step": 51600 }, { "epoch": 0.38, "learning_rate": 4.604589881379036e-05, "loss": 1.5767, "step": 52000 }, { "epoch": 0.38, "eval_loss": 1.5092774629592896, "eval_runtime": 186.3503, "eval_samples_per_second": 107.325, "eval_steps_per_second": 1.68, "step": 52000 }, { "epoch": 0.38, "learning_rate": 4.596303401185795e-05, "loss": 1.5757, "step": 52400 }, { "epoch": 0.38, "eval_loss": 1.5057079792022705, "eval_runtime": 159.2417, "eval_samples_per_second": 125.595, "eval_steps_per_second": 1.966, "step": 52400 }, { "epoch": 0.39, "learning_rate": 4.588016920992555e-05, "loss": 1.5752, "step": 52800 }, { "epoch": 0.39, "eval_loss": 1.5144433975219727, "eval_runtime": 159.6541, "eval_samples_per_second": 125.271, "eval_steps_per_second": 1.96, "step": 52800 }, { "epoch": 0.39, "learning_rate": 4.579730440799314e-05, "loss": 1.5752, "step": 53200 }, { "epoch": 0.39, "eval_loss": 1.506042242050171, "eval_runtime": 406.841, "eval_samples_per_second": 49.159, "eval_steps_per_second": 0.769, "step": 53200 }, { "epoch": 0.39, "learning_rate": 4.571443960606073e-05, "loss": 1.5759, "step": 53600 }, { "epoch": 0.39, "eval_loss": 1.511734962463379, "eval_runtime": 956.1026, "eval_samples_per_second": 20.918, "eval_steps_per_second": 0.327, "step": 53600 }, { "epoch": 0.39, "learning_rate": 4.5631574804128324e-05, "loss": 1.5749, "step": 54000 }, { "epoch": 0.39, "eval_loss": 1.5020769834518433, "eval_runtime": 261.4557, "eval_samples_per_second": 76.495, "eval_steps_per_second": 1.197, "step": 54000 }, { "epoch": 0.4, "learning_rate": 4.554871000219592e-05, "loss": 1.5732, "step": 54400 }, { "epoch": 0.4, "eval_loss": 1.536434531211853, "eval_runtime": 200.7371, "eval_samples_per_second": 99.633, "eval_steps_per_second": 1.559, "step": 54400 }, { "epoch": 0.4, "learning_rate": 4.5465845200263516e-05, "loss": 1.5728, "step": 54800 }, { "epoch": 0.4, "eval_loss": 1.5178890228271484, "eval_runtime": 188.6203, "eval_samples_per_second": 106.033, "eval_steps_per_second": 1.659, "step": 54800 }, { "epoch": 0.4, "learning_rate": 4.53829803983311e-05, "loss": 1.5742, "step": 55200 }, { "epoch": 0.4, "eval_loss": 1.503977656364441, "eval_runtime": 232.3531, "eval_samples_per_second": 86.076, "eval_steps_per_second": 1.347, "step": 55200 }, { "epoch": 0.41, "learning_rate": 4.5300115596398695e-05, "loss": 1.5701, "step": 55600 }, { "epoch": 0.41, "eval_loss": 1.5044046640396118, "eval_runtime": 248.7056, "eval_samples_per_second": 80.416, "eval_steps_per_second": 1.259, "step": 55600 }, { "epoch": 0.41, "learning_rate": 4.5217250794466294e-05, "loss": 1.569, "step": 56000 }, { "epoch": 0.41, "eval_loss": 1.5002530813217163, "eval_runtime": 155.8533, "eval_samples_per_second": 128.326, "eval_steps_per_second": 2.008, "step": 56000 }, { "epoch": 0.41, "learning_rate": 4.513438599253389e-05, "loss": 1.5671, "step": 56400 }, { "epoch": 0.41, "eval_loss": 1.5035356283187866, "eval_runtime": 591.7888, "eval_samples_per_second": 33.796, "eval_steps_per_second": 0.529, "step": 56400 }, { "epoch": 0.41, "learning_rate": 4.505152119060147e-05, "loss": 1.5663, "step": 56800 }, { "epoch": 0.41, "eval_loss": 1.5083376169204712, "eval_runtime": 235.8323, "eval_samples_per_second": 84.806, "eval_steps_per_second": 1.327, "step": 56800 }, { "epoch": 0.0, "learning_rate": 4.4968656388669065e-05, "loss": 1.566, "step": 57200 }, { "epoch": 0.0, "eval_loss": 1.500092625617981, "eval_runtime": 122.1502, "eval_samples_per_second": 163.733, "eval_steps_per_second": 2.562, "step": 57200 }, { "epoch": 0.01, "learning_rate": 4.4885791586736665e-05, "loss": 1.5667, "step": 57600 }, { "epoch": 0.01, "eval_loss": 1.4975863695144653, "eval_runtime": 122.6954, "eval_samples_per_second": 163.005, "eval_steps_per_second": 2.551, "step": 57600 }, { "epoch": 0.01, "learning_rate": 4.480292678480426e-05, "loss": 1.5657, "step": 58000 }, { "epoch": 0.01, "eval_loss": 1.4931672811508179, "eval_runtime": 123.0954, "eval_samples_per_second": 162.476, "eval_steps_per_second": 2.543, "step": 58000 }, { "epoch": 0.01, "learning_rate": 4.472006198287185e-05, "loss": 1.5642, "step": 58400 }, { "epoch": 0.01, "eval_loss": 1.4972225427627563, "eval_runtime": 123.364, "eval_samples_per_second": 162.122, "eval_steps_per_second": 2.537, "step": 58400 }, { "epoch": 0.01, "learning_rate": 4.4637197180939436e-05, "loss": 1.5622, "step": 58800 }, { "epoch": 0.01, "eval_loss": 1.49701988697052, "eval_runtime": 123.4986, "eval_samples_per_second": 161.945, "eval_steps_per_second": 2.534, "step": 58800 }, { "epoch": 0.02, "learning_rate": 4.4554332379007036e-05, "loss": 1.5607, "step": 59200 }, { "epoch": 0.02, "eval_loss": 1.4874858856201172, "eval_runtime": 123.9331, "eval_samples_per_second": 161.377, "eval_steps_per_second": 2.526, "step": 59200 }, { "epoch": 0.02, "learning_rate": 4.447146757707463e-05, "loss": 1.5607, "step": 59600 }, { "epoch": 0.02, "eval_loss": 1.4898470640182495, "eval_runtime": 120.8464, "eval_samples_per_second": 165.499, "eval_steps_per_second": 2.59, "step": 59600 }, { "epoch": 0.02, "learning_rate": 4.438860277514222e-05, "loss": 1.5586, "step": 60000 }, { "epoch": 0.02, "eval_loss": 1.494850754737854, "eval_runtime": 124.1267, "eval_samples_per_second": 161.126, "eval_steps_per_second": 2.522, "step": 60000 }, { "epoch": 0.03, "learning_rate": 4.430573797320981e-05, "loss": 1.5582, "step": 60400 }, { "epoch": 0.03, "eval_loss": 1.4933040142059326, "eval_runtime": 122.7367, "eval_samples_per_second": 162.95, "eval_steps_per_second": 2.55, "step": 60400 }, { "epoch": 0.03, "learning_rate": 4.4222873171277407e-05, "loss": 1.5579, "step": 60800 }, { "epoch": 0.03, "eval_loss": 1.4987492561340332, "eval_runtime": 123.7785, "eval_samples_per_second": 161.579, "eval_steps_per_second": 2.529, "step": 60800 }, { "epoch": 0.03, "learning_rate": 4.4140008369345e-05, "loss": 1.5577, "step": 61200 }, { "epoch": 0.03, "eval_loss": 1.489683747291565, "eval_runtime": 121.2724, "eval_samples_per_second": 164.918, "eval_steps_per_second": 2.581, "step": 61200 }, { "epoch": 0.04, "learning_rate": 4.405714356741259e-05, "loss": 1.5574, "step": 61600 }, { "epoch": 0.04, "eval_loss": 1.4959229230880737, "eval_runtime": 121.5786, "eval_samples_per_second": 164.503, "eval_steps_per_second": 2.574, "step": 61600 }, { "epoch": 0.04, "learning_rate": 4.397427876548018e-05, "loss": 1.5551, "step": 62000 }, { "epoch": 0.04, "eval_loss": 1.496133804321289, "eval_runtime": 121.3994, "eval_samples_per_second": 164.746, "eval_steps_per_second": 2.578, "step": 62000 }, { "epoch": 0.04, "learning_rate": 4.389141396354778e-05, "loss": 1.5549, "step": 62400 }, { "epoch": 0.04, "eval_loss": 1.4901236295700073, "eval_runtime": 122.3573, "eval_samples_per_second": 163.456, "eval_steps_per_second": 2.558, "step": 62400 }, { "epoch": 0.0, "learning_rate": 4.380854916161537e-05, "loss": 1.5535, "step": 62800 }, { "epoch": 0.0, "eval_loss": 1.4875001907348633, "eval_runtime": 105.5644, "eval_samples_per_second": 189.458, "eval_steps_per_second": 2.965, "step": 62800 }, { "epoch": 0.01, "learning_rate": 4.372568435968296e-05, "loss": 1.5542, "step": 63200 }, { "epoch": 0.01, "eval_loss": 1.4935693740844727, "eval_runtime": 112.1109, "eval_samples_per_second": 178.395, "eval_steps_per_second": 2.792, "step": 63200 }, { "epoch": 0.01, "learning_rate": 4.3642819557750556e-05, "loss": 1.5512, "step": 63600 }, { "epoch": 0.01, "eval_loss": 1.4915146827697754, "eval_runtime": 109.7302, "eval_samples_per_second": 182.265, "eval_steps_per_second": 2.852, "step": 63600 }, { "epoch": 0.01, "learning_rate": 4.355995475581814e-05, "loss": 1.5515, "step": 64000 }, { "epoch": 0.01, "eval_loss": 1.4876190423965454, "eval_runtime": 111.5094, "eval_samples_per_second": 179.357, "eval_steps_per_second": 2.807, "step": 64000 }, { "epoch": 0.01, "learning_rate": 4.347708995388574e-05, "loss": 1.549, "step": 64400 }, { "epoch": 0.01, "eval_loss": 1.4836992025375366, "eval_runtime": 112.3893, "eval_samples_per_second": 177.953, "eval_steps_per_second": 2.785, "step": 64400 }, { "epoch": 0.02, "learning_rate": 4.3394225151953334e-05, "loss": 1.5479, "step": 64800 }, { "epoch": 0.02, "eval_loss": 1.4897727966308594, "eval_runtime": 111.5923, "eval_samples_per_second": 179.224, "eval_steps_per_second": 2.805, "step": 64800 }, { "epoch": 0.02, "learning_rate": 4.3311360350020926e-05, "loss": 1.5492, "step": 65200 }, { "epoch": 0.02, "eval_loss": 1.484372615814209, "eval_runtime": 111.9819, "eval_samples_per_second": 178.6, "eval_steps_per_second": 2.795, "step": 65200 }, { "epoch": 0.02, "learning_rate": 4.322849554808851e-05, "loss": 1.5468, "step": 65600 }, { "epoch": 0.02, "eval_loss": 1.4826014041900635, "eval_runtime": 112.199, "eval_samples_per_second": 178.255, "eval_steps_per_second": 2.79, "step": 65600 }, { "epoch": 0.03, "learning_rate": 4.314563074615611e-05, "loss": 1.5476, "step": 66000 }, { "epoch": 0.03, "eval_loss": 1.4857112169265747, "eval_runtime": 112.9874, "eval_samples_per_second": 177.011, "eval_steps_per_second": 2.77, "step": 66000 }, { "epoch": 0.03, "learning_rate": 4.3062765944223705e-05, "loss": 1.5473, "step": 66400 }, { "epoch": 0.03, "eval_loss": 1.487414836883545, "eval_runtime": 114.6833, "eval_samples_per_second": 174.393, "eval_steps_per_second": 2.729, "step": 66400 }, { "epoch": 0.03, "learning_rate": 4.29799011422913e-05, "loss": 1.5487, "step": 66800 }, { "epoch": 0.03, "eval_loss": 1.4894484281539917, "eval_runtime": 109.3915, "eval_samples_per_second": 182.83, "eval_steps_per_second": 2.861, "step": 66800 }, { "epoch": 0.04, "learning_rate": 4.289703634035889e-05, "loss": 1.5476, "step": 67200 }, { "epoch": 0.04, "eval_loss": 1.4839718341827393, "eval_runtime": 114.1702, "eval_samples_per_second": 175.177, "eval_steps_per_second": 2.742, "step": 67200 }, { "epoch": 0.04, "learning_rate": 4.281417153842648e-05, "loss": 1.5459, "step": 67600 }, { "epoch": 0.04, "eval_loss": 1.4786709547042847, "eval_runtime": 112.7446, "eval_samples_per_second": 177.392, "eval_steps_per_second": 2.776, "step": 67600 }, { "epoch": 0.04, "learning_rate": 4.2731306736494075e-05, "loss": 1.5431, "step": 68000 }, { "epoch": 0.04, "eval_loss": 1.48154616355896, "eval_runtime": 113.692, "eval_samples_per_second": 175.914, "eval_steps_per_second": 2.753, "step": 68000 }, { "epoch": 0.04, "learning_rate": 4.264844193456167e-05, "loss": 1.544, "step": 68400 }, { "epoch": 0.04, "eval_loss": 1.4801952838897705, "eval_runtime": 111.6945, "eval_samples_per_second": 179.06, "eval_steps_per_second": 2.802, "step": 68400 }, { "epoch": 0.05, "learning_rate": 4.256557713262926e-05, "loss": 1.5436, "step": 68800 }, { "epoch": 0.05, "eval_loss": 1.478300929069519, "eval_runtime": 114.4135, "eval_samples_per_second": 174.805, "eval_steps_per_second": 2.736, "step": 68800 }, { "epoch": 0.05, "learning_rate": 4.2482712330696853e-05, "loss": 1.5411, "step": 69200 }, { "epoch": 0.05, "eval_loss": 1.484221339225769, "eval_runtime": 114.3599, "eval_samples_per_second": 174.886, "eval_steps_per_second": 2.737, "step": 69200 }, { "epoch": 0.05, "learning_rate": 4.2399847528764446e-05, "loss": 1.5446, "step": 69600 }, { "epoch": 0.05, "eval_loss": 1.4805113077163696, "eval_runtime": 115.5225, "eval_samples_per_second": 173.126, "eval_steps_per_second": 2.709, "step": 69600 }, { "epoch": 0.06, "learning_rate": 4.231698272683204e-05, "loss": 1.5441, "step": 70000 }, { "epoch": 0.06, "eval_loss": 1.4875138998031616, "eval_runtime": 114.7419, "eval_samples_per_second": 174.304, "eval_steps_per_second": 2.728, "step": 70000 }, { "epoch": 0.06, "learning_rate": 4.223411792489963e-05, "loss": 1.5446, "step": 70400 }, { "epoch": 0.06, "eval_loss": 1.4801757335662842, "eval_runtime": 119.231, "eval_samples_per_second": 167.742, "eval_steps_per_second": 2.625, "step": 70400 }, { "epoch": 0.06, "learning_rate": 4.2151253122967224e-05, "loss": 1.5443, "step": 70800 }, { "epoch": 0.06, "eval_loss": 1.4772462844848633, "eval_runtime": 115.591, "eval_samples_per_second": 173.024, "eval_steps_per_second": 2.708, "step": 70800 }, { "epoch": 0.06, "learning_rate": 4.206838832103482e-05, "loss": 1.5411, "step": 71200 }, { "epoch": 0.06, "eval_loss": 1.4795691967010498, "eval_runtime": 118.6003, "eval_samples_per_second": 168.634, "eval_steps_per_second": 2.639, "step": 71200 }, { "epoch": 0.07, "learning_rate": 4.198552351910241e-05, "loss": 1.5413, "step": 71600 }, { "epoch": 0.07, "eval_loss": 1.4804329872131348, "eval_runtime": 119.1285, "eval_samples_per_second": 167.886, "eval_steps_per_second": 2.627, "step": 71600 }, { "epoch": 0.07, "learning_rate": 4.190265871717e-05, "loss": 1.5415, "step": 72000 }, { "epoch": 0.07, "eval_loss": 1.4793719053268433, "eval_runtime": 117.4287, "eval_samples_per_second": 170.316, "eval_steps_per_second": 2.665, "step": 72000 }, { "epoch": 0.07, "learning_rate": 4.1819793915237595e-05, "loss": 1.5414, "step": 72400 }, { "epoch": 0.07, "eval_loss": 1.4818830490112305, "eval_runtime": 119.5626, "eval_samples_per_second": 167.276, "eval_steps_per_second": 2.618, "step": 72400 }, { "epoch": 0.08, "learning_rate": 4.173692911330519e-05, "loss": 1.5423, "step": 72800 }, { "epoch": 0.08, "eval_loss": 1.4779819250106812, "eval_runtime": 118.4406, "eval_samples_per_second": 168.861, "eval_steps_per_second": 2.643, "step": 72800 }, { "epoch": 0.08, "learning_rate": 4.165406431137278e-05, "loss": 1.5386, "step": 73200 }, { "epoch": 0.08, "eval_loss": 1.4759750366210938, "eval_runtime": 115.7547, "eval_samples_per_second": 172.779, "eval_steps_per_second": 2.704, "step": 73200 }, { "epoch": 0.08, "learning_rate": 4.157119950944037e-05, "loss": 1.5386, "step": 73600 }, { "epoch": 0.08, "eval_loss": 1.4726980924606323, "eval_runtime": 119.2625, "eval_samples_per_second": 167.697, "eval_steps_per_second": 2.624, "step": 73600 }, { "epoch": 0.08, "learning_rate": 4.148833470750797e-05, "loss": 1.5375, "step": 74000 }, { "epoch": 0.08, "eval_loss": 1.4739803075790405, "eval_runtime": 118.9815, "eval_samples_per_second": 168.093, "eval_steps_per_second": 2.631, "step": 74000 }, { "epoch": 0.09, "learning_rate": 4.140546990557556e-05, "loss": 1.5376, "step": 74400 }, { "epoch": 0.09, "eval_loss": 1.4678592681884766, "eval_runtime": 117.6181, "eval_samples_per_second": 170.042, "eval_steps_per_second": 2.661, "step": 74400 }, { "epoch": 0.09, "learning_rate": 4.132260510364315e-05, "loss": 1.5365, "step": 74800 }, { "epoch": 0.09, "eval_loss": 1.4694132804870605, "eval_runtime": 118.0975, "eval_samples_per_second": 169.352, "eval_steps_per_second": 2.65, "step": 74800 }, { "epoch": 0.09, "learning_rate": 4.1239740301710744e-05, "loss": 1.5356, "step": 75200 }, { "epoch": 0.09, "eval_loss": 1.4689810276031494, "eval_runtime": 119.48, "eval_samples_per_second": 167.392, "eval_steps_per_second": 2.62, "step": 75200 }, { "epoch": 0.1, "learning_rate": 4.1156875499778344e-05, "loss": 1.5353, "step": 75600 }, { "epoch": 0.1, "eval_loss": 1.4731059074401855, "eval_runtime": 117.5581, "eval_samples_per_second": 170.129, "eval_steps_per_second": 2.663, "step": 75600 }, { "epoch": 0.1, "learning_rate": 4.107401069784593e-05, "loss": 1.5348, "step": 76000 }, { "epoch": 0.1, "eval_loss": 1.466073751449585, "eval_runtime": 118.6436, "eval_samples_per_second": 168.572, "eval_steps_per_second": 2.638, "step": 76000 }, { "epoch": 0.1, "learning_rate": 4.099114589591352e-05, "loss": 1.5336, "step": 76400 }, { "epoch": 0.1, "eval_loss": 1.4694697856903076, "eval_runtime": 117.8705, "eval_samples_per_second": 169.678, "eval_steps_per_second": 2.655, "step": 76400 }, { "epoch": 0.11, "learning_rate": 4.0908281093981115e-05, "loss": 1.5331, "step": 76800 }, { "epoch": 0.11, "eval_loss": 1.470395803451538, "eval_runtime": 119.1567, "eval_samples_per_second": 167.846, "eval_steps_per_second": 2.627, "step": 76800 }, { "epoch": 0.11, "learning_rate": 4.0825416292048714e-05, "loss": 1.5336, "step": 77200 }, { "epoch": 0.11, "eval_loss": 1.4707101583480835, "eval_runtime": 217.6239, "eval_samples_per_second": 91.902, "eval_steps_per_second": 1.438, "step": 77200 }, { "epoch": 0.0, "learning_rate": 4.074255149011631e-05, "loss": 1.5303, "step": 77600 }, { "epoch": 0.0, "eval_loss": 1.4677211046218872, "eval_runtime": 111.0323, "eval_samples_per_second": 180.128, "eval_steps_per_second": 2.819, "step": 77600 }, { "epoch": 0.01, "learning_rate": 4.065968668818389e-05, "loss": 1.5302, "step": 78000 }, { "epoch": 0.01, "eval_loss": 1.4664534330368042, "eval_runtime": 111.6113, "eval_samples_per_second": 179.193, "eval_steps_per_second": 2.804, "step": 78000 }, { "epoch": 0.01, "learning_rate": 4.0576821886251486e-05, "loss": 1.5288, "step": 78400 }, { "epoch": 0.01, "eval_loss": 1.4657336473464966, "eval_runtime": 109.321, "eval_samples_per_second": 182.947, "eval_steps_per_second": 2.863, "step": 78400 }, { "epoch": 0.01, "learning_rate": 4.049395708431908e-05, "loss": 1.5284, "step": 78800 }, { "epoch": 0.01, "eval_loss": 1.4579006433486938, "eval_runtime": 109.1836, "eval_samples_per_second": 183.178, "eval_steps_per_second": 2.867, "step": 78800 }, { "epoch": 0.01, "learning_rate": 4.041109228238668e-05, "loss": 1.5277, "step": 79200 }, { "epoch": 0.01, "eval_loss": 1.4642364978790283, "eval_runtime": 108.8787, "eval_samples_per_second": 183.691, "eval_steps_per_second": 2.875, "step": 79200 }, { "epoch": 0.02, "learning_rate": 4.0328227480454264e-05, "loss": 1.5254, "step": 79600 }, { "epoch": 0.02, "eval_loss": 1.4699641466140747, "eval_runtime": 110.6507, "eval_samples_per_second": 180.749, "eval_steps_per_second": 2.829, "step": 79600 }, { "epoch": 0.02, "learning_rate": 4.024536267852186e-05, "loss": 1.526, "step": 80000 }, { "epoch": 0.02, "eval_loss": 1.4663636684417725, "eval_runtime": 108.472, "eval_samples_per_second": 184.379, "eval_steps_per_second": 2.886, "step": 80000 }, { "epoch": 0.02, "learning_rate": 4.016249787658945e-05, "loss": 1.5242, "step": 80400 }, { "epoch": 0.02, "eval_loss": 1.4651668071746826, "eval_runtime": 111.7826, "eval_samples_per_second": 178.919, "eval_steps_per_second": 2.8, "step": 80400 }, { "epoch": 0.03, "learning_rate": 4.007963307465705e-05, "loss": 1.523, "step": 80800 }, { "epoch": 0.03, "eval_loss": 1.4634953737258911, "eval_runtime": 110.7712, "eval_samples_per_second": 180.552, "eval_steps_per_second": 2.826, "step": 80800 }, { "epoch": 0.0, "learning_rate": 3.9109053272894466e-05, "loss": 1.524, "step": 81200 }, { "epoch": 0.0, "eval_loss": 2.39056134223938, "eval_runtime": 20.0958, "eval_samples_per_second": 175.112, "eval_steps_per_second": 5.474, "step": 81200 }, { "epoch": 0.01, "learning_rate": 3.902803602027052e-05, "loss": 1.5242, "step": 81600 }, { "epoch": 0.01, "eval_loss": 2.3666460514068604, "eval_runtime": 18.7692, "eval_samples_per_second": 187.488, "eval_steps_per_second": 5.861, "step": 81600 }, { "epoch": 0.01, "learning_rate": 3.894701876764657e-05, "loss": 1.5237, "step": 82000 }, { "epoch": 0.01, "eval_loss": 2.385453939437866, "eval_runtime": 19.4223, "eval_samples_per_second": 181.184, "eval_steps_per_second": 5.664, "step": 82000 }, { "epoch": 0.01, "learning_rate": 3.886600151502263e-05, "loss": 1.5226, "step": 82400 }, { "epoch": 0.01, "eval_loss": 2.393972396850586, "eval_runtime": 19.0938, "eval_samples_per_second": 184.301, "eval_steps_per_second": 5.761, "step": 82400 }, { "epoch": 0.01, "learning_rate": 3.8784984262398676e-05, "loss": 1.5218, "step": 82800 }, { "epoch": 0.01, "eval_loss": 2.456040620803833, "eval_runtime": 18.9643, "eval_samples_per_second": 185.56, "eval_steps_per_second": 5.8, "step": 82800 }, { "epoch": 0.02, "learning_rate": 3.870396700977473e-05, "loss": 1.5215, "step": 83200 }, { "epoch": 0.02, "eval_loss": 2.395426034927368, "eval_runtime": 19.2189, "eval_samples_per_second": 183.101, "eval_steps_per_second": 5.724, "step": 83200 }, { "epoch": 0.02, "learning_rate": 3.862294975715079e-05, "loss": 1.521, "step": 83600 }, { "epoch": 0.02, "eval_loss": 2.4465413093566895, "eval_runtime": 18.8719, "eval_samples_per_second": 186.468, "eval_steps_per_second": 5.829, "step": 83600 }, { "epoch": 0.02, "learning_rate": 3.854193250452684e-05, "loss": 1.5209, "step": 84000 }, { "epoch": 0.02, "eval_loss": 2.396277904510498, "eval_runtime": 18.9346, "eval_samples_per_second": 185.85, "eval_steps_per_second": 5.809, "step": 84000 }, { "epoch": 0.0, "learning_rate": 3.846091525190289e-05, "loss": 1.5188, "step": 84400 }, { "epoch": 0.0, "eval_loss": 2.4277689456939697, "eval_runtime": 20.9367, "eval_samples_per_second": 168.078, "eval_steps_per_second": 5.254, "step": 84400 }, { "epoch": 0.01, "learning_rate": 3.837989799927895e-05, "loss": 1.5177, "step": 84800 }, { "epoch": 0.01, "eval_loss": 2.378986120223999, "eval_runtime": 20.2239, "eval_samples_per_second": 174.002, "eval_steps_per_second": 5.439, "step": 84800 }, { "epoch": 0.01, "learning_rate": 3.8298880746655e-05, "loss": 1.5184, "step": 85200 }, { "epoch": 0.01, "eval_loss": 2.395463705062866, "eval_runtime": 19.3659, "eval_samples_per_second": 181.711, "eval_steps_per_second": 5.68, "step": 85200 }, { "epoch": 0.01, "learning_rate": 3.8217863494031056e-05, "loss": 1.5166, "step": 85600 }, { "epoch": 0.01, "eval_loss": 2.421231269836426, "eval_runtime": 20.4856, "eval_samples_per_second": 171.779, "eval_steps_per_second": 5.37, "step": 85600 }, { "epoch": 0.01, "learning_rate": 3.813684624140711e-05, "loss": 1.5158, "step": 86000 }, { "epoch": 0.01, "eval_loss": 2.4270944595336914, "eval_runtime": 19.2825, "eval_samples_per_second": 182.497, "eval_steps_per_second": 5.705, "step": 86000 }, { "epoch": 0.02, "learning_rate": 3.8055828988783165e-05, "loss": 1.5157, "step": 86400 }, { "epoch": 0.02, "eval_loss": 2.4186675548553467, "eval_runtime": 19.3721, "eval_samples_per_second": 181.653, "eval_steps_per_second": 5.678, "step": 86400 }, { "epoch": 0.02, "learning_rate": 3.797481173615922e-05, "loss": 1.5156, "step": 86800 }, { "epoch": 0.02, "eval_loss": 2.4075629711151123, "eval_runtime": 19.2183, "eval_samples_per_second": 183.107, "eval_steps_per_second": 5.724, "step": 86800 }, { "epoch": 0.02, "learning_rate": 3.789379448353527e-05, "loss": 1.5147, "step": 87200 }, { "epoch": 0.02, "eval_loss": 2.471975803375244, "eval_runtime": 20.3949, "eval_samples_per_second": 172.543, "eval_steps_per_second": 5.394, "step": 87200 }, { "epoch": 0.0, "learning_rate": 3.781277723091132e-05, "loss": 1.5127, "step": 87600 }, { "epoch": 0.0, "eval_loss": 2.3385655879974365, "eval_runtime": 20.157, "eval_samples_per_second": 174.58, "eval_steps_per_second": 5.457, "step": 87600 }, { "epoch": 0.01, "learning_rate": 3.773175997828738e-05, "loss": 1.5129, "step": 88000 }, { "epoch": 0.01, "eval_loss": 2.381673574447632, "eval_runtime": 20.5337, "eval_samples_per_second": 171.377, "eval_steps_per_second": 5.357, "step": 88000 }, { "epoch": 0.01, "learning_rate": 3.765074272566343e-05, "loss": 1.5123, "step": 88400 }, { "epoch": 0.01, "eval_loss": 2.35689377784729, "eval_runtime": 20.7328, "eval_samples_per_second": 169.731, "eval_steps_per_second": 5.306, "step": 88400 }, { "epoch": 0.01, "learning_rate": 3.7569725473039484e-05, "loss": 1.5121, "step": 88800 }, { "epoch": 0.01, "eval_loss": 2.3643054962158203, "eval_runtime": 19.1222, "eval_samples_per_second": 184.026, "eval_steps_per_second": 5.752, "step": 88800 }, { "epoch": 0.01, "learning_rate": 3.7488708220415545e-05, "loss": 1.5118, "step": 89200 }, { "epoch": 0.01, "eval_loss": 2.334357261657715, "eval_runtime": 19.3908, "eval_samples_per_second": 181.478, "eval_steps_per_second": 5.673, "step": 89200 }, { "epoch": 0.02, "learning_rate": 3.740769096779159e-05, "loss": 1.5102, "step": 89600 }, { "epoch": 0.02, "eval_loss": 2.401927947998047, "eval_runtime": 20.1285, "eval_samples_per_second": 174.827, "eval_steps_per_second": 5.465, "step": 89600 }, { "epoch": 0.02, "learning_rate": 3.7326673715167647e-05, "loss": 1.5097, "step": 90000 }, { "epoch": 0.02, "eval_loss": 2.4241695404052734, "eval_runtime": 20.5668, "eval_samples_per_second": 171.101, "eval_steps_per_second": 5.348, "step": 90000 }, { "epoch": 0.02, "learning_rate": 3.72456564625437e-05, "loss": 1.5103, "step": 90400 }, { "epoch": 0.02, "eval_loss": 2.393686532974243, "eval_runtime": 19.2168, "eval_samples_per_second": 183.121, "eval_steps_per_second": 5.724, "step": 90400 }, { "epoch": 0.03, "learning_rate": 3.7164639209919755e-05, "loss": 1.5112, "step": 90800 }, { "epoch": 0.03, "eval_loss": 2.3694939613342285, "eval_runtime": 20.1373, "eval_samples_per_second": 174.751, "eval_steps_per_second": 5.463, "step": 90800 }, { "epoch": 0.03, "learning_rate": 3.70836219572958e-05, "loss": 1.5108, "step": 91200 }, { "epoch": 0.03, "eval_loss": 2.345815420150757, "eval_runtime": 20.1959, "eval_samples_per_second": 174.243, "eval_steps_per_second": 5.447, "step": 91200 }, { "epoch": 0.03, "learning_rate": 3.7002604704671864e-05, "loss": 1.511, "step": 91600 }, { "epoch": 0.03, "eval_loss": 2.3629839420318604, "eval_runtime": 19.3875, "eval_samples_per_second": 181.508, "eval_steps_per_second": 5.674, "step": 91600 }, { "epoch": 0.03, "learning_rate": 3.692158745204792e-05, "loss": 1.5089, "step": 92000 }, { "epoch": 0.03, "eval_loss": 2.385115385055542, "eval_runtime": 20.4471, "eval_samples_per_second": 172.103, "eval_steps_per_second": 5.38, "step": 92000 }, { "epoch": 0.04, "learning_rate": 3.6840570199423966e-05, "loss": 1.5095, "step": 92400 }, { "epoch": 0.04, "eval_loss": 2.319392442703247, "eval_runtime": 19.8755, "eval_samples_per_second": 177.052, "eval_steps_per_second": 5.534, "step": 92400 }, { "epoch": 0.04, "learning_rate": 3.6759552946800027e-05, "loss": 1.5094, "step": 92800 }, { "epoch": 0.04, "eval_loss": 2.3495166301727295, "eval_runtime": 19.4501, "eval_samples_per_second": 180.925, "eval_steps_per_second": 5.656, "step": 92800 }, { "epoch": 0.04, "learning_rate": 3.6678535694176074e-05, "loss": 1.5101, "step": 93200 }, { "epoch": 0.04, "eval_loss": 2.365245819091797, "eval_runtime": 19.578, "eval_samples_per_second": 179.743, "eval_steps_per_second": 5.619, "step": 93200 }, { "epoch": 0.05, "learning_rate": 3.659751844155213e-05, "loss": 1.5089, "step": 93600 }, { "epoch": 0.05, "eval_loss": 2.371981143951416, "eval_runtime": 19.798, "eval_samples_per_second": 177.745, "eval_steps_per_second": 5.556, "step": 93600 }, { "epoch": 0.05, "learning_rate": 3.651650118892818e-05, "loss": 1.509, "step": 94000 }, { "epoch": 0.05, "eval_loss": 2.332063913345337, "eval_runtime": 19.3403, "eval_samples_per_second": 181.952, "eval_steps_per_second": 5.688, "step": 94000 }, { "epoch": 0.05, "learning_rate": 3.643548393630424e-05, "loss": 1.5096, "step": 94400 }, { "epoch": 0.05, "eval_loss": 2.404459238052368, "eval_runtime": 19.2128, "eval_samples_per_second": 183.159, "eval_steps_per_second": 5.725, "step": 94400 }, { "epoch": 0.06, "learning_rate": 3.635446668368029e-05, "loss": 1.5089, "step": 94800 }, { "epoch": 0.06, "eval_loss": 2.3641324043273926, "eval_runtime": 19.4859, "eval_samples_per_second": 180.592, "eval_steps_per_second": 5.645, "step": 94800 }, { "epoch": 0.06, "learning_rate": 3.6273449431056346e-05, "loss": 1.5084, "step": 95200 }, { "epoch": 0.06, "eval_loss": 2.3842105865478516, "eval_runtime": 19.764, "eval_samples_per_second": 178.051, "eval_steps_per_second": 5.566, "step": 95200 }, { "epoch": 0.06, "learning_rate": 3.61924321784324e-05, "loss": 1.5089, "step": 95600 }, { "epoch": 0.06, "eval_loss": 2.3656747341156006, "eval_runtime": 20.585, "eval_samples_per_second": 170.949, "eval_steps_per_second": 5.344, "step": 95600 }, { "epoch": 0.06, "learning_rate": 3.6111414925808454e-05, "loss": 1.5097, "step": 96000 }, { "epoch": 0.06, "eval_loss": 2.374446153640747, "eval_runtime": 19.4426, "eval_samples_per_second": 180.994, "eval_steps_per_second": 5.658, "step": 96000 }, { "epoch": 0.07, "learning_rate": 3.603039767318451e-05, "loss": 1.5072, "step": 96400 }, { "epoch": 0.07, "eval_loss": 2.385554552078247, "eval_runtime": 20.3681, "eval_samples_per_second": 172.771, "eval_steps_per_second": 5.401, "step": 96400 }, { "epoch": 0.0, "learning_rate": 3.5949380420560556e-05, "loss": 1.5041, "step": 96800 }, { "epoch": 0.0, "eval_loss": 2.3629019260406494, "eval_runtime": 18.0818, "eval_samples_per_second": 194.616, "eval_steps_per_second": 6.083, "step": 96800 }, { "epoch": 0.01, "learning_rate": 3.586836316793662e-05, "loss": 1.5036, "step": 97200 }, { "epoch": 0.01, "eval_loss": 2.3723270893096924, "eval_runtime": 17.4087, "eval_samples_per_second": 202.14, "eval_steps_per_second": 6.319, "step": 97200 }, { "epoch": 0.01, "learning_rate": 3.578734591531267e-05, "loss": 1.504, "step": 97600 }, { "epoch": 0.01, "eval_loss": 2.390188217163086, "eval_runtime": 17.5005, "eval_samples_per_second": 201.081, "eval_steps_per_second": 6.286, "step": 97600 }, { "epoch": 0.01, "learning_rate": 3.570632866268872e-05, "loss": 1.5034, "step": 98000 }, { "epoch": 0.01, "eval_loss": 2.3117146492004395, "eval_runtime": 17.3837, "eval_samples_per_second": 202.431, "eval_steps_per_second": 6.328, "step": 98000 }, { "epoch": 0.01, "learning_rate": 3.562531141006478e-05, "loss": 1.5021, "step": 98400 }, { "epoch": 0.01, "eval_loss": 2.3584558963775635, "eval_runtime": 18.524, "eval_samples_per_second": 189.97, "eval_steps_per_second": 5.938, "step": 98400 }, { "epoch": 0.02, "learning_rate": 3.554429415744083e-05, "loss": 1.501, "step": 98800 }, { "epoch": 0.02, "eval_loss": 2.2931323051452637, "eval_runtime": 17.3901, "eval_samples_per_second": 202.357, "eval_steps_per_second": 6.325, "step": 98800 }, { "epoch": 0.02, "learning_rate": 3.546327690481688e-05, "loss": 1.501, "step": 99200 }, { "epoch": 0.02, "eval_loss": 2.3333306312561035, "eval_runtime": 17.4003, "eval_samples_per_second": 202.238, "eval_steps_per_second": 6.322, "step": 99200 }, { "epoch": 0.02, "learning_rate": 3.5382259652192936e-05, "loss": 1.4992, "step": 99600 }, { "epoch": 0.02, "eval_loss": 2.342263698577881, "eval_runtime": 17.3606, "eval_samples_per_second": 202.701, "eval_steps_per_second": 6.336, "step": 99600 }, { "epoch": 0.03, "learning_rate": 3.530124239956899e-05, "loss": 1.5008, "step": 100000 }, { "epoch": 0.03, "eval_loss": 2.336986541748047, "eval_runtime": 17.0114, "eval_samples_per_second": 206.861, "eval_steps_per_second": 6.466, "step": 100000 }, { "epoch": 0.03, "learning_rate": 3.5220225146945045e-05, "loss": 1.5002, "step": 100400 }, { "epoch": 0.03, "eval_loss": 2.3513643741607666, "eval_runtime": 17.6104, "eval_samples_per_second": 199.825, "eval_steps_per_second": 6.246, "step": 100400 }, { "epoch": 0.03, "learning_rate": 3.51392078943211e-05, "loss": 1.5016, "step": 100800 }, { "epoch": 0.03, "eval_loss": 2.3241846561431885, "eval_runtime": 17.6475, "eval_samples_per_second": 199.405, "eval_steps_per_second": 6.233, "step": 100800 }, { "epoch": 0.03, "learning_rate": 3.505819064169715e-05, "loss": 1.4988, "step": 101200 }, { "epoch": 0.03, "eval_loss": 2.359363317489624, "eval_runtime": 17.067, "eval_samples_per_second": 206.187, "eval_steps_per_second": 6.445, "step": 101200 }, { "epoch": 0.04, "learning_rate": 3.49771733890732e-05, "loss": 1.4992, "step": 101600 }, { "epoch": 0.04, "eval_loss": 2.348477363586426, "eval_runtime": 17.779, "eval_samples_per_second": 197.93, "eval_steps_per_second": 6.187, "step": 101600 }, { "epoch": 0.04, "learning_rate": 3.489615613644926e-05, "loss": 1.5003, "step": 102000 }, { "epoch": 0.04, "eval_loss": 2.4026684761047363, "eval_runtime": 17.0398, "eval_samples_per_second": 206.516, "eval_steps_per_second": 6.455, "step": 102000 }, { "epoch": 0.04, "learning_rate": 3.481513888382531e-05, "loss": 1.4994, "step": 102400 }, { "epoch": 0.04, "eval_loss": 2.365537643432617, "eval_runtime": 17.5601, "eval_samples_per_second": 200.397, "eval_steps_per_second": 6.264, "step": 102400 }, { "epoch": 0.05, "learning_rate": 3.4734121631201364e-05, "loss": 1.499, "step": 102800 }, { "epoch": 0.05, "eval_loss": 2.381800651550293, "eval_runtime": 16.8498, "eval_samples_per_second": 208.846, "eval_steps_per_second": 6.528, "step": 102800 }, { "epoch": 0.05, "learning_rate": 3.4653104378577425e-05, "loss": 1.4996, "step": 103200 }, { "epoch": 0.05, "eval_loss": 2.401005506515503, "eval_runtime": 16.9826, "eval_samples_per_second": 207.212, "eval_steps_per_second": 6.477, "step": 103200 }, { "epoch": 0.05, "learning_rate": 3.457208712595347e-05, "loss": 1.4985, "step": 103600 }, { "epoch": 0.05, "eval_loss": 2.399085283279419, "eval_runtime": 17.0074, "eval_samples_per_second": 206.91, "eval_steps_per_second": 6.468, "step": 103600 }, { "epoch": 0.06, "learning_rate": 3.4491069873329527e-05, "loss": 1.4984, "step": 104000 }, { "epoch": 0.06, "eval_loss": 2.3661704063415527, "eval_runtime": 16.9552, "eval_samples_per_second": 207.547, "eval_steps_per_second": 6.488, "step": 104000 }, { "epoch": 0.06, "learning_rate": 3.441005262070558e-05, "loss": 1.4975, "step": 104400 }, { "epoch": 0.06, "eval_loss": 2.4111948013305664, "eval_runtime": 16.975, "eval_samples_per_second": 207.304, "eval_steps_per_second": 6.48, "step": 104400 }, { "epoch": 0.06, "learning_rate": 3.4329035368081635e-05, "loss": 1.4987, "step": 104800 }, { "epoch": 0.06, "eval_loss": 2.3549654483795166, "eval_runtime": 17.004, "eval_samples_per_second": 206.951, "eval_steps_per_second": 6.469, "step": 104800 }, { "epoch": 0.06, "learning_rate": 3.424801811545769e-05, "loss": 1.4975, "step": 105200 }, { "epoch": 0.06, "eval_loss": 2.3696866035461426, "eval_runtime": 16.9769, "eval_samples_per_second": 207.282, "eval_steps_per_second": 6.479, "step": 105200 }, { "epoch": 0.07, "learning_rate": 3.4167000862833744e-05, "loss": 1.4978, "step": 105600 }, { "epoch": 0.07, "eval_loss": 2.4747281074523926, "eval_runtime": 17.0304, "eval_samples_per_second": 206.63, "eval_steps_per_second": 6.459, "step": 105600 }, { "epoch": 0.07, "learning_rate": 3.40859836102098e-05, "loss": 1.4985, "step": 106000 }, { "epoch": 0.07, "eval_loss": 2.3790531158447266, "eval_runtime": 16.9847, "eval_samples_per_second": 207.187, "eval_steps_per_second": 6.476, "step": 106000 }, { "epoch": 0.07, "learning_rate": 3.400496635758585e-05, "loss": 1.4961, "step": 106400 }, { "epoch": 0.07, "eval_loss": 2.390604019165039, "eval_runtime": 17.3582, "eval_samples_per_second": 202.729, "eval_steps_per_second": 6.337, "step": 106400 }, { "epoch": 0.0, "learning_rate": 3.392394910496191e-05, "loss": 1.4959, "step": 106800 }, { "epoch": 0.0, "eval_loss": 2.415346622467041, "eval_runtime": 20.0907, "eval_samples_per_second": 175.156, "eval_steps_per_second": 5.475, "step": 106800 }, { "epoch": 0.01, "learning_rate": 3.3842931852337954e-05, "loss": 1.4956, "step": 107200 }, { "epoch": 0.01, "eval_loss": 2.4299123287200928, "eval_runtime": 18.8725, "eval_samples_per_second": 186.462, "eval_steps_per_second": 5.829, "step": 107200 }, { "epoch": 0.01, "learning_rate": 3.376191459971401e-05, "loss": 1.4964, "step": 107600 }, { "epoch": 0.01, "eval_loss": 2.448073625564575, "eval_runtime": 18.7704, "eval_samples_per_second": 187.476, "eval_steps_per_second": 5.86, "step": 107600 }, { "epoch": 0.01, "learning_rate": 3.368089734709006e-05, "loss": 1.497, "step": 108000 }, { "epoch": 0.01, "eval_loss": 2.390690565109253, "eval_runtime": 18.5096, "eval_samples_per_second": 190.118, "eval_steps_per_second": 5.943, "step": 108000 }, { "epoch": 0.01, "learning_rate": 3.359988009446612e-05, "loss": 1.4955, "step": 108400 }, { "epoch": 0.01, "eval_loss": 2.383636713027954, "eval_runtime": 18.4941, "eval_samples_per_second": 190.277, "eval_steps_per_second": 5.948, "step": 108400 }, { "epoch": 0.02, "learning_rate": 3.351886284184217e-05, "loss": 1.4953, "step": 108800 }, { "epoch": 0.02, "eval_loss": 2.400592565536499, "eval_runtime": 18.4735, "eval_samples_per_second": 190.489, "eval_steps_per_second": 5.954, "step": 108800 }, { "epoch": 0.02, "learning_rate": 3.3437845589218226e-05, "loss": 1.4939, "step": 109200 }, { "epoch": 0.02, "eval_loss": 2.349822759628296, "eval_runtime": 18.6128, "eval_samples_per_second": 189.063, "eval_steps_per_second": 5.91, "step": 109200 }, { "epoch": 0.02, "learning_rate": 3.335682833659428e-05, "loss": 1.4943, "step": 109600 }, { "epoch": 0.02, "eval_loss": 2.3708629608154297, "eval_runtime": 18.5009, "eval_samples_per_second": 190.207, "eval_steps_per_second": 5.946, "step": 109600 }, { "epoch": 0.03, "learning_rate": 3.3275811083970334e-05, "loss": 1.4942, "step": 110000 }, { "epoch": 0.03, "eval_loss": 2.338743209838867, "eval_runtime": 18.4865, "eval_samples_per_second": 190.355, "eval_steps_per_second": 5.95, "step": 110000 }, { "epoch": 0.03, "learning_rate": 3.319479383134639e-05, "loss": 1.4923, "step": 110400 }, { "epoch": 0.03, "eval_loss": 2.4041731357574463, "eval_runtime": 18.5038, "eval_samples_per_second": 190.177, "eval_steps_per_second": 5.945, "step": 110400 }, { "epoch": 0.0, "learning_rate": 3.3113776578722436e-05, "loss": 1.4934, "step": 110800 }, { "epoch": 0.0, "eval_loss": 2.4086883068084717, "eval_runtime": 17.8895, "eval_samples_per_second": 196.707, "eval_steps_per_second": 6.149, "step": 110800 }, { "epoch": 0.01, "learning_rate": 3.30327593260985e-05, "loss": 1.4917, "step": 111200 }, { "epoch": 0.01, "eval_loss": 2.3683786392211914, "eval_runtime": 17.4874, "eval_samples_per_second": 201.23, "eval_steps_per_second": 6.29, "step": 111200 }, { "epoch": 0.01, "learning_rate": 3.2951742073474545e-05, "loss": 1.4926, "step": 111600 }, { "epoch": 0.01, "eval_loss": 2.3743233680725098, "eval_runtime": 17.4669, "eval_samples_per_second": 201.467, "eval_steps_per_second": 6.298, "step": 111600 }, { "epoch": 0.01, "learning_rate": 3.28707248208506e-05, "loss": 1.4913, "step": 112000 }, { "epoch": 0.01, "eval_loss": 2.3969030380249023, "eval_runtime": 17.4406, "eval_samples_per_second": 201.77, "eval_steps_per_second": 6.307, "step": 112000 }, { "epoch": 0.01, "learning_rate": 3.278970756822666e-05, "loss": 1.4923, "step": 112400 }, { "epoch": 0.01, "eval_loss": 2.373997688293457, "eval_runtime": 17.6827, "eval_samples_per_second": 199.008, "eval_steps_per_second": 6.221, "step": 112400 }, { "epoch": 0.02, "learning_rate": 3.270869031560271e-05, "loss": 1.4913, "step": 112800 }, { "epoch": 0.02, "eval_loss": 2.3612871170043945, "eval_runtime": 17.4041, "eval_samples_per_second": 202.193, "eval_steps_per_second": 6.32, "step": 112800 }, { "epoch": 0.02, "learning_rate": 3.262767306297876e-05, "loss": 1.4909, "step": 113200 }, { "epoch": 0.02, "eval_loss": 2.3404111862182617, "eval_runtime": 17.5513, "eval_samples_per_second": 200.498, "eval_steps_per_second": 6.267, "step": 113200 }, { "epoch": 0.02, "learning_rate": 3.2546655810354816e-05, "loss": 1.491, "step": 113600 }, { "epoch": 0.02, "eval_loss": 2.2388041019439697, "eval_runtime": 17.6295, "eval_samples_per_second": 199.609, "eval_steps_per_second": 6.24, "step": 113600 }, { "epoch": 0.03, "learning_rate": 3.246563855773087e-05, "loss": 1.4896, "step": 114000 }, { "epoch": 0.03, "eval_loss": 2.3492588996887207, "eval_runtime": 17.3833, "eval_samples_per_second": 202.436, "eval_steps_per_second": 6.328, "step": 114000 }, { "epoch": 0.03, "learning_rate": 3.238462130510692e-05, "loss": 1.4899, "step": 114400 }, { "epoch": 0.03, "eval_loss": 2.347364664077759, "eval_runtime": 17.468, "eval_samples_per_second": 201.454, "eval_steps_per_second": 6.297, "step": 114400 }, { "epoch": 0.03, "learning_rate": 3.230360405248298e-05, "loss": 1.4881, "step": 114800 }, { "epoch": 0.03, "eval_loss": 2.315025568008423, "eval_runtime": 17.4, "eval_samples_per_second": 202.242, "eval_steps_per_second": 6.322, "step": 114800 }, { "epoch": 0.03, "learning_rate": 3.2222586799859033e-05, "loss": 1.4905, "step": 115200 }, { "epoch": 0.03, "eval_loss": 2.344813346862793, "eval_runtime": 17.3103, "eval_samples_per_second": 203.29, "eval_steps_per_second": 6.355, "step": 115200 }, { "epoch": 0.04, "learning_rate": 3.214156954723508e-05, "loss": 1.4894, "step": 115600 }, { "epoch": 0.04, "eval_loss": 2.350853443145752, "eval_runtime": 17.3476, "eval_samples_per_second": 202.852, "eval_steps_per_second": 6.341, "step": 115600 }, { "epoch": 0.04, "learning_rate": 3.206055229461114e-05, "loss": 1.4885, "step": 116000 }, { "epoch": 0.04, "eval_loss": 2.273857355117798, "eval_runtime": 17.3165, "eval_samples_per_second": 203.217, "eval_steps_per_second": 6.352, "step": 116000 }, { "epoch": 0.04, "learning_rate": 3.197953504198719e-05, "loss": 1.4895, "step": 116400 }, { "epoch": 0.04, "eval_loss": 2.3339993953704834, "eval_runtime": 17.3637, "eval_samples_per_second": 202.664, "eval_steps_per_second": 6.335, "step": 116400 }, { "epoch": 0.05, "learning_rate": 3.1898517789363244e-05, "loss": 1.4886, "step": 116800 }, { "epoch": 0.05, "eval_loss": 2.3035190105438232, "eval_runtime": 17.249, "eval_samples_per_second": 204.011, "eval_steps_per_second": 6.377, "step": 116800 }, { "epoch": 0.05, "learning_rate": 3.18175005367393e-05, "loss": 1.4867, "step": 117200 }, { "epoch": 0.05, "eval_loss": 2.355330467224121, "eval_runtime": 17.2592, "eval_samples_per_second": 203.891, "eval_steps_per_second": 6.373, "step": 117200 }, { "epoch": 0.05, "learning_rate": 3.173648328411535e-05, "loss": 1.4859, "step": 117600 }, { "epoch": 0.05, "eval_loss": 2.3306944370269775, "eval_runtime": 17.5199, "eval_samples_per_second": 200.857, "eval_steps_per_second": 6.279, "step": 117600 }, { "epoch": 0.06, "learning_rate": 3.165546603149141e-05, "loss": 1.4879, "step": 118000 }, { "epoch": 0.06, "eval_loss": 2.3352627754211426, "eval_runtime": 17.5475, "eval_samples_per_second": 200.542, "eval_steps_per_second": 6.269, "step": 118000 }, { "epoch": 0.06, "learning_rate": 3.157444877886746e-05, "loss": 1.4863, "step": 118400 }, { "epoch": 0.06, "eval_loss": 2.357405662536621, "eval_runtime": 17.7502, "eval_samples_per_second": 198.252, "eval_steps_per_second": 6.197, "step": 118400 }, { "epoch": 0.06, "learning_rate": 3.1493431526243515e-05, "loss": 1.4858, "step": 118800 }, { "epoch": 0.06, "eval_loss": 2.3991518020629883, "eval_runtime": 17.6792, "eval_samples_per_second": 199.048, "eval_steps_per_second": 6.222, "step": 118800 }, { "epoch": 0.06, "learning_rate": 3.141241427361957e-05, "loss": 1.4855, "step": 119200 }, { "epoch": 0.06, "eval_loss": 2.353144884109497, "eval_runtime": 17.7114, "eval_samples_per_second": 198.685, "eval_steps_per_second": 6.211, "step": 119200 }, { "epoch": 0.07, "learning_rate": 3.1331397020995624e-05, "loss": 1.4856, "step": 119600 }, { "epoch": 0.07, "eval_loss": 2.409151315689087, "eval_runtime": 17.7645, "eval_samples_per_second": 198.092, "eval_steps_per_second": 6.192, "step": 119600 }, { "epoch": 0.07, "learning_rate": 3.125037976837167e-05, "loss": 1.4876, "step": 120000 }, { "epoch": 0.07, "eval_loss": 2.3355095386505127, "eval_runtime": 17.7334, "eval_samples_per_second": 198.439, "eval_steps_per_second": 6.203, "step": 120000 }, { "epoch": 0.07, "learning_rate": 3.116936251574773e-05, "loss": 1.4874, "step": 120400 }, { "epoch": 0.07, "eval_loss": 2.3579752445220947, "eval_runtime": 17.7018, "eval_samples_per_second": 198.793, "eval_steps_per_second": 6.214, "step": 120400 }, { "epoch": 0.08, "learning_rate": 3.108834526312379e-05, "loss": 1.4867, "step": 120800 }, { "epoch": 0.08, "eval_loss": 2.3405985832214355, "eval_runtime": 17.7175, "eval_samples_per_second": 198.617, "eval_steps_per_second": 6.209, "step": 120800 }, { "epoch": 0.08, "learning_rate": 3.1007328010499834e-05, "loss": 1.4847, "step": 121200 }, { "epoch": 0.08, "eval_loss": 2.321049213409424, "eval_runtime": 17.748, "eval_samples_per_second": 198.276, "eval_steps_per_second": 6.198, "step": 121200 }, { "epoch": 0.08, "learning_rate": 3.0926310757875895e-05, "loss": 1.4842, "step": 121600 }, { "epoch": 0.08, "eval_loss": 2.3495261669158936, "eval_runtime": 17.6755, "eval_samples_per_second": 199.09, "eval_steps_per_second": 6.223, "step": 121600 }, { "epoch": 0.08, "learning_rate": 3.084529350525194e-05, "loss": 1.484, "step": 122000 }, { "epoch": 0.08, "eval_loss": 2.3278751373291016, "eval_runtime": 17.6587, "eval_samples_per_second": 199.278, "eval_steps_per_second": 6.229, "step": 122000 }, { "epoch": 0.09, "learning_rate": 3.0764276252628e-05, "loss": 1.4817, "step": 122400 }, { "epoch": 0.09, "eval_loss": 2.352627754211426, "eval_runtime": 17.7968, "eval_samples_per_second": 197.732, "eval_steps_per_second": 6.181, "step": 122400 }, { "epoch": 0.09, "learning_rate": 3.068325900000405e-05, "loss": 1.4823, "step": 122800 }, { "epoch": 0.09, "eval_loss": 2.3326263427734375, "eval_runtime": 17.8301, "eval_samples_per_second": 197.363, "eval_steps_per_second": 6.169, "step": 122800 }, { "epoch": 0.09, "learning_rate": 3.0602241747380106e-05, "loss": 1.4814, "step": 123200 }, { "epoch": 0.09, "eval_loss": 2.4039418697357178, "eval_runtime": 17.726, "eval_samples_per_second": 198.522, "eval_steps_per_second": 6.206, "step": 123200 }, { "epoch": 0.1, "learning_rate": 3.052122449475616e-05, "loss": 1.4802, "step": 123600 }, { "epoch": 0.1, "eval_loss": 2.3534297943115234, "eval_runtime": 18.0233, "eval_samples_per_second": 195.247, "eval_steps_per_second": 6.103, "step": 123600 }, { "epoch": 0.1, "learning_rate": 3.044020724213221e-05, "loss": 1.4823, "step": 124000 }, { "epoch": 0.1, "eval_loss": 2.3589508533477783, "eval_runtime": 18.0015, "eval_samples_per_second": 195.484, "eval_steps_per_second": 6.111, "step": 124000 }, { "epoch": 0.1, "learning_rate": 3.035918998950827e-05, "loss": 1.4806, "step": 124400 }, { "epoch": 0.1, "eval_loss": 2.3476579189300537, "eval_runtime": 18.054, "eval_samples_per_second": 194.916, "eval_steps_per_second": 6.093, "step": 124400 }, { "epoch": 0.1, "learning_rate": 3.027817273688432e-05, "loss": 1.481, "step": 124800 }, { "epoch": 0.1, "eval_loss": 2.3086392879486084, "eval_runtime": 18.0863, "eval_samples_per_second": 194.567, "eval_steps_per_second": 6.082, "step": 124800 }, { "epoch": 0.11, "learning_rate": 3.0197155484260374e-05, "loss": 1.4798, "step": 125200 }, { "epoch": 0.11, "eval_loss": 2.331632375717163, "eval_runtime": 18.0209, "eval_samples_per_second": 195.274, "eval_steps_per_second": 6.104, "step": 125200 }, { "epoch": 0.11, "learning_rate": 3.0116138231636425e-05, "loss": 1.481, "step": 125600 }, { "epoch": 0.11, "eval_loss": 2.321038246154785, "eval_runtime": 18.138, "eval_samples_per_second": 194.012, "eval_steps_per_second": 6.065, "step": 125600 }, { "epoch": 0.11, "learning_rate": 3.0035120979012482e-05, "loss": 1.4792, "step": 126000 }, { "epoch": 0.11, "eval_loss": 2.3609230518341064, "eval_runtime": 18.1227, "eval_samples_per_second": 194.176, "eval_steps_per_second": 6.07, "step": 126000 }, { "epoch": 0.12, "learning_rate": 2.9954103726388537e-05, "loss": 1.4783, "step": 126400 }, { "epoch": 0.12, "eval_loss": 2.348484516143799, "eval_runtime": 18.2068, "eval_samples_per_second": 193.279, "eval_steps_per_second": 6.042, "step": 126400 }, { "epoch": 0.12, "learning_rate": 2.9873086473764588e-05, "loss": 1.4783, "step": 126800 }, { "epoch": 0.12, "eval_loss": 2.3550658226013184, "eval_runtime": 18.1831, "eval_samples_per_second": 193.532, "eval_steps_per_second": 6.05, "step": 126800 }, { "epoch": 0.12, "learning_rate": 2.9792069221140645e-05, "loss": 1.478, "step": 127200 }, { "epoch": 0.12, "eval_loss": 2.352349042892456, "eval_runtime": 18.3773, "eval_samples_per_second": 191.487, "eval_steps_per_second": 5.986, "step": 127200 }, { "epoch": 0.13, "learning_rate": 2.9711051968516696e-05, "loss": 1.479, "step": 127600 }, { "epoch": 0.13, "eval_loss": 2.3229057788848877, "eval_runtime": 18.4727, "eval_samples_per_second": 190.498, "eval_steps_per_second": 5.955, "step": 127600 }, { "epoch": 0.13, "learning_rate": 2.963003471589275e-05, "loss": 1.4787, "step": 128000 }, { "epoch": 0.13, "eval_loss": 2.3134686946868896, "eval_runtime": 18.5086, "eval_samples_per_second": 190.128, "eval_steps_per_second": 5.943, "step": 128000 }, { "epoch": 0.13, "learning_rate": 2.95490174632688e-05, "loss": 1.4775, "step": 128400 }, { "epoch": 0.13, "eval_loss": 2.27996826171875, "eval_runtime": 18.3605, "eval_samples_per_second": 191.661, "eval_steps_per_second": 5.991, "step": 128400 }, { "epoch": 0.13, "learning_rate": 2.946800021064486e-05, "loss": 1.4766, "step": 128800 }, { "epoch": 0.13, "eval_loss": 2.2963178157806396, "eval_runtime": 18.3042, "eval_samples_per_second": 192.251, "eval_steps_per_second": 6.01, "step": 128800 }, { "epoch": 0.14, "learning_rate": 2.9386982958020913e-05, "loss": 1.4762, "step": 129200 }, { "epoch": 0.14, "eval_loss": 2.3238120079040527, "eval_runtime": 18.4678, "eval_samples_per_second": 190.548, "eval_steps_per_second": 5.956, "step": 129200 }, { "epoch": 0.14, "learning_rate": 2.9305965705396964e-05, "loss": 1.4769, "step": 129600 }, { "epoch": 0.14, "eval_loss": 2.3036534786224365, "eval_runtime": 18.3198, "eval_samples_per_second": 192.087, "eval_steps_per_second": 6.004, "step": 129600 }, { "epoch": 0.14, "learning_rate": 2.9224948452773022e-05, "loss": 1.4756, "step": 130000 }, { "epoch": 0.14, "eval_loss": 2.3685128688812256, "eval_runtime": 18.2275, "eval_samples_per_second": 193.06, "eval_steps_per_second": 6.035, "step": 130000 }, { "epoch": 0.15, "learning_rate": 2.9143931200149073e-05, "loss": 1.4752, "step": 130400 }, { "epoch": 0.15, "eval_loss": 2.288372278213501, "eval_runtime": 18.3274, "eval_samples_per_second": 192.008, "eval_steps_per_second": 6.002, "step": 130400 }, { "epoch": 0.15, "learning_rate": 2.9062913947525127e-05, "loss": 1.4747, "step": 130800 }, { "epoch": 0.15, "eval_loss": 2.3392255306243896, "eval_runtime": 18.2629, "eval_samples_per_second": 192.686, "eval_steps_per_second": 6.023, "step": 130800 }, { "epoch": 0.15, "learning_rate": 2.8981896694901178e-05, "loss": 1.4738, "step": 131200 }, { "epoch": 0.15, "eval_loss": 2.3563013076782227, "eval_runtime": 18.4362, "eval_samples_per_second": 190.875, "eval_steps_per_second": 5.967, "step": 131200 }, { "epoch": 0.15, "learning_rate": 2.8900879442277236e-05, "loss": 1.4749, "step": 131600 }, { "epoch": 0.15, "eval_loss": 2.330927610397339, "eval_runtime": 18.1578, "eval_samples_per_second": 193.801, "eval_steps_per_second": 6.058, "step": 131600 }, { "epoch": 0.16, "learning_rate": 2.881986218965329e-05, "loss": 1.4748, "step": 132000 }, { "epoch": 0.16, "eval_loss": 2.33650279045105, "eval_runtime": 18.3527, "eval_samples_per_second": 191.743, "eval_steps_per_second": 5.994, "step": 132000 }, { "epoch": 0.16, "learning_rate": 2.873884493702934e-05, "loss": 1.4737, "step": 132400 }, { "epoch": 0.16, "eval_loss": 2.3835794925689697, "eval_runtime": 18.2768, "eval_samples_per_second": 192.539, "eval_steps_per_second": 6.019, "step": 132400 }, { "epoch": 0.16, "learning_rate": 2.86578276844054e-05, "loss": 1.474, "step": 132800 }, { "epoch": 0.16, "eval_loss": 2.4150733947753906, "eval_runtime": 18.2593, "eval_samples_per_second": 192.724, "eval_steps_per_second": 6.024, "step": 132800 }, { "epoch": 0.17, "learning_rate": 2.8576810431781446e-05, "loss": 1.4743, "step": 133200 }, { "epoch": 0.17, "eval_loss": 2.36186146736145, "eval_runtime": 18.123, "eval_samples_per_second": 194.173, "eval_steps_per_second": 6.07, "step": 133200 }, { "epoch": 0.17, "learning_rate": 2.8495793179157504e-05, "loss": 1.4735, "step": 133600 }, { "epoch": 0.17, "eval_loss": 2.356795310974121, "eval_runtime": 18.2043, "eval_samples_per_second": 193.306, "eval_steps_per_second": 6.043, "step": 133600 }, { "epoch": 0.17, "learning_rate": 2.8414775926533555e-05, "loss": 1.4735, "step": 134000 }, { "epoch": 0.17, "eval_loss": 2.3677237033843994, "eval_runtime": 18.253, "eval_samples_per_second": 192.791, "eval_steps_per_second": 6.026, "step": 134000 }, { "epoch": 0.17, "learning_rate": 2.833375867390961e-05, "loss": 1.4715, "step": 134400 }, { "epoch": 0.17, "eval_loss": 2.361776113510132, "eval_runtime": 18.182, "eval_samples_per_second": 193.543, "eval_steps_per_second": 6.05, "step": 134400 }, { "epoch": 0.18, "learning_rate": 2.8252741421285667e-05, "loss": 1.4726, "step": 134800 }, { "epoch": 0.18, "eval_loss": 2.3906137943267822, "eval_runtime": 18.0913, "eval_samples_per_second": 194.513, "eval_steps_per_second": 6.08, "step": 134800 }, { "epoch": 0.18, "learning_rate": 2.8171724168661718e-05, "loss": 1.4716, "step": 135200 }, { "epoch": 0.18, "eval_loss": 2.340426445007324, "eval_runtime": 18.1553, "eval_samples_per_second": 193.828, "eval_steps_per_second": 6.059, "step": 135200 }, { "epoch": 0.18, "learning_rate": 2.8090706916037772e-05, "loss": 1.4719, "step": 135600 }, { "epoch": 0.18, "eval_loss": 2.340381383895874, "eval_runtime": 18.1363, "eval_samples_per_second": 194.031, "eval_steps_per_second": 6.065, "step": 135600 }, { "epoch": 0.19, "learning_rate": 2.8009689663413823e-05, "loss": 1.4725, "step": 136000 }, { "epoch": 0.19, "eval_loss": 2.370542526245117, "eval_runtime": 18.2157, "eval_samples_per_second": 193.185, "eval_steps_per_second": 6.039, "step": 136000 }, { "epoch": 0.19, "learning_rate": 2.792867241078988e-05, "loss": 1.4713, "step": 136400 }, { "epoch": 0.19, "eval_loss": 2.360673189163208, "eval_runtime": 18.2181, "eval_samples_per_second": 193.159, "eval_steps_per_second": 6.038, "step": 136400 }, { "epoch": 0.19, "learning_rate": 2.784765515816593e-05, "loss": 1.4714, "step": 136800 }, { "epoch": 0.19, "eval_loss": 2.3657426834106445, "eval_runtime": 18.2301, "eval_samples_per_second": 193.032, "eval_steps_per_second": 6.034, "step": 136800 }, { "epoch": 0.2, "learning_rate": 2.7766637905541986e-05, "loss": 1.4706, "step": 137200 }, { "epoch": 0.2, "eval_loss": 2.3723626136779785, "eval_runtime": 18.1723, "eval_samples_per_second": 193.646, "eval_steps_per_second": 6.053, "step": 137200 }, { "epoch": 0.2, "learning_rate": 2.7685620652918044e-05, "loss": 1.47, "step": 137600 }, { "epoch": 0.2, "eval_loss": 2.3738961219787598, "eval_runtime": 18.1983, "eval_samples_per_second": 193.37, "eval_steps_per_second": 6.045, "step": 137600 }, { "epoch": 0.2, "learning_rate": 2.7604603400294094e-05, "loss": 1.4686, "step": 138000 }, { "epoch": 0.2, "eval_loss": 2.3388829231262207, "eval_runtime": 18.2362, "eval_samples_per_second": 192.968, "eval_steps_per_second": 6.032, "step": 138000 }, { "epoch": 0.2, "learning_rate": 2.752358614767015e-05, "loss": 1.469, "step": 138400 }, { "epoch": 0.2, "eval_loss": 2.3783812522888184, "eval_runtime": 18.2567, "eval_samples_per_second": 192.751, "eval_steps_per_second": 6.025, "step": 138400 }, { "epoch": 0.21, "learning_rate": 2.74425688950462e-05, "loss": 1.4682, "step": 138800 }, { "epoch": 0.21, "eval_loss": 2.3429505825042725, "eval_runtime": 18.2164, "eval_samples_per_second": 193.177, "eval_steps_per_second": 6.039, "step": 138800 }, { "epoch": 0.21, "learning_rate": 2.7361551642422257e-05, "loss": 1.4698, "step": 139200 }, { "epoch": 0.21, "eval_loss": 2.3579936027526855, "eval_runtime": 18.1836, "eval_samples_per_second": 193.526, "eval_steps_per_second": 6.049, "step": 139200 }, { "epoch": 0.21, "learning_rate": 2.7280534389798308e-05, "loss": 1.4676, "step": 139600 }, { "epoch": 0.21, "eval_loss": 2.3819713592529297, "eval_runtime": 18.2677, "eval_samples_per_second": 192.635, "eval_steps_per_second": 6.022, "step": 139600 }, { "epoch": 0.22, "learning_rate": 2.7199517137174363e-05, "loss": 1.4683, "step": 140000 }, { "epoch": 0.22, "eval_loss": 2.426044225692749, "eval_runtime": 18.2225, "eval_samples_per_second": 193.113, "eval_steps_per_second": 6.036, "step": 140000 }, { "epoch": 0.22, "learning_rate": 2.711849988455042e-05, "loss": 1.4677, "step": 140400 }, { "epoch": 0.22, "eval_loss": 2.3789823055267334, "eval_runtime": 31.3826, "eval_samples_per_second": 112.132, "eval_steps_per_second": 3.505, "step": 140400 }, { "epoch": 0.22, "learning_rate": 2.703748263192647e-05, "loss": 1.4686, "step": 140800 }, { "epoch": 0.22, "eval_loss": 2.329643487930298, "eval_runtime": 18.3935, "eval_samples_per_second": 191.317, "eval_steps_per_second": 5.98, "step": 140800 }, { "epoch": 0.22, "learning_rate": 2.6956465379302525e-05, "loss": 1.4679, "step": 141200 }, { "epoch": 0.22, "eval_loss": 2.4011151790618896, "eval_runtime": 18.2288, "eval_samples_per_second": 193.046, "eval_steps_per_second": 6.034, "step": 141200 }, { "epoch": 0.23, "learning_rate": 2.6875448126678576e-05, "loss": 1.4676, "step": 141600 }, { "epoch": 0.23, "eval_loss": 2.377561092376709, "eval_runtime": 20.2447, "eval_samples_per_second": 173.823, "eval_steps_per_second": 5.434, "step": 141600 } ], "max_steps": 274290, "num_train_epochs": 2, "total_flos": 2.641163282310901e+20, "trial_name": null, "trial_params": null }