{ "best_metric": 1.8613990545272827, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 3.0078740157480315, "eval_steps": 50, "global_step": 191, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.015748031496062992, "grad_norm": 0.6889989376068115, "learning_rate": 1e-05, "loss": 2.394, "step": 1 }, { "epoch": 0.015748031496062992, "eval_loss": 2.2865548133850098, "eval_runtime": 15.0021, "eval_samples_per_second": 7.132, "eval_steps_per_second": 1.8, "step": 1 }, { "epoch": 0.031496062992125984, "grad_norm": 0.736168622970581, "learning_rate": 2e-05, "loss": 2.2659, "step": 2 }, { "epoch": 0.047244094488188976, "grad_norm": 0.7737554907798767, "learning_rate": 3e-05, "loss": 2.3654, "step": 3 }, { "epoch": 0.06299212598425197, "grad_norm": 0.7534223198890686, "learning_rate": 4e-05, "loss": 2.1618, "step": 4 }, { "epoch": 0.07874015748031496, "grad_norm": 0.6633960008621216, "learning_rate": 5e-05, "loss": 2.1513, "step": 5 }, { "epoch": 0.09448818897637795, "grad_norm": 0.48568227887153625, "learning_rate": 6e-05, "loss": 2.1469, "step": 6 }, { "epoch": 0.11023622047244094, "grad_norm": 0.26518014073371887, "learning_rate": 7e-05, "loss": 2.0181, "step": 7 }, { "epoch": 0.12598425196850394, "grad_norm": 0.2001028209924698, "learning_rate": 8e-05, "loss": 2.0167, "step": 8 }, { "epoch": 0.14173228346456693, "grad_norm": 0.18076087534427643, "learning_rate": 9e-05, "loss": 2.1332, "step": 9 }, { "epoch": 0.15748031496062992, "grad_norm": 0.20848870277404785, "learning_rate": 0.0001, "loss": 2.0462, "step": 10 }, { "epoch": 0.1732283464566929, "grad_norm": 0.2854937016963959, "learning_rate": 9.999246866958692e-05, "loss": 1.9888, "step": 11 }, { "epoch": 0.1889763779527559, "grad_norm": 0.25899070501327515, "learning_rate": 9.996987694718519e-05, "loss": 1.8892, "step": 12 }, { "epoch": 0.2047244094488189, "grad_norm": 0.22199353575706482, "learning_rate": 9.993223163862385e-05, "loss": 1.991, "step": 13 }, { "epoch": 0.2204724409448819, "grad_norm": 0.1916697472333908, "learning_rate": 9.98795440846732e-05, "loss": 1.9856, "step": 14 }, { "epoch": 0.23622047244094488, "grad_norm": 0.16782154142856598, "learning_rate": 9.981183015762832e-05, "loss": 1.945, "step": 15 }, { "epoch": 0.25196850393700787, "grad_norm": 0.15450695157051086, "learning_rate": 9.972911025652753e-05, "loss": 1.9615, "step": 16 }, { "epoch": 0.2677165354330709, "grad_norm": 0.15731865167617798, "learning_rate": 9.963140930100714e-05, "loss": 1.9636, "step": 17 }, { "epoch": 0.28346456692913385, "grad_norm": 0.1690342128276825, "learning_rate": 9.951875672379424e-05, "loss": 1.9034, "step": 18 }, { "epoch": 0.2992125984251969, "grad_norm": 0.1610170602798462, "learning_rate": 9.939118646184007e-05, "loss": 1.8512, "step": 19 }, { "epoch": 0.31496062992125984, "grad_norm": 0.1504582166671753, "learning_rate": 9.924873694609636e-05, "loss": 1.8922, "step": 20 }, { "epoch": 0.33070866141732286, "grad_norm": 0.14803199470043182, "learning_rate": 9.909145108993794e-05, "loss": 1.8427, "step": 21 }, { "epoch": 0.3464566929133858, "grad_norm": 0.13034066557884216, "learning_rate": 9.891937627623487e-05, "loss": 1.9188, "step": 22 }, { "epoch": 0.36220472440944884, "grad_norm": 0.1349916309118271, "learning_rate": 9.873256434307828e-05, "loss": 1.8646, "step": 23 }, { "epoch": 0.3779527559055118, "grad_norm": 0.13976942002773285, "learning_rate": 9.853107156816392e-05, "loss": 1.8348, "step": 24 }, { "epoch": 0.3937007874015748, "grad_norm": 0.13671717047691345, "learning_rate": 9.831495865183831e-05, "loss": 1.8667, "step": 25 }, { "epoch": 0.4094488188976378, "grad_norm": 0.13872890174388885, "learning_rate": 9.808429069881267e-05, "loss": 1.9304, "step": 26 }, { "epoch": 0.4251968503937008, "grad_norm": 0.14029130339622498, "learning_rate": 9.783913719854977e-05, "loss": 2.0072, "step": 27 }, { "epoch": 0.4409448818897638, "grad_norm": 0.12837955355644226, "learning_rate": 9.75795720043301e-05, "loss": 1.9957, "step": 28 }, { "epoch": 0.4566929133858268, "grad_norm": 0.1369132250547409, "learning_rate": 9.730567331100334e-05, "loss": 1.919, "step": 29 }, { "epoch": 0.47244094488188976, "grad_norm": 0.1519915908575058, "learning_rate": 9.701752363143184e-05, "loss": 1.9602, "step": 30 }, { "epoch": 0.4881889763779528, "grad_norm": 0.1411469429731369, "learning_rate": 9.67152097716334e-05, "loss": 2.0029, "step": 31 }, { "epoch": 0.5039370078740157, "grad_norm": 0.1330920159816742, "learning_rate": 9.639882280463071e-05, "loss": 1.8221, "step": 32 }, { "epoch": 0.5196850393700787, "grad_norm": 0.13089032471179962, "learning_rate": 9.606845804301524e-05, "loss": 1.8134, "step": 33 }, { "epoch": 0.5354330708661418, "grad_norm": 0.13314710557460785, "learning_rate": 9.572421501023403e-05, "loss": 1.8679, "step": 34 }, { "epoch": 0.5511811023622047, "grad_norm": 0.13057661056518555, "learning_rate": 9.5366197410608e-05, "loss": 1.8047, "step": 35 }, { "epoch": 0.5669291338582677, "grad_norm": 0.13016384840011597, "learning_rate": 9.499451309809058e-05, "loss": 1.9159, "step": 36 }, { "epoch": 0.5826771653543307, "grad_norm": 0.14073412120342255, "learning_rate": 9.460927404377647e-05, "loss": 1.8997, "step": 37 }, { "epoch": 0.5984251968503937, "grad_norm": 0.13576357066631317, "learning_rate": 9.421059630216991e-05, "loss": 1.9469, "step": 38 }, { "epoch": 0.6141732283464567, "grad_norm": 0.13763922452926636, "learning_rate": 9.37985999762229e-05, "loss": 1.979, "step": 39 }, { "epoch": 0.6299212598425197, "grad_norm": 0.13905873894691467, "learning_rate": 9.337340918115385e-05, "loss": 1.9257, "step": 40 }, { "epoch": 0.6456692913385826, "grad_norm": 0.14740782976150513, "learning_rate": 9.29351520070574e-05, "loss": 1.898, "step": 41 }, { "epoch": 0.6614173228346457, "grad_norm": 0.14223144948482513, "learning_rate": 9.24839604803169e-05, "loss": 1.9446, "step": 42 }, { "epoch": 0.6771653543307087, "grad_norm": 0.14436115324497223, "learning_rate": 9.201997052383106e-05, "loss": 1.9188, "step": 43 }, { "epoch": 0.6929133858267716, "grad_norm": 0.15662525594234467, "learning_rate": 9.154332191606672e-05, "loss": 1.8277, "step": 44 }, { "epoch": 0.7086614173228346, "grad_norm": 0.1833668351173401, "learning_rate": 9.105415824895009e-05, "loss": 1.9246, "step": 45 }, { "epoch": 0.7244094488188977, "grad_norm": 0.15985259413719177, "learning_rate": 9.055262688460931e-05, "loss": 1.9636, "step": 46 }, { "epoch": 0.7401574803149606, "grad_norm": 0.14858020842075348, "learning_rate": 9.003887891098107e-05, "loss": 1.7672, "step": 47 }, { "epoch": 0.7559055118110236, "grad_norm": 0.14195561408996582, "learning_rate": 8.951306909629492e-05, "loss": 1.8929, "step": 48 }, { "epoch": 0.7716535433070866, "grad_norm": 0.15022964775562286, "learning_rate": 8.89753558424488e-05, "loss": 1.7574, "step": 49 }, { "epoch": 0.7874015748031497, "grad_norm": 0.15346434712409973, "learning_rate": 8.842590113729001e-05, "loss": 1.8341, "step": 50 }, { "epoch": 0.7874015748031497, "eval_loss": 1.8785834312438965, "eval_runtime": 15.3515, "eval_samples_per_second": 6.97, "eval_steps_per_second": 1.759, "step": 50 }, { "epoch": 0.8031496062992126, "grad_norm": 0.1622915118932724, "learning_rate": 8.786487050581583e-05, "loss": 1.8973, "step": 51 }, { "epoch": 0.8188976377952756, "grad_norm": 0.15684755146503448, "learning_rate": 8.729243296030852e-05, "loss": 1.878, "step": 52 }, { "epoch": 0.8346456692913385, "grad_norm": 0.14528700709342957, "learning_rate": 8.670876094941991e-05, "loss": 1.8766, "step": 53 }, { "epoch": 0.8503937007874016, "grad_norm": 0.1434098184108734, "learning_rate": 8.611403030622074e-05, "loss": 1.8305, "step": 54 }, { "epoch": 0.8661417322834646, "grad_norm": 0.1428898572921753, "learning_rate": 8.550842019523018e-05, "loss": 1.8276, "step": 55 }, { "epoch": 0.8818897637795275, "grad_norm": 0.15573981404304504, "learning_rate": 8.489211305844215e-05, "loss": 1.9209, "step": 56 }, { "epoch": 0.8976377952755905, "grad_norm": 0.1492464542388916, "learning_rate": 8.426529456036401e-05, "loss": 1.8397, "step": 57 }, { "epoch": 0.9133858267716536, "grad_norm": 0.14895202219486237, "learning_rate": 8.36281535320844e-05, "loss": 1.8792, "step": 58 }, { "epoch": 0.9291338582677166, "grad_norm": 0.1692606657743454, "learning_rate": 8.298088191438753e-05, "loss": 1.8976, "step": 59 }, { "epoch": 0.9448818897637795, "grad_norm": 0.17514702677726746, "learning_rate": 8.232367469993018e-05, "loss": 1.9241, "step": 60 }, { "epoch": 0.9606299212598425, "grad_norm": 0.15032561123371124, "learning_rate": 8.165672987449962e-05, "loss": 1.8196, "step": 61 }, { "epoch": 0.9763779527559056, "grad_norm": 0.15248027443885803, "learning_rate": 8.098024835736977e-05, "loss": 1.8387, "step": 62 }, { "epoch": 0.9921259842519685, "grad_norm": 0.1518544852733612, "learning_rate": 8.029443394077356e-05, "loss": 1.838, "step": 63 }, { "epoch": 1.0078740157480315, "grad_norm": 0.30551785230636597, "learning_rate": 7.959949322850995e-05, "loss": 2.9279, "step": 64 }, { "epoch": 1.0236220472440944, "grad_norm": 0.15582340955734253, "learning_rate": 7.889563557370378e-05, "loss": 1.8368, "step": 65 }, { "epoch": 1.0393700787401574, "grad_norm": 0.15312014520168304, "learning_rate": 7.818307301573756e-05, "loss": 1.8002, "step": 66 }, { "epoch": 1.0551181102362204, "grad_norm": 0.14516696333885193, "learning_rate": 7.746202021637385e-05, "loss": 1.8024, "step": 67 }, { "epoch": 1.0708661417322836, "grad_norm": 0.14469550549983978, "learning_rate": 7.67326943950877e-05, "loss": 1.8087, "step": 68 }, { "epoch": 1.0866141732283465, "grad_norm": 0.15195263922214508, "learning_rate": 7.599531526362873e-05, "loss": 1.8305, "step": 69 }, { "epoch": 1.1023622047244095, "grad_norm": 0.15140549838542938, "learning_rate": 7.525010495983203e-05, "loss": 1.7677, "step": 70 }, { "epoch": 1.1181102362204725, "grad_norm": 0.15748384594917297, "learning_rate": 7.449728798069864e-05, "loss": 1.8204, "step": 71 }, { "epoch": 1.1338582677165354, "grad_norm": 0.1564524620771408, "learning_rate": 7.373709111476497e-05, "loss": 1.805, "step": 72 }, { "epoch": 1.1496062992125984, "grad_norm": 0.15556955337524414, "learning_rate": 7.296974337378209e-05, "loss": 1.7831, "step": 73 }, { "epoch": 1.1653543307086613, "grad_norm": 0.1593746393918991, "learning_rate": 7.219547592372512e-05, "loss": 1.805, "step": 74 }, { "epoch": 1.1811023622047245, "grad_norm": 0.1602640002965927, "learning_rate": 7.141452201515386e-05, "loss": 1.8682, "step": 75 }, { "epoch": 1.1968503937007875, "grad_norm": 0.16497738659381866, "learning_rate": 7.062711691294524e-05, "loss": 1.8562, "step": 76 }, { "epoch": 1.2125984251968505, "grad_norm": 0.17264147102832794, "learning_rate": 6.983349782541901e-05, "loss": 1.8635, "step": 77 }, { "epoch": 1.2283464566929134, "grad_norm": 0.19027890264987946, "learning_rate": 6.903390383287794e-05, "loss": 1.8777, "step": 78 }, { "epoch": 1.2440944881889764, "grad_norm": 0.1816241592168808, "learning_rate": 6.822857581558423e-05, "loss": 1.8048, "step": 79 }, { "epoch": 1.2598425196850394, "grad_norm": 0.18363086879253387, "learning_rate": 6.741775638119345e-05, "loss": 1.7665, "step": 80 }, { "epoch": 1.2755905511811023, "grad_norm": 0.17592447996139526, "learning_rate": 6.66016897916682e-05, "loss": 1.8175, "step": 81 }, { "epoch": 1.2913385826771653, "grad_norm": 0.19258500635623932, "learning_rate": 6.578062188969349e-05, "loss": 1.7171, "step": 82 }, { "epoch": 1.3070866141732282, "grad_norm": 0.17426124215126038, "learning_rate": 6.495480002461577e-05, "loss": 1.755, "step": 83 }, { "epoch": 1.3228346456692912, "grad_norm": 0.18073202669620514, "learning_rate": 6.412447297792818e-05, "loss": 1.7719, "step": 84 }, { "epoch": 1.3385826771653544, "grad_norm": 0.18829776346683502, "learning_rate": 6.32898908883243e-05, "loss": 1.8041, "step": 85 }, { "epoch": 1.3543307086614174, "grad_norm": 0.1842590570449829, "learning_rate": 6.245130517634306e-05, "loss": 1.8843, "step": 86 }, { "epoch": 1.3700787401574803, "grad_norm": 0.1831497699022293, "learning_rate": 6.160896846862754e-05, "loss": 1.8305, "step": 87 }, { "epoch": 1.3858267716535433, "grad_norm": 0.18280945718288422, "learning_rate": 6.076313452182033e-05, "loss": 1.7976, "step": 88 }, { "epoch": 1.4015748031496063, "grad_norm": 0.19274728000164032, "learning_rate": 5.9914058146118545e-05, "loss": 1.9423, "step": 89 }, { "epoch": 1.4173228346456692, "grad_norm": 0.19037386775016785, "learning_rate": 5.906199512851145e-05, "loss": 1.7821, "step": 90 }, { "epoch": 1.4330708661417324, "grad_norm": 0.18613559007644653, "learning_rate": 5.820720215572375e-05, "loss": 1.82, "step": 91 }, { "epoch": 1.4488188976377954, "grad_norm": 0.19664619863033295, "learning_rate": 5.734993673688801e-05, "loss": 1.8388, "step": 92 }, { "epoch": 1.4645669291338583, "grad_norm": 0.21224085986614227, "learning_rate": 5.649045712596903e-05, "loss": 1.8331, "step": 93 }, { "epoch": 1.4803149606299213, "grad_norm": 0.20970676839351654, "learning_rate": 5.5629022243964156e-05, "loss": 1.9577, "step": 94 }, { "epoch": 1.4960629921259843, "grad_norm": 0.18832777440547943, "learning_rate": 5.476589160090238e-05, "loss": 1.7784, "step": 95 }, { "epoch": 1.5118110236220472, "grad_norm": 0.1885678768157959, "learning_rate": 5.390132521766625e-05, "loss": 1.8074, "step": 96 }, { "epoch": 1.5275590551181102, "grad_norm": 0.20179858803749084, "learning_rate": 5.303558354765959e-05, "loss": 1.8537, "step": 97 }, { "epoch": 1.5433070866141732, "grad_norm": 0.20285721123218536, "learning_rate": 5.216892739834519e-05, "loss": 1.7856, "step": 98 }, { "epoch": 1.5590551181102361, "grad_norm": 0.19566109776496887, "learning_rate": 5.13016178526756e-05, "loss": 1.7443, "step": 99 }, { "epoch": 1.574803149606299, "grad_norm": 0.2034953087568283, "learning_rate": 5.043391619044122e-05, "loss": 1.8672, "step": 100 }, { "epoch": 1.574803149606299, "eval_loss": 1.8641308546066284, "eval_runtime": 15.0768, "eval_samples_per_second": 7.097, "eval_steps_per_second": 1.791, "step": 100 }, { "epoch": 1.590551181102362, "grad_norm": 0.2120634764432907, "learning_rate": 4.956608380955877e-05, "loss": 1.8115, "step": 101 }, { "epoch": 1.6062992125984252, "grad_norm": 0.20978185534477234, "learning_rate": 4.8698382147324405e-05, "loss": 1.8382, "step": 102 }, { "epoch": 1.6220472440944882, "grad_norm": 0.20224004983901978, "learning_rate": 4.783107260165483e-05, "loss": 1.8086, "step": 103 }, { "epoch": 1.6377952755905512, "grad_norm": 0.20605164766311646, "learning_rate": 4.696441645234041e-05, "loss": 1.8638, "step": 104 }, { "epoch": 1.6535433070866141, "grad_norm": 0.20777210593223572, "learning_rate": 4.609867478233376e-05, "loss": 1.8972, "step": 105 }, { "epoch": 1.6692913385826773, "grad_norm": 0.2162010818719864, "learning_rate": 4.5234108399097635e-05, "loss": 1.8377, "step": 106 }, { "epoch": 1.6850393700787403, "grad_norm": 0.20739884674549103, "learning_rate": 4.437097775603587e-05, "loss": 1.7573, "step": 107 }, { "epoch": 1.7007874015748032, "grad_norm": 0.23839743435382843, "learning_rate": 4.3509542874030986e-05, "loss": 1.8118, "step": 108 }, { "epoch": 1.7165354330708662, "grad_norm": 0.2258765697479248, "learning_rate": 4.265006326311199e-05, "loss": 1.7988, "step": 109 }, { "epoch": 1.7322834645669292, "grad_norm": 0.20277836918830872, "learning_rate": 4.1792797844276244e-05, "loss": 1.7289, "step": 110 }, { "epoch": 1.7480314960629921, "grad_norm": 0.20125284790992737, "learning_rate": 4.093800487148857e-05, "loss": 1.7703, "step": 111 }, { "epoch": 1.763779527559055, "grad_norm": 0.2080337256193161, "learning_rate": 4.008594185388146e-05, "loss": 1.7483, "step": 112 }, { "epoch": 1.779527559055118, "grad_norm": 0.1968063861131668, "learning_rate": 3.9236865478179685e-05, "loss": 1.7455, "step": 113 }, { "epoch": 1.795275590551181, "grad_norm": 0.2077552229166031, "learning_rate": 3.8391031531372476e-05, "loss": 1.7466, "step": 114 }, { "epoch": 1.811023622047244, "grad_norm": 0.1954686939716339, "learning_rate": 3.7548694823656944e-05, "loss": 1.7816, "step": 115 }, { "epoch": 1.826771653543307, "grad_norm": 0.2064894586801529, "learning_rate": 3.6710109111675715e-05, "loss": 1.7347, "step": 116 }, { "epoch": 1.84251968503937, "grad_norm": 0.21842887997627258, "learning_rate": 3.587552702207181e-05, "loss": 1.7776, "step": 117 }, { "epoch": 1.858267716535433, "grad_norm": 0.20662468671798706, "learning_rate": 3.504519997538422e-05, "loss": 1.85, "step": 118 }, { "epoch": 1.874015748031496, "grad_norm": 0.20142106711864471, "learning_rate": 3.421937811030652e-05, "loss": 1.7975, "step": 119 }, { "epoch": 1.889763779527559, "grad_norm": 0.20684291422367096, "learning_rate": 3.339831020833181e-05, "loss": 1.8593, "step": 120 }, { "epoch": 1.905511811023622, "grad_norm": 0.2090405374765396, "learning_rate": 3.258224361880657e-05, "loss": 1.8422, "step": 121 }, { "epoch": 1.9212598425196852, "grad_norm": 0.2096734493970871, "learning_rate": 3.177142418441578e-05, "loss": 1.8178, "step": 122 }, { "epoch": 1.9370078740157481, "grad_norm": 0.2367476224899292, "learning_rate": 3.096609616712207e-05, "loss": 1.866, "step": 123 }, { "epoch": 1.952755905511811, "grad_norm": 0.2206001877784729, "learning_rate": 3.0166502174581014e-05, "loss": 1.8322, "step": 124 }, { "epoch": 1.968503937007874, "grad_norm": 0.24348478019237518, "learning_rate": 2.9372883087054748e-05, "loss": 1.8421, "step": 125 }, { "epoch": 1.984251968503937, "grad_norm": 0.22002661228179932, "learning_rate": 2.8585477984846132e-05, "loss": 1.7946, "step": 126 }, { "epoch": 2.0, "grad_norm": 0.396971732378006, "learning_rate": 2.7804524076274897e-05, "loss": 2.8211, "step": 127 }, { "epoch": 2.015748031496063, "grad_norm": 0.20417475700378418, "learning_rate": 2.703025662621793e-05, "loss": 1.8079, "step": 128 }, { "epoch": 2.031496062992126, "grad_norm": 0.20215673744678497, "learning_rate": 2.6262908885235042e-05, "loss": 1.7662, "step": 129 }, { "epoch": 2.047244094488189, "grad_norm": 0.2186560183763504, "learning_rate": 2.5502712019301362e-05, "loss": 1.7182, "step": 130 }, { "epoch": 2.062992125984252, "grad_norm": 0.20896507799625397, "learning_rate": 2.474989504016798e-05, "loss": 1.8453, "step": 131 }, { "epoch": 2.078740157480315, "grad_norm": 0.21589994430541992, "learning_rate": 2.4004684736371274e-05, "loss": 1.7326, "step": 132 }, { "epoch": 2.094488188976378, "grad_norm": 0.21222083270549774, "learning_rate": 2.32673056049123e-05, "loss": 1.8329, "step": 133 }, { "epoch": 2.1102362204724407, "grad_norm": 0.2157982736825943, "learning_rate": 2.2537979783626167e-05, "loss": 1.7735, "step": 134 }, { "epoch": 2.1259842519685037, "grad_norm": 0.2057959884405136, "learning_rate": 2.1816926984262455e-05, "loss": 1.7196, "step": 135 }, { "epoch": 2.141732283464567, "grad_norm": 0.23256689310073853, "learning_rate": 2.1104364426296235e-05, "loss": 1.7251, "step": 136 }, { "epoch": 2.15748031496063, "grad_norm": 0.21598421037197113, "learning_rate": 2.040050677149008e-05, "loss": 1.8282, "step": 137 }, { "epoch": 2.173228346456693, "grad_norm": 0.21759317815303802, "learning_rate": 1.9705566059226448e-05, "loss": 1.7381, "step": 138 }, { "epoch": 2.188976377952756, "grad_norm": 0.21445372700691223, "learning_rate": 1.9019751642630253e-05, "loss": 1.7411, "step": 139 }, { "epoch": 2.204724409448819, "grad_norm": 0.23186103999614716, "learning_rate": 1.8343270125500378e-05, "loss": 1.7852, "step": 140 }, { "epoch": 2.220472440944882, "grad_norm": 0.25332340598106384, "learning_rate": 1.7676325300069825e-05, "loss": 1.7437, "step": 141 }, { "epoch": 2.236220472440945, "grad_norm": 0.2537396252155304, "learning_rate": 1.7019118085612473e-05, "loss": 1.7367, "step": 142 }, { "epoch": 2.251968503937008, "grad_norm": 0.23744948208332062, "learning_rate": 1.6371846467915603e-05, "loss": 1.8681, "step": 143 }, { "epoch": 2.267716535433071, "grad_norm": 0.23731382191181183, "learning_rate": 1.5734705439636016e-05, "loss": 1.7224, "step": 144 }, { "epoch": 2.283464566929134, "grad_norm": 0.22520650923252106, "learning_rate": 1.5107886941557853e-05, "loss": 1.6721, "step": 145 }, { "epoch": 2.2992125984251968, "grad_norm": 0.23845182359218597, "learning_rate": 1.4491579804769818e-05, "loss": 1.7474, "step": 146 }, { "epoch": 2.3149606299212597, "grad_norm": 0.2290286421775818, "learning_rate": 1.3885969693779277e-05, "loss": 1.6738, "step": 147 }, { "epoch": 2.3307086614173227, "grad_norm": 0.21695880591869354, "learning_rate": 1.3291239050580084e-05, "loss": 1.6648, "step": 148 }, { "epoch": 2.3464566929133857, "grad_norm": 0.22965914011001587, "learning_rate": 1.2707567039691503e-05, "loss": 1.7734, "step": 149 }, { "epoch": 2.362204724409449, "grad_norm": 0.2199874073266983, "learning_rate": 1.2135129494184188e-05, "loss": 1.6455, "step": 150 }, { "epoch": 2.362204724409449, "eval_loss": 1.8613990545272827, "eval_runtime": 15.3382, "eval_samples_per_second": 6.976, "eval_steps_per_second": 1.76, "step": 150 }, { "epoch": 2.377952755905512, "grad_norm": 0.23553703725337982, "learning_rate": 1.1574098862709992e-05, "loss": 1.7086, "step": 151 }, { "epoch": 2.393700787401575, "grad_norm": 0.2348308116197586, "learning_rate": 1.1024644157551206e-05, "loss": 1.7374, "step": 152 }, { "epoch": 2.409448818897638, "grad_norm": 0.24118292331695557, "learning_rate": 1.0486930903705094e-05, "loss": 1.7819, "step": 153 }, { "epoch": 2.425196850393701, "grad_norm": 0.22775617241859436, "learning_rate": 9.961121089018932e-06, "loss": 1.7897, "step": 154 }, { "epoch": 2.440944881889764, "grad_norm": 0.22295954823493958, "learning_rate": 9.4473731153907e-06, "loss": 1.7652, "step": 155 }, { "epoch": 2.456692913385827, "grad_norm": 0.23308737576007843, "learning_rate": 8.945841751049915e-06, "loss": 1.7224, "step": 156 }, { "epoch": 2.47244094488189, "grad_norm": 0.25666847825050354, "learning_rate": 8.456678083933289e-06, "loss": 1.7704, "step": 157 }, { "epoch": 2.4881889763779528, "grad_norm": 0.24374538660049438, "learning_rate": 7.980029476168944e-06, "loss": 1.958, "step": 158 }, { "epoch": 2.5039370078740157, "grad_norm": 0.2336760014295578, "learning_rate": 7.5160395196831046e-06, "loss": 1.6695, "step": 159 }, { "epoch": 2.5196850393700787, "grad_norm": 0.23040024936199188, "learning_rate": 7.064847992942614e-06, "loss": 1.748, "step": 160 }, { "epoch": 2.5354330708661417, "grad_norm": 0.2296517789363861, "learning_rate": 6.626590818846162e-06, "loss": 1.7122, "step": 161 }, { "epoch": 2.5511811023622046, "grad_norm": 0.2303856462240219, "learning_rate": 6.201400023777104e-06, "loss": 1.7648, "step": 162 }, { "epoch": 2.5669291338582676, "grad_norm": 0.2246444672346115, "learning_rate": 5.7894036978301035e-06, "loss": 1.6923, "step": 163 }, { "epoch": 2.5826771653543306, "grad_norm": 0.23633837699890137, "learning_rate": 5.39072595622353e-06, "loss": 1.7337, "step": 164 }, { "epoch": 2.5984251968503935, "grad_norm": 0.24737262725830078, "learning_rate": 5.005486901909428e-06, "loss": 1.7537, "step": 165 }, { "epoch": 2.6141732283464565, "grad_norm": 0.24044926464557648, "learning_rate": 4.6338025893920166e-06, "loss": 1.8285, "step": 166 }, { "epoch": 2.6299212598425195, "grad_norm": 0.2352256029844284, "learning_rate": 4.275784989765985e-06, "loss": 1.7291, "step": 167 }, { "epoch": 2.6456692913385824, "grad_norm": 0.23979999125003815, "learning_rate": 3.93154195698478e-06, "loss": 1.7735, "step": 168 }, { "epoch": 2.661417322834646, "grad_norm": 0.23789139091968536, "learning_rate": 3.601177195369304e-06, "loss": 1.8119, "step": 169 }, { "epoch": 2.677165354330709, "grad_norm": 0.2280394583940506, "learning_rate": 3.2847902283666022e-06, "loss": 1.7269, "step": 170 }, { "epoch": 2.6929133858267718, "grad_norm": 0.22925442457199097, "learning_rate": 2.9824763685681766e-06, "loss": 1.7427, "step": 171 }, { "epoch": 2.7086614173228347, "grad_norm": 0.25694581866264343, "learning_rate": 2.694326688996662e-06, "loss": 1.6969, "step": 172 }, { "epoch": 2.7244094488188977, "grad_norm": 0.24270634353160858, "learning_rate": 2.4204279956698995e-06, "loss": 1.8398, "step": 173 }, { "epoch": 2.7401574803149606, "grad_norm": 0.23506343364715576, "learning_rate": 2.1608628014502365e-06, "loss": 1.6692, "step": 174 }, { "epoch": 2.7559055118110236, "grad_norm": 0.2271362543106079, "learning_rate": 1.915709301187335e-06, "loss": 1.6801, "step": 175 }, { "epoch": 2.7716535433070866, "grad_norm": 0.22768786549568176, "learning_rate": 1.6850413481616868e-06, "loss": 1.5849, "step": 176 }, { "epoch": 2.7874015748031495, "grad_norm": 0.24064046144485474, "learning_rate": 1.4689284318360918e-06, "loss": 1.7468, "step": 177 }, { "epoch": 2.8031496062992125, "grad_norm": 0.2282429337501526, "learning_rate": 1.2674356569217282e-06, "loss": 1.7783, "step": 178 }, { "epoch": 2.8188976377952755, "grad_norm": 0.2347092181444168, "learning_rate": 1.080623723765134e-06, "loss": 1.7443, "step": 179 }, { "epoch": 2.8346456692913384, "grad_norm": 0.23791402578353882, "learning_rate": 9.085489100620737e-07, "loss": 1.8306, "step": 180 }, { "epoch": 2.850393700787402, "grad_norm": 0.23669809103012085, "learning_rate": 7.512630539036502e-07, "loss": 1.7913, "step": 181 }, { "epoch": 2.866141732283465, "grad_norm": 0.24042251706123352, "learning_rate": 6.088135381599414e-07, "loss": 1.7201, "step": 182 }, { "epoch": 2.8818897637795278, "grad_norm": 0.23241114616394043, "learning_rate": 4.812432762057673e-07, "loss": 1.7816, "step": 183 }, { "epoch": 2.8976377952755907, "grad_norm": 0.24429729580879211, "learning_rate": 3.685906989928656e-07, "loss": 1.8598, "step": 184 }, { "epoch": 2.9133858267716537, "grad_norm": 0.23608095943927765, "learning_rate": 2.7088974347246887e-07, "loss": 1.7586, "step": 185 }, { "epoch": 2.9291338582677167, "grad_norm": 0.2519053816795349, "learning_rate": 1.8816984237169376e-07, "loss": 1.7577, "step": 186 }, { "epoch": 2.9448818897637796, "grad_norm": 0.27554014325141907, "learning_rate": 1.2045591532681145e-07, "loss": 1.7541, "step": 187 }, { "epoch": 2.9606299212598426, "grad_norm": 0.24321898818016052, "learning_rate": 6.776836137615262e-08, "loss": 1.7011, "step": 188 }, { "epoch": 2.9763779527559056, "grad_norm": 0.24500428140163422, "learning_rate": 3.0123052814812206e-08, "loss": 1.7679, "step": 189 }, { "epoch": 2.9921259842519685, "grad_norm": 0.2374401092529297, "learning_rate": 7.53133041307974e-09, "loss": 1.7637, "step": 190 }, { "epoch": 3.0078740157480315, "grad_norm": 0.47424909472465515, "learning_rate": 0.0, "loss": 2.8832, "step": 191 } ], "logging_steps": 1, "max_steps": 191, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.319641044038451e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }