{ "best_metric": 0.6319752335548401, "best_model_checkpoint": "miner_id_24/checkpoint-150", "epoch": 0.8053691275167785, "eval_steps": 50, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005369127516778523, "grad_norm": 0.17935922741889954, "learning_rate": 1e-05, "loss": 0.9868, "step": 1 }, { "epoch": 0.005369127516778523, "eval_loss": 1.000736117362976, "eval_runtime": 23.3567, "eval_samples_per_second": 13.444, "eval_steps_per_second": 3.382, "step": 1 }, { "epoch": 0.010738255033557046, "grad_norm": 0.1903594434261322, "learning_rate": 2e-05, "loss": 0.9936, "step": 2 }, { "epoch": 0.016107382550335572, "grad_norm": 0.18861770629882812, "learning_rate": 3e-05, "loss": 1.0668, "step": 3 }, { "epoch": 0.021476510067114093, "grad_norm": 0.18700075149536133, "learning_rate": 4e-05, "loss": 0.9855, "step": 4 }, { "epoch": 0.026845637583892617, "grad_norm": 0.19070813059806824, "learning_rate": 5e-05, "loss": 1.0233, "step": 5 }, { "epoch": 0.032214765100671144, "grad_norm": 0.1933891475200653, "learning_rate": 6e-05, "loss": 0.9753, "step": 6 }, { "epoch": 0.03758389261744966, "grad_norm": 0.20060376822948456, "learning_rate": 7e-05, "loss": 0.9847, "step": 7 }, { "epoch": 0.042953020134228186, "grad_norm": 0.16197127103805542, "learning_rate": 8e-05, "loss": 0.9503, "step": 8 }, { "epoch": 0.04832214765100671, "grad_norm": 0.19797426462173462, "learning_rate": 9e-05, "loss": 0.953, "step": 9 }, { "epoch": 0.053691275167785234, "grad_norm": 0.29969143867492676, "learning_rate": 0.0001, "loss": 0.9191, "step": 10 }, { "epoch": 0.05906040268456376, "grad_norm": 0.3108259439468384, "learning_rate": 9.999316524962345e-05, "loss": 0.9037, "step": 11 }, { "epoch": 0.06442953020134229, "grad_norm": 0.251691609621048, "learning_rate": 9.997266286704631e-05, "loss": 0.8751, "step": 12 }, { "epoch": 0.0697986577181208, "grad_norm": 0.14941531419754028, "learning_rate": 9.993849845741524e-05, "loss": 0.8419, "step": 13 }, { "epoch": 0.07516778523489932, "grad_norm": 0.1460723727941513, "learning_rate": 9.989068136093873e-05, "loss": 0.8255, "step": 14 }, { "epoch": 0.08053691275167785, "grad_norm": 0.13464461266994476, "learning_rate": 9.98292246503335e-05, "loss": 0.8443, "step": 15 }, { "epoch": 0.08590604026845637, "grad_norm": 0.13969996571540833, "learning_rate": 9.975414512725057e-05, "loss": 0.8443, "step": 16 }, { "epoch": 0.0912751677852349, "grad_norm": 0.137205109000206, "learning_rate": 9.966546331768191e-05, "loss": 0.8176, "step": 17 }, { "epoch": 0.09664429530201342, "grad_norm": 0.14688464999198914, "learning_rate": 9.956320346634876e-05, "loss": 0.8359, "step": 18 }, { "epoch": 0.10201342281879194, "grad_norm": 0.14736346900463104, "learning_rate": 9.944739353007344e-05, "loss": 0.7738, "step": 19 }, { "epoch": 0.10738255033557047, "grad_norm": 0.14455458521842957, "learning_rate": 9.931806517013612e-05, "loss": 0.7582, "step": 20 }, { "epoch": 0.11275167785234899, "grad_norm": 0.1578955203294754, "learning_rate": 9.917525374361912e-05, "loss": 0.82, "step": 21 }, { "epoch": 0.11812080536912752, "grad_norm": 0.14775967597961426, "learning_rate": 9.901899829374047e-05, "loss": 0.7869, "step": 22 }, { "epoch": 0.12348993288590604, "grad_norm": 0.14364656805992126, "learning_rate": 9.884934153917997e-05, "loss": 0.7462, "step": 23 }, { "epoch": 0.12885906040268458, "grad_norm": 0.15344202518463135, "learning_rate": 9.86663298624003e-05, "loss": 0.8008, "step": 24 }, { "epoch": 0.1342281879194631, "grad_norm": 0.13498349487781525, "learning_rate": 9.847001329696653e-05, "loss": 0.7505, "step": 25 }, { "epoch": 0.1395973154362416, "grad_norm": 0.14210180938243866, "learning_rate": 9.826044551386744e-05, "loss": 0.7629, "step": 26 }, { "epoch": 0.14496644295302014, "grad_norm": 0.15320271253585815, "learning_rate": 9.803768380684242e-05, "loss": 0.7964, "step": 27 }, { "epoch": 0.15033557046979865, "grad_norm": 0.1431950330734253, "learning_rate": 9.780178907671789e-05, "loss": 0.772, "step": 28 }, { "epoch": 0.15570469798657718, "grad_norm": 0.14278143644332886, "learning_rate": 9.755282581475769e-05, "loss": 0.7287, "step": 29 }, { "epoch": 0.1610738255033557, "grad_norm": 0.14499890804290771, "learning_rate": 9.729086208503174e-05, "loss": 0.7024, "step": 30 }, { "epoch": 0.16644295302013423, "grad_norm": 0.1663365364074707, "learning_rate": 9.701596950580806e-05, "loss": 0.7815, "step": 31 }, { "epoch": 0.17181208053691274, "grad_norm": 0.15140050649642944, "learning_rate": 9.672822322997305e-05, "loss": 0.7511, "step": 32 }, { "epoch": 0.17718120805369128, "grad_norm": 0.15589123964309692, "learning_rate": 9.642770192448536e-05, "loss": 0.7298, "step": 33 }, { "epoch": 0.1825503355704698, "grad_norm": 0.16241595149040222, "learning_rate": 9.611448774886924e-05, "loss": 0.7514, "step": 34 }, { "epoch": 0.18791946308724833, "grad_norm": 0.16453929245471954, "learning_rate": 9.578866633275288e-05, "loss": 0.7704, "step": 35 }, { "epoch": 0.19328859060402684, "grad_norm": 0.1668446958065033, "learning_rate": 9.545032675245813e-05, "loss": 0.7722, "step": 36 }, { "epoch": 0.19865771812080538, "grad_norm": 0.16324329376220703, "learning_rate": 9.509956150664796e-05, "loss": 0.7183, "step": 37 }, { "epoch": 0.2040268456375839, "grad_norm": 0.16436836123466492, "learning_rate": 9.473646649103818e-05, "loss": 0.6956, "step": 38 }, { "epoch": 0.20939597315436242, "grad_norm": 0.16654111444950104, "learning_rate": 9.43611409721806e-05, "loss": 0.7059, "step": 39 }, { "epoch": 0.21476510067114093, "grad_norm": 0.1823245733976364, "learning_rate": 9.397368756032445e-05, "loss": 0.704, "step": 40 }, { "epoch": 0.22013422818791947, "grad_norm": 0.16855204105377197, "learning_rate": 9.357421218136386e-05, "loss": 0.755, "step": 41 }, { "epoch": 0.22550335570469798, "grad_norm": 0.18215735256671906, "learning_rate": 9.316282404787871e-05, "loss": 0.7237, "step": 42 }, { "epoch": 0.23087248322147652, "grad_norm": 0.17373588681221008, "learning_rate": 9.273963562927695e-05, "loss": 0.75, "step": 43 }, { "epoch": 0.23624161073825503, "grad_norm": 0.1918681114912033, "learning_rate": 9.230476262104677e-05, "loss": 0.748, "step": 44 }, { "epoch": 0.24161073825503357, "grad_norm": 0.18946495652198792, "learning_rate": 9.185832391312644e-05, "loss": 0.6984, "step": 45 }, { "epoch": 0.24697986577181208, "grad_norm": 0.21563592553138733, "learning_rate": 9.140044155740101e-05, "loss": 0.6322, "step": 46 }, { "epoch": 0.2523489932885906, "grad_norm": 0.16525784134864807, "learning_rate": 9.093124073433463e-05, "loss": 0.7595, "step": 47 }, { "epoch": 0.25771812080536916, "grad_norm": 0.16520261764526367, "learning_rate": 9.045084971874738e-05, "loss": 0.722, "step": 48 }, { "epoch": 0.26308724832214764, "grad_norm": 0.14878354966640472, "learning_rate": 8.995939984474624e-05, "loss": 0.7551, "step": 49 }, { "epoch": 0.2684563758389262, "grad_norm": 0.1611238270998001, "learning_rate": 8.945702546981969e-05, "loss": 0.7313, "step": 50 }, { "epoch": 0.2684563758389262, "eval_loss": 0.7033802270889282, "eval_runtime": 23.9904, "eval_samples_per_second": 13.089, "eval_steps_per_second": 3.293, "step": 50 }, { "epoch": 0.2738255033557047, "grad_norm": 0.15793895721435547, "learning_rate": 8.894386393810563e-05, "loss": 0.7594, "step": 51 }, { "epoch": 0.2791946308724832, "grad_norm": 0.17630554735660553, "learning_rate": 8.842005554284296e-05, "loss": 0.7322, "step": 52 }, { "epoch": 0.28456375838926173, "grad_norm": 0.16733869910240173, "learning_rate": 8.788574348801675e-05, "loss": 0.7153, "step": 53 }, { "epoch": 0.28993288590604027, "grad_norm": 0.1671655774116516, "learning_rate": 8.73410738492077e-05, "loss": 0.6773, "step": 54 }, { "epoch": 0.2953020134228188, "grad_norm": 0.17812763154506683, "learning_rate": 8.678619553365659e-05, "loss": 0.7242, "step": 55 }, { "epoch": 0.3006711409395973, "grad_norm": 0.15653777122497559, "learning_rate": 8.622126023955446e-05, "loss": 0.6803, "step": 56 }, { "epoch": 0.30604026845637583, "grad_norm": 0.17024900019168854, "learning_rate": 8.564642241456986e-05, "loss": 0.7095, "step": 57 }, { "epoch": 0.31140939597315437, "grad_norm": 0.1669471561908722, "learning_rate": 8.506183921362443e-05, "loss": 0.7242, "step": 58 }, { "epoch": 0.3167785234899329, "grad_norm": 0.1669328212738037, "learning_rate": 8.44676704559283e-05, "loss": 0.702, "step": 59 }, { "epoch": 0.3221476510067114, "grad_norm": 0.16168099641799927, "learning_rate": 8.386407858128706e-05, "loss": 0.7038, "step": 60 }, { "epoch": 0.3275167785234899, "grad_norm": 0.1733594536781311, "learning_rate": 8.32512286056924e-05, "loss": 0.742, "step": 61 }, { "epoch": 0.33288590604026846, "grad_norm": 0.16489963233470917, "learning_rate": 8.262928807620843e-05, "loss": 0.7032, "step": 62 }, { "epoch": 0.338255033557047, "grad_norm": 0.1702485829591751, "learning_rate": 8.199842702516583e-05, "loss": 0.7188, "step": 63 }, { "epoch": 0.3436241610738255, "grad_norm": 0.16496972739696503, "learning_rate": 8.135881792367686e-05, "loss": 0.7085, "step": 64 }, { "epoch": 0.348993288590604, "grad_norm": 0.16463220119476318, "learning_rate": 8.07106356344834e-05, "loss": 0.6886, "step": 65 }, { "epoch": 0.35436241610738256, "grad_norm": 0.16626203060150146, "learning_rate": 8.005405736415126e-05, "loss": 0.7033, "step": 66 }, { "epoch": 0.3597315436241611, "grad_norm": 0.1716552972793579, "learning_rate": 7.938926261462366e-05, "loss": 0.6994, "step": 67 }, { "epoch": 0.3651006711409396, "grad_norm": 0.17230892181396484, "learning_rate": 7.871643313414718e-05, "loss": 0.6905, "step": 68 }, { "epoch": 0.3704697986577181, "grad_norm": 0.18411889672279358, "learning_rate": 7.803575286758364e-05, "loss": 0.6921, "step": 69 }, { "epoch": 0.37583892617449666, "grad_norm": 0.18664830923080444, "learning_rate": 7.734740790612136e-05, "loss": 0.7306, "step": 70 }, { "epoch": 0.3812080536912752, "grad_norm": 0.1846221536397934, "learning_rate": 7.66515864363997e-05, "loss": 0.6792, "step": 71 }, { "epoch": 0.3865771812080537, "grad_norm": 0.1809433102607727, "learning_rate": 7.594847868906076e-05, "loss": 0.6841, "step": 72 }, { "epoch": 0.3919463087248322, "grad_norm": 0.18039539456367493, "learning_rate": 7.52382768867422e-05, "loss": 0.6987, "step": 73 }, { "epoch": 0.39731543624161075, "grad_norm": 0.17351402342319489, "learning_rate": 7.452117519152542e-05, "loss": 0.7099, "step": 74 }, { "epoch": 0.40268456375838924, "grad_norm": 0.1782248616218567, "learning_rate": 7.379736965185368e-05, "loss": 0.6626, "step": 75 }, { "epoch": 0.4080536912751678, "grad_norm": 0.1830756962299347, "learning_rate": 7.30670581489344e-05, "loss": 0.6905, "step": 76 }, { "epoch": 0.4134228187919463, "grad_norm": 0.18826453387737274, "learning_rate": 7.233044034264034e-05, "loss": 0.6654, "step": 77 }, { "epoch": 0.41879194630872485, "grad_norm": 0.18378107249736786, "learning_rate": 7.158771761692464e-05, "loss": 0.665, "step": 78 }, { "epoch": 0.42416107382550333, "grad_norm": 0.19378505647182465, "learning_rate": 7.083909302476453e-05, "loss": 0.6915, "step": 79 }, { "epoch": 0.42953020134228187, "grad_norm": 0.20365728437900543, "learning_rate": 7.008477123264848e-05, "loss": 0.7173, "step": 80 }, { "epoch": 0.4348993288590604, "grad_norm": 0.21446937322616577, "learning_rate": 6.932495846462261e-05, "loss": 0.6877, "step": 81 }, { "epoch": 0.44026845637583895, "grad_norm": 0.19280417263507843, "learning_rate": 6.855986244591104e-05, "loss": 0.679, "step": 82 }, { "epoch": 0.44563758389261743, "grad_norm": 0.1929524540901184, "learning_rate": 6.778969234612584e-05, "loss": 0.6628, "step": 83 }, { "epoch": 0.45100671140939597, "grad_norm": 0.2053307145833969, "learning_rate": 6.701465872208216e-05, "loss": 0.651, "step": 84 }, { "epoch": 0.4563758389261745, "grad_norm": 0.1985684186220169, "learning_rate": 6.623497346023418e-05, "loss": 0.6602, "step": 85 }, { "epoch": 0.46174496644295304, "grad_norm": 0.1963135302066803, "learning_rate": 6.545084971874738e-05, "loss": 0.6256, "step": 86 }, { "epoch": 0.4671140939597315, "grad_norm": 0.20027688145637512, "learning_rate": 6.466250186922325e-05, "loss": 0.6658, "step": 87 }, { "epoch": 0.47248322147651006, "grad_norm": 0.20961076021194458, "learning_rate": 6.387014543809223e-05, "loss": 0.6722, "step": 88 }, { "epoch": 0.4778523489932886, "grad_norm": 0.20259904861450195, "learning_rate": 6.307399704769099e-05, "loss": 0.6527, "step": 89 }, { "epoch": 0.48322147651006714, "grad_norm": 0.23572717607021332, "learning_rate": 6.227427435703997e-05, "loss": 0.6791, "step": 90 }, { "epoch": 0.4885906040268456, "grad_norm": 0.22425709664821625, "learning_rate": 6.147119600233758e-05, "loss": 0.6486, "step": 91 }, { "epoch": 0.49395973154362416, "grad_norm": 0.2310435175895691, "learning_rate": 6.066498153718735e-05, "loss": 0.57, "step": 92 }, { "epoch": 0.4993288590604027, "grad_norm": 0.21507185697555542, "learning_rate": 5.985585137257401e-05, "loss": 0.669, "step": 93 }, { "epoch": 0.5046979865771812, "grad_norm": 0.19094298779964447, "learning_rate": 5.90440267166055e-05, "loss": 0.6834, "step": 94 }, { "epoch": 0.5100671140939598, "grad_norm": 0.17572060227394104, "learning_rate": 5.8229729514036705e-05, "loss": 0.7109, "step": 95 }, { "epoch": 0.5154362416107383, "grad_norm": 0.16800172626972198, "learning_rate": 5.74131823855921e-05, "loss": 0.6952, "step": 96 }, { "epoch": 0.5208053691275167, "grad_norm": 0.177272766828537, "learning_rate": 5.6594608567103456e-05, "loss": 0.6919, "step": 97 }, { "epoch": 0.5261744966442953, "grad_norm": 0.19817174971103668, "learning_rate": 5.577423184847932e-05, "loss": 0.6399, "step": 98 }, { "epoch": 0.5315436241610738, "grad_norm": 0.17929233610630035, "learning_rate": 5.495227651252315e-05, "loss": 0.6435, "step": 99 }, { "epoch": 0.5369127516778524, "grad_norm": 0.1787331998348236, "learning_rate": 5.4128967273616625e-05, "loss": 0.6656, "step": 100 }, { "epoch": 0.5369127516778524, "eval_loss": 0.6536333560943604, "eval_runtime": 23.4935, "eval_samples_per_second": 13.365, "eval_steps_per_second": 3.363, "step": 100 }, { "epoch": 0.5422818791946309, "grad_norm": 0.18226991593837738, "learning_rate": 5.330452921628497e-05, "loss": 0.6319, "step": 101 }, { "epoch": 0.5476510067114094, "grad_norm": 0.1781727373600006, "learning_rate": 5.247918773366112e-05, "loss": 0.6676, "step": 102 }, { "epoch": 0.553020134228188, "grad_norm": 0.17426691949367523, "learning_rate": 5.165316846586541e-05, "loss": 0.6091, "step": 103 }, { "epoch": 0.5583892617449664, "grad_norm": 0.19165848195552826, "learning_rate": 5.0826697238317935e-05, "loss": 0.636, "step": 104 }, { "epoch": 0.5637583892617449, "grad_norm": 0.1880224198102951, "learning_rate": 5e-05, "loss": 0.684, "step": 105 }, { "epoch": 0.5691275167785235, "grad_norm": 0.1876673400402069, "learning_rate": 4.917330276168208e-05, "loss": 0.6554, "step": 106 }, { "epoch": 0.574496644295302, "grad_norm": 0.18983015418052673, "learning_rate": 4.834683153413459e-05, "loss": 0.6895, "step": 107 }, { "epoch": 0.5798657718120805, "grad_norm": 0.181321382522583, "learning_rate": 4.7520812266338885e-05, "loss": 0.6862, "step": 108 }, { "epoch": 0.5852348993288591, "grad_norm": 0.18645992875099182, "learning_rate": 4.669547078371504e-05, "loss": 0.6709, "step": 109 }, { "epoch": 0.5906040268456376, "grad_norm": 0.18336749076843262, "learning_rate": 4.5871032726383386e-05, "loss": 0.6345, "step": 110 }, { "epoch": 0.5959731543624162, "grad_norm": 0.19022372364997864, "learning_rate": 4.504772348747687e-05, "loss": 0.6468, "step": 111 }, { "epoch": 0.6013422818791946, "grad_norm": 0.19106510281562805, "learning_rate": 4.4225768151520694e-05, "loss": 0.6402, "step": 112 }, { "epoch": 0.6067114093959731, "grad_norm": 0.1971225142478943, "learning_rate": 4.3405391432896555e-05, "loss": 0.6412, "step": 113 }, { "epoch": 0.6120805369127517, "grad_norm": 0.18851780891418457, "learning_rate": 4.2586817614407895e-05, "loss": 0.7026, "step": 114 }, { "epoch": 0.6174496644295302, "grad_norm": 0.20051071047782898, "learning_rate": 4.17702704859633e-05, "loss": 0.6316, "step": 115 }, { "epoch": 0.6228187919463087, "grad_norm": 0.19763968884944916, "learning_rate": 4.095597328339452e-05, "loss": 0.6779, "step": 116 }, { "epoch": 0.6281879194630873, "grad_norm": 0.1965392380952835, "learning_rate": 4.0144148627425993e-05, "loss": 0.6542, "step": 117 }, { "epoch": 0.6335570469798658, "grad_norm": 0.19687078893184662, "learning_rate": 3.933501846281267e-05, "loss": 0.6869, "step": 118 }, { "epoch": 0.6389261744966444, "grad_norm": 0.20442505180835724, "learning_rate": 3.852880399766243e-05, "loss": 0.6652, "step": 119 }, { "epoch": 0.6442953020134228, "grad_norm": 0.19597525894641876, "learning_rate": 3.772572564296005e-05, "loss": 0.6469, "step": 120 }, { "epoch": 0.6496644295302013, "grad_norm": 0.19051812589168549, "learning_rate": 3.6926002952309016e-05, "loss": 0.6295, "step": 121 }, { "epoch": 0.6550335570469799, "grad_norm": 0.2047436237335205, "learning_rate": 3.612985456190778e-05, "loss": 0.6709, "step": 122 }, { "epoch": 0.6604026845637584, "grad_norm": 0.1932784467935562, "learning_rate": 3.533749813077677e-05, "loss": 0.6481, "step": 123 }, { "epoch": 0.6657718120805369, "grad_norm": 0.19861674308776855, "learning_rate": 3.4549150281252636e-05, "loss": 0.5989, "step": 124 }, { "epoch": 0.6711409395973155, "grad_norm": 0.19298812747001648, "learning_rate": 3.3765026539765834e-05, "loss": 0.6121, "step": 125 }, { "epoch": 0.676510067114094, "grad_norm": 0.1975845992565155, "learning_rate": 3.298534127791785e-05, "loss": 0.5962, "step": 126 }, { "epoch": 0.6818791946308724, "grad_norm": 0.20991215109825134, "learning_rate": 3.221030765387417e-05, "loss": 0.6705, "step": 127 }, { "epoch": 0.687248322147651, "grad_norm": 0.21251270174980164, "learning_rate": 3.144013755408895e-05, "loss": 0.6383, "step": 128 }, { "epoch": 0.6926174496644295, "grad_norm": 0.2004583477973938, "learning_rate": 3.0675041535377405e-05, "loss": 0.6391, "step": 129 }, { "epoch": 0.697986577181208, "grad_norm": 0.21894288063049316, "learning_rate": 2.991522876735154e-05, "loss": 0.6504, "step": 130 }, { "epoch": 0.7033557046979866, "grad_norm": 0.21032415330410004, "learning_rate": 2.916090697523549e-05, "loss": 0.6776, "step": 131 }, { "epoch": 0.7087248322147651, "grad_norm": 0.21922121942043304, "learning_rate": 2.8412282383075363e-05, "loss": 0.6522, "step": 132 }, { "epoch": 0.7140939597315437, "grad_norm": 0.2144535481929779, "learning_rate": 2.766955965735968e-05, "loss": 0.6355, "step": 133 }, { "epoch": 0.7194630872483222, "grad_norm": 0.21111609041690826, "learning_rate": 2.693294185106562e-05, "loss": 0.6681, "step": 134 }, { "epoch": 0.7248322147651006, "grad_norm": 0.21090613305568695, "learning_rate": 2.6202630348146324e-05, "loss": 0.6237, "step": 135 }, { "epoch": 0.7302013422818792, "grad_norm": 0.21762122213840485, "learning_rate": 2.547882480847461e-05, "loss": 0.6421, "step": 136 }, { "epoch": 0.7355704697986577, "grad_norm": 0.2338666021823883, "learning_rate": 2.476172311325783e-05, "loss": 0.6195, "step": 137 }, { "epoch": 0.7409395973154362, "grad_norm": 0.2523767352104187, "learning_rate": 2.405152131093926e-05, "loss": 0.5805, "step": 138 }, { "epoch": 0.7463087248322148, "grad_norm": 0.22406931221485138, "learning_rate": 2.3348413563600325e-05, "loss": 0.6769, "step": 139 }, { "epoch": 0.7516778523489933, "grad_norm": 0.21790383756160736, "learning_rate": 2.2652592093878666e-05, "loss": 0.6533, "step": 140 }, { "epoch": 0.7570469798657719, "grad_norm": 0.19225141406059265, "learning_rate": 2.196424713241637e-05, "loss": 0.675, "step": 141 }, { "epoch": 0.7624161073825504, "grad_norm": 0.19361992180347443, "learning_rate": 2.128356686585282e-05, "loss": 0.6946, "step": 142 }, { "epoch": 0.7677852348993288, "grad_norm": 0.18840467929840088, "learning_rate": 2.061073738537635e-05, "loss": 0.6627, "step": 143 }, { "epoch": 0.7731543624161074, "grad_norm": 0.18914321064949036, "learning_rate": 1.9945942635848748e-05, "loss": 0.6741, "step": 144 }, { "epoch": 0.7785234899328859, "grad_norm": 0.18746811151504517, "learning_rate": 1.928936436551661e-05, "loss": 0.6667, "step": 145 }, { "epoch": 0.7838926174496644, "grad_norm": 0.19153329730033875, "learning_rate": 1.8641182076323148e-05, "loss": 0.6435, "step": 146 }, { "epoch": 0.789261744966443, "grad_norm": 0.19970956444740295, "learning_rate": 1.800157297483417e-05, "loss": 0.6672, "step": 147 }, { "epoch": 0.7946308724832215, "grad_norm": 0.20399050414562225, "learning_rate": 1.7370711923791567e-05, "loss": 0.6903, "step": 148 }, { "epoch": 0.8, "grad_norm": 0.20653124153614044, "learning_rate": 1.6748771394307585e-05, "loss": 0.6271, "step": 149 }, { "epoch": 0.8053691275167785, "grad_norm": 0.2060387283563614, "learning_rate": 1.6135921418712956e-05, "loss": 0.6813, "step": 150 }, { "epoch": 0.8053691275167785, "eval_loss": 0.6319752335548401, "eval_runtime": 23.9924, "eval_samples_per_second": 13.087, "eval_steps_per_second": 3.293, "step": 150 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.0622884217028608e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }