{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.5153532316942238,
  "eval_steps": 500,
  "global_step": 900,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0005726147018824708,
      "grad_norm": 7.761023998260498,
      "learning_rate": 0.0,
      "loss": 6.0592,
      "step": 1
    },
    {
      "epoch": 0.0011452294037649416,
      "grad_norm": 7.8541951179504395,
      "learning_rate": 5.714285714285715e-07,
      "loss": 6.0156,
      "step": 2
    },
    {
      "epoch": 0.0022904588075298832,
      "grad_norm": 7.347611904144287,
      "learning_rate": 1.7142857142857145e-06,
      "loss": 6.0103,
      "step": 4
    },
    {
      "epoch": 0.003435688211294825,
      "grad_norm": 5.382428169250488,
      "learning_rate": 2.8571428571428573e-06,
      "loss": 5.9221,
      "step": 6
    },
    {
      "epoch": 0.0045809176150597665,
      "grad_norm": 5.063406467437744,
      "learning_rate": 4.000000000000001e-06,
      "loss": 6.0365,
      "step": 8
    },
    {
      "epoch": 0.005726147018824708,
      "grad_norm": 9.779157638549805,
      "learning_rate": 5.142857142857143e-06,
      "loss": 6.0336,
      "step": 10
    },
    {
      "epoch": 0.00687137642258965,
      "grad_norm": 7.555446147918701,
      "learning_rate": 6.285714285714287e-06,
      "loss": 6.0328,
      "step": 12
    },
    {
      "epoch": 0.008016605826354592,
      "grad_norm": 6.790043354034424,
      "learning_rate": 7.428571428571429e-06,
      "loss": 5.7848,
      "step": 14
    },
    {
      "epoch": 0.009161835230119533,
      "grad_norm": 4.4132208824157715,
      "learning_rate": 8.571428571428573e-06,
      "loss": 5.8207,
      "step": 16
    },
    {
      "epoch": 0.010307064633884476,
      "grad_norm": 4.064995765686035,
      "learning_rate": 9.714285714285715e-06,
      "loss": 5.6497,
      "step": 18
    },
    {
      "epoch": 0.011452294037649417,
      "grad_norm": 3.357184410095215,
      "learning_rate": 1.0857142857142858e-05,
      "loss": 5.7758,
      "step": 20
    },
    {
      "epoch": 0.012597523441414358,
      "grad_norm": 2.742230176925659,
      "learning_rate": 1.2e-05,
      "loss": 5.6173,
      "step": 22
    },
    {
      "epoch": 0.0137427528451793,
      "grad_norm": 2.491459369659424,
      "learning_rate": 1.3142857142857143e-05,
      "loss": 5.6681,
      "step": 24
    },
    {
      "epoch": 0.014887982248944241,
      "grad_norm": 2.7569029331207275,
      "learning_rate": 1.4285714285714285e-05,
      "loss": 5.6393,
      "step": 26
    },
    {
      "epoch": 0.016033211652709184,
      "grad_norm": 2.208378791809082,
      "learning_rate": 1.5428571428571428e-05,
      "loss": 5.5768,
      "step": 28
    },
    {
      "epoch": 0.017178441056474127,
      "grad_norm": 3.2770133018493652,
      "learning_rate": 1.657142857142857e-05,
      "loss": 5.484,
      "step": 30
    },
    {
      "epoch": 0.018323670460239066,
      "grad_norm": 3.177299976348877,
      "learning_rate": 1.7714285714285713e-05,
      "loss": 5.528,
      "step": 32
    },
    {
      "epoch": 0.01946889986400401,
      "grad_norm": 2.1981537342071533,
      "learning_rate": 1.885714285714286e-05,
      "loss": 5.6327,
      "step": 34
    },
    {
      "epoch": 0.02061412926776895,
      "grad_norm": 3.265881061553955,
      "learning_rate": 2e-05,
      "loss": 5.6288,
      "step": 36
    },
    {
      "epoch": 0.02175935867153389,
      "grad_norm": 3.6059298515319824,
      "learning_rate": 2.1142857142857144e-05,
      "loss": 5.4789,
      "step": 38
    },
    {
      "epoch": 0.022904588075298833,
      "grad_norm": 2.4080026149749756,
      "learning_rate": 2.2285714285714287e-05,
      "loss": 5.4046,
      "step": 40
    },
    {
      "epoch": 0.024049817479063776,
      "grad_norm": 2.142902135848999,
      "learning_rate": 2.342857142857143e-05,
      "loss": 5.4738,
      "step": 42
    },
    {
      "epoch": 0.025195046882828715,
      "grad_norm": 2.4021224975585938,
      "learning_rate": 2.4571428571428572e-05,
      "loss": 5.4649,
      "step": 44
    },
    {
      "epoch": 0.026340276286593658,
      "grad_norm": 2.172009229660034,
      "learning_rate": 2.5714285714285714e-05,
      "loss": 5.4302,
      "step": 46
    },
    {
      "epoch": 0.0274855056903586,
      "grad_norm": 2.9737730026245117,
      "learning_rate": 2.6857142857142857e-05,
      "loss": 5.3045,
      "step": 48
    },
    {
      "epoch": 0.028630735094123543,
      "grad_norm": 3.0378615856170654,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 5.2185,
      "step": 50
    },
    {
      "epoch": 0.029775964497888482,
      "grad_norm": 3.4448676109313965,
      "learning_rate": 2.9142857142857146e-05,
      "loss": 5.1838,
      "step": 52
    },
    {
      "epoch": 0.030921193901653425,
      "grad_norm": 2.469245672225952,
      "learning_rate": 3.0285714285714288e-05,
      "loss": 5.1637,
      "step": 54
    },
    {
      "epoch": 0.03206642330541837,
      "grad_norm": 3.58486008644104,
      "learning_rate": 3.142857142857143e-05,
      "loss": 5.2063,
      "step": 56
    },
    {
      "epoch": 0.03321165270918331,
      "grad_norm": 3.0815446376800537,
      "learning_rate": 3.257142857142857e-05,
      "loss": 5.2317,
      "step": 58
    },
    {
      "epoch": 0.03435688211294825,
      "grad_norm": 3.6842119693756104,
      "learning_rate": 3.3714285714285716e-05,
      "loss": 5.2695,
      "step": 60
    },
    {
      "epoch": 0.03550211151671319,
      "grad_norm": 2.9440791606903076,
      "learning_rate": 3.485714285714286e-05,
      "loss": 5.2686,
      "step": 62
    },
    {
      "epoch": 0.03664734092047813,
      "grad_norm": 3.9632568359375,
      "learning_rate": 3.6e-05,
      "loss": 5.1262,
      "step": 64
    },
    {
      "epoch": 0.037792570324243074,
      "grad_norm": 4.045065402984619,
      "learning_rate": 3.7142857142857143e-05,
      "loss": 5.1546,
      "step": 66
    },
    {
      "epoch": 0.03893779972800802,
      "grad_norm": 3.5707085132598877,
      "learning_rate": 3.8285714285714286e-05,
      "loss": 5.0036,
      "step": 68
    },
    {
      "epoch": 0.04008302913177296,
      "grad_norm": 3.014404535293579,
      "learning_rate": 3.942857142857143e-05,
      "loss": 5.026,
      "step": 70
    },
    {
      "epoch": 0.0412282585355379,
      "grad_norm": 2.708796977996826,
      "learning_rate": 4.057142857142857e-05,
      "loss": 4.9442,
      "step": 72
    },
    {
      "epoch": 0.04237348793930284,
      "grad_norm": 2.5384011268615723,
      "learning_rate": 4.1714285714285714e-05,
      "loss": 5.0223,
      "step": 74
    },
    {
      "epoch": 0.04351871734306778,
      "grad_norm": 3.006281852722168,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 4.9827,
      "step": 76
    },
    {
      "epoch": 0.044663946746832724,
      "grad_norm": 2.5772130489349365,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 4.9675,
      "step": 78
    },
    {
      "epoch": 0.045809176150597666,
      "grad_norm": 3.456017255783081,
      "learning_rate": 4.514285714285714e-05,
      "loss": 5.0341,
      "step": 80
    },
    {
      "epoch": 0.04695440555436261,
      "grad_norm": 3.3163113594055176,
      "learning_rate": 4.628571428571429e-05,
      "loss": 4.9867,
      "step": 82
    },
    {
      "epoch": 0.04809963495812755,
      "grad_norm": 3.7568469047546387,
      "learning_rate": 4.742857142857143e-05,
      "loss": 4.8652,
      "step": 84
    },
    {
      "epoch": 0.049244864361892494,
      "grad_norm": 4.19318151473999,
      "learning_rate": 4.8571428571428576e-05,
      "loss": 5.0602,
      "step": 86
    },
    {
      "epoch": 0.05039009376565743,
      "grad_norm": 5.1034064292907715,
      "learning_rate": 4.971428571428572e-05,
      "loss": 4.9757,
      "step": 88
    },
    {
      "epoch": 0.05153532316942237,
      "grad_norm": 4.0827484130859375,
      "learning_rate": 5.085714285714286e-05,
      "loss": 4.8486,
      "step": 90
    },
    {
      "epoch": 0.052680552573187316,
      "grad_norm": 4.6189446449279785,
      "learning_rate": 5.2000000000000004e-05,
      "loss": 4.9595,
      "step": 92
    },
    {
      "epoch": 0.05382578197695226,
      "grad_norm": 3.988513469696045,
      "learning_rate": 5.314285714285715e-05,
      "loss": 4.9035,
      "step": 94
    },
    {
      "epoch": 0.0549710113807172,
      "grad_norm": 3.857276678085327,
      "learning_rate": 5.428571428571428e-05,
      "loss": 4.8277,
      "step": 96
    },
    {
      "epoch": 0.056116240784482144,
      "grad_norm": 3.5372354984283447,
      "learning_rate": 5.542857142857143e-05,
      "loss": 4.7718,
      "step": 98
    },
    {
      "epoch": 0.057261470188247086,
      "grad_norm": 3.3853676319122314,
      "learning_rate": 5.6571428571428574e-05,
      "loss": 4.8098,
      "step": 100
    },
    {
      "epoch": 0.05840669959201202,
      "grad_norm": 2.1142077445983887,
      "learning_rate": 5.771428571428572e-05,
      "loss": 4.7975,
      "step": 102
    },
    {
      "epoch": 0.059551928995776965,
      "grad_norm": 3.2275538444519043,
      "learning_rate": 5.885714285714285e-05,
      "loss": 4.8509,
      "step": 104
    },
    {
      "epoch": 0.06069715839954191,
      "grad_norm": 3.5413126945495605,
      "learning_rate": 6e-05,
      "loss": 4.6069,
      "step": 106
    },
    {
      "epoch": 0.06184238780330685,
      "grad_norm": 2.755648374557495,
      "learning_rate": 6.114285714285714e-05,
      "loss": 4.6951,
      "step": 108
    },
    {
      "epoch": 0.06298761720707179,
      "grad_norm": 2.980039596557617,
      "learning_rate": 6.22857142857143e-05,
      "loss": 4.7012,
      "step": 110
    },
    {
      "epoch": 0.06413284661083674,
      "grad_norm": 4.890020370483398,
      "learning_rate": 6.342857142857143e-05,
      "loss": 4.8008,
      "step": 112
    },
    {
      "epoch": 0.06527807601460167,
      "grad_norm": 4.35846471786499,
      "learning_rate": 6.457142857142856e-05,
      "loss": 4.8587,
      "step": 114
    },
    {
      "epoch": 0.06642330541836662,
      "grad_norm": 3.6171813011169434,
      "learning_rate": 6.571428571428571e-05,
      "loss": 4.7473,
      "step": 116
    },
    {
      "epoch": 0.06756853482213156,
      "grad_norm": 2.4927010536193848,
      "learning_rate": 6.685714285714286e-05,
      "loss": 4.7113,
      "step": 118
    },
    {
      "epoch": 0.0687137642258965,
      "grad_norm": 3.3327009677886963,
      "learning_rate": 6.800000000000001e-05,
      "loss": 4.6105,
      "step": 120
    },
    {
      "epoch": 0.06985899362966144,
      "grad_norm": 3.1123206615448,
      "learning_rate": 6.914285714285715e-05,
      "loss": 4.5968,
      "step": 122
    },
    {
      "epoch": 0.07100422303342638,
      "grad_norm": 2.6985421180725098,
      "learning_rate": 7.028571428571428e-05,
      "loss": 4.6323,
      "step": 124
    },
    {
      "epoch": 0.07214945243719133,
      "grad_norm": 2.058084011077881,
      "learning_rate": 7.142857142857143e-05,
      "loss": 4.5721,
      "step": 126
    },
    {
      "epoch": 0.07329468184095626,
      "grad_norm": 2.144658327102661,
      "learning_rate": 7.257142857142858e-05,
      "loss": 4.6125,
      "step": 128
    },
    {
      "epoch": 0.07443991124472121,
      "grad_norm": 2.477219820022583,
      "learning_rate": 7.371428571428572e-05,
      "loss": 4.4727,
      "step": 130
    },
    {
      "epoch": 0.07558514064848615,
      "grad_norm": 3.8517298698425293,
      "learning_rate": 7.485714285714285e-05,
      "loss": 4.5696,
      "step": 132
    },
    {
      "epoch": 0.0767303700522511,
      "grad_norm": 3.0253565311431885,
      "learning_rate": 7.6e-05,
      "loss": 4.4838,
      "step": 134
    },
    {
      "epoch": 0.07787559945601603,
      "grad_norm": 3.397569179534912,
      "learning_rate": 7.714285714285715e-05,
      "loss": 4.6431,
      "step": 136
    },
    {
      "epoch": 0.07902082885978097,
      "grad_norm": 2.435197114944458,
      "learning_rate": 7.828571428571429e-05,
      "loss": 4.4681,
      "step": 138
    },
    {
      "epoch": 0.08016605826354592,
      "grad_norm": 2.6476476192474365,
      "learning_rate": 7.942857142857143e-05,
      "loss": 4.4462,
      "step": 140
    },
    {
      "epoch": 0.08131128766731086,
      "grad_norm": 2.1929690837860107,
      "learning_rate": 8.057142857142857e-05,
      "loss": 4.5136,
      "step": 142
    },
    {
      "epoch": 0.0824565170710758,
      "grad_norm": 2.4533395767211914,
      "learning_rate": 8.171428571428572e-05,
      "loss": 4.5572,
      "step": 144
    },
    {
      "epoch": 0.08360174647484074,
      "grad_norm": 2.601806879043579,
      "learning_rate": 8.285714285714287e-05,
      "loss": 4.4121,
      "step": 146
    },
    {
      "epoch": 0.08474697587860568,
      "grad_norm": 3.233973741531372,
      "learning_rate": 8.4e-05,
      "loss": 4.4599,
      "step": 148
    },
    {
      "epoch": 0.08589220528237063,
      "grad_norm": 2.6353538036346436,
      "learning_rate": 8.514285714285714e-05,
      "loss": 4.4533,
      "step": 150
    },
    {
      "epoch": 0.08703743468613556,
      "grad_norm": 2.8465511798858643,
      "learning_rate": 8.62857142857143e-05,
      "loss": 4.5246,
      "step": 152
    },
    {
      "epoch": 0.08818266408990051,
      "grad_norm": 2.8642711639404297,
      "learning_rate": 8.742857142857144e-05,
      "loss": 4.4659,
      "step": 154
    },
    {
      "epoch": 0.08932789349366545,
      "grad_norm": 2.793112277984619,
      "learning_rate": 8.857142857142857e-05,
      "loss": 4.5107,
      "step": 156
    },
    {
      "epoch": 0.0904731228974304,
      "grad_norm": 3.43472957611084,
      "learning_rate": 8.971428571428571e-05,
      "loss": 4.4079,
      "step": 158
    },
    {
      "epoch": 0.09161835230119533,
      "grad_norm": 2.9260294437408447,
      "learning_rate": 9.085714285714286e-05,
      "loss": 4.4047,
      "step": 160
    },
    {
      "epoch": 0.09276358170496027,
      "grad_norm": 2.6336724758148193,
      "learning_rate": 9.200000000000001e-05,
      "loss": 4.4777,
      "step": 162
    },
    {
      "epoch": 0.09390881110872522,
      "grad_norm": 2.8348231315612793,
      "learning_rate": 9.314285714285715e-05,
      "loss": 4.3445,
      "step": 164
    },
    {
      "epoch": 0.09505404051249015,
      "grad_norm": 4.271595478057861,
      "learning_rate": 9.428571428571429e-05,
      "loss": 4.4234,
      "step": 166
    },
    {
      "epoch": 0.0961992699162551,
      "grad_norm": 3.4789109230041504,
      "learning_rate": 9.542857142857143e-05,
      "loss": 4.2872,
      "step": 168
    },
    {
      "epoch": 0.09734449932002004,
      "grad_norm": 2.57273530960083,
      "learning_rate": 9.657142857142858e-05,
      "loss": 4.4177,
      "step": 170
    },
    {
      "epoch": 0.09848972872378499,
      "grad_norm": 2.185086250305176,
      "learning_rate": 9.771428571428572e-05,
      "loss": 4.3568,
      "step": 172
    },
    {
      "epoch": 0.09963495812754992,
      "grad_norm": 2.771744966506958,
      "learning_rate": 9.885714285714286e-05,
      "loss": 4.3392,
      "step": 174
    },
    {
      "epoch": 0.10078018753131486,
      "grad_norm": 1.950353741645813,
      "learning_rate": 0.0001,
      "loss": 4.1931,
      "step": 176
    },
    {
      "epoch": 0.10192541693507981,
      "grad_norm": 2.4709694385528564,
      "learning_rate": 9.999991040472416e-05,
      "loss": 4.2936,
      "step": 178
    },
    {
      "epoch": 0.10307064633884475,
      "grad_norm": 2.140997886657715,
      "learning_rate": 9.999964161921776e-05,
      "loss": 4.1653,
      "step": 180
    },
    {
      "epoch": 0.1042158757426097,
      "grad_norm": 2.491321563720703,
      "learning_rate": 9.999919364444403e-05,
      "loss": 4.3202,
      "step": 182
    },
    {
      "epoch": 0.10536110514637463,
      "grad_norm": 2.5410189628601074,
      "learning_rate": 9.999856648200845e-05,
      "loss": 4.2657,
      "step": 184
    },
    {
      "epoch": 0.10650633455013958,
      "grad_norm": 2.1820590496063232,
      "learning_rate": 9.999776013415866e-05,
      "loss": 4.2282,
      "step": 186
    },
    {
      "epoch": 0.10765156395390452,
      "grad_norm": 1.7251808643341064,
      "learning_rate": 9.999677460378444e-05,
      "loss": 4.3421,
      "step": 188
    },
    {
      "epoch": 0.10879679335766945,
      "grad_norm": 2.002145290374756,
      "learning_rate": 9.999560989441779e-05,
      "loss": 4.1361,
      "step": 190
    },
    {
      "epoch": 0.1099420227614344,
      "grad_norm": 1.9663431644439697,
      "learning_rate": 9.999426601023274e-05,
      "loss": 4.201,
      "step": 192
    },
    {
      "epoch": 0.11108725216519934,
      "grad_norm": 2.1406776905059814,
      "learning_rate": 9.999274295604558e-05,
      "loss": 4.1086,
      "step": 194
    },
    {
      "epoch": 0.11223248156896429,
      "grad_norm": 3.3888607025146484,
      "learning_rate": 9.999104073731458e-05,
      "loss": 4.2723,
      "step": 196
    },
    {
      "epoch": 0.11337771097272922,
      "grad_norm": 2.371840715408325,
      "learning_rate": 9.998915936014024e-05,
      "loss": 4.1893,
      "step": 198
    },
    {
      "epoch": 0.11452294037649417,
      "grad_norm": 2.0502302646636963,
      "learning_rate": 9.998709883126502e-05,
      "loss": 4.1395,
      "step": 200
    },
    {
      "epoch": 0.11566816978025911,
      "grad_norm": 1.6674678325653076,
      "learning_rate": 9.998485915807347e-05,
      "loss": 4.071,
      "step": 202
    },
    {
      "epoch": 0.11681339918402404,
      "grad_norm": 1.7829004526138306,
      "learning_rate": 9.998244034859219e-05,
      "loss": 4.1107,
      "step": 204
    },
    {
      "epoch": 0.117958628587789,
      "grad_norm": 1.763493299484253,
      "learning_rate": 9.997984241148967e-05,
      "loss": 4.1142,
      "step": 206
    },
    {
      "epoch": 0.11910385799155393,
      "grad_norm": 2.069258213043213,
      "learning_rate": 9.997706535607649e-05,
      "loss": 4.047,
      "step": 208
    },
    {
      "epoch": 0.12024908739531888,
      "grad_norm": 2.4262139797210693,
      "learning_rate": 9.997410919230505e-05,
      "loss": 4.0396,
      "step": 210
    },
    {
      "epoch": 0.12139431679908382,
      "grad_norm": 1.820494532585144,
      "learning_rate": 9.997097393076971e-05,
      "loss": 4.1548,
      "step": 212
    },
    {
      "epoch": 0.12253954620284876,
      "grad_norm": 2.1332643032073975,
      "learning_rate": 9.996765958270664e-05,
      "loss": 4.1384,
      "step": 214
    },
    {
      "epoch": 0.1236847756066137,
      "grad_norm": 2.1329920291900635,
      "learning_rate": 9.996416615999384e-05,
      "loss": 4.0315,
      "step": 216
    },
    {
      "epoch": 0.12483000501037864,
      "grad_norm": 2.29955792427063,
      "learning_rate": 9.996049367515108e-05,
      "loss": 4.0963,
      "step": 218
    },
    {
      "epoch": 0.12597523441414357,
      "grad_norm": 2.225827693939209,
      "learning_rate": 9.995664214133983e-05,
      "loss": 4.1247,
      "step": 220
    },
    {
      "epoch": 0.12712046381790854,
      "grad_norm": 1.794838786125183,
      "learning_rate": 9.99526115723633e-05,
      "loss": 4.0449,
      "step": 222
    },
    {
      "epoch": 0.12826569322167347,
      "grad_norm": 1.7548491954803467,
      "learning_rate": 9.994840198266626e-05,
      "loss": 3.927,
      "step": 224
    },
    {
      "epoch": 0.1294109226254384,
      "grad_norm": 1.487001895904541,
      "learning_rate": 9.994401338733508e-05,
      "loss": 3.9714,
      "step": 226
    },
    {
      "epoch": 0.13055615202920334,
      "grad_norm": 1.9811242818832397,
      "learning_rate": 9.993944580209768e-05,
      "loss": 4.0094,
      "step": 228
    },
    {
      "epoch": 0.13170138143296828,
      "grad_norm": 1.4257248640060425,
      "learning_rate": 9.99346992433234e-05,
      "loss": 4.0213,
      "step": 230
    },
    {
      "epoch": 0.13284661083673324,
      "grad_norm": 1.545812726020813,
      "learning_rate": 9.992977372802302e-05,
      "loss": 4.0076,
      "step": 232
    },
    {
      "epoch": 0.13399184024049818,
      "grad_norm": 1.8193179368972778,
      "learning_rate": 9.992466927384865e-05,
      "loss": 4.0536,
      "step": 234
    },
    {
      "epoch": 0.1351370696442631,
      "grad_norm": 2.329951763153076,
      "learning_rate": 9.991938589909369e-05,
      "loss": 3.9284,
      "step": 236
    },
    {
      "epoch": 0.13628229904802805,
      "grad_norm": 1.928336501121521,
      "learning_rate": 9.991392362269276e-05,
      "loss": 3.9462,
      "step": 238
    },
    {
      "epoch": 0.137427528451793,
      "grad_norm": 1.4073456525802612,
      "learning_rate": 9.990828246422164e-05,
      "loss": 3.9525,
      "step": 240
    },
    {
      "epoch": 0.13857275785555795,
      "grad_norm": 1.6663973331451416,
      "learning_rate": 9.990246244389713e-05,
      "loss": 3.9685,
      "step": 242
    },
    {
      "epoch": 0.13971798725932288,
      "grad_norm": 1.8091737031936646,
      "learning_rate": 9.989646358257715e-05,
      "loss": 3.9284,
      "step": 244
    },
    {
      "epoch": 0.14086321666308782,
      "grad_norm": 1.5511283874511719,
      "learning_rate": 9.989028590176044e-05,
      "loss": 3.9289,
      "step": 246
    },
    {
      "epoch": 0.14200844606685276,
      "grad_norm": 1.5394625663757324,
      "learning_rate": 9.988392942358664e-05,
      "loss": 3.9849,
      "step": 248
    },
    {
      "epoch": 0.14315367547061772,
      "grad_norm": 1.680882453918457,
      "learning_rate": 9.98773941708362e-05,
      "loss": 3.9452,
      "step": 250
    },
    {
      "epoch": 0.14429890487438266,
      "grad_norm": 1.6341670751571655,
      "learning_rate": 9.98706801669302e-05,
      "loss": 3.8317,
      "step": 252
    },
    {
      "epoch": 0.1454441342781476,
      "grad_norm": 1.9933757781982422,
      "learning_rate": 9.986378743593036e-05,
      "loss": 3.9665,
      "step": 254
    },
    {
      "epoch": 0.14658936368191253,
      "grad_norm": 2.2253994941711426,
      "learning_rate": 9.985671600253894e-05,
      "loss": 3.9239,
      "step": 256
    },
    {
      "epoch": 0.14773459308567746,
      "grad_norm": 2.2543365955352783,
      "learning_rate": 9.984946589209862e-05,
      "loss": 3.8639,
      "step": 258
    },
    {
      "epoch": 0.14887982248944243,
      "grad_norm": 1.8106629848480225,
      "learning_rate": 9.984203713059241e-05,
      "loss": 3.9178,
      "step": 260
    },
    {
      "epoch": 0.15002505189320736,
      "grad_norm": 1.638542652130127,
      "learning_rate": 9.983442974464362e-05,
      "loss": 3.9169,
      "step": 262
    },
    {
      "epoch": 0.1511702812969723,
      "grad_norm": 1.3521384000778198,
      "learning_rate": 9.982664376151564e-05,
      "loss": 3.8682,
      "step": 264
    },
    {
      "epoch": 0.15231551070073723,
      "grad_norm": 1.6458699703216553,
      "learning_rate": 9.981867920911201e-05,
      "loss": 3.9566,
      "step": 266
    },
    {
      "epoch": 0.1534607401045022,
      "grad_norm": 1.7851066589355469,
      "learning_rate": 9.981053611597615e-05,
      "loss": 3.9085,
      "step": 268
    },
    {
      "epoch": 0.15460596950826713,
      "grad_norm": 1.6740517616271973,
      "learning_rate": 9.980221451129137e-05,
      "loss": 3.8899,
      "step": 270
    },
    {
      "epoch": 0.15575119891203207,
      "grad_norm": 1.117129921913147,
      "learning_rate": 9.979371442488073e-05,
      "loss": 3.7544,
      "step": 272
    },
    {
      "epoch": 0.156896428315797,
      "grad_norm": 1.5676058530807495,
      "learning_rate": 9.978503588720694e-05,
      "loss": 3.7753,
      "step": 274
    },
    {
      "epoch": 0.15804165771956194,
      "grad_norm": 1.6609163284301758,
      "learning_rate": 9.977617892937223e-05,
      "loss": 3.8463,
      "step": 276
    },
    {
      "epoch": 0.1591868871233269,
      "grad_norm": 1.7229987382888794,
      "learning_rate": 9.976714358311828e-05,
      "loss": 3.8446,
      "step": 278
    },
    {
      "epoch": 0.16033211652709184,
      "grad_norm": 1.6770962476730347,
      "learning_rate": 9.975792988082603e-05,
      "loss": 3.8684,
      "step": 280
    },
    {
      "epoch": 0.16147734593085677,
      "grad_norm": 1.215281367301941,
      "learning_rate": 9.974853785551568e-05,
      "loss": 3.7788,
      "step": 282
    },
    {
      "epoch": 0.1626225753346217,
      "grad_norm": 1.208257794380188,
      "learning_rate": 9.973896754084646e-05,
      "loss": 3.8338,
      "step": 284
    },
    {
      "epoch": 0.16376780473838665,
      "grad_norm": 1.4068255424499512,
      "learning_rate": 9.972921897111658e-05,
      "loss": 3.8583,
      "step": 286
    },
    {
      "epoch": 0.1649130341421516,
      "grad_norm": 1.4898021221160889,
      "learning_rate": 9.971929218126306e-05,
      "loss": 3.8051,
      "step": 288
    },
    {
      "epoch": 0.16605826354591655,
      "grad_norm": 1.6303211450576782,
      "learning_rate": 9.970918720686164e-05,
      "loss": 3.8598,
      "step": 290
    },
    {
      "epoch": 0.16720349294968148,
      "grad_norm": 1.6599496603012085,
      "learning_rate": 9.969890408412665e-05,
      "loss": 3.7214,
      "step": 292
    },
    {
      "epoch": 0.16834872235344642,
      "grad_norm": 1.1958950757980347,
      "learning_rate": 9.968844284991086e-05,
      "loss": 3.7042,
      "step": 294
    },
    {
      "epoch": 0.16949395175721135,
      "grad_norm": 1.3099420070648193,
      "learning_rate": 9.967780354170533e-05,
      "loss": 3.7405,
      "step": 296
    },
    {
      "epoch": 0.17063918116097632,
      "grad_norm": 1.5054072141647339,
      "learning_rate": 9.966698619763936e-05,
      "loss": 3.7827,
      "step": 298
    },
    {
      "epoch": 0.17178441056474125,
      "grad_norm": 1.444757103919983,
      "learning_rate": 9.965599085648025e-05,
      "loss": 3.7361,
      "step": 300
    },
    {
      "epoch": 0.1729296399685062,
      "grad_norm": 0.9423370361328125,
      "learning_rate": 9.964481755763322e-05,
      "loss": 3.7063,
      "step": 302
    },
    {
      "epoch": 0.17407486937227112,
      "grad_norm": 1.044169306755066,
      "learning_rate": 9.963346634114128e-05,
      "loss": 3.7999,
      "step": 304
    },
    {
      "epoch": 0.1752200987760361,
      "grad_norm": 1.578296184539795,
      "learning_rate": 9.962193724768503e-05,
      "loss": 3.7448,
      "step": 306
    },
    {
      "epoch": 0.17636532817980102,
      "grad_norm": 1.4953491687774658,
      "learning_rate": 9.961023031858258e-05,
      "loss": 3.7625,
      "step": 308
    },
    {
      "epoch": 0.17751055758356596,
      "grad_norm": 1.295817494392395,
      "learning_rate": 9.959834559578934e-05,
      "loss": 3.7042,
      "step": 310
    },
    {
      "epoch": 0.1786557869873309,
      "grad_norm": 1.4001609086990356,
      "learning_rate": 9.95862831218979e-05,
      "loss": 3.7272,
      "step": 312
    },
    {
      "epoch": 0.17980101639109583,
      "grad_norm": 1.8881722688674927,
      "learning_rate": 9.95740429401379e-05,
      "loss": 3.6904,
      "step": 314
    },
    {
      "epoch": 0.1809462457948608,
      "grad_norm": 1.919791340827942,
      "learning_rate": 9.956162509437584e-05,
      "loss": 3.7071,
      "step": 316
    },
    {
      "epoch": 0.18209147519862573,
      "grad_norm": 1.758253574371338,
      "learning_rate": 9.954902962911494e-05,
      "loss": 3.7906,
      "step": 318
    },
    {
      "epoch": 0.18323670460239067,
      "grad_norm": 1.480323314666748,
      "learning_rate": 9.953625658949494e-05,
      "loss": 3.7697,
      "step": 320
    },
    {
      "epoch": 0.1843819340061556,
      "grad_norm": 1.5573948621749878,
      "learning_rate": 9.952330602129202e-05,
      "loss": 3.752,
      "step": 322
    },
    {
      "epoch": 0.18552716340992054,
      "grad_norm": 1.3204878568649292,
      "learning_rate": 9.951017797091858e-05,
      "loss": 3.6479,
      "step": 324
    },
    {
      "epoch": 0.1866723928136855,
      "grad_norm": 1.5514147281646729,
      "learning_rate": 9.949687248542303e-05,
      "loss": 3.7199,
      "step": 326
    },
    {
      "epoch": 0.18781762221745044,
      "grad_norm": 1.2910770177841187,
      "learning_rate": 9.948338961248977e-05,
      "loss": 3.7427,
      "step": 328
    },
    {
      "epoch": 0.18896285162121537,
      "grad_norm": 1.1663178205490112,
      "learning_rate": 9.946972940043882e-05,
      "loss": 3.6616,
      "step": 330
    },
    {
      "epoch": 0.1901080810249803,
      "grad_norm": 1.3439650535583496,
      "learning_rate": 9.945589189822584e-05,
      "loss": 3.7385,
      "step": 332
    },
    {
      "epoch": 0.19125331042874527,
      "grad_norm": 1.1256877183914185,
      "learning_rate": 9.94418771554418e-05,
      "loss": 3.6056,
      "step": 334
    },
    {
      "epoch": 0.1923985398325102,
      "grad_norm": 1.1813896894454956,
      "learning_rate": 9.942768522231289e-05,
      "loss": 3.5544,
      "step": 336
    },
    {
      "epoch": 0.19354376923627514,
      "grad_norm": 1.2541157007217407,
      "learning_rate": 9.941331614970031e-05,
      "loss": 3.6401,
      "step": 338
    },
    {
      "epoch": 0.19468899864004008,
      "grad_norm": 1.237069010734558,
      "learning_rate": 9.939876998910012e-05,
      "loss": 3.7564,
      "step": 340
    },
    {
      "epoch": 0.19583422804380501,
      "grad_norm": 1.1157530546188354,
      "learning_rate": 9.938404679264301e-05,
      "loss": 3.6164,
      "step": 342
    },
    {
      "epoch": 0.19697945744756998,
      "grad_norm": 1.149465560913086,
      "learning_rate": 9.936914661309412e-05,
      "loss": 3.6968,
      "step": 344
    },
    {
      "epoch": 0.1981246868513349,
      "grad_norm": 0.9530683755874634,
      "learning_rate": 9.93540695038529e-05,
      "loss": 3.6194,
      "step": 346
    },
    {
      "epoch": 0.19926991625509985,
      "grad_norm": 1.1686296463012695,
      "learning_rate": 9.933881551895281e-05,
      "loss": 3.7604,
      "step": 348
    },
    {
      "epoch": 0.20041514565886479,
      "grad_norm": 1.2699095010757446,
      "learning_rate": 9.93233847130613e-05,
      "loss": 3.6371,
      "step": 350
    },
    {
      "epoch": 0.20156037506262972,
      "grad_norm": 1.1345208883285522,
      "learning_rate": 9.930777714147945e-05,
      "loss": 3.6146,
      "step": 352
    },
    {
      "epoch": 0.20270560446639468,
      "grad_norm": 1.3319895267486572,
      "learning_rate": 9.929199286014185e-05,
      "loss": 3.6443,
      "step": 354
    },
    {
      "epoch": 0.20385083387015962,
      "grad_norm": 1.6053088903427124,
      "learning_rate": 9.927603192561637e-05,
      "loss": 3.6277,
      "step": 356
    },
    {
      "epoch": 0.20499606327392456,
      "grad_norm": 1.2149386405944824,
      "learning_rate": 9.925989439510398e-05,
      "loss": 3.5555,
      "step": 358
    },
    {
      "epoch": 0.2061412926776895,
      "grad_norm": 1.0859287977218628,
      "learning_rate": 9.924358032643855e-05,
      "loss": 3.6253,
      "step": 360
    },
    {
      "epoch": 0.20728652208145446,
      "grad_norm": 0.9613994359970093,
      "learning_rate": 9.922708977808663e-05,
      "loss": 3.5826,
      "step": 362
    },
    {
      "epoch": 0.2084317514852194,
      "grad_norm": 1.0509222745895386,
      "learning_rate": 9.921042280914721e-05,
      "loss": 3.6263,
      "step": 364
    },
    {
      "epoch": 0.20957698088898433,
      "grad_norm": 1.3777049779891968,
      "learning_rate": 9.919357947935156e-05,
      "loss": 3.6187,
      "step": 366
    },
    {
      "epoch": 0.21072221029274926,
      "grad_norm": 1.3364644050598145,
      "learning_rate": 9.9176559849063e-05,
      "loss": 3.5946,
      "step": 368
    },
    {
      "epoch": 0.2118674396965142,
      "grad_norm": 1.4562104940414429,
      "learning_rate": 9.915936397927665e-05,
      "loss": 3.6099,
      "step": 370
    },
    {
      "epoch": 0.21301266910027916,
      "grad_norm": 1.066383719444275,
      "learning_rate": 9.91419919316193e-05,
      "loss": 3.5395,
      "step": 372
    },
    {
      "epoch": 0.2141578985040441,
      "grad_norm": 1.6498733758926392,
      "learning_rate": 9.912444376834903e-05,
      "loss": 3.6083,
      "step": 374
    },
    {
      "epoch": 0.21530312790780903,
      "grad_norm": 0.9828553795814514,
      "learning_rate": 9.910671955235518e-05,
      "loss": 3.5409,
      "step": 376
    },
    {
      "epoch": 0.21644835731157397,
      "grad_norm": 1.178269624710083,
      "learning_rate": 9.908881934715798e-05,
      "loss": 3.6018,
      "step": 378
    },
    {
      "epoch": 0.2175935867153389,
      "grad_norm": 1.3328818082809448,
      "learning_rate": 9.907074321690838e-05,
      "loss": 3.5718,
      "step": 380
    },
    {
      "epoch": 0.21873881611910387,
      "grad_norm": 1.1077896356582642,
      "learning_rate": 9.905249122638783e-05,
      "loss": 3.581,
      "step": 382
    },
    {
      "epoch": 0.2198840455228688,
      "grad_norm": 1.220638394355774,
      "learning_rate": 9.903406344100798e-05,
      "loss": 3.5813,
      "step": 384
    },
    {
      "epoch": 0.22102927492663374,
      "grad_norm": 1.5574766397476196,
      "learning_rate": 9.901545992681057e-05,
      "loss": 3.5785,
      "step": 386
    },
    {
      "epoch": 0.22217450433039868,
      "grad_norm": 1.013902187347412,
      "learning_rate": 9.899668075046706e-05,
      "loss": 3.6156,
      "step": 388
    },
    {
      "epoch": 0.2233197337341636,
      "grad_norm": 1.197936773300171,
      "learning_rate": 9.897772597927848e-05,
      "loss": 3.5428,
      "step": 390
    },
    {
      "epoch": 0.22446496313792857,
      "grad_norm": 0.9838180541992188,
      "learning_rate": 9.895859568117512e-05,
      "loss": 3.534,
      "step": 392
    },
    {
      "epoch": 0.2256101925416935,
      "grad_norm": 1.0316840410232544,
      "learning_rate": 9.893928992471639e-05,
      "loss": 3.5691,
      "step": 394
    },
    {
      "epoch": 0.22675542194545845,
      "grad_norm": 0.9378739595413208,
      "learning_rate": 9.891980877909045e-05,
      "loss": 3.5368,
      "step": 396
    },
    {
      "epoch": 0.22790065134922338,
      "grad_norm": 1.4947346448898315,
      "learning_rate": 9.890015231411404e-05,
      "loss": 3.5709,
      "step": 398
    },
    {
      "epoch": 0.22904588075298835,
      "grad_norm": 0.9118148684501648,
      "learning_rate": 9.888032060023225e-05,
      "loss": 3.527,
      "step": 400
    },
    {
      "epoch": 0.23019111015675328,
      "grad_norm": 1.2407753467559814,
      "learning_rate": 9.886031370851816e-05,
      "loss": 3.5301,
      "step": 402
    },
    {
      "epoch": 0.23133633956051822,
      "grad_norm": 1.7163093090057373,
      "learning_rate": 9.88401317106727e-05,
      "loss": 3.5828,
      "step": 404
    },
    {
      "epoch": 0.23248156896428315,
      "grad_norm": 1.0757009983062744,
      "learning_rate": 9.881977467902434e-05,
      "loss": 3.4831,
      "step": 406
    },
    {
      "epoch": 0.2336267983680481,
      "grad_norm": 0.9473862648010254,
      "learning_rate": 9.879924268652885e-05,
      "loss": 3.5196,
      "step": 408
    },
    {
      "epoch": 0.23477202777181305,
      "grad_norm": 1.199771761894226,
      "learning_rate": 9.877853580676897e-05,
      "loss": 3.574,
      "step": 410
    },
    {
      "epoch": 0.235917257175578,
      "grad_norm": 0.9006698131561279,
      "learning_rate": 9.875765411395428e-05,
      "loss": 3.5348,
      "step": 412
    },
    {
      "epoch": 0.23706248657934292,
      "grad_norm": 1.1242282390594482,
      "learning_rate": 9.873659768292081e-05,
      "loss": 3.5249,
      "step": 414
    },
    {
      "epoch": 0.23820771598310786,
      "grad_norm": 1.0675747394561768,
      "learning_rate": 9.871536658913082e-05,
      "loss": 3.5086,
      "step": 416
    },
    {
      "epoch": 0.2393529453868728,
      "grad_norm": 0.8544116616249084,
      "learning_rate": 9.869396090867255e-05,
      "loss": 3.546,
      "step": 418
    },
    {
      "epoch": 0.24049817479063776,
      "grad_norm": 1.3136742115020752,
      "learning_rate": 9.867238071825992e-05,
      "loss": 3.4937,
      "step": 420
    },
    {
      "epoch": 0.2416434041944027,
      "grad_norm": 1.3740772008895874,
      "learning_rate": 9.865062609523223e-05,
      "loss": 3.4303,
      "step": 422
    },
    {
      "epoch": 0.24278863359816763,
      "grad_norm": 1.342213749885559,
      "learning_rate": 9.862869711755397e-05,
      "loss": 3.4982,
      "step": 424
    },
    {
      "epoch": 0.24393386300193257,
      "grad_norm": 1.0677942037582397,
      "learning_rate": 9.860659386381443e-05,
      "loss": 3.4288,
      "step": 426
    },
    {
      "epoch": 0.24507909240569753,
      "grad_norm": 0.9615838527679443,
      "learning_rate": 9.858431641322749e-05,
      "loss": 3.4787,
      "step": 428
    },
    {
      "epoch": 0.24622432180946247,
      "grad_norm": 1.0572890043258667,
      "learning_rate": 9.856186484563134e-05,
      "loss": 3.5314,
      "step": 430
    },
    {
      "epoch": 0.2473695512132274,
      "grad_norm": 1.158275842666626,
      "learning_rate": 9.853923924148815e-05,
      "loss": 3.5504,
      "step": 432
    },
    {
      "epoch": 0.24851478061699234,
      "grad_norm": 1.171581745147705,
      "learning_rate": 9.851643968188383e-05,
      "loss": 3.5478,
      "step": 434
    },
    {
      "epoch": 0.24966001002075727,
      "grad_norm": 1.0333714485168457,
      "learning_rate": 9.849346624852764e-05,
      "loss": 3.5497,
      "step": 436
    },
    {
      "epoch": 0.2508052394245222,
      "grad_norm": 0.9459155797958374,
      "learning_rate": 9.847031902375207e-05,
      "loss": 3.5074,
      "step": 438
    },
    {
      "epoch": 0.25195046882828714,
      "grad_norm": 1.0424790382385254,
      "learning_rate": 9.84469980905124e-05,
      "loss": 3.4961,
      "step": 440
    },
    {
      "epoch": 0.25309569823205214,
      "grad_norm": 1.0463571548461914,
      "learning_rate": 9.842350353238642e-05,
      "loss": 3.4405,
      "step": 442
    },
    {
      "epoch": 0.25424092763581707,
      "grad_norm": 1.000319242477417,
      "learning_rate": 9.839983543357421e-05,
      "loss": 3.4595,
      "step": 444
    },
    {
      "epoch": 0.255386157039582,
      "grad_norm": 1.2526150941848755,
      "learning_rate": 9.837599387889773e-05,
      "loss": 3.5012,
      "step": 446
    },
    {
      "epoch": 0.25653138644334694,
      "grad_norm": 1.3148843050003052,
      "learning_rate": 9.835197895380065e-05,
      "loss": 3.4767,
      "step": 448
    },
    {
      "epoch": 0.2576766158471119,
      "grad_norm": 1.3939634561538696,
      "learning_rate": 9.83277907443479e-05,
      "loss": 3.3783,
      "step": 450
    },
    {
      "epoch": 0.2588218452508768,
      "grad_norm": 1.0367929935455322,
      "learning_rate": 9.830342933722545e-05,
      "loss": 3.4289,
      "step": 452
    },
    {
      "epoch": 0.25996707465464175,
      "grad_norm": 0.9439120888710022,
      "learning_rate": 9.827889481974e-05,
      "loss": 3.4728,
      "step": 454
    },
    {
      "epoch": 0.2611123040584067,
      "grad_norm": 1.2146074771881104,
      "learning_rate": 9.82541872798186e-05,
      "loss": 3.4257,
      "step": 456
    },
    {
      "epoch": 0.2622575334621716,
      "grad_norm": 1.0530729293823242,
      "learning_rate": 9.822930680600841e-05,
      "loss": 3.4681,
      "step": 458
    },
    {
      "epoch": 0.26340276286593656,
      "grad_norm": 1.1026678085327148,
      "learning_rate": 9.820425348747637e-05,
      "loss": 3.4298,
      "step": 460
    },
    {
      "epoch": 0.26454799226970155,
      "grad_norm": 1.2520779371261597,
      "learning_rate": 9.817902741400879e-05,
      "loss": 3.4191,
      "step": 462
    },
    {
      "epoch": 0.2656932216734665,
      "grad_norm": 1.1041593551635742,
      "learning_rate": 9.815362867601121e-05,
      "loss": 3.466,
      "step": 464
    },
    {
      "epoch": 0.2668384510772314,
      "grad_norm": 0.881693422794342,
      "learning_rate": 9.812805736450786e-05,
      "loss": 3.4929,
      "step": 466
    },
    {
      "epoch": 0.26798368048099636,
      "grad_norm": 1.3125033378601074,
      "learning_rate": 9.810231357114152e-05,
      "loss": 3.4592,
      "step": 468
    },
    {
      "epoch": 0.2691289098847613,
      "grad_norm": 1.2968268394470215,
      "learning_rate": 9.807639738817307e-05,
      "loss": 3.4851,
      "step": 470
    },
    {
      "epoch": 0.2702741392885262,
      "grad_norm": 0.9855544567108154,
      "learning_rate": 9.805030890848119e-05,
      "loss": 3.4487,
      "step": 472
    },
    {
      "epoch": 0.27141936869229116,
      "grad_norm": 1.3063323497772217,
      "learning_rate": 9.802404822556209e-05,
      "loss": 3.4961,
      "step": 474
    },
    {
      "epoch": 0.2725645980960561,
      "grad_norm": 1.0567957162857056,
      "learning_rate": 9.79976154335291e-05,
      "loss": 3.3975,
      "step": 476
    },
    {
      "epoch": 0.27370982749982103,
      "grad_norm": 0.9473979473114014,
      "learning_rate": 9.797101062711231e-05,
      "loss": 3.4573,
      "step": 478
    },
    {
      "epoch": 0.274855056903586,
      "grad_norm": 1.2931294441223145,
      "learning_rate": 9.794423390165837e-05,
      "loss": 3.3732,
      "step": 480
    },
    {
      "epoch": 0.27600028630735096,
      "grad_norm": 1.233302116394043,
      "learning_rate": 9.791728535312998e-05,
      "loss": 3.419,
      "step": 482
    },
    {
      "epoch": 0.2771455157111159,
      "grad_norm": 0.9638918042182922,
      "learning_rate": 9.789016507810564e-05,
      "loss": 3.4119,
      "step": 484
    },
    {
      "epoch": 0.27829074511488083,
      "grad_norm": 1.105643391609192,
      "learning_rate": 9.786287317377929e-05,
      "loss": 3.3909,
      "step": 486
    },
    {
      "epoch": 0.27943597451864577,
      "grad_norm": 0.9666796922683716,
      "learning_rate": 9.783540973795998e-05,
      "loss": 3.4194,
      "step": 488
    },
    {
      "epoch": 0.2805812039224107,
      "grad_norm": 1.3533586263656616,
      "learning_rate": 9.780777486907146e-05,
      "loss": 3.3789,
      "step": 490
    },
    {
      "epoch": 0.28172643332617564,
      "grad_norm": 1.1253416538238525,
      "learning_rate": 9.777996866615186e-05,
      "loss": 3.4385,
      "step": 492
    },
    {
      "epoch": 0.2828716627299406,
      "grad_norm": 0.7198868989944458,
      "learning_rate": 9.775199122885339e-05,
      "loss": 3.4038,
      "step": 494
    },
    {
      "epoch": 0.2840168921337055,
      "grad_norm": 0.9696770310401917,
      "learning_rate": 9.772384265744188e-05,
      "loss": 3.4576,
      "step": 496
    },
    {
      "epoch": 0.28516212153747045,
      "grad_norm": 1.321269154548645,
      "learning_rate": 9.76955230527965e-05,
      "loss": 3.4348,
      "step": 498
    },
    {
      "epoch": 0.28630735094123544,
      "grad_norm": 1.3119802474975586,
      "learning_rate": 9.766703251640934e-05,
      "loss": 3.3848,
      "step": 500
    },
    {
      "epoch": 0.2874525803450004,
      "grad_norm": 1.0199967622756958,
      "learning_rate": 9.763837115038513e-05,
      "loss": 3.4108,
      "step": 502
    },
    {
      "epoch": 0.2885978097487653,
      "grad_norm": 0.9925194382667542,
      "learning_rate": 9.760953905744075e-05,
      "loss": 3.31,
      "step": 504
    },
    {
      "epoch": 0.28974303915253025,
      "grad_norm": 0.9447107315063477,
      "learning_rate": 9.758053634090502e-05,
      "loss": 3.3598,
      "step": 506
    },
    {
      "epoch": 0.2908882685562952,
      "grad_norm": 1.052873134613037,
      "learning_rate": 9.755136310471817e-05,
      "loss": 3.3704,
      "step": 508
    },
    {
      "epoch": 0.2920334979600601,
      "grad_norm": 1.061514139175415,
      "learning_rate": 9.752201945343156e-05,
      "loss": 3.3642,
      "step": 510
    },
    {
      "epoch": 0.29317872736382505,
      "grad_norm": 0.8627074956893921,
      "learning_rate": 9.74925054922073e-05,
      "loss": 3.367,
      "step": 512
    },
    {
      "epoch": 0.29432395676759,
      "grad_norm": 1.0214530229568481,
      "learning_rate": 9.746282132681785e-05,
      "loss": 3.3266,
      "step": 514
    },
    {
      "epoch": 0.2954691861713549,
      "grad_norm": 1.1223275661468506,
      "learning_rate": 9.743296706364565e-05,
      "loss": 3.4194,
      "step": 516
    },
    {
      "epoch": 0.2966144155751199,
      "grad_norm": 0.9849138259887695,
      "learning_rate": 9.740294280968273e-05,
      "loss": 3.3664,
      "step": 518
    },
    {
      "epoch": 0.29775964497888485,
      "grad_norm": 0.7025099396705627,
      "learning_rate": 9.737274867253034e-05,
      "loss": 3.3772,
      "step": 520
    },
    {
      "epoch": 0.2989048743826498,
      "grad_norm": 0.936536967754364,
      "learning_rate": 9.734238476039858e-05,
      "loss": 3.3196,
      "step": 522
    },
    {
      "epoch": 0.3000501037864147,
      "grad_norm": 1.113277792930603,
      "learning_rate": 9.731185118210598e-05,
      "loss": 3.4606,
      "step": 524
    },
    {
      "epoch": 0.30119533319017966,
      "grad_norm": 1.0153186321258545,
      "learning_rate": 9.728114804707909e-05,
      "loss": 3.4079,
      "step": 526
    },
    {
      "epoch": 0.3023405625939446,
      "grad_norm": 1.1675206422805786,
      "learning_rate": 9.725027546535215e-05,
      "loss": 3.4111,
      "step": 528
    },
    {
      "epoch": 0.30348579199770953,
      "grad_norm": 0.9518959522247314,
      "learning_rate": 9.721923354756665e-05,
      "loss": 3.3905,
      "step": 530
    },
    {
      "epoch": 0.30463102140147447,
      "grad_norm": 0.9693425297737122,
      "learning_rate": 9.718802240497098e-05,
      "loss": 3.4364,
      "step": 532
    },
    {
      "epoch": 0.3057762508052394,
      "grad_norm": 1.1249076128005981,
      "learning_rate": 9.715664214941997e-05,
      "loss": 3.3373,
      "step": 534
    },
    {
      "epoch": 0.3069214802090044,
      "grad_norm": 0.8406875133514404,
      "learning_rate": 9.712509289337453e-05,
      "loss": 3.321,
      "step": 536
    },
    {
      "epoch": 0.30806670961276933,
      "grad_norm": 0.9538395404815674,
      "learning_rate": 9.709337474990121e-05,
      "loss": 3.4007,
      "step": 538
    },
    {
      "epoch": 0.30921193901653427,
      "grad_norm": 0.8003599047660828,
      "learning_rate": 9.706148783267187e-05,
      "loss": 3.3798,
      "step": 540
    },
    {
      "epoch": 0.3103571684202992,
      "grad_norm": 0.8605026602745056,
      "learning_rate": 9.702943225596316e-05,
      "loss": 3.2908,
      "step": 542
    },
    {
      "epoch": 0.31150239782406414,
      "grad_norm": 0.7349815964698792,
      "learning_rate": 9.699720813465625e-05,
      "loss": 3.408,
      "step": 544
    },
    {
      "epoch": 0.3126476272278291,
      "grad_norm": 1.1622780561447144,
      "learning_rate": 9.696481558423628e-05,
      "loss": 3.3212,
      "step": 546
    },
    {
      "epoch": 0.313792856631594,
      "grad_norm": 0.9829496145248413,
      "learning_rate": 9.693225472079204e-05,
      "loss": 3.4067,
      "step": 548
    },
    {
      "epoch": 0.31493808603535894,
      "grad_norm": 1.1378313302993774,
      "learning_rate": 9.689952566101548e-05,
      "loss": 3.3556,
      "step": 550
    },
    {
      "epoch": 0.3160833154391239,
      "grad_norm": 0.9355561137199402,
      "learning_rate": 9.686662852220142e-05,
      "loss": 3.3281,
      "step": 552
    },
    {
      "epoch": 0.3172285448428888,
      "grad_norm": 0.9328277111053467,
      "learning_rate": 9.683356342224694e-05,
      "loss": 3.313,
      "step": 554
    },
    {
      "epoch": 0.3183737742466538,
      "grad_norm": 1.277377724647522,
      "learning_rate": 9.680033047965114e-05,
      "loss": 3.3499,
      "step": 556
    },
    {
      "epoch": 0.31951900365041874,
      "grad_norm": 1.0239235162734985,
      "learning_rate": 9.67669298135146e-05,
      "loss": 3.3936,
      "step": 558
    },
    {
      "epoch": 0.3206642330541837,
      "grad_norm": 0.6908963322639465,
      "learning_rate": 9.673336154353899e-05,
      "loss": 3.3584,
      "step": 560
    },
    {
      "epoch": 0.3218094624579486,
      "grad_norm": 0.8835290670394897,
      "learning_rate": 9.669962579002664e-05,
      "loss": 3.3728,
      "step": 562
    },
    {
      "epoch": 0.32295469186171355,
      "grad_norm": 1.0561710596084595,
      "learning_rate": 9.666572267388013e-05,
      "loss": 3.3579,
      "step": 564
    },
    {
      "epoch": 0.3240999212654785,
      "grad_norm": 0.8400120735168457,
      "learning_rate": 9.663165231660181e-05,
      "loss": 3.3224,
      "step": 566
    },
    {
      "epoch": 0.3252451506692434,
      "grad_norm": 0.8960584998130798,
      "learning_rate": 9.659741484029341e-05,
      "loss": 3.3434,
      "step": 568
    },
    {
      "epoch": 0.32639038007300836,
      "grad_norm": 0.9615944027900696,
      "learning_rate": 9.656301036765558e-05,
      "loss": 3.2587,
      "step": 570
    },
    {
      "epoch": 0.3275356094767733,
      "grad_norm": 0.983391523361206,
      "learning_rate": 9.652843902198743e-05,
      "loss": 3.2396,
      "step": 572
    },
    {
      "epoch": 0.3286808388805383,
      "grad_norm": 0.7758197784423828,
      "learning_rate": 9.649370092718615e-05,
      "loss": 3.2948,
      "step": 574
    },
    {
      "epoch": 0.3298260682843032,
      "grad_norm": 0.9714862704277039,
      "learning_rate": 9.64587962077465e-05,
      "loss": 3.3381,
      "step": 576
    },
    {
      "epoch": 0.33097129768806816,
      "grad_norm": 0.8628116846084595,
      "learning_rate": 9.64237249887604e-05,
      "loss": 3.294,
      "step": 578
    },
    {
      "epoch": 0.3321165270918331,
      "grad_norm": 0.9794777035713196,
      "learning_rate": 9.638848739591646e-05,
      "loss": 3.3119,
      "step": 580
    },
    {
      "epoch": 0.333261756495598,
      "grad_norm": 0.8179820775985718,
      "learning_rate": 9.635308355549957e-05,
      "loss": 3.3009,
      "step": 582
    },
    {
      "epoch": 0.33440698589936296,
      "grad_norm": 0.8732323050498962,
      "learning_rate": 9.63175135943904e-05,
      "loss": 3.3207,
      "step": 584
    },
    {
      "epoch": 0.3355522153031279,
      "grad_norm": 1.0355788469314575,
      "learning_rate": 9.628177764006497e-05,
      "loss": 3.2889,
      "step": 586
    },
    {
      "epoch": 0.33669744470689283,
      "grad_norm": 0.8974720239639282,
      "learning_rate": 9.624587582059417e-05,
      "loss": 3.3089,
      "step": 588
    },
    {
      "epoch": 0.33784267411065777,
      "grad_norm": 0.7800531387329102,
      "learning_rate": 9.620980826464335e-05,
      "loss": 3.2999,
      "step": 590
    },
    {
      "epoch": 0.3389879035144227,
      "grad_norm": 0.7294676899909973,
      "learning_rate": 9.617357510147182e-05,
      "loss": 3.3634,
      "step": 592
    },
    {
      "epoch": 0.3401331329181877,
      "grad_norm": 0.7799131274223328,
      "learning_rate": 9.613717646093239e-05,
      "loss": 3.308,
      "step": 594
    },
    {
      "epoch": 0.34127836232195263,
      "grad_norm": 0.9899328947067261,
      "learning_rate": 9.610061247347091e-05,
      "loss": 3.3191,
      "step": 596
    },
    {
      "epoch": 0.34242359172571757,
      "grad_norm": 1.0520347356796265,
      "learning_rate": 9.606388327012579e-05,
      "loss": 3.389,
      "step": 598
    },
    {
      "epoch": 0.3435688211294825,
      "grad_norm": 0.9768466353416443,
      "learning_rate": 9.602698898252756e-05,
      "loss": 3.2905,
      "step": 600
    },
    {
      "epoch": 0.34471405053324744,
      "grad_norm": 0.9359555244445801,
      "learning_rate": 9.598992974289837e-05,
      "loss": 3.3022,
      "step": 602
    },
    {
      "epoch": 0.3458592799370124,
      "grad_norm": 0.7487738728523254,
      "learning_rate": 9.595270568405156e-05,
      "loss": 3.2234,
      "step": 604
    },
    {
      "epoch": 0.3470045093407773,
      "grad_norm": 0.8295655846595764,
      "learning_rate": 9.591531693939109e-05,
      "loss": 3.3506,
      "step": 606
    },
    {
      "epoch": 0.34814973874454225,
      "grad_norm": 0.9020605683326721,
      "learning_rate": 9.587776364291117e-05,
      "loss": 3.3026,
      "step": 608
    },
    {
      "epoch": 0.3492949681483072,
      "grad_norm": 0.7868961095809937,
      "learning_rate": 9.58400459291957e-05,
      "loss": 3.2393,
      "step": 610
    },
    {
      "epoch": 0.3504401975520722,
      "grad_norm": 0.9779835939407349,
      "learning_rate": 9.580216393341785e-05,
      "loss": 3.3254,
      "step": 612
    },
    {
      "epoch": 0.3515854269558371,
      "grad_norm": 0.8962246179580688,
      "learning_rate": 9.576411779133956e-05,
      "loss": 3.2486,
      "step": 614
    },
    {
      "epoch": 0.35273065635960205,
      "grad_norm": 0.9166551828384399,
      "learning_rate": 9.572590763931097e-05,
      "loss": 3.2193,
      "step": 616
    },
    {
      "epoch": 0.353875885763367,
      "grad_norm": 0.7779364585876465,
      "learning_rate": 9.568753361427009e-05,
      "loss": 3.2469,
      "step": 618
    },
    {
      "epoch": 0.3550211151671319,
      "grad_norm": 0.750092089176178,
      "learning_rate": 9.564899585374214e-05,
      "loss": 3.2532,
      "step": 620
    },
    {
      "epoch": 0.35616634457089685,
      "grad_norm": 1.0269392728805542,
      "learning_rate": 9.561029449583919e-05,
      "loss": 3.3331,
      "step": 622
    },
    {
      "epoch": 0.3573115739746618,
      "grad_norm": 0.7937965989112854,
      "learning_rate": 9.557142967925956e-05,
      "loss": 3.314,
      "step": 624
    },
    {
      "epoch": 0.3584568033784267,
      "grad_norm": 1.1338940858840942,
      "learning_rate": 9.553240154328744e-05,
      "loss": 3.3375,
      "step": 626
    },
    {
      "epoch": 0.35960203278219166,
      "grad_norm": 0.7937076091766357,
      "learning_rate": 9.549321022779229e-05,
      "loss": 3.2691,
      "step": 628
    },
    {
      "epoch": 0.36074726218595665,
      "grad_norm": 0.8552340865135193,
      "learning_rate": 9.545385587322839e-05,
      "loss": 3.3107,
      "step": 630
    },
    {
      "epoch": 0.3618924915897216,
      "grad_norm": 1.0279617309570312,
      "learning_rate": 9.541433862063429e-05,
      "loss": 3.2552,
      "step": 632
    },
    {
      "epoch": 0.3630377209934865,
      "grad_norm": 0.9652466177940369,
      "learning_rate": 9.537465861163237e-05,
      "loss": 3.242,
      "step": 634
    },
    {
      "epoch": 0.36418295039725146,
      "grad_norm": 0.9129723310470581,
      "learning_rate": 9.533481598842827e-05,
      "loss": 3.3131,
      "step": 636
    },
    {
      "epoch": 0.3653281798010164,
      "grad_norm": 0.9316424131393433,
      "learning_rate": 9.529481089381042e-05,
      "loss": 3.3288,
      "step": 638
    },
    {
      "epoch": 0.36647340920478133,
      "grad_norm": 0.7987300753593445,
      "learning_rate": 9.525464347114953e-05,
      "loss": 3.2832,
      "step": 640
    },
    {
      "epoch": 0.36761863860854627,
      "grad_norm": 0.7103368043899536,
      "learning_rate": 9.521431386439807e-05,
      "loss": 3.2339,
      "step": 642
    },
    {
      "epoch": 0.3687638680123112,
      "grad_norm": 0.7420955896377563,
      "learning_rate": 9.517382221808969e-05,
      "loss": 3.1662,
      "step": 644
    },
    {
      "epoch": 0.36990909741607614,
      "grad_norm": 0.8201749324798584,
      "learning_rate": 9.513316867733883e-05,
      "loss": 3.2837,
      "step": 646
    },
    {
      "epoch": 0.3710543268198411,
      "grad_norm": 0.8581364154815674,
      "learning_rate": 9.509235338784009e-05,
      "loss": 3.2949,
      "step": 648
    },
    {
      "epoch": 0.37219955622360607,
      "grad_norm": 0.956118643283844,
      "learning_rate": 9.505137649586775e-05,
      "loss": 3.316,
      "step": 650
    },
    {
      "epoch": 0.373344785627371,
      "grad_norm": 0.708759069442749,
      "learning_rate": 9.501023814827524e-05,
      "loss": 3.1951,
      "step": 652
    },
    {
      "epoch": 0.37449001503113594,
      "grad_norm": 0.8143038153648376,
      "learning_rate": 9.496893849249464e-05,
      "loss": 3.2738,
      "step": 654
    },
    {
      "epoch": 0.3756352444349009,
      "grad_norm": 0.6578754782676697,
      "learning_rate": 9.492747767653611e-05,
      "loss": 3.2809,
      "step": 656
    },
    {
      "epoch": 0.3767804738386658,
      "grad_norm": 0.8550508618354797,
      "learning_rate": 9.488585584898738e-05,
      "loss": 3.2668,
      "step": 658
    },
    {
      "epoch": 0.37792570324243074,
      "grad_norm": 0.795080304145813,
      "learning_rate": 9.48440731590132e-05,
      "loss": 3.28,
      "step": 660
    },
    {
      "epoch": 0.3790709326461957,
      "grad_norm": 0.9933105707168579,
      "learning_rate": 9.480212975635486e-05,
      "loss": 3.3104,
      "step": 662
    },
    {
      "epoch": 0.3802161620499606,
      "grad_norm": 1.224338412284851,
      "learning_rate": 9.476002579132957e-05,
      "loss": 3.29,
      "step": 664
    },
    {
      "epoch": 0.38136139145372555,
      "grad_norm": 0.8564585447311401,
      "learning_rate": 9.471776141483e-05,
      "loss": 3.2,
      "step": 666
    },
    {
      "epoch": 0.38250662085749054,
      "grad_norm": 1.160684585571289,
      "learning_rate": 9.467533677832365e-05,
      "loss": 3.2226,
      "step": 668
    },
    {
      "epoch": 0.3836518502612555,
      "grad_norm": 0.8671857714653015,
      "learning_rate": 9.463275203385244e-05,
      "loss": 3.2453,
      "step": 670
    },
    {
      "epoch": 0.3847970796650204,
      "grad_norm": 1.0225045680999756,
      "learning_rate": 9.459000733403205e-05,
      "loss": 3.2283,
      "step": 672
    },
    {
      "epoch": 0.38594230906878535,
      "grad_norm": 0.8350477814674377,
      "learning_rate": 9.454710283205139e-05,
      "loss": 3.2584,
      "step": 674
    },
    {
      "epoch": 0.3870875384725503,
      "grad_norm": 0.8098021745681763,
      "learning_rate": 9.450403868167208e-05,
      "loss": 3.2836,
      "step": 676
    },
    {
      "epoch": 0.3882327678763152,
      "grad_norm": 0.8174638748168945,
      "learning_rate": 9.446081503722792e-05,
      "loss": 3.1896,
      "step": 678
    },
    {
      "epoch": 0.38937799728008016,
| "grad_norm": 0.6904940009117126, | |
| "learning_rate": 9.441743205362426e-05, | |
| "loss": 3.2464, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.3905232266838451, | |
| "grad_norm": 0.692864716053009, | |
| "learning_rate": 9.437388988633752e-05, | |
| "loss": 3.2277, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.39166845608761003, | |
| "grad_norm": 0.7014842629432678, | |
| "learning_rate": 9.433018869141464e-05, | |
| "loss": 3.2372, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.39281368549137496, | |
| "grad_norm": 0.6166806817054749, | |
| "learning_rate": 9.428632862547237e-05, | |
| "loss": 3.2501, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.39395891489513996, | |
| "grad_norm": 0.7060846090316772, | |
| "learning_rate": 9.424230984569696e-05, | |
| "loss": 3.2881, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.3951041442989049, | |
| "grad_norm": 0.7771391272544861, | |
| "learning_rate": 9.419813250984337e-05, | |
| "loss": 3.2149, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.3962493737026698, | |
| "grad_norm": 0.6290923953056335, | |
| "learning_rate": 9.415379677623485e-05, | |
| "loss": 3.1555, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.39739460310643476, | |
| "grad_norm": 0.7270971536636353, | |
| "learning_rate": 9.410930280376225e-05, | |
| "loss": 3.2554, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.3985398325101997, | |
| "grad_norm": 0.681962788105011, | |
| "learning_rate": 9.40646507518836e-05, | |
| "loss": 3.1671, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.39968506191396463, | |
| "grad_norm": 0.5727997422218323, | |
| "learning_rate": 9.40198407806234e-05, | |
| "loss": 3.237, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.40083029131772957, | |
| "grad_norm": 0.7687988877296448, | |
| "learning_rate": 9.39748730505721e-05, | |
| "loss": 3.2357, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.4019755207214945, | |
| "grad_norm": 0.7813317179679871, | |
| "learning_rate": 9.392974772288558e-05, | |
| "loss": 3.2101, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.40312075012525944, | |
| "grad_norm": 0.8766132593154907, | |
| "learning_rate": 9.388446495928446e-05, | |
| "loss": 3.2852, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.40426597952902443, | |
| "grad_norm": 0.7857736349105835, | |
| "learning_rate": 9.383902492205363e-05, | |
| "loss": 3.2113, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.40541120893278937, | |
| "grad_norm": 0.9073331356048584, | |
| "learning_rate": 9.379342777404159e-05, | |
| "loss": 3.2478, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.4065564383365543, | |
| "grad_norm": 0.8033682107925415, | |
| "learning_rate": 9.374767367865989e-05, | |
| "loss": 3.3159, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.40770166774031924, | |
| "grad_norm": 0.7821508646011353, | |
| "learning_rate": 9.370176279988256e-05, | |
| "loss": 3.2362, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.4088468971440842, | |
| "grad_norm": 0.8257923126220703, | |
| "learning_rate": 9.365569530224554e-05, | |
| "loss": 3.1832, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.4099921265478491, | |
| "grad_norm": 0.8349987864494324, | |
| "learning_rate": 9.360947135084603e-05, | |
| "loss": 3.1995, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.41113735595161405, | |
| "grad_norm": 0.8590210676193237, | |
| "learning_rate": 9.356309111134191e-05, | |
| "loss": 3.2119, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.412282585355379, | |
| "grad_norm": 0.8512969017028809, | |
| "learning_rate": 9.351655474995122e-05, | |
| "loss": 3.2323, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.4134278147591439, | |
| "grad_norm": 0.6388457417488098, | |
| "learning_rate": 9.346986243345149e-05, | |
| "loss": 3.1677, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.4145730441629089, | |
| "grad_norm": 0.8811210989952087, | |
| "learning_rate": 9.342301432917912e-05, | |
| "loss": 3.2307, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.41571827356667385, | |
| "grad_norm": 0.9297654628753662, | |
| "learning_rate": 9.337601060502891e-05, | |
| "loss": 3.1838, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.4168635029704388, | |
| "grad_norm": 0.750491201877594, | |
| "learning_rate": 9.332885142945329e-05, | |
| "loss": 3.23, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.4180087323742037, | |
| "grad_norm": 0.8282638192176819, | |
| "learning_rate": 9.328153697146186e-05, | |
| "loss": 3.1789, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.41915396177796865, | |
| "grad_norm": 0.7395208477973938, | |
| "learning_rate": 9.323406740062068e-05, | |
| "loss": 3.2881, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.4202991911817336, | |
| "grad_norm": 0.5959879755973816, | |
| "learning_rate": 9.318644288705172e-05, | |
| "loss": 3.1879, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.4214444205854985, | |
| "grad_norm": 0.6063298583030701, | |
| "learning_rate": 9.313866360143227e-05, | |
| "loss": 3.273, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.42258964998926346, | |
| "grad_norm": 0.6868070960044861, | |
| "learning_rate": 9.309072971499422e-05, | |
| "loss": 3.2145, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.4237348793930284, | |
| "grad_norm": 0.6153081655502319, | |
| "learning_rate": 9.304264139952356e-05, | |
| "loss": 3.0791, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.42488010879679333, | |
| "grad_norm": 0.6345932483673096, | |
| "learning_rate": 9.299439882735977e-05, | |
| "loss": 3.1991, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.4260253382005583, | |
| "grad_norm": 0.7605310082435608, | |
| "learning_rate": 9.294600217139506e-05, | |
| "loss": 3.1272, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.42717056760432326, | |
| "grad_norm": 0.6695173382759094, | |
| "learning_rate": 9.289745160507395e-05, | |
| "loss": 3.1482, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.4283157970080882, | |
| "grad_norm": 0.8121134638786316, | |
| "learning_rate": 9.284874730239244e-05, | |
| "loss": 3.2122, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.42946102641185313, | |
| "grad_norm": 0.8771198391914368, | |
| "learning_rate": 9.279988943789759e-05, | |
| "loss": 3.1768, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.43060625581561807, | |
| "grad_norm": 0.7993550300598145, | |
| "learning_rate": 9.275087818668675e-05, | |
| "loss": 3.1944, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.431751485219383, | |
| "grad_norm": 0.6639721393585205, | |
| "learning_rate": 9.270171372440697e-05, | |
| "loss": 3.1418, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.43289671462314794, | |
| "grad_norm": 0.7494943737983704, | |
| "learning_rate": 9.265239622725438e-05, | |
| "loss": 3.1956, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.4340419440269129, | |
| "grad_norm": 0.7307000160217285, | |
| "learning_rate": 9.26029258719736e-05, | |
| "loss": 3.133, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.4351871734306778, | |
| "grad_norm": 0.7357375621795654, | |
| "learning_rate": 9.255330283585701e-05, | |
| "loss": 3.1898, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.4363324028344428, | |
| "grad_norm": 0.6649693250656128, | |
| "learning_rate": 9.250352729674422e-05, | |
| "loss": 3.2147, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.43747763223820774, | |
| "grad_norm": 0.6873495578765869, | |
| "learning_rate": 9.245359943302133e-05, | |
| "loss": 3.2341, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.43862286164197267, | |
| "grad_norm": 0.7320956587791443, | |
| "learning_rate": 9.240351942362038e-05, | |
| "loss": 3.1241, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.4397680910457376, | |
| "grad_norm": 0.6137463450431824, | |
| "learning_rate": 9.235328744801868e-05, | |
| "loss": 3.1529, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.44091332044950254, | |
| "grad_norm": 0.8658304214477539, | |
| "learning_rate": 9.230290368623809e-05, | |
| "loss": 3.2168, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.4420585498532675, | |
| "grad_norm": 0.7436694502830505, | |
| "learning_rate": 9.225236831884454e-05, | |
| "loss": 3.1798, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.4432037792570324, | |
| "grad_norm": 0.9040384888648987, | |
| "learning_rate": 9.220168152694722e-05, | |
| "loss": 3.2241, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.44434900866079735, | |
| "grad_norm": 0.7236924171447754, | |
| "learning_rate": 9.215084349219801e-05, | |
| "loss": 3.183, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.4454942380645623, | |
| "grad_norm": 0.8633347153663635, | |
| "learning_rate": 9.209985439679081e-05, | |
| "loss": 3.1776, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.4466394674683272, | |
| "grad_norm": 0.730910062789917, | |
| "learning_rate": 9.204871442346091e-05, | |
| "loss": 3.1633, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.4477846968720922, | |
| "grad_norm": 0.809923529624939, | |
| "learning_rate": 9.199742375548432e-05, | |
| "loss": 3.1736, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.44892992627585715, | |
| "grad_norm": 0.7229586839675903, | |
| "learning_rate": 9.194598257667711e-05, | |
| "loss": 3.1813, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.4500751556796221, | |
| "grad_norm": 0.6999960541725159, | |
| "learning_rate": 9.189439107139472e-05, | |
| "loss": 3.1125, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.451220385083387, | |
| "grad_norm": 0.7234693169593811, | |
| "learning_rate": 9.184264942453138e-05, | |
| "loss": 3.137, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.45236561448715196, | |
| "grad_norm": 0.7283908724784851, | |
| "learning_rate": 9.179075782151936e-05, | |
| "loss": 3.1672, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.4535108438909169, | |
| "grad_norm": 0.793543815612793, | |
| "learning_rate": 9.173871644832834e-05, | |
| "loss": 3.1925, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.45465607329468183, | |
| "grad_norm": 0.7263696789741516, | |
| "learning_rate": 9.168652549146481e-05, | |
| "loss": 3.1609, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.45580130269844676, | |
| "grad_norm": 0.7698031663894653, | |
| "learning_rate": 9.163418513797126e-05, | |
| "loss": 3.2547, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.4569465321022117, | |
| "grad_norm": 0.908698320388794, | |
| "learning_rate": 9.158169557542566e-05, | |
| "loss": 3.2165, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.4580917615059767, | |
| "grad_norm": 0.9588857293128967, | |
| "learning_rate": 9.152905699194065e-05, | |
| "loss": 3.1743, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.4592369909097416, | |
| "grad_norm": 0.7442302107810974, | |
| "learning_rate": 9.1476269576163e-05, | |
| "loss": 3.1088, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.46038222031350656, | |
| "grad_norm": 0.7421006560325623, | |
| "learning_rate": 9.14233335172728e-05, | |
| "loss": 3.1497, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.4615274497172715, | |
| "grad_norm": 0.8878415822982788, | |
| "learning_rate": 9.13702490049829e-05, | |
| "loss": 3.1924, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.46267267912103643, | |
| "grad_norm": 0.6878317594528198, | |
| "learning_rate": 9.131701622953816e-05, | |
| "loss": 3.1366, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.46381790852480137, | |
| "grad_norm": 0.7945599555969238, | |
| "learning_rate": 9.126363538171478e-05, | |
| "loss": 3.1926, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.4649631379285663, | |
| "grad_norm": 0.7997886538505554, | |
| "learning_rate": 9.121010665281964e-05, | |
| "loss": 3.1521, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.46610836733233124, | |
| "grad_norm": 0.715614378452301, | |
| "learning_rate": 9.115643023468958e-05, | |
| "loss": 3.1904, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.4672535967360962, | |
| "grad_norm": 0.7846017479896545, | |
| "learning_rate": 9.110260631969077e-05, | |
| "loss": 3.1338, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.46839882613986117, | |
| "grad_norm": 0.6939677596092224, | |
| "learning_rate": 9.10486351007179e-05, | |
| "loss": 3.1635, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.4695440555436261, | |
| "grad_norm": 0.7764283418655396, | |
| "learning_rate": 9.099451677119366e-05, | |
| "loss": 3.1922, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.47068928494739104, | |
| "grad_norm": 0.753666877746582, | |
| "learning_rate": 9.094025152506788e-05, | |
| "loss": 3.0827, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.471834514351156, | |
| "grad_norm": 0.6793937683105469, | |
| "learning_rate": 9.088583955681699e-05, | |
| "loss": 3.1235, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.4729797437549209, | |
| "grad_norm": 0.645055890083313, | |
| "learning_rate": 9.08312810614432e-05, | |
| "loss": 3.1758, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.47412497315868585, | |
| "grad_norm": 0.7241025567054749, | |
| "learning_rate": 9.077657623447379e-05, | |
| "loss": 3.1636, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.4752702025624508, | |
| "grad_norm": 0.762117862701416, | |
| "learning_rate": 9.07217252719606e-05, | |
| "loss": 3.1423, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.4764154319662157, | |
| "grad_norm": 0.7575943470001221, | |
| "learning_rate": 9.066672837047907e-05, | |
| "loss": 3.1304, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.47756066136998065, | |
| "grad_norm": 0.8326764106750488, | |
| "learning_rate": 9.061158572712769e-05, | |
| "loss": 3.1807, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.4787058907737456, | |
| "grad_norm": 0.7815741300582886, | |
| "learning_rate": 9.055629753952731e-05, | |
| "loss": 3.2113, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.4798511201775106, | |
| "grad_norm": 0.7716583609580994, | |
| "learning_rate": 9.050086400582033e-05, | |
| "loss": 3.1791, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.4809963495812755, | |
| "grad_norm": 0.6160004734992981, | |
| "learning_rate": 9.044528532467006e-05, | |
| "loss": 3.1696, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.48214157898504045, | |
| "grad_norm": 0.8025004267692566, | |
| "learning_rate": 9.038956169525998e-05, | |
| "loss": 3.2002, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.4832868083888054, | |
| "grad_norm": 0.733741819858551, | |
| "learning_rate": 9.033369331729307e-05, | |
| "loss": 3.1661, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.4844320377925703, | |
| "grad_norm": 0.7210118770599365, | |
| "learning_rate": 9.027768039099103e-05, | |
| "loss": 3.1492, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.48557726719633526, | |
| "grad_norm": 0.6915583610534668, | |
| "learning_rate": 9.02215231170936e-05, | |
| "loss": 3.1892, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.4867224966001002, | |
| "grad_norm": 0.6812649965286255, | |
| "learning_rate": 9.016522169685783e-05, | |
| "loss": 3.1404, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.48786772600386513, | |
| "grad_norm": 0.7272056341171265, | |
| "learning_rate": 9.010877633205738e-05, | |
| "loss": 3.1935, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.48901295540763007, | |
| "grad_norm": 0.7162798643112183, | |
| "learning_rate": 9.005218722498177e-05, | |
| "loss": 3.1949, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.49015818481139506, | |
| "grad_norm": 0.6110600829124451, | |
| "learning_rate": 8.999545457843568e-05, | |
| "loss": 3.1217, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.49130341421516, | |
| "grad_norm": 0.657370924949646, | |
| "learning_rate": 8.993857859573818e-05, | |
| "loss": 3.1381, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.49244864361892493, | |
| "grad_norm": 0.8181600570678711, | |
| "learning_rate": 8.988155948072203e-05, | |
| "loss": 3.1527, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.49359387302268987, | |
| "grad_norm": 0.586644172668457, | |
| "learning_rate": 8.9824397437733e-05, | |
| "loss": 3.1328, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.4947391024264548, | |
| "grad_norm": 0.8710150718688965, | |
| "learning_rate": 8.976709267162903e-05, | |
| "loss": 3.1509, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.49588433183021974, | |
| "grad_norm": 0.7185545563697815, | |
| "learning_rate": 8.970964538777957e-05, | |
| "loss": 3.0628, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.4970295612339847, | |
| "grad_norm": 0.7242484092712402, | |
| "learning_rate": 8.965205579206483e-05, | |
| "loss": 3.0603, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.4981747906377496, | |
| "grad_norm": 0.7996972799301147, | |
| "learning_rate": 8.959432409087504e-05, | |
| "loss": 3.2346, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.49932002004151455, | |
| "grad_norm": 0.6038782000541687, | |
| "learning_rate": 8.953645049110971e-05, | |
| "loss": 3.0751, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.5004652494452795, | |
| "grad_norm": 0.7712786197662354, | |
| "learning_rate": 8.94784352001769e-05, | |
| "loss": 3.1086, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.5016104788490444, | |
| "grad_norm": 0.6952617168426514, | |
| "learning_rate": 8.94202784259924e-05, | |
| "loss": 3.13, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.5027557082528094, | |
| "grad_norm": 0.7420851588249207, | |
| "learning_rate": 8.936198037697916e-05, | |
| "loss": 3.1094, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.5039009376565743, | |
| "grad_norm": 0.6883806586265564, | |
| "learning_rate": 8.930354126206634e-05, | |
| "loss": 3.0722, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.5050461670603392, | |
| "grad_norm": 0.7546491026878357, | |
| "learning_rate": 8.92449612906887e-05, | |
| "loss": 3.1571, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.5061913964641043, | |
| "grad_norm": 0.7471094727516174, | |
| "learning_rate": 8.918624067278576e-05, | |
| "loss": 3.1842, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.5073366258678692, | |
| "grad_norm": 0.8344042897224426, | |
| "learning_rate": 8.912737961880116e-05, | |
| "loss": 3.1709, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.5084818552716341, | |
| "grad_norm": 0.6555135250091553, | |
| "learning_rate": 8.906837833968174e-05, | |
| "loss": 3.1777, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.5096270846753991, | |
| "grad_norm": 0.799281120300293, | |
| "learning_rate": 8.900923704687697e-05, | |
| "loss": 3.176, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.510772314079164, | |
| "grad_norm": 0.8266319632530212, | |
| "learning_rate": 8.894995595233809e-05, | |
| "loss": 3.1353, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.511917543482929, | |
| "grad_norm": 0.7263309955596924, | |
| "learning_rate": 8.889053526851729e-05, | |
| "loss": 3.0824, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.5130627728866939, | |
| "grad_norm": 0.7665941119194031, | |
| "learning_rate": 8.88309752083671e-05, | |
| "loss": 3.1808, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.5142080022904588, | |
| "grad_norm": 0.7014003396034241, | |
| "learning_rate": 8.877127598533952e-05, | |
| "loss": 3.1158, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.5153532316942238, | |
| "grad_norm": 0.6320556998252869, | |
| "learning_rate": 8.871143781338529e-05, | |
| "loss": 3.1276, | |
| "step": 900 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 3494, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 300, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.0132651008589824e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
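The file above is the `trainer_state.json` that the Hugging Face `Trainer` writes alongside each checkpoint: here, checkpoint 900 of 3494 `max_steps`, saved every 300 steps, with a `log_history` entry every 2 steps. As a minimal sketch of how to inspect it, the snippet below loads the state and plots the recorded training loss against the learning-rate schedule. It assumes the JSON has been saved locally as `trainer_state.json` and that `matplotlib` is installed; the file name and the output path `loss_lr.png` are illustrative assumptions, not part of the log itself.

```python
import json

import matplotlib.pyplot as plt

# Path is an assumption: Trainer writes this file into each
# checkpoint directory (e.g. checkpoint-900/trainer_state.json).
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step training entries; eval/summary entries
# in a full trainer_state.json may lack the "loss" key.
train_logs = [e for e in state["log_history"] if "loss" in e]

steps = [e["step"] for e in train_logs]
losses = [e["loss"] for e in train_logs]
lrs = [e["learning_rate"] for e in train_logs]

fig, ax_loss = plt.subplots()
ax_loss.plot(steps, losses, label="train loss")
ax_loss.set_xlabel("step")
ax_loss.set_ylabel("loss")

# Second y-axis so the ~1e-5-scale learning rate is visible
# next to the ~3.0-scale loss.
ax_lr = ax_loss.twinx()
ax_lr.plot(steps, lrs, color="tab:orange", label="learning rate")
ax_lr.set_ylabel("learning rate")

fig.tight_layout()
plt.savefig("loss_lr.png")
```

Read this way, the section above shows the loss drifting from roughly 3.3 down to about 3.1 between steps 600 and 900 while the learning rate decays from about 9.60e-5 to 8.87e-5, i.e. the schedule is already past its warmup peak and in its decay phase.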