{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.687137642258965,
  "eval_steps": 500,
  "global_step": 1200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0005726147018824708,
      "grad_norm": 7.761023998260498,
      "learning_rate": 0.0,
      "loss": 6.0592,
      "step": 1
    },
    {
      "epoch": 0.0011452294037649416,
      "grad_norm": 7.8541951179504395,
      "learning_rate": 5.714285714285715e-07,
      "loss": 6.0156,
      "step": 2
    },
    {
      "epoch": 0.0022904588075298832,
      "grad_norm": 7.347611904144287,
      "learning_rate": 1.7142857142857145e-06,
      "loss": 6.0103,
      "step": 4
    },
    {
      "epoch": 0.003435688211294825,
      "grad_norm": 5.382428169250488,
      "learning_rate": 2.8571428571428573e-06,
      "loss": 5.9221,
      "step": 6
    },
    {
      "epoch": 0.0045809176150597665,
      "grad_norm": 5.063406467437744,
      "learning_rate": 4.000000000000001e-06,
      "loss": 6.0365,
      "step": 8
    },
    {
      "epoch": 0.005726147018824708,
      "grad_norm": 9.779157638549805,
      "learning_rate": 5.142857142857143e-06,
      "loss": 6.0336,
      "step": 10
    },
    {
      "epoch": 0.00687137642258965,
      "grad_norm": 7.555446147918701,
      "learning_rate": 6.285714285714287e-06,
      "loss": 6.0328,
      "step": 12
    },
    {
      "epoch": 0.008016605826354592,
      "grad_norm": 6.790043354034424,
      "learning_rate": 7.428571428571429e-06,
      "loss": 5.7848,
      "step": 14
    },
    {
      "epoch": 0.009161835230119533,
      "grad_norm": 4.4132208824157715,
      "learning_rate": 8.571428571428573e-06,
      "loss": 5.8207,
      "step": 16
    },
    {
      "epoch": 0.010307064633884476,
      "grad_norm": 4.064995765686035,
      "learning_rate": 9.714285714285715e-06,
      "loss": 5.6497,
      "step": 18
    },
    {
      "epoch": 0.011452294037649417,
      "grad_norm": 3.357184410095215,
      "learning_rate": 1.0857142857142858e-05,
      "loss": 5.7758,
      "step": 20
    },
    {
      "epoch": 0.012597523441414358,
      "grad_norm": 2.742230176925659,
      "learning_rate": 1.2e-05,
      "loss": 5.6173,
      "step": 22
    },
    {
      "epoch": 0.0137427528451793,
      "grad_norm": 2.491459369659424,
      "learning_rate": 1.3142857142857143e-05,
      "loss": 5.6681,
      "step": 24
    },
    {
      "epoch": 0.014887982248944241,
      "grad_norm": 2.7569029331207275,
      "learning_rate": 1.4285714285714285e-05,
      "loss": 5.6393,
      "step": 26
    },
    {
      "epoch": 0.016033211652709184,
      "grad_norm": 2.208378791809082,
      "learning_rate": 1.5428571428571428e-05,
      "loss": 5.5768,
      "step": 28
    },
    {
      "epoch": 0.017178441056474127,
      "grad_norm": 3.2770133018493652,
      "learning_rate": 1.657142857142857e-05,
      "loss": 5.484,
      "step": 30
    },
    {
      "epoch": 0.018323670460239066,
      "grad_norm": 3.177299976348877,
      "learning_rate": 1.7714285714285713e-05,
      "loss": 5.528,
      "step": 32
    },
    {
      "epoch": 0.01946889986400401,
      "grad_norm": 2.1981537342071533,
      "learning_rate": 1.885714285714286e-05,
      "loss": 5.6327,
      "step": 34
    },
    {
      "epoch": 0.02061412926776895,
      "grad_norm": 3.265881061553955,
      "learning_rate": 2e-05,
      "loss": 5.6288,
      "step": 36
    },
    {
      "epoch": 0.02175935867153389,
      "grad_norm": 3.6059298515319824,
      "learning_rate": 2.1142857142857144e-05,
      "loss": 5.4789,
      "step": 38
    },
    {
      "epoch": 0.022904588075298833,
      "grad_norm": 2.4080026149749756,
      "learning_rate": 2.2285714285714287e-05,
      "loss": 5.4046,
      "step": 40
    },
    {
      "epoch": 0.024049817479063776,
      "grad_norm": 2.142902135848999,
      "learning_rate": 2.342857142857143e-05,
      "loss": 5.4738,
      "step": 42
    },
    {
      "epoch": 0.025195046882828715,
      "grad_norm": 2.4021224975585938,
      "learning_rate": 2.4571428571428572e-05,
      "loss": 5.4649,
      "step": 44
    },
    {
      "epoch": 0.026340276286593658,
      "grad_norm": 2.172009229660034,
      "learning_rate": 2.5714285714285714e-05,
      "loss": 5.4302,
      "step": 46
    },
    {
      "epoch": 0.0274855056903586,
      "grad_norm": 2.9737730026245117,
      "learning_rate": 2.6857142857142857e-05,
      "loss": 5.3045,
      "step": 48
    },
    {
      "epoch": 0.028630735094123543,
      "grad_norm": 3.0378615856170654,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 5.2185,
      "step": 50
    },
    {
      "epoch": 0.029775964497888482,
      "grad_norm": 3.4448676109313965,
      "learning_rate": 2.9142857142857146e-05,
      "loss": 5.1838,
      "step": 52
    },
    {
      "epoch": 0.030921193901653425,
      "grad_norm": 2.469245672225952,
      "learning_rate": 3.0285714285714288e-05,
      "loss": 5.1637,
      "step": 54
    },
    {
      "epoch": 0.03206642330541837,
      "grad_norm": 3.58486008644104,
      "learning_rate": 3.142857142857143e-05,
      "loss": 5.2063,
      "step": 56
    },
    {
      "epoch": 0.03321165270918331,
      "grad_norm": 3.0815446376800537,
      "learning_rate": 3.257142857142857e-05,
      "loss": 5.2317,
      "step": 58
    },
    {
      "epoch": 0.03435688211294825,
      "grad_norm": 3.6842119693756104,
      "learning_rate": 3.3714285714285716e-05,
      "loss": 5.2695,
      "step": 60
    },
    {
      "epoch": 0.03550211151671319,
      "grad_norm": 2.9440791606903076,
      "learning_rate": 3.485714285714286e-05,
      "loss": 5.2686,
      "step": 62
    },
    {
      "epoch": 0.03664734092047813,
      "grad_norm": 3.9632568359375,
      "learning_rate": 3.6e-05,
      "loss": 5.1262,
      "step": 64
    },
    {
      "epoch": 0.037792570324243074,
      "grad_norm": 4.045065402984619,
      "learning_rate": 3.7142857142857143e-05,
      "loss": 5.1546,
      "step": 66
    },
    {
      "epoch": 0.03893779972800802,
      "grad_norm": 3.5707085132598877,
      "learning_rate": 3.8285714285714286e-05,
      "loss": 5.0036,
      "step": 68
    },
    {
      "epoch": 0.04008302913177296,
      "grad_norm": 3.014404535293579,
      "learning_rate": 3.942857142857143e-05,
      "loss": 5.026,
      "step": 70
    },
    {
      "epoch": 0.0412282585355379,
      "grad_norm": 2.708796977996826,
      "learning_rate": 4.057142857142857e-05,
      "loss": 4.9442,
      "step": 72
    },
    {
      "epoch": 0.04237348793930284,
      "grad_norm": 2.5384011268615723,
      "learning_rate": 4.1714285714285714e-05,
      "loss": 5.0223,
      "step": 74
    },
    {
      "epoch": 0.04351871734306778,
      "grad_norm": 3.006281852722168,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 4.9827,
      "step": 76
    },
    {
      "epoch": 0.044663946746832724,
      "grad_norm": 2.5772130489349365,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 4.9675,
      "step": 78
    },
    {
      "epoch": 0.045809176150597666,
      "grad_norm": 3.456017255783081,
      "learning_rate": 4.514285714285714e-05,
      "loss": 5.0341,
      "step": 80
    },
    {
      "epoch": 0.04695440555436261,
      "grad_norm": 3.3163113594055176,
      "learning_rate": 4.628571428571429e-05,
      "loss": 4.9867,
      "step": 82
    },
    {
      "epoch": 0.04809963495812755,
      "grad_norm": 3.7568469047546387,
      "learning_rate": 4.742857142857143e-05,
      "loss": 4.8652,
      "step": 84
    },
    {
      "epoch": 0.049244864361892494,
      "grad_norm": 4.19318151473999,
      "learning_rate": 4.8571428571428576e-05,
      "loss": 5.0602,
      "step": 86
    },
    {
      "epoch": 0.05039009376565743,
      "grad_norm": 5.1034064292907715,
      "learning_rate": 4.971428571428572e-05,
      "loss": 4.9757,
      "step": 88
    },
    {
      "epoch": 0.05153532316942237,
      "grad_norm": 4.0827484130859375,
      "learning_rate": 5.085714285714286e-05,
      "loss": 4.8486,
      "step": 90
    },
    {
      "epoch": 0.052680552573187316,
      "grad_norm": 4.6189446449279785,
      "learning_rate": 5.2000000000000004e-05,
      "loss": 4.9595,
      "step": 92
    },
    {
      "epoch": 0.05382578197695226,
      "grad_norm": 3.988513469696045,
      "learning_rate": 5.314285714285715e-05,
      "loss": 4.9035,
      "step": 94
    },
    {
      "epoch": 0.0549710113807172,
      "grad_norm": 3.857276678085327,
      "learning_rate": 5.428571428571428e-05,
      "loss": 4.8277,
      "step": 96
    },
    {
      "epoch": 0.056116240784482144,
      "grad_norm": 3.5372354984283447,
      "learning_rate": 5.542857142857143e-05,
      "loss": 4.7718,
      "step": 98
    },
    {
      "epoch": 0.057261470188247086,
      "grad_norm": 3.3853676319122314,
      "learning_rate": 5.6571428571428574e-05,
      "loss": 4.8098,
      "step": 100
    },
    {
      "epoch": 0.05840669959201202,
      "grad_norm": 2.1142077445983887,
      "learning_rate": 5.771428571428572e-05,
      "loss": 4.7975,
      "step": 102
    },
    {
      "epoch": 0.059551928995776965,
      "grad_norm": 3.2275538444519043,
      "learning_rate": 5.885714285714285e-05,
      "loss": 4.8509,
      "step": 104
    },
    {
      "epoch": 0.06069715839954191,
      "grad_norm": 3.5413126945495605,
      "learning_rate": 6e-05,
      "loss": 4.6069,
      "step": 106
    },
    {
      "epoch": 0.06184238780330685,
      "grad_norm": 2.755648374557495,
      "learning_rate": 6.114285714285714e-05,
      "loss": 4.6951,
      "step": 108
    },
    {
      "epoch": 0.06298761720707179,
      "grad_norm": 2.980039596557617,
      "learning_rate": 6.22857142857143e-05,
      "loss": 4.7012,
      "step": 110
    },
    {
      "epoch": 0.06413284661083674,
      "grad_norm": 4.890020370483398,
      "learning_rate": 6.342857142857143e-05,
      "loss": 4.8008,
      "step": 112
    },
    {
      "epoch": 0.06527807601460167,
      "grad_norm": 4.35846471786499,
      "learning_rate": 6.457142857142856e-05,
      "loss": 4.8587,
      "step": 114
    },
    {
      "epoch": 0.06642330541836662,
      "grad_norm": 3.6171813011169434,
      "learning_rate": 6.571428571428571e-05,
      "loss": 4.7473,
      "step": 116
    },
    {
      "epoch": 0.06756853482213156,
      "grad_norm": 2.4927010536193848,
      "learning_rate": 6.685714285714286e-05,
      "loss": 4.7113,
      "step": 118
    },
    {
      "epoch": 0.0687137642258965,
      "grad_norm": 3.3327009677886963,
      "learning_rate": 6.800000000000001e-05,
      "loss": 4.6105,
      "step": 120
    },
    {
      "epoch": 0.06985899362966144,
      "grad_norm": 3.1123206615448,
      "learning_rate": 6.914285714285715e-05,
      "loss": 4.5968,
      "step": 122
    },
    {
      "epoch": 0.07100422303342638,
      "grad_norm": 2.6985421180725098,
      "learning_rate": 7.028571428571428e-05,
      "loss": 4.6323,
      "step": 124
    },
    {
      "epoch": 0.07214945243719133,
      "grad_norm": 2.058084011077881,
      "learning_rate": 7.142857142857143e-05,
      "loss": 4.5721,
      "step": 126
    },
    {
      "epoch": 0.07329468184095626,
      "grad_norm": 2.144658327102661,
      "learning_rate": 7.257142857142858e-05,
      "loss": 4.6125,
      "step": 128
    },
    {
      "epoch": 0.07443991124472121,
      "grad_norm": 2.477219820022583,
      "learning_rate": 7.371428571428572e-05,
      "loss": 4.4727,
      "step": 130
    },
    {
      "epoch": 0.07558514064848615,
      "grad_norm": 3.8517298698425293,
      "learning_rate": 7.485714285714285e-05,
      "loss": 4.5696,
      "step": 132
    },
    {
      "epoch": 0.0767303700522511,
      "grad_norm": 3.0253565311431885,
      "learning_rate": 7.6e-05,
      "loss": 4.4838,
      "step": 134
    },
    {
      "epoch": 0.07787559945601603,
      "grad_norm": 3.397569179534912,
      "learning_rate": 7.714285714285715e-05,
      "loss": 4.6431,
      "step": 136
    },
    {
      "epoch": 0.07902082885978097,
      "grad_norm": 2.435197114944458,
      "learning_rate": 7.828571428571429e-05,
      "loss": 4.4681,
      "step": 138
    },
    {
      "epoch": 0.08016605826354592,
      "grad_norm": 2.6476476192474365,
      "learning_rate": 7.942857142857143e-05,
      "loss": 4.4462,
      "step": 140
    },
    {
      "epoch": 0.08131128766731086,
      "grad_norm": 2.1929690837860107,
      "learning_rate": 8.057142857142857e-05,
      "loss": 4.5136,
      "step": 142
    },
    {
      "epoch": 0.0824565170710758,
      "grad_norm": 2.4533395767211914,
      "learning_rate": 8.171428571428572e-05,
      "loss": 4.5572,
      "step": 144
    },
    {
      "epoch": 0.08360174647484074,
      "grad_norm": 2.601806879043579,
      "learning_rate": 8.285714285714287e-05,
      "loss": 4.4121,
      "step": 146
    },
    {
      "epoch": 0.08474697587860568,
      "grad_norm": 3.233973741531372,
      "learning_rate": 8.4e-05,
      "loss": 4.4599,
      "step": 148
    },
    {
      "epoch": 0.08589220528237063,
      "grad_norm": 2.6353538036346436,
      "learning_rate": 8.514285714285714e-05,
      "loss": 4.4533,
      "step": 150
    },
    {
      "epoch": 0.08703743468613556,
      "grad_norm": 2.8465511798858643,
      "learning_rate": 8.62857142857143e-05,
      "loss": 4.5246,
      "step": 152
    },
    {
      "epoch": 0.08818266408990051,
      "grad_norm": 2.8642711639404297,
      "learning_rate": 8.742857142857144e-05,
      "loss": 4.4659,
      "step": 154
    },
    {
      "epoch": 0.08932789349366545,
      "grad_norm": 2.793112277984619,
      "learning_rate": 8.857142857142857e-05,
      "loss": 4.5107,
      "step": 156
    },
    {
      "epoch": 0.0904731228974304,
      "grad_norm": 3.43472957611084,
      "learning_rate": 8.971428571428571e-05,
      "loss": 4.4079,
      "step": 158
    },
    {
      "epoch": 0.09161835230119533,
      "grad_norm": 2.9260294437408447,
      "learning_rate": 9.085714285714286e-05,
      "loss": 4.4047,
      "step": 160
    },
    {
      "epoch": 0.09276358170496027,
      "grad_norm": 2.6336724758148193,
      "learning_rate": 9.200000000000001e-05,
      "loss": 4.4777,
      "step": 162
    },
    {
      "epoch": 0.09390881110872522,
      "grad_norm": 2.8348231315612793,
      "learning_rate": 9.314285714285715e-05,
      "loss": 4.3445,
      "step": 164
    },
    {
      "epoch": 0.09505404051249015,
      "grad_norm": 4.271595478057861,
      "learning_rate": 9.428571428571429e-05,
      "loss": 4.4234,
      "step": 166
    },
    {
      "epoch": 0.0961992699162551,
      "grad_norm": 3.4789109230041504,
      "learning_rate": 9.542857142857143e-05,
      "loss": 4.2872,
      "step": 168
    },
    {
      "epoch": 0.09734449932002004,
      "grad_norm": 2.57273530960083,
      "learning_rate": 9.657142857142858e-05,
      "loss": 4.4177,
      "step": 170
    },
    {
      "epoch": 0.09848972872378499,
      "grad_norm": 2.185086250305176,
      "learning_rate": 9.771428571428572e-05,
      "loss": 4.3568,
      "step": 172
    },
    {
      "epoch": 0.09963495812754992,
      "grad_norm": 2.771744966506958,
      "learning_rate": 9.885714285714286e-05,
      "loss": 4.3392,
      "step": 174
    },
    {
      "epoch": 0.10078018753131486,
      "grad_norm": 1.950353741645813,
      "learning_rate": 0.0001,
      "loss": 4.1931,
      "step": 176
    },
    {
      "epoch": 0.10192541693507981,
      "grad_norm": 2.4709694385528564,
      "learning_rate": 9.999991040472416e-05,
      "loss": 4.2936,
      "step": 178
    },
    {
      "epoch": 0.10307064633884475,
      "grad_norm": 2.140997886657715,
      "learning_rate": 9.999964161921776e-05,
      "loss": 4.1653,
      "step": 180
    },
    {
      "epoch": 0.1042158757426097,
      "grad_norm": 2.491321563720703,
      "learning_rate": 9.999919364444403e-05,
      "loss": 4.3202,
      "step": 182
    },
    {
      "epoch": 0.10536110514637463,
      "grad_norm": 2.5410189628601074,
      "learning_rate": 9.999856648200845e-05,
      "loss": 4.2657,
      "step": 184
    },
    {
      "epoch": 0.10650633455013958,
      "grad_norm": 2.1820590496063232,
      "learning_rate": 9.999776013415866e-05,
      "loss": 4.2282,
      "step": 186
    },
    {
      "epoch": 0.10765156395390452,
      "grad_norm": 1.7251808643341064,
      "learning_rate": 9.999677460378444e-05,
      "loss": 4.3421,
      "step": 188
    },
    {
      "epoch": 0.10879679335766945,
      "grad_norm": 2.002145290374756,
      "learning_rate": 9.999560989441779e-05,
      "loss": 4.1361,
      "step": 190
    },
    {
      "epoch": 0.1099420227614344,
      "grad_norm": 1.9663431644439697,
      "learning_rate": 9.999426601023274e-05,
      "loss": 4.201,
      "step": 192
    },
    {
      "epoch": 0.11108725216519934,
      "grad_norm": 2.1406776905059814,
      "learning_rate": 9.999274295604558e-05,
      "loss": 4.1086,
      "step": 194
    },
    {
      "epoch": 0.11223248156896429,
      "grad_norm": 3.3888607025146484,
      "learning_rate": 9.999104073731458e-05,
      "loss": 4.2723,
      "step": 196
    },
    {
      "epoch": 0.11337771097272922,
      "grad_norm": 2.371840715408325,
      "learning_rate": 9.998915936014024e-05,
      "loss": 4.1893,
      "step": 198
    },
    {
      "epoch": 0.11452294037649417,
      "grad_norm": 2.0502302646636963,
      "learning_rate": 9.998709883126502e-05,
      "loss": 4.1395,
      "step": 200
    },
    {
      "epoch": 0.11566816978025911,
      "grad_norm": 1.6674678325653076,
      "learning_rate": 9.998485915807347e-05,
      "loss": 4.071,
      "step": 202
    },
    {
      "epoch": 0.11681339918402404,
      "grad_norm": 1.7829004526138306,
      "learning_rate": 9.998244034859219e-05,
      "loss": 4.1107,
      "step": 204
    },
    {
      "epoch": 0.117958628587789,
      "grad_norm": 1.763493299484253,
      "learning_rate": 9.997984241148967e-05,
      "loss": 4.1142,
      "step": 206
    },
    {
      "epoch": 0.11910385799155393,
      "grad_norm": 2.069258213043213,
      "learning_rate": 9.997706535607649e-05,
      "loss": 4.047,
      "step": 208
    },
    {
      "epoch": 0.12024908739531888,
      "grad_norm": 2.4262139797210693,
      "learning_rate": 9.997410919230505e-05,
      "loss": 4.0396,
      "step": 210
    },
    {
      "epoch": 0.12139431679908382,
      "grad_norm": 1.820494532585144,
      "learning_rate": 9.997097393076971e-05,
      "loss": 4.1548,
      "step": 212
    },
    {
      "epoch": 0.12253954620284876,
      "grad_norm": 2.1332643032073975,
      "learning_rate": 9.996765958270664e-05,
      "loss": 4.1384,
      "step": 214
    },
    {
      "epoch": 0.1236847756066137,
      "grad_norm": 2.1329920291900635,
      "learning_rate": 9.996416615999384e-05,
      "loss": 4.0315,
      "step": 216
    },
    {
      "epoch": 0.12483000501037864,
      "grad_norm": 2.29955792427063,
      "learning_rate": 9.996049367515108e-05,
      "loss": 4.0963,
      "step": 218
    },
    {
      "epoch": 0.12597523441414357,
      "grad_norm": 2.225827693939209,
      "learning_rate": 9.995664214133983e-05,
      "loss": 4.1247,
      "step": 220
    },
    {
      "epoch": 0.12712046381790854,
      "grad_norm": 1.794838786125183,
      "learning_rate": 9.99526115723633e-05,
      "loss": 4.0449,
      "step": 222
    },
    {
      "epoch": 0.12826569322167347,
      "grad_norm": 1.7548491954803467,
      "learning_rate": 9.994840198266626e-05,
      "loss": 3.927,
      "step": 224
    },
    {
      "epoch": 0.1294109226254384,
      "grad_norm": 1.487001895904541,
      "learning_rate": 9.994401338733508e-05,
      "loss": 3.9714,
      "step": 226
    },
    {
      "epoch": 0.13055615202920334,
      "grad_norm": 1.9811242818832397,
      "learning_rate": 9.993944580209768e-05,
      "loss": 4.0094,
      "step": 228
    },
    {
      "epoch": 0.13170138143296828,
      "grad_norm": 1.4257248640060425,
      "learning_rate": 9.99346992433234e-05,
      "loss": 4.0213,
      "step": 230
    },
    {
      "epoch": 0.13284661083673324,
      "grad_norm": 1.545812726020813,
      "learning_rate": 9.992977372802302e-05,
      "loss": 4.0076,
      "step": 232
    },
    {
      "epoch": 0.13399184024049818,
      "grad_norm": 1.8193179368972778,
      "learning_rate": 9.992466927384865e-05,
      "loss": 4.0536,
      "step": 234
    },
    {
      "epoch": 0.1351370696442631,
      "grad_norm": 2.329951763153076,
      "learning_rate": 9.991938589909369e-05,
      "loss": 3.9284,
      "step": 236
    },
    {
      "epoch": 0.13628229904802805,
      "grad_norm": 1.928336501121521,
      "learning_rate": 9.991392362269276e-05,
      "loss": 3.9462,
      "step": 238
    },
    {
      "epoch": 0.137427528451793,
      "grad_norm": 1.4073456525802612,
      "learning_rate": 9.990828246422164e-05,
      "loss": 3.9525,
      "step": 240
    },
    {
      "epoch": 0.13857275785555795,
      "grad_norm": 1.6663973331451416,
      "learning_rate": 9.990246244389713e-05,
      "loss": 3.9685,
      "step": 242
    },
    {
      "epoch": 0.13971798725932288,
      "grad_norm": 1.8091737031936646,
      "learning_rate": 9.989646358257715e-05,
      "loss": 3.9284,
      "step": 244
    },
    {
      "epoch": 0.14086321666308782,
      "grad_norm": 1.5511283874511719,
      "learning_rate": 9.989028590176044e-05,
      "loss": 3.9289,
      "step": 246
    },
    {
      "epoch": 0.14200844606685276,
      "grad_norm": 1.5394625663757324,
      "learning_rate": 9.988392942358664e-05,
      "loss": 3.9849,
      "step": 248
    },
    {
      "epoch": 0.14315367547061772,
      "grad_norm": 1.680882453918457,
      "learning_rate": 9.98773941708362e-05,
      "loss": 3.9452,
      "step": 250
    },
    {
      "epoch": 0.14429890487438266,
      "grad_norm": 1.6341670751571655,
      "learning_rate": 9.98706801669302e-05,
      "loss": 3.8317,
      "step": 252
    },
    {
      "epoch": 0.1454441342781476,
      "grad_norm": 1.9933757781982422,
      "learning_rate": 9.986378743593036e-05,
      "loss": 3.9665,
      "step": 254
    },
    {
      "epoch": 0.14658936368191253,
      "grad_norm": 2.2253994941711426,
      "learning_rate": 9.985671600253894e-05,
      "loss": 3.9239,
      "step": 256
    },
    {
      "epoch": 0.14773459308567746,
      "grad_norm": 2.2543365955352783,
      "learning_rate": 9.984946589209862e-05,
      "loss": 3.8639,
      "step": 258
    },
    {
      "epoch": 0.14887982248944243,
      "grad_norm": 1.8106629848480225,
      "learning_rate": 9.984203713059241e-05,
      "loss": 3.9178,
      "step": 260
    },
    {
      "epoch": 0.15002505189320736,
      "grad_norm": 1.638542652130127,
      "learning_rate": 9.983442974464362e-05,
      "loss": 3.9169,
      "step": 262
    },
    {
      "epoch": 0.1511702812969723,
      "grad_norm": 1.3521384000778198,
      "learning_rate": 9.982664376151564e-05,
      "loss": 3.8682,
      "step": 264
    },
    {
      "epoch": 0.15231551070073723,
      "grad_norm": 1.6458699703216553,
      "learning_rate": 9.981867920911201e-05,
      "loss": 3.9566,
      "step": 266
    },
    {
      "epoch": 0.1534607401045022,
      "grad_norm": 1.7851066589355469,
      "learning_rate": 9.981053611597615e-05,
      "loss": 3.9085,
      "step": 268
    },
    {
      "epoch": 0.15460596950826713,
      "grad_norm": 1.6740517616271973,
      "learning_rate": 9.980221451129137e-05,
      "loss": 3.8899,
      "step": 270
    },
    {
      "epoch": 0.15575119891203207,
      "grad_norm": 1.117129921913147,
      "learning_rate": 9.979371442488073e-05,
      "loss": 3.7544,
      "step": 272
    },
    {
      "epoch": 0.156896428315797,
      "grad_norm": 1.5676058530807495,
      "learning_rate": 9.978503588720694e-05,
      "loss": 3.7753,
      "step": 274
    },
    {
      "epoch": 0.15804165771956194,
      "grad_norm": 1.6609163284301758,
      "learning_rate": 9.977617892937223e-05,
      "loss": 3.8463,
      "step": 276
    },
    {
      "epoch": 0.1591868871233269,
      "grad_norm": 1.7229987382888794,
      "learning_rate": 9.976714358311828e-05,
      "loss": 3.8446,
      "step": 278
    },
    {
      "epoch": 0.16033211652709184,
      "grad_norm": 1.6770962476730347,
      "learning_rate": 9.975792988082603e-05,
      "loss": 3.8684,
      "step": 280
    },
    {
      "epoch": 0.16147734593085677,
      "grad_norm": 1.215281367301941,
      "learning_rate": 9.974853785551568e-05,
      "loss": 3.7788,
      "step": 282
    },
    {
      "epoch": 0.1626225753346217,
      "grad_norm": 1.208257794380188,
      "learning_rate": 9.973896754084646e-05,
      "loss": 3.8338,
      "step": 284
    },
    {
      "epoch": 0.16376780473838665,
      "grad_norm": 1.4068255424499512,
      "learning_rate": 9.972921897111658e-05,
      "loss": 3.8583,
      "step": 286
    },
    {
      "epoch": 0.1649130341421516,
      "grad_norm": 1.4898021221160889,
      "learning_rate": 9.971929218126306e-05,
      "loss": 3.8051,
      "step": 288
    },
    {
      "epoch": 0.16605826354591655,
      "grad_norm": 1.6303211450576782,
      "learning_rate": 9.970918720686164e-05,
      "loss": 3.8598,
      "step": 290
    },
    {
      "epoch": 0.16720349294968148,
      "grad_norm": 1.6599496603012085,
      "learning_rate": 9.969890408412665e-05,
      "loss": 3.7214,
      "step": 292
    },
    {
      "epoch": 0.16834872235344642,
      "grad_norm": 1.1958950757980347,
      "learning_rate": 9.968844284991086e-05,
      "loss": 3.7042,
      "step": 294
    },
    {
      "epoch": 0.16949395175721135,
      "grad_norm": 1.3099420070648193,
      "learning_rate": 9.967780354170533e-05,
      "loss": 3.7405,
      "step": 296
    },
    {
      "epoch": 0.17063918116097632,
      "grad_norm": 1.5054072141647339,
      "learning_rate": 9.966698619763936e-05,
      "loss": 3.7827,
      "step": 298
    },
    {
      "epoch": 0.17178441056474125,
      "grad_norm": 1.444757103919983,
      "learning_rate": 9.965599085648025e-05,
      "loss": 3.7361,
      "step": 300
    },
    {
      "epoch": 0.1729296399685062,
      "grad_norm": 0.9423370361328125,
      "learning_rate": 9.964481755763322e-05,
      "loss": 3.7063,
      "step": 302
    },
    {
      "epoch": 0.17407486937227112,
      "grad_norm": 1.044169306755066,
      "learning_rate": 9.963346634114128e-05,
      "loss": 3.7999,
      "step": 304
    },
    {
      "epoch": 0.1752200987760361,
      "grad_norm": 1.578296184539795,
      "learning_rate": 9.962193724768503e-05,
      "loss": 3.7448,
      "step": 306
    },
    {
      "epoch": 0.17636532817980102,
      "grad_norm": 1.4953491687774658,
      "learning_rate": 9.961023031858258e-05,
      "loss": 3.7625,
      "step": 308
    },
    {
      "epoch": 0.17751055758356596,
      "grad_norm": 1.295817494392395,
      "learning_rate": 9.959834559578934e-05,
      "loss": 3.7042,
      "step": 310
    },
    {
      "epoch": 0.1786557869873309,
      "grad_norm": 1.4001609086990356,
      "learning_rate": 9.95862831218979e-05,
      "loss": 3.7272,
      "step": 312
    },
    {
      "epoch": 0.17980101639109583,
      "grad_norm": 1.8881722688674927,
      "learning_rate": 9.95740429401379e-05,
      "loss": 3.6904,
      "step": 314
    },
    {
      "epoch": 0.1809462457948608,
      "grad_norm": 1.919791340827942,
      "learning_rate": 9.956162509437584e-05,
      "loss": 3.7071,
      "step": 316
    },
    {
      "epoch": 0.18209147519862573,
      "grad_norm": 1.758253574371338,
      "learning_rate": 9.954902962911494e-05,
      "loss": 3.7906,
      "step": 318
    },
    {
      "epoch": 0.18323670460239067,
      "grad_norm": 1.480323314666748,
      "learning_rate": 9.953625658949494e-05,
      "loss": 3.7697,
      "step": 320
    },
    {
      "epoch": 0.1843819340061556,
      "grad_norm": 1.5573948621749878,
      "learning_rate": 9.952330602129202e-05,
      "loss": 3.752,
      "step": 322
    },
    {
      "epoch": 0.18552716340992054,
      "grad_norm": 1.3204878568649292,
      "learning_rate": 9.951017797091858e-05,
      "loss": 3.6479,
      "step": 324
    },
    {
      "epoch": 0.1866723928136855,
      "grad_norm": 1.5514147281646729,
      "learning_rate": 9.949687248542303e-05,
      "loss": 3.7199,
      "step": 326
    },
    {
      "epoch": 0.18781762221745044,
      "grad_norm": 1.2910770177841187,
      "learning_rate": 9.948338961248977e-05,
      "loss": 3.7427,
      "step": 328
    },
    {
      "epoch": 0.18896285162121537,
      "grad_norm": 1.1663178205490112,
      "learning_rate": 9.946972940043882e-05,
      "loss": 3.6616,
      "step": 330
    },
    {
      "epoch": 0.1901080810249803,
      "grad_norm": 1.3439650535583496,
      "learning_rate": 9.945589189822584e-05,
      "loss": 3.7385,
      "step": 332
    },
    {
      "epoch": 0.19125331042874527,
      "grad_norm": 1.1256877183914185,
      "learning_rate": 9.94418771554418e-05,
      "loss": 3.6056,
      "step": 334
    },
    {
      "epoch": 0.1923985398325102,
      "grad_norm": 1.1813896894454956,
      "learning_rate": 9.942768522231289e-05,
      "loss": 3.5544,
      "step": 336
    },
    {
      "epoch": 0.19354376923627514,
      "grad_norm": 1.2541157007217407,
      "learning_rate": 9.941331614970031e-05,
      "loss": 3.6401,
      "step": 338
    },
    {
      "epoch": 0.19468899864004008,
      "grad_norm": 1.237069010734558,
      "learning_rate": 9.939876998910012e-05,
      "loss": 3.7564,
      "step": 340
    },
    {
      "epoch": 0.19583422804380501,
      "grad_norm": 1.1157530546188354,
      "learning_rate": 9.938404679264301e-05,
      "loss": 3.6164,
      "step": 342
    },
    {
      "epoch": 0.19697945744756998,
      "grad_norm": 1.149465560913086,
      "learning_rate": 9.936914661309412e-05,
      "loss": 3.6968,
      "step": 344
    },
    {
      "epoch": 0.1981246868513349,
      "grad_norm": 0.9530683755874634,
      "learning_rate": 9.93540695038529e-05,
      "loss": 3.6194,
      "step": 346
    },
    {
      "epoch": 0.19926991625509985,
      "grad_norm": 1.1686296463012695,
      "learning_rate": 9.933881551895281e-05,
      "loss": 3.7604,
      "step": 348
    },
    {
      "epoch": 0.20041514565886479,
      "grad_norm": 1.2699095010757446,
      "learning_rate": 9.93233847130613e-05,
      "loss": 3.6371,
      "step": 350
    },
    {
      "epoch": 0.20156037506262972,
      "grad_norm": 1.1345208883285522,
      "learning_rate": 9.930777714147945e-05,
      "loss": 3.6146,
      "step": 352
    },
    {
      "epoch": 0.20270560446639468,
      "grad_norm": 1.3319895267486572,
      "learning_rate": 9.929199286014185e-05,
      "loss": 3.6443,
      "step": 354
    },
    {
      "epoch": 0.20385083387015962,
      "grad_norm": 1.6053088903427124,
      "learning_rate": 9.927603192561637e-05,
      "loss": 3.6277,
      "step": 356
    },
    {
      "epoch": 0.20499606327392456,
      "grad_norm": 1.2149386405944824,
      "learning_rate": 9.925989439510398e-05,
      "loss": 3.5555,
      "step": 358
    },
    {
      "epoch": 0.2061412926776895,
      "grad_norm": 1.0859287977218628,
      "learning_rate": 9.924358032643855e-05,
      "loss": 3.6253,
      "step": 360
    },
    {
      "epoch": 0.20728652208145446,
      "grad_norm": 0.9613994359970093,
      "learning_rate": 9.922708977808663e-05,
      "loss": 3.5826,
      "step": 362
    },
    {
      "epoch": 0.2084317514852194,
      "grad_norm": 1.0509222745895386,
      "learning_rate": 9.921042280914721e-05,
      "loss": 3.6263,
      "step": 364
    },
    {
      "epoch": 0.20957698088898433,
      "grad_norm": 1.3777049779891968,
      "learning_rate": 9.919357947935156e-05,
      "loss": 3.6187,
      "step": 366
    },
    {
      "epoch": 0.21072221029274926,
      "grad_norm": 1.3364644050598145,
      "learning_rate": 9.9176559849063e-05,
      "loss": 3.5946,
      "step": 368
    },
    {
      "epoch": 0.2118674396965142,
      "grad_norm": 1.4562104940414429,
      "learning_rate": 9.915936397927665e-05,
      "loss": 3.6099,
      "step": 370
    },
    {
      "epoch": 0.21301266910027916,
      "grad_norm": 1.066383719444275,
      "learning_rate": 9.91419919316193e-05,
      "loss": 3.5395,
      "step": 372
    },
    {
      "epoch": 0.2141578985040441,
      "grad_norm": 1.6498733758926392,
      "learning_rate": 9.912444376834903e-05,
      "loss": 3.6083,
      "step": 374
    },
    {
      "epoch": 0.21530312790780903,
      "grad_norm": 0.9828553795814514,
      "learning_rate": 9.910671955235518e-05,
      "loss": 3.5409,
      "step": 376
    },
    {
      "epoch": 0.21644835731157397,
      "grad_norm": 1.178269624710083,
      "learning_rate": 9.908881934715798e-05,
      "loss": 3.6018,
      "step": 378
    },
    {
      "epoch": 0.2175935867153389,
      "grad_norm": 1.3328818082809448,
      "learning_rate": 9.907074321690838e-05,
      "loss": 3.5718,
      "step": 380
    },
    {
      "epoch": 0.21873881611910387,
      "grad_norm": 1.1077896356582642,
      "learning_rate": 9.905249122638783e-05,
      "loss": 3.581,
      "step": 382
    },
    {
      "epoch": 0.2198840455228688,
      "grad_norm": 1.220638394355774,
      "learning_rate": 9.903406344100798e-05,
      "loss": 3.5813,
      "step": 384
    },
    {
      "epoch": 0.22102927492663374,
      "grad_norm": 1.5574766397476196,
      "learning_rate": 9.901545992681057e-05,
      "loss": 3.5785,
      "step": 386
    },
    {
      "epoch": 0.22217450433039868,
      "grad_norm": 1.013902187347412,
      "learning_rate": 9.899668075046706e-05,
      "loss": 3.6156,
      "step": 388
    },
    {
      "epoch": 0.2233197337341636,
      "grad_norm": 1.197936773300171,
      "learning_rate": 9.897772597927848e-05,
      "loss": 3.5428,
      "step": 390
    },
    {
      "epoch": 0.22446496313792857,
      "grad_norm": 0.9838180541992188,
      "learning_rate": 9.895859568117512e-05,
      "loss": 3.534,
      "step": 392
    },
    {
      "epoch": 0.2256101925416935,
      "grad_norm": 1.0316840410232544,
      "learning_rate": 9.893928992471639e-05,
      "loss": 3.5691,
      "step": 394
    },
    {
      "epoch": 0.22675542194545845,
      "grad_norm": 0.9378739595413208,
      "learning_rate": 9.891980877909045e-05,
      "loss": 3.5368,
      "step": 396
    },
    {
      "epoch": 0.22790065134922338,
      "grad_norm": 1.4947346448898315,
      "learning_rate": 9.890015231411404e-05,
      "loss": 3.5709,
      "step": 398
    },
    {
      "epoch": 0.22904588075298835,
      "grad_norm": 0.9118148684501648,
      "learning_rate": 9.888032060023225e-05,
      "loss": 3.527,
      "step": 400
    },
    {
      "epoch": 0.23019111015675328,
      "grad_norm": 1.2407753467559814,
      "learning_rate": 9.886031370851816e-05,
      "loss": 3.5301,
      "step": 402
    },
    {
      "epoch": 0.23133633956051822,
      "grad_norm": 1.7163093090057373,
      "learning_rate": 9.88401317106727e-05,
      "loss": 3.5828,
      "step": 404
    },
    {
      "epoch": 0.23248156896428315,
      "grad_norm": 1.0757009983062744,
      "learning_rate": 9.881977467902434e-05,
      "loss": 3.4831,
      "step": 406
    },
    {
      "epoch": 0.2336267983680481,
      "grad_norm": 0.9473862648010254,
      "learning_rate": 9.879924268652885e-05,
      "loss": 3.5196,
      "step": 408
    },
    {
      "epoch": 0.23477202777181305,
      "grad_norm": 1.199771761894226,
      "learning_rate": 9.877853580676897e-05,
      "loss": 3.574,
      "step": 410
    },
    {
      "epoch": 0.235917257175578,
      "grad_norm": 0.9006698131561279,
      "learning_rate": 9.875765411395428e-05,
      "loss": 3.5348,
      "step": 412
    },
    {
      "epoch": 0.23706248657934292,
      "grad_norm": 1.1242282390594482,
      "learning_rate": 9.873659768292081e-05,
      "loss": 3.5249,
      "step": 414
    },
    {
      "epoch": 0.23820771598310786,
      "grad_norm": 1.0675747394561768,
      "learning_rate": 9.871536658913082e-05,
      "loss": 3.5086,
      "step": 416
    },
    {
      "epoch": 0.2393529453868728,
      "grad_norm": 0.8544116616249084,
      "learning_rate": 9.869396090867255e-05,
      "loss": 3.546,
      "step": 418
    },
    {
      "epoch": 0.24049817479063776,
      "grad_norm": 1.3136742115020752,
      "learning_rate": 9.867238071825992e-05,
      "loss": 3.4937,
      "step": 420
    },
    {
      "epoch": 0.2416434041944027,
      "grad_norm": 1.3740772008895874,
      "learning_rate": 9.865062609523223e-05,
      "loss": 3.4303,
      "step": 422
    },
    {
      "epoch": 0.24278863359816763,
      "grad_norm": 1.342213749885559,
      "learning_rate": 9.862869711755397e-05,
      "loss": 3.4982,
      "step": 424
    },
    {
      "epoch": 0.24393386300193257,
      "grad_norm": 1.0677942037582397,
      "learning_rate": 9.860659386381443e-05,
      "loss": 3.4288,
      "step": 426
    },
    {
      "epoch": 0.24507909240569753,
      "grad_norm": 0.9615838527679443,
      "learning_rate": 9.858431641322749e-05,
      "loss": 3.4787,
      "step": 428
    },
    {
      "epoch": 0.24622432180946247,
      "grad_norm": 1.0572890043258667,
      "learning_rate": 9.856186484563134e-05,
      "loss": 3.5314,
      "step": 430
    },
    {
      "epoch": 0.2473695512132274,
      "grad_norm": 1.158275842666626,
      "learning_rate": 9.853923924148815e-05,
      "loss": 3.5504,
      "step": 432
    },
    {
      "epoch": 0.24851478061699234,
      "grad_norm": 1.171581745147705,
      "learning_rate": 9.851643968188383e-05,
      "loss": 3.5478,
      "step": 434
    },
    {
      "epoch": 0.24966001002075727,
      "grad_norm": 1.0333714485168457,
      "learning_rate": 9.849346624852764e-05,
      "loss": 3.5497,
      "step": 436
    },
    {
      "epoch": 0.2508052394245222,
      "grad_norm": 0.9459155797958374,
      "learning_rate": 9.847031902375207e-05,
      "loss": 3.5074,
      "step": 438
    },
    {
      "epoch": 0.25195046882828714,
      "grad_norm": 1.0424790382385254,
      "learning_rate": 9.84469980905124e-05,
      "loss": 3.4961,
      "step": 440
    },
    {
      "epoch": 0.25309569823205214,
      "grad_norm": 1.0463571548461914,
      "learning_rate": 9.842350353238642e-05,
      "loss": 3.4405,
      "step": 442
    },
    {
      "epoch": 0.25424092763581707,
      "grad_norm": 1.000319242477417,
      "learning_rate": 9.839983543357421e-05,
      "loss": 3.4595,
      "step": 444
    },
    {
      "epoch": 0.255386157039582,
      "grad_norm": 1.2526150941848755,
      "learning_rate": 9.837599387889773e-05,
      "loss": 3.5012,
      "step": 446
    },
    {
      "epoch": 0.25653138644334694,
      "grad_norm": 1.3148843050003052,
      "learning_rate": 9.835197895380065e-05,
      "loss": 3.4767,
      "step": 448
    },
    {
      "epoch": 0.2576766158471119,
      "grad_norm": 1.3939634561538696,
      "learning_rate": 9.83277907443479e-05,
      "loss": 3.3783,
      "step": 450
    },
    {
      "epoch": 0.2588218452508768,
      "grad_norm": 1.0367929935455322,
      "learning_rate": 9.830342933722545e-05,
      "loss": 3.4289,
      "step": 452
    },
    {
      "epoch": 0.25996707465464175,
      "grad_norm": 0.9439120888710022,
      "learning_rate": 9.827889481974e-05,
      "loss": 3.4728,
      "step": 454
    },
    {
      "epoch": 0.2611123040584067,
      "grad_norm": 1.2146074771881104,
      "learning_rate": 9.82541872798186e-05,
      "loss": 3.4257,
      "step": 456
    },
    {
      "epoch": 0.2622575334621716,
      "grad_norm": 1.0530729293823242,
      "learning_rate": 9.822930680600841e-05,
      "loss": 3.4681,
      "step": 458
    },
    {
      "epoch": 0.26340276286593656,
      "grad_norm": 1.1026678085327148,
      "learning_rate": 9.820425348747637e-05,
      "loss": 3.4298,
      "step": 460
    },
    {
      "epoch": 0.26454799226970155,
      "grad_norm": 1.2520779371261597,
      "learning_rate": 9.817902741400879e-05,
      "loss": 3.4191,
      "step": 462
    },
    {
      "epoch": 0.2656932216734665,
      "grad_norm": 1.1041593551635742,
      "learning_rate": 9.815362867601121e-05,
      "loss": 3.466,
      "step": 464
    },
    {
      "epoch": 0.2668384510772314,
      "grad_norm": 0.881693422794342,
      "learning_rate": 9.812805736450786e-05,
      "loss": 3.4929,
      "step": 466
    },
    {
      "epoch": 0.26798368048099636,
      "grad_norm": 1.3125033378601074,
      "learning_rate": 9.810231357114152e-05,
      "loss": 3.4592,
      "step": 468
    },
    {
      "epoch": 0.2691289098847613,
      "grad_norm": 1.2968268394470215,
      "learning_rate": 9.807639738817307e-05,
      "loss": 3.4851,
      "step": 470
    },
    {
      "epoch": 0.2702741392885262,
      "grad_norm": 0.9855544567108154,
      "learning_rate": 9.805030890848119e-05,
      "loss": 3.4487,
      "step": 472
    },
    {
      "epoch": 0.27141936869229116,
      "grad_norm": 1.3063323497772217,
      "learning_rate": 9.802404822556209e-05,
      "loss": 3.4961,
      "step": 474
    },
    {
      "epoch": 0.2725645980960561,
      "grad_norm": 1.0567957162857056,
      "learning_rate": 9.79976154335291e-05,
      "loss": 3.3975,
      "step": 476
    },
    {
      "epoch": 0.27370982749982103,
      "grad_norm": 0.9473979473114014,
      "learning_rate": 9.797101062711231e-05,
      "loss": 3.4573,
      "step": 478
    },
    {
      "epoch": 0.274855056903586,
      "grad_norm": 1.2931294441223145,
      "learning_rate": 9.794423390165837e-05,
      "loss": 3.3732,
      "step": 480
    },
    {
      "epoch": 0.27600028630735096,
      "grad_norm": 1.233302116394043,
      "learning_rate": 9.791728535312998e-05,
      "loss": 3.419,
      "step": 482
    },
    {
      "epoch": 0.2771455157111159,
      "grad_norm": 0.9638918042182922,
      "learning_rate": 9.789016507810564e-05,
      "loss": 3.4119,
      "step": 484
    },
    {
      "epoch": 0.27829074511488083,
      "grad_norm": 1.105643391609192,
      "learning_rate": 9.786287317377929e-05,
      "loss": 3.3909,
      "step": 486
    },
    {
      "epoch": 0.27943597451864577,
      "grad_norm": 0.9666796922683716,
      "learning_rate": 9.783540973795998e-05,
      "loss": 3.4194,
      "step": 488
    },
    {
      "epoch": 0.2805812039224107,
      "grad_norm": 1.3533586263656616,
      "learning_rate": 9.780777486907146e-05,
      "loss": 3.3789,
      "step": 490
    },
    {
      "epoch": 0.28172643332617564,
      "grad_norm": 1.1253416538238525,
      "learning_rate": 9.777996866615186e-05,
      "loss": 3.4385,
      "step": 492
    },
    {
      "epoch": 0.2828716627299406,
      "grad_norm": 0.7198868989944458,
      "learning_rate": 9.775199122885339e-05,
      "loss": 3.4038,
      "step": 494
    },
    {
      "epoch": 0.2840168921337055,
      "grad_norm": 0.9696770310401917,
      "learning_rate": 9.772384265744188e-05,
      "loss": 3.4576,
      "step": 496
    },
    {
      "epoch": 0.28516212153747045,
      "grad_norm": 1.321269154548645,
      "learning_rate": 9.76955230527965e-05,
      "loss": 3.4348,
      "step": 498
    },
    {
      "epoch": 0.28630735094123544,
      "grad_norm": 1.3119802474975586,
      "learning_rate": 9.766703251640934e-05,
      "loss": 3.3848,
      "step": 500
    },
    {
      "epoch": 0.2874525803450004,
      "grad_norm": 1.0199967622756958,
      "learning_rate": 9.763837115038513e-05,
      "loss": 3.4108,
      "step": 502
    },
    {
      "epoch": 0.2885978097487653,
      "grad_norm": 0.9925194382667542,
      "learning_rate": 9.760953905744075e-05,
      "loss": 3.31,
      "step": 504
    },
    {
      "epoch": 0.28974303915253025,
      "grad_norm": 0.9447107315063477,
      "learning_rate": 9.758053634090502e-05,
      "loss": 3.3598,
      "step": 506
    },
    {
      "epoch": 0.2908882685562952,
      "grad_norm": 1.052873134613037,
      "learning_rate": 9.755136310471817e-05,
      "loss": 3.3704,
      "step": 508
    },
    {
      "epoch": 0.2920334979600601,
      "grad_norm": 1.061514139175415,
      "learning_rate": 9.752201945343156e-05,
      "loss": 3.3642,
      "step": 510
    },
    {
      "epoch": 0.29317872736382505,
      "grad_norm": 0.8627074956893921,
      "learning_rate": 9.74925054922073e-05,
      "loss": 3.367,
      "step": 512
    },
    {
      "epoch": 0.29432395676759,
      "grad_norm": 1.0214530229568481,
      "learning_rate": 9.746282132681785e-05,
      "loss": 3.3266,
      "step": 514
    },
    {
      "epoch": 0.2954691861713549,
      "grad_norm": 1.1223275661468506,
      "learning_rate": 9.743296706364565e-05,
      "loss": 3.4194,
      "step": 516
    },
    {
      "epoch": 0.2966144155751199,
      "grad_norm": 0.9849138259887695,
      "learning_rate": 9.740294280968273e-05,
      "loss": 3.3664,
      "step": 518
    },
    {
      "epoch": 0.29775964497888485,
      "grad_norm": 0.7025099396705627,
      "learning_rate": 9.737274867253034e-05,
      "loss": 3.3772,
      "step": 520
    },
    {
      "epoch": 0.2989048743826498,
      "grad_norm": 0.936536967754364,
      "learning_rate": 9.734238476039858e-05,
      "loss": 3.3196,
      "step": 522
    },
    {
      "epoch": 0.3000501037864147,
      "grad_norm": 1.113277792930603,
      "learning_rate": 9.731185118210598e-05,
      "loss": 3.4606,
      "step": 524
    },
    {
      "epoch": 0.30119533319017966,
      "grad_norm": 1.0153186321258545,
      "learning_rate": 9.728114804707909e-05,
      "loss": 3.4079,
      "step": 526
    },
    {
      "epoch": 0.3023405625939446,
      "grad_norm": 1.1675206422805786,
      "learning_rate": 9.725027546535215e-05,
      "loss": 3.4111,
      "step": 528
    },
    {
      "epoch": 0.30348579199770953,
      "grad_norm": 0.9518959522247314,
      "learning_rate": 9.721923354756665e-05,
      "loss": 3.3905,
      "step": 530
    },
    {
      "epoch": 0.30463102140147447,
      "grad_norm": 0.9693425297737122,
      "learning_rate": 9.718802240497098e-05,
      "loss": 3.4364,
      "step": 532
    },
    {
      "epoch": 0.3057762508052394,
      "grad_norm": 1.1249076128005981,
      "learning_rate": 9.715664214941997e-05,
      "loss": 3.3373,
      "step": 534
    },
    {
      "epoch": 0.3069214802090044,
      "grad_norm": 0.8406875133514404,
      "learning_rate": 9.712509289337453e-05,
      "loss": 3.321,
      "step": 536
    },
    {
      "epoch": 0.30806670961276933,
      "grad_norm": 0.9538395404815674,
      "learning_rate": 9.709337474990121e-05,
      "loss": 3.4007,
      "step": 538
    },
    {
      "epoch": 0.30921193901653427,
      "grad_norm": 0.8003599047660828,
      "learning_rate": 9.706148783267187e-05,
      "loss": 3.3798,
      "step": 540
    },
    {
      "epoch": 0.3103571684202992,
      "grad_norm": 0.8605026602745056,
      "learning_rate": 9.702943225596316e-05,
      "loss": 3.2908,
      "step": 542
    },
    {
      "epoch": 0.31150239782406414,
      "grad_norm": 0.7349815964698792,
      "learning_rate": 9.699720813465625e-05,
      "loss": 3.408,
      "step": 544
    },
    {
      "epoch": 0.3126476272278291,
      "grad_norm": 1.1622780561447144,
      "learning_rate": 9.696481558423628e-05,
      "loss": 3.3212,
      "step": 546
    },
    {
      "epoch": 0.313792856631594,
      "grad_norm": 0.9829496145248413,
      "learning_rate": 9.693225472079204e-05,
      "loss": 3.4067,
      "step": 548
    },
    {
      "epoch": 0.31493808603535894,
      "grad_norm": 1.1378313302993774,
      "learning_rate": 9.689952566101548e-05,
      "loss": 3.3556,
      "step": 550
    },
    {
      "epoch": 0.3160833154391239,
      "grad_norm": 0.9355561137199402,
      "learning_rate": 9.686662852220142e-05,
      "loss": 3.3281,
      "step": 552
    },
    {
      "epoch": 0.3172285448428888,
      "grad_norm": 0.9328277111053467,
      "learning_rate": 9.683356342224694e-05,
      "loss": 3.313,
      "step": 554
    },
    {
      "epoch": 0.3183737742466538,
      "grad_norm": 1.277377724647522,
      "learning_rate": 9.680033047965114e-05,
      "loss": 3.3499,
      "step": 556
    },
    {
      "epoch": 0.31951900365041874,
      "grad_norm": 1.0239235162734985,
      "learning_rate": 9.67669298135146e-05,
      "loss": 3.3936,
      "step": 558
    },
    {
      "epoch": 0.3206642330541837,
      "grad_norm": 0.6908963322639465,
      "learning_rate": 9.673336154353899e-05,
      "loss": 3.3584,
      "step": 560
    },
    {
      "epoch": 0.3218094624579486,
      "grad_norm": 0.8835290670394897,
      "learning_rate": 9.669962579002664e-05,
      "loss": 3.3728,
      "step": 562
    },
    {
      "epoch": 0.32295469186171355,
      "grad_norm": 1.0561710596084595,
      "learning_rate": 9.666572267388013e-05,
      "loss": 3.3579,
      "step": 564
    },
    {
      "epoch": 0.3240999212654785,
      "grad_norm": 0.8400120735168457,
      "learning_rate": 9.663165231660181e-05,
      "loss": 3.3224,
      "step": 566
    },
    {
      "epoch": 0.3252451506692434,
      "grad_norm": 0.8960584998130798,
      "learning_rate": 9.659741484029341e-05,
      "loss": 3.3434,
      "step": 568
    },
    {
      "epoch": 0.32639038007300836,
      "grad_norm": 0.9615944027900696,
      "learning_rate": 9.656301036765558e-05,
      "loss": 3.2587,
      "step": 570
    },
    {
      "epoch": 0.3275356094767733,
      "grad_norm": 0.983391523361206,
      "learning_rate": 9.652843902198743e-05,
      "loss": 3.2396,
      "step": 572
    },
    {
      "epoch": 0.3286808388805383,
      "grad_norm": 0.7758197784423828,
      "learning_rate": 9.649370092718615e-05,
      "loss": 3.2948,
      "step": 574
    },
    {
      "epoch": 0.3298260682843032,
      "grad_norm": 0.9714862704277039,
      "learning_rate": 9.64587962077465e-05,
      "loss": 3.3381,
      "step": 576
    },
    {
      "epoch": 0.33097129768806816,
      "grad_norm": 0.8628116846084595,
      "learning_rate": 9.64237249887604e-05,
      "loss": 3.294,
      "step": 578
    },
    {
      "epoch": 0.3321165270918331,
      "grad_norm": 0.9794777035713196,
      "learning_rate": 9.638848739591646e-05,
      "loss": 3.3119,
      "step": 580
    },
    {
      "epoch": 0.333261756495598,
      "grad_norm": 0.8179820775985718,
      "learning_rate": 9.635308355549957e-05,
      "loss": 3.3009,
      "step": 582
    },
    {
      "epoch": 0.33440698589936296,
      "grad_norm": 0.8732323050498962,
      "learning_rate": 9.63175135943904e-05,
      "loss": 3.3207,
      "step": 584
    },
    {
      "epoch": 0.3355522153031279,
      "grad_norm": 1.0355788469314575,
      "learning_rate": 9.628177764006497e-05,
      "loss": 3.2889,
      "step": 586
    },
    {
      "epoch": 0.33669744470689283,
      "grad_norm": 0.8974720239639282,
      "learning_rate": 9.624587582059417e-05,
      "loss": 3.3089,
      "step": 588
    },
    {
      "epoch": 0.33784267411065777,
      "grad_norm": 0.7800531387329102,
      "learning_rate": 9.620980826464335e-05,
      "loss": 3.2999,
      "step": 590
    },
    {
      "epoch": 0.3389879035144227,
      "grad_norm": 0.7294676899909973,
      "learning_rate": 9.617357510147182e-05,
      "loss": 3.3634,
      "step": 592
    },
    {
      "epoch": 0.3401331329181877,
      "grad_norm": 0.7799131274223328,
      "learning_rate": 9.613717646093239e-05,
      "loss": 3.308,
      "step": 594
    },
    {
      "epoch": 0.34127836232195263,
      "grad_norm": 0.9899328947067261,
      "learning_rate": 9.610061247347091e-05,
      "loss": 3.3191,
      "step": 596
    },
    {
      "epoch": 0.34242359172571757,
      "grad_norm": 1.0520347356796265,
      "learning_rate": 9.606388327012579e-05,
      "loss": 3.389,
      "step": 598
    },
    {
      "epoch": 0.3435688211294825,
      "grad_norm": 0.9768466353416443,
      "learning_rate": 9.602698898252756e-05,
      "loss": 3.2905,
      "step": 600
    },
    {
      "epoch": 0.34471405053324744,
      "grad_norm": 0.9359555244445801,
      "learning_rate": 9.598992974289837e-05,
      "loss": 3.3022,
      "step": 602
    },
    {
      "epoch": 0.3458592799370124,
      "grad_norm": 0.7487738728523254,
      "learning_rate": 9.595270568405156e-05,
      "loss": 3.2234,
      "step": 604
    },
    {
      "epoch": 0.3470045093407773,
      "grad_norm": 0.8295655846595764,
      "learning_rate": 9.591531693939109e-05,
      "loss": 3.3506,
      "step": 606
    },
    {
      "epoch": 0.34814973874454225,
      "grad_norm": 0.9020605683326721,
      "learning_rate": 9.587776364291117e-05,
      "loss": 3.3026,
      "step": 608
    },
    {
      "epoch": 0.3492949681483072,
      "grad_norm": 0.7868961095809937,
      "learning_rate": 9.58400459291957e-05,
      "loss": 3.2393,
      "step": 610
    },
    {
      "epoch": 0.3504401975520722,
      "grad_norm": 0.9779835939407349,
      "learning_rate": 9.580216393341785e-05,
      "loss": 3.3254,
      "step": 612
    },
    {
      "epoch": 0.3515854269558371,
      "grad_norm": 0.8962246179580688,
      "learning_rate": 9.576411779133956e-05,
      "loss": 3.2486,
      "step": 614
    },
    {
      "epoch": 0.35273065635960205,
      "grad_norm": 0.9166551828384399,
      "learning_rate": 9.572590763931097e-05,
      "loss": 3.2193,
      "step": 616
    },
    {
      "epoch": 0.353875885763367,
      "grad_norm": 0.7779364585876465,
      "learning_rate": 9.568753361427009e-05,
      "loss": 3.2469,
      "step": 618
    },
    {
      "epoch": 0.3550211151671319,
      "grad_norm": 0.750092089176178,
      "learning_rate": 9.564899585374214e-05,
      "loss": 3.2532,
      "step": 620
    },
    {
      "epoch": 0.35616634457089685,
      "grad_norm": 1.0269392728805542,
      "learning_rate": 9.561029449583919e-05,
      "loss": 3.3331,
      "step": 622
    },
    {
      "epoch": 0.3573115739746618,
      "grad_norm": 0.7937965989112854,
      "learning_rate": 9.557142967925956e-05,
      "loss": 3.314,
      "step": 624
    },
    {
      "epoch": 0.3584568033784267,
      "grad_norm": 1.1338940858840942,
      "learning_rate": 9.553240154328744e-05,
      "loss": 3.3375,
      "step": 626
    },
    {
      "epoch": 0.35960203278219166,
      "grad_norm": 0.7937076091766357,
      "learning_rate": 9.549321022779229e-05,
      "loss": 3.2691,
      "step": 628
    },
    {
      "epoch": 0.36074726218595665,
      "grad_norm": 0.8552340865135193,
      "learning_rate": 9.545385587322839e-05,
      "loss": 3.3107,
      "step": 630
    },
    {
      "epoch": 0.3618924915897216,
      "grad_norm": 1.0279617309570312,
      "learning_rate": 9.541433862063429e-05,
      "loss": 3.2552,
      "step": 632
    },
    {
      "epoch": 0.3630377209934865,
      "grad_norm": 0.9652466177940369,
      "learning_rate": 9.537465861163237e-05,
      "loss": 3.242,
      "step": 634
    },
    {
      "epoch": 0.36418295039725146,
      "grad_norm": 0.9129723310470581,
      "learning_rate": 9.533481598842827e-05,
      "loss": 3.3131,
      "step": 636
    },
    {
      "epoch": 0.3653281798010164,
      "grad_norm": 0.9316424131393433,
      "learning_rate": 9.529481089381042e-05,
      "loss": 3.3288,
      "step": 638
    },
    {
      "epoch": 0.36647340920478133,
      "grad_norm": 0.7987300753593445,
      "learning_rate": 9.525464347114953e-05,
      "loss": 3.2832,
      "step": 640
    },
    {
      "epoch": 0.36761863860854627,
      "grad_norm": 0.7103368043899536,
      "learning_rate": 9.521431386439807e-05,
      "loss": 3.2339,
      "step": 642
    },
    {
      "epoch": 0.3687638680123112,
      "grad_norm": 0.7420955896377563,
      "learning_rate": 9.517382221808969e-05,
      "loss": 3.1662,
      "step": 644
    },
    {
      "epoch": 0.36990909741607614,
      "grad_norm": 0.8201749324798584,
      "learning_rate": 9.513316867733883e-05,
      "loss": 3.2837,
      "step": 646
    },
    {
      "epoch": 0.3710543268198411,
      "grad_norm": 0.8581364154815674,
      "learning_rate": 9.509235338784009e-05,
      "loss": 3.2949,
      "step": 648
    },
    {
      "epoch": 0.37219955622360607,
      "grad_norm": 0.956118643283844,
      "learning_rate": 9.505137649586775e-05,
      "loss": 3.316,
      "step": 650
    },
    {
      "epoch": 0.373344785627371,
      "grad_norm": 0.708759069442749,
      "learning_rate": 9.501023814827524e-05,
      "loss": 3.1951,
      "step": 652
    },
    {
      "epoch": 0.37449001503113594,
      "grad_norm": 0.8143038153648376,
      "learning_rate": 9.496893849249464e-05,
      "loss": 3.2738,
      "step": 654
    },
    {
      "epoch": 0.3756352444349009,
      "grad_norm": 0.6578754782676697,
      "learning_rate": 9.492747767653611e-05,
      "loss": 3.2809,
      "step": 656
    },
    {
      "epoch": 0.3767804738386658,
      "grad_norm": 0.8550508618354797,
      "learning_rate": 9.488585584898738e-05,
      "loss": 3.2668,
      "step": 658
    },
    {
      "epoch": 0.37792570324243074,
      "grad_norm": 0.795080304145813,
      "learning_rate": 9.48440731590132e-05,
      "loss": 3.28,
      "step": 660
    },
    {
      "epoch": 0.3790709326461957,
      "grad_norm": 0.9933105707168579,
      "learning_rate": 9.480212975635486e-05,
      "loss": 3.3104,
      "step": 662
    },
    {
      "epoch": 0.3802161620499606,
      "grad_norm": 1.224338412284851,
      "learning_rate": 9.476002579132957e-05,
      "loss": 3.29,
      "step": 664
    },
    {
      "epoch": 0.38136139145372555,
      "grad_norm": 0.8564585447311401,
      "learning_rate": 9.471776141483e-05,
      "loss": 3.2,
      "step": 666
    },
    {
      "epoch": 0.38250662085749054,
      "grad_norm": 1.160684585571289,
      "learning_rate": 9.467533677832365e-05,
      "loss": 3.2226,
      "step": 668
    },
    {
      "epoch": 0.3836518502612555,
      "grad_norm": 0.8671857714653015,
      "learning_rate": 9.463275203385244e-05,
      "loss": 3.2453,
      "step": 670
    },
    {
      "epoch": 0.3847970796650204,
      "grad_norm": 1.0225045680999756,
      "learning_rate": 9.459000733403205e-05,
      "loss": 3.2283,
      "step": 672
    },
    {
      "epoch": 0.38594230906878535,
      "grad_norm": 0.8350477814674377,
      "learning_rate": 9.454710283205139e-05,
      "loss": 3.2584,
      "step": 674
    },
    {
      "epoch": 0.3870875384725503,
      "grad_norm": 0.8098021745681763,
      "learning_rate": 9.450403868167208e-05,
      "loss": 3.2836,
      "step": 676
    },
    {
      "epoch": 0.3882327678763152,
      "grad_norm": 0.8174638748168945,
      "learning_rate": 9.446081503722792e-05,
      "loss": 3.1896,
      "step": 678
    },
    {
      "epoch": 0.38937799728008016,
| "grad_norm": 0.6904940009117126, | |
| "learning_rate": 9.441743205362426e-05, | |
| "loss": 3.2464, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.3905232266838451, | |
| "grad_norm": 0.692864716053009, | |
| "learning_rate": 9.437388988633752e-05, | |
| "loss": 3.2277, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.39166845608761003, | |
| "grad_norm": 0.7014842629432678, | |
| "learning_rate": 9.433018869141464e-05, | |
| "loss": 3.2372, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.39281368549137496, | |
| "grad_norm": 0.6166806817054749, | |
| "learning_rate": 9.428632862547237e-05, | |
| "loss": 3.2501, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.39395891489513996, | |
| "grad_norm": 0.7060846090316772, | |
| "learning_rate": 9.424230984569696e-05, | |
| "loss": 3.2881, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.3951041442989049, | |
| "grad_norm": 0.7771391272544861, | |
| "learning_rate": 9.419813250984337e-05, | |
| "loss": 3.2149, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.3962493737026698, | |
| "grad_norm": 0.6290923953056335, | |
| "learning_rate": 9.415379677623485e-05, | |
| "loss": 3.1555, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.39739460310643476, | |
| "grad_norm": 0.7270971536636353, | |
| "learning_rate": 9.410930280376225e-05, | |
| "loss": 3.2554, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.3985398325101997, | |
| "grad_norm": 0.681962788105011, | |
| "learning_rate": 9.40646507518836e-05, | |
| "loss": 3.1671, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.39968506191396463, | |
| "grad_norm": 0.5727997422218323, | |
| "learning_rate": 9.40198407806234e-05, | |
| "loss": 3.237, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.40083029131772957, | |
| "grad_norm": 0.7687988877296448, | |
| "learning_rate": 9.39748730505721e-05, | |
| "loss": 3.2357, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.4019755207214945, | |
| "grad_norm": 0.7813317179679871, | |
| "learning_rate": 9.392974772288558e-05, | |
| "loss": 3.2101, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.40312075012525944, | |
| "grad_norm": 0.8766132593154907, | |
| "learning_rate": 9.388446495928446e-05, | |
| "loss": 3.2852, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.40426597952902443, | |
| "grad_norm": 0.7857736349105835, | |
| "learning_rate": 9.383902492205363e-05, | |
| "loss": 3.2113, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.40541120893278937, | |
| "grad_norm": 0.9073331356048584, | |
| "learning_rate": 9.379342777404159e-05, | |
| "loss": 3.2478, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.4065564383365543, | |
| "grad_norm": 0.8033682107925415, | |
| "learning_rate": 9.374767367865989e-05, | |
| "loss": 3.3159, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.40770166774031924, | |
| "grad_norm": 0.7821508646011353, | |
| "learning_rate": 9.370176279988256e-05, | |
| "loss": 3.2362, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.4088468971440842, | |
| "grad_norm": 0.8257923126220703, | |
| "learning_rate": 9.365569530224554e-05, | |
| "loss": 3.1832, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.4099921265478491, | |
| "grad_norm": 0.8349987864494324, | |
| "learning_rate": 9.360947135084603e-05, | |
| "loss": 3.1995, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.41113735595161405, | |
| "grad_norm": 0.8590210676193237, | |
| "learning_rate": 9.356309111134191e-05, | |
| "loss": 3.2119, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.412282585355379, | |
| "grad_norm": 0.8512969017028809, | |
| "learning_rate": 9.351655474995122e-05, | |
| "loss": 3.2323, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.4134278147591439, | |
| "grad_norm": 0.6388457417488098, | |
| "learning_rate": 9.346986243345149e-05, | |
| "loss": 3.1677, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.4145730441629089, | |
| "grad_norm": 0.8811210989952087, | |
| "learning_rate": 9.342301432917912e-05, | |
| "loss": 3.2307, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.41571827356667385, | |
| "grad_norm": 0.9297654628753662, | |
| "learning_rate": 9.337601060502891e-05, | |
| "loss": 3.1838, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.4168635029704388, | |
| "grad_norm": 0.750491201877594, | |
| "learning_rate": 9.332885142945329e-05, | |
| "loss": 3.23, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.4180087323742037, | |
| "grad_norm": 0.8282638192176819, | |
| "learning_rate": 9.328153697146186e-05, | |
| "loss": 3.1789, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.41915396177796865, | |
| "grad_norm": 0.7395208477973938, | |
| "learning_rate": 9.323406740062068e-05, | |
| "loss": 3.2881, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.4202991911817336, | |
| "grad_norm": 0.5959879755973816, | |
| "learning_rate": 9.318644288705172e-05, | |
| "loss": 3.1879, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.4214444205854985, | |
| "grad_norm": 0.6063298583030701, | |
| "learning_rate": 9.313866360143227e-05, | |
| "loss": 3.273, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.42258964998926346, | |
| "grad_norm": 0.6868070960044861, | |
| "learning_rate": 9.309072971499422e-05, | |
| "loss": 3.2145, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.4237348793930284, | |
| "grad_norm": 0.6153081655502319, | |
| "learning_rate": 9.304264139952356e-05, | |
| "loss": 3.0791, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.42488010879679333, | |
| "grad_norm": 0.6345932483673096, | |
| "learning_rate": 9.299439882735977e-05, | |
| "loss": 3.1991, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.4260253382005583, | |
| "grad_norm": 0.7605310082435608, | |
| "learning_rate": 9.294600217139506e-05, | |
| "loss": 3.1272, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.42717056760432326, | |
| "grad_norm": 0.6695173382759094, | |
| "learning_rate": 9.289745160507395e-05, | |
| "loss": 3.1482, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.4283157970080882, | |
| "grad_norm": 0.8121134638786316, | |
| "learning_rate": 9.284874730239244e-05, | |
| "loss": 3.2122, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.42946102641185313, | |
| "grad_norm": 0.8771198391914368, | |
| "learning_rate": 9.279988943789759e-05, | |
| "loss": 3.1768, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.43060625581561807, | |
| "grad_norm": 0.7993550300598145, | |
| "learning_rate": 9.275087818668675e-05, | |
| "loss": 3.1944, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.431751485219383, | |
| "grad_norm": 0.6639721393585205, | |
| "learning_rate": 9.270171372440697e-05, | |
| "loss": 3.1418, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.43289671462314794, | |
| "grad_norm": 0.7494943737983704, | |
| "learning_rate": 9.265239622725438e-05, | |
| "loss": 3.1956, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.4340419440269129, | |
| "grad_norm": 0.7307000160217285, | |
| "learning_rate": 9.26029258719736e-05, | |
| "loss": 3.133, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.4351871734306778, | |
| "grad_norm": 0.7357375621795654, | |
| "learning_rate": 9.255330283585701e-05, | |
| "loss": 3.1898, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.4363324028344428, | |
| "grad_norm": 0.6649693250656128, | |
| "learning_rate": 9.250352729674422e-05, | |
| "loss": 3.2147, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.43747763223820774, | |
| "grad_norm": 0.6873495578765869, | |
| "learning_rate": 9.245359943302133e-05, | |
| "loss": 3.2341, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.43862286164197267, | |
| "grad_norm": 0.7320956587791443, | |
| "learning_rate": 9.240351942362038e-05, | |
| "loss": 3.1241, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.4397680910457376, | |
| "grad_norm": 0.6137463450431824, | |
| "learning_rate": 9.235328744801868e-05, | |
| "loss": 3.1529, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.44091332044950254, | |
| "grad_norm": 0.8658304214477539, | |
| "learning_rate": 9.230290368623809e-05, | |
| "loss": 3.2168, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.4420585498532675, | |
| "grad_norm": 0.7436694502830505, | |
| "learning_rate": 9.225236831884454e-05, | |
| "loss": 3.1798, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.4432037792570324, | |
| "grad_norm": 0.9040384888648987, | |
| "learning_rate": 9.220168152694722e-05, | |
| "loss": 3.2241, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.44434900866079735, | |
| "grad_norm": 0.7236924171447754, | |
| "learning_rate": 9.215084349219801e-05, | |
| "loss": 3.183, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.4454942380645623, | |
| "grad_norm": 0.8633347153663635, | |
| "learning_rate": 9.209985439679081e-05, | |
| "loss": 3.1776, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.4466394674683272, | |
| "grad_norm": 0.730910062789917, | |
| "learning_rate": 9.204871442346091e-05, | |
| "loss": 3.1633, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.4477846968720922, | |
| "grad_norm": 0.809923529624939, | |
| "learning_rate": 9.199742375548432e-05, | |
| "loss": 3.1736, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.44892992627585715, | |
| "grad_norm": 0.7229586839675903, | |
| "learning_rate": 9.194598257667711e-05, | |
| "loss": 3.1813, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.4500751556796221, | |
| "grad_norm": 0.6999960541725159, | |
| "learning_rate": 9.189439107139472e-05, | |
| "loss": 3.1125, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.451220385083387, | |
| "grad_norm": 0.7234693169593811, | |
| "learning_rate": 9.184264942453138e-05, | |
| "loss": 3.137, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.45236561448715196, | |
| "grad_norm": 0.7283908724784851, | |
| "learning_rate": 9.179075782151936e-05, | |
| "loss": 3.1672, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.4535108438909169, | |
| "grad_norm": 0.793543815612793, | |
| "learning_rate": 9.173871644832834e-05, | |
| "loss": 3.1925, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.45465607329468183, | |
| "grad_norm": 0.7263696789741516, | |
| "learning_rate": 9.168652549146481e-05, | |
| "loss": 3.1609, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.45580130269844676, | |
| "grad_norm": 0.7698031663894653, | |
| "learning_rate": 9.163418513797126e-05, | |
| "loss": 3.2547, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.4569465321022117, | |
| "grad_norm": 0.908698320388794, | |
| "learning_rate": 9.158169557542566e-05, | |
| "loss": 3.2165, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.4580917615059767, | |
| "grad_norm": 0.9588857293128967, | |
| "learning_rate": 9.152905699194065e-05, | |
| "loss": 3.1743, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.4592369909097416, | |
| "grad_norm": 0.7442302107810974, | |
| "learning_rate": 9.1476269576163e-05, | |
| "loss": 3.1088, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.46038222031350656, | |
| "grad_norm": 0.7421006560325623, | |
| "learning_rate": 9.14233335172728e-05, | |
| "loss": 3.1497, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.4615274497172715, | |
| "grad_norm": 0.8878415822982788, | |
| "learning_rate": 9.13702490049829e-05, | |
| "loss": 3.1924, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.46267267912103643, | |
| "grad_norm": 0.6878317594528198, | |
| "learning_rate": 9.131701622953816e-05, | |
| "loss": 3.1366, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.46381790852480137, | |
| "grad_norm": 0.7945599555969238, | |
| "learning_rate": 9.126363538171478e-05, | |
| "loss": 3.1926, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.4649631379285663, | |
| "grad_norm": 0.7997886538505554, | |
| "learning_rate": 9.121010665281964e-05, | |
| "loss": 3.1521, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.46610836733233124, | |
| "grad_norm": 0.715614378452301, | |
| "learning_rate": 9.115643023468958e-05, | |
| "loss": 3.1904, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.4672535967360962, | |
| "grad_norm": 0.7846017479896545, | |
| "learning_rate": 9.110260631969077e-05, | |
| "loss": 3.1338, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.46839882613986117, | |
| "grad_norm": 0.6939677596092224, | |
| "learning_rate": 9.10486351007179e-05, | |
| "loss": 3.1635, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.4695440555436261, | |
| "grad_norm": 0.7764283418655396, | |
| "learning_rate": 9.099451677119366e-05, | |
| "loss": 3.1922, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.47068928494739104, | |
| "grad_norm": 0.753666877746582, | |
| "learning_rate": 9.094025152506788e-05, | |
| "loss": 3.0827, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.471834514351156, | |
| "grad_norm": 0.6793937683105469, | |
| "learning_rate": 9.088583955681699e-05, | |
| "loss": 3.1235, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.4729797437549209, | |
| "grad_norm": 0.645055890083313, | |
| "learning_rate": 9.08312810614432e-05, | |
| "loss": 3.1758, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.47412497315868585, | |
| "grad_norm": 0.7241025567054749, | |
| "learning_rate": 9.077657623447379e-05, | |
| "loss": 3.1636, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.4752702025624508, | |
| "grad_norm": 0.762117862701416, | |
| "learning_rate": 9.07217252719606e-05, | |
| "loss": 3.1423, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.4764154319662157, | |
| "grad_norm": 0.7575943470001221, | |
| "learning_rate": 9.066672837047907e-05, | |
| "loss": 3.1304, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.47756066136998065, | |
| "grad_norm": 0.8326764106750488, | |
| "learning_rate": 9.061158572712769e-05, | |
| "loss": 3.1807, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.4787058907737456, | |
| "grad_norm": 0.7815741300582886, | |
| "learning_rate": 9.055629753952731e-05, | |
| "loss": 3.2113, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.4798511201775106, | |
| "grad_norm": 0.7716583609580994, | |
| "learning_rate": 9.050086400582033e-05, | |
| "loss": 3.1791, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.4809963495812755, | |
| "grad_norm": 0.6160004734992981, | |
| "learning_rate": 9.044528532467006e-05, | |
| "loss": 3.1696, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.48214157898504045, | |
| "grad_norm": 0.8025004267692566, | |
| "learning_rate": 9.038956169525998e-05, | |
| "loss": 3.2002, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.4832868083888054, | |
| "grad_norm": 0.733741819858551, | |
| "learning_rate": 9.033369331729307e-05, | |
| "loss": 3.1661, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.4844320377925703, | |
| "grad_norm": 0.7210118770599365, | |
| "learning_rate": 9.027768039099103e-05, | |
| "loss": 3.1492, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.48557726719633526, | |
| "grad_norm": 0.6915583610534668, | |
| "learning_rate": 9.02215231170936e-05, | |
| "loss": 3.1892, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.4867224966001002, | |
| "grad_norm": 0.6812649965286255, | |
| "learning_rate": 9.016522169685783e-05, | |
| "loss": 3.1404, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.48786772600386513, | |
| "grad_norm": 0.7272056341171265, | |
| "learning_rate": 9.010877633205738e-05, | |
| "loss": 3.1935, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.48901295540763007, | |
| "grad_norm": 0.7162798643112183, | |
| "learning_rate": 9.005218722498177e-05, | |
| "loss": 3.1949, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.49015818481139506, | |
| "grad_norm": 0.6110600829124451, | |
| "learning_rate": 8.999545457843568e-05, | |
| "loss": 3.1217, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.49130341421516, | |
| "grad_norm": 0.657370924949646, | |
| "learning_rate": 8.993857859573818e-05, | |
| "loss": 3.1381, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.49244864361892493, | |
| "grad_norm": 0.8181600570678711, | |
| "learning_rate": 8.988155948072203e-05, | |
| "loss": 3.1527, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.49359387302268987, | |
| "grad_norm": 0.586644172668457, | |
| "learning_rate": 8.9824397437733e-05, | |
| "loss": 3.1328, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.4947391024264548, | |
| "grad_norm": 0.8710150718688965, | |
| "learning_rate": 8.976709267162903e-05, | |
| "loss": 3.1509, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.49588433183021974, | |
| "grad_norm": 0.7185545563697815, | |
| "learning_rate": 8.970964538777957e-05, | |
| "loss": 3.0628, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.4970295612339847, | |
| "grad_norm": 0.7242484092712402, | |
| "learning_rate": 8.965205579206483e-05, | |
| "loss": 3.0603, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.4981747906377496, | |
| "grad_norm": 0.7996972799301147, | |
| "learning_rate": 8.959432409087504e-05, | |
| "loss": 3.2346, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.49932002004151455, | |
| "grad_norm": 0.6038782000541687, | |
| "learning_rate": 8.953645049110971e-05, | |
| "loss": 3.0751, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.5004652494452795, | |
| "grad_norm": 0.7712786197662354, | |
| "learning_rate": 8.94784352001769e-05, | |
| "loss": 3.1086, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.5016104788490444, | |
| "grad_norm": 0.6952617168426514, | |
| "learning_rate": 8.94202784259924e-05, | |
| "loss": 3.13, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.5027557082528094, | |
| "grad_norm": 0.7420851588249207, | |
| "learning_rate": 8.936198037697916e-05, | |
| "loss": 3.1094, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.5039009376565743, | |
| "grad_norm": 0.6883806586265564, | |
| "learning_rate": 8.930354126206634e-05, | |
| "loss": 3.0722, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.5050461670603392, | |
| "grad_norm": 0.7546491026878357, | |
| "learning_rate": 8.92449612906887e-05, | |
| "loss": 3.1571, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.5061913964641043, | |
| "grad_norm": 0.7471094727516174, | |
| "learning_rate": 8.918624067278576e-05, | |
| "loss": 3.1842, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.5073366258678692, | |
| "grad_norm": 0.8344042897224426, | |
| "learning_rate": 8.912737961880116e-05, | |
| "loss": 3.1709, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.5084818552716341, | |
| "grad_norm": 0.6555135250091553, | |
| "learning_rate": 8.906837833968174e-05, | |
| "loss": 3.1777, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.5096270846753991, | |
| "grad_norm": 0.799281120300293, | |
| "learning_rate": 8.900923704687697e-05, | |
| "loss": 3.176, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.510772314079164, | |
| "grad_norm": 0.8266319632530212, | |
| "learning_rate": 8.894995595233809e-05, | |
| "loss": 3.1353, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.511917543482929, | |
| "grad_norm": 0.7263309955596924, | |
| "learning_rate": 8.889053526851729e-05, | |
| "loss": 3.0824, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.5130627728866939, | |
| "grad_norm": 0.7665941119194031, | |
| "learning_rate": 8.88309752083671e-05, | |
| "loss": 3.1808, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.5142080022904588, | |
| "grad_norm": 0.7014003396034241, | |
| "learning_rate": 8.877127598533952e-05, | |
| "loss": 3.1158, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.5153532316942238, | |
| "grad_norm": 0.6320556998252869, | |
| "learning_rate": 8.871143781338529e-05, | |
| "loss": 3.1276, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.5164984610979887, | |
| "grad_norm": 0.8376429677009583, | |
| "learning_rate": 8.865146090695308e-05, | |
| "loss": 3.1422, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.5176436905017536, | |
| "grad_norm": 0.6639658212661743, | |
| "learning_rate": 8.859134548098883e-05, | |
| "loss": 3.0622, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.5187889199055186, | |
| "grad_norm": 0.6442060470581055, | |
| "learning_rate": 8.853109175093486e-05, | |
| "loss": 3.1206, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.5199341493092835, | |
| "grad_norm": 0.6882277131080627, | |
| "learning_rate": 8.847069993272912e-05, | |
| "loss": 3.1315, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.5210793787130484, | |
| "grad_norm": 0.8141956329345703, | |
| "learning_rate": 8.841017024280449e-05, | |
| "loss": 3.1498, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.5222246081168134, | |
| "grad_norm": 0.6133621335029602, | |
| "learning_rate": 8.834950289808796e-05, | |
| "loss": 3.0971, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.5233698375205783, | |
| "grad_norm": 0.6844592690467834, | |
| "learning_rate": 8.828869811599982e-05, | |
| "loss": 3.1408, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.5245150669243432, | |
| "grad_norm": 0.7407364845275879, | |
| "learning_rate": 8.822775611445289e-05, | |
| "loss": 3.1356, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.5256602963281082, | |
| "grad_norm": 0.7962344884872437, | |
| "learning_rate": 8.816667711185183e-05, | |
| "loss": 3.037, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.5268055257318731, | |
| "grad_norm": 0.6615867018699646, | |
| "learning_rate": 8.81054613270922e-05, | |
| "loss": 3.119, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.5279507551356382, | |
| "grad_norm": 0.6886764168739319, | |
| "learning_rate": 8.804410897955986e-05, | |
| "loss": 3.1686, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.5290959845394031, | |
| "grad_norm": 0.6714747548103333, | |
| "learning_rate": 8.798262028913e-05, | |
| "loss": 3.0539, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.530241213943168, | |
| "grad_norm": 0.630648672580719, | |
| "learning_rate": 8.792099547616646e-05, | |
| "loss": 3.03, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.531386443346933, | |
| "grad_norm": 0.6129744648933411, | |
| "learning_rate": 8.785923476152092e-05, | |
| "loss": 3.112, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.5325316727506979, | |
| "grad_norm": 0.6656561493873596, | |
| "learning_rate": 8.779733836653213e-05, | |
| "loss": 3.0675, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.5336769021544628, | |
| "grad_norm": 0.6855784058570862, | |
| "learning_rate": 8.773530651302506e-05, | |
| "loss": 3.0567, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.5348221315582278, | |
| "grad_norm": 0.6233646869659424, | |
| "learning_rate": 8.767313942331016e-05, | |
| "loss": 3.1316, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.5359673609619927, | |
| "grad_norm": 0.5950207710266113, | |
| "learning_rate": 8.761083732018253e-05, | |
| "loss": 3.0838, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.5371125903657576, | |
| "grad_norm": 0.6548320651054382, | |
| "learning_rate": 8.754840042692114e-05, | |
| "loss": 3.128, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.5382578197695226, | |
| "grad_norm": 0.5914682149887085, | |
| "learning_rate": 8.748582896728801e-05, | |
| "loss": 3.0995, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.5394030491732875, | |
| "grad_norm": 0.8005509376525879, | |
| "learning_rate": 8.742312316552741e-05, | |
| "loss": 3.1194, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.5405482785770525, | |
| "grad_norm": 0.6313744783401489, | |
| "learning_rate": 8.736028324636511e-05, | |
| "loss": 3.1332, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.5416935079808174, | |
| "grad_norm": 0.6614211797714233, | |
| "learning_rate": 8.729730943500751e-05, | |
| "loss": 3.1808, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.5428387373845823, | |
| "grad_norm": 0.6834341883659363, | |
| "learning_rate": 8.723420195714083e-05, | |
| "loss": 3.146, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.5439839667883473, | |
| "grad_norm": 0.6552104353904724, | |
| "learning_rate": 8.717096103893034e-05, | |
| "loss": 3.1339, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.5451291961921122, | |
| "grad_norm": 0.6134440302848816, | |
| "learning_rate": 8.710758690701957e-05, | |
| "loss": 3.1073, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.5462744255958771, | |
| "grad_norm": 0.6617953181266785, | |
| "learning_rate": 8.704407978852941e-05, | |
| "loss": 3.0803, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.5474196549996421, | |
| "grad_norm": 0.6200254559516907, | |
| "learning_rate": 8.698043991105738e-05, | |
| "loss": 3.0902, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.548564884403407, | |
| "grad_norm": 0.766797661781311, | |
| "learning_rate": 8.691666750267677e-05, | |
| "loss": 3.0533, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.549710113807172, | |
| "grad_norm": 0.8062739372253418, | |
| "learning_rate": 8.685276279193583e-05, | |
| "loss": 3.074, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.550855343210937, | |
| "grad_norm": 0.7662980556488037, | |
| "learning_rate": 8.678872600785702e-05, | |
| "loss": 3.0567, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.5520005726147019, | |
| "grad_norm": 0.610495388507843, | |
| "learning_rate": 8.672455737993601e-05, | |
| "loss": 3.0505, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.5531458020184669, | |
| "grad_norm": 0.7125016450881958, | |
| "learning_rate": 8.666025713814106e-05, | |
| "loss": 3.1392, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.5542910314222318, | |
| "grad_norm": 0.6135743260383606, | |
| "learning_rate": 8.65958255129121e-05, | |
| "loss": 3.0789, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.5554362608259967, | |
| "grad_norm": 0.5998417735099792, | |
| "learning_rate": 8.653126273515988e-05, | |
| "loss": 3.0702, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.5565814902297617, | |
| "grad_norm": 0.7088379859924316, | |
| "learning_rate": 8.64665690362652e-05, | |
| "loss": 3.084, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.5577267196335266, | |
| "grad_norm": 0.6542948484420776, | |
| "learning_rate": 8.640174464807805e-05, | |
| "loss": 3.1164, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.5588719490372915, | |
| "grad_norm": 0.5674989819526672, | |
| "learning_rate": 8.63367898029168e-05, | |
| "loss": 3.1048, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.5600171784410565, | |
| "grad_norm": 0.6628077626228333, | |
| "learning_rate": 8.627170473356733e-05, | |
| "loss": 3.0492, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.5611624078448214, | |
| "grad_norm": 0.6939430832862854, | |
| "learning_rate": 8.620648967328224e-05, | |
| "loss": 3.1041, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.5623076372485863, | |
| "grad_norm": 0.7876750230789185, | |
| "learning_rate": 8.614114485577996e-05, | |
| "loss": 3.1109, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.5634528666523513, | |
| "grad_norm": 0.7786777019500732, | |
| "learning_rate": 8.607567051524399e-05, | |
| "loss": 3.0289, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.5645980960561162, | |
| "grad_norm": 0.6663212180137634, | |
| "learning_rate": 8.601006688632199e-05, | |
| "loss": 3.0807, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.5657433254598812, | |
| "grad_norm": 0.7165863513946533, | |
| "learning_rate": 8.594433420412496e-05, | |
| "loss": 3.0755, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.5668885548636461, | |
| "grad_norm": 0.7116391658782959, | |
| "learning_rate": 8.587847270422642e-05, | |
| "loss": 3.064, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.568033784267411, | |
| "grad_norm": 0.7216659188270569, | |
| "learning_rate": 8.581248262266155e-05, | |
| "loss": 3.0844, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.569179013671176, | |
| "grad_norm": 0.600975751876831, | |
| "learning_rate": 8.57463641959263e-05, | |
| "loss": 2.9771, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.5703242430749409, | |
| "grad_norm": 0.6743506789207458, | |
| "learning_rate": 8.568011766097666e-05, | |
| "loss": 3.1177, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.5714694724787059, | |
| "grad_norm": 0.6986669301986694, | |
| "learning_rate": 8.561374325522764e-05, | |
| "loss": 3.0838, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.5726147018824709, | |
| "grad_norm": 0.8114129900932312, | |
| "learning_rate": 8.554724121655262e-05, | |
| "loss": 3.1444, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5737599312862358, | |
| "grad_norm": 0.7919934988021851, | |
| "learning_rate": 8.548061178328233e-05, | |
| "loss": 3.0166, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.5749051606900007, | |
| "grad_norm": 0.6979469656944275, | |
| "learning_rate": 8.541385519420403e-05, | |
| "loss": 3.0737, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.5760503900937657, | |
| "grad_norm": 0.6499598026275635, | |
| "learning_rate": 8.534697168856076e-05, | |
| "loss": 3.0649, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.5771956194975306, | |
| "grad_norm": 0.7335128784179688, | |
| "learning_rate": 8.527996150605034e-05, | |
| "loss": 3.0403, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.5783408489012956, | |
| "grad_norm": 0.7250447869300842, | |
| "learning_rate": 8.521282488682463e-05, | |
| "loss": 3.0069, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.5794860783050605, | |
| "grad_norm": 0.6807704567909241, | |
| "learning_rate": 8.514556207148857e-05, | |
| "loss": 3.119, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.5806313077088254, | |
| "grad_norm": 0.7379552721977234, | |
| "learning_rate": 8.507817330109936e-05, | |
| "loss": 3.0773, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.5817765371125904, | |
| "grad_norm": 0.6113300919532776, | |
| "learning_rate": 8.501065881716566e-05, | |
| "loss": 3.0768, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.5829217665163553, | |
| "grad_norm": 0.6463739275932312, | |
| "learning_rate": 8.494301886164658e-05, | |
| "loss": 3.0759, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.5840669959201202, | |
| "grad_norm": 0.6680572032928467, | |
| "learning_rate": 8.487525367695098e-05, | |
| "loss": 3.032, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.5852122253238852, | |
| "grad_norm": 0.7283656597137451, | |
| "learning_rate": 8.480736350593644e-05, | |
| "loss": 3.0986, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.5863574547276501, | |
| "grad_norm": 0.6844098567962646, | |
| "learning_rate": 8.473934859190853e-05, | |
| "loss": 3.0703, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.587502684131415, | |
| "grad_norm": 0.6737761497497559, | |
| "learning_rate": 8.467120917861984e-05, | |
| "loss": 3.0775, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.58864791353518, | |
| "grad_norm": 0.8135201930999756, | |
| "learning_rate": 8.460294551026916e-05, | |
| "loss": 3.0802, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.5897931429389449, | |
| "grad_norm": 0.6999467015266418, | |
| "learning_rate": 8.453455783150054e-05, | |
| "loss": 3.047, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.5909383723427099, | |
| "grad_norm": 0.7999339699745178, | |
| "learning_rate": 8.446604638740256e-05, | |
| "loss": 3.1247, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.5920836017464748, | |
| "grad_norm": 0.7229709029197693, | |
| "learning_rate": 8.439741142350725e-05, | |
| "loss": 3.1009, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.5932288311502398, | |
| "grad_norm": 0.7535393238067627, | |
| "learning_rate": 8.432865318578935e-05, | |
| "loss": 3.0566, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.5943740605540048, | |
| "grad_norm": 0.7364835143089294, | |
| "learning_rate": 8.425977192066539e-05, | |
| "loss": 3.0751, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.5955192899577697, | |
| "grad_norm": 0.7996159791946411, | |
| "learning_rate": 8.419076787499283e-05, | |
| "loss": 3.1277, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.5966645193615346, | |
| "grad_norm": 0.6993304491043091, | |
| "learning_rate": 8.412164129606911e-05, | |
| "loss": 3.0713, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.5978097487652996, | |
| "grad_norm": 0.6996495127677917, | |
| "learning_rate": 8.405239243163084e-05, | |
| "loss": 3.0595, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.5989549781690645, | |
| "grad_norm": 0.6525830626487732, | |
| "learning_rate": 8.398302152985285e-05, | |
| "loss": 3.0666, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.6001002075728294, | |
| "grad_norm": 0.6092258095741272, | |
| "learning_rate": 8.391352883934733e-05, | |
| "loss": 3.0453, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.6012454369765944, | |
| "grad_norm": 0.7509777545928955, | |
| "learning_rate": 8.3843914609163e-05, | |
| "loss": 3.016, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.6023906663803593, | |
| "grad_norm": 0.6374807953834534, | |
| "learning_rate": 8.377417908878406e-05, | |
| "loss": 2.9986, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.6035358957841243, | |
| "grad_norm": 0.6541762351989746, | |
| "learning_rate": 8.370432252812946e-05, | |
| "loss": 3.0299, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.6046811251878892, | |
| "grad_norm": 0.5995933413505554, | |
| "learning_rate": 8.363434517755191e-05, | |
| "loss": 3.0853, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.6058263545916541, | |
| "grad_norm": 0.7466599941253662, | |
| "learning_rate": 8.356424728783702e-05, | |
| "loss": 3.1482, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.6069715839954191, | |
| "grad_norm": 0.7076915502548218, | |
| "learning_rate": 8.349402911020234e-05, | |
| "loss": 3.0681, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.608116813399184, | |
| "grad_norm": 0.6121742725372314, | |
| "learning_rate": 8.34236908962966e-05, | |
| "loss": 3.0771, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.6092620428029489, | |
| "grad_norm": 0.7337540984153748, | |
| "learning_rate": 8.335323289819865e-05, | |
| "loss": 3.1927, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.6104072722067139, | |
| "grad_norm": 0.6119634509086609, | |
| "learning_rate": 8.328265536841662e-05, | |
| "loss": 3.0988, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.6115525016104788, | |
| "grad_norm": 0.6291252970695496, | |
| "learning_rate": 8.321195855988706e-05, | |
| "loss": 3.0667, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.6126977310142437, | |
| "grad_norm": 0.5802082419395447, | |
| "learning_rate": 8.314114272597398e-05, | |
| "loss": 3.0118, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.6138429604180088, | |
| "grad_norm": 0.6166985630989075, | |
| "learning_rate": 8.307020812046792e-05, | |
| "loss": 3.1762, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.6149881898217737, | |
| "grad_norm": 0.5751842260360718, | |
| "learning_rate": 8.299915499758514e-05, | |
| "loss": 3.0107, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.6161334192255387, | |
| "grad_norm": 0.6163948178291321, | |
| "learning_rate": 8.292798361196658e-05, | |
| "loss": 3.0617, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.6172786486293036, | |
| "grad_norm": 0.6665089130401611, | |
| "learning_rate": 8.285669421867703e-05, | |
| "loss": 3.0729, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.6184238780330685, | |
| "grad_norm": 0.630814254283905, | |
| "learning_rate": 8.278528707320421e-05, | |
| "loss": 2.9811, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.6195691074368335, | |
| "grad_norm": 0.785892903804779, | |
| "learning_rate": 8.271376243145786e-05, | |
| "loss": 3.0561, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.6207143368405984, | |
| "grad_norm": 0.6047619581222534, | |
| "learning_rate": 8.264212054976875e-05, | |
| "loss": 3.0595, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.6218595662443633, | |
| "grad_norm": 0.6675294041633606, | |
| "learning_rate": 8.257036168488785e-05, | |
| "loss": 3.1725, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.6230047956481283, | |
| "grad_norm": 0.6342408657073975, | |
| "learning_rate": 8.24984860939854e-05, | |
| "loss": 2.9766, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.6241500250518932, | |
| "grad_norm": 0.5901287794113159, | |
| "learning_rate": 8.242649403464989e-05, | |
| "loss": 3.1021, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.6252952544556581, | |
| "grad_norm": 0.5638805627822876, | |
| "learning_rate": 8.23543857648873e-05, | |
| "loss": 2.9866, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.6264404838594231, | |
| "grad_norm": 0.5482515692710876, | |
| "learning_rate": 8.228216154312001e-05, | |
| "loss": 3.0344, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.627585713263188, | |
| "grad_norm": 0.7258690595626831, | |
| "learning_rate": 8.2209821628186e-05, | |
| "loss": 3.0339, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.628730942666953, | |
| "grad_norm": 0.6626359820365906, | |
| "learning_rate": 8.213736627933786e-05, | |
| "loss": 3.1191, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.6298761720707179, | |
| "grad_norm": 0.5897409319877625, | |
| "learning_rate": 8.206479575624186e-05, | |
| "loss": 2.9604, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.6310214014744828, | |
| "grad_norm": 0.661314070224762, | |
| "learning_rate": 8.199211031897704e-05, | |
| "loss": 3.0568, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.6321666308782478, | |
| "grad_norm": 0.7244003415107727, | |
| "learning_rate": 8.191931022803427e-05, | |
| "loss": 3.0202, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.6333118602820127, | |
| "grad_norm": 0.6676930785179138, | |
| "learning_rate": 8.184639574431532e-05, | |
| "loss": 3.0692, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.6344570896857776, | |
| "grad_norm": 0.8161568641662598, | |
| "learning_rate": 8.177336712913194e-05, | |
| "loss": 3.0835, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.6356023190895427, | |
| "grad_norm": 0.9007164239883423, | |
| "learning_rate": 8.170022464420486e-05, | |
| "loss": 3.0665, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.6367475484933076, | |
| "grad_norm": 0.7467122673988342, | |
| "learning_rate": 8.162696855166294e-05, | |
| "loss": 3.1098, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.6378927778970725, | |
| "grad_norm": 0.5917842388153076, | |
| "learning_rate": 8.155359911404217e-05, | |
| "loss": 3.0047, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.6390380073008375, | |
| "grad_norm": 0.7426056861877441, | |
| "learning_rate": 8.148011659428474e-05, | |
| "loss": 3.1037, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.6401832367046024, | |
| "grad_norm": 0.8367446660995483, | |
| "learning_rate": 8.140652125573813e-05, | |
| "loss": 2.9628, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.6413284661083674, | |
| "grad_norm": 0.6199979186058044, | |
| "learning_rate": 8.133281336215412e-05, | |
| "loss": 3.0239, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.6424736955121323, | |
| "grad_norm": 0.7761691808700562, | |
| "learning_rate": 8.125899317768786e-05, | |
| "loss": 3.0609, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.6436189249158972, | |
| "grad_norm": 0.6743906140327454, | |
| "learning_rate": 8.118506096689698e-05, | |
| "loss": 3.0696, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.6447641543196622, | |
| "grad_norm": 0.6743597388267517, | |
| "learning_rate": 8.111101699474051e-05, | |
| "loss": 3.045, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.6459093837234271, | |
| "grad_norm": 0.6069556474685669, | |
| "learning_rate": 8.103686152657808e-05, | |
| "loss": 3.0171, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.647054613127192, | |
| "grad_norm": 0.5756711959838867, | |
| "learning_rate": 8.096259482816886e-05, | |
| "loss": 3.0161, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.648199842530957, | |
| "grad_norm": 0.6239808797836304, | |
| "learning_rate": 8.088821716567066e-05, | |
| "loss": 3.0887, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.6493450719347219, | |
| "grad_norm": 0.5236758589744568, | |
| "learning_rate": 8.081372880563898e-05, | |
| "loss": 2.9743, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.6504903013384868, | |
| "grad_norm": 0.6389586925506592, | |
| "learning_rate": 8.073913001502605e-05, | |
| "loss": 2.9972, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.6516355307422518, | |
| "grad_norm": 0.5799978971481323, | |
| "learning_rate": 8.066442106117978e-05, | |
| "loss": 3.0043, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.6527807601460167, | |
| "grad_norm": 0.6182774901390076, | |
| "learning_rate": 8.058960221184298e-05, | |
| "loss": 3.065, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.6539259895497816, | |
| "grad_norm": 0.5762799382209778, | |
| "learning_rate": 8.051467373515228e-05, | |
| "loss": 3.0374, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.6550712189535466, | |
| "grad_norm": 0.6707761287689209, | |
| "learning_rate": 8.043963589963714e-05, | |
| "loss": 3.0056, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.6562164483573115, | |
| "grad_norm": 0.6148689985275269, | |
| "learning_rate": 8.036448897421903e-05, | |
| "loss": 3.0222, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.6573616777610766, | |
| "grad_norm": 0.6503751277923584, | |
| "learning_rate": 8.028923322821031e-05, | |
| "loss": 3.0186, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.6585069071648415, | |
| "grad_norm": 0.7160323858261108, | |
| "learning_rate": 8.021386893131334e-05, | |
| "loss": 3.0785, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.6596521365686064, | |
| "grad_norm": 0.5561687350273132, | |
| "learning_rate": 8.013839635361953e-05, | |
| "loss": 3.0425, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.6607973659723714, | |
| "grad_norm": 0.7166488170623779, | |
| "learning_rate": 8.006281576560834e-05, | |
| "loss": 2.9722, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.6619425953761363, | |
| "grad_norm": 0.5913854241371155, | |
| "learning_rate": 7.99871274381463e-05, | |
| "loss": 2.9645, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.6630878247799012, | |
| "grad_norm": 0.5721243619918823, | |
| "learning_rate": 7.99113316424861e-05, | |
| "loss": 3.0326, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.6642330541836662, | |
| "grad_norm": 0.6886599659919739, | |
| "learning_rate": 7.983542865026552e-05, | |
| "loss": 2.9934, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.6653782835874311, | |
| "grad_norm": 0.6505165100097656, | |
| "learning_rate": 7.975941873350656e-05, | |
| "loss": 3.0275, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.666523512991196, | |
| "grad_norm": 0.5963652729988098, | |
| "learning_rate": 7.968330216461439e-05, | |
| "loss": 3.0581, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.667668742394961, | |
| "grad_norm": 0.7020843029022217, | |
| "learning_rate": 7.960707921637642e-05, | |
| "loss": 3.0214, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.6688139717987259, | |
| "grad_norm": 0.5729818344116211, | |
| "learning_rate": 7.953075016196128e-05, | |
| "loss": 3.0928, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.6699592012024909, | |
| "grad_norm": 0.6135843992233276, | |
| "learning_rate": 7.945431527491788e-05, | |
| "loss": 3.0281, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.6711044306062558, | |
| "grad_norm": 0.844972550868988, | |
| "learning_rate": 7.937777482917441e-05, | |
| "loss": 3.0451, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.6722496600100207, | |
| "grad_norm": 0.6200757026672363, | |
| "learning_rate": 7.930112909903737e-05, | |
| "loss": 2.9982, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.6733948894137857, | |
| "grad_norm": 0.6621441841125488, | |
| "learning_rate": 7.922437835919059e-05, | |
| "loss": 3.0133, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.6745401188175506, | |
| "grad_norm": 0.6099239587783813, | |
| "learning_rate": 7.914752288469418e-05, | |
| "loss": 3.0359, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.6756853482213155, | |
| "grad_norm": 0.6415863633155823, | |
| "learning_rate": 7.907056295098367e-05, | |
| "loss": 3.0456, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.6768305776250805, | |
| "grad_norm": 0.5651492476463318, | |
| "learning_rate": 7.89934988338689e-05, | |
| "loss": 3.0138, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.6779758070288454, | |
| "grad_norm": 0.6899843215942383, | |
| "learning_rate": 7.891633080953309e-05, | |
| "loss": 3.1091, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.6791210364326105, | |
| "grad_norm": 0.6236230134963989, | |
| "learning_rate": 7.883905915453191e-05, | |
| "loss": 3.0477, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.6802662658363754, | |
| "grad_norm": 0.6632122993469238, | |
| "learning_rate": 7.876168414579232e-05, | |
| "loss": 3.0023, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.6814114952401403, | |
| "grad_norm": 0.5697975158691406, | |
| "learning_rate": 7.868420606061174e-05, | |
| "loss": 3.0046, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.6825567246439053, | |
| "grad_norm": 0.6847530603408813, | |
| "learning_rate": 7.8606625176657e-05, | |
| "loss": 3.0155, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.6837019540476702, | |
| "grad_norm": 0.6649438738822937, | |
| "learning_rate": 7.852894177196333e-05, | |
| "loss": 3.0616, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.6848471834514351, | |
| "grad_norm": 0.6214346289634705, | |
| "learning_rate": 7.845115612493335e-05, | |
| "loss": 2.963, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.6859924128552001, | |
| "grad_norm": 0.6587514877319336, | |
| "learning_rate": 7.837326851433614e-05, | |
| "loss": 3.0344, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.687137642258965, | |
| "grad_norm": 0.6454896330833435, | |
| "learning_rate": 7.829527921930617e-05, | |
| "loss": 3.0191, | |
| "step": 1200 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 3494, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 300, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.3510201344786432e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |