{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.2021329897645123,
  "eval_steps": 500,
  "global_step": 2100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0005726147018824708,
      "grad_norm": 7.761023998260498,
      "learning_rate": 0.0,
      "loss": 6.0592,
      "step": 1
    },
    {
      "epoch": 0.0011452294037649416,
      "grad_norm": 7.8541951179504395,
      "learning_rate": 5.714285714285715e-07,
      "loss": 6.0156,
      "step": 2
    },
    {
      "epoch": 0.0022904588075298832,
      "grad_norm": 7.347611904144287,
      "learning_rate": 1.7142857142857145e-06,
      "loss": 6.0103,
      "step": 4
    },
    {
      "epoch": 0.003435688211294825,
      "grad_norm": 5.382428169250488,
      "learning_rate": 2.8571428571428573e-06,
      "loss": 5.9221,
      "step": 6
    },
    {
      "epoch": 0.0045809176150597665,
      "grad_norm": 5.063406467437744,
      "learning_rate": 4.000000000000001e-06,
      "loss": 6.0365,
      "step": 8
    },
    {
      "epoch": 0.005726147018824708,
      "grad_norm": 9.779157638549805,
      "learning_rate": 5.142857142857143e-06,
      "loss": 6.0336,
      "step": 10
    },
    {
      "epoch": 0.00687137642258965,
      "grad_norm": 7.555446147918701,
      "learning_rate": 6.285714285714287e-06,
      "loss": 6.0328,
      "step": 12
    },
    {
      "epoch": 0.008016605826354592,
      "grad_norm": 6.790043354034424,
      "learning_rate": 7.428571428571429e-06,
      "loss": 5.7848,
      "step": 14
    },
    {
      "epoch": 0.009161835230119533,
      "grad_norm": 4.4132208824157715,
      "learning_rate": 8.571428571428573e-06,
      "loss": 5.8207,
      "step": 16
    },
    {
      "epoch": 0.010307064633884476,
      "grad_norm": 4.064995765686035,
      "learning_rate": 9.714285714285715e-06,
      "loss": 5.6497,
      "step": 18
    },
    {
      "epoch": 0.011452294037649417,
      "grad_norm": 3.357184410095215,
      "learning_rate": 1.0857142857142858e-05,
      "loss": 5.7758,
      "step": 20
    },
    {
      "epoch": 0.012597523441414358,
      "grad_norm": 2.742230176925659,
      "learning_rate": 1.2e-05,
      "loss": 5.6173,
      "step": 22
    },
    {
      "epoch": 0.0137427528451793,
      "grad_norm": 2.491459369659424,
      "learning_rate": 1.3142857142857143e-05,
      "loss": 5.6681,
      "step": 24
    },
    {
      "epoch": 0.014887982248944241,
      "grad_norm": 2.7569029331207275,
      "learning_rate": 1.4285714285714285e-05,
      "loss": 5.6393,
      "step": 26
    },
    {
      "epoch": 0.016033211652709184,
      "grad_norm": 2.208378791809082,
      "learning_rate": 1.5428571428571428e-05,
      "loss": 5.5768,
      "step": 28
    },
    {
      "epoch": 0.017178441056474127,
      "grad_norm": 3.2770133018493652,
      "learning_rate": 1.657142857142857e-05,
      "loss": 5.484,
      "step": 30
    },
    {
      "epoch": 0.018323670460239066,
      "grad_norm": 3.177299976348877,
      "learning_rate": 1.7714285714285713e-05,
      "loss": 5.528,
      "step": 32
    },
    {
      "epoch": 0.01946889986400401,
      "grad_norm": 2.1981537342071533,
      "learning_rate": 1.885714285714286e-05,
      "loss": 5.6327,
      "step": 34
    },
    {
      "epoch": 0.02061412926776895,
      "grad_norm": 3.265881061553955,
      "learning_rate": 2e-05,
      "loss": 5.6288,
      "step": 36
    },
    {
      "epoch": 0.02175935867153389,
      "grad_norm": 3.6059298515319824,
      "learning_rate": 2.1142857142857144e-05,
      "loss": 5.4789,
      "step": 38
    },
    {
      "epoch": 0.022904588075298833,
      "grad_norm": 2.4080026149749756,
      "learning_rate": 2.2285714285714287e-05,
      "loss": 5.4046,
      "step": 40
    },
    {
      "epoch": 0.024049817479063776,
      "grad_norm": 2.142902135848999,
      "learning_rate": 2.342857142857143e-05,
      "loss": 5.4738,
      "step": 42
    },
    {
      "epoch": 0.025195046882828715,
      "grad_norm": 2.4021224975585938,
      "learning_rate": 2.4571428571428572e-05,
      "loss": 5.4649,
      "step": 44
    },
    {
      "epoch": 0.026340276286593658,
      "grad_norm": 2.172009229660034,
      "learning_rate": 2.5714285714285714e-05,
      "loss": 5.4302,
      "step": 46
    },
    {
      "epoch": 0.0274855056903586,
      "grad_norm": 2.9737730026245117,
      "learning_rate": 2.6857142857142857e-05,
      "loss": 5.3045,
      "step": 48
    },
    {
      "epoch": 0.028630735094123543,
      "grad_norm": 3.0378615856170654,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 5.2185,
      "step": 50
    },
    {
      "epoch": 0.029775964497888482,
      "grad_norm": 3.4448676109313965,
      "learning_rate": 2.9142857142857146e-05,
      "loss": 5.1838,
      "step": 52
    },
    {
      "epoch": 0.030921193901653425,
      "grad_norm": 2.469245672225952,
      "learning_rate": 3.0285714285714288e-05,
      "loss": 5.1637,
      "step": 54
    },
    {
      "epoch": 0.03206642330541837,
      "grad_norm": 3.58486008644104,
      "learning_rate": 3.142857142857143e-05,
      "loss": 5.2063,
      "step": 56
    },
    {
      "epoch": 0.03321165270918331,
      "grad_norm": 3.0815446376800537,
      "learning_rate": 3.257142857142857e-05,
      "loss": 5.2317,
      "step": 58
    },
    {
      "epoch": 0.03435688211294825,
      "grad_norm": 3.6842119693756104,
      "learning_rate": 3.3714285714285716e-05,
      "loss": 5.2695,
      "step": 60
    },
    {
      "epoch": 0.03550211151671319,
      "grad_norm": 2.9440791606903076,
      "learning_rate": 3.485714285714286e-05,
      "loss": 5.2686,
      "step": 62
    },
    {
      "epoch": 0.03664734092047813,
      "grad_norm": 3.9632568359375,
      "learning_rate": 3.6e-05,
      "loss": 5.1262,
      "step": 64
    },
    {
      "epoch": 0.037792570324243074,
      "grad_norm": 4.045065402984619,
      "learning_rate": 3.7142857142857143e-05,
      "loss": 5.1546,
      "step": 66
    },
    {
      "epoch": 0.03893779972800802,
      "grad_norm": 3.5707085132598877,
      "learning_rate": 3.8285714285714286e-05,
      "loss": 5.0036,
      "step": 68
    },
    {
      "epoch": 0.04008302913177296,
      "grad_norm": 3.014404535293579,
      "learning_rate": 3.942857142857143e-05,
      "loss": 5.026,
      "step": 70
    },
    {
      "epoch": 0.0412282585355379,
      "grad_norm": 2.708796977996826,
      "learning_rate": 4.057142857142857e-05,
      "loss": 4.9442,
      "step": 72
    },
    {
      "epoch": 0.04237348793930284,
      "grad_norm": 2.5384011268615723,
      "learning_rate": 4.1714285714285714e-05,
      "loss": 5.0223,
      "step": 74
    },
    {
      "epoch": 0.04351871734306778,
      "grad_norm": 3.006281852722168,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 4.9827,
      "step": 76
    },
    {
      "epoch": 0.044663946746832724,
      "grad_norm": 2.5772130489349365,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 4.9675,
      "step": 78
    },
    {
      "epoch": 0.045809176150597666,
      "grad_norm": 3.456017255783081,
      "learning_rate": 4.514285714285714e-05,
      "loss": 5.0341,
      "step": 80
    },
    {
      "epoch": 0.04695440555436261,
      "grad_norm": 3.3163113594055176,
      "learning_rate": 4.628571428571429e-05,
      "loss": 4.9867,
      "step": 82
    },
    {
      "epoch": 0.04809963495812755,
      "grad_norm": 3.7568469047546387,
      "learning_rate": 4.742857142857143e-05,
      "loss": 4.8652,
      "step": 84
    },
    {
      "epoch": 0.049244864361892494,
      "grad_norm": 4.19318151473999,
      "learning_rate": 4.8571428571428576e-05,
      "loss": 5.0602,
      "step": 86
    },
    {
      "epoch": 0.05039009376565743,
      "grad_norm": 5.1034064292907715,
      "learning_rate": 4.971428571428572e-05,
      "loss": 4.9757,
      "step": 88
    },
    {
      "epoch": 0.05153532316942237,
      "grad_norm": 4.0827484130859375,
      "learning_rate": 5.085714285714286e-05,
      "loss": 4.8486,
      "step": 90
    },
    {
      "epoch": 0.052680552573187316,
      "grad_norm": 4.6189446449279785,
      "learning_rate": 5.2000000000000004e-05,
      "loss": 4.9595,
      "step": 92
    },
    {
      "epoch": 0.05382578197695226,
      "grad_norm": 3.988513469696045,
      "learning_rate": 5.314285714285715e-05,
      "loss": 4.9035,
      "step": 94
    },
    {
      "epoch": 0.0549710113807172,
      "grad_norm": 3.857276678085327,
      "learning_rate": 5.428571428571428e-05,
      "loss": 4.8277,
      "step": 96
    },
    {
      "epoch": 0.056116240784482144,
      "grad_norm": 3.5372354984283447,
      "learning_rate": 5.542857142857143e-05,
      "loss": 4.7718,
      "step": 98
    },
    {
      "epoch": 0.057261470188247086,
      "grad_norm": 3.3853676319122314,
      "learning_rate": 5.6571428571428574e-05,
      "loss": 4.8098,
      "step": 100
    },
    {
      "epoch": 0.05840669959201202,
      "grad_norm": 2.1142077445983887,
      "learning_rate": 5.771428571428572e-05,
      "loss": 4.7975,
      "step": 102
    },
    {
      "epoch": 0.059551928995776965,
      "grad_norm": 3.2275538444519043,
      "learning_rate": 5.885714285714285e-05,
      "loss": 4.8509,
      "step": 104
    },
    {
      "epoch": 0.06069715839954191,
      "grad_norm": 3.5413126945495605,
      "learning_rate": 6e-05,
      "loss": 4.6069,
      "step": 106
    },
    {
      "epoch": 0.06184238780330685,
      "grad_norm": 2.755648374557495,
      "learning_rate": 6.114285714285714e-05,
      "loss": 4.6951,
      "step": 108
    },
    {
      "epoch": 0.06298761720707179,
      "grad_norm": 2.980039596557617,
      "learning_rate": 6.22857142857143e-05,
      "loss": 4.7012,
      "step": 110
    },
    {
      "epoch": 0.06413284661083674,
      "grad_norm": 4.890020370483398,
      "learning_rate": 6.342857142857143e-05,
      "loss": 4.8008,
      "step": 112
    },
    {
      "epoch": 0.06527807601460167,
      "grad_norm": 4.35846471786499,
      "learning_rate": 6.457142857142856e-05,
      "loss": 4.8587,
      "step": 114
    },
    {
      "epoch": 0.06642330541836662,
      "grad_norm": 3.6171813011169434,
      "learning_rate": 6.571428571428571e-05,
      "loss": 4.7473,
      "step": 116
    },
    {
      "epoch": 0.06756853482213156,
      "grad_norm": 2.4927010536193848,
      "learning_rate": 6.685714285714286e-05,
      "loss": 4.7113,
      "step": 118
    },
    {
      "epoch": 0.0687137642258965,
      "grad_norm": 3.3327009677886963,
      "learning_rate": 6.800000000000001e-05,
      "loss": 4.6105,
      "step": 120
    },
    {
      "epoch": 0.06985899362966144,
      "grad_norm": 3.1123206615448,
      "learning_rate": 6.914285714285715e-05,
      "loss": 4.5968,
      "step": 122
    },
    {
      "epoch": 0.07100422303342638,
      "grad_norm": 2.6985421180725098,
      "learning_rate": 7.028571428571428e-05,
      "loss": 4.6323,
      "step": 124
    },
    {
      "epoch": 0.07214945243719133,
      "grad_norm": 2.058084011077881,
      "learning_rate": 7.142857142857143e-05,
      "loss": 4.5721,
      "step": 126
    },
    {
      "epoch": 0.07329468184095626,
      "grad_norm": 2.144658327102661,
      "learning_rate": 7.257142857142858e-05,
      "loss": 4.6125,
      "step": 128
    },
    {
      "epoch": 0.07443991124472121,
      "grad_norm": 2.477219820022583,
      "learning_rate": 7.371428571428572e-05,
      "loss": 4.4727,
      "step": 130
    },
    {
      "epoch": 0.07558514064848615,
      "grad_norm": 3.8517298698425293,
      "learning_rate": 7.485714285714285e-05,
      "loss": 4.5696,
      "step": 132
    },
    {
      "epoch": 0.0767303700522511,
      "grad_norm": 3.0253565311431885,
      "learning_rate": 7.6e-05,
      "loss": 4.4838,
      "step": 134
    },
    {
      "epoch": 0.07787559945601603,
      "grad_norm": 3.397569179534912,
      "learning_rate": 7.714285714285715e-05,
      "loss": 4.6431,
      "step": 136
    },
    {
      "epoch": 0.07902082885978097,
      "grad_norm": 2.435197114944458,
      "learning_rate": 7.828571428571429e-05,
      "loss": 4.4681,
      "step": 138
    },
    {
      "epoch": 0.08016605826354592,
      "grad_norm": 2.6476476192474365,
      "learning_rate": 7.942857142857143e-05,
      "loss": 4.4462,
      "step": 140
    },
    {
      "epoch": 0.08131128766731086,
      "grad_norm": 2.1929690837860107,
      "learning_rate": 8.057142857142857e-05,
      "loss": 4.5136,
      "step": 142
    },
    {
      "epoch": 0.0824565170710758,
      "grad_norm": 2.4533395767211914,
      "learning_rate": 8.171428571428572e-05,
      "loss": 4.5572,
      "step": 144
    },
    {
      "epoch": 0.08360174647484074,
      "grad_norm": 2.601806879043579,
      "learning_rate": 8.285714285714287e-05,
      "loss": 4.4121,
      "step": 146
    },
    {
      "epoch": 0.08474697587860568,
      "grad_norm": 3.233973741531372,
      "learning_rate": 8.4e-05,
      "loss": 4.4599,
      "step": 148
    },
    {
      "epoch": 0.08589220528237063,
      "grad_norm": 2.6353538036346436,
      "learning_rate": 8.514285714285714e-05,
      "loss": 4.4533,
      "step": 150
    },
    {
      "epoch": 0.08703743468613556,
      "grad_norm": 2.8465511798858643,
      "learning_rate": 8.62857142857143e-05,
      "loss": 4.5246,
      "step": 152
    },
    {
      "epoch": 0.08818266408990051,
      "grad_norm": 2.8642711639404297,
      "learning_rate": 8.742857142857144e-05,
      "loss": 4.4659,
      "step": 154
    },
    {
      "epoch": 0.08932789349366545,
      "grad_norm": 2.793112277984619,
      "learning_rate": 8.857142857142857e-05,
      "loss": 4.5107,
      "step": 156
    },
    {
      "epoch": 0.0904731228974304,
      "grad_norm": 3.43472957611084,
      "learning_rate": 8.971428571428571e-05,
      "loss": 4.4079,
      "step": 158
    },
    {
      "epoch": 0.09161835230119533,
      "grad_norm": 2.9260294437408447,
      "learning_rate": 9.085714285714286e-05,
      "loss": 4.4047,
      "step": 160
    },
    {
      "epoch": 0.09276358170496027,
      "grad_norm": 2.6336724758148193,
      "learning_rate": 9.200000000000001e-05,
      "loss": 4.4777,
      "step": 162
    },
    {
      "epoch": 0.09390881110872522,
      "grad_norm": 2.8348231315612793,
      "learning_rate": 9.314285714285715e-05,
      "loss": 4.3445,
      "step": 164
    },
    {
      "epoch": 0.09505404051249015,
      "grad_norm": 4.271595478057861,
      "learning_rate": 9.428571428571429e-05,
      "loss": 4.4234,
      "step": 166
    },
    {
      "epoch": 0.0961992699162551,
      "grad_norm": 3.4789109230041504,
      "learning_rate": 9.542857142857143e-05,
      "loss": 4.2872,
      "step": 168
    },
    {
      "epoch": 0.09734449932002004,
      "grad_norm": 2.57273530960083,
      "learning_rate": 9.657142857142858e-05,
      "loss": 4.4177,
      "step": 170
    },
    {
      "epoch": 0.09848972872378499,
      "grad_norm": 2.185086250305176,
      "learning_rate": 9.771428571428572e-05,
      "loss": 4.3568,
      "step": 172
    },
    {
      "epoch": 0.09963495812754992,
      "grad_norm": 2.771744966506958,
      "learning_rate": 9.885714285714286e-05,
      "loss": 4.3392,
      "step": 174
    },
    {
      "epoch": 0.10078018753131486,
      "grad_norm": 1.950353741645813,
      "learning_rate": 0.0001,
      "loss": 4.1931,
      "step": 176
    },
    {
      "epoch": 0.10192541693507981,
      "grad_norm": 2.4709694385528564,
      "learning_rate": 9.999991040472416e-05,
      "loss": 4.2936,
      "step": 178
    },
    {
      "epoch": 0.10307064633884475,
      "grad_norm": 2.140997886657715,
      "learning_rate": 9.999964161921776e-05,
      "loss": 4.1653,
      "step": 180
    },
    {
      "epoch": 0.1042158757426097,
      "grad_norm": 2.491321563720703,
      "learning_rate": 9.999919364444403e-05,
      "loss": 4.3202,
      "step": 182
    },
    {
      "epoch": 0.10536110514637463,
      "grad_norm": 2.5410189628601074,
      "learning_rate": 9.999856648200845e-05,
      "loss": 4.2657,
      "step": 184
    },
    {
      "epoch": 0.10650633455013958,
      "grad_norm": 2.1820590496063232,
      "learning_rate": 9.999776013415866e-05,
      "loss": 4.2282,
      "step": 186
    },
    {
      "epoch": 0.10765156395390452,
      "grad_norm": 1.7251808643341064,
      "learning_rate": 9.999677460378444e-05,
      "loss": 4.3421,
      "step": 188
    },
    {
      "epoch": 0.10879679335766945,
      "grad_norm": 2.002145290374756,
      "learning_rate": 9.999560989441779e-05,
      "loss": 4.1361,
      "step": 190
    },
    {
      "epoch": 0.1099420227614344,
      "grad_norm": 1.9663431644439697,
      "learning_rate": 9.999426601023274e-05,
      "loss": 4.201,
      "step": 192
    },
    {
      "epoch": 0.11108725216519934,
      "grad_norm": 2.1406776905059814,
      "learning_rate": 9.999274295604558e-05,
      "loss": 4.1086,
      "step": 194
    },
    {
      "epoch": 0.11223248156896429,
      "grad_norm": 3.3888607025146484,
      "learning_rate": 9.999104073731458e-05,
      "loss": 4.2723,
      "step": 196
    },
    {
      "epoch": 0.11337771097272922,
      "grad_norm": 2.371840715408325,
      "learning_rate": 9.998915936014024e-05,
      "loss": 4.1893,
      "step": 198
    },
    {
      "epoch": 0.11452294037649417,
      "grad_norm": 2.0502302646636963,
      "learning_rate": 9.998709883126502e-05,
      "loss": 4.1395,
      "step": 200
    },
    {
      "epoch": 0.11566816978025911,
      "grad_norm": 1.6674678325653076,
      "learning_rate": 9.998485915807347e-05,
      "loss": 4.071,
      "step": 202
    },
    {
      "epoch": 0.11681339918402404,
      "grad_norm": 1.7829004526138306,
      "learning_rate": 9.998244034859219e-05,
      "loss": 4.1107,
      "step": 204
    },
    {
      "epoch": 0.117958628587789,
      "grad_norm": 1.763493299484253,
      "learning_rate": 9.997984241148967e-05,
      "loss": 4.1142,
      "step": 206
    },
    {
      "epoch": 0.11910385799155393,
      "grad_norm": 2.069258213043213,
      "learning_rate": 9.997706535607649e-05,
      "loss": 4.047,
      "step": 208
    },
    {
      "epoch": 0.12024908739531888,
      "grad_norm": 2.4262139797210693,
      "learning_rate": 9.997410919230505e-05,
      "loss": 4.0396,
      "step": 210
    },
    {
      "epoch": 0.12139431679908382,
      "grad_norm": 1.820494532585144,
      "learning_rate": 9.997097393076971e-05,
      "loss": 4.1548,
      "step": 212
    },
    {
      "epoch": 0.12253954620284876,
      "grad_norm": 2.1332643032073975,
      "learning_rate": 9.996765958270664e-05,
      "loss": 4.1384,
      "step": 214
    },
    {
      "epoch": 0.1236847756066137,
      "grad_norm": 2.1329920291900635,
      "learning_rate": 9.996416615999384e-05,
      "loss": 4.0315,
      "step": 216
    },
    {
      "epoch": 0.12483000501037864,
      "grad_norm": 2.29955792427063,
      "learning_rate": 9.996049367515108e-05,
      "loss": 4.0963,
      "step": 218
    },
    {
      "epoch": 0.12597523441414357,
      "grad_norm": 2.225827693939209,
      "learning_rate": 9.995664214133983e-05,
      "loss": 4.1247,
      "step": 220
    },
    {
      "epoch": 0.12712046381790854,
      "grad_norm": 1.794838786125183,
      "learning_rate": 9.99526115723633e-05,
      "loss": 4.0449,
      "step": 222
    },
    {
      "epoch": 0.12826569322167347,
      "grad_norm": 1.7548491954803467,
      "learning_rate": 9.994840198266626e-05,
      "loss": 3.927,
      "step": 224
    },
    {
      "epoch": 0.1294109226254384,
      "grad_norm": 1.487001895904541,
      "learning_rate": 9.994401338733508e-05,
      "loss": 3.9714,
      "step": 226
    },
    {
      "epoch": 0.13055615202920334,
      "grad_norm": 1.9811242818832397,
      "learning_rate": 9.993944580209768e-05,
      "loss": 4.0094,
      "step": 228
    },
    {
      "epoch": 0.13170138143296828,
      "grad_norm": 1.4257248640060425,
      "learning_rate": 9.99346992433234e-05,
      "loss": 4.0213,
      "step": 230
    },
    {
      "epoch": 0.13284661083673324,
      "grad_norm": 1.545812726020813,
      "learning_rate": 9.992977372802302e-05,
      "loss": 4.0076,
      "step": 232
    },
    {
      "epoch": 0.13399184024049818,
      "grad_norm": 1.8193179368972778,
      "learning_rate": 9.992466927384865e-05,
      "loss": 4.0536,
      "step": 234
    },
    {
      "epoch": 0.1351370696442631,
      "grad_norm": 2.329951763153076,
      "learning_rate": 9.991938589909369e-05,
      "loss": 3.9284,
      "step": 236
    },
    {
      "epoch": 0.13628229904802805,
      "grad_norm": 1.928336501121521,
      "learning_rate": 9.991392362269276e-05,
      "loss": 3.9462,
      "step": 238
    },
    {
      "epoch": 0.137427528451793,
      "grad_norm": 1.4073456525802612,
      "learning_rate": 9.990828246422164e-05,
      "loss": 3.9525,
      "step": 240
    },
    {
      "epoch": 0.13857275785555795,
      "grad_norm": 1.6663973331451416,
      "learning_rate": 9.990246244389713e-05,
      "loss": 3.9685,
      "step": 242
    },
    {
      "epoch": 0.13971798725932288,
      "grad_norm": 1.8091737031936646,
      "learning_rate": 9.989646358257715e-05,
      "loss": 3.9284,
      "step": 244
    },
    {
      "epoch": 0.14086321666308782,
      "grad_norm": 1.5511283874511719,
      "learning_rate": 9.989028590176044e-05,
      "loss": 3.9289,
      "step": 246
    },
    {
      "epoch": 0.14200844606685276,
      "grad_norm": 1.5394625663757324,
      "learning_rate": 9.988392942358664e-05,
      "loss": 3.9849,
      "step": 248
    },
    {
      "epoch": 0.14315367547061772,
      "grad_norm": 1.680882453918457,
      "learning_rate": 9.98773941708362e-05,
      "loss": 3.9452,
      "step": 250
    },
    {
      "epoch": 0.14429890487438266,
      "grad_norm": 1.6341670751571655,
      "learning_rate": 9.98706801669302e-05,
      "loss": 3.8317,
      "step": 252
    },
    {
      "epoch": 0.1454441342781476,
      "grad_norm": 1.9933757781982422,
      "learning_rate": 9.986378743593036e-05,
      "loss": 3.9665,
      "step": 254
    },
    {
      "epoch": 0.14658936368191253,
      "grad_norm": 2.2253994941711426,
      "learning_rate": 9.985671600253894e-05,
      "loss": 3.9239,
      "step": 256
    },
    {
      "epoch": 0.14773459308567746,
      "grad_norm": 2.2543365955352783,
      "learning_rate": 9.984946589209862e-05,
      "loss": 3.8639,
      "step": 258
    },
    {
      "epoch": 0.14887982248944243,
      "grad_norm": 1.8106629848480225,
      "learning_rate": 9.984203713059241e-05,
      "loss": 3.9178,
      "step": 260
    },
    {
      "epoch": 0.15002505189320736,
      "grad_norm": 1.638542652130127,
      "learning_rate": 9.983442974464362e-05,
      "loss": 3.9169,
      "step": 262
    },
    {
      "epoch": 0.1511702812969723,
      "grad_norm": 1.3521384000778198,
      "learning_rate": 9.982664376151564e-05,
      "loss": 3.8682,
      "step": 264
    },
    {
      "epoch": 0.15231551070073723,
      "grad_norm": 1.6458699703216553,
      "learning_rate": 9.981867920911201e-05,
      "loss": 3.9566,
      "step": 266
    },
    {
      "epoch": 0.1534607401045022,
      "grad_norm": 1.7851066589355469,
      "learning_rate": 9.981053611597615e-05,
      "loss": 3.9085,
      "step": 268
    },
    {
      "epoch": 0.15460596950826713,
      "grad_norm": 1.6740517616271973,
      "learning_rate": 9.980221451129137e-05,
      "loss": 3.8899,
      "step": 270
    },
    {
      "epoch": 0.15575119891203207,
      "grad_norm": 1.117129921913147,
      "learning_rate": 9.979371442488073e-05,
      "loss": 3.7544,
      "step": 272
    },
    {
      "epoch": 0.156896428315797,
      "grad_norm": 1.5676058530807495,
      "learning_rate": 9.978503588720694e-05,
      "loss": 3.7753,
      "step": 274
    },
    {
      "epoch": 0.15804165771956194,
      "grad_norm": 1.6609163284301758,
      "learning_rate": 9.977617892937223e-05,
      "loss": 3.8463,
      "step": 276
    },
    {
      "epoch": 0.1591868871233269,
      "grad_norm": 1.7229987382888794,
      "learning_rate": 9.976714358311828e-05,
      "loss": 3.8446,
      "step": 278
    },
    {
      "epoch": 0.16033211652709184,
      "grad_norm": 1.6770962476730347,
      "learning_rate": 9.975792988082603e-05,
      "loss": 3.8684,
      "step": 280
    },
    {
      "epoch": 0.16147734593085677,
      "grad_norm": 1.215281367301941,
      "learning_rate": 9.974853785551568e-05,
      "loss": 3.7788,
      "step": 282
    },
    {
      "epoch": 0.1626225753346217,
      "grad_norm": 1.208257794380188,
      "learning_rate": 9.973896754084646e-05,
      "loss": 3.8338,
      "step": 284
    },
    {
      "epoch": 0.16376780473838665,
      "grad_norm": 1.4068255424499512,
      "learning_rate": 9.972921897111658e-05,
      "loss": 3.8583,
      "step": 286
    },
    {
      "epoch": 0.1649130341421516,
      "grad_norm": 1.4898021221160889,
      "learning_rate": 9.971929218126306e-05,
      "loss": 3.8051,
      "step": 288
    },
    {
      "epoch": 0.16605826354591655,
      "grad_norm": 1.6303211450576782,
      "learning_rate": 9.970918720686164e-05,
      "loss": 3.8598,
      "step": 290
    },
    {
      "epoch": 0.16720349294968148,
      "grad_norm": 1.6599496603012085,
      "learning_rate": 9.969890408412665e-05,
      "loss": 3.7214,
      "step": 292
    },
    {
      "epoch": 0.16834872235344642,
      "grad_norm": 1.1958950757980347,
      "learning_rate": 9.968844284991086e-05,
      "loss": 3.7042,
      "step": 294
    },
    {
      "epoch": 0.16949395175721135,
      "grad_norm": 1.3099420070648193,
      "learning_rate": 9.967780354170533e-05,
      "loss": 3.7405,
      "step": 296
    },
    {
      "epoch": 0.17063918116097632,
      "grad_norm": 1.5054072141647339,
      "learning_rate": 9.966698619763936e-05,
      "loss": 3.7827,
      "step": 298
    },
    {
      "epoch": 0.17178441056474125,
      "grad_norm": 1.444757103919983,
      "learning_rate": 9.965599085648025e-05,
      "loss": 3.7361,
      "step": 300
    },
    {
      "epoch": 0.1729296399685062,
      "grad_norm": 0.9423370361328125,
      "learning_rate": 9.964481755763322e-05,
      "loss": 3.7063,
      "step": 302
    },
    {
      "epoch": 0.17407486937227112,
      "grad_norm": 1.044169306755066,
      "learning_rate": 9.963346634114128e-05,
      "loss": 3.7999,
      "step": 304
    },
    {
      "epoch": 0.1752200987760361,
      "grad_norm": 1.578296184539795,
      "learning_rate": 9.962193724768503e-05,
      "loss": 3.7448,
      "step": 306
    },
    {
      "epoch": 0.17636532817980102,
      "grad_norm": 1.4953491687774658,
      "learning_rate": 9.961023031858258e-05,
      "loss": 3.7625,
      "step": 308
    },
    {
      "epoch": 0.17751055758356596,
      "grad_norm": 1.295817494392395,
      "learning_rate": 9.959834559578934e-05,
      "loss": 3.7042,
      "step": 310
    },
    {
      "epoch": 0.1786557869873309,
      "grad_norm": 1.4001609086990356,
      "learning_rate": 9.95862831218979e-05,
      "loss": 3.7272,
      "step": 312
    },
    {
      "epoch": 0.17980101639109583,
      "grad_norm": 1.8881722688674927,
      "learning_rate": 9.95740429401379e-05,
      "loss": 3.6904,
      "step": 314
    },
    {
      "epoch": 0.1809462457948608,
      "grad_norm": 1.919791340827942,
      "learning_rate": 9.956162509437584e-05,
      "loss": 3.7071,
      "step": 316
    },
    {
      "epoch": 0.18209147519862573,
      "grad_norm": 1.758253574371338,
      "learning_rate": 9.954902962911494e-05,
      "loss": 3.7906,
      "step": 318
    },
    {
      "epoch": 0.18323670460239067,
      "grad_norm": 1.480323314666748,
      "learning_rate": 9.953625658949494e-05,
      "loss": 3.7697,
      "step": 320
    },
    {
      "epoch": 0.1843819340061556,
      "grad_norm": 1.5573948621749878,
      "learning_rate": 9.952330602129202e-05,
      "loss": 3.752,
      "step": 322
    },
    {
      "epoch": 0.18552716340992054,
      "grad_norm": 1.3204878568649292,
      "learning_rate": 9.951017797091858e-05,
      "loss": 3.6479,
      "step": 324
    },
    {
      "epoch": 0.1866723928136855,
      "grad_norm": 1.5514147281646729,
      "learning_rate": 9.949687248542303e-05,
      "loss": 3.7199,
      "step": 326
    },
    {
      "epoch": 0.18781762221745044,
      "grad_norm": 1.2910770177841187,
      "learning_rate": 9.948338961248977e-05,
      "loss": 3.7427,
      "step": 328
    },
    {
      "epoch": 0.18896285162121537,
      "grad_norm": 1.1663178205490112,
      "learning_rate": 9.946972940043882e-05,
      "loss": 3.6616,
      "step": 330
    },
    {
      "epoch": 0.1901080810249803,
      "grad_norm": 1.3439650535583496,
      "learning_rate": 9.945589189822584e-05,
      "loss": 3.7385,
      "step": 332
    },
    {
      "epoch": 0.19125331042874527,
      "grad_norm": 1.1256877183914185,
      "learning_rate": 9.94418771554418e-05,
      "loss": 3.6056,
      "step": 334
    },
    {
      "epoch": 0.1923985398325102,
      "grad_norm": 1.1813896894454956,
      "learning_rate": 9.942768522231289e-05,
      "loss": 3.5544,
      "step": 336
    },
    {
      "epoch": 0.19354376923627514,
      "grad_norm": 1.2541157007217407,
      "learning_rate": 9.941331614970031e-05,
      "loss": 3.6401,
      "step": 338
    },
    {
      "epoch": 0.19468899864004008,
      "grad_norm": 1.237069010734558,
      "learning_rate": 9.939876998910012e-05,
      "loss": 3.7564,
      "step": 340
    },
    {
      "epoch": 0.19583422804380501,
      "grad_norm": 1.1157530546188354,
      "learning_rate": 9.938404679264301e-05,
      "loss": 3.6164,
      "step": 342
    },
    {
      "epoch": 0.19697945744756998,
      "grad_norm": 1.149465560913086,
      "learning_rate": 9.936914661309412e-05,
      "loss": 3.6968,
      "step": 344
    },
    {
      "epoch": 0.1981246868513349,
      "grad_norm": 0.9530683755874634,
      "learning_rate": 9.93540695038529e-05,
      "loss": 3.6194,
      "step": 346
    },
    {
      "epoch": 0.19926991625509985,
      "grad_norm": 1.1686296463012695,
      "learning_rate": 9.933881551895281e-05,
      "loss": 3.7604,
      "step": 348
    },
    {
      "epoch": 0.20041514565886479,
      "grad_norm": 1.2699095010757446,
      "learning_rate": 9.93233847130613e-05,
      "loss": 3.6371,
      "step": 350
    },
    {
      "epoch": 0.20156037506262972,
      "grad_norm": 1.1345208883285522,
      "learning_rate": 9.930777714147945e-05,
      "loss": 3.6146,
      "step": 352
    },
    {
      "epoch": 0.20270560446639468,
      "grad_norm": 1.3319895267486572,
      "learning_rate": 9.929199286014185e-05,
      "loss": 3.6443,
      "step": 354
    },
    {
      "epoch": 0.20385083387015962,
      "grad_norm": 1.6053088903427124,
      "learning_rate": 9.927603192561637e-05,
      "loss": 3.6277,
      "step": 356
    },
    {
      "epoch": 0.20499606327392456,
      "grad_norm": 1.2149386405944824,
      "learning_rate": 9.925989439510398e-05,
      "loss": 3.5555,
      "step": 358
    },
    {
      "epoch": 0.2061412926776895,
      "grad_norm": 1.0859287977218628,
      "learning_rate": 9.924358032643855e-05,
      "loss": 3.6253,
      "step": 360
    },
    {
      "epoch": 0.20728652208145446,
      "grad_norm": 0.9613994359970093,
      "learning_rate": 9.922708977808663e-05,
      "loss": 3.5826,
      "step": 362
    },
    {
      "epoch": 0.2084317514852194,
      "grad_norm": 1.0509222745895386,
      "learning_rate": 9.921042280914721e-05,
      "loss": 3.6263,
      "step": 364
    },
    {
      "epoch": 0.20957698088898433,
      "grad_norm": 1.3777049779891968,
      "learning_rate": 9.919357947935156e-05,
      "loss": 3.6187,
      "step": 366
    },
    {
      "epoch": 0.21072221029274926,
      "grad_norm": 1.3364644050598145,
      "learning_rate": 9.9176559849063e-05,
      "loss": 3.5946,
      "step": 368
    },
    {
      "epoch": 0.2118674396965142,
      "grad_norm": 1.4562104940414429,
      "learning_rate": 9.915936397927665e-05,
      "loss": 3.6099,
      "step": 370
    },
    {
      "epoch": 0.21301266910027916,
      "grad_norm": 1.066383719444275,
      "learning_rate": 9.91419919316193e-05,
      "loss": 3.5395,
      "step": 372
    },
    {
      "epoch": 0.2141578985040441,
      "grad_norm": 1.6498733758926392,
      "learning_rate": 9.912444376834903e-05,
      "loss": 3.6083,
      "step": 374
    },
    {
      "epoch": 0.21530312790780903,
      "grad_norm": 0.9828553795814514,
      "learning_rate": 9.910671955235518e-05,
      "loss": 3.5409,
      "step": 376
    },
    {
      "epoch": 0.21644835731157397,
      "grad_norm": 1.178269624710083,
      "learning_rate": 9.908881934715798e-05,
      "loss": 3.6018,
      "step": 378
    },
    {
      "epoch": 0.2175935867153389,
      "grad_norm": 1.3328818082809448,
      "learning_rate": 9.907074321690838e-05,
      "loss": 3.5718,
      "step": 380
    },
    {
      "epoch": 0.21873881611910387,
      "grad_norm": 1.1077896356582642,
      "learning_rate": 9.905249122638783e-05,
      "loss": 3.581,
      "step": 382
    },
    {
      "epoch": 0.2198840455228688,
      "grad_norm": 1.220638394355774,
      "learning_rate": 9.903406344100798e-05,
      "loss": 3.5813,
      "step": 384
    },
    {
      "epoch": 0.22102927492663374,
      "grad_norm": 1.5574766397476196,
      "learning_rate": 9.901545992681057e-05,
      "loss": 3.5785,
      "step": 386
    },
    {
      "epoch": 0.22217450433039868,
      "grad_norm": 1.013902187347412,
      "learning_rate": 9.899668075046706e-05,
      "loss": 3.6156,
      "step": 388
    },
    {
      "epoch": 0.2233197337341636,
      "grad_norm": 1.197936773300171,
      "learning_rate": 9.897772597927848e-05,
      "loss": 3.5428,
      "step": 390
    },
    {
      "epoch": 0.22446496313792857,
      "grad_norm": 0.9838180541992188,
      "learning_rate": 9.895859568117512e-05,
      "loss": 3.534,
      "step": 392
    },
    {
      "epoch": 0.2256101925416935,
      "grad_norm": 1.0316840410232544,
      "learning_rate": 9.893928992471639e-05,
      "loss": 3.5691,
      "step": 394
    },
    {
      "epoch": 0.22675542194545845,
      "grad_norm": 0.9378739595413208,
      "learning_rate": 9.891980877909045e-05,
      "loss": 3.5368,
      "step": 396
    },
    {
      "epoch": 0.22790065134922338,
      "grad_norm": 1.4947346448898315,
      "learning_rate": 9.890015231411404e-05,
      "loss": 3.5709,
      "step": 398
    },
    {
      "epoch": 0.22904588075298835,
      "grad_norm": 0.9118148684501648,
      "learning_rate": 9.888032060023225e-05,
      "loss": 3.527,
      "step": 400
    },
    {
      "epoch": 0.23019111015675328,
      "grad_norm": 1.2407753467559814,
      "learning_rate": 9.886031370851816e-05,
      "loss": 3.5301,
      "step": 402
    },
    {
      "epoch": 0.23133633956051822,
      "grad_norm": 1.7163093090057373,
      "learning_rate": 9.88401317106727e-05,
      "loss": 3.5828,
      "step": 404
    },
    {
      "epoch": 0.23248156896428315,
      "grad_norm": 1.0757009983062744,
      "learning_rate": 9.881977467902434e-05,
      "loss": 3.4831,
      "step": 406
    },
    {
      "epoch": 0.2336267983680481,
      "grad_norm": 0.9473862648010254,
      "learning_rate": 9.879924268652885e-05,
      "loss": 3.5196,
      "step": 408
    },
    {
      "epoch": 0.23477202777181305,
      "grad_norm": 1.199771761894226,
      "learning_rate": 9.877853580676897e-05,
      "loss": 3.574,
      "step": 410
    },
    {
      "epoch": 0.235917257175578,
      "grad_norm": 0.9006698131561279,
      "learning_rate": 9.875765411395428e-05,
      "loss": 3.5348,
      "step": 412
    },
    {
      "epoch": 0.23706248657934292,
      "grad_norm": 1.1242282390594482,
      "learning_rate": 9.873659768292081e-05,
      "loss": 3.5249,
      "step": 414
    },
    {
      "epoch": 0.23820771598310786,
      "grad_norm": 1.0675747394561768,
      "learning_rate": 9.871536658913082e-05,
      "loss": 3.5086,
      "step": 416
    },
    {
      "epoch": 0.2393529453868728,
      "grad_norm": 0.8544116616249084,
      "learning_rate": 9.869396090867255e-05,
      "loss": 3.546,
      "step": 418
    },
    {
      "epoch": 0.24049817479063776,
      "grad_norm": 1.3136742115020752,
      "learning_rate": 9.867238071825992e-05,
      "loss": 3.4937,
      "step": 420
    },
    {
      "epoch": 0.2416434041944027,
      "grad_norm": 1.3740772008895874,
      "learning_rate": 9.865062609523223e-05,
      "loss": 3.4303,
      "step": 422
    },
    {
      "epoch": 0.24278863359816763,
      "grad_norm": 1.342213749885559,
      "learning_rate": 9.862869711755397e-05,
      "loss": 3.4982,
      "step": 424
    },
    {
      "epoch": 0.24393386300193257,
      "grad_norm": 1.0677942037582397,
      "learning_rate": 9.860659386381443e-05,
      "loss": 3.4288,
      "step": 426
    },
    {
      "epoch": 0.24507909240569753,
      "grad_norm": 0.9615838527679443,
      "learning_rate": 9.858431641322749e-05,
      "loss": 3.4787,
      "step": 428
    },
    {
      "epoch": 0.24622432180946247,
      "grad_norm": 1.0572890043258667,
      "learning_rate": 9.856186484563134e-05,
      "loss": 3.5314,
      "step": 430
    },
    {
      "epoch": 0.2473695512132274,
      "grad_norm": 1.158275842666626,
      "learning_rate": 9.853923924148815e-05,
      "loss": 3.5504,
      "step": 432
    },
    {
      "epoch": 0.24851478061699234,
      "grad_norm": 1.171581745147705,
      "learning_rate": 9.851643968188383e-05,
      "loss": 3.5478,
      "step": 434
    },
    {
      "epoch": 0.24966001002075727,
      "grad_norm": 1.0333714485168457,
      "learning_rate": 9.849346624852764e-05,
      "loss": 3.5497,
      "step": 436
    },
    {
      "epoch": 0.2508052394245222,
      "grad_norm": 0.9459155797958374,
      "learning_rate": 9.847031902375207e-05,
      "loss": 3.5074,
      "step": 438
    },
    {
      "epoch": 0.25195046882828714,
      "grad_norm": 1.0424790382385254,
      "learning_rate": 9.84469980905124e-05,
      "loss": 3.4961,
      "step": 440
    },
    {
      "epoch": 0.25309569823205214,
      "grad_norm": 1.0463571548461914,
      "learning_rate": 9.842350353238642e-05,
      "loss": 3.4405,
      "step": 442
    },
    {
      "epoch": 0.25424092763581707,
      "grad_norm": 1.000319242477417,
      "learning_rate": 9.839983543357421e-05,
      "loss": 3.4595,
      "step": 444
    },
    {
      "epoch": 0.255386157039582,
      "grad_norm": 1.2526150941848755,
      "learning_rate": 9.837599387889773e-05,
      "loss": 3.5012,
      "step": 446
    },
    {
      "epoch": 0.25653138644334694,
      "grad_norm": 1.3148843050003052,
      "learning_rate": 9.835197895380065e-05,
      "loss": 3.4767,
      "step": 448
    },
    {
      "epoch": 0.2576766158471119,
      "grad_norm": 1.3939634561538696,
      "learning_rate": 9.83277907443479e-05,
      "loss": 3.3783,
      "step": 450
    },
    {
      "epoch": 0.2588218452508768,
      "grad_norm": 1.0367929935455322,
      "learning_rate": 9.830342933722545e-05,
      "loss": 3.4289,
      "step": 452
    },
    {
      "epoch": 0.25996707465464175,
      "grad_norm": 0.9439120888710022,
      "learning_rate": 9.827889481974e-05,
      "loss": 3.4728,
      "step": 454
    },
    {
      "epoch": 0.2611123040584067,
      "grad_norm": 1.2146074771881104,
      "learning_rate": 9.82541872798186e-05,
      "loss": 3.4257,
      "step": 456
    },
    {
      "epoch": 0.2622575334621716,
      "grad_norm": 1.0530729293823242,
      "learning_rate": 9.822930680600841e-05,
      "loss": 3.4681,
      "step": 458
    },
    {
      "epoch": 0.26340276286593656,
      "grad_norm": 1.1026678085327148,
      "learning_rate": 9.820425348747637e-05,
      "loss": 3.4298,
      "step": 460
    },
    {
      "epoch": 0.26454799226970155,
      "grad_norm": 1.2520779371261597,
      "learning_rate": 9.817902741400879e-05,
      "loss": 3.4191,
      "step": 462
    },
    {
      "epoch": 0.2656932216734665,
      "grad_norm": 1.1041593551635742,
      "learning_rate": 9.815362867601121e-05,
      "loss": 3.466,
      "step": 464
    },
    {
      "epoch": 0.2668384510772314,
      "grad_norm": 0.881693422794342,
      "learning_rate": 9.812805736450786e-05,
      "loss": 3.4929,
      "step": 466
    },
    {
      "epoch": 0.26798368048099636,
      "grad_norm": 1.3125033378601074,
      "learning_rate": 9.810231357114152e-05,
      "loss": 3.4592,
      "step": 468
    },
    {
      "epoch": 0.2691289098847613,
      "grad_norm": 1.2968268394470215,
      "learning_rate": 9.807639738817307e-05,
      "loss": 3.4851,
      "step": 470
    },
    {
      "epoch": 0.2702741392885262,
      "grad_norm": 0.9855544567108154,
      "learning_rate": 9.805030890848119e-05,
      "loss": 3.4487,
      "step": 472
    },
    {
      "epoch": 0.27141936869229116,
      "grad_norm": 1.3063323497772217,
      "learning_rate": 9.802404822556209e-05,
      "loss": 3.4961,
      "step": 474
    },
    {
      "epoch": 0.2725645980960561,
      "grad_norm": 1.0567957162857056,
      "learning_rate": 9.79976154335291e-05,
      "loss": 3.3975,
      "step": 476
    },
    {
      "epoch": 0.27370982749982103,
      "grad_norm": 0.9473979473114014,
      "learning_rate": 9.797101062711231e-05,
      "loss": 3.4573,
      "step": 478
    },
    {
      "epoch": 0.274855056903586,
      "grad_norm": 1.2931294441223145,
      "learning_rate": 9.794423390165837e-05,
      "loss": 3.3732,
      "step": 480
    },
    {
      "epoch": 0.27600028630735096,
      "grad_norm": 1.233302116394043,
      "learning_rate": 9.791728535312998e-05,
      "loss": 3.419,
      "step": 482
    },
    {
      "epoch": 0.2771455157111159,
      "grad_norm": 0.9638918042182922,
      "learning_rate": 9.789016507810564e-05,
      "loss": 3.4119,
      "step": 484
    },
    {
      "epoch": 0.27829074511488083,
      "grad_norm": 1.105643391609192,
      "learning_rate": 9.786287317377929e-05,
      "loss": 3.3909,
      "step": 486
    },
    {
      "epoch": 0.27943597451864577,
      "grad_norm": 0.9666796922683716,
      "learning_rate": 9.783540973795998e-05,
      "loss": 3.4194,
      "step": 488
    },
    {
      "epoch": 0.2805812039224107,
      "grad_norm": 1.3533586263656616,
      "learning_rate": 9.780777486907146e-05,
      "loss": 3.3789,
      "step": 490
    },
    {
      "epoch": 0.28172643332617564,
      "grad_norm": 1.1253416538238525,
      "learning_rate": 9.777996866615186e-05,
      "loss": 3.4385,
      "step": 492
    },
    {
      "epoch": 0.2828716627299406,
      "grad_norm": 0.7198868989944458,
      "learning_rate": 9.775199122885339e-05,
      "loss": 3.4038,
      "step": 494
    },
    {
      "epoch": 0.2840168921337055,
      "grad_norm": 0.9696770310401917,
      "learning_rate": 9.772384265744188e-05,
      "loss": 3.4576,
      "step": 496
    },
    {
      "epoch": 0.28516212153747045,
      "grad_norm": 1.321269154548645,
      "learning_rate": 9.76955230527965e-05,
      "loss": 3.4348,
      "step": 498
    },
    {
      "epoch": 0.28630735094123544,
      "grad_norm": 1.3119802474975586,
      "learning_rate": 9.766703251640934e-05,
      "loss": 3.3848,
      "step": 500
    },
    {
      "epoch": 0.2874525803450004,
      "grad_norm": 1.0199967622756958,
      "learning_rate": 9.763837115038513e-05,
      "loss": 3.4108,
      "step": 502
    },
    {
      "epoch": 0.2885978097487653,
      "grad_norm": 0.9925194382667542,
      "learning_rate": 9.760953905744075e-05,
      "loss": 3.31,
      "step": 504
    },
    {
      "epoch": 0.28974303915253025,
      "grad_norm": 0.9447107315063477,
      "learning_rate": 9.758053634090502e-05,
      "loss": 3.3598,
      "step": 506
    },
    {
      "epoch": 0.2908882685562952,
      "grad_norm": 1.052873134613037,
      "learning_rate": 9.755136310471817e-05,
      "loss": 3.3704,
      "step": 508
    },
    {
      "epoch": 0.2920334979600601,
      "grad_norm": 1.061514139175415,
      "learning_rate": 9.752201945343156e-05,
      "loss": 3.3642,
      "step": 510
    },
    {
      "epoch": 0.29317872736382505,
      "grad_norm": 0.8627074956893921,
      "learning_rate": 9.74925054922073e-05,
      "loss": 3.367,
      "step": 512
    },
    {
      "epoch": 0.29432395676759,
      "grad_norm": 1.0214530229568481,
      "learning_rate": 9.746282132681785e-05,
      "loss": 3.3266,
      "step": 514
    },
    {
      "epoch": 0.2954691861713549,
      "grad_norm": 1.1223275661468506,
      "learning_rate": 9.743296706364565e-05,
      "loss": 3.4194,
      "step": 516
    },
    {
      "epoch": 0.2966144155751199,
      "grad_norm": 0.9849138259887695,
      "learning_rate": 9.740294280968273e-05,
      "loss": 3.3664,
      "step": 518
    },
    {
      "epoch": 0.29775964497888485,
      "grad_norm": 0.7025099396705627,
      "learning_rate": 9.737274867253034e-05,
      "loss": 3.3772,
      "step": 520
    },
    {
      "epoch": 0.2989048743826498,
      "grad_norm": 0.936536967754364,
      "learning_rate": 9.734238476039858e-05,
      "loss": 3.3196,
      "step": 522
    },
    {
      "epoch": 0.3000501037864147,
      "grad_norm": 1.113277792930603,
      "learning_rate": 9.731185118210598e-05,
      "loss": 3.4606,
      "step": 524
    },
    {
      "epoch": 0.30119533319017966,
      "grad_norm": 1.0153186321258545,
      "learning_rate": 9.728114804707909e-05,
      "loss": 3.4079,
      "step": 526
    },
    {
      "epoch": 0.3023405625939446,
      "grad_norm": 1.1675206422805786,
      "learning_rate": 9.725027546535215e-05,
      "loss": 3.4111,
      "step": 528
    },
    {
      "epoch": 0.30348579199770953,
      "grad_norm": 0.9518959522247314,
      "learning_rate": 9.721923354756665e-05,
      "loss": 3.3905,
      "step": 530
    },
    {
      "epoch": 0.30463102140147447,
      "grad_norm": 0.9693425297737122,
      "learning_rate": 9.718802240497098e-05,
      "loss": 3.4364,
      "step": 532
    },
    {
      "epoch": 0.3057762508052394,
      "grad_norm": 1.1249076128005981,
      "learning_rate": 9.715664214941997e-05,
      "loss": 3.3373,
      "step": 534
    },
    {
      "epoch": 0.3069214802090044,
      "grad_norm": 0.8406875133514404,
      "learning_rate": 9.712509289337453e-05,
      "loss": 3.321,
      "step": 536
    },
    {
      "epoch": 0.30806670961276933,
      "grad_norm": 0.9538395404815674,
      "learning_rate": 9.709337474990121e-05,
      "loss": 3.4007,
      "step": 538
    },
    {
      "epoch": 0.30921193901653427,
      "grad_norm": 0.8003599047660828,
      "learning_rate": 9.706148783267187e-05,
      "loss": 3.3798,
      "step": 540
    },
    {
      "epoch": 0.3103571684202992,
      "grad_norm": 0.8605026602745056,
      "learning_rate": 9.702943225596316e-05,
      "loss": 3.2908,
      "step": 542
    },
    {
      "epoch": 0.31150239782406414,
      "grad_norm": 0.7349815964698792,
      "learning_rate": 9.699720813465625e-05,
      "loss": 3.408,
      "step": 544
    },
    {
      "epoch": 0.3126476272278291,
      "grad_norm": 1.1622780561447144,
      "learning_rate": 9.696481558423628e-05,
      "loss": 3.3212,
      "step": 546
    },
    {
      "epoch": 0.313792856631594,
      "grad_norm": 0.9829496145248413,
      "learning_rate": 9.693225472079204e-05,
      "loss": 3.4067,
      "step": 548
    },
    {
      "epoch": 0.31493808603535894,
      "grad_norm": 1.1378313302993774,
      "learning_rate": 9.689952566101548e-05,
      "loss": 3.3556,
      "step": 550
    },
    {
      "epoch": 0.3160833154391239,
      "grad_norm": 0.9355561137199402,
      "learning_rate": 9.686662852220142e-05,
      "loss": 3.3281,
      "step": 552
    },
    {
      "epoch": 0.3172285448428888,
      "grad_norm": 0.9328277111053467,
      "learning_rate": 9.683356342224694e-05,
      "loss": 3.313,
      "step": 554
    },
    {
      "epoch": 0.3183737742466538,
      "grad_norm": 1.277377724647522,
      "learning_rate": 9.680033047965114e-05,
      "loss": 3.3499,
      "step": 556
    },
    {
      "epoch": 0.31951900365041874,
      "grad_norm": 1.0239235162734985,
      "learning_rate": 9.67669298135146e-05,
      "loss": 3.3936,
      "step": 558
    },
    {
      "epoch": 0.3206642330541837,
      "grad_norm": 0.6908963322639465,
      "learning_rate": 9.673336154353899e-05,
      "loss": 3.3584,
      "step": 560
    },
    {
      "epoch": 0.3218094624579486,
      "grad_norm": 0.8835290670394897,
      "learning_rate": 9.669962579002664e-05,
      "loss": 3.3728,
      "step": 562
    },
    {
      "epoch": 0.32295469186171355,
      "grad_norm": 1.0561710596084595,
      "learning_rate": 9.666572267388013e-05,
      "loss": 3.3579,
      "step": 564
    },
    {
      "epoch": 0.3240999212654785,
      "grad_norm": 0.8400120735168457,
      "learning_rate": 9.663165231660181e-05,
      "loss": 3.3224,
      "step": 566
    },
    {
      "epoch": 0.3252451506692434,
      "grad_norm": 0.8960584998130798,
      "learning_rate": 9.659741484029341e-05,
      "loss": 3.3434,
      "step": 568
    },
    {
      "epoch": 0.32639038007300836,
      "grad_norm": 0.9615944027900696,
      "learning_rate": 9.656301036765558e-05,
      "loss": 3.2587,
      "step": 570
    },
    {
      "epoch": 0.3275356094767733,
      "grad_norm": 0.983391523361206,
      "learning_rate": 9.652843902198743e-05,
      "loss": 3.2396,
      "step": 572
    },
    {
      "epoch": 0.3286808388805383,
      "grad_norm": 0.7758197784423828,
      "learning_rate": 9.649370092718615e-05,
      "loss": 3.2948,
      "step": 574
    },
    {
      "epoch": 0.3298260682843032,
      "grad_norm": 0.9714862704277039,
      "learning_rate": 9.64587962077465e-05,
      "loss": 3.3381,
      "step": 576
    },
    {
      "epoch": 0.33097129768806816,
      "grad_norm": 0.8628116846084595,
      "learning_rate": 9.64237249887604e-05,
      "loss": 3.294,
      "step": 578
    },
    {
      "epoch": 0.3321165270918331,
      "grad_norm": 0.9794777035713196,
      "learning_rate": 9.638848739591646e-05,
      "loss": 3.3119,
      "step": 580
    },
    {
      "epoch": 0.333261756495598,
      "grad_norm": 0.8179820775985718,
      "learning_rate": 9.635308355549957e-05,
      "loss": 3.3009,
      "step": 582
    },
    {
      "epoch": 0.33440698589936296,
      "grad_norm": 0.8732323050498962,
      "learning_rate": 9.63175135943904e-05,
      "loss": 3.3207,
      "step": 584
    },
    {
      "epoch": 0.3355522153031279,
      "grad_norm": 1.0355788469314575,
      "learning_rate": 9.628177764006497e-05,
      "loss": 3.2889,
      "step": 586
    },
    {
      "epoch": 0.33669744470689283,
      "grad_norm": 0.8974720239639282,
      "learning_rate": 9.624587582059417e-05,
      "loss": 3.3089,
      "step": 588
    },
    {
      "epoch": 0.33784267411065777,
      "grad_norm": 0.7800531387329102,
      "learning_rate": 9.620980826464335e-05,
      "loss": 3.2999,
      "step": 590
    },
    {
      "epoch": 0.3389879035144227,
      "grad_norm": 0.7294676899909973,
      "learning_rate": 9.617357510147182e-05,
      "loss": 3.3634,
      "step": 592
    },
    {
      "epoch": 0.3401331329181877,
      "grad_norm": 0.7799131274223328,
      "learning_rate": 9.613717646093239e-05,
      "loss": 3.308,
      "step": 594
    },
    {
      "epoch": 0.34127836232195263,
      "grad_norm": 0.9899328947067261,
      "learning_rate": 9.610061247347091e-05,
      "loss": 3.3191,
      "step": 596
    },
    {
      "epoch": 0.34242359172571757,
      "grad_norm": 1.0520347356796265,
      "learning_rate": 9.606388327012579e-05,
      "loss": 3.389,
      "step": 598
    },
    {
      "epoch": 0.3435688211294825,
      "grad_norm": 0.9768466353416443,
      "learning_rate": 9.602698898252756e-05,
      "loss": 3.2905,
      "step": 600
    },
    {
      "epoch": 0.34471405053324744,
      "grad_norm": 0.9359555244445801,
      "learning_rate": 9.598992974289837e-05,
      "loss": 3.3022,
      "step": 602
    },
    {
      "epoch": 0.3458592799370124,
      "grad_norm": 0.7487738728523254,
      "learning_rate": 9.595270568405156e-05,
      "loss": 3.2234,
      "step": 604
    },
    {
      "epoch": 0.3470045093407773,
      "grad_norm": 0.8295655846595764,
      "learning_rate": 9.591531693939109e-05,
      "loss": 3.3506,
      "step": 606
    },
    {
      "epoch": 0.34814973874454225,
      "grad_norm": 0.9020605683326721,
      "learning_rate": 9.587776364291117e-05,
      "loss": 3.3026,
      "step": 608
    },
    {
      "epoch": 0.3492949681483072,
      "grad_norm": 0.7868961095809937,
      "learning_rate": 9.58400459291957e-05,
      "loss": 3.2393,
      "step": 610
    },
    {
      "epoch": 0.3504401975520722,
      "grad_norm": 0.9779835939407349,
      "learning_rate": 9.580216393341785e-05,
      "loss": 3.3254,
      "step": 612
    },
    {
      "epoch": 0.3515854269558371,
      "grad_norm": 0.8962246179580688,
      "learning_rate": 9.576411779133956e-05,
      "loss": 3.2486,
      "step": 614
    },
    {
      "epoch": 0.35273065635960205,
      "grad_norm": 0.9166551828384399,
      "learning_rate": 9.572590763931097e-05,
      "loss": 3.2193,
      "step": 616
    },
    {
      "epoch": 0.353875885763367,
      "grad_norm": 0.7779364585876465,
      "learning_rate": 9.568753361427009e-05,
      "loss": 3.2469,
      "step": 618
    },
    {
      "epoch": 0.3550211151671319,
      "grad_norm": 0.750092089176178,
      "learning_rate": 9.564899585374214e-05,
      "loss": 3.2532,
      "step": 620
    },
    {
      "epoch": 0.35616634457089685,
      "grad_norm": 1.0269392728805542,
      "learning_rate": 9.561029449583919e-05,
      "loss": 3.3331,
      "step": 622
    },
    {
      "epoch": 0.3573115739746618,
      "grad_norm": 0.7937965989112854,
      "learning_rate": 9.557142967925956e-05,
      "loss": 3.314,
      "step": 624
    },
    {
      "epoch": 0.3584568033784267,
      "grad_norm": 1.1338940858840942,
      "learning_rate": 9.553240154328744e-05,
      "loss": 3.3375,
      "step": 626
    },
    {
      "epoch": 0.35960203278219166,
      "grad_norm": 0.7937076091766357,
      "learning_rate": 9.549321022779229e-05,
      "loss": 3.2691,
      "step": 628
    },
    {
      "epoch": 0.36074726218595665,
      "grad_norm": 0.8552340865135193,
      "learning_rate": 9.545385587322839e-05,
      "loss": 3.3107,
      "step": 630
    },
    {
      "epoch": 0.3618924915897216,
      "grad_norm": 1.0279617309570312,
      "learning_rate": 9.541433862063429e-05,
      "loss": 3.2552,
      "step": 632
    },
    {
      "epoch": 0.3630377209934865,
      "grad_norm": 0.9652466177940369,
      "learning_rate": 9.537465861163237e-05,
      "loss": 3.242,
      "step": 634
    },
    {
      "epoch": 0.36418295039725146,
      "grad_norm": 0.9129723310470581,
      "learning_rate": 9.533481598842827e-05,
      "loss": 3.3131,
      "step": 636
    },
    {
      "epoch": 0.3653281798010164,
      "grad_norm": 0.9316424131393433,
      "learning_rate": 9.529481089381042e-05,
      "loss": 3.3288,
      "step": 638
    },
    {
      "epoch": 0.36647340920478133,
      "grad_norm": 0.7987300753593445,
      "learning_rate": 9.525464347114953e-05,
      "loss": 3.2832,
      "step": 640
    },
    {
      "epoch": 0.36761863860854627,
      "grad_norm": 0.7103368043899536,
      "learning_rate": 9.521431386439807e-05,
      "loss": 3.2339,
      "step": 642
    },
    {
      "epoch": 0.3687638680123112,
      "grad_norm": 0.7420955896377563,
      "learning_rate": 9.517382221808969e-05,
      "loss": 3.1662,
      "step": 644
    },
    {
      "epoch": 0.36990909741607614,
      "grad_norm": 0.8201749324798584,
      "learning_rate": 9.513316867733883e-05,
      "loss": 3.2837,
      "step": 646
    },
    {
      "epoch": 0.3710543268198411,
      "grad_norm": 0.8581364154815674,
      "learning_rate": 9.509235338784009e-05,
      "loss": 3.2949,
      "step": 648
    },
    {
      "epoch": 0.37219955622360607,
      "grad_norm": 0.956118643283844,
      "learning_rate": 9.505137649586775e-05,
      "loss": 3.316,
      "step": 650
    },
    {
      "epoch": 0.373344785627371,
      "grad_norm": 0.708759069442749,
      "learning_rate": 9.501023814827524e-05,
      "loss": 3.1951,
      "step": 652
    },
    {
      "epoch": 0.37449001503113594,
      "grad_norm": 0.8143038153648376,
      "learning_rate": 9.496893849249464e-05,
      "loss": 3.2738,
      "step": 654
    },
    {
      "epoch": 0.3756352444349009,
      "grad_norm": 0.6578754782676697,
      "learning_rate": 9.492747767653611e-05,
      "loss": 3.2809,
      "step": 656
    },
    {
      "epoch": 0.3767804738386658,
      "grad_norm": 0.8550508618354797,
      "learning_rate": 9.488585584898738e-05,
      "loss": 3.2668,
      "step": 658
    },
    {
      "epoch": 0.37792570324243074,
      "grad_norm": 0.795080304145813,
      "learning_rate": 9.48440731590132e-05,
      "loss": 3.28,
      "step": 660
    },
    {
      "epoch": 0.3790709326461957,
      "grad_norm": 0.9933105707168579,
      "learning_rate": 9.480212975635486e-05,
      "loss": 3.3104,
      "step": 662
    },
    {
      "epoch": 0.3802161620499606,
      "grad_norm": 1.224338412284851,
      "learning_rate": 9.476002579132957e-05,
      "loss": 3.29,
      "step": 664
    },
    {
      "epoch": 0.38136139145372555,
      "grad_norm": 0.8564585447311401,
      "learning_rate": 9.471776141483e-05,
      "loss": 3.2,
      "step": 666
    },
    {
      "epoch": 0.38250662085749054,
      "grad_norm": 1.160684585571289,
      "learning_rate": 9.467533677832365e-05,
      "loss": 3.2226,
      "step": 668
    },
    {
      "epoch": 0.3836518502612555,
      "grad_norm": 0.8671857714653015,
      "learning_rate": 9.463275203385244e-05,
      "loss": 3.2453,
      "step": 670
    },
    {
      "epoch": 0.3847970796650204,
      "grad_norm": 1.0225045680999756,
      "learning_rate": 9.459000733403205e-05,
      "loss": 3.2283,
      "step": 672
    },
    {
      "epoch": 0.38594230906878535,
      "grad_norm": 0.8350477814674377,
      "learning_rate": 9.454710283205139e-05,
      "loss": 3.2584,
      "step": 674
    },
    {
      "epoch": 0.3870875384725503,
      "grad_norm": 0.8098021745681763,
      "learning_rate": 9.450403868167208e-05,
      "loss": 3.2836,
      "step": 676
    },
    {
      "epoch": 0.3882327678763152,
      "grad_norm": 0.8174638748168945,
      "learning_rate": 9.446081503722792e-05,
      "loss": 3.1896,
      "step": 678
    },
    {
      "epoch": 0.38937799728008016,
| "grad_norm": 0.6904940009117126, | |
| "learning_rate": 9.441743205362426e-05, | |
| "loss": 3.2464, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.3905232266838451, | |
| "grad_norm": 0.692864716053009, | |
| "learning_rate": 9.437388988633752e-05, | |
| "loss": 3.2277, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.39166845608761003, | |
| "grad_norm": 0.7014842629432678, | |
| "learning_rate": 9.433018869141464e-05, | |
| "loss": 3.2372, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.39281368549137496, | |
| "grad_norm": 0.6166806817054749, | |
| "learning_rate": 9.428632862547237e-05, | |
| "loss": 3.2501, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.39395891489513996, | |
| "grad_norm": 0.7060846090316772, | |
| "learning_rate": 9.424230984569696e-05, | |
| "loss": 3.2881, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.3951041442989049, | |
| "grad_norm": 0.7771391272544861, | |
| "learning_rate": 9.419813250984337e-05, | |
| "loss": 3.2149, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.3962493737026698, | |
| "grad_norm": 0.6290923953056335, | |
| "learning_rate": 9.415379677623485e-05, | |
| "loss": 3.1555, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.39739460310643476, | |
| "grad_norm": 0.7270971536636353, | |
| "learning_rate": 9.410930280376225e-05, | |
| "loss": 3.2554, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.3985398325101997, | |
| "grad_norm": 0.681962788105011, | |
| "learning_rate": 9.40646507518836e-05, | |
| "loss": 3.1671, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.39968506191396463, | |
| "grad_norm": 0.5727997422218323, | |
| "learning_rate": 9.40198407806234e-05, | |
| "loss": 3.237, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.40083029131772957, | |
| "grad_norm": 0.7687988877296448, | |
| "learning_rate": 9.39748730505721e-05, | |
| "loss": 3.2357, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.4019755207214945, | |
| "grad_norm": 0.7813317179679871, | |
| "learning_rate": 9.392974772288558e-05, | |
| "loss": 3.2101, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.40312075012525944, | |
| "grad_norm": 0.8766132593154907, | |
| "learning_rate": 9.388446495928446e-05, | |
| "loss": 3.2852, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.40426597952902443, | |
| "grad_norm": 0.7857736349105835, | |
| "learning_rate": 9.383902492205363e-05, | |
| "loss": 3.2113, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.40541120893278937, | |
| "grad_norm": 0.9073331356048584, | |
| "learning_rate": 9.379342777404159e-05, | |
| "loss": 3.2478, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.4065564383365543, | |
| "grad_norm": 0.8033682107925415, | |
| "learning_rate": 9.374767367865989e-05, | |
| "loss": 3.3159, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.40770166774031924, | |
| "grad_norm": 0.7821508646011353, | |
| "learning_rate": 9.370176279988256e-05, | |
| "loss": 3.2362, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.4088468971440842, | |
| "grad_norm": 0.8257923126220703, | |
| "learning_rate": 9.365569530224554e-05, | |
| "loss": 3.1832, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.4099921265478491, | |
| "grad_norm": 0.8349987864494324, | |
| "learning_rate": 9.360947135084603e-05, | |
| "loss": 3.1995, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.41113735595161405, | |
| "grad_norm": 0.8590210676193237, | |
| "learning_rate": 9.356309111134191e-05, | |
| "loss": 3.2119, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.412282585355379, | |
| "grad_norm": 0.8512969017028809, | |
| "learning_rate": 9.351655474995122e-05, | |
| "loss": 3.2323, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.4134278147591439, | |
| "grad_norm": 0.6388457417488098, | |
| "learning_rate": 9.346986243345149e-05, | |
| "loss": 3.1677, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.4145730441629089, | |
| "grad_norm": 0.8811210989952087, | |
| "learning_rate": 9.342301432917912e-05, | |
| "loss": 3.2307, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.41571827356667385, | |
| "grad_norm": 0.9297654628753662, | |
| "learning_rate": 9.337601060502891e-05, | |
| "loss": 3.1838, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.4168635029704388, | |
| "grad_norm": 0.750491201877594, | |
| "learning_rate": 9.332885142945329e-05, | |
| "loss": 3.23, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.4180087323742037, | |
| "grad_norm": 0.8282638192176819, | |
| "learning_rate": 9.328153697146186e-05, | |
| "loss": 3.1789, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.41915396177796865, | |
| "grad_norm": 0.7395208477973938, | |
| "learning_rate": 9.323406740062068e-05, | |
| "loss": 3.2881, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.4202991911817336, | |
| "grad_norm": 0.5959879755973816, | |
| "learning_rate": 9.318644288705172e-05, | |
| "loss": 3.1879, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.4214444205854985, | |
| "grad_norm": 0.6063298583030701, | |
| "learning_rate": 9.313866360143227e-05, | |
| "loss": 3.273, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.42258964998926346, | |
| "grad_norm": 0.6868070960044861, | |
| "learning_rate": 9.309072971499422e-05, | |
| "loss": 3.2145, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.4237348793930284, | |
| "grad_norm": 0.6153081655502319, | |
| "learning_rate": 9.304264139952356e-05, | |
| "loss": 3.0791, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.42488010879679333, | |
| "grad_norm": 0.6345932483673096, | |
| "learning_rate": 9.299439882735977e-05, | |
| "loss": 3.1991, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.4260253382005583, | |
| "grad_norm": 0.7605310082435608, | |
| "learning_rate": 9.294600217139506e-05, | |
| "loss": 3.1272, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.42717056760432326, | |
| "grad_norm": 0.6695173382759094, | |
| "learning_rate": 9.289745160507395e-05, | |
| "loss": 3.1482, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.4283157970080882, | |
| "grad_norm": 0.8121134638786316, | |
| "learning_rate": 9.284874730239244e-05, | |
| "loss": 3.2122, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.42946102641185313, | |
| "grad_norm": 0.8771198391914368, | |
| "learning_rate": 9.279988943789759e-05, | |
| "loss": 3.1768, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.43060625581561807, | |
| "grad_norm": 0.7993550300598145, | |
| "learning_rate": 9.275087818668675e-05, | |
| "loss": 3.1944, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.431751485219383, | |
| "grad_norm": 0.6639721393585205, | |
| "learning_rate": 9.270171372440697e-05, | |
| "loss": 3.1418, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.43289671462314794, | |
| "grad_norm": 0.7494943737983704, | |
| "learning_rate": 9.265239622725438e-05, | |
| "loss": 3.1956, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.4340419440269129, | |
| "grad_norm": 0.7307000160217285, | |
| "learning_rate": 9.26029258719736e-05, | |
| "loss": 3.133, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.4351871734306778, | |
| "grad_norm": 0.7357375621795654, | |
| "learning_rate": 9.255330283585701e-05, | |
| "loss": 3.1898, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.4363324028344428, | |
| "grad_norm": 0.6649693250656128, | |
| "learning_rate": 9.250352729674422e-05, | |
| "loss": 3.2147, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.43747763223820774, | |
| "grad_norm": 0.6873495578765869, | |
| "learning_rate": 9.245359943302133e-05, | |
| "loss": 3.2341, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.43862286164197267, | |
| "grad_norm": 0.7320956587791443, | |
| "learning_rate": 9.240351942362038e-05, | |
| "loss": 3.1241, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.4397680910457376, | |
| "grad_norm": 0.6137463450431824, | |
| "learning_rate": 9.235328744801868e-05, | |
| "loss": 3.1529, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.44091332044950254, | |
| "grad_norm": 0.8658304214477539, | |
| "learning_rate": 9.230290368623809e-05, | |
| "loss": 3.2168, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.4420585498532675, | |
| "grad_norm": 0.7436694502830505, | |
| "learning_rate": 9.225236831884454e-05, | |
| "loss": 3.1798, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.4432037792570324, | |
| "grad_norm": 0.9040384888648987, | |
| "learning_rate": 9.220168152694722e-05, | |
| "loss": 3.2241, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.44434900866079735, | |
| "grad_norm": 0.7236924171447754, | |
| "learning_rate": 9.215084349219801e-05, | |
| "loss": 3.183, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.4454942380645623, | |
| "grad_norm": 0.8633347153663635, | |
| "learning_rate": 9.209985439679081e-05, | |
| "loss": 3.1776, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.4466394674683272, | |
| "grad_norm": 0.730910062789917, | |
| "learning_rate": 9.204871442346091e-05, | |
| "loss": 3.1633, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.4477846968720922, | |
| "grad_norm": 0.809923529624939, | |
| "learning_rate": 9.199742375548432e-05, | |
| "loss": 3.1736, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.44892992627585715, | |
| "grad_norm": 0.7229586839675903, | |
| "learning_rate": 9.194598257667711e-05, | |
| "loss": 3.1813, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.4500751556796221, | |
| "grad_norm": 0.6999960541725159, | |
| "learning_rate": 9.189439107139472e-05, | |
| "loss": 3.1125, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.451220385083387, | |
| "grad_norm": 0.7234693169593811, | |
| "learning_rate": 9.184264942453138e-05, | |
| "loss": 3.137, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.45236561448715196, | |
| "grad_norm": 0.7283908724784851, | |
| "learning_rate": 9.179075782151936e-05, | |
| "loss": 3.1672, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.4535108438909169, | |
| "grad_norm": 0.793543815612793, | |
| "learning_rate": 9.173871644832834e-05, | |
| "loss": 3.1925, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.45465607329468183, | |
| "grad_norm": 0.7263696789741516, | |
| "learning_rate": 9.168652549146481e-05, | |
| "loss": 3.1609, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.45580130269844676, | |
| "grad_norm": 0.7698031663894653, | |
| "learning_rate": 9.163418513797126e-05, | |
| "loss": 3.2547, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.4569465321022117, | |
| "grad_norm": 0.908698320388794, | |
| "learning_rate": 9.158169557542566e-05, | |
| "loss": 3.2165, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.4580917615059767, | |
| "grad_norm": 0.9588857293128967, | |
| "learning_rate": 9.152905699194065e-05, | |
| "loss": 3.1743, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.4592369909097416, | |
| "grad_norm": 0.7442302107810974, | |
| "learning_rate": 9.1476269576163e-05, | |
| "loss": 3.1088, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.46038222031350656, | |
| "grad_norm": 0.7421006560325623, | |
| "learning_rate": 9.14233335172728e-05, | |
| "loss": 3.1497, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.4615274497172715, | |
| "grad_norm": 0.8878415822982788, | |
| "learning_rate": 9.13702490049829e-05, | |
| "loss": 3.1924, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.46267267912103643, | |
| "grad_norm": 0.6878317594528198, | |
| "learning_rate": 9.131701622953816e-05, | |
| "loss": 3.1366, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.46381790852480137, | |
| "grad_norm": 0.7945599555969238, | |
| "learning_rate": 9.126363538171478e-05, | |
| "loss": 3.1926, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.4649631379285663, | |
| "grad_norm": 0.7997886538505554, | |
| "learning_rate": 9.121010665281964e-05, | |
| "loss": 3.1521, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.46610836733233124, | |
| "grad_norm": 0.715614378452301, | |
| "learning_rate": 9.115643023468958e-05, | |
| "loss": 3.1904, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.4672535967360962, | |
| "grad_norm": 0.7846017479896545, | |
| "learning_rate": 9.110260631969077e-05, | |
| "loss": 3.1338, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.46839882613986117, | |
| "grad_norm": 0.6939677596092224, | |
| "learning_rate": 9.10486351007179e-05, | |
| "loss": 3.1635, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.4695440555436261, | |
| "grad_norm": 0.7764283418655396, | |
| "learning_rate": 9.099451677119366e-05, | |
| "loss": 3.1922, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.47068928494739104, | |
| "grad_norm": 0.753666877746582, | |
| "learning_rate": 9.094025152506788e-05, | |
| "loss": 3.0827, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.471834514351156, | |
| "grad_norm": 0.6793937683105469, | |
| "learning_rate": 9.088583955681699e-05, | |
| "loss": 3.1235, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.4729797437549209, | |
| "grad_norm": 0.645055890083313, | |
| "learning_rate": 9.08312810614432e-05, | |
| "loss": 3.1758, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.47412497315868585, | |
| "grad_norm": 0.7241025567054749, | |
| "learning_rate": 9.077657623447379e-05, | |
| "loss": 3.1636, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.4752702025624508, | |
| "grad_norm": 0.762117862701416, | |
| "learning_rate": 9.07217252719606e-05, | |
| "loss": 3.1423, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.4764154319662157, | |
| "grad_norm": 0.7575943470001221, | |
| "learning_rate": 9.066672837047907e-05, | |
| "loss": 3.1304, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.47756066136998065, | |
| "grad_norm": 0.8326764106750488, | |
| "learning_rate": 9.061158572712769e-05, | |
| "loss": 3.1807, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.4787058907737456, | |
| "grad_norm": 0.7815741300582886, | |
| "learning_rate": 9.055629753952731e-05, | |
| "loss": 3.2113, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.4798511201775106, | |
| "grad_norm": 0.7716583609580994, | |
| "learning_rate": 9.050086400582033e-05, | |
| "loss": 3.1791, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.4809963495812755, | |
| "grad_norm": 0.6160004734992981, | |
| "learning_rate": 9.044528532467006e-05, | |
| "loss": 3.1696, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.48214157898504045, | |
| "grad_norm": 0.8025004267692566, | |
| "learning_rate": 9.038956169525998e-05, | |
| "loss": 3.2002, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.4832868083888054, | |
| "grad_norm": 0.733741819858551, | |
| "learning_rate": 9.033369331729307e-05, | |
| "loss": 3.1661, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.4844320377925703, | |
| "grad_norm": 0.7210118770599365, | |
| "learning_rate": 9.027768039099103e-05, | |
| "loss": 3.1492, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.48557726719633526, | |
| "grad_norm": 0.6915583610534668, | |
| "learning_rate": 9.02215231170936e-05, | |
| "loss": 3.1892, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.4867224966001002, | |
| "grad_norm": 0.6812649965286255, | |
| "learning_rate": 9.016522169685783e-05, | |
| "loss": 3.1404, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.48786772600386513, | |
| "grad_norm": 0.7272056341171265, | |
| "learning_rate": 9.010877633205738e-05, | |
| "loss": 3.1935, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.48901295540763007, | |
| "grad_norm": 0.7162798643112183, | |
| "learning_rate": 9.005218722498177e-05, | |
| "loss": 3.1949, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.49015818481139506, | |
| "grad_norm": 0.6110600829124451, | |
| "learning_rate": 8.999545457843568e-05, | |
| "loss": 3.1217, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.49130341421516, | |
| "grad_norm": 0.657370924949646, | |
| "learning_rate": 8.993857859573818e-05, | |
| "loss": 3.1381, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.49244864361892493, | |
| "grad_norm": 0.8181600570678711, | |
| "learning_rate": 8.988155948072203e-05, | |
| "loss": 3.1527, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.49359387302268987, | |
| "grad_norm": 0.586644172668457, | |
| "learning_rate": 8.9824397437733e-05, | |
| "loss": 3.1328, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.4947391024264548, | |
| "grad_norm": 0.8710150718688965, | |
| "learning_rate": 8.976709267162903e-05, | |
| "loss": 3.1509, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.49588433183021974, | |
| "grad_norm": 0.7185545563697815, | |
| "learning_rate": 8.970964538777957e-05, | |
| "loss": 3.0628, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.4970295612339847, | |
| "grad_norm": 0.7242484092712402, | |
| "learning_rate": 8.965205579206483e-05, | |
| "loss": 3.0603, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.4981747906377496, | |
| "grad_norm": 0.7996972799301147, | |
| "learning_rate": 8.959432409087504e-05, | |
| "loss": 3.2346, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.49932002004151455, | |
| "grad_norm": 0.6038782000541687, | |
| "learning_rate": 8.953645049110971e-05, | |
| "loss": 3.0751, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.5004652494452795, | |
| "grad_norm": 0.7712786197662354, | |
| "learning_rate": 8.94784352001769e-05, | |
| "loss": 3.1086, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.5016104788490444, | |
| "grad_norm": 0.6952617168426514, | |
| "learning_rate": 8.94202784259924e-05, | |
| "loss": 3.13, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.5027557082528094, | |
| "grad_norm": 0.7420851588249207, | |
| "learning_rate": 8.936198037697916e-05, | |
| "loss": 3.1094, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.5039009376565743, | |
| "grad_norm": 0.6883806586265564, | |
| "learning_rate": 8.930354126206634e-05, | |
| "loss": 3.0722, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.5050461670603392, | |
| "grad_norm": 0.7546491026878357, | |
| "learning_rate": 8.92449612906887e-05, | |
| "loss": 3.1571, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.5061913964641043, | |
| "grad_norm": 0.7471094727516174, | |
| "learning_rate": 8.918624067278576e-05, | |
| "loss": 3.1842, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.5073366258678692, | |
| "grad_norm": 0.8344042897224426, | |
| "learning_rate": 8.912737961880116e-05, | |
| "loss": 3.1709, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.5084818552716341, | |
| "grad_norm": 0.6555135250091553, | |
| "learning_rate": 8.906837833968174e-05, | |
| "loss": 3.1777, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.5096270846753991, | |
| "grad_norm": 0.799281120300293, | |
| "learning_rate": 8.900923704687697e-05, | |
| "loss": 3.176, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.510772314079164, | |
| "grad_norm": 0.8266319632530212, | |
| "learning_rate": 8.894995595233809e-05, | |
| "loss": 3.1353, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.511917543482929, | |
| "grad_norm": 0.7263309955596924, | |
| "learning_rate": 8.889053526851729e-05, | |
| "loss": 3.0824, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.5130627728866939, | |
| "grad_norm": 0.7665941119194031, | |
| "learning_rate": 8.88309752083671e-05, | |
| "loss": 3.1808, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.5142080022904588, | |
| "grad_norm": 0.7014003396034241, | |
| "learning_rate": 8.877127598533952e-05, | |
| "loss": 3.1158, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.5153532316942238, | |
| "grad_norm": 0.6320556998252869, | |
| "learning_rate": 8.871143781338529e-05, | |
| "loss": 3.1276, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.5164984610979887, | |
| "grad_norm": 0.8376429677009583, | |
| "learning_rate": 8.865146090695308e-05, | |
| "loss": 3.1422, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.5176436905017536, | |
| "grad_norm": 0.6639658212661743, | |
| "learning_rate": 8.859134548098883e-05, | |
| "loss": 3.0622, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.5187889199055186, | |
| "grad_norm": 0.6442060470581055, | |
| "learning_rate": 8.853109175093486e-05, | |
| "loss": 3.1206, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.5199341493092835, | |
| "grad_norm": 0.6882277131080627, | |
| "learning_rate": 8.847069993272912e-05, | |
| "loss": 3.1315, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.5210793787130484, | |
| "grad_norm": 0.8141956329345703, | |
| "learning_rate": 8.841017024280449e-05, | |
| "loss": 3.1498, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.5222246081168134, | |
| "grad_norm": 0.6133621335029602, | |
| "learning_rate": 8.834950289808796e-05, | |
| "loss": 3.0971, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.5233698375205783, | |
| "grad_norm": 0.6844592690467834, | |
| "learning_rate": 8.828869811599982e-05, | |
| "loss": 3.1408, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.5245150669243432, | |
| "grad_norm": 0.7407364845275879, | |
| "learning_rate": 8.822775611445289e-05, | |
| "loss": 3.1356, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.5256602963281082, | |
| "grad_norm": 0.7962344884872437, | |
| "learning_rate": 8.816667711185183e-05, | |
| "loss": 3.037, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.5268055257318731, | |
| "grad_norm": 0.6615867018699646, | |
| "learning_rate": 8.81054613270922e-05, | |
| "loss": 3.119, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.5279507551356382, | |
| "grad_norm": 0.6886764168739319, | |
| "learning_rate": 8.804410897955986e-05, | |
| "loss": 3.1686, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.5290959845394031, | |
| "grad_norm": 0.6714747548103333, | |
| "learning_rate": 8.798262028913e-05, | |
| "loss": 3.0539, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.530241213943168, | |
| "grad_norm": 0.630648672580719, | |
| "learning_rate": 8.792099547616646e-05, | |
| "loss": 3.03, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.531386443346933, | |
| "grad_norm": 0.6129744648933411, | |
| "learning_rate": 8.785923476152092e-05, | |
| "loss": 3.112, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.5325316727506979, | |
| "grad_norm": 0.6656561493873596, | |
| "learning_rate": 8.779733836653213e-05, | |
| "loss": 3.0675, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.5336769021544628, | |
| "grad_norm": 0.6855784058570862, | |
| "learning_rate": 8.773530651302506e-05, | |
| "loss": 3.0567, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.5348221315582278, | |
| "grad_norm": 0.6233646869659424, | |
| "learning_rate": 8.767313942331016e-05, | |
| "loss": 3.1316, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.5359673609619927, | |
| "grad_norm": 0.5950207710266113, | |
| "learning_rate": 8.761083732018253e-05, | |
| "loss": 3.0838, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.5371125903657576, | |
| "grad_norm": 0.6548320651054382, | |
| "learning_rate": 8.754840042692114e-05, | |
| "loss": 3.128, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.5382578197695226, | |
| "grad_norm": 0.5914682149887085, | |
| "learning_rate": 8.748582896728801e-05, | |
| "loss": 3.0995, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.5394030491732875, | |
| "grad_norm": 0.8005509376525879, | |
| "learning_rate": 8.742312316552741e-05, | |
| "loss": 3.1194, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.5405482785770525, | |
| "grad_norm": 0.6313744783401489, | |
| "learning_rate": 8.736028324636511e-05, | |
| "loss": 3.1332, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.5416935079808174, | |
| "grad_norm": 0.6614211797714233, | |
| "learning_rate": 8.729730943500751e-05, | |
| "loss": 3.1808, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.5428387373845823, | |
| "grad_norm": 0.6834341883659363, | |
| "learning_rate": 8.723420195714083e-05, | |
| "loss": 3.146, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.5439839667883473, | |
| "grad_norm": 0.6552104353904724, | |
| "learning_rate": 8.717096103893034e-05, | |
| "loss": 3.1339, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.5451291961921122, | |
| "grad_norm": 0.6134440302848816, | |
| "learning_rate": 8.710758690701957e-05, | |
| "loss": 3.1073, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.5462744255958771, | |
| "grad_norm": 0.6617953181266785, | |
| "learning_rate": 8.704407978852941e-05, | |
| "loss": 3.0803, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.5474196549996421, | |
| "grad_norm": 0.6200254559516907, | |
| "learning_rate": 8.698043991105738e-05, | |
| "loss": 3.0902, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.548564884403407, | |
| "grad_norm": 0.766797661781311, | |
| "learning_rate": 8.691666750267677e-05, | |
| "loss": 3.0533, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.549710113807172, | |
| "grad_norm": 0.8062739372253418, | |
| "learning_rate": 8.685276279193583e-05, | |
| "loss": 3.074, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.550855343210937, | |
| "grad_norm": 0.7662980556488037, | |
| "learning_rate": 8.678872600785702e-05, | |
| "loss": 3.0567, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.5520005726147019, | |
| "grad_norm": 0.610495388507843, | |
| "learning_rate": 8.672455737993601e-05, | |
| "loss": 3.0505, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.5531458020184669, | |
| "grad_norm": 0.7125016450881958, | |
| "learning_rate": 8.666025713814106e-05, | |
| "loss": 3.1392, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.5542910314222318, | |
| "grad_norm": 0.6135743260383606, | |
| "learning_rate": 8.65958255129121e-05, | |
| "loss": 3.0789, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.5554362608259967, | |
| "grad_norm": 0.5998417735099792, | |
| "learning_rate": 8.653126273515988e-05, | |
| "loss": 3.0702, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.5565814902297617, | |
| "grad_norm": 0.7088379859924316, | |
| "learning_rate": 8.64665690362652e-05, | |
| "loss": 3.084, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.5577267196335266, | |
| "grad_norm": 0.6542948484420776, | |
| "learning_rate": 8.640174464807805e-05, | |
| "loss": 3.1164, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.5588719490372915, | |
| "grad_norm": 0.5674989819526672, | |
| "learning_rate": 8.63367898029168e-05, | |
| "loss": 3.1048, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.5600171784410565, | |
| "grad_norm": 0.6628077626228333, | |
| "learning_rate": 8.627170473356733e-05, | |
| "loss": 3.0492, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.5611624078448214, | |
| "grad_norm": 0.6939430832862854, | |
| "learning_rate": 8.620648967328224e-05, | |
| "loss": 3.1041, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.5623076372485863, | |
| "grad_norm": 0.7876750230789185, | |
| "learning_rate": 8.614114485577996e-05, | |
| "loss": 3.1109, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.5634528666523513, | |
| "grad_norm": 0.7786777019500732, | |
| "learning_rate": 8.607567051524399e-05, | |
| "loss": 3.0289, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.5645980960561162, | |
| "grad_norm": 0.6663212180137634, | |
| "learning_rate": 8.601006688632199e-05, | |
| "loss": 3.0807, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.5657433254598812, | |
| "grad_norm": 0.7165863513946533, | |
| "learning_rate": 8.594433420412496e-05, | |
| "loss": 3.0755, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.5668885548636461, | |
| "grad_norm": 0.7116391658782959, | |
| "learning_rate": 8.587847270422642e-05, | |
| "loss": 3.064, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.568033784267411, | |
| "grad_norm": 0.7216659188270569, | |
| "learning_rate": 8.581248262266155e-05, | |
| "loss": 3.0844, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.569179013671176, | |
| "grad_norm": 0.600975751876831, | |
| "learning_rate": 8.57463641959263e-05, | |
| "loss": 2.9771, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.5703242430749409, | |
| "grad_norm": 0.6743506789207458, | |
| "learning_rate": 8.568011766097666e-05, | |
| "loss": 3.1177, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.5714694724787059, | |
| "grad_norm": 0.6986669301986694, | |
| "learning_rate": 8.561374325522764e-05, | |
| "loss": 3.0838, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.5726147018824709, | |
| "grad_norm": 0.8114129900932312, | |
| "learning_rate": 8.554724121655262e-05, | |
| "loss": 3.1444, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5737599312862358, | |
| "grad_norm": 0.7919934988021851, | |
| "learning_rate": 8.548061178328233e-05, | |
| "loss": 3.0166, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.5749051606900007, | |
| "grad_norm": 0.6979469656944275, | |
| "learning_rate": 8.541385519420403e-05, | |
| "loss": 3.0737, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.5760503900937657, | |
| "grad_norm": 0.6499598026275635, | |
| "learning_rate": 8.534697168856076e-05, | |
| "loss": 3.0649, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.5771956194975306, | |
| "grad_norm": 0.7335128784179688, | |
| "learning_rate": 8.527996150605034e-05, | |
| "loss": 3.0403, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.5783408489012956, | |
| "grad_norm": 0.7250447869300842, | |
| "learning_rate": 8.521282488682463e-05, | |
| "loss": 3.0069, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.5794860783050605, | |
| "grad_norm": 0.6807704567909241, | |
| "learning_rate": 8.514556207148857e-05, | |
| "loss": 3.119, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.5806313077088254, | |
| "grad_norm": 0.7379552721977234, | |
| "learning_rate": 8.507817330109936e-05, | |
| "loss": 3.0773, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.5817765371125904, | |
| "grad_norm": 0.6113300919532776, | |
| "learning_rate": 8.501065881716566e-05, | |
| "loss": 3.0768, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.5829217665163553, | |
| "grad_norm": 0.6463739275932312, | |
| "learning_rate": 8.494301886164658e-05, | |
| "loss": 3.0759, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.5840669959201202, | |
| "grad_norm": 0.6680572032928467, | |
| "learning_rate": 8.487525367695098e-05, | |
| "loss": 3.032, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.5852122253238852, | |
| "grad_norm": 0.7283656597137451, | |
| "learning_rate": 8.480736350593644e-05, | |
| "loss": 3.0986, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.5863574547276501, | |
| "grad_norm": 0.6844098567962646, | |
| "learning_rate": 8.473934859190853e-05, | |
| "loss": 3.0703, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.587502684131415, | |
| "grad_norm": 0.6737761497497559, | |
| "learning_rate": 8.467120917861984e-05, | |
| "loss": 3.0775, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.58864791353518, | |
| "grad_norm": 0.8135201930999756, | |
| "learning_rate": 8.460294551026916e-05, | |
| "loss": 3.0802, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.5897931429389449, | |
| "grad_norm": 0.6999467015266418, | |
| "learning_rate": 8.453455783150054e-05, | |
| "loss": 3.047, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.5909383723427099, | |
| "grad_norm": 0.7999339699745178, | |
| "learning_rate": 8.446604638740256e-05, | |
| "loss": 3.1247, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.5920836017464748, | |
| "grad_norm": 0.7229709029197693, | |
| "learning_rate": 8.439741142350725e-05, | |
| "loss": 3.1009, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.5932288311502398, | |
| "grad_norm": 0.7535393238067627, | |
| "learning_rate": 8.432865318578935e-05, | |
| "loss": 3.0566, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.5943740605540048, | |
| "grad_norm": 0.7364835143089294, | |
| "learning_rate": 8.425977192066539e-05, | |
| "loss": 3.0751, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.5955192899577697, | |
| "grad_norm": 0.7996159791946411, | |
| "learning_rate": 8.419076787499283e-05, | |
| "loss": 3.1277, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.5966645193615346, | |
| "grad_norm": 0.6993304491043091, | |
| "learning_rate": 8.412164129606911e-05, | |
| "loss": 3.0713, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.5978097487652996, | |
| "grad_norm": 0.6996495127677917, | |
| "learning_rate": 8.405239243163084e-05, | |
| "loss": 3.0595, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.5989549781690645, | |
| "grad_norm": 0.6525830626487732, | |
| "learning_rate": 8.398302152985285e-05, | |
| "loss": 3.0666, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.6001002075728294, | |
| "grad_norm": 0.6092258095741272, | |
| "learning_rate": 8.391352883934733e-05, | |
| "loss": 3.0453, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.6012454369765944, | |
| "grad_norm": 0.7509777545928955, | |
| "learning_rate": 8.3843914609163e-05, | |
| "loss": 3.016, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.6023906663803593, | |
| "grad_norm": 0.6374807953834534, | |
| "learning_rate": 8.377417908878406e-05, | |
| "loss": 2.9986, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.6035358957841243, | |
| "grad_norm": 0.6541762351989746, | |
| "learning_rate": 8.370432252812946e-05, | |
| "loss": 3.0299, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.6046811251878892, | |
| "grad_norm": 0.5995933413505554, | |
| "learning_rate": 8.363434517755191e-05, | |
| "loss": 3.0853, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.6058263545916541, | |
| "grad_norm": 0.7466599941253662, | |
| "learning_rate": 8.356424728783702e-05, | |
| "loss": 3.1482, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.6069715839954191, | |
| "grad_norm": 0.7076915502548218, | |
| "learning_rate": 8.349402911020234e-05, | |
| "loss": 3.0681, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.608116813399184, | |
| "grad_norm": 0.6121742725372314, | |
| "learning_rate": 8.34236908962966e-05, | |
| "loss": 3.0771, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.6092620428029489, | |
| "grad_norm": 0.7337540984153748, | |
| "learning_rate": 8.335323289819865e-05, | |
| "loss": 3.1927, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.6104072722067139, | |
| "grad_norm": 0.6119634509086609, | |
| "learning_rate": 8.328265536841662e-05, | |
| "loss": 3.0988, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.6115525016104788, | |
| "grad_norm": 0.6291252970695496, | |
| "learning_rate": 8.321195855988706e-05, | |
| "loss": 3.0667, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.6126977310142437, | |
| "grad_norm": 0.5802082419395447, | |
| "learning_rate": 8.314114272597398e-05, | |
| "loss": 3.0118, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.6138429604180088, | |
| "grad_norm": 0.6166985630989075, | |
| "learning_rate": 8.307020812046792e-05, | |
| "loss": 3.1762, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.6149881898217737, | |
| "grad_norm": 0.5751842260360718, | |
| "learning_rate": 8.299915499758514e-05, | |
| "loss": 3.0107, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.6161334192255387, | |
| "grad_norm": 0.6163948178291321, | |
| "learning_rate": 8.292798361196658e-05, | |
| "loss": 3.0617, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.6172786486293036, | |
| "grad_norm": 0.6665089130401611, | |
| "learning_rate": 8.285669421867703e-05, | |
| "loss": 3.0729, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.6184238780330685, | |
| "grad_norm": 0.630814254283905, | |
| "learning_rate": 8.278528707320421e-05, | |
| "loss": 2.9811, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.6195691074368335, | |
| "grad_norm": 0.785892903804779, | |
| "learning_rate": 8.271376243145786e-05, | |
| "loss": 3.0561, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.6207143368405984, | |
| "grad_norm": 0.6047619581222534, | |
| "learning_rate": 8.264212054976875e-05, | |
| "loss": 3.0595, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.6218595662443633, | |
| "grad_norm": 0.6675294041633606, | |
| "learning_rate": 8.257036168488785e-05, | |
| "loss": 3.1725, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.6230047956481283, | |
| "grad_norm": 0.6342408657073975, | |
| "learning_rate": 8.24984860939854e-05, | |
| "loss": 2.9766, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.6241500250518932, | |
| "grad_norm": 0.5901287794113159, | |
| "learning_rate": 8.242649403464989e-05, | |
| "loss": 3.1021, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.6252952544556581, | |
| "grad_norm": 0.5638805627822876, | |
| "learning_rate": 8.23543857648873e-05, | |
| "loss": 2.9866, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.6264404838594231, | |
| "grad_norm": 0.5482515692710876, | |
| "learning_rate": 8.228216154312001e-05, | |
| "loss": 3.0344, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.627585713263188, | |
| "grad_norm": 0.7258690595626831, | |
| "learning_rate": 8.2209821628186e-05, | |
| "loss": 3.0339, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.628730942666953, | |
| "grad_norm": 0.6626359820365906, | |
| "learning_rate": 8.213736627933786e-05, | |
| "loss": 3.1191, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.6298761720707179, | |
| "grad_norm": 0.5897409319877625, | |
| "learning_rate": 8.206479575624186e-05, | |
| "loss": 2.9604, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.6310214014744828, | |
| "grad_norm": 0.661314070224762, | |
| "learning_rate": 8.199211031897704e-05, | |
| "loss": 3.0568, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.6321666308782478, | |
| "grad_norm": 0.7244003415107727, | |
| "learning_rate": 8.191931022803427e-05, | |
| "loss": 3.0202, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.6333118602820127, | |
| "grad_norm": 0.6676930785179138, | |
| "learning_rate": 8.184639574431532e-05, | |
| "loss": 3.0692, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.6344570896857776, | |
| "grad_norm": 0.8161568641662598, | |
| "learning_rate": 8.177336712913194e-05, | |
| "loss": 3.0835, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.6356023190895427, | |
| "grad_norm": 0.9007164239883423, | |
| "learning_rate": 8.170022464420486e-05, | |
| "loss": 3.0665, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.6367475484933076, | |
| "grad_norm": 0.7467122673988342, | |
| "learning_rate": 8.162696855166294e-05, | |
| "loss": 3.1098, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.6378927778970725, | |
| "grad_norm": 0.5917842388153076, | |
| "learning_rate": 8.155359911404217e-05, | |
| "loss": 3.0047, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.6390380073008375, | |
| "grad_norm": 0.7426056861877441, | |
| "learning_rate": 8.148011659428474e-05, | |
| "loss": 3.1037, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.6401832367046024, | |
| "grad_norm": 0.8367446660995483, | |
| "learning_rate": 8.140652125573813e-05, | |
| "loss": 2.9628, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.6413284661083674, | |
| "grad_norm": 0.6199979186058044, | |
| "learning_rate": 8.133281336215412e-05, | |
| "loss": 3.0239, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.6424736955121323, | |
| "grad_norm": 0.7761691808700562, | |
| "learning_rate": 8.125899317768786e-05, | |
| "loss": 3.0609, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.6436189249158972, | |
| "grad_norm": 0.6743906140327454, | |
| "learning_rate": 8.118506096689698e-05, | |
| "loss": 3.0696, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.6447641543196622, | |
| "grad_norm": 0.6743597388267517, | |
| "learning_rate": 8.111101699474051e-05, | |
| "loss": 3.045, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.6459093837234271, | |
| "grad_norm": 0.6069556474685669, | |
| "learning_rate": 8.103686152657808e-05, | |
| "loss": 3.0171, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.647054613127192, | |
| "grad_norm": 0.5756711959838867, | |
| "learning_rate": 8.096259482816886e-05, | |
| "loss": 3.0161, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.648199842530957, | |
| "grad_norm": 0.6239808797836304, | |
| "learning_rate": 8.088821716567066e-05, | |
| "loss": 3.0887, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.6493450719347219, | |
| "grad_norm": 0.5236758589744568, | |
| "learning_rate": 8.081372880563898e-05, | |
| "loss": 2.9743, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.6504903013384868, | |
| "grad_norm": 0.6389586925506592, | |
| "learning_rate": 8.073913001502605e-05, | |
| "loss": 2.9972, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.6516355307422518, | |
| "grad_norm": 0.5799978971481323, | |
| "learning_rate": 8.066442106117978e-05, | |
| "loss": 3.0043, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.6527807601460167, | |
| "grad_norm": 0.6182774901390076, | |
| "learning_rate": 8.058960221184298e-05, | |
| "loss": 3.065, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.6539259895497816, | |
| "grad_norm": 0.5762799382209778, | |
| "learning_rate": 8.051467373515228e-05, | |
| "loss": 3.0374, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.6550712189535466, | |
| "grad_norm": 0.6707761287689209, | |
| "learning_rate": 8.043963589963714e-05, | |
| "loss": 3.0056, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.6562164483573115, | |
| "grad_norm": 0.6148689985275269, | |
| "learning_rate": 8.036448897421903e-05, | |
| "loss": 3.0222, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.6573616777610766, | |
| "grad_norm": 0.6503751277923584, | |
| "learning_rate": 8.028923322821031e-05, | |
| "loss": 3.0186, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.6585069071648415, | |
| "grad_norm": 0.7160323858261108, | |
| "learning_rate": 8.021386893131334e-05, | |
| "loss": 3.0785, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.6596521365686064, | |
| "grad_norm": 0.5561687350273132, | |
| "learning_rate": 8.013839635361953e-05, | |
| "loss": 3.0425, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.6607973659723714, | |
| "grad_norm": 0.7166488170623779, | |
| "learning_rate": 8.006281576560834e-05, | |
| "loss": 2.9722, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.6619425953761363, | |
| "grad_norm": 0.5913854241371155, | |
| "learning_rate": 7.99871274381463e-05, | |
| "loss": 2.9645, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.6630878247799012, | |
| "grad_norm": 0.5721243619918823, | |
| "learning_rate": 7.99113316424861e-05, | |
| "loss": 3.0326, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.6642330541836662, | |
| "grad_norm": 0.6886599659919739, | |
| "learning_rate": 7.983542865026552e-05, | |
| "loss": 2.9934, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.6653782835874311, | |
| "grad_norm": 0.6505165100097656, | |
| "learning_rate": 7.975941873350656e-05, | |
| "loss": 3.0275, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.666523512991196, | |
| "grad_norm": 0.5963652729988098, | |
| "learning_rate": 7.968330216461439e-05, | |
| "loss": 3.0581, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.667668742394961, | |
| "grad_norm": 0.7020843029022217, | |
| "learning_rate": 7.960707921637642e-05, | |
| "loss": 3.0214, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.6688139717987259, | |
| "grad_norm": 0.5729818344116211, | |
| "learning_rate": 7.953075016196128e-05, | |
| "loss": 3.0928, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.6699592012024909, | |
| "grad_norm": 0.6135843992233276, | |
| "learning_rate": 7.945431527491788e-05, | |
| "loss": 3.0281, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.6711044306062558, | |
| "grad_norm": 0.844972550868988, | |
| "learning_rate": 7.937777482917441e-05, | |
| "loss": 3.0451, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.6722496600100207, | |
| "grad_norm": 0.6200757026672363, | |
| "learning_rate": 7.930112909903737e-05, | |
| "loss": 2.9982, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.6733948894137857, | |
| "grad_norm": 0.6621441841125488, | |
| "learning_rate": 7.922437835919059e-05, | |
| "loss": 3.0133, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.6745401188175506, | |
| "grad_norm": 0.6099239587783813, | |
| "learning_rate": 7.914752288469418e-05, | |
| "loss": 3.0359, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.6756853482213155, | |
| "grad_norm": 0.6415863633155823, | |
| "learning_rate": 7.907056295098367e-05, | |
| "loss": 3.0456, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.6768305776250805, | |
| "grad_norm": 0.5651492476463318, | |
| "learning_rate": 7.89934988338689e-05, | |
| "loss": 3.0138, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.6779758070288454, | |
| "grad_norm": 0.6899843215942383, | |
| "learning_rate": 7.891633080953309e-05, | |
| "loss": 3.1091, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.6791210364326105, | |
| "grad_norm": 0.6236230134963989, | |
| "learning_rate": 7.883905915453191e-05, | |
| "loss": 3.0477, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.6802662658363754, | |
| "grad_norm": 0.6632122993469238, | |
| "learning_rate": 7.876168414579232e-05, | |
| "loss": 3.0023, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.6814114952401403, | |
| "grad_norm": 0.5697975158691406, | |
| "learning_rate": 7.868420606061174e-05, | |
| "loss": 3.0046, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.6825567246439053, | |
| "grad_norm": 0.6847530603408813, | |
| "learning_rate": 7.8606625176657e-05, | |
| "loss": 3.0155, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.6837019540476702, | |
| "grad_norm": 0.6649438738822937, | |
| "learning_rate": 7.852894177196333e-05, | |
| "loss": 3.0616, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.6848471834514351, | |
| "grad_norm": 0.6214346289634705, | |
| "learning_rate": 7.845115612493335e-05, | |
| "loss": 2.963, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.6859924128552001, | |
| "grad_norm": 0.6587514877319336, | |
| "learning_rate": 7.837326851433614e-05, | |
| "loss": 3.0344, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.687137642258965, | |
| "grad_norm": 0.6454896330833435, | |
| "learning_rate": 7.829527921930617e-05, | |
| "loss": 3.0191, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.68828287166273, | |
| "grad_norm": 0.7053160071372986, | |
| "learning_rate": 7.821718851934235e-05, | |
| "loss": 2.9888, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.6894281010664949, | |
| "grad_norm": 0.6341421604156494, | |
| "learning_rate": 7.813899669430695e-05, | |
| "loss": 3.0688, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.6905733304702598, | |
| "grad_norm": 0.5567854642868042, | |
| "learning_rate": 7.806070402442476e-05, | |
| "loss": 3.0409, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.6917185598740248, | |
| "grad_norm": 0.5970193147659302, | |
| "learning_rate": 7.798231079028186e-05, | |
| "loss": 3.0569, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.6928637892777897, | |
| "grad_norm": 0.6818400621414185, | |
| "learning_rate": 7.79038172728248e-05, | |
| "loss": 2.9987, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.6940090186815546, | |
| "grad_norm": 0.7994418740272522, | |
| "learning_rate": 7.782522375335954e-05, | |
| "loss": 2.9835, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.6951542480853196, | |
| "grad_norm": 0.7498995065689087, | |
| "learning_rate": 7.774653051355039e-05, | |
| "loss": 3.0296, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.6962994774890845, | |
| "grad_norm": 0.6866205930709839, | |
| "learning_rate": 7.766773783541902e-05, | |
| "loss": 3.0301, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.6974447068928494, | |
| "grad_norm": 0.6896367073059082, | |
| "learning_rate": 7.758884600134354e-05, | |
| "loss": 3.0219, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.6985899362966144, | |
| "grad_norm": 0.668050229549408, | |
| "learning_rate": 7.750985529405736e-05, | |
| "loss": 3.0341, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.6997351657003793, | |
| "grad_norm": 0.666374146938324, | |
| "learning_rate": 7.743076599664824e-05, | |
| "loss": 3.0484, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.7008803951041443, | |
| "grad_norm": 0.7941027283668518, | |
| "learning_rate": 7.735157839255728e-05, | |
| "loss": 3.0032, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.7020256245079093, | |
| "grad_norm": 0.6927073001861572, | |
| "learning_rate": 7.727229276557791e-05, | |
| "loss": 3.0483, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.7031708539116742, | |
| "grad_norm": 0.6417405009269714, | |
| "learning_rate": 7.71929093998548e-05, | |
| "loss": 3.0354, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.7043160833154392, | |
| "grad_norm": 0.6400049328804016, | |
| "learning_rate": 7.711342857988295e-05, | |
| "loss": 3.013, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.7054613127192041, | |
| "grad_norm": 0.6526817083358765, | |
| "learning_rate": 7.703385059050662e-05, | |
| "loss": 3.1439, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.706606542122969, | |
| "grad_norm": 0.5762894749641418, | |
| "learning_rate": 7.695417571691825e-05, | |
| "loss": 2.9981, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.707751771526734, | |
| "grad_norm": 0.6319015622138977, | |
| "learning_rate": 7.687440424465755e-05, | |
| "loss": 3.0592, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.7088970009304989, | |
| "grad_norm": 0.6258828043937683, | |
| "learning_rate": 7.679453645961039e-05, | |
| "loss": 3.0415, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.7100422303342638, | |
| "grad_norm": 0.6156604290008545, | |
| "learning_rate": 7.671457264800784e-05, | |
| "loss": 3.0128, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.7111874597380288, | |
| "grad_norm": 0.6246117353439331, | |
| "learning_rate": 7.663451309642509e-05, | |
| "loss": 3.0324, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.7123326891417937, | |
| "grad_norm": 0.5683282017707825, | |
| "learning_rate": 7.65543580917804e-05, | |
| "loss": 2.9937, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.7134779185455586, | |
| "grad_norm": 0.7062090635299683, | |
| "learning_rate": 7.647410792133422e-05, | |
| "loss": 3.0414, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.7146231479493236, | |
| "grad_norm": 0.6515636444091797, | |
| "learning_rate": 7.639376287268798e-05, | |
| "loss": 3.029, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.7157683773530885, | |
| "grad_norm": 0.6117521524429321, | |
| "learning_rate": 7.631332323378314e-05, | |
| "loss": 3.0049, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.7169136067568534, | |
| "grad_norm": 0.7275700569152832, | |
| "learning_rate": 7.623278929290013e-05, | |
| "loss": 3.0346, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.7180588361606184, | |
| "grad_norm": 0.6181052923202515, | |
| "learning_rate": 7.615216133865744e-05, | |
| "loss": 2.9997, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.7192040655643833, | |
| "grad_norm": 0.5979108810424805, | |
| "learning_rate": 7.607143966001041e-05, | |
| "loss": 3.0351, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.7203492949681483, | |
| "grad_norm": 0.6783589720726013, | |
| "learning_rate": 7.599062454625023e-05, | |
| "loss": 2.9636, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.7214945243719133, | |
| "grad_norm": 0.6468908190727234, | |
| "learning_rate": 7.590971628700305e-05, | |
| "loss": 3.0285, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.7226397537756782, | |
| "grad_norm": 0.6515734195709229, | |
| "learning_rate": 7.582871517222876e-05, | |
| "loss": 3.0494, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.7237849831794432, | |
| "grad_norm": 0.6727483868598938, | |
| "learning_rate": 7.574762149222007e-05, | |
| "loss": 3.08, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.7249302125832081, | |
| "grad_norm": 0.70179283618927, | |
| "learning_rate": 7.566643553760138e-05, | |
| "loss": 2.9622, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.726075441986973, | |
| "grad_norm": 0.7478881478309631, | |
| "learning_rate": 7.558515759932782e-05, | |
| "loss": 3.0535, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.727220671390738, | |
| "grad_norm": 0.7690526247024536, | |
| "learning_rate": 7.550378796868417e-05, | |
| "loss": 3.0019, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.7283659007945029, | |
| "grad_norm": 0.6407628059387207, | |
| "learning_rate": 7.542232693728379e-05, | |
| "loss": 2.9642, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.7295111301982679, | |
| "grad_norm": 0.5795389413833618, | |
| "learning_rate": 7.534077479706764e-05, | |
| "loss": 3.0628, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.7306563596020328, | |
| "grad_norm": 0.6217789053916931, | |
| "learning_rate": 7.525913184030319e-05, | |
| "loss": 3.0264, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.7318015890057977, | |
| "grad_norm": 0.6992785930633545, | |
| "learning_rate": 7.517739835958335e-05, | |
| "loss": 3.0133, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.7329468184095627, | |
| "grad_norm": 0.6179572343826294, | |
| "learning_rate": 7.509557464782546e-05, | |
| "loss": 2.9636, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.7340920478133276, | |
| "grad_norm": 0.5889167189598083, | |
| "learning_rate": 7.501366099827025e-05, | |
| "loss": 3.0598, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.7352372772170925, | |
| "grad_norm": 0.6687774062156677, | |
| "learning_rate": 7.493165770448078e-05, | |
| "loss": 3.0248, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.7363825066208575, | |
| "grad_norm": 0.6092299818992615, | |
| "learning_rate": 7.484956506034136e-05, | |
| "loss": 3.0009, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 0.7375277360246224, | |
| "grad_norm": 0.5790627598762512, | |
| "learning_rate": 7.476738336005647e-05, | |
| "loss": 2.9792, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.7386729654283873, | |
| "grad_norm": 0.5962207913398743, | |
| "learning_rate": 7.468511289814983e-05, | |
| "loss": 2.9984, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.7398181948321523, | |
| "grad_norm": 0.6334844827651978, | |
| "learning_rate": 7.460275396946323e-05, | |
| "loss": 3.041, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.7409634242359172, | |
| "grad_norm": 0.6076721549034119, | |
| "learning_rate": 7.45203068691555e-05, | |
| "loss": 3.0255, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 0.7421086536396821, | |
| "grad_norm": 0.7529584169387817, | |
| "learning_rate": 7.443777189270147e-05, | |
| "loss": 3.0043, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.7432538830434472, | |
| "grad_norm": 0.6646864414215088, | |
| "learning_rate": 7.435514933589089e-05, | |
| "loss": 3.0181, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 0.7443991124472121, | |
| "grad_norm": 0.6356727480888367, | |
| "learning_rate": 7.427243949482741e-05, | |
| "loss": 3.0157, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.7455443418509771, | |
| "grad_norm": 0.5824810266494751, | |
| "learning_rate": 7.418964266592744e-05, | |
| "loss": 2.9286, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.746689571254742, | |
| "grad_norm": 0.6385902166366577, | |
| "learning_rate": 7.410675914591921e-05, | |
| "loss": 2.9807, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.7478348006585069, | |
| "grad_norm": 0.7312979102134705, | |
| "learning_rate": 7.402378923184156e-05, | |
| "loss": 3.0521, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.7489800300622719, | |
| "grad_norm": 0.5312052965164185, | |
| "learning_rate": 7.394073322104298e-05, | |
| "loss": 2.8997, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.7501252594660368, | |
| "grad_norm": 0.6032156348228455, | |
| "learning_rate": 7.385759141118055e-05, | |
| "loss": 2.9969, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.7512704888698017, | |
| "grad_norm": 0.6151460409164429, | |
| "learning_rate": 7.377436410021878e-05, | |
| "loss": 2.9889, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.7524157182735667, | |
| "grad_norm": 0.6497369408607483, | |
| "learning_rate": 7.369105158642863e-05, | |
| "loss": 3.0514, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 0.7535609476773316, | |
| "grad_norm": 0.5319530367851257, | |
| "learning_rate": 7.360765416838643e-05, | |
| "loss": 2.9673, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.7547061770810966, | |
| "grad_norm": 0.6640217900276184, | |
| "learning_rate": 7.352417214497272e-05, | |
| "loss": 2.9538, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 0.7558514064848615, | |
| "grad_norm": 0.5952315926551819, | |
| "learning_rate": 7.344060581537134e-05, | |
| "loss": 3.0166, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.7569966358886264, | |
| "grad_norm": 0.6607936024665833, | |
| "learning_rate": 7.335695547906821e-05, | |
| "loss": 3.053, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 0.7581418652923914, | |
| "grad_norm": 0.5945944786071777, | |
| "learning_rate": 7.327322143585033e-05, | |
| "loss": 2.9579, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 0.7592870946961563, | |
| "grad_norm": 0.5982474088668823, | |
| "learning_rate": 7.318940398580467e-05, | |
| "loss": 3.0381, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 0.7604323240999212, | |
| "grad_norm": 0.557156503200531, | |
| "learning_rate": 7.310550342931714e-05, | |
| "loss": 2.9517, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.7615775535036862, | |
| "grad_norm": 0.519133985042572, | |
| "learning_rate": 7.30215200670715e-05, | |
| "loss": 2.95, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.7627227829074511, | |
| "grad_norm": 0.6734493970870972, | |
| "learning_rate": 7.293745420004823e-05, | |
| "loss": 3.0136, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 0.763868012311216, | |
| "grad_norm": 0.6518685817718506, | |
| "learning_rate": 7.28533061295235e-05, | |
| "loss": 2.9696, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 0.7650132417149811, | |
| "grad_norm": 0.5936789512634277, | |
| "learning_rate": 7.276907615706814e-05, | |
| "loss": 2.9877, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 0.766158471118746, | |
| "grad_norm": 0.5671294331550598, | |
| "learning_rate": 7.268476458454642e-05, | |
| "loss": 2.9858, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 0.767303700522511, | |
| "grad_norm": 0.5697634816169739, | |
| "learning_rate": 7.260037171411508e-05, | |
| "loss": 2.9434, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.7684489299262759, | |
| "grad_norm": 0.6007529497146606, | |
| "learning_rate": 7.251589784822224e-05, | |
| "loss": 2.9387, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 0.7695941593300408, | |
| "grad_norm": 0.5928565263748169, | |
| "learning_rate": 7.243134328960625e-05, | |
| "loss": 2.9275, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.7707393887338058, | |
| "grad_norm": 0.6670402884483337, | |
| "learning_rate": 7.234670834129469e-05, | |
| "loss": 2.9812, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 0.7718846181375707, | |
| "grad_norm": 0.6268984079360962, | |
| "learning_rate": 7.226199330660322e-05, | |
| "loss": 2.9359, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 0.7730298475413356, | |
| "grad_norm": 0.5983556509017944, | |
| "learning_rate": 7.217719848913451e-05, | |
| "loss": 2.9524, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.7741750769451006, | |
| "grad_norm": 0.5522739291191101, | |
| "learning_rate": 7.209232419277714e-05, | |
| "loss": 2.9182, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 0.7753203063488655, | |
| "grad_norm": 0.5227957367897034, | |
| "learning_rate": 7.20073707217046e-05, | |
| "loss": 2.9703, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 0.7764655357526304, | |
| "grad_norm": 0.484824538230896, | |
| "learning_rate": 7.192233838037403e-05, | |
| "loss": 2.985, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 0.7776107651563954, | |
| "grad_norm": 0.5953473448753357, | |
| "learning_rate": 7.183722747352531e-05, | |
| "loss": 2.9942, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 0.7787559945601603, | |
| "grad_norm": 0.550989031791687, | |
| "learning_rate": 7.175203830617983e-05, | |
| "loss": 2.9782, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.7799012239639252, | |
| "grad_norm": 0.5300703048706055, | |
| "learning_rate": 7.166677118363945e-05, | |
| "loss": 2.9471, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 0.7810464533676902, | |
| "grad_norm": 0.6735565066337585, | |
| "learning_rate": 7.158142641148546e-05, | |
| "loss": 2.9581, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 0.7821916827714551, | |
| "grad_norm": 0.6232147216796875, | |
| "learning_rate": 7.149600429557734e-05, | |
| "loss": 2.9921, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 0.7833369121752201, | |
| "grad_norm": 0.563504159450531, | |
| "learning_rate": 7.14105051420519e-05, | |
| "loss": 2.9923, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 0.784482141578985, | |
| "grad_norm": 0.5748158097267151, | |
| "learning_rate": 7.132492925732187e-05, | |
| "loss": 3.0145, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.7856273709827499, | |
| "grad_norm": 0.5430135130882263, | |
| "learning_rate": 7.12392769480751e-05, | |
| "loss": 2.9535, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 0.786772600386515, | |
| "grad_norm": 0.6344389319419861, | |
| "learning_rate": 7.115354852127324e-05, | |
| "loss": 3.0599, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 0.7879178297902799, | |
| "grad_norm": 0.6093223094940186, | |
| "learning_rate": 7.106774428415079e-05, | |
| "loss": 3.0051, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.7890630591940448, | |
| "grad_norm": 0.5969236493110657, | |
| "learning_rate": 7.098186454421393e-05, | |
| "loss": 2.9106, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 0.7902082885978098, | |
| "grad_norm": 0.5839152336120605, | |
| "learning_rate": 7.089590960923943e-05, | |
| "loss": 2.9917, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.7913535180015747, | |
| "grad_norm": 0.6248955130577087, | |
| "learning_rate": 7.080987978727349e-05, | |
| "loss": 3.0072, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 0.7924987474053397, | |
| "grad_norm": 0.6621485352516174, | |
| "learning_rate": 7.072377538663079e-05, | |
| "loss": 2.9616, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 0.7936439768091046, | |
| "grad_norm": 0.5360151529312134, | |
| "learning_rate": 7.063759671589319e-05, | |
| "loss": 3.0115, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 0.7947892062128695, | |
| "grad_norm": 0.6625118851661682, | |
| "learning_rate": 7.055134408390877e-05, | |
| "loss": 3.0551, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 0.7959344356166345, | |
| "grad_norm": 0.5611973404884338, | |
| "learning_rate": 7.046501779979068e-05, | |
| "loss": 2.9427, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.7970796650203994, | |
| "grad_norm": 0.6025654673576355, | |
| "learning_rate": 7.037861817291598e-05, | |
| "loss": 2.985, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 0.7982248944241643, | |
| "grad_norm": 0.7166057229042053, | |
| "learning_rate": 7.029214551292465e-05, | |
| "loss": 3.006, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 0.7993701238279293, | |
| "grad_norm": 0.586137056350708, | |
| "learning_rate": 7.020560012971832e-05, | |
| "loss": 2.9835, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 0.8005153532316942, | |
| "grad_norm": 0.7084041833877563, | |
| "learning_rate": 7.011898233345931e-05, | |
| "loss": 2.9931, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 0.8016605826354591, | |
| "grad_norm": 0.6771812438964844, | |
| "learning_rate": 7.003229243456944e-05, | |
| "loss": 3.0061, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.8028058120392241, | |
| "grad_norm": 0.6903461813926697, | |
| "learning_rate": 6.994553074372891e-05, | |
| "loss": 2.977, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 0.803951041442989, | |
| "grad_norm": 0.6033274531364441, | |
| "learning_rate": 6.985869757187523e-05, | |
| "loss": 2.935, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 0.805096270846754, | |
| "grad_norm": 0.6533209085464478, | |
| "learning_rate": 6.977179323020207e-05, | |
| "loss": 3.0233, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 0.8062415002505189, | |
| "grad_norm": 0.5902218222618103, | |
| "learning_rate": 6.96848180301582e-05, | |
| "loss": 3.0012, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.8073867296542838, | |
| "grad_norm": 0.6266187429428101, | |
| "learning_rate": 6.959777228344628e-05, | |
| "loss": 2.9518, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.8085319590580489, | |
| "grad_norm": 0.5876622200012207, | |
| "learning_rate": 6.95106563020218e-05, | |
| "loss": 2.9377, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 0.8096771884618138, | |
| "grad_norm": 0.5977484583854675, | |
| "learning_rate": 6.942347039809201e-05, | |
| "loss": 2.9814, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 0.8108224178655787, | |
| "grad_norm": 0.64298015832901, | |
| "learning_rate": 6.933621488411468e-05, | |
| "loss": 3.0269, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 0.8119676472693437, | |
| "grad_norm": 0.5676386952400208, | |
| "learning_rate": 6.924889007279712e-05, | |
| "loss": 2.9237, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 0.8131128766731086, | |
| "grad_norm": 0.5827841758728027, | |
| "learning_rate": 6.916149627709494e-05, | |
| "loss": 2.867, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.8142581060768735, | |
| "grad_norm": 0.6391154527664185, | |
| "learning_rate": 6.907403381021097e-05, | |
| "loss": 2.9802, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 0.8154033354806385, | |
| "grad_norm": 0.5791281461715698, | |
| "learning_rate": 6.89865029855942e-05, | |
| "loss": 2.9945, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 0.8165485648844034, | |
| "grad_norm": 0.5360828638076782, | |
| "learning_rate": 6.88989041169385e-05, | |
| "loss": 2.9882, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 0.8176937942881684, | |
| "grad_norm": 0.5865568518638611, | |
| "learning_rate": 6.881123751818175e-05, | |
| "loss": 2.9096, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 0.8188390236919333, | |
| "grad_norm": 0.5686046481132507, | |
| "learning_rate": 6.87235035035044e-05, | |
| "loss": 3.0026, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.8199842530956982, | |
| "grad_norm": 0.6230041980743408, | |
| "learning_rate": 6.863570238732862e-05, | |
| "loss": 2.9601, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 0.8211294824994632, | |
| "grad_norm": 0.61517733335495, | |
| "learning_rate": 6.854783448431702e-05, | |
| "loss": 2.9286, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 0.8222747119032281, | |
| "grad_norm": 0.5126785635948181, | |
| "learning_rate": 6.845990010937152e-05, | |
| "loss": 2.9896, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 0.823419941306993, | |
| "grad_norm": 0.5101936459541321, | |
| "learning_rate": 6.837189957763234e-05, | |
| "loss": 2.9612, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 0.824565170710758, | |
| "grad_norm": 0.6595968008041382, | |
| "learning_rate": 6.828383320447675e-05, | |
| "loss": 3.0712, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.8257104001145229, | |
| "grad_norm": 0.5970346927642822, | |
| "learning_rate": 6.8195701305518e-05, | |
| "loss": 2.9534, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 0.8268556295182878, | |
| "grad_norm": 0.645318865776062, | |
| "learning_rate": 6.810750419660415e-05, | |
| "loss": 2.9673, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 0.8280008589220528, | |
| "grad_norm": 0.6072613596916199, | |
| "learning_rate": 6.801924219381695e-05, | |
| "loss": 2.9525, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 0.8291460883258178, | |
| "grad_norm": 0.5918760299682617, | |
| "learning_rate": 6.793091561347078e-05, | |
| "loss": 2.961, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 0.8302913177295828, | |
| "grad_norm": 0.6208258271217346, | |
| "learning_rate": 6.784252477211138e-05, | |
| "loss": 2.9874, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.8314365471333477, | |
| "grad_norm": 0.601787805557251, | |
| "learning_rate": 6.775406998651484e-05, | |
| "loss": 2.9412, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 0.8325817765371126, | |
| "grad_norm": 0.5628416538238525, | |
| "learning_rate": 6.76655515736864e-05, | |
| "loss": 2.9098, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 0.8337270059408776, | |
| "grad_norm": 0.559907078742981, | |
| "learning_rate": 6.757696985085931e-05, | |
| "loss": 3.0108, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 0.8348722353446425, | |
| "grad_norm": 0.5879938006401062, | |
| "learning_rate": 6.748832513549373e-05, | |
| "loss": 2.9376, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 0.8360174647484074, | |
| "grad_norm": 0.581619143486023, | |
| "learning_rate": 6.739961774527557e-05, | |
| "loss": 2.9491, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.8371626941521724, | |
| "grad_norm": 0.686090350151062, | |
| "learning_rate": 6.731084799811536e-05, | |
| "loss": 2.9458, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 0.8383079235559373, | |
| "grad_norm": 0.6862696409225464, | |
| "learning_rate": 6.72220162121471e-05, | |
| "loss": 3.0272, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 0.8394531529597022, | |
| "grad_norm": 0.5614147782325745, | |
| "learning_rate": 6.713312270572711e-05, | |
| "loss": 2.9941, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 0.8405983823634672, | |
| "grad_norm": 0.6119958162307739, | |
| "learning_rate": 6.704416779743295e-05, | |
| "loss": 2.9241, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 0.8417436117672321, | |
| "grad_norm": 0.5873327851295471, | |
| "learning_rate": 6.695515180606217e-05, | |
| "loss": 2.9539, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.842888841170997, | |
| "grad_norm": 0.5834409594535828, | |
| "learning_rate": 6.686607505063127e-05, | |
| "loss": 2.981, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 0.844034070574762, | |
| "grad_norm": 0.6550679206848145, | |
| "learning_rate": 6.677693785037452e-05, | |
| "loss": 2.9575, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 0.8451792999785269, | |
| "grad_norm": 0.7193329930305481, | |
| "learning_rate": 6.668774052474278e-05, | |
| "loss": 2.9882, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 0.8463245293822919, | |
| "grad_norm": 0.741956353187561, | |
| "learning_rate": 6.659848339340243e-05, | |
| "loss": 2.9814, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 0.8474697587860568, | |
| "grad_norm": 0.7412140369415283, | |
| "learning_rate": 6.650916677623415e-05, | |
| "loss": 2.9989, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.8486149881898217, | |
| "grad_norm": 0.709784984588623, | |
| "learning_rate": 6.641979099333183e-05, | |
| "loss": 2.9654, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 0.8497602175935867, | |
| "grad_norm": 0.6807482838630676, | |
| "learning_rate": 6.633035636500137e-05, | |
| "loss": 2.9919, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 0.8509054469973517, | |
| "grad_norm": 0.6146615147590637, | |
| "learning_rate": 6.62408632117596e-05, | |
| "loss": 2.9711, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 0.8520506764011166, | |
| "grad_norm": 0.4998982846736908, | |
| "learning_rate": 6.615131185433306e-05, | |
| "loss": 2.9859, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 0.8531959058048816, | |
| "grad_norm": 0.577151358127594, | |
| "learning_rate": 6.606170261365689e-05, | |
| "loss": 3.0008, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.8543411352086465, | |
| "grad_norm": 0.5823954939842224, | |
| "learning_rate": 6.597203581087367e-05, | |
| "loss": 2.9355, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 0.8554863646124115, | |
| "grad_norm": 0.5608075857162476, | |
| "learning_rate": 6.588231176733228e-05, | |
| "loss": 2.9701, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 0.8566315940161764, | |
| "grad_norm": 0.5515905618667603, | |
| "learning_rate": 6.579253080458676e-05, | |
| "loss": 2.9437, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 0.8577768234199413, | |
| "grad_norm": 0.5510479211807251, | |
| "learning_rate": 6.570269324439509e-05, | |
| "loss": 2.889, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 0.8589220528237063, | |
| "grad_norm": 0.5867220163345337, | |
| "learning_rate": 6.561279940871809e-05, | |
| "loss": 2.9781, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.8600672822274712, | |
| "grad_norm": 0.5506752133369446, | |
| "learning_rate": 6.552284961971834e-05, | |
| "loss": 2.9802, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 0.8612125116312361, | |
| "grad_norm": 0.6004698872566223, | |
| "learning_rate": 6.543284419975884e-05, | |
| "loss": 2.9445, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 0.8623577410350011, | |
| "grad_norm": 0.5422489047050476, | |
| "learning_rate": 6.5342783471402e-05, | |
| "loss": 2.9621, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 0.863502970438766, | |
| "grad_norm": 0.5704418420791626, | |
| "learning_rate": 6.52526677574085e-05, | |
| "loss": 2.9643, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 0.8646481998425309, | |
| "grad_norm": 0.6530454754829407, | |
| "learning_rate": 6.516249738073597e-05, | |
| "loss": 2.9549, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.8657934292462959, | |
| "grad_norm": 0.5491419434547424, | |
| "learning_rate": 6.507227266453806e-05, | |
| "loss": 2.9119, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 0.8669386586500608, | |
| "grad_norm": 0.5081238150596619, | |
| "learning_rate": 6.498199393216305e-05, | |
| "loss": 2.9302, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 0.8680838880538257, | |
| "grad_norm": 0.5380941033363342, | |
| "learning_rate": 6.48916615071529e-05, | |
| "loss": 2.8767, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 0.8692291174575907, | |
| "grad_norm": 0.5369439125061035, | |
| "learning_rate": 6.480127571324193e-05, | |
| "loss": 2.9488, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 0.8703743468613556, | |
| "grad_norm": 0.5196496248245239, | |
| "learning_rate": 6.471083687435575e-05, | |
| "loss": 2.9781, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.8715195762651206, | |
| "grad_norm": 0.5687289834022522, | |
| "learning_rate": 6.462034531461008e-05, | |
| "loss": 2.9714, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 0.8726648056688856, | |
| "grad_norm": 0.535955548286438, | |
| "learning_rate": 6.452980135830952e-05, | |
| "loss": 2.9167, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 0.8738100350726505, | |
| "grad_norm": 0.5874583721160889, | |
| "learning_rate": 6.443920532994658e-05, | |
| "loss": 2.9149, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 0.8749552644764155, | |
| "grad_norm": 0.6629985570907593, | |
| "learning_rate": 6.434855755420024e-05, | |
| "loss": 2.9535, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 0.8761004938801804, | |
| "grad_norm": 0.7059323787689209, | |
| "learning_rate": 6.425785835593503e-05, | |
| "loss": 2.9856, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.8772457232839453, | |
| "grad_norm": 0.6743423342704773, | |
| "learning_rate": 6.416710806019973e-05, | |
| "loss": 2.997, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 0.8783909526877103, | |
| "grad_norm": 0.5754982233047485, | |
| "learning_rate": 6.407630699222624e-05, | |
| "loss": 2.9411, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 0.8795361820914752, | |
| "grad_norm": 0.5872779488563538, | |
| "learning_rate": 6.398545547742846e-05, | |
| "loss": 2.9959, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 0.8806814114952402, | |
| "grad_norm": 0.5496954321861267, | |
| "learning_rate": 6.389455384140101e-05, | |
| "loss": 2.9483, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 0.8818266408990051, | |
| "grad_norm": 0.6256377100944519, | |
| "learning_rate": 6.380360240991821e-05, | |
| "loss": 2.8727, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.88297187030277, | |
| "grad_norm": 0.6160191893577576, | |
| "learning_rate": 6.37126015089328e-05, | |
| "loss": 2.9312, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 0.884117099706535, | |
| "grad_norm": 0.5502288937568665, | |
| "learning_rate": 6.362155146457478e-05, | |
| "loss": 3.0298, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 0.8852623291102999, | |
| "grad_norm": 0.5236011743545532, | |
| "learning_rate": 6.353045260315035e-05, | |
| "loss": 2.8955, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 0.8864075585140648, | |
| "grad_norm": 0.5943360924720764, | |
| "learning_rate": 6.34393052511406e-05, | |
| "loss": 2.9258, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 0.8875527879178298, | |
| "grad_norm": 0.5703767538070679, | |
| "learning_rate": 6.33481097352004e-05, | |
| "loss": 2.9084, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.8886980173215947, | |
| "grad_norm": 0.5543270707130432, | |
| "learning_rate": 6.325686638215724e-05, | |
| "loss": 2.9434, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 0.8898432467253596, | |
| "grad_norm": 0.6018961668014526, | |
| "learning_rate": 6.316557551901006e-05, | |
| "loss": 2.9177, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 0.8909884761291246, | |
| "grad_norm": 0.5374200940132141, | |
| "learning_rate": 6.307423747292811e-05, | |
| "loss": 2.9867, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 0.8921337055328895, | |
| "grad_norm": 0.6098986864089966, | |
| "learning_rate": 6.298285257124963e-05, | |
| "loss": 3.0203, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 0.8932789349366544, | |
| "grad_norm": 0.5418615937232971, | |
| "learning_rate": 6.289142114148085e-05, | |
| "loss": 2.9239, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.8944241643404195, | |
| "grad_norm": 0.5681843757629395, | |
| "learning_rate": 6.279994351129476e-05, | |
| "loss": 2.9077, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 0.8955693937441844, | |
| "grad_norm": 0.5646500587463379, | |
| "learning_rate": 6.270842000852988e-05, | |
| "loss": 2.9635, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 0.8967146231479494, | |
| "grad_norm": 0.513608455657959, | |
| "learning_rate": 6.261685096118917e-05, | |
| "loss": 2.9215, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 0.8978598525517143, | |
| "grad_norm": 0.5145309567451477, | |
| "learning_rate": 6.252523669743876e-05, | |
| "loss": 3.0064, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 0.8990050819554792, | |
| "grad_norm": 0.5833147168159485, | |
| "learning_rate": 6.243357754560688e-05, | |
| "loss": 2.9831, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.9001503113592442, | |
| "grad_norm": 0.5844488739967346, | |
| "learning_rate": 6.23418738341826e-05, | |
| "loss": 2.9308, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 0.9012955407630091, | |
| "grad_norm": 0.5789006352424622, | |
| "learning_rate": 6.225012589181471e-05, | |
| "loss": 2.9829, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 0.902440770166774, | |
| "grad_norm": 0.620063304901123, | |
| "learning_rate": 6.21583340473105e-05, | |
| "loss": 2.9039, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 0.903585999570539, | |
| "grad_norm": 0.6860834956169128, | |
| "learning_rate": 6.206649862963457e-05, | |
| "loss": 2.9643, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 0.9047312289743039, | |
| "grad_norm": 0.5990268588066101, | |
| "learning_rate": 6.19746199679077e-05, | |
| "loss": 2.9227, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.9058764583780688, | |
| "grad_norm": 0.549895167350769, | |
| "learning_rate": 6.188269839140569e-05, | |
| "loss": 2.9304, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 0.9070216877818338, | |
| "grad_norm": 0.5849148631095886, | |
| "learning_rate": 6.179073422955806e-05, | |
| "loss": 2.9216, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 0.9081669171855987, | |
| "grad_norm": 0.5945584177970886, | |
| "learning_rate": 6.169872781194701e-05, | |
| "loss": 2.9938, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 0.9093121465893637, | |
| "grad_norm": 0.5605809688568115, | |
| "learning_rate": 6.160667946830616e-05, | |
| "loss": 2.9508, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 0.9104573759931286, | |
| "grad_norm": 0.641635537147522, | |
| "learning_rate": 6.151458952851935e-05, | |
| "loss": 2.9194, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.9116026053968935, | |
| "grad_norm": 0.6415832042694092, | |
| "learning_rate": 6.142245832261956e-05, | |
| "loss": 2.9365, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 0.9127478348006585, | |
| "grad_norm": 0.5336154103279114, | |
| "learning_rate": 6.133028618078759e-05, | |
| "loss": 2.9021, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 0.9138930642044234, | |
| "grad_norm": 0.5383312702178955, | |
| "learning_rate": 6.1238073433351e-05, | |
| "loss": 2.8608, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 0.9150382936081883, | |
| "grad_norm": 0.5748524069786072, | |
| "learning_rate": 6.114582041078285e-05, | |
| "loss": 2.9156, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 0.9161835230119534, | |
| "grad_norm": 0.5506983995437622, | |
| "learning_rate": 6.105352744370053e-05, | |
| "loss": 2.9455, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.9173287524157183, | |
| "grad_norm": 0.5026014447212219, | |
| "learning_rate": 6.09611948628646e-05, | |
| "loss": 2.9324, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 0.9184739818194833, | |
| "grad_norm": 0.5291290879249573, | |
| "learning_rate": 6.086882299917758e-05, | |
| "loss": 2.9622, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 0.9196192112232482, | |
| "grad_norm": 0.5395295023918152, | |
| "learning_rate": 6.077641218368276e-05, | |
| "loss": 2.8979, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 0.9207644406270131, | |
| "grad_norm": 0.5202158093452454, | |
| "learning_rate": 6.068396274756306e-05, | |
| "loss": 2.8641, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 0.9219096700307781, | |
| "grad_norm": 0.4785449206829071, | |
| "learning_rate": 6.05914750221398e-05, | |
| "loss": 2.9755, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.923054899434543, | |
| "grad_norm": 0.4841679632663727, | |
| "learning_rate": 6.0498949338871494e-05, | |
| "loss": 2.9413, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 0.9242001288383079, | |
| "grad_norm": 0.512946367263794, | |
| "learning_rate": 6.040638602935268e-05, | |
| "loss": 2.954, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 0.9253453582420729, | |
| "grad_norm": 0.6236635446548462, | |
| "learning_rate": 6.031378542531282e-05, | |
| "loss": 3.0108, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 0.9264905876458378, | |
| "grad_norm": 0.6436796188354492, | |
| "learning_rate": 6.0221147858614944e-05, | |
| "loss": 2.9034, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 0.9276358170496027, | |
| "grad_norm": 0.6704596281051636, | |
| "learning_rate": 6.0128473661254605e-05, | |
| "loss": 2.9564, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.9287810464533677, | |
| "grad_norm": 0.5924867987632751, | |
| "learning_rate": 6.00357631653586e-05, | |
| "loss": 3.0252, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 0.9299262758571326, | |
| "grad_norm": 0.5814377069473267, | |
| "learning_rate": 5.994301670318385e-05, | |
| "loss": 2.907, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 0.9310715052608975, | |
| "grad_norm": 0.7266978621482849, | |
| "learning_rate": 5.985023460711612e-05, | |
| "loss": 2.9158, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 0.9322167346646625, | |
| "grad_norm": 0.6032978892326355, | |
| "learning_rate": 5.975741720966892e-05, | |
| "loss": 2.9025, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 0.9333619640684274, | |
| "grad_norm": 0.5802534818649292, | |
| "learning_rate": 5.966456484348226e-05, | |
| "loss": 2.9143, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.9345071934721924, | |
| "grad_norm": 0.6050094366073608, | |
| "learning_rate": 5.9571677841321494e-05, | |
| "loss": 2.9139, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 0.9356524228759573, | |
| "grad_norm": 0.6297913789749146, | |
| "learning_rate": 5.947875653607606e-05, | |
| "loss": 2.963, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 0.9367976522797223, | |
| "grad_norm": 0.5898680686950684, | |
| "learning_rate": 5.938580126075838e-05, | |
| "loss": 2.9342, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 0.9379428816834873, | |
| "grad_norm": 0.5346818566322327, | |
| "learning_rate": 5.929281234850257e-05, | |
| "loss": 2.9321, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 0.9390881110872522, | |
| "grad_norm": 0.5342332720756531, | |
| "learning_rate": 5.919979013256335e-05, | |
| "loss": 2.8957, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.9402333404910171, | |
| "grad_norm": 0.5973726511001587, | |
| "learning_rate": 5.910673494631474e-05, | |
| "loss": 2.9555, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 0.9413785698947821, | |
| "grad_norm": 0.6567181348800659, | |
| "learning_rate": 5.901364712324894e-05, | |
| "loss": 2.9272, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 0.942523799298547, | |
| "grad_norm": 0.6484449505805969, | |
| "learning_rate": 5.892052699697514e-05, | |
| "loss": 2.9415, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 0.943669028702312, | |
| "grad_norm": 0.5099656581878662, | |
| "learning_rate": 5.8827374901218256e-05, | |
| "loss": 2.927, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 0.9448142581060769, | |
| "grad_norm": 0.5123404860496521, | |
| "learning_rate": 5.873419116981782e-05, | |
| "loss": 2.8556, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.9459594875098418, | |
| "grad_norm": 0.6220594644546509, | |
| "learning_rate": 5.864097613672669e-05, | |
| "loss": 2.9163, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 0.9471047169136068, | |
| "grad_norm": 0.5449616312980652, | |
| "learning_rate": 5.854773013600993e-05, | |
| "loss": 2.9281, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 0.9482499463173717, | |
| "grad_norm": 0.5175302028656006, | |
| "learning_rate": 5.845445350184361e-05, | |
| "loss": 2.9238, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 0.9493951757211366, | |
| "grad_norm": 0.5361518859863281, | |
| "learning_rate": 5.836114656851354e-05, | |
| "loss": 2.8362, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 0.9505404051249016, | |
| "grad_norm": 0.5278197526931763, | |
| "learning_rate": 5.826780967041415e-05, | |
| "loss": 2.9431, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.9516856345286665, | |
| "grad_norm": 0.49755188822746277, | |
| "learning_rate": 5.817444314204725e-05, | |
| "loss": 2.9196, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 0.9528308639324314, | |
| "grad_norm": 0.5488353967666626, | |
| "learning_rate": 5.808104731802081e-05, | |
| "loss": 2.9003, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 0.9539760933361964, | |
| "grad_norm": 0.5349782109260559, | |
| "learning_rate": 5.7987622533047836e-05, | |
| "loss": 2.9372, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 0.9551213227399613, | |
| "grad_norm": 0.5536332726478577, | |
| "learning_rate": 5.7894169121945084e-05, | |
| "loss": 2.8327, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 0.9562665521437262, | |
| "grad_norm": 0.5771124362945557, | |
| "learning_rate": 5.780068741963195e-05, | |
| "loss": 2.9378, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.9574117815474912, | |
| "grad_norm": 0.5738604664802551, | |
| "learning_rate": 5.770717776112917e-05, | |
| "loss": 2.9942, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 0.9585570109512562, | |
| "grad_norm": 0.5480334162712097, | |
| "learning_rate": 5.7613640481557695e-05, | |
| "loss": 2.9512, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 0.9597022403550212, | |
| "grad_norm": 0.5497896075248718, | |
| "learning_rate": 5.752007591613745e-05, | |
| "loss": 2.8757, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 0.9608474697587861, | |
| "grad_norm": 0.5670433044433594, | |
| "learning_rate": 5.7426484400186163e-05, | |
| "loss": 2.9154, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 0.961992699162551, | |
| "grad_norm": 0.5624505281448364, | |
| "learning_rate": 5.7332866269118144e-05, | |
| "loss": 2.8875, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.963137928566316, | |
| "grad_norm": 0.5293657779693604, | |
| "learning_rate": 5.7239221858443094e-05, | |
| "loss": 2.9278, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 0.9642831579700809, | |
| "grad_norm": 0.509075939655304, | |
| "learning_rate": 5.714555150376486e-05, | |
| "loss": 2.8979, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 0.9654283873738458, | |
| "grad_norm": 0.5095922946929932, | |
| "learning_rate": 5.705185554078031e-05, | |
| "loss": 2.9111, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 0.9665736167776108, | |
| "grad_norm": 0.5343554019927979, | |
| "learning_rate": 5.695813430527805e-05, | |
| "loss": 2.9371, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 0.9677188461813757, | |
| "grad_norm": 0.5610002279281616, | |
| "learning_rate": 5.686438813313733e-05, | |
| "loss": 2.9488, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.9688640755851406, | |
| "grad_norm": 0.588938295841217, | |
| "learning_rate": 5.677061736032666e-05, | |
| "loss": 2.9073, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 0.9700093049889056, | |
| "grad_norm": 0.5782492756843567, | |
| "learning_rate": 5.6676822322902776e-05, | |
| "loss": 2.8714, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 0.9711545343926705, | |
| "grad_norm": 0.5729259848594666, | |
| "learning_rate": 5.658300335700941e-05, | |
| "loss": 2.8974, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 0.9722997637964355, | |
| "grad_norm": 0.5503959655761719, | |
| "learning_rate": 5.648916079887597e-05, | |
| "loss": 2.8727, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 0.9734449932002004, | |
| "grad_norm": 0.5312175750732422, | |
| "learning_rate": 5.639529498481646e-05, | |
| "loss": 2.8531, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.9745902226039653, | |
| "grad_norm": 0.5270197987556458, | |
| "learning_rate": 5.6301406251228216e-05, | |
| "loss": 2.9038, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 0.9757354520077303, | |
| "grad_norm": 0.48052453994750977, | |
| "learning_rate": 5.620749493459073e-05, | |
| "loss": 2.9239, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 0.9768806814114952, | |
| "grad_norm": 0.5059691667556763, | |
| "learning_rate": 5.6113561371464406e-05, | |
| "loss": 2.8952, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 0.9780259108152601, | |
| "grad_norm": 0.4924282133579254, | |
| "learning_rate": 5.601960589848937e-05, | |
| "loss": 2.9792, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 0.9791711402190251, | |
| "grad_norm": 0.4471040964126587, | |
| "learning_rate": 5.5925628852384314e-05, | |
| "loss": 2.9129, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.9803163696227901, | |
| "grad_norm": 0.49713778495788574, | |
| "learning_rate": 5.583163056994519e-05, | |
| "loss": 2.9044, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 0.981461599026555, | |
| "grad_norm": 0.5139912962913513, | |
| "learning_rate": 5.5737611388044086e-05, | |
| "loss": 2.9226, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 0.98260682843032, | |
| "grad_norm": 0.5141741037368774, | |
| "learning_rate": 5.564357164362799e-05, | |
| "loss": 2.9226, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 0.9837520578340849, | |
| "grad_norm": 0.4590401351451874, | |
| "learning_rate": 5.5549511673717556e-05, | |
| "loss": 2.8665, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 0.9848972872378499, | |
| "grad_norm": 0.4575752913951874, | |
| "learning_rate": 5.545543181540598e-05, | |
| "loss": 2.9321, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.9860425166416148, | |
| "grad_norm": 0.5326718091964722, | |
| "learning_rate": 5.5361332405857655e-05, | |
| "loss": 2.9199, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 0.9871877460453797, | |
| "grad_norm": 0.543998658657074, | |
| "learning_rate": 5.52672137823071e-05, | |
| "loss": 2.8879, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 0.9883329754491447, | |
| "grad_norm": 0.4667711555957794, | |
| "learning_rate": 5.517307628205769e-05, | |
| "loss": 2.9111, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 0.9894782048529096, | |
| "grad_norm": 0.5030423402786255, | |
| "learning_rate": 5.50789202424804e-05, | |
| "loss": 2.8846, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 0.9906234342566745, | |
| "grad_norm": 0.5402343273162842, | |
| "learning_rate": 5.498474600101272e-05, | |
| "loss": 2.9415, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.9917686636604395, | |
| "grad_norm": 0.5290342569351196, | |
| "learning_rate": 5.489055389515732e-05, | |
| "loss": 2.9221, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 0.9929138930642044, | |
| "grad_norm": 0.5294204354286194, | |
| "learning_rate": 5.4796344262480904e-05, | |
| "loss": 2.9049, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 0.9940591224679693, | |
| "grad_norm": 0.5088786482810974, | |
| "learning_rate": 5.470211744061301e-05, | |
| "loss": 2.8881, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 0.9952043518717343, | |
| "grad_norm": 0.5132361650466919, | |
| "learning_rate": 5.460787376724474e-05, | |
| "loss": 2.9463, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 0.9963495812754992, | |
| "grad_norm": 0.5157156586647034, | |
| "learning_rate": 5.451361358012763e-05, | |
| "loss": 2.8873, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.9974948106792642, | |
| "grad_norm": 0.5482271909713745, | |
| "learning_rate": 5.441933721707236e-05, | |
| "loss": 2.9052, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 0.9986400400830291, | |
| "grad_norm": 0.49170711636543274, | |
| "learning_rate": 5.432504501594763e-05, | |
| "loss": 2.878, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 0.999785269486794, | |
| "grad_norm": 0.5055040717124939, | |
| "learning_rate": 5.423073731467885e-05, | |
| "loss": 2.9521, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 1.0005726147018825, | |
| "grad_norm": 0.6301170587539673, | |
| "learning_rate": 5.4136414451246995e-05, | |
| "loss": 2.8161, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 1.0017178441056473, | |
| "grad_norm": 0.577078104019165, | |
| "learning_rate": 5.40420767636874e-05, | |
| "loss": 2.854, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.0028630735094124, | |
| "grad_norm": 0.5432089567184448, | |
| "learning_rate": 5.3947724590088475e-05, | |
| "loss": 2.905, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 1.0040083029131772, | |
| "grad_norm": 0.5131680369377136, | |
| "learning_rate": 5.3853358268590624e-05, | |
| "loss": 2.8725, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 1.0051535323169423, | |
| "grad_norm": 0.5810659527778625, | |
| "learning_rate": 5.37589781373849e-05, | |
| "loss": 2.9462, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 1.006298761720707, | |
| "grad_norm": 0.5084631443023682, | |
| "learning_rate": 5.366458453471184e-05, | |
| "loss": 2.7939, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 1.0074439911244721, | |
| "grad_norm": 0.49303531646728516, | |
| "learning_rate": 5.35701777988603e-05, | |
| "loss": 2.8416, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.008589220528237, | |
| "grad_norm": 0.46372827887535095, | |
| "learning_rate": 5.3475758268166164e-05, | |
| "loss": 2.8337, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 1.009734449932002, | |
| "grad_norm": 0.47583991289138794, | |
| "learning_rate": 5.3381326281011204e-05, | |
| "loss": 2.8772, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 1.0108796793357668, | |
| "grad_norm": 0.5093927383422852, | |
| "learning_rate": 5.328688217582182e-05, | |
| "loss": 2.8147, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 1.0120249087395319, | |
| "grad_norm": 0.5356534719467163, | |
| "learning_rate": 5.3192426291067795e-05, | |
| "loss": 2.8479, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 1.013170138143297, | |
| "grad_norm": 0.5348360538482666, | |
| "learning_rate": 5.309795896526124e-05, | |
| "loss": 2.9173, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.0143153675470618, | |
| "grad_norm": 0.5063700675964355, | |
| "learning_rate": 5.300348053695515e-05, | |
| "loss": 2.8603, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 1.0154605969508268, | |
| "grad_norm": 0.5169893503189087, | |
| "learning_rate": 5.2908991344742375e-05, | |
| "loss": 2.8093, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 1.0166058263545916, | |
| "grad_norm": 0.5480731725692749, | |
| "learning_rate": 5.281449172725433e-05, | |
| "loss": 2.8459, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 1.0177510557583567, | |
| "grad_norm": 0.5607473850250244, | |
| "learning_rate": 5.2719982023159765e-05, | |
| "loss": 2.8449, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 1.0188962851621215, | |
| "grad_norm": 0.5521141290664673, | |
| "learning_rate": 5.262546257116362e-05, | |
| "loss": 2.8801, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.0200415145658865, | |
| "grad_norm": 0.5414731502532959, | |
| "learning_rate": 5.2530933710005736e-05, | |
| "loss": 2.8596, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 1.0211867439696514, | |
| "grad_norm": 0.5596434473991394, | |
| "learning_rate": 5.243639577845971e-05, | |
| "loss": 2.8818, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 1.0223319733734164, | |
| "grad_norm": 0.5274948477745056, | |
| "learning_rate": 5.234184911533161e-05, | |
| "loss": 2.8583, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 1.0234772027771812, | |
| "grad_norm": 0.5410761833190918, | |
| "learning_rate": 5.224729405945879e-05, | |
| "loss": 2.9212, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 1.0246224321809463, | |
| "grad_norm": 0.5315967202186584, | |
| "learning_rate": 5.215273094970876e-05, | |
| "loss": 2.8735, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.025767661584711, | |
| "grad_norm": 0.5669625997543335, | |
| "learning_rate": 5.205816012497777e-05, | |
| "loss": 2.914, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 1.0269128909884762, | |
| "grad_norm": 0.6059103012084961, | |
| "learning_rate": 5.196358192418983e-05, | |
| "loss": 2.9223, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 1.028058120392241, | |
| "grad_norm": 0.5729017853736877, | |
| "learning_rate": 5.186899668629532e-05, | |
| "loss": 2.8927, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 1.029203349796006, | |
| "grad_norm": 0.5530718564987183, | |
| "learning_rate": 5.1774404750269876e-05, | |
| "loss": 2.8323, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 1.0303485791997709, | |
| "grad_norm": 0.5186034440994263, | |
| "learning_rate": 5.167980645511311e-05, | |
| "loss": 2.8628, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.031493808603536, | |
| "grad_norm": 0.504193902015686, | |
| "learning_rate": 5.1585202139847424e-05, | |
| "loss": 2.8289, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 1.032639038007301, | |
| "grad_norm": 0.48196157813072205, | |
| "learning_rate": 5.149059214351683e-05, | |
| "loss": 2.8307, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 1.0337842674110658, | |
| "grad_norm": 0.5442152619361877, | |
| "learning_rate": 5.139597680518564e-05, | |
| "loss": 2.8902, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 1.0349294968148308, | |
| "grad_norm": 0.4562651813030243, | |
| "learning_rate": 5.130135646393739e-05, | |
| "loss": 2.8007, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 1.0360747262185956, | |
| "grad_norm": 0.519249439239502, | |
| "learning_rate": 5.120673145887349e-05, | |
| "loss": 2.8248, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.0372199556223607, | |
| "grad_norm": 0.5793293714523315, | |
| "learning_rate": 5.1112102129112074e-05, | |
| "loss": 2.8999, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 1.0383651850261255, | |
| "grad_norm": 0.5099198818206787, | |
| "learning_rate": 5.101746881378677e-05, | |
| "loss": 2.875, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 1.0395104144298906, | |
| "grad_norm": 0.527511715888977, | |
| "learning_rate": 5.09228318520455e-05, | |
| "loss": 2.8651, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 1.0406556438336554, | |
| "grad_norm": 0.5045714974403381, | |
| "learning_rate": 5.0828191583049265e-05, | |
| "loss": 2.85, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 1.0418008732374204, | |
| "grad_norm": 0.556731104850769, | |
| "learning_rate": 5.073354834597091e-05, | |
| "loss": 2.9094, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.0429461026411853, | |
| "grad_norm": 0.5212239027023315, | |
| "learning_rate": 5.0638902479993886e-05, | |
| "loss": 2.7986, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 1.0440913320449503, | |
| "grad_norm": 0.5161175727844238, | |
| "learning_rate": 5.0544254324311105e-05, | |
| "loss": 2.8411, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 1.0452365614487151, | |
| "grad_norm": 0.4778583347797394, | |
| "learning_rate": 5.044960421812367e-05, | |
| "loss": 2.8226, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 1.0463817908524802, | |
| "grad_norm": 0.4907243251800537, | |
| "learning_rate": 5.0354952500639674e-05, | |
| "loss": 2.8726, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 1.047527020256245, | |
| "grad_norm": 0.4880329966545105, | |
| "learning_rate": 5.0260299511073003e-05, | |
| "loss": 2.8798, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.04867224966001, | |
| "grad_norm": 0.5149087309837341, | |
| "learning_rate": 5.016564558864205e-05, | |
| "loss": 2.8483, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 1.0498174790637749, | |
| "grad_norm": 0.5067439675331116, | |
| "learning_rate": 5.007099107256863e-05, | |
| "loss": 2.8022, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 1.05096270846754, | |
| "grad_norm": 0.506746768951416, | |
| "learning_rate": 4.9976336302076604e-05, | |
| "loss": 2.8279, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 1.0521079378713047, | |
| "grad_norm": 0.49440717697143555, | |
| "learning_rate": 4.988168161639081e-05, | |
| "loss": 2.8973, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 1.0532531672750698, | |
| "grad_norm": 0.45895057916641235, | |
| "learning_rate": 4.9787027354735755e-05, | |
| "loss": 2.8878, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.0543983966788346, | |
| "grad_norm": 0.5206420421600342, | |
| "learning_rate": 4.9692373856334404e-05, | |
| "loss": 2.8554, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 1.0555436260825997, | |
| "grad_norm": 0.49703797698020935, | |
| "learning_rate": 4.9597721460407014e-05, | |
| "loss": 2.8716, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 1.0566888554863647, | |
| "grad_norm": 0.4677201807498932, | |
| "learning_rate": 4.9503070506169896e-05, | |
| "loss": 2.8888, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 1.0578340848901295, | |
| "grad_norm": 0.49985969066619873, | |
| "learning_rate": 4.940842133283419e-05, | |
| "loss": 2.896, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 1.0589793142938946, | |
| "grad_norm": 0.5004621744155884, | |
| "learning_rate": 4.931377427960464e-05, | |
| "loss": 2.9012, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.0601245436976594, | |
| "grad_norm": 0.45411524176597595, | |
| "learning_rate": 4.921912968567839e-05, | |
| "loss": 2.852, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 1.0612697731014245, | |
| "grad_norm": 0.5539801716804504, | |
| "learning_rate": 4.912448789024378e-05, | |
| "loss": 2.8653, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 1.0624150025051893, | |
| "grad_norm": 0.566481351852417, | |
| "learning_rate": 4.902984923247914e-05, | |
| "loss": 2.8847, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 1.0635602319089543, | |
| "grad_norm": 0.503288209438324, | |
| "learning_rate": 4.893521405155153e-05, | |
| "loss": 2.9068, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 1.0647054613127191, | |
| "grad_norm": 0.5400659441947937, | |
| "learning_rate": 4.884058268661555e-05, | |
| "loss": 2.8766, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.0658506907164842, | |
| "grad_norm": 0.507279634475708, | |
| "learning_rate": 4.8745955476812126e-05, | |
| "loss": 2.8745, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 1.066995920120249, | |
| "grad_norm": 0.4649648070335388, | |
| "learning_rate": 4.8651332761267284e-05, | |
| "loss": 2.8027, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 1.068141149524014, | |
| "grad_norm": 0.49418357014656067, | |
| "learning_rate": 4.855671487909098e-05, | |
| "loss": 2.778, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 1.069286378927779, | |
| "grad_norm": 0.5281592607498169, | |
| "learning_rate": 4.8462102169375836e-05, | |
| "loss": 2.8031, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 1.070431608331544, | |
| "grad_norm": 0.5579238533973694, | |
| "learning_rate": 4.83674949711959e-05, | |
| "loss": 2.8735, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.0715768377353088, | |
| "grad_norm": 0.5268048048019409, | |
| "learning_rate": 4.8272893623605494e-05, | |
| "loss": 2.8672, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 1.0727220671390738, | |
| "grad_norm": 0.49692845344543457, | |
| "learning_rate": 4.8178298465637966e-05, | |
| "loss": 2.8589, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 1.0738672965428386, | |
| "grad_norm": 0.5442864298820496, | |
| "learning_rate": 4.808370983630451e-05, | |
| "loss": 2.8131, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 1.0750125259466037, | |
| "grad_norm": 0.5363568067550659, | |
| "learning_rate": 4.798912807459288e-05, | |
| "loss": 2.8647, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 1.0761577553503687, | |
| "grad_norm": 0.5348667502403259, | |
| "learning_rate": 4.789455351946625e-05, | |
| "loss": 2.9362, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.0773029847541336, | |
| "grad_norm": 0.4977494478225708, | |
| "learning_rate": 4.7799986509861946e-05, | |
| "loss": 2.8651, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 1.0784482141578986, | |
| "grad_norm": 0.5153520107269287, | |
| "learning_rate": 4.770542738469024e-05, | |
| "loss": 2.8881, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 1.0795934435616634, | |
| "grad_norm": 0.5237112045288086, | |
| "learning_rate": 4.761087648283321e-05, | |
| "loss": 2.8173, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 1.0807386729654285, | |
| "grad_norm": 0.5062981843948364, | |
| "learning_rate": 4.75163341431434e-05, | |
| "loss": 2.8825, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 1.0818839023691933, | |
| "grad_norm": 0.4902750849723816, | |
| "learning_rate": 4.742180070444269e-05, | |
| "loss": 2.8563, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.0830291317729583, | |
| "grad_norm": 0.5393232107162476, | |
| "learning_rate": 4.7327276505521065e-05, | |
| "loss": 2.8537, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 1.0841743611767232, | |
| "grad_norm": 0.5459491610527039, | |
| "learning_rate": 4.7232761885135375e-05, | |
| "loss": 2.8856, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 1.0853195905804882, | |
| "grad_norm": 0.5010082125663757, | |
| "learning_rate": 4.713825718200818e-05, | |
| "loss": 2.9151, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 1.086464819984253, | |
| "grad_norm": 0.47848883271217346, | |
| "learning_rate": 4.704376273482648e-05, | |
| "loss": 2.8515, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 1.087610049388018, | |
| "grad_norm": 0.45873889327049255, | |
| "learning_rate": 4.694927888224051e-05, | |
| "loss": 2.8691, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.088755278791783, | |
| "grad_norm": 0.4366936683654785, | |
| "learning_rate": 4.685480596286254e-05, | |
| "loss": 2.8606, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 1.089900508195548, | |
| "grad_norm": 0.43981027603149414, | |
| "learning_rate": 4.676034431526565e-05, | |
| "loss": 2.8425, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 1.0910457375993128, | |
| "grad_norm": 0.44349777698516846, | |
| "learning_rate": 4.666589427798256e-05, | |
| "loss": 2.8373, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 1.0921909670030778, | |
| "grad_norm": 0.45012906193733215, | |
| "learning_rate": 4.657145618950434e-05, | |
| "loss": 2.7538, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 1.0933361964068427, | |
| "grad_norm": 0.47416791319847107, | |
| "learning_rate": 4.6477030388279264e-05, | |
| "loss": 2.8737, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.0944814258106077, | |
| "grad_norm": 0.4794783592224121, | |
| "learning_rate": 4.638261721271155e-05, | |
| "loss": 2.8695, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 1.0956266552143727, | |
| "grad_norm": 0.48366686701774597, | |
| "learning_rate": 4.628821700116016e-05, | |
| "loss": 2.8048, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 1.0967718846181376, | |
| "grad_norm": 0.5032996535301208, | |
| "learning_rate": 4.6193830091937654e-05, | |
| "loss": 2.9137, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 1.0979171140219024, | |
| "grad_norm": 0.5226159691810608, | |
| "learning_rate": 4.6099456823308856e-05, | |
| "loss": 2.8601, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 1.0990623434256674, | |
| "grad_norm": 0.47801947593688965, | |
| "learning_rate": 4.600509753348974e-05, | |
| "loss": 2.8837, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.1002075728294325, | |
| "grad_norm": 0.5159834027290344, | |
| "learning_rate": 4.591075256064615e-05, | |
| "loss": 2.8732, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 1.1013528022331973, | |
| "grad_norm": 0.5375920534133911, | |
| "learning_rate": 4.581642224289265e-05, | |
| "loss": 2.9112, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 1.1024980316369624, | |
| "grad_norm": 0.500263512134552, | |
| "learning_rate": 4.572210691829129e-05, | |
| "loss": 2.8028, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 1.1036432610407272, | |
| "grad_norm": 0.49632856249809265, | |
| "learning_rate": 4.562780692485035e-05, | |
| "loss": 2.9128, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 1.1047884904444922, | |
| "grad_norm": 0.524261474609375, | |
| "learning_rate": 4.553352260052319e-05, | |
| "loss": 2.8319, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.105933719848257, | |
| "grad_norm": 0.49589022994041443, | |
| "learning_rate": 4.5439254283207e-05, | |
| "loss": 2.8127, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 1.107078949252022, | |
| "grad_norm": 0.47969597578048706, | |
| "learning_rate": 4.534500231074165e-05, | |
| "loss": 2.8307, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 1.108224178655787, | |
| "grad_norm": 0.47272107005119324, | |
| "learning_rate": 4.525076702090838e-05, | |
| "loss": 2.8357, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 1.109369408059552, | |
| "grad_norm": 0.4718839228153229, | |
| "learning_rate": 4.515654875142866e-05, | |
| "loss": 2.8521, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 1.1105146374633168, | |
| "grad_norm": 0.493868350982666, | |
| "learning_rate": 4.506234783996297e-05, | |
| "loss": 2.877, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.1116598668670818, | |
| "grad_norm": 0.500551164150238, | |
| "learning_rate": 4.4968164624109545e-05, | |
| "loss": 2.8534, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 1.1128050962708467, | |
| "grad_norm": 0.47467198967933655, | |
| "learning_rate": 4.4873999441403284e-05, | |
| "loss": 2.8302, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 1.1139503256746117, | |
| "grad_norm": 0.494477778673172, | |
| "learning_rate": 4.477985262931437e-05, | |
| "loss": 2.8037, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 1.1150955550783765, | |
| "grad_norm": 0.514734148979187, | |
| "learning_rate": 4.4685724525247215e-05, | |
| "loss": 2.8516, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 1.1162407844821416, | |
| "grad_norm": 0.47042450308799744, | |
| "learning_rate": 4.459161546653913e-05, | |
| "loss": 2.816, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.1173860138859064, | |
| "grad_norm": 0.508755087852478, | |
| "learning_rate": 4.4497525790459204e-05, | |
| "loss": 2.8241, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 1.1185312432896715, | |
| "grad_norm": 0.470024049282074, | |
| "learning_rate": 4.440345583420707e-05, | |
| "loss": 2.8108, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 1.1196764726934365, | |
| "grad_norm": 0.4759051203727722, | |
| "learning_rate": 4.4309405934911674e-05, | |
| "loss": 2.8719, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 1.1208217020972013, | |
| "grad_norm": 0.45637574791908264, | |
| "learning_rate": 4.421537642963007e-05, | |
| "loss": 2.8487, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 1.1219669315009664, | |
| "grad_norm": 0.4891573488712311, | |
| "learning_rate": 4.412136765534624e-05, | |
| "loss": 2.8493, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.1231121609047312, | |
| "grad_norm": 0.5303109288215637, | |
| "learning_rate": 4.4027379948969846e-05, | |
| "loss": 2.8085, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 1.1242573903084963, | |
| "grad_norm": 0.5703256130218506, | |
| "learning_rate": 4.393341364733512e-05, | |
| "loss": 2.8547, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 1.125402619712261, | |
| "grad_norm": 0.6061555743217468, | |
| "learning_rate": 4.383946908719949e-05, | |
| "loss": 2.8631, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 1.1265478491160261, | |
| "grad_norm": 0.6273899674415588, | |
| "learning_rate": 4.374554660524253e-05, | |
| "loss": 2.8537, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 1.127693078519791, | |
| "grad_norm": 0.5794036984443665, | |
| "learning_rate": 4.365164653806464e-05, | |
| "loss": 2.8313, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.128838307923556, | |
| "grad_norm": 0.5450258255004883, | |
| "learning_rate": 4.355776922218593e-05, | |
| "loss": 2.8261, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 1.1299835373273208, | |
| "grad_norm": 0.5514605045318604, | |
| "learning_rate": 4.3463914994044976e-05, | |
| "loss": 2.8006, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 1.1311287667310859, | |
| "grad_norm": 0.4708465039730072, | |
| "learning_rate": 4.337008418999757e-05, | |
| "loss": 2.7886, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 1.1322739961348507, | |
| "grad_norm": 0.5147237777709961, | |
| "learning_rate": 4.3276277146315605e-05, | |
| "loss": 2.8763, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 1.1334192255386157, | |
| "grad_norm": 0.5111591219902039, | |
| "learning_rate": 4.318249419918579e-05, | |
| "loss": 2.8965, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.1345644549423806, | |
| "grad_norm": 0.45969030261039734, | |
| "learning_rate": 4.3088735684708457e-05, | |
| "loss": 2.7846, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 1.1357096843461456, | |
| "grad_norm": 0.47163280844688416, | |
| "learning_rate": 4.299500193889645e-05, | |
| "loss": 2.8163, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 1.1368549137499104, | |
| "grad_norm": 0.48009538650512695, | |
| "learning_rate": 4.2901293297673794e-05, | |
| "loss": 2.8405, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 1.1380001431536755, | |
| "grad_norm": 0.5266420841217041, | |
| "learning_rate": 4.2807610096874535e-05, | |
| "loss": 2.842, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 1.1391453725574405, | |
| "grad_norm": 0.4645231068134308, | |
| "learning_rate": 4.271395267224157e-05, | |
| "loss": 2.8676, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.1402906019612054, | |
| "grad_norm": 0.47144144773483276, | |
| "learning_rate": 4.262032135942539e-05, | |
| "loss": 2.863, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 1.1414358313649702, | |
| "grad_norm": 0.5167996287345886, | |
| "learning_rate": 4.252671649398296e-05, | |
| "loss": 2.897, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 1.1425810607687352, | |
| "grad_norm": 0.5122323632240295, | |
| "learning_rate": 4.243313841137642e-05, | |
| "loss": 2.8663, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 1.1437262901725003, | |
| "grad_norm": 0.5562819242477417, | |
| "learning_rate": 4.233958744697193e-05, | |
| "loss": 2.9046, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 1.144871519576265, | |
| "grad_norm": 0.5265432596206665, | |
| "learning_rate": 4.224606393603847e-05, | |
| "loss": 2.8505, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.1460167489800301, | |
| "grad_norm": 0.4736781120300293, | |
| "learning_rate": 4.2152568213746616e-05, | |
| "loss": 2.8337, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 1.147161978383795, | |
| "grad_norm": 0.49419066309928894, | |
| "learning_rate": 4.2059100615167394e-05, | |
| "loss": 2.8547, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 1.14830720778756, | |
| "grad_norm": 0.47105368971824646, | |
| "learning_rate": 4.1965661475271004e-05, | |
| "loss": 2.8748, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 1.1494524371913248, | |
| "grad_norm": 0.4889761507511139, | |
| "learning_rate": 4.187225112892567e-05, | |
| "loss": 2.8016, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 1.1505976665950899, | |
| "grad_norm": 0.4951672852039337, | |
| "learning_rate": 4.177886991089641e-05, | |
| "loss": 2.8293, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.1517428959988547, | |
| "grad_norm": 0.47535356879234314, | |
| "learning_rate": 4.168551815584385e-05, | |
| "loss": 2.8045, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 1.1528881254026198, | |
| "grad_norm": 0.4469471871852875, | |
| "learning_rate": 4.159219619832309e-05, | |
| "loss": 2.7951, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 1.1540333548063846, | |
| "grad_norm": 0.5131164193153381, | |
| "learning_rate": 4.149890437278235e-05, | |
| "loss": 2.9102, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 1.1551785842101496, | |
| "grad_norm": 0.5199970602989197, | |
| "learning_rate": 4.1405643013561926e-05, | |
| "loss": 2.8451, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 1.1563238136139145, | |
| "grad_norm": 0.4965759515762329, | |
| "learning_rate": 4.1312412454892885e-05, | |
| "loss": 2.8163, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.1574690430176795, | |
| "grad_norm": 0.4741258919239044, | |
| "learning_rate": 4.1219213030895936e-05, | |
| "loss": 2.8432, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 1.1586142724214443, | |
| "grad_norm": 0.46175336837768555, | |
| "learning_rate": 4.112604507558023e-05, | |
| "loss": 2.8825, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 1.1597595018252094, | |
| "grad_norm": 0.45644518733024597, | |
| "learning_rate": 4.1032908922842084e-05, | |
| "loss": 2.8631, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 1.1609047312289742, | |
| "grad_norm": 0.5027674436569214, | |
| "learning_rate": 4.093980490646388e-05, | |
| "loss": 2.8437, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 1.1620499606327392, | |
| "grad_norm": 0.5348462462425232, | |
| "learning_rate": 4.084673336011282e-05, | |
| "loss": 2.8361, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.1631951900365043, | |
| "grad_norm": 0.5205281972885132, | |
| "learning_rate": 4.075369461733971e-05, | |
| "loss": 2.8331, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 1.1643404194402691, | |
| "grad_norm": 0.5650776028633118, | |
| "learning_rate": 4.066068901157787e-05, | |
| "loss": 2.9023, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 1.1654856488440342, | |
| "grad_norm": 0.5416718125343323, | |
| "learning_rate": 4.05677168761418e-05, | |
| "loss": 2.8183, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 1.166630878247799, | |
| "grad_norm": 0.47712674736976624, | |
| "learning_rate": 4.047477854422606e-05, | |
| "loss": 2.8585, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 1.167776107651564, | |
| "grad_norm": 0.47490790486335754, | |
| "learning_rate": 4.038187434890407e-05, | |
| "loss": 2.7941, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.1689213370553289, | |
| "grad_norm": 0.49218589067459106, | |
| "learning_rate": 4.028900462312689e-05, | |
| "loss": 2.8257, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 1.170066566459094, | |
| "grad_norm": 0.4539277255535126, | |
| "learning_rate": 4.01961696997221e-05, | |
| "loss": 2.8154, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 1.1712117958628587, | |
| "grad_norm": 0.49758878350257874, | |
| "learning_rate": 4.010336991139252e-05, | |
| "loss": 2.8991, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 1.1723570252666238, | |
| "grad_norm": 0.508026123046875, | |
| "learning_rate": 4.0010605590715044e-05, | |
| "loss": 2.8909, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 1.1735022546703886, | |
| "grad_norm": 0.48496854305267334, | |
| "learning_rate": 3.991787707013947e-05, | |
| "loss": 2.7767, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.1746474840741536, | |
| "grad_norm": 0.5234657526016235, | |
| "learning_rate": 3.982518468198728e-05, | |
| "loss": 2.859, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 1.1757927134779185, | |
| "grad_norm": 0.5635547637939453, | |
| "learning_rate": 3.97325287584505e-05, | |
| "loss": 2.8283, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 1.1769379428816835, | |
| "grad_norm": 0.5184863805770874, | |
| "learning_rate": 3.963990963159045e-05, | |
| "loss": 2.8226, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 1.1780831722854483, | |
| "grad_norm": 0.5278655886650085, | |
| "learning_rate": 3.9547327633336564e-05, | |
| "loss": 2.8775, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 1.1792284016892134, | |
| "grad_norm": 0.4541829526424408, | |
| "learning_rate": 3.945478309548524e-05, | |
| "loss": 2.8208, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.1803736310929782, | |
| "grad_norm": 0.47832149267196655, | |
| "learning_rate": 3.936227634969858e-05, | |
| "loss": 2.8785, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 1.1815188604967433, | |
| "grad_norm": 0.46495962142944336, | |
| "learning_rate": 3.9269807727503324e-05, | |
| "loss": 2.9003, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 1.1826640899005083, | |
| "grad_norm": 0.46806037425994873, | |
| "learning_rate": 3.917737756028954e-05, | |
| "loss": 2.8832, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 1.1838093193042731, | |
| "grad_norm": 0.4699292480945587, | |
| "learning_rate": 3.9084986179309466e-05, | |
| "loss": 2.8043, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 1.184954548708038, | |
| "grad_norm": 0.5012586712837219, | |
| "learning_rate": 3.899263391567635e-05, | |
| "loss": 2.8403, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.186099778111803, | |
| "grad_norm": 0.4917641580104828, | |
| "learning_rate": 3.890032110036324e-05, | |
| "loss": 2.8293, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 1.187245007515568, | |
| "grad_norm": 0.49173396825790405, | |
| "learning_rate": 3.8808048064201855e-05, | |
| "loss": 2.8868, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 1.1883902369193329, | |
| "grad_norm": 0.4861849844455719, | |
| "learning_rate": 3.8715815137881304e-05, | |
| "loss": 2.7915, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 1.189535466323098, | |
| "grad_norm": 0.5474852323532104, | |
| "learning_rate": 3.862362265194697e-05, | |
| "loss": 2.9182, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 1.1906806957268627, | |
| "grad_norm": 0.5007859468460083, | |
| "learning_rate": 3.853147093679929e-05, | |
| "loss": 2.8388, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.1918259251306278, | |
| "grad_norm": 0.4659948945045471, | |
| "learning_rate": 3.8439360322692584e-05, | |
| "loss": 2.8256, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 1.1929711545343926, | |
| "grad_norm": 0.48769453167915344, | |
| "learning_rate": 3.8347291139733934e-05, | |
| "loss": 2.795, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 1.1941163839381577, | |
| "grad_norm": 0.4881047308444977, | |
| "learning_rate": 3.825526371788186e-05, | |
| "loss": 2.7846, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 1.1952616133419225, | |
| "grad_norm": 0.5094773173332214, | |
| "learning_rate": 3.8163278386945265e-05, | |
| "loss": 2.8577, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 1.1964068427456875, | |
| "grad_norm": 0.506121039390564, | |
| "learning_rate": 3.8071335476582185e-05, | |
| "loss": 2.7938, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.1975520721494524, | |
| "grad_norm": 0.5210023522377014, | |
| "learning_rate": 3.7979435316298616e-05, | |
| "loss": 2.8367, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 1.1986973015532174, | |
| "grad_norm": 0.5009431838989258, | |
| "learning_rate": 3.788757823544742e-05, | |
| "loss": 2.8387, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 1.1998425309569822, | |
| "grad_norm": 0.49134376645088196, | |
| "learning_rate": 3.779576456322698e-05, | |
| "loss": 2.8655, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 1.2009877603607473, | |
| "grad_norm": 0.47163623571395874, | |
| "learning_rate": 3.770399462868015e-05, | |
| "loss": 2.8167, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 1.2021329897645123, | |
| "grad_norm": 0.4546791613101959, | |
| "learning_rate": 3.7612268760693034e-05, | |
| "loss": 2.7889, | |
| "step": 2100 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 3494, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 300, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.363581579034362e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |