{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.28760885196133257,
  "eval_steps": 500,
  "global_step": 900,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00031956539106814733,
      "grad_norm": 5.807275295257568,
      "learning_rate": 0.0,
      "loss": 5.0454,
      "step": 1
    },
    {
      "epoch": 0.0006391307821362947,
      "grad_norm": 6.20149564743042,
      "learning_rate": 6.369426751592357e-07,
      "loss": 5.1424,
      "step": 2
    },
    {
      "epoch": 0.0012782615642725893,
      "grad_norm": 5.7567291259765625,
      "learning_rate": 1.910828025477707e-06,
      "loss": 5.0835,
      "step": 4
    },
    {
      "epoch": 0.001917392346408884,
      "grad_norm": 5.017141819000244,
      "learning_rate": 3.1847133757961785e-06,
      "loss": 5.0733,
      "step": 6
    },
    {
      "epoch": 0.0025565231285451786,
      "grad_norm": 3.2059810161590576,
      "learning_rate": 4.45859872611465e-06,
      "loss": 5.0714,
      "step": 8
    },
    {
      "epoch": 0.003195653910681473,
      "grad_norm": 6.303244113922119,
      "learning_rate": 5.732484076433121e-06,
      "loss": 4.9915,
      "step": 10
    },
    {
      "epoch": 0.003834784692817768,
      "grad_norm": 4.852840423583984,
      "learning_rate": 7.006369426751593e-06,
      "loss": 4.9307,
      "step": 12
    },
    {
      "epoch": 0.004473915474954062,
      "grad_norm": 3.78067946434021,
      "learning_rate": 8.280254777070064e-06,
      "loss": 4.8924,
      "step": 14
    },
    {
      "epoch": 0.005113046257090357,
      "grad_norm": 3.5641331672668457,
      "learning_rate": 9.554140127388536e-06,
      "loss": 4.8244,
      "step": 16
    },
    {
      "epoch": 0.005752177039226652,
      "grad_norm": 2.191957712173462,
      "learning_rate": 1.0828025477707008e-05,
      "loss": 4.6179,
      "step": 18
    },
    {
      "epoch": 0.006391307821362946,
      "grad_norm": 2.0675458908081055,
      "learning_rate": 1.2101910828025478e-05,
      "loss": 4.5827,
      "step": 20
    },
    {
      "epoch": 0.007030438603499241,
      "grad_norm": 1.6559146642684937,
      "learning_rate": 1.337579617834395e-05,
      "loss": 4.4544,
      "step": 22
    },
    {
      "epoch": 0.007669569385635536,
      "grad_norm": 1.3731284141540527,
      "learning_rate": 1.464968152866242e-05,
      "loss": 4.3748,
      "step": 24
    },
    {
      "epoch": 0.00830870016777183,
      "grad_norm": 1.3962030410766602,
      "learning_rate": 1.592356687898089e-05,
      "loss": 4.3269,
      "step": 26
    },
    {
      "epoch": 0.008947830949908125,
      "grad_norm": 1.2659896612167358,
      "learning_rate": 1.7197452229299362e-05,
      "loss": 4.2133,
      "step": 28
    },
    {
      "epoch": 0.00958696173204442,
      "grad_norm": 0.9881806373596191,
      "learning_rate": 1.8471337579617834e-05,
      "loss": 4.0961,
      "step": 30
    },
    {
      "epoch": 0.010226092514180714,
      "grad_norm": 0.9945515394210815,
      "learning_rate": 1.974522292993631e-05,
      "loss": 4.0158,
      "step": 32
    },
    {
      "epoch": 0.01086522329631701,
      "grad_norm": 0.9396588802337646,
      "learning_rate": 2.1019108280254778e-05,
      "loss": 3.8763,
      "step": 34
    },
    {
      "epoch": 0.011504354078453304,
      "grad_norm": 1.0665779113769531,
      "learning_rate": 2.229299363057325e-05,
      "loss": 3.8635,
      "step": 36
    },
    {
      "epoch": 0.012143484860589597,
      "grad_norm": 1.077245831489563,
      "learning_rate": 2.356687898089172e-05,
      "loss": 3.802,
      "step": 38
    },
    {
      "epoch": 0.012782615642725892,
      "grad_norm": 0.8040191531181335,
      "learning_rate": 2.4840764331210193e-05,
      "loss": 3.7284,
      "step": 40
    },
    {
      "epoch": 0.013421746424862187,
      "grad_norm": 1.4325759410858154,
      "learning_rate": 2.6114649681528662e-05,
      "loss": 3.665,
      "step": 42
    },
    {
      "epoch": 0.014060877206998482,
      "grad_norm": 1.3450332880020142,
      "learning_rate": 2.7388535031847134e-05,
      "loss": 3.6242,
      "step": 44
    },
    {
      "epoch": 0.014700007989134777,
      "grad_norm": 0.8203895688056946,
      "learning_rate": 2.8662420382165606e-05,
      "loss": 3.5576,
      "step": 46
    },
    {
      "epoch": 0.015339138771271072,
      "grad_norm": 1.1661335229873657,
      "learning_rate": 2.9936305732484078e-05,
      "loss": 3.522,
      "step": 48
    },
    {
      "epoch": 0.015978269553407365,
      "grad_norm": 1.0148671865463257,
      "learning_rate": 3.121019108280255e-05,
      "loss": 3.4594,
      "step": 50
    },
    {
      "epoch": 0.01661740033554366,
      "grad_norm": 0.6624857187271118,
      "learning_rate": 3.248407643312102e-05,
      "loss": 3.465,
      "step": 52
    },
    {
      "epoch": 0.017256531117679955,
      "grad_norm": 0.943125307559967,
      "learning_rate": 3.375796178343949e-05,
      "loss": 3.4021,
      "step": 54
    },
    {
      "epoch": 0.01789566189981625,
      "grad_norm": 0.9854550957679749,
      "learning_rate": 3.503184713375796e-05,
      "loss": 3.3361,
      "step": 56
    },
    {
      "epoch": 0.018534792681952544,
      "grad_norm": 1.2242411375045776,
      "learning_rate": 3.630573248407643e-05,
      "loss": 3.3283,
      "step": 58
    },
    {
      "epoch": 0.01917392346408884,
      "grad_norm": 0.9556372761726379,
      "learning_rate": 3.7579617834394906e-05,
      "loss": 3.2914,
      "step": 60
    },
    {
      "epoch": 0.019813054246225134,
      "grad_norm": 1.3133809566497803,
      "learning_rate": 3.885350318471338e-05,
      "loss": 3.3126,
      "step": 62
    },
    {
      "epoch": 0.02045218502836143,
      "grad_norm": 0.9322234392166138,
      "learning_rate": 4.012738853503185e-05,
      "loss": 3.2443,
      "step": 64
    },
    {
      "epoch": 0.021091315810497724,
      "grad_norm": 1.4383481740951538,
      "learning_rate": 4.1401273885350325e-05,
      "loss": 3.2428,
      "step": 66
    },
    {
      "epoch": 0.02173044659263402,
      "grad_norm": 1.0156841278076172,
      "learning_rate": 4.267515923566879e-05,
      "loss": 3.1735,
      "step": 68
    },
    {
      "epoch": 0.022369577374770314,
      "grad_norm": 1.1754450798034668,
      "learning_rate": 4.394904458598726e-05,
      "loss": 3.1788,
      "step": 70
    },
    {
      "epoch": 0.02300870815690661,
      "grad_norm": 1.0960084199905396,
      "learning_rate": 4.522292993630574e-05,
      "loss": 3.1963,
      "step": 72
    },
    {
      "epoch": 0.023647838939042903,
      "grad_norm": 1.054401159286499,
      "learning_rate": 4.6496815286624206e-05,
      "loss": 3.1604,
      "step": 74
    },
    {
      "epoch": 0.024286969721179195,
      "grad_norm": 1.1957581043243408,
      "learning_rate": 4.777070063694268e-05,
      "loss": 3.1648,
      "step": 76
    },
    {
      "epoch": 0.02492610050331549,
      "grad_norm": 0.7756203413009644,
      "learning_rate": 4.904458598726115e-05,
      "loss": 3.1066,
      "step": 78
    },
    {
      "epoch": 0.025565231285451784,
      "grad_norm": 1.0459190607070923,
      "learning_rate": 5.031847133757962e-05,
      "loss": 3.1571,
      "step": 80
    },
    {
      "epoch": 0.02620436206758808,
      "grad_norm": 0.9746761322021484,
      "learning_rate": 5.159235668789809e-05,
      "loss": 3.1026,
      "step": 82
    },
    {
      "epoch": 0.026843492849724374,
      "grad_norm": 1.0770882368087769,
      "learning_rate": 5.286624203821656e-05,
      "loss": 3.1125,
      "step": 84
    },
    {
      "epoch": 0.02748262363186067,
      "grad_norm": 0.9542138576507568,
      "learning_rate": 5.414012738853504e-05,
      "loss": 3.059,
      "step": 86
    },
    {
      "epoch": 0.028121754413996964,
      "grad_norm": 1.3454134464263916,
      "learning_rate": 5.5414012738853505e-05,
      "loss": 3.0645,
      "step": 88
    },
    {
      "epoch": 0.02876088519613326,
      "grad_norm": 1.0354089736938477,
      "learning_rate": 5.6687898089171974e-05,
      "loss": 3.04,
      "step": 90
    },
    {
      "epoch": 0.029400015978269554,
      "grad_norm": 1.1339548826217651,
      "learning_rate": 5.796178343949045e-05,
      "loss": 3.0625,
      "step": 92
    },
    {
      "epoch": 0.03003914676040585,
      "grad_norm": 1.200062870979309,
      "learning_rate": 5.923566878980892e-05,
      "loss": 3.057,
      "step": 94
    },
    {
      "epoch": 0.030678277542542143,
      "grad_norm": 1.395698070526123,
      "learning_rate": 6.0509554140127386e-05,
      "loss": 3.0341,
      "step": 96
    },
    {
      "epoch": 0.031317408324678435,
      "grad_norm": 0.9392653703689575,
      "learning_rate": 6.178343949044585e-05,
      "loss": 3.0087,
      "step": 98
    },
    {
      "epoch": 0.03195653910681473,
      "grad_norm": 1.1301568746566772,
      "learning_rate": 6.305732484076433e-05,
      "loss": 3.0294,
      "step": 100
    },
    {
      "epoch": 0.032595669888951025,
      "grad_norm": 0.9571443796157837,
      "learning_rate": 6.43312101910828e-05,
      "loss": 3.0522,
      "step": 102
    },
    {
      "epoch": 0.03323480067108732,
      "grad_norm": 0.9494081735610962,
      "learning_rate": 6.560509554140127e-05,
      "loss": 3.0012,
      "step": 104
    },
    {
      "epoch": 0.033873931453223614,
      "grad_norm": 1.3672889471054077,
      "learning_rate": 6.687898089171974e-05,
      "loss": 3.0188,
      "step": 106
    },
    {
      "epoch": 0.03451306223535991,
      "grad_norm": 1.2122056484222412,
      "learning_rate": 6.815286624203822e-05,
      "loss": 2.9497,
      "step": 108
    },
    {
      "epoch": 0.035152193017496204,
      "grad_norm": 1.2184698581695557,
      "learning_rate": 6.942675159235669e-05,
      "loss": 2.9739,
      "step": 110
    },
    {
      "epoch": 0.0357913237996325,
      "grad_norm": 1.09404456615448,
      "learning_rate": 7.070063694267515e-05,
      "loss": 3.0241,
      "step": 112
    },
    {
      "epoch": 0.036430454581768794,
      "grad_norm": 1.1653715372085571,
      "learning_rate": 7.197452229299363e-05,
      "loss": 2.9606,
      "step": 114
    },
    {
      "epoch": 0.03706958536390509,
      "grad_norm": 1.050194501876831,
      "learning_rate": 7.32484076433121e-05,
      "loss": 2.9582,
      "step": 116
    },
    {
      "epoch": 0.037708716146041384,
      "grad_norm": 1.1262322664260864,
      "learning_rate": 7.452229299363057e-05,
      "loss": 2.9462,
      "step": 118
    },
    {
      "epoch": 0.03834784692817768,
      "grad_norm": 1.1232227087020874,
      "learning_rate": 7.579617834394906e-05,
      "loss": 2.9784,
      "step": 120
    },
    {
      "epoch": 0.03898697771031397,
      "grad_norm": 0.9088072776794434,
      "learning_rate": 7.707006369426753e-05,
      "loss": 2.944,
      "step": 122
    },
    {
      "epoch": 0.03962610849245027,
      "grad_norm": 0.8985419869422913,
      "learning_rate": 7.834394904458599e-05,
      "loss": 2.9003,
      "step": 124
    },
    {
      "epoch": 0.04026523927458656,
      "grad_norm": 1.2419854402542114,
      "learning_rate": 7.961783439490447e-05,
      "loss": 2.9753,
      "step": 126
    },
    {
      "epoch": 0.04090437005672286,
      "grad_norm": 1.4533154964447021,
      "learning_rate": 8.089171974522294e-05,
      "loss": 2.9069,
      "step": 128
    },
    {
      "epoch": 0.04154350083885915,
      "grad_norm": 1.475258231163025,
      "learning_rate": 8.21656050955414e-05,
      "loss": 2.9402,
      "step": 130
    },
    {
      "epoch": 0.04218263162099545,
      "grad_norm": 1.0348827838897705,
      "learning_rate": 8.343949044585988e-05,
      "loss": 2.9295,
      "step": 132
    },
    {
      "epoch": 0.04282176240313174,
      "grad_norm": 0.9143719673156738,
      "learning_rate": 8.471337579617836e-05,
      "loss": 2.9408,
      "step": 134
    },
    {
      "epoch": 0.04346089318526804,
      "grad_norm": 1.1310492753982544,
      "learning_rate": 8.598726114649682e-05,
      "loss": 2.875,
      "step": 136
    },
    {
      "epoch": 0.04410002396740433,
      "grad_norm": 1.0483386516571045,
      "learning_rate": 8.726114649681529e-05,
      "loss": 2.9142,
      "step": 138
    },
    {
      "epoch": 0.04473915474954063,
      "grad_norm": 0.921519935131073,
      "learning_rate": 8.853503184713377e-05,
      "loss": 2.9188,
      "step": 140
    },
    {
      "epoch": 0.04537828553167692,
      "grad_norm": 1.3271907567977905,
      "learning_rate": 8.980891719745223e-05,
      "loss": 2.9075,
      "step": 142
    },
    {
      "epoch": 0.04601741631381322,
      "grad_norm": 1.7488983869552612,
      "learning_rate": 9.10828025477707e-05,
      "loss": 2.9201,
      "step": 144
    },
    {
      "epoch": 0.04665654709594951,
      "grad_norm": 1.4263213872909546,
      "learning_rate": 9.235668789808918e-05,
      "loss": 2.9045,
      "step": 146
    },
    {
      "epoch": 0.04729567787808581,
      "grad_norm": 0.8777288794517517,
      "learning_rate": 9.363057324840766e-05,
      "loss": 2.8959,
      "step": 148
    },
    {
      "epoch": 0.047934808660222095,
      "grad_norm": 1.3402196168899536,
      "learning_rate": 9.490445859872612e-05,
      "loss": 2.8893,
      "step": 150
    },
    {
      "epoch": 0.04857393944235839,
      "grad_norm": 1.0943351984024048,
      "learning_rate": 9.617834394904459e-05,
      "loss": 2.9137,
      "step": 152
    },
    {
      "epoch": 0.049213070224494684,
      "grad_norm": 1.0603907108306885,
      "learning_rate": 9.745222929936307e-05,
      "loss": 2.8677,
      "step": 154
    },
    {
      "epoch": 0.04985220100663098,
      "grad_norm": 1.010772705078125,
      "learning_rate": 9.872611464968153e-05,
      "loss": 2.8374,
      "step": 156
    },
    {
      "epoch": 0.050491331788767274,
      "grad_norm": 1.2628934383392334,
      "learning_rate": 0.0001,
      "loss": 2.9009,
      "step": 158
    },
    {
      "epoch": 0.05113046257090357,
      "grad_norm": 1.146183729171753,
      "learning_rate": 9.999988833687822e-05,
      "loss": 2.8633,
      "step": 160
    },
    {
      "epoch": 0.051769593353039864,
      "grad_norm": 0.8704808354377747,
      "learning_rate": 9.99995533480116e-05,
      "loss": 2.8464,
      "step": 162
    },
    {
      "epoch": 0.05240872413517616,
      "grad_norm": 1.044418454170227,
      "learning_rate": 9.999899503489641e-05,
      "loss": 2.8695,
      "step": 164
    },
    {
      "epoch": 0.053047854917312454,
      "grad_norm": 0.833791196346283,
      "learning_rate": 9.999821340002636e-05,
      "loss": 2.8605,
      "step": 166
    },
    {
      "epoch": 0.05368698569944875,
      "grad_norm": 0.922815203666687,
      "learning_rate": 9.99972084468926e-05,
      "loss": 2.8737,
      "step": 168
    },
    {
      "epoch": 0.05432611648158504,
      "grad_norm": 0.9120809435844421,
      "learning_rate": 9.999598017998384e-05,
      "loss": 2.8753,
      "step": 170
    },
    {
      "epoch": 0.05496524726372134,
      "grad_norm": 1.0272431373596191,
      "learning_rate": 9.999452860478611e-05,
      "loss": 2.8907,
      "step": 172
    },
    {
      "epoch": 0.05560437804585763,
      "grad_norm": 0.7777165174484253,
      "learning_rate": 9.999285372778295e-05,
      "loss": 2.8517,
      "step": 174
    },
    {
      "epoch": 0.05624350882799393,
      "grad_norm": 0.7110999822616577,
      "learning_rate": 9.999095555645523e-05,
      "loss": 2.8211,
      "step": 176
    },
    {
      "epoch": 0.05688263961013022,
      "grad_norm": 0.7857067584991455,
      "learning_rate": 9.998883409928117e-05,
      "loss": 2.8463,
      "step": 178
    },
    {
      "epoch": 0.05752177039226652,
      "grad_norm": 0.8582798838615417,
      "learning_rate": 9.998648936573629e-05,
      "loss": 2.8197,
      "step": 180
    },
    {
      "epoch": 0.05816090117440281,
      "grad_norm": 0.9790541529655457,
      "learning_rate": 9.998392136629345e-05,
      "loss": 2.8193,
      "step": 182
    },
    {
      "epoch": 0.05880003195653911,
      "grad_norm": 1.1599719524383545,
      "learning_rate": 9.998113011242264e-05,
      "loss": 2.8206,
      "step": 184
    },
    {
      "epoch": 0.0594391627386754,
      "grad_norm": 0.8326631188392639,
      "learning_rate": 9.99781156165911e-05,
      "loss": 2.8349,
      "step": 186
    },
    {
      "epoch": 0.0600782935208117,
      "grad_norm": 0.8876377940177917,
      "learning_rate": 9.997487789226312e-05,
      "loss": 2.8225,
      "step": 188
    },
    {
      "epoch": 0.06071742430294799,
      "grad_norm": 0.9899202585220337,
      "learning_rate": 9.997141695390009e-05,
      "loss": 2.7875,
      "step": 190
    },
    {
      "epoch": 0.06135655508508429,
      "grad_norm": 1.0686557292938232,
      "learning_rate": 9.996773281696037e-05,
      "loss": 2.8024,
      "step": 192
    },
    {
      "epoch": 0.06199568586722058,
      "grad_norm": 0.8899752497673035,
      "learning_rate": 9.996382549789926e-05,
      "loss": 2.8225,
      "step": 194
    },
    {
      "epoch": 0.06263481664935687,
      "grad_norm": 0.7781797647476196,
      "learning_rate": 9.995969501416891e-05,
      "loss": 2.8046,
      "step": 196
    },
    {
      "epoch": 0.06327394743149317,
      "grad_norm": 0.6428512930870056,
      "learning_rate": 9.995534138421818e-05,
      "loss": 2.7693,
      "step": 198
    },
    {
      "epoch": 0.06391307821362946,
      "grad_norm": 0.7047809958457947,
      "learning_rate": 9.995076462749273e-05,
      "loss": 2.766,
      "step": 200
    },
    {
      "epoch": 0.06455220899576576,
      "grad_norm": 0.6256312131881714,
      "learning_rate": 9.99459647644347e-05,
      "loss": 2.8071,
      "step": 202
    },
    {
      "epoch": 0.06519133977790205,
      "grad_norm": 0.699400007724762,
      "learning_rate": 9.994094181648283e-05,
      "loss": 2.8347,
      "step": 204
    },
    {
      "epoch": 0.06583047056003835,
      "grad_norm": 0.7256817817687988,
      "learning_rate": 9.993569580607225e-05,
      "loss": 2.8074,
      "step": 206
    },
    {
      "epoch": 0.06646960134217464,
      "grad_norm": 0.573846161365509,
      "learning_rate": 9.993022675663437e-05,
      "loss": 2.7413,
      "step": 208
    },
    {
      "epoch": 0.06710873212431094,
      "grad_norm": 0.7314406037330627,
      "learning_rate": 9.992453469259685e-05,
      "loss": 2.7983,
      "step": 210
    },
    {
      "epoch": 0.06774786290644723,
      "grad_norm": 0.7307546734809875,
      "learning_rate": 9.991861963938342e-05,
      "loss": 2.8026,
      "step": 212
    },
    {
      "epoch": 0.06838699368858353,
      "grad_norm": 0.6367102861404419,
      "learning_rate": 9.991248162341384e-05,
      "loss": 2.7424,
      "step": 214
    },
    {
      "epoch": 0.06902612447071982,
      "grad_norm": 0.8630378246307373,
      "learning_rate": 9.99061206721037e-05,
      "loss": 2.7395,
      "step": 216
    },
    {
      "epoch": 0.06966525525285612,
      "grad_norm": 0.7586290240287781,
      "learning_rate": 9.989953681386433e-05,
      "loss": 2.7624,
      "step": 218
    },
    {
      "epoch": 0.07030438603499241,
      "grad_norm": 0.7091168761253357,
      "learning_rate": 9.989273007810271e-05,
      "loss": 2.7719,
      "step": 220
    },
    {
      "epoch": 0.07094351681712871,
      "grad_norm": 0.684183657169342,
      "learning_rate": 9.98857004952213e-05,
      "loss": 2.7806,
      "step": 222
    },
    {
      "epoch": 0.071582647599265,
      "grad_norm": 0.920498788356781,
      "learning_rate": 9.987844809661791e-05,
      "loss": 2.7626,
      "step": 224
    },
    {
      "epoch": 0.0722217783814013,
      "grad_norm": 0.730060875415802,
      "learning_rate": 9.987097291468552e-05,
      "loss": 2.8107,
      "step": 226
    },
    {
      "epoch": 0.07286090916353759,
      "grad_norm": 0.8606828451156616,
      "learning_rate": 9.986327498281227e-05,
      "loss": 2.7814,
      "step": 228
    },
    {
      "epoch": 0.07350003994567389,
      "grad_norm": 0.8068298101425171,
      "learning_rate": 9.985535433538113e-05,
      "loss": 2.7775,
      "step": 230
    },
    {
      "epoch": 0.07413917072781018,
      "grad_norm": 0.6887542009353638,
      "learning_rate": 9.984721100776989e-05,
      "loss": 2.784,
      "step": 232
    },
    {
      "epoch": 0.07477830150994648,
      "grad_norm": 0.84773850440979,
      "learning_rate": 9.98388450363509e-05,
      "loss": 2.7333,
      "step": 234
    },
    {
      "epoch": 0.07541743229208277,
      "grad_norm": 0.7914923429489136,
      "learning_rate": 9.9830256458491e-05,
      "loss": 2.7363,
      "step": 236
    },
    {
      "epoch": 0.07605656307421906,
      "grad_norm": 0.8284217715263367,
      "learning_rate": 9.982144531255127e-05,
      "loss": 2.7389,
      "step": 238
    },
    {
      "epoch": 0.07669569385635536,
      "grad_norm": 0.7706480622291565,
      "learning_rate": 9.981241163788694e-05,
      "loss": 2.7377,
      "step": 240
    },
    {
      "epoch": 0.07733482463849164,
      "grad_norm": 0.6147120594978333,
      "learning_rate": 9.980315547484711e-05,
      "loss": 2.7862,
      "step": 242
    },
    {
      "epoch": 0.07797395542062795,
      "grad_norm": 0.6364494562149048,
      "learning_rate": 9.979367686477469e-05,
      "loss": 2.762,
      "step": 244
    },
    {
      "epoch": 0.07861308620276423,
      "grad_norm": 0.6944818496704102,
      "learning_rate": 9.978397585000611e-05,
      "loss": 2.7624,
      "step": 246
    },
    {
      "epoch": 0.07925221698490054,
      "grad_norm": 1.2648204565048218,
      "learning_rate": 9.977405247387119e-05,
      "loss": 2.7544,
      "step": 248
    },
    {
      "epoch": 0.07989134776703682,
      "grad_norm": 1.0054659843444824,
      "learning_rate": 9.976390678069295e-05,
      "loss": 2.7523,
      "step": 250
    },
    {
      "epoch": 0.08053047854917313,
      "grad_norm": 0.715492308139801,
      "learning_rate": 9.975353881578738e-05,
      "loss": 2.7341,
      "step": 252
    },
    {
      "epoch": 0.08116960933130941,
      "grad_norm": 0.7963582277297974,
      "learning_rate": 9.974294862546325e-05,
      "loss": 2.7484,
      "step": 254
    },
    {
      "epoch": 0.08180874011344572,
      "grad_norm": 0.7069251537322998,
      "learning_rate": 9.97321362570219e-05,
      "loss": 2.7719,
      "step": 256
    },
    {
      "epoch": 0.082447870895582,
      "grad_norm": 0.5716209411621094,
      "learning_rate": 9.972110175875706e-05,
      "loss": 2.8079,
      "step": 258
    },
    {
      "epoch": 0.0830870016777183,
      "grad_norm": 0.65562903881073,
      "learning_rate": 9.970984517995456e-05,
      "loss": 2.7642,
      "step": 260
    },
    {
      "epoch": 0.0837261324598546,
      "grad_norm": 0.647085964679718,
      "learning_rate": 9.969836657089225e-05,
      "loss": 2.7139,
      "step": 262
    },
    {
      "epoch": 0.0843652632419909,
      "grad_norm": 0.6401609778404236,
      "learning_rate": 9.968666598283955e-05,
      "loss": 2.7278,
      "step": 264
    },
    {
      "epoch": 0.08500439402412718,
      "grad_norm": 0.5514021515846252,
      "learning_rate": 9.967474346805746e-05,
      "loss": 2.7332,
      "step": 266
    },
    {
      "epoch": 0.08564352480626349,
      "grad_norm": 0.5908826589584351,
      "learning_rate": 9.96625990797982e-05,
      "loss": 2.741,
      "step": 268
    },
    {
      "epoch": 0.08628265558839977,
      "grad_norm": 0.5510653853416443,
      "learning_rate": 9.965023287230497e-05,
      "loss": 2.7025,
      "step": 270
    },
    {
      "epoch": 0.08692178637053607,
      "grad_norm": 0.5656317472457886,
      "learning_rate": 9.963764490081176e-05,
      "loss": 2.7184,
      "step": 272
    },
    {
      "epoch": 0.08756091715267236,
      "grad_norm": 0.5132441520690918,
      "learning_rate": 9.962483522154302e-05,
      "loss": 2.7632,
      "step": 274
    },
    {
      "epoch": 0.08820004793480866,
      "grad_norm": 0.6730588674545288,
      "learning_rate": 9.961180389171352e-05,
      "loss": 2.7705,
      "step": 276
    },
    {
      "epoch": 0.08883917871694495,
      "grad_norm": 0.5657472610473633,
      "learning_rate": 9.959855096952804e-05,
      "loss": 2.7191,
      "step": 278
    },
    {
      "epoch": 0.08947830949908125,
      "grad_norm": 0.8265955448150635,
      "learning_rate": 9.958507651418106e-05,
      "loss": 2.7718,
      "step": 280
    },
    {
      "epoch": 0.09011744028121754,
      "grad_norm": 0.8996996879577637,
      "learning_rate": 9.957138058585658e-05,
      "loss": 2.7124,
      "step": 282
    },
    {
      "epoch": 0.09075657106335384,
      "grad_norm": 0.6458889842033386,
      "learning_rate": 9.955746324572781e-05,
      "loss": 2.7403,
      "step": 284
    },
    {
      "epoch": 0.09139570184549013,
      "grad_norm": 0.7175470590591431,
      "learning_rate": 9.954332455595689e-05,
      "loss": 2.7188,
      "step": 286
    },
    {
      "epoch": 0.09203483262762643,
      "grad_norm": 0.6640183329582214,
      "learning_rate": 9.952896457969463e-05,
      "loss": 2.7223,
      "step": 288
    },
    {
      "epoch": 0.09267396340976272,
      "grad_norm": 0.6551202535629272,
      "learning_rate": 9.951438338108022e-05,
      "loss": 2.7189,
      "step": 290
    },
    {
      "epoch": 0.09331309419189902,
      "grad_norm": 0.6980673670768738,
      "learning_rate": 9.949958102524093e-05,
      "loss": 2.7183,
      "step": 292
    },
    {
      "epoch": 0.09395222497403531,
      "grad_norm": 0.5926324129104614,
      "learning_rate": 9.948455757829187e-05,
      "loss": 2.7476,
      "step": 294
    },
    {
      "epoch": 0.09459135575617161,
      "grad_norm": 0.5434746742248535,
      "learning_rate": 9.946931310733565e-05,
      "loss": 2.7368,
      "step": 296
    },
    {
      "epoch": 0.0952304865383079,
      "grad_norm": 0.6466372609138489,
      "learning_rate": 9.945384768046206e-05,
      "loss": 2.7307,
      "step": 298
    },
    {
      "epoch": 0.09586961732044419,
      "grad_norm": 0.6376985311508179,
      "learning_rate": 9.943816136674782e-05,
      "loss": 2.7239,
      "step": 300
    },
    {
      "epoch": 0.09650874810258049,
      "grad_norm": 0.6092653274536133,
      "learning_rate": 9.942225423625624e-05,
      "loss": 2.7678,
      "step": 302
    },
    {
      "epoch": 0.09714787888471678,
      "grad_norm": 0.7219493389129639,
      "learning_rate": 9.94061263600369e-05,
      "loss": 2.723,
      "step": 304
    },
    {
      "epoch": 0.09778700966685308,
      "grad_norm": 0.5244786143302917,
      "learning_rate": 9.93897778101254e-05,
      "loss": 2.7329,
      "step": 306
    },
    {
      "epoch": 0.09842614044898937,
      "grad_norm": 0.5384829044342041,
      "learning_rate": 9.937320865954289e-05,
      "loss": 2.661,
      "step": 308
    },
    {
      "epoch": 0.09906527123112567,
      "grad_norm": 0.624033510684967,
      "learning_rate": 9.935641898229594e-05,
      "loss": 2.7177,
      "step": 310
    },
    {
      "epoch": 0.09970440201326196,
      "grad_norm": 0.6381804347038269,
      "learning_rate": 9.933940885337602e-05,
      "loss": 2.7616,
      "step": 312
    },
    {
      "epoch": 0.10034353279539826,
      "grad_norm": 0.7671799659729004,
      "learning_rate": 9.932217834875934e-05,
      "loss": 2.7256,
      "step": 314
    },
    {
      "epoch": 0.10098266357753455,
      "grad_norm": 0.5695899128913879,
      "learning_rate": 9.930472754540634e-05,
      "loss": 2.6975,
      "step": 316
    },
    {
      "epoch": 0.10162179435967085,
      "grad_norm": 0.6461712121963501,
      "learning_rate": 9.92870565212615e-05,
      "loss": 2.7121,
      "step": 318
    },
    {
      "epoch": 0.10226092514180714,
      "grad_norm": 0.6111094355583191,
      "learning_rate": 9.926916535525283e-05,
      "loss": 2.6964,
      "step": 320
    },
    {
      "epoch": 0.10290005592394344,
      "grad_norm": 0.6368963718414307,
      "learning_rate": 9.925105412729175e-05,
      "loss": 2.6793,
      "step": 322
    },
    {
      "epoch": 0.10353918670607973,
      "grad_norm": 0.6973994374275208,
      "learning_rate": 9.923272291827245e-05,
      "loss": 2.6862,
      "step": 324
    },
    {
      "epoch": 0.10417831748821603,
      "grad_norm": 0.6717987656593323,
      "learning_rate": 9.921417181007175e-05,
      "loss": 2.686,
      "step": 326
    },
    {
      "epoch": 0.10481744827035232,
      "grad_norm": 0.6282898783683777,
      "learning_rate": 9.919540088554862e-05,
      "loss": 2.6807,
      "step": 328
    },
    {
      "epoch": 0.10545657905248862,
      "grad_norm": 0.6404539942741394,
      "learning_rate": 9.91764102285439e-05,
      "loss": 2.659,
      "step": 330
    },
    {
      "epoch": 0.10609570983462491,
      "grad_norm": 0.679418683052063,
      "learning_rate": 9.915719992387979e-05,
      "loss": 2.662,
      "step": 332
    },
    {
      "epoch": 0.10673484061676121,
      "grad_norm": 0.7185142040252686,
      "learning_rate": 9.913777005735963e-05,
      "loss": 2.7208,
      "step": 334
    },
    {
      "epoch": 0.1073739713988975,
      "grad_norm": 0.5328919887542725,
      "learning_rate": 9.911812071576736e-05,
      "loss": 2.6428,
      "step": 336
    },
    {
      "epoch": 0.1080131021810338,
      "grad_norm": 0.6135143637657166,
      "learning_rate": 9.909825198686729e-05,
      "loss": 2.6543,
      "step": 338
    },
    {
      "epoch": 0.10865223296317009,
      "grad_norm": 0.6830089092254639,
      "learning_rate": 9.907816395940359e-05,
      "loss": 2.677,
      "step": 340
    },
    {
      "epoch": 0.10929136374530639,
      "grad_norm": 0.6469766497612,
      "learning_rate": 9.90578567230999e-05,
      "loss": 2.726,
      "step": 342
    },
    {
      "epoch": 0.10993049452744268,
      "grad_norm": 0.5899373888969421,
      "learning_rate": 9.903733036865903e-05,
      "loss": 2.7208,
      "step": 344
    },
    {
      "epoch": 0.11056962530957898,
      "grad_norm": 0.82301926612854,
      "learning_rate": 9.901658498776246e-05,
      "loss": 2.6925,
      "step": 346
    },
    {
      "epoch": 0.11120875609171527,
      "grad_norm": 0.8507819771766663,
      "learning_rate": 9.899562067306989e-05,
      "loss": 2.6905,
      "step": 348
    },
    {
      "epoch": 0.11184788687385157,
      "grad_norm": 0.6785141229629517,
      "learning_rate": 9.897443751821902e-05,
      "loss": 2.6643,
      "step": 350
    },
    {
      "epoch": 0.11248701765598786,
      "grad_norm": 0.6389050483703613,
      "learning_rate": 9.89530356178249e-05,
      "loss": 2.6769,
      "step": 352
    },
    {
      "epoch": 0.11312614843812416,
      "grad_norm": 0.5903960466384888,
      "learning_rate": 9.893141506747967e-05,
      "loss": 2.6793,
      "step": 354
    },
    {
      "epoch": 0.11376527922026045,
      "grad_norm": 0.583307147026062,
      "learning_rate": 9.890957596375206e-05,
      "loss": 2.676,
      "step": 356
    },
    {
      "epoch": 0.11440441000239673,
      "grad_norm": 0.6372009515762329,
      "learning_rate": 9.888751840418695e-05,
      "loss": 2.6567,
      "step": 358
    },
    {
      "epoch": 0.11504354078453304,
      "grad_norm": 0.7056903839111328,
      "learning_rate": 9.886524248730497e-05,
      "loss": 2.6973,
      "step": 360
    },
    {
      "epoch": 0.11568267156666932,
      "grad_norm": 0.5459578633308411,
      "learning_rate": 9.88427483126021e-05,
      "loss": 2.6522,
      "step": 362
    },
    {
      "epoch": 0.11632180234880563,
      "grad_norm": 0.5186561346054077,
      "learning_rate": 9.882003598054907e-05,
      "loss": 2.6567,
      "step": 364
    },
    {
      "epoch": 0.11696093313094191,
      "grad_norm": 0.5469943881034851,
      "learning_rate": 9.879710559259114e-05,
      "loss": 2.6586,
      "step": 366
    },
    {
      "epoch": 0.11760006391307821,
      "grad_norm": 0.6790450215339661,
      "learning_rate": 9.877395725114742e-05,
      "loss": 2.6874,
      "step": 368
    },
    {
      "epoch": 0.1182391946952145,
      "grad_norm": 0.624920129776001,
      "learning_rate": 9.875059105961056e-05,
      "loss": 2.6777,
      "step": 370
    },
    {
      "epoch": 0.1188783254773508,
      "grad_norm": 0.6039037704467773,
      "learning_rate": 9.872700712234624e-05,
      "loss": 2.6881,
      "step": 372
    },
    {
      "epoch": 0.11951745625948709,
      "grad_norm": 0.6653264760971069,
      "learning_rate": 9.87032055446927e-05,
      "loss": 2.6388,
      "step": 374
    },
    {
      "epoch": 0.1201565870416234,
      "grad_norm": 0.7718141078948975,
      "learning_rate": 9.867918643296025e-05,
      "loss": 2.6686,
      "step": 376
    },
    {
      "epoch": 0.12079571782375968,
      "grad_norm": 0.6357402801513672,
      "learning_rate": 9.865494989443092e-05,
      "loss": 2.6611,
      "step": 378
    },
    {
      "epoch": 0.12143484860589598,
      "grad_norm": 0.560418963432312,
      "learning_rate": 9.863049603735775e-05,
      "loss": 2.6944,
      "step": 380
    },
    {
      "epoch": 0.12207397938803227,
      "grad_norm": 0.5758490562438965,
      "learning_rate": 9.860582497096452e-05,
      "loss": 2.6589,
      "step": 382
    },
    {
      "epoch": 0.12271311017016857,
      "grad_norm": 0.6144497990608215,
      "learning_rate": 9.858093680544516e-05,
      "loss": 2.6839,
      "step": 384
    },
    {
      "epoch": 0.12335224095230486,
      "grad_norm": 0.5986223816871643,
      "learning_rate": 9.855583165196329e-05,
      "loss": 2.6778,
      "step": 386
    },
    {
      "epoch": 0.12399137173444116,
      "grad_norm": 0.5350797176361084,
      "learning_rate": 9.853050962265169e-05,
      "loss": 2.6539,
      "step": 388
    },
    {
      "epoch": 0.12463050251657745,
      "grad_norm": 0.5589949488639832,
      "learning_rate": 9.850497083061183e-05,
      "loss": 2.6536,
      "step": 390
    },
    {
      "epoch": 0.12526963329871374,
      "grad_norm": 0.5695136189460754,
      "learning_rate": 9.847921538991339e-05,
      "loss": 2.6615,
      "step": 392
    },
    {
      "epoch": 0.12590876408085006,
      "grad_norm": 0.5739374756813049,
      "learning_rate": 9.845324341559366e-05,
      "loss": 2.6883,
      "step": 394
    },
    {
      "epoch": 0.12654789486298634,
      "grad_norm": 0.528075098991394,
      "learning_rate": 9.84270550236571e-05,
      "loss": 2.6944,
      "step": 396
    },
    {
      "epoch": 0.12718702564512263,
      "grad_norm": 0.6400613188743591,
      "learning_rate": 9.840065033107483e-05,
      "loss": 2.6596,
      "step": 398
    },
    {
      "epoch": 0.12782615642725892,
      "grad_norm": 0.6734158992767334,
      "learning_rate": 9.837402945578406e-05,
      "loss": 2.6562,
      "step": 400
    },
    {
      "epoch": 0.12846528720939523,
      "grad_norm": 0.6197201013565063,
      "learning_rate": 9.834719251668761e-05,
      "loss": 2.6971,
      "step": 402
    },
    {
      "epoch": 0.12910441799153152,
      "grad_norm": 0.5766332745552063,
      "learning_rate": 9.832013963365332e-05,
      "loss": 2.6355,
      "step": 404
    },
    {
      "epoch": 0.1297435487736678,
      "grad_norm": 0.7926291823387146,
      "learning_rate": 9.829287092751357e-05,
      "loss": 2.6438,
      "step": 406
    },
    {
      "epoch": 0.1303826795558041,
      "grad_norm": 0.7527420520782471,
      "learning_rate": 9.826538652006469e-05,
      "loss": 2.6695,
      "step": 408
    },
    {
      "epoch": 0.13102181033794041,
      "grad_norm": 0.7154802083969116,
      "learning_rate": 9.823768653406652e-05,
      "loss": 2.6158,
      "step": 410
    },
    {
      "epoch": 0.1316609411200767,
      "grad_norm": 0.5435774326324463,
      "learning_rate": 9.820977109324169e-05,
      "loss": 2.6843,
      "step": 412
    },
    {
      "epoch": 0.132300071902213,
      "grad_norm": 0.5893809199333191,
      "learning_rate": 9.818164032227522e-05,
      "loss": 2.6607,
      "step": 414
    },
    {
      "epoch": 0.13293920268434928,
      "grad_norm": 0.5635148882865906,
      "learning_rate": 9.815329434681392e-05,
      "loss": 2.658,
      "step": 416
    },
    {
      "epoch": 0.13357833346648557,
      "grad_norm": 0.4904562830924988,
      "learning_rate": 9.812473329346578e-05,
      "loss": 2.6616,
      "step": 418
    },
    {
      "epoch": 0.13421746424862188,
      "grad_norm": 0.5800766944885254,
      "learning_rate": 9.809595728979945e-05,
      "loss": 2.6657,
      "step": 420
    },
    {
      "epoch": 0.13485659503075817,
      "grad_norm": 0.5110253691673279,
      "learning_rate": 9.806696646434367e-05,
      "loss": 2.6192,
      "step": 422
    },
    {
      "epoch": 0.13549572581289446,
      "grad_norm": 0.5567732453346252,
      "learning_rate": 9.803776094658668e-05,
      "loss": 2.6475,
      "step": 424
    },
    {
      "epoch": 0.13613485659503075,
      "grad_norm": 0.5255835056304932,
      "learning_rate": 9.800834086697566e-05,
      "loss": 2.6644,
      "step": 426
    },
    {
      "epoch": 0.13677398737716706,
      "grad_norm": 0.4851606786251068,
      "learning_rate": 9.797870635691613e-05,
      "loss": 2.6628,
      "step": 428
    },
    {
      "epoch": 0.13741311815930335,
      "grad_norm": 0.4904446005821228,
      "learning_rate": 9.794885754877135e-05,
      "loss": 2.6222,
      "step": 430
    },
    {
      "epoch": 0.13805224894143964,
      "grad_norm": 0.47077298164367676,
      "learning_rate": 9.791879457586178e-05,
      "loss": 2.5875,
      "step": 432
    },
    {
      "epoch": 0.13869137972357592,
      "grad_norm": 0.4484720528125763,
      "learning_rate": 9.788851757246443e-05,
      "loss": 2.6279,
      "step": 434
    },
    {
      "epoch": 0.13933051050571224,
      "grad_norm": 0.5684689283370972,
      "learning_rate": 9.785802667381227e-05,
      "loss": 2.6507,
      "step": 436
    },
    {
      "epoch": 0.13996964128784853,
      "grad_norm": 0.5868870615959167,
      "learning_rate": 9.78273220160937e-05,
      "loss": 2.6476,
      "step": 438
    },
    {
      "epoch": 0.14060877206998482,
      "grad_norm": 0.5244540572166443,
      "learning_rate": 9.77964037364518e-05,
      "loss": 2.6353,
      "step": 440
    },
    {
      "epoch": 0.1412479028521211,
      "grad_norm": 0.5107213258743286,
      "learning_rate": 9.776527197298386e-05,
      "loss": 2.6335,
      "step": 442
    },
    {
      "epoch": 0.14188703363425742,
      "grad_norm": 0.5410230159759521,
      "learning_rate": 9.773392686474065e-05,
      "loss": 2.6248,
      "step": 444
    },
    {
      "epoch": 0.1425261644163937,
      "grad_norm": 0.5540198683738708,
      "learning_rate": 9.770236855172587e-05,
      "loss": 2.6304,
      "step": 446
    },
    {
      "epoch": 0.14316529519853,
      "grad_norm": 0.6982893347740173,
      "learning_rate": 9.767059717489557e-05,
      "loss": 2.6285,
      "step": 448
    },
    {
      "epoch": 0.14380442598066628,
      "grad_norm": 0.7649112939834595,
      "learning_rate": 9.763861287615732e-05,
      "loss": 2.6863,
      "step": 450
    },
    {
      "epoch": 0.1444435567628026,
      "grad_norm": 0.5209079384803772,
      "learning_rate": 9.760641579836984e-05,
      "loss": 2.6262,
      "step": 452
    },
    {
      "epoch": 0.1450826875449389,
      "grad_norm": 0.5985437631607056,
      "learning_rate": 9.757400608534215e-05,
      "loss": 2.5451,
      "step": 454
    },
    {
      "epoch": 0.14572181832707518,
      "grad_norm": 0.6232045888900757,
      "learning_rate": 9.754138388183305e-05,
      "loss": 2.6142,
      "step": 456
    },
    {
      "epoch": 0.14636094910921146,
      "grad_norm": 0.7111669778823853,
      "learning_rate": 9.750854933355042e-05,
      "loss": 2.5868,
      "step": 458
    },
    {
      "epoch": 0.14700007989134778,
      "grad_norm": 0.6749933362007141,
      "learning_rate": 9.747550258715059e-05,
      "loss": 2.6233,
      "step": 460
    },
    {
      "epoch": 0.14763921067348407,
      "grad_norm": 0.5915788412094116,
      "learning_rate": 9.744224379023768e-05,
      "loss": 2.6233,
      "step": 462
    },
    {
      "epoch": 0.14827834145562035,
      "grad_norm": 0.6704515814781189,
      "learning_rate": 9.740877309136291e-05,
      "loss": 2.6432,
      "step": 464
    },
    {
      "epoch": 0.14891747223775664,
      "grad_norm": 0.6156161427497864,
      "learning_rate": 9.737509064002402e-05,
      "loss": 2.6436,
      "step": 466
    },
    {
      "epoch": 0.14955660301989296,
      "grad_norm": 0.49440738558769226,
      "learning_rate": 9.734119658666448e-05,
      "loss": 2.6488,
      "step": 468
    },
    {
      "epoch": 0.15019573380202925,
      "grad_norm": 0.6561670899391174,
      "learning_rate": 9.730709108267296e-05,
      "loss": 2.6191,
      "step": 470
    },
    {
      "epoch": 0.15083486458416553,
      "grad_norm": 0.6310847997665405,
      "learning_rate": 9.727277428038253e-05,
      "loss": 2.6055,
      "step": 472
    },
    {
      "epoch": 0.15147399536630182,
      "grad_norm": 0.5141007304191589,
      "learning_rate": 9.723824633307001e-05,
      "loss": 2.626,
      "step": 474
    },
    {
      "epoch": 0.1521131261484381,
      "grad_norm": 0.5299694538116455,
      "learning_rate": 9.720350739495538e-05,
      "loss": 2.6401,
      "step": 476
    },
    {
      "epoch": 0.15275225693057443,
      "grad_norm": 0.5702034831047058,
      "learning_rate": 9.716855762120097e-05,
      "loss": 2.6392,
      "step": 478
    },
    {
      "epoch": 0.15339138771271071,
      "grad_norm": 0.5058117508888245,
      "learning_rate": 9.713339716791076e-05,
      "loss": 2.5778,
      "step": 480
    },
    {
      "epoch": 0.154030518494847,
      "grad_norm": 0.6530377864837646,
      "learning_rate": 9.709802619212987e-05,
      "loss": 2.6359,
      "step": 482
    },
    {
      "epoch": 0.1546696492769833,
      "grad_norm": 0.6136478781700134,
      "learning_rate": 9.706244485184357e-05,
      "loss": 2.6117,
      "step": 484
    },
    {
      "epoch": 0.1553087800591196,
      "grad_norm": 0.5947436094284058,
      "learning_rate": 9.702665330597684e-05,
      "loss": 2.6148,
      "step": 486
    },
    {
      "epoch": 0.1559479108412559,
      "grad_norm": 0.6332894563674927,
      "learning_rate": 9.699065171439349e-05,
      "loss": 2.6251,
      "step": 488
    },
    {
      "epoch": 0.15658704162339218,
      "grad_norm": 0.5429502129554749,
      "learning_rate": 9.695444023789554e-05,
      "loss": 2.577,
      "step": 490
    },
    {
      "epoch": 0.15722617240552847,
      "grad_norm": 0.6252620220184326,
      "learning_rate": 9.691801903822244e-05,
      "loss": 2.6114,
      "step": 492
    },
    {
      "epoch": 0.15786530318766478,
      "grad_norm": 0.5587325692176819,
      "learning_rate": 9.68813882780504e-05,
      "loss": 2.632,
      "step": 494
    },
    {
      "epoch": 0.15850443396980107,
      "grad_norm": 0.5149174332618713,
      "learning_rate": 9.68445481209916e-05,
      "loss": 2.6394,
      "step": 496
    },
    {
      "epoch": 0.15914356475193736,
      "grad_norm": 0.5343561172485352,
      "learning_rate": 9.680749873159354e-05,
      "loss": 2.572,
      "step": 498
    },
    {
      "epoch": 0.15978269553407365,
      "grad_norm": 0.5082888603210449,
      "learning_rate": 9.677024027533821e-05,
      "loss": 2.5786,
      "step": 500
    },
    {
      "epoch": 0.16042182631620996,
      "grad_norm": 0.46739038825035095,
      "learning_rate": 9.673277291864145e-05,
      "loss": 2.5933,
      "step": 502
    },
    {
      "epoch": 0.16106095709834625,
      "grad_norm": 0.5262092351913452,
      "learning_rate": 9.669509682885216e-05,
      "loss": 2.6295,
      "step": 504
    },
    {
      "epoch": 0.16170008788048254,
      "grad_norm": 0.5002930760383606,
      "learning_rate": 9.66572121742515e-05,
      "loss": 2.6306,
      "step": 506
    },
    {
      "epoch": 0.16233921866261883,
      "grad_norm": 0.4859941601753235,
      "learning_rate": 9.661911912405222e-05,
      "loss": 2.5742,
      "step": 508
    },
    {
      "epoch": 0.16297834944475514,
      "grad_norm": 0.6142066717147827,
      "learning_rate": 9.65808178483979e-05,
      "loss": 2.61,
      "step": 510
    },
    {
      "epoch": 0.16361748022689143,
      "grad_norm": 0.6018419861793518,
      "learning_rate": 9.654230851836214e-05,
      "loss": 2.6158,
      "step": 512
    },
    {
      "epoch": 0.16425661100902772,
      "grad_norm": 0.5785476565361023,
      "learning_rate": 9.650359130594779e-05,
      "loss": 2.629,
      "step": 514
    },
    {
      "epoch": 0.164895741791164,
      "grad_norm": 0.5036047697067261,
      "learning_rate": 9.646466638408629e-05,
      "loss": 2.6087,
      "step": 516
    },
    {
      "epoch": 0.16553487257330032,
      "grad_norm": 0.5089232325553894,
      "learning_rate": 9.642553392663672e-05,
      "loss": 2.6299,
      "step": 518
    },
    {
      "epoch": 0.1661740033554366,
      "grad_norm": 0.5314218997955322,
      "learning_rate": 9.63861941083852e-05,
      "loss": 2.6152,
      "step": 520
    },
    {
      "epoch": 0.1668131341375729,
      "grad_norm": 0.6545165181159973,
      "learning_rate": 9.634664710504402e-05,
      "loss": 2.5711,
      "step": 522
    },
    {
      "epoch": 0.1674522649197092,
      "grad_norm": 0.7461646199226379,
      "learning_rate": 9.630689309325082e-05,
      "loss": 2.627,
      "step": 524
    },
    {
      "epoch": 0.1680913957018455,
      "grad_norm": 0.6585918068885803,
      "learning_rate": 9.626693225056794e-05,
      "loss": 2.6231,
      "step": 526
    },
    {
      "epoch": 0.1687305264839818,
      "grad_norm": 0.5888398289680481,
      "learning_rate": 9.62267647554814e-05,
      "loss": 2.6175,
      "step": 528
    },
    {
      "epoch": 0.16936965726611808,
      "grad_norm": 0.49957162141799927,
      "learning_rate": 9.618639078740037e-05,
      "loss": 2.5771,
      "step": 530
    },
    {
      "epoch": 0.17000878804825437,
      "grad_norm": 0.4573955535888672,
      "learning_rate": 9.614581052665616e-05,
      "loss": 2.5855,
      "step": 532
    },
    {
      "epoch": 0.17064791883039068,
      "grad_norm": 0.5360051393508911,
      "learning_rate": 9.610502415450153e-05,
      "loss": 2.6107,
      "step": 534
    },
    {
      "epoch": 0.17128704961252697,
      "grad_norm": 0.5413601994514465,
      "learning_rate": 9.606403185310981e-05,
      "loss": 2.5971,
      "step": 536
    },
    {
      "epoch": 0.17192618039466326,
      "grad_norm": 0.5360136032104492,
      "learning_rate": 9.602283380557416e-05,
      "loss": 2.5878,
      "step": 538
    },
    {
      "epoch": 0.17256531117679955,
      "grad_norm": 0.653225839138031,
      "learning_rate": 9.598143019590664e-05,
      "loss": 2.6,
      "step": 540
    },
    {
      "epoch": 0.17320444195893583,
      "grad_norm": 0.5268750786781311,
      "learning_rate": 9.593982120903754e-05,
      "loss": 2.5992,
      "step": 542
    },
    {
      "epoch": 0.17384357274107215,
      "grad_norm": 0.5311806797981262,
      "learning_rate": 9.589800703081442e-05,
      "loss": 2.5939,
      "step": 544
    },
    {
      "epoch": 0.17448270352320844,
      "grad_norm": 0.47583094239234924,
      "learning_rate": 9.585598784800135e-05,
      "loss": 2.5863,
      "step": 546
    },
    {
      "epoch": 0.17512183430534473,
      "grad_norm": 0.44130444526672363,
      "learning_rate": 9.581376384827804e-05,
      "loss": 2.5568,
      "step": 548
    },
    {
      "epoch": 0.175760965087481,
      "grad_norm": 0.45064234733581543,
      "learning_rate": 9.577133522023906e-05,
      "loss": 2.5888,
      "step": 550
    },
    {
      "epoch": 0.17640009586961733,
      "grad_norm": 0.4643968343734741,
      "learning_rate": 9.572870215339294e-05,
      "loss": 2.6121,
      "step": 552
    },
    {
      "epoch": 0.17703922665175362,
      "grad_norm": 0.446347713470459,
      "learning_rate": 9.568586483816129e-05,
      "loss": 2.614,
      "step": 554
    },
    {
      "epoch": 0.1776783574338899,
      "grad_norm": 0.48379895091056824,
      "learning_rate": 9.564282346587809e-05,
      "loss": 2.6353,
      "step": 556
    },
    {
      "epoch": 0.1783174882160262,
      "grad_norm": 0.45891985297203064,
      "learning_rate": 9.559957822878867e-05,
      "loss": 2.6111,
      "step": 558
    },
    {
      "epoch": 0.1789566189981625,
      "grad_norm": 0.49106699228286743,
      "learning_rate": 9.555612932004896e-05,
      "loss": 2.5876,
      "step": 560
    },
    {
      "epoch": 0.1795957497802988,
      "grad_norm": 0.5220739245414734,
      "learning_rate": 9.55124769337246e-05,
      "loss": 2.5988,
      "step": 562
    },
    {
      "epoch": 0.18023488056243508,
      "grad_norm": 0.6365030407905579,
      "learning_rate": 9.546862126479006e-05,
      "loss": 2.5763,
      "step": 564
    },
    {
      "epoch": 0.18087401134457137,
      "grad_norm": 0.706681489944458,
      "learning_rate": 9.542456250912776e-05,
      "loss": 2.5965,
      "step": 566
    },
    {
      "epoch": 0.1815131421267077,
      "grad_norm": 0.4519253373146057,
      "learning_rate": 9.538030086352725e-05,
      "loss": 2.568,
      "step": 568
    },
    {
      "epoch": 0.18215227290884398,
      "grad_norm": 0.6023289561271667,
      "learning_rate": 9.533583652568426e-05,
      "loss": 2.6034,
      "step": 570
    },
    {
      "epoch": 0.18279140369098026,
      "grad_norm": 0.581615686416626,
      "learning_rate": 9.529116969419986e-05,
      "loss": 2.5858,
      "step": 572
    },
    {
      "epoch": 0.18343053447311655,
      "grad_norm": 0.49777430295944214,
      "learning_rate": 9.524630056857958e-05,
      "loss": 2.6062,
      "step": 574
    },
    {
      "epoch": 0.18406966525525287,
      "grad_norm": 0.5936197638511658,
      "learning_rate": 9.520122934923246e-05,
      "loss": 2.5976,
      "step": 576
    },
    {
      "epoch": 0.18470879603738916,
      "grad_norm": 0.5317326784133911,
      "learning_rate": 9.515595623747022e-05,
      "loss": 2.6004,
      "step": 578
    },
    {
      "epoch": 0.18534792681952544,
      "grad_norm": 0.524297297000885,
      "learning_rate": 9.511048143550637e-05,
      "loss": 2.583,
      "step": 580
    },
    {
      "epoch": 0.18598705760166173,
      "grad_norm": 0.5107091665267944,
      "learning_rate": 9.506480514645523e-05,
      "loss": 2.5704,
      "step": 582
    },
    {
      "epoch": 0.18662618838379805,
      "grad_norm": 0.4521612226963043,
      "learning_rate": 9.501892757433107e-05,
      "loss": 2.5903,
      "step": 584
    },
    {
      "epoch": 0.18726531916593434,
      "grad_norm": 0.48701736330986023,
      "learning_rate": 9.497284892404721e-05,
      "loss": 2.5758,
      "step": 586
    },
    {
      "epoch": 0.18790444994807062,
      "grad_norm": 0.613917887210846,
      "learning_rate": 9.492656940141512e-05,
      "loss": 2.5749,
      "step": 588
    },
    {
      "epoch": 0.1885435807302069,
      "grad_norm": 0.5269163846969604,
      "learning_rate": 9.488008921314338e-05,
      "loss": 2.6126,
      "step": 590
    },
    {
      "epoch": 0.18918271151234323,
      "grad_norm": 0.6326431632041931,
      "learning_rate": 9.483340856683696e-05,
      "loss": 2.5863,
      "step": 592
    },
    {
      "epoch": 0.18982184229447951,
      "grad_norm": 0.47863009572029114,
      "learning_rate": 9.47865276709961e-05,
      "loss": 2.6201,
      "step": 594
    },
    {
      "epoch": 0.1904609730766158,
      "grad_norm": 0.5771295428276062,
      "learning_rate": 9.473944673501549e-05,
      "loss": 2.5914,
      "step": 596
    },
    {
      "epoch": 0.1911001038587521,
      "grad_norm": 0.4584767818450928,
      "learning_rate": 9.469216596918331e-05,
      "loss": 2.5497,
      "step": 598
    },
    {
      "epoch": 0.19173923464088838,
      "grad_norm": 0.4598289728164673,
      "learning_rate": 9.464468558468026e-05,
      "loss": 2.5841,
      "step": 600
    },
    {
      "epoch": 0.1923783654230247,
      "grad_norm": 0.516592800617218,
      "learning_rate": 9.459700579357869e-05,
      "loss": 2.6013,
      "step": 602
    },
    {
      "epoch": 0.19301749620516098,
      "grad_norm": 0.5296542048454285,
      "learning_rate": 9.454912680884154e-05,
      "loss": 2.6085,
      "step": 604
    },
    {
      "epoch": 0.19365662698729727,
      "grad_norm": 0.5447851419448853,
      "learning_rate": 9.45010488443215e-05,
      "loss": 2.5507,
      "step": 606
    },
    {
      "epoch": 0.19429575776943356,
      "grad_norm": 0.49331796169281006,
      "learning_rate": 9.445277211476e-05,
      "loss": 2.5476,
      "step": 608
    },
    {
      "epoch": 0.19493488855156987,
      "grad_norm": 0.4537939429283142,
      "learning_rate": 9.440429683578624e-05,
      "loss": 2.5977,
      "step": 610
    },
    {
      "epoch": 0.19557401933370616,
      "grad_norm": 0.5129672884941101,
      "learning_rate": 9.435562322391627e-05,
      "loss": 2.5689,
      "step": 612
    },
    {
      "epoch": 0.19621315011584245,
      "grad_norm": 0.5162326693534851,
      "learning_rate": 9.430675149655199e-05,
      "loss": 2.5981,
      "step": 614
    },
    {
      "epoch": 0.19685228089797874,
      "grad_norm": 0.5716260075569153,
      "learning_rate": 9.425768187198016e-05,
      "loss": 2.547,
      "step": 616
    },
    {
      "epoch": 0.19749141168011505,
      "grad_norm": 0.5598787069320679,
      "learning_rate": 9.420841456937151e-05,
      "loss": 2.5743,
      "step": 618
    },
    {
      "epoch": 0.19813054246225134,
      "grad_norm": 0.5771391987800598,
      "learning_rate": 9.415894980877966e-05,
      "loss": 2.589,
      "step": 620
    },
    {
      "epoch": 0.19876967324438763,
      "grad_norm": 0.5378340482711792,
      "learning_rate": 9.410928781114019e-05,
      "loss": 2.5916,
      "step": 622
    },
    {
      "epoch": 0.19940880402652392,
      "grad_norm": 0.5003606081008911,
      "learning_rate": 9.405942879826967e-05,
      "loss": 2.5535,
      "step": 624
    },
    {
      "epoch": 0.20004793480866023,
      "grad_norm": 0.5581315755844116,
      "learning_rate": 9.400937299286458e-05,
      "loss": 2.6016,
      "step": 626
    },
    {
      "epoch": 0.20068706559079652,
      "grad_norm": 0.5600181818008423,
      "learning_rate": 9.395912061850046e-05,
      "loss": 2.5622,
      "step": 628
    },
    {
      "epoch": 0.2013261963729328,
      "grad_norm": 0.5221248269081116,
      "learning_rate": 9.390867189963075e-05,
      "loss": 2.5584,
      "step": 630
    },
    {
      "epoch": 0.2019653271550691,
      "grad_norm": 0.4963245391845703,
      "learning_rate": 9.385802706158594e-05,
      "loss": 2.54,
      "step": 632
    },
    {
      "epoch": 0.2026044579372054,
      "grad_norm": 0.4757302403450012,
      "learning_rate": 9.380718633057246e-05,
      "loss": 2.5856,
      "step": 634
    },
    {
      "epoch": 0.2032435887193417,
      "grad_norm": 0.4876170754432678,
      "learning_rate": 9.37561499336717e-05,
      "loss": 2.5912,
      "step": 636
    },
    {
      "epoch": 0.203882719501478,
      "grad_norm": 0.4831182360649109,
      "learning_rate": 9.370491809883895e-05,
      "loss": 2.5395,
      "step": 638
    },
    {
      "epoch": 0.20452185028361428,
      "grad_norm": 0.5880109071731567,
      "learning_rate": 9.365349105490253e-05,
      "loss": 2.5579,
      "step": 640
    },
    {
      "epoch": 0.2051609810657506,
      "grad_norm": 0.497311532497406,
      "learning_rate": 9.360186903156259e-05,
      "loss": 2.5629,
      "step": 642
    },
    {
      "epoch": 0.20580011184788688,
      "grad_norm": 0.5942720174789429,
      "learning_rate": 9.355005225939017e-05,
      "loss": 2.5816,
      "step": 644
    },
    {
      "epoch": 0.20643924263002317,
      "grad_norm": 0.5332151651382446,
      "learning_rate": 9.34980409698262e-05,
      "loss": 2.5603,
      "step": 646
    },
    {
      "epoch": 0.20707837341215946,
      "grad_norm": 0.4901409149169922,
      "learning_rate": 9.344583539518036e-05,
      "loss": 2.569,
      "step": 648
    },
    {
      "epoch": 0.20771750419429577,
      "grad_norm": 0.521522581577301,
      "learning_rate": 9.339343576863018e-05,
      "loss": 2.6077,
      "step": 650
    },
    {
      "epoch": 0.20835663497643206,
      "grad_norm": 0.49068787693977356,
      "learning_rate": 9.334084232421988e-05,
      "loss": 2.5729,
      "step": 652
    },
    {
      "epoch": 0.20899576575856835,
      "grad_norm": 0.48800089955329895,
      "learning_rate": 9.32880552968594e-05,
      "loss": 2.5814,
      "step": 654
    },
    {
      "epoch": 0.20963489654070463,
      "grad_norm": 0.5036289691925049,
      "learning_rate": 9.323507492232328e-05,
      "loss": 2.5795,
      "step": 656
    },
    {
      "epoch": 0.21027402732284092,
      "grad_norm": 0.4648139476776123,
      "learning_rate": 9.318190143724972e-05,
      "loss": 2.572,
      "step": 658
    },
    {
      "epoch": 0.21091315810497724,
      "grad_norm": 0.42503541707992554,
      "learning_rate": 9.312853507913938e-05,
      "loss": 2.5765,
      "step": 660
    },
    {
      "epoch": 0.21155228888711353,
      "grad_norm": 0.483327180147171,
      "learning_rate": 9.307497608635447e-05,
      "loss": 2.5965,
      "step": 662
    },
    {
      "epoch": 0.21219141966924981,
      "grad_norm": 0.49550801515579224,
      "learning_rate": 9.302122469811752e-05,
      "loss": 2.5412,
      "step": 664
    },
    {
      "epoch": 0.2128305504513861,
      "grad_norm": 0.457082599401474,
      "learning_rate": 9.296728115451046e-05,
      "loss": 2.5945,
      "step": 666
    },
    {
      "epoch": 0.21346968123352242,
      "grad_norm": 0.5289996862411499,
      "learning_rate": 9.291314569647346e-05,
      "loss": 2.5364,
      "step": 668
    },
    {
      "epoch": 0.2141088120156587,
      "grad_norm": 0.5246165990829468,
      "learning_rate": 9.285881856580392e-05,
      "loss": 2.5313,
      "step": 670
    },
    {
      "epoch": 0.214747942797795,
      "grad_norm": 0.5950086712837219,
      "learning_rate": 9.280430000515528e-05,
      "loss": 2.5621,
      "step": 672
    },
    {
      "epoch": 0.21538707357993128,
      "grad_norm": 0.49669399857521057,
      "learning_rate": 9.274959025803604e-05,
      "loss": 2.5515,
      "step": 674
    },
    {
      "epoch": 0.2160262043620676,
      "grad_norm": 0.5234604477882385,
      "learning_rate": 9.269468956880871e-05,
      "loss": 2.5432,
      "step": 676
    },
| { | |
| "epoch": 0.21666533514420389, | |
| "grad_norm": 0.5024713277816772, | |
| "learning_rate": 9.263959818268853e-05, | |
| "loss": 2.5893, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.21730446592634017, | |
| "grad_norm": 0.4908897876739502, | |
| "learning_rate": 9.258431634574256e-05, | |
| "loss": 2.6035, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.21794359670847646, | |
| "grad_norm": 0.47038817405700684, | |
| "learning_rate": 9.252884430488849e-05, | |
| "loss": 2.5652, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.21858272749061278, | |
| "grad_norm": 0.47875434160232544, | |
| "learning_rate": 9.247318230789359e-05, | |
| "loss": 2.5902, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.21922185827274906, | |
| "grad_norm": 0.4665825366973877, | |
| "learning_rate": 9.241733060337354e-05, | |
| "loss": 2.5292, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.21986098905488535, | |
| "grad_norm": 0.4810079336166382, | |
| "learning_rate": 9.236128944079138e-05, | |
| "loss": 2.5792, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.22050011983702164, | |
| "grad_norm": 0.45069095492362976, | |
| "learning_rate": 9.230505907045635e-05, | |
| "loss": 2.5316, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.22113925061915796, | |
| "grad_norm": 0.40244781970977783, | |
| "learning_rate": 9.224863974352278e-05, | |
| "loss": 2.5563, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.22177838140129424, | |
| "grad_norm": 0.5229255557060242, | |
| "learning_rate": 9.219203171198902e-05, | |
| "loss": 2.5402, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.22241751218343053, | |
| "grad_norm": 0.5138113498687744, | |
| "learning_rate": 9.213523522869625e-05, | |
| "loss": 2.5914, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.22305664296556682, | |
| "grad_norm": 0.47990405559539795, | |
| "learning_rate": 9.207825054732736e-05, | |
| "loss": 2.5525, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.22369577374770314, | |
| "grad_norm": 0.48454561829566956, | |
| "learning_rate": 9.202107792240587e-05, | |
| "loss": 2.5379, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.22433490452983942, | |
| "grad_norm": 0.49185454845428467, | |
| "learning_rate": 9.19637176092947e-05, | |
| "loss": 2.5462, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.2249740353119757, | |
| "grad_norm": 0.4852677583694458, | |
| "learning_rate": 9.190616986419512e-05, | |
| "loss": 2.5222, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.225613166094112, | |
| "grad_norm": 0.503039538860321, | |
| "learning_rate": 9.18484349441456e-05, | |
| "loss": 2.5714, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.22625229687624832, | |
| "grad_norm": 0.4584214389324188, | |
| "learning_rate": 9.179051310702056e-05, | |
| "loss": 2.5694, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.2268914276583846, | |
| "grad_norm": 0.46065405011177063, | |
| "learning_rate": 9.173240461152935e-05, | |
| "loss": 2.5804, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.2275305584405209, | |
| "grad_norm": 0.48372742533683777, | |
| "learning_rate": 9.1674109717215e-05, | |
| "loss": 2.5489, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.22816968922265718, | |
| "grad_norm": 0.43927186727523804, | |
| "learning_rate": 9.16156286844531e-05, | |
| "loss": 2.541, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.22880882000479347, | |
| "grad_norm": 0.46032947301864624, | |
| "learning_rate": 9.155696177445064e-05, | |
| "loss": 2.5597, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.22944795078692978, | |
| "grad_norm": 0.4477051794528961, | |
| "learning_rate": 9.149810924924482e-05, | |
| "loss": 2.551, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.23008708156906607, | |
| "grad_norm": 0.4732860326766968, | |
| "learning_rate": 9.143907137170194e-05, | |
| "loss": 2.5688, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.23072621235120236, | |
| "grad_norm": 0.520808219909668, | |
| "learning_rate": 9.137984840551612e-05, | |
| "loss": 2.5429, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.23136534313333865, | |
| "grad_norm": 0.448128879070282, | |
| "learning_rate": 9.132044061520823e-05, | |
| "loss": 2.5146, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.23200447391547496, | |
| "grad_norm": 0.520537257194519, | |
| "learning_rate": 9.126084826612464e-05, | |
| "loss": 2.5718, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.23264360469761125, | |
| "grad_norm": 0.5061787962913513, | |
| "learning_rate": 9.120107162443605e-05, | |
| "loss": 2.5341, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.23328273547974754, | |
| "grad_norm": 0.4683222770690918, | |
| "learning_rate": 9.114111095713633e-05, | |
| "loss": 2.5351, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.23392186626188383, | |
| "grad_norm": 0.4754564166069031, | |
| "learning_rate": 9.108096653204125e-05, | |
| "loss": 2.5798, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.23456099704402014, | |
| "grad_norm": 0.5304054021835327, | |
| "learning_rate": 9.102063861778744e-05, | |
| "loss": 2.5812, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.23520012782615643, | |
| "grad_norm": 0.4747471809387207, | |
| "learning_rate": 9.0960127483831e-05, | |
| "loss": 2.5847, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.23583925860829272, | |
| "grad_norm": 0.4957279562950134, | |
| "learning_rate": 9.089943340044642e-05, | |
| "loss": 2.5689, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.236478389390429, | |
| "grad_norm": 0.5040017366409302, | |
| "learning_rate": 9.083855663872533e-05, | |
| "loss": 2.5345, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.23711752017256532, | |
| "grad_norm": 0.5398538708686829, | |
| "learning_rate": 9.07774974705753e-05, | |
| "loss": 2.5517, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.2377566509547016, | |
| "grad_norm": 0.5123056173324585, | |
| "learning_rate": 9.071625616871862e-05, | |
| "loss": 2.5746, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.2383957817368379, | |
| "grad_norm": 0.4740076959133148, | |
| "learning_rate": 9.06548330066911e-05, | |
| "loss": 2.5449, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.23903491251897419, | |
| "grad_norm": 0.4199361801147461, | |
| "learning_rate": 9.05932282588408e-05, | |
| "loss": 2.5857, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.2396740433011105, | |
| "grad_norm": 0.4691718816757202, | |
| "learning_rate": 9.053144220032688e-05, | |
| "loss": 2.5408, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.2403131740832468, | |
| "grad_norm": 0.4801616668701172, | |
| "learning_rate": 9.04694751071183e-05, | |
| "loss": 2.6167, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.24095230486538308, | |
| "grad_norm": 0.5200051069259644, | |
| "learning_rate": 9.040732725599261e-05, | |
| "loss": 2.5032, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.24159143564751936, | |
| "grad_norm": 0.5068468451499939, | |
| "learning_rate": 9.034499892453477e-05, | |
| "loss": 2.5041, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.24223056642965568, | |
| "grad_norm": 0.5166811347007751, | |
| "learning_rate": 9.028249039113577e-05, | |
| "loss": 2.6254, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.24286969721179197, | |
| "grad_norm": 0.5714825987815857, | |
| "learning_rate": 9.021980193499157e-05, | |
| "loss": 2.5375, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.24350882799392826, | |
| "grad_norm": 0.4392567574977875, | |
| "learning_rate": 9.015693383610169e-05, | |
| "loss": 2.5482, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.24414795877606454, | |
| "grad_norm": 0.44030579924583435, | |
| "learning_rate": 9.009388637526808e-05, | |
| "loss": 2.5577, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.24478708955820086, | |
| "grad_norm": 0.49010273814201355, | |
| "learning_rate": 9.00306598340938e-05, | |
| "loss": 2.5707, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.24542622034033715, | |
| "grad_norm": 0.560543417930603, | |
| "learning_rate": 8.996725449498173e-05, | |
| "loss": 2.5574, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.24606535112247344, | |
| "grad_norm": 0.5686501264572144, | |
| "learning_rate": 8.990367064113343e-05, | |
| "loss": 2.5459, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.24670448190460972, | |
| "grad_norm": 0.5197829008102417, | |
| "learning_rate": 8.983990855654774e-05, | |
| "loss": 2.5316, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.247343612686746, | |
| "grad_norm": 0.48393699526786804, | |
| "learning_rate": 8.977596852601961e-05, | |
| "loss": 2.5376, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.24798274346888233, | |
| "grad_norm": 0.4604134261608124, | |
| "learning_rate": 8.971185083513878e-05, | |
| "loss": 2.5373, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.24862187425101862, | |
| "grad_norm": 0.5080364346504211, | |
| "learning_rate": 8.964755577028852e-05, | |
| "loss": 2.516, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.2492610050331549, | |
| "grad_norm": 0.5315148830413818, | |
| "learning_rate": 8.958308361864429e-05, | |
| "loss": 2.5182, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.2499001358152912, | |
| "grad_norm": 0.4669964015483856, | |
| "learning_rate": 8.951843466817261e-05, | |
| "loss": 2.506, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.2505392665974275, | |
| "grad_norm": 0.5169178247451782, | |
| "learning_rate": 8.94536092076296e-05, | |
| "loss": 2.5524, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.25117839737956377, | |
| "grad_norm": 0.530693769454956, | |
| "learning_rate": 8.93886075265598e-05, | |
| "loss": 2.5515, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.2518175281617001, | |
| "grad_norm": 0.5086248517036438, | |
| "learning_rate": 8.932342991529484e-05, | |
| "loss": 2.5235, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.2524566589438364, | |
| "grad_norm": 0.5186027884483337, | |
| "learning_rate": 8.925807666495212e-05, | |
| "loss": 2.5616, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.2530957897259727, | |
| "grad_norm": 0.5286267995834351, | |
| "learning_rate": 8.919254806743358e-05, | |
| "loss": 2.558, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.253734920508109, | |
| "grad_norm": 0.56434166431427, | |
| "learning_rate": 8.912684441542432e-05, | |
| "loss": 2.5315, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.25437405129024526, | |
| "grad_norm": 0.5112208127975464, | |
| "learning_rate": 8.906096600239135e-05, | |
| "loss": 2.5842, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.25501318207238155, | |
| "grad_norm": 0.5397393703460693, | |
| "learning_rate": 8.899491312258221e-05, | |
| "loss": 2.5405, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.25565231285451784, | |
| "grad_norm": 0.4671647250652313, | |
| "learning_rate": 8.892868607102376e-05, | |
| "loss": 2.4999, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.2562914436366541, | |
| "grad_norm": 0.41425585746765137, | |
| "learning_rate": 8.886228514352076e-05, | |
| "loss": 2.5312, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.25693057441879047, | |
| "grad_norm": 0.43078532814979553, | |
| "learning_rate": 8.879571063665462e-05, | |
| "loss": 2.5218, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.25756970520092676, | |
| "grad_norm": 0.432005912065506, | |
| "learning_rate": 8.872896284778201e-05, | |
| "loss": 2.523, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.25820883598306305, | |
| "grad_norm": 0.40941286087036133, | |
| "learning_rate": 8.866204207503359e-05, | |
| "loss": 2.575, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.25884796676519933, | |
| "grad_norm": 0.431316077709198, | |
| "learning_rate": 8.859494861731267e-05, | |
| "loss": 2.5837, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.2594870975473356, | |
| "grad_norm": 0.4376726448535919, | |
| "learning_rate": 8.852768277429384e-05, | |
| "loss": 2.5137, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.2601262283294719, | |
| "grad_norm": 0.5029991865158081, | |
| "learning_rate": 8.846024484642166e-05, | |
| "loss": 2.5526, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.2607653591116082, | |
| "grad_norm": 0.5601023435592651, | |
| "learning_rate": 8.839263513490931e-05, | |
| "loss": 2.5788, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.2614044898937445, | |
| "grad_norm": 0.5238969922065735, | |
| "learning_rate": 8.832485394173726e-05, | |
| "loss": 2.5589, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.26204362067588083, | |
| "grad_norm": 0.4996497929096222, | |
| "learning_rate": 8.825690156965188e-05, | |
| "loss": 2.57, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.2626827514580171, | |
| "grad_norm": 0.47495660185813904, | |
| "learning_rate": 8.818877832216413e-05, | |
| "loss": 2.5341, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.2633218822401534, | |
| "grad_norm": 0.503065288066864, | |
| "learning_rate": 8.812048450354819e-05, | |
| "loss": 2.5416, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.2639610130222897, | |
| "grad_norm": 0.49365946650505066, | |
| "learning_rate": 8.805202041884012e-05, | |
| "loss": 2.516, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.264600143804426, | |
| "grad_norm": 0.4978492558002472, | |
| "learning_rate": 8.798338637383645e-05, | |
| "loss": 2.52, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.26523927458656227, | |
| "grad_norm": 0.4817914664745331, | |
| "learning_rate": 8.791458267509283e-05, | |
| "loss": 2.5118, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.26587840536869856, | |
| "grad_norm": 0.5463185906410217, | |
| "learning_rate": 8.78456096299227e-05, | |
| "loss": 2.5527, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.26651753615083484, | |
| "grad_norm": 0.4575677812099457, | |
| "learning_rate": 8.77764675463959e-05, | |
| "loss": 2.5076, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.26715666693297113, | |
| "grad_norm": 0.47226086258888245, | |
| "learning_rate": 8.770715673333722e-05, | |
| "loss": 2.5357, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.2677957977151075, | |
| "grad_norm": 0.44436129927635193, | |
| "learning_rate": 8.763767750032518e-05, | |
| "loss": 2.5354, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.26843492849724376, | |
| "grad_norm": 0.48564496636390686, | |
| "learning_rate": 8.756803015769049e-05, | |
| "loss": 2.5479, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.26907405927938005, | |
| "grad_norm": 0.47404852509498596, | |
| "learning_rate": 8.749821501651472e-05, | |
| "loss": 2.5175, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.26971319006151634, | |
| "grad_norm": 0.4444579482078552, | |
| "learning_rate": 8.742823238862895e-05, | |
| "loss": 2.5066, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.2703523208436526, | |
| "grad_norm": 0.48305433988571167, | |
| "learning_rate": 8.735808258661233e-05, | |
| "loss": 2.5314, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.2709914516257889, | |
| "grad_norm": 0.5266690254211426, | |
| "learning_rate": 8.728776592379068e-05, | |
| "loss": 2.5734, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.2716305824079252, | |
| "grad_norm": 0.4550389051437378, | |
| "learning_rate": 8.721728271423512e-05, | |
| "loss": 2.556, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.2722697131900615, | |
| "grad_norm": 0.47347402572631836, | |
| "learning_rate": 8.71466332727607e-05, | |
| "loss": 2.5649, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.27290884397219783, | |
| "grad_norm": 0.5236901640892029, | |
| "learning_rate": 8.707581791492485e-05, | |
| "loss": 2.564, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.2735479747543341, | |
| "grad_norm": 0.5352922677993774, | |
| "learning_rate": 8.700483695702617e-05, | |
| "loss": 2.4933, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.2741871055364704, | |
| "grad_norm": 0.5335637331008911, | |
| "learning_rate": 8.693369071610287e-05, | |
| "loss": 2.4958, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.2748262363186067, | |
| "grad_norm": 0.4980125427246094, | |
| "learning_rate": 8.686237950993137e-05, | |
| "loss": 2.5519, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.275465367100743, | |
| "grad_norm": 0.4874439239501953, | |
| "learning_rate": 8.679090365702498e-05, | |
| "loss": 2.5326, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.2761044978828793, | |
| "grad_norm": 0.5071477293968201, | |
| "learning_rate": 8.671926347663238e-05, | |
| "loss": 2.5092, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.27674362866501556, | |
| "grad_norm": 0.4566083252429962, | |
| "learning_rate": 8.664745928873619e-05, | |
| "loss": 2.5108, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.27738275944715185, | |
| "grad_norm": 0.4723774492740631, | |
| "learning_rate": 8.657549141405161e-05, | |
| "loss": 2.4921, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.2780218902292882, | |
| "grad_norm": 0.44547411799430847, | |
| "learning_rate": 8.650336017402494e-05, | |
| "loss": 2.5481, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.2786610210114245, | |
| "grad_norm": 0.4709297716617584, | |
| "learning_rate": 8.643106589083216e-05, | |
| "loss": 2.501, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.27930015179356077, | |
| "grad_norm": 0.4446027874946594, | |
| "learning_rate": 8.63586088873775e-05, | |
| "loss": 2.5133, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.27993928257569706, | |
| "grad_norm": 0.4283333420753479, | |
| "learning_rate": 8.628598948729197e-05, | |
| "loss": 2.5338, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.28057841335783335, | |
| "grad_norm": 0.4817812442779541, | |
| "learning_rate": 8.621320801493188e-05, | |
| "loss": 2.5519, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.28121754413996963, | |
| "grad_norm": 0.49330389499664307, | |
| "learning_rate": 8.614026479537753e-05, | |
| "loss": 2.5047, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.2818566749221059, | |
| "grad_norm": 0.43356356024742126, | |
| "learning_rate": 8.606716015443161e-05, | |
| "loss": 2.4994, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.2824958057042422, | |
| "grad_norm": 0.4848228991031647, | |
| "learning_rate": 8.599389441861782e-05, | |
| "loss": 2.5186, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.28313493648637855, | |
| "grad_norm": 0.5413882732391357, | |
| "learning_rate": 8.59204679151794e-05, | |
| "loss": 2.5508, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.28377406726851484, | |
| "grad_norm": 0.46791547536849976, | |
| "learning_rate": 8.584688097207764e-05, | |
| "loss": 2.5728, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.28441319805065113, | |
| "grad_norm": 0.4600776731967926, | |
| "learning_rate": 8.577313391799046e-05, | |
| "loss": 2.5341, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.2850523288327874, | |
| "grad_norm": 0.3964655101299286, | |
| "learning_rate": 8.569922708231089e-05, | |
| "loss": 2.553, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.2856914596149237, | |
| "grad_norm": 0.41062116622924805, | |
| "learning_rate": 8.562516079514569e-05, | |
| "loss": 2.5726, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.28633059039706, | |
| "grad_norm": 0.43652409315109253, | |
| "learning_rate": 8.555093538731374e-05, | |
| "loss": 2.5313, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.2869697211791963, | |
| "grad_norm": 0.4312250316143036, | |
| "learning_rate": 8.547655119034467e-05, | |
| "loss": 2.4911, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.28760885196133257, | |
| "grad_norm": 0.4835914373397827, | |
| "learning_rate": 8.540200853647737e-05, | |
| "loss": 2.5262, | |
| "step": 900 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 3130, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 300, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.0132651008589824e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
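
The object above is evidently the `trainer_state.json` that Hugging Face's `Trainer` writes into each checkpoint directory: `log_history` holds one record every `logging_steps` (2) optimizer steps, and the closing fields capture the run configuration (3130 max steps, a checkpoint every 300 steps, train batch size 16). Below is a minimal sketch of how such a file can be inspected offline; the filename, the key filtering, and the use of matplotlib are assumptions for illustration, not part of the checkpoint itself.

```python
import json

import matplotlib.pyplot as plt

# Load the checkpointed trainer state. The path is an assumption:
# Hugging Face's Trainer writes this file as trainer_state.json
# inside each checkpoint-* directory.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only records that carry a training loss; evaluation records,
# if a run logs any, use eval_* keys instead of "loss".
logs = [entry for entry in state["log_history"] if "loss" in entry]

steps = [entry["step"] for entry in logs]
losses = [entry["loss"] for entry in logs]
lrs = [entry["learning_rate"] for entry in logs]

# Plot loss and learning rate on a shared step axis.
fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("step")
fig.tight_layout()
plt.show()
```

On the slice shown here, such a plot would make the trend visible at a glance: the loss hovers near 2.5 between steps 596 and 900 while the learning rate decays smoothly from about 9.47e-05 to about 8.54e-05 over the same span.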