{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.8628265558839978,
  "eval_steps": 500,
  "global_step": 2700,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00031956539106814733,
      "grad_norm": 5.807275295257568,
      "learning_rate": 0.0,
      "loss": 5.0454,
      "step": 1
    },
    {
      "epoch": 0.0006391307821362947,
      "grad_norm": 6.20149564743042,
      "learning_rate": 6.369426751592357e-07,
      "loss": 5.1424,
      "step": 2
    },
    {
      "epoch": 0.0012782615642725893,
      "grad_norm": 5.7567291259765625,
      "learning_rate": 1.910828025477707e-06,
      "loss": 5.0835,
      "step": 4
    },
    {
      "epoch": 0.001917392346408884,
      "grad_norm": 5.017141819000244,
      "learning_rate": 3.1847133757961785e-06,
      "loss": 5.0733,
      "step": 6
    },
    {
      "epoch": 0.0025565231285451786,
      "grad_norm": 3.2059810161590576,
      "learning_rate": 4.45859872611465e-06,
      "loss": 5.0714,
      "step": 8
    },
    {
      "epoch": 0.003195653910681473,
      "grad_norm": 6.303244113922119,
      "learning_rate": 5.732484076433121e-06,
      "loss": 4.9915,
      "step": 10
    },
    {
      "epoch": 0.003834784692817768,
      "grad_norm": 4.852840423583984,
      "learning_rate": 7.006369426751593e-06,
      "loss": 4.9307,
      "step": 12
    },
    {
      "epoch": 0.004473915474954062,
      "grad_norm": 3.78067946434021,
      "learning_rate": 8.280254777070064e-06,
      "loss": 4.8924,
      "step": 14
    },
    {
      "epoch": 0.005113046257090357,
      "grad_norm": 3.5641331672668457,
      "learning_rate": 9.554140127388536e-06,
      "loss": 4.8244,
      "step": 16
    },
    {
      "epoch": 0.005752177039226652,
      "grad_norm": 2.191957712173462,
      "learning_rate": 1.0828025477707008e-05,
      "loss": 4.6179,
      "step": 18
    },
    {
      "epoch": 0.006391307821362946,
      "grad_norm": 2.0675458908081055,
      "learning_rate": 1.2101910828025478e-05,
      "loss": 4.5827,
      "step": 20
    },
    {
      "epoch": 0.007030438603499241,
      "grad_norm": 1.6559146642684937,
      "learning_rate": 1.337579617834395e-05,
      "loss": 4.4544,
      "step": 22
    },
    {
      "epoch": 0.007669569385635536,
      "grad_norm": 1.3731284141540527,
      "learning_rate": 1.464968152866242e-05,
      "loss": 4.3748,
      "step": 24
    },
    {
      "epoch": 0.00830870016777183,
      "grad_norm": 1.3962030410766602,
      "learning_rate": 1.592356687898089e-05,
      "loss": 4.3269,
      "step": 26
    },
    {
      "epoch": 0.008947830949908125,
      "grad_norm": 1.2659896612167358,
      "learning_rate": 1.7197452229299362e-05,
      "loss": 4.2133,
      "step": 28
    },
    {
      "epoch": 0.00958696173204442,
      "grad_norm": 0.9881806373596191,
      "learning_rate": 1.8471337579617834e-05,
      "loss": 4.0961,
      "step": 30
    },
    {
      "epoch": 0.010226092514180714,
      "grad_norm": 0.9945515394210815,
      "learning_rate": 1.974522292993631e-05,
      "loss": 4.0158,
      "step": 32
    },
    {
      "epoch": 0.01086522329631701,
      "grad_norm": 0.9396588802337646,
      "learning_rate": 2.1019108280254778e-05,
      "loss": 3.8763,
      "step": 34
    },
    {
      "epoch": 0.011504354078453304,
      "grad_norm": 1.0665779113769531,
      "learning_rate": 2.229299363057325e-05,
      "loss": 3.8635,
      "step": 36
    },
    {
      "epoch": 0.012143484860589597,
      "grad_norm": 1.077245831489563,
      "learning_rate": 2.356687898089172e-05,
      "loss": 3.802,
      "step": 38
    },
    {
      "epoch": 0.012782615642725892,
      "grad_norm": 0.8040191531181335,
      "learning_rate": 2.4840764331210193e-05,
      "loss": 3.7284,
      "step": 40
    },
    {
      "epoch": 0.013421746424862187,
      "grad_norm": 1.4325759410858154,
      "learning_rate": 2.6114649681528662e-05,
      "loss": 3.665,
      "step": 42
    },
    {
      "epoch": 0.014060877206998482,
      "grad_norm": 1.3450332880020142,
      "learning_rate": 2.7388535031847134e-05,
      "loss": 3.6242,
      "step": 44
    },
    {
      "epoch": 0.014700007989134777,
      "grad_norm": 0.8203895688056946,
      "learning_rate": 2.8662420382165606e-05,
      "loss": 3.5576,
      "step": 46
    },
    {
      "epoch": 0.015339138771271072,
      "grad_norm": 1.1661335229873657,
      "learning_rate": 2.9936305732484078e-05,
      "loss": 3.522,
      "step": 48
    },
    {
      "epoch": 0.015978269553407365,
      "grad_norm": 1.0148671865463257,
      "learning_rate": 3.121019108280255e-05,
      "loss": 3.4594,
      "step": 50
    },
    {
      "epoch": 0.01661740033554366,
      "grad_norm": 0.6624857187271118,
      "learning_rate": 3.248407643312102e-05,
      "loss": 3.465,
      "step": 52
    },
    {
      "epoch": 0.017256531117679955,
      "grad_norm": 0.943125307559967,
      "learning_rate": 3.375796178343949e-05,
      "loss": 3.4021,
      "step": 54
    },
    {
      "epoch": 0.01789566189981625,
      "grad_norm": 0.9854550957679749,
      "learning_rate": 3.503184713375796e-05,
      "loss": 3.3361,
      "step": 56
    },
    {
      "epoch": 0.018534792681952544,
      "grad_norm": 1.2242411375045776,
      "learning_rate": 3.630573248407643e-05,
      "loss": 3.3283,
      "step": 58
    },
    {
      "epoch": 0.01917392346408884,
      "grad_norm": 0.9556372761726379,
      "learning_rate": 3.7579617834394906e-05,
      "loss": 3.2914,
      "step": 60
    },
    {
      "epoch": 0.019813054246225134,
      "grad_norm": 1.3133809566497803,
      "learning_rate": 3.885350318471338e-05,
      "loss": 3.3126,
      "step": 62
    },
    {
      "epoch": 0.02045218502836143,
      "grad_norm": 0.9322234392166138,
      "learning_rate": 4.012738853503185e-05,
      "loss": 3.2443,
      "step": 64
    },
    {
      "epoch": 0.021091315810497724,
      "grad_norm": 1.4383481740951538,
      "learning_rate": 4.1401273885350325e-05,
      "loss": 3.2428,
      "step": 66
    },
    {
      "epoch": 0.02173044659263402,
      "grad_norm": 1.0156841278076172,
      "learning_rate": 4.267515923566879e-05,
      "loss": 3.1735,
      "step": 68
    },
    {
      "epoch": 0.022369577374770314,
      "grad_norm": 1.1754450798034668,
      "learning_rate": 4.394904458598726e-05,
      "loss": 3.1788,
      "step": 70
    },
    {
      "epoch": 0.02300870815690661,
      "grad_norm": 1.0960084199905396,
      "learning_rate": 4.522292993630574e-05,
      "loss": 3.1963,
      "step": 72
    },
    {
      "epoch": 0.023647838939042903,
      "grad_norm": 1.054401159286499,
      "learning_rate": 4.6496815286624206e-05,
      "loss": 3.1604,
      "step": 74
    },
    {
      "epoch": 0.024286969721179195,
      "grad_norm": 1.1957581043243408,
      "learning_rate": 4.777070063694268e-05,
      "loss": 3.1648,
      "step": 76
    },
    {
      "epoch": 0.02492610050331549,
      "grad_norm": 0.7756203413009644,
      "learning_rate": 4.904458598726115e-05,
      "loss": 3.1066,
      "step": 78
    },
    {
      "epoch": 0.025565231285451784,
      "grad_norm": 1.0459190607070923,
      "learning_rate": 5.031847133757962e-05,
      "loss": 3.1571,
      "step": 80
    },
    {
      "epoch": 0.02620436206758808,
      "grad_norm": 0.9746761322021484,
      "learning_rate": 5.159235668789809e-05,
      "loss": 3.1026,
      "step": 82
    },
    {
      "epoch": 0.026843492849724374,
      "grad_norm": 1.0770882368087769,
      "learning_rate": 5.286624203821656e-05,
      "loss": 3.1125,
      "step": 84
    },
    {
      "epoch": 0.02748262363186067,
      "grad_norm": 0.9542138576507568,
      "learning_rate": 5.414012738853504e-05,
      "loss": 3.059,
      "step": 86
    },
    {
      "epoch": 0.028121754413996964,
      "grad_norm": 1.3454134464263916,
      "learning_rate": 5.5414012738853505e-05,
      "loss": 3.0645,
      "step": 88
    },
    {
      "epoch": 0.02876088519613326,
      "grad_norm": 1.0354089736938477,
      "learning_rate": 5.6687898089171974e-05,
      "loss": 3.04,
      "step": 90
    },
    {
      "epoch": 0.029400015978269554,
      "grad_norm": 1.1339548826217651,
      "learning_rate": 5.796178343949045e-05,
      "loss": 3.0625,
      "step": 92
    },
    {
      "epoch": 0.03003914676040585,
      "grad_norm": 1.200062870979309,
      "learning_rate": 5.923566878980892e-05,
      "loss": 3.057,
      "step": 94
    },
    {
      "epoch": 0.030678277542542143,
      "grad_norm": 1.395698070526123,
      "learning_rate": 6.0509554140127386e-05,
      "loss": 3.0341,
      "step": 96
    },
    {
      "epoch": 0.031317408324678435,
      "grad_norm": 0.9392653703689575,
      "learning_rate": 6.178343949044585e-05,
      "loss": 3.0087,
      "step": 98
    },
    {
      "epoch": 0.03195653910681473,
      "grad_norm": 1.1301568746566772,
      "learning_rate": 6.305732484076433e-05,
      "loss": 3.0294,
      "step": 100
    },
    {
      "epoch": 0.032595669888951025,
      "grad_norm": 0.9571443796157837,
      "learning_rate": 6.43312101910828e-05,
      "loss": 3.0522,
      "step": 102
    },
    {
      "epoch": 0.03323480067108732,
      "grad_norm": 0.9494081735610962,
      "learning_rate": 6.560509554140127e-05,
      "loss": 3.0012,
      "step": 104
    },
    {
      "epoch": 0.033873931453223614,
      "grad_norm": 1.3672889471054077,
      "learning_rate": 6.687898089171974e-05,
      "loss": 3.0188,
      "step": 106
    },
    {
      "epoch": 0.03451306223535991,
      "grad_norm": 1.2122056484222412,
      "learning_rate": 6.815286624203822e-05,
      "loss": 2.9497,
      "step": 108
    },
    {
      "epoch": 0.035152193017496204,
      "grad_norm": 1.2184698581695557,
      "learning_rate": 6.942675159235669e-05,
      "loss": 2.9739,
      "step": 110
    },
    {
      "epoch": 0.0357913237996325,
      "grad_norm": 1.09404456615448,
      "learning_rate": 7.070063694267515e-05,
      "loss": 3.0241,
      "step": 112
    },
    {
      "epoch": 0.036430454581768794,
      "grad_norm": 1.1653715372085571,
      "learning_rate": 7.197452229299363e-05,
      "loss": 2.9606,
      "step": 114
    },
    {
      "epoch": 0.03706958536390509,
      "grad_norm": 1.050194501876831,
      "learning_rate": 7.32484076433121e-05,
      "loss": 2.9582,
      "step": 116
    },
    {
      "epoch": 0.037708716146041384,
      "grad_norm": 1.1262322664260864,
      "learning_rate": 7.452229299363057e-05,
      "loss": 2.9462,
      "step": 118
    },
    {
      "epoch": 0.03834784692817768,
      "grad_norm": 1.1232227087020874,
      "learning_rate": 7.579617834394906e-05,
      "loss": 2.9784,
      "step": 120
    },
    {
      "epoch": 0.03898697771031397,
      "grad_norm": 0.9088072776794434,
      "learning_rate": 7.707006369426753e-05,
      "loss": 2.944,
      "step": 122
    },
    {
      "epoch": 0.03962610849245027,
      "grad_norm": 0.8985419869422913,
      "learning_rate": 7.834394904458599e-05,
      "loss": 2.9003,
      "step": 124
    },
    {
      "epoch": 0.04026523927458656,
      "grad_norm": 1.2419854402542114,
      "learning_rate": 7.961783439490447e-05,
      "loss": 2.9753,
      "step": 126
    },
    {
      "epoch": 0.04090437005672286,
      "grad_norm": 1.4533154964447021,
      "learning_rate": 8.089171974522294e-05,
      "loss": 2.9069,
      "step": 128
    },
    {
      "epoch": 0.04154350083885915,
      "grad_norm": 1.475258231163025,
      "learning_rate": 8.21656050955414e-05,
      "loss": 2.9402,
      "step": 130
    },
    {
      "epoch": 0.04218263162099545,
      "grad_norm": 1.0348827838897705,
      "learning_rate": 8.343949044585988e-05,
      "loss": 2.9295,
      "step": 132
    },
    {
      "epoch": 0.04282176240313174,
      "grad_norm": 0.9143719673156738,
      "learning_rate": 8.471337579617836e-05,
      "loss": 2.9408,
      "step": 134
    },
    {
      "epoch": 0.04346089318526804,
      "grad_norm": 1.1310492753982544,
      "learning_rate": 8.598726114649682e-05,
      "loss": 2.875,
      "step": 136
    },
    {
      "epoch": 0.04410002396740433,
      "grad_norm": 1.0483386516571045,
      "learning_rate": 8.726114649681529e-05,
      "loss": 2.9142,
      "step": 138
    },
    {
      "epoch": 0.04473915474954063,
      "grad_norm": 0.921519935131073,
      "learning_rate": 8.853503184713377e-05,
      "loss": 2.9188,
      "step": 140
    },
    {
      "epoch": 0.04537828553167692,
      "grad_norm": 1.3271907567977905,
      "learning_rate": 8.980891719745223e-05,
      "loss": 2.9075,
      "step": 142
    },
    {
      "epoch": 0.04601741631381322,
      "grad_norm": 1.7488983869552612,
      "learning_rate": 9.10828025477707e-05,
      "loss": 2.9201,
      "step": 144
    },
    {
      "epoch": 0.04665654709594951,
      "grad_norm": 1.4263213872909546,
      "learning_rate": 9.235668789808918e-05,
      "loss": 2.9045,
      "step": 146
    },
    {
      "epoch": 0.04729567787808581,
      "grad_norm": 0.8777288794517517,
      "learning_rate": 9.363057324840766e-05,
      "loss": 2.8959,
      "step": 148
    },
    {
      "epoch": 0.047934808660222095,
      "grad_norm": 1.3402196168899536,
      "learning_rate": 9.490445859872612e-05,
      "loss": 2.8893,
      "step": 150
    },
    {
      "epoch": 0.04857393944235839,
      "grad_norm": 1.0943351984024048,
      "learning_rate": 9.617834394904459e-05,
      "loss": 2.9137,
      "step": 152
    },
    {
      "epoch": 0.049213070224494684,
      "grad_norm": 1.0603907108306885,
      "learning_rate": 9.745222929936307e-05,
      "loss": 2.8677,
      "step": 154
    },
    {
      "epoch": 0.04985220100663098,
      "grad_norm": 1.010772705078125,
      "learning_rate": 9.872611464968153e-05,
      "loss": 2.8374,
      "step": 156
    },
    {
      "epoch": 0.050491331788767274,
      "grad_norm": 1.2628934383392334,
      "learning_rate": 0.0001,
      "loss": 2.9009,
      "step": 158
    },
    {
      "epoch": 0.05113046257090357,
      "grad_norm": 1.146183729171753,
      "learning_rate": 9.999988833687822e-05,
      "loss": 2.8633,
      "step": 160
    },
    {
      "epoch": 0.051769593353039864,
      "grad_norm": 0.8704808354377747,
      "learning_rate": 9.99995533480116e-05,
      "loss": 2.8464,
      "step": 162
    },
    {
      "epoch": 0.05240872413517616,
      "grad_norm": 1.044418454170227,
      "learning_rate": 9.999899503489641e-05,
      "loss": 2.8695,
      "step": 164
    },
    {
      "epoch": 0.053047854917312454,
      "grad_norm": 0.833791196346283,
      "learning_rate": 9.999821340002636e-05,
      "loss": 2.8605,
      "step": 166
    },
    {
      "epoch": 0.05368698569944875,
      "grad_norm": 0.922815203666687,
      "learning_rate": 9.99972084468926e-05,
      "loss": 2.8737,
      "step": 168
    },
    {
      "epoch": 0.05432611648158504,
      "grad_norm": 0.9120809435844421,
      "learning_rate": 9.999598017998384e-05,
      "loss": 2.8753,
      "step": 170
    },
    {
      "epoch": 0.05496524726372134,
      "grad_norm": 1.0272431373596191,
      "learning_rate": 9.999452860478611e-05,
      "loss": 2.8907,
      "step": 172
    },
    {
      "epoch": 0.05560437804585763,
      "grad_norm": 0.7777165174484253,
      "learning_rate": 9.999285372778295e-05,
      "loss": 2.8517,
      "step": 174
    },
    {
      "epoch": 0.05624350882799393,
      "grad_norm": 0.7110999822616577,
      "learning_rate": 9.999095555645523e-05,
      "loss": 2.8211,
      "step": 176
    },
    {
      "epoch": 0.05688263961013022,
      "grad_norm": 0.7857067584991455,
      "learning_rate": 9.998883409928117e-05,
      "loss": 2.8463,
      "step": 178
    },
    {
      "epoch": 0.05752177039226652,
      "grad_norm": 0.8582798838615417,
      "learning_rate": 9.998648936573629e-05,
      "loss": 2.8197,
      "step": 180
    },
    {
      "epoch": 0.05816090117440281,
      "grad_norm": 0.9790541529655457,
      "learning_rate": 9.998392136629345e-05,
      "loss": 2.8193,
      "step": 182
    },
    {
      "epoch": 0.05880003195653911,
      "grad_norm": 1.1599719524383545,
      "learning_rate": 9.998113011242264e-05,
      "loss": 2.8206,
      "step": 184
    },
    {
      "epoch": 0.0594391627386754,
      "grad_norm": 0.8326631188392639,
      "learning_rate": 9.99781156165911e-05,
      "loss": 2.8349,
      "step": 186
    },
    {
      "epoch": 0.0600782935208117,
      "grad_norm": 0.8876377940177917,
      "learning_rate": 9.997487789226312e-05,
      "loss": 2.8225,
      "step": 188
    },
    {
      "epoch": 0.06071742430294799,
      "grad_norm": 0.9899202585220337,
      "learning_rate": 9.997141695390009e-05,
      "loss": 2.7875,
      "step": 190
    },
    {
      "epoch": 0.06135655508508429,
      "grad_norm": 1.0686557292938232,
      "learning_rate": 9.996773281696037e-05,
      "loss": 2.8024,
      "step": 192
    },
    {
      "epoch": 0.06199568586722058,
      "grad_norm": 0.8899752497673035,
      "learning_rate": 9.996382549789926e-05,
      "loss": 2.8225,
      "step": 194
    },
    {
      "epoch": 0.06263481664935687,
      "grad_norm": 0.7781797647476196,
      "learning_rate": 9.995969501416891e-05,
      "loss": 2.8046,
      "step": 196
    },
    {
      "epoch": 0.06327394743149317,
      "grad_norm": 0.6428512930870056,
      "learning_rate": 9.995534138421818e-05,
      "loss": 2.7693,
      "step": 198
    },
    {
      "epoch": 0.06391307821362946,
      "grad_norm": 0.7047809958457947,
      "learning_rate": 9.995076462749273e-05,
      "loss": 2.766,
      "step": 200
    },
    {
      "epoch": 0.06455220899576576,
      "grad_norm": 0.6256312131881714,
      "learning_rate": 9.99459647644347e-05,
      "loss": 2.8071,
      "step": 202
    },
    {
      "epoch": 0.06519133977790205,
      "grad_norm": 0.699400007724762,
      "learning_rate": 9.994094181648283e-05,
      "loss": 2.8347,
      "step": 204
    },
    {
      "epoch": 0.06583047056003835,
      "grad_norm": 0.7256817817687988,
      "learning_rate": 9.993569580607225e-05,
      "loss": 2.8074,
      "step": 206
    },
    {
      "epoch": 0.06646960134217464,
      "grad_norm": 0.573846161365509,
      "learning_rate": 9.993022675663437e-05,
      "loss": 2.7413,
      "step": 208
    },
    {
      "epoch": 0.06710873212431094,
      "grad_norm": 0.7314406037330627,
      "learning_rate": 9.992453469259685e-05,
      "loss": 2.7983,
      "step": 210
    },
    {
      "epoch": 0.06774786290644723,
      "grad_norm": 0.7307546734809875,
      "learning_rate": 9.991861963938342e-05,
      "loss": 2.8026,
      "step": 212
    },
    {
      "epoch": 0.06838699368858353,
      "grad_norm": 0.6367102861404419,
      "learning_rate": 9.991248162341384e-05,
      "loss": 2.7424,
      "step": 214
    },
    {
      "epoch": 0.06902612447071982,
      "grad_norm": 0.8630378246307373,
      "learning_rate": 9.99061206721037e-05,
      "loss": 2.7395,
      "step": 216
    },
    {
      "epoch": 0.06966525525285612,
      "grad_norm": 0.7586290240287781,
      "learning_rate": 9.989953681386433e-05,
      "loss": 2.7624,
      "step": 218
    },
    {
      "epoch": 0.07030438603499241,
      "grad_norm": 0.7091168761253357,
      "learning_rate": 9.989273007810271e-05,
      "loss": 2.7719,
      "step": 220
    },
    {
      "epoch": 0.07094351681712871,
      "grad_norm": 0.684183657169342,
      "learning_rate": 9.98857004952213e-05,
      "loss": 2.7806,
      "step": 222
    },
    {
      "epoch": 0.071582647599265,
      "grad_norm": 0.920498788356781,
      "learning_rate": 9.987844809661791e-05,
      "loss": 2.7626,
      "step": 224
    },
    {
      "epoch": 0.0722217783814013,
      "grad_norm": 0.730060875415802,
      "learning_rate": 9.987097291468552e-05,
      "loss": 2.8107,
      "step": 226
    },
    {
      "epoch": 0.07286090916353759,
      "grad_norm": 0.8606828451156616,
      "learning_rate": 9.986327498281227e-05,
      "loss": 2.7814,
      "step": 228
    },
    {
      "epoch": 0.07350003994567389,
      "grad_norm": 0.8068298101425171,
      "learning_rate": 9.985535433538113e-05,
      "loss": 2.7775,
      "step": 230
    },
    {
      "epoch": 0.07413917072781018,
      "grad_norm": 0.6887542009353638,
      "learning_rate": 9.984721100776989e-05,
      "loss": 2.784,
      "step": 232
    },
    {
      "epoch": 0.07477830150994648,
      "grad_norm": 0.84773850440979,
      "learning_rate": 9.98388450363509e-05,
      "loss": 2.7333,
      "step": 234
    },
    {
      "epoch": 0.07541743229208277,
      "grad_norm": 0.7914923429489136,
      "learning_rate": 9.9830256458491e-05,
      "loss": 2.7363,
      "step": 236
    },
    {
      "epoch": 0.07605656307421906,
      "grad_norm": 0.8284217715263367,
      "learning_rate": 9.982144531255127e-05,
      "loss": 2.7389,
      "step": 238
    },
    {
      "epoch": 0.07669569385635536,
      "grad_norm": 0.7706480622291565,
      "learning_rate": 9.981241163788694e-05,
      "loss": 2.7377,
      "step": 240
    },
    {
      "epoch": 0.07733482463849164,
      "grad_norm": 0.6147120594978333,
      "learning_rate": 9.980315547484711e-05,
      "loss": 2.7862,
      "step": 242
    },
    {
      "epoch": 0.07797395542062795,
      "grad_norm": 0.6364494562149048,
      "learning_rate": 9.979367686477469e-05,
      "loss": 2.762,
      "step": 244
    },
    {
      "epoch": 0.07861308620276423,
      "grad_norm": 0.6944818496704102,
      "learning_rate": 9.978397585000611e-05,
      "loss": 2.7624,
      "step": 246
    },
    {
      "epoch": 0.07925221698490054,
      "grad_norm": 1.2648204565048218,
      "learning_rate": 9.977405247387119e-05,
      "loss": 2.7544,
      "step": 248
    },
    {
      "epoch": 0.07989134776703682,
      "grad_norm": 1.0054659843444824,
      "learning_rate": 9.976390678069295e-05,
      "loss": 2.7523,
      "step": 250
    },
    {
      "epoch": 0.08053047854917313,
      "grad_norm": 0.715492308139801,
      "learning_rate": 9.975353881578738e-05,
      "loss": 2.7341,
      "step": 252
    },
    {
      "epoch": 0.08116960933130941,
      "grad_norm": 0.7963582277297974,
      "learning_rate": 9.974294862546325e-05,
      "loss": 2.7484,
      "step": 254
    },
    {
      "epoch": 0.08180874011344572,
      "grad_norm": 0.7069251537322998,
      "learning_rate": 9.97321362570219e-05,
      "loss": 2.7719,
      "step": 256
    },
    {
      "epoch": 0.082447870895582,
      "grad_norm": 0.5716209411621094,
      "learning_rate": 9.972110175875706e-05,
      "loss": 2.8079,
      "step": 258
    },
    {
      "epoch": 0.0830870016777183,
      "grad_norm": 0.65562903881073,
      "learning_rate": 9.970984517995456e-05,
      "loss": 2.7642,
      "step": 260
    },
    {
      "epoch": 0.0837261324598546,
      "grad_norm": 0.647085964679718,
      "learning_rate": 9.969836657089225e-05,
      "loss": 2.7139,
      "step": 262
    },
    {
      "epoch": 0.0843652632419909,
      "grad_norm": 0.6401609778404236,
      "learning_rate": 9.968666598283955e-05,
      "loss": 2.7278,
      "step": 264
    },
    {
      "epoch": 0.08500439402412718,
      "grad_norm": 0.5514021515846252,
      "learning_rate": 9.967474346805746e-05,
      "loss": 2.7332,
      "step": 266
    },
    {
      "epoch": 0.08564352480626349,
      "grad_norm": 0.5908826589584351,
      "learning_rate": 9.96625990797982e-05,
      "loss": 2.741,
      "step": 268
    },
    {
      "epoch": 0.08628265558839977,
      "grad_norm": 0.5510653853416443,
      "learning_rate": 9.965023287230497e-05,
      "loss": 2.7025,
      "step": 270
    },
    {
      "epoch": 0.08692178637053607,
      "grad_norm": 0.5656317472457886,
      "learning_rate": 9.963764490081176e-05,
      "loss": 2.7184,
      "step": 272
    },
    {
      "epoch": 0.08756091715267236,
      "grad_norm": 0.5132441520690918,
      "learning_rate": 9.962483522154302e-05,
      "loss": 2.7632,
      "step": 274
    },
    {
      "epoch": 0.08820004793480866,
      "grad_norm": 0.6730588674545288,
      "learning_rate": 9.961180389171352e-05,
      "loss": 2.7705,
      "step": 276
    },
    {
      "epoch": 0.08883917871694495,
      "grad_norm": 0.5657472610473633,
      "learning_rate": 9.959855096952804e-05,
      "loss": 2.7191,
      "step": 278
    },
    {
      "epoch": 0.08947830949908125,
      "grad_norm": 0.8265955448150635,
      "learning_rate": 9.958507651418106e-05,
      "loss": 2.7718,
      "step": 280
    },
    {
      "epoch": 0.09011744028121754,
      "grad_norm": 0.8996996879577637,
      "learning_rate": 9.957138058585658e-05,
      "loss": 2.7124,
      "step": 282
    },
    {
      "epoch": 0.09075657106335384,
      "grad_norm": 0.6458889842033386,
      "learning_rate": 9.955746324572781e-05,
      "loss": 2.7403,
      "step": 284
    },
    {
      "epoch": 0.09139570184549013,
      "grad_norm": 0.7175470590591431,
      "learning_rate": 9.954332455595689e-05,
      "loss": 2.7188,
      "step": 286
    },
    {
      "epoch": 0.09203483262762643,
      "grad_norm": 0.6640183329582214,
      "learning_rate": 9.952896457969463e-05,
      "loss": 2.7223,
      "step": 288
    },
    {
      "epoch": 0.09267396340976272,
      "grad_norm": 0.6551202535629272,
      "learning_rate": 9.951438338108022e-05,
      "loss": 2.7189,
      "step": 290
    },
    {
      "epoch": 0.09331309419189902,
      "grad_norm": 0.6980673670768738,
      "learning_rate": 9.949958102524093e-05,
      "loss": 2.7183,
      "step": 292
    },
    {
      "epoch": 0.09395222497403531,
      "grad_norm": 0.5926324129104614,
      "learning_rate": 9.948455757829187e-05,
      "loss": 2.7476,
      "step": 294
    },
    {
      "epoch": 0.09459135575617161,
      "grad_norm": 0.5434746742248535,
      "learning_rate": 9.946931310733565e-05,
      "loss": 2.7368,
      "step": 296
    },
    {
      "epoch": 0.0952304865383079,
      "grad_norm": 0.6466372609138489,
      "learning_rate": 9.945384768046206e-05,
      "loss": 2.7307,
      "step": 298
    },
    {
      "epoch": 0.09586961732044419,
      "grad_norm": 0.6376985311508179,
      "learning_rate": 9.943816136674782e-05,
      "loss": 2.7239,
      "step": 300
    },
    {
      "epoch": 0.09650874810258049,
      "grad_norm": 0.6092653274536133,
      "learning_rate": 9.942225423625624e-05,
      "loss": 2.7678,
      "step": 302
    },
    {
      "epoch": 0.09714787888471678,
      "grad_norm": 0.7219493389129639,
      "learning_rate": 9.94061263600369e-05,
      "loss": 2.723,
      "step": 304
    },
    {
      "epoch": 0.09778700966685308,
      "grad_norm": 0.5244786143302917,
      "learning_rate": 9.93897778101254e-05,
      "loss": 2.7329,
      "step": 306
    },
    {
      "epoch": 0.09842614044898937,
      "grad_norm": 0.5384829044342041,
      "learning_rate": 9.937320865954289e-05,
      "loss": 2.661,
      "step": 308
    },
    {
      "epoch": 0.09906527123112567,
      "grad_norm": 0.624033510684967,
      "learning_rate": 9.935641898229594e-05,
      "loss": 2.7177,
      "step": 310
    },
    {
      "epoch": 0.09970440201326196,
      "grad_norm": 0.6381804347038269,
      "learning_rate": 9.933940885337602e-05,
      "loss": 2.7616,
      "step": 312
    },
    {
      "epoch": 0.10034353279539826,
      "grad_norm": 0.7671799659729004,
      "learning_rate": 9.932217834875934e-05,
      "loss": 2.7256,
      "step": 314
    },
    {
      "epoch": 0.10098266357753455,
      "grad_norm": 0.5695899128913879,
      "learning_rate": 9.930472754540634e-05,
      "loss": 2.6975,
      "step": 316
    },
    {
      "epoch": 0.10162179435967085,
      "grad_norm": 0.6461712121963501,
      "learning_rate": 9.92870565212615e-05,
      "loss": 2.7121,
      "step": 318
    },
    {
      "epoch": 0.10226092514180714,
      "grad_norm": 0.6111094355583191,
      "learning_rate": 9.926916535525283e-05,
      "loss": 2.6964,
      "step": 320
    },
    {
      "epoch": 0.10290005592394344,
      "grad_norm": 0.6368963718414307,
      "learning_rate": 9.925105412729175e-05,
      "loss": 2.6793,
      "step": 322
    },
    {
      "epoch": 0.10353918670607973,
      "grad_norm": 0.6973994374275208,
      "learning_rate": 9.923272291827245e-05,
      "loss": 2.6862,
      "step": 324
    },
    {
      "epoch": 0.10417831748821603,
      "grad_norm": 0.6717987656593323,
      "learning_rate": 9.921417181007175e-05,
      "loss": 2.686,
      "step": 326
    },
    {
      "epoch": 0.10481744827035232,
      "grad_norm": 0.6282898783683777,
      "learning_rate": 9.919540088554862e-05,
      "loss": 2.6807,
      "step": 328
    },
    {
      "epoch": 0.10545657905248862,
      "grad_norm": 0.6404539942741394,
      "learning_rate": 9.91764102285439e-05,
      "loss": 2.659,
      "step": 330
    },
    {
      "epoch": 0.10609570983462491,
      "grad_norm": 0.679418683052063,
      "learning_rate": 9.915719992387979e-05,
      "loss": 2.662,
      "step": 332
    },
    {
      "epoch": 0.10673484061676121,
      "grad_norm": 0.7185142040252686,
      "learning_rate": 9.913777005735963e-05,
      "loss": 2.7208,
      "step": 334
    },
    {
      "epoch": 0.1073739713988975,
      "grad_norm": 0.5328919887542725,
      "learning_rate": 9.911812071576736e-05,
      "loss": 2.6428,
      "step": 336
    },
    {
      "epoch": 0.1080131021810338,
      "grad_norm": 0.6135143637657166,
      "learning_rate": 9.909825198686729e-05,
      "loss": 2.6543,
      "step": 338
    },
    {
      "epoch": 0.10865223296317009,
      "grad_norm": 0.6830089092254639,
      "learning_rate": 9.907816395940359e-05,
      "loss": 2.677,
      "step": 340
    },
    {
      "epoch": 0.10929136374530639,
      "grad_norm": 0.6469766497612,
      "learning_rate": 9.90578567230999e-05,
      "loss": 2.726,
      "step": 342
    },
    {
      "epoch": 0.10993049452744268,
      "grad_norm": 0.5899373888969421,
      "learning_rate": 9.903733036865903e-05,
      "loss": 2.7208,
      "step": 344
    },
    {
      "epoch": 0.11056962530957898,
      "grad_norm": 0.82301926612854,
      "learning_rate": 9.901658498776246e-05,
      "loss": 2.6925,
      "step": 346
    },
    {
      "epoch": 0.11120875609171527,
      "grad_norm": 0.8507819771766663,
      "learning_rate": 9.899562067306989e-05,
      "loss": 2.6905,
      "step": 348
    },
    {
      "epoch": 0.11184788687385157,
      "grad_norm": 0.6785141229629517,
      "learning_rate": 9.897443751821902e-05,
      "loss": 2.6643,
      "step": 350
    },
    {
      "epoch": 0.11248701765598786,
      "grad_norm": 0.6389050483703613,
      "learning_rate": 9.89530356178249e-05,
      "loss": 2.6769,
      "step": 352
    },
    {
      "epoch": 0.11312614843812416,
      "grad_norm": 0.5903960466384888,
      "learning_rate": 9.893141506747967e-05,
      "loss": 2.6793,
      "step": 354
    },
    {
      "epoch": 0.11376527922026045,
      "grad_norm": 0.583307147026062,
      "learning_rate": 9.890957596375206e-05,
      "loss": 2.676,
      "step": 356
    },
    {
      "epoch": 0.11440441000239673,
      "grad_norm": 0.6372009515762329,
      "learning_rate": 9.888751840418695e-05,
      "loss": 2.6567,
      "step": 358
    },
    {
      "epoch": 0.11504354078453304,
      "grad_norm": 0.7056903839111328,
      "learning_rate": 9.886524248730497e-05,
      "loss": 2.6973,
      "step": 360
    },
    {
      "epoch": 0.11568267156666932,
      "grad_norm": 0.5459578633308411,
      "learning_rate": 9.88427483126021e-05,
      "loss": 2.6522,
      "step": 362
    },
    {
      "epoch": 0.11632180234880563,
      "grad_norm": 0.5186561346054077,
      "learning_rate": 9.882003598054907e-05,
      "loss": 2.6567,
      "step": 364
    },
    {
      "epoch": 0.11696093313094191,
      "grad_norm": 0.5469943881034851,
      "learning_rate": 9.879710559259114e-05,
      "loss": 2.6586,
      "step": 366
    },
    {
      "epoch": 0.11760006391307821,
      "grad_norm": 0.6790450215339661,
      "learning_rate": 9.877395725114742e-05,
      "loss": 2.6874,
      "step": 368
    },
    {
      "epoch": 0.1182391946952145,
      "grad_norm": 0.624920129776001,
      "learning_rate": 9.875059105961056e-05,
      "loss": 2.6777,
      "step": 370
    },
    {
      "epoch": 0.1188783254773508,
      "grad_norm": 0.6039037704467773,
      "learning_rate": 9.872700712234624e-05,
      "loss": 2.6881,
      "step": 372
    },
    {
      "epoch": 0.11951745625948709,
      "grad_norm": 0.6653264760971069,
      "learning_rate": 9.87032055446927e-05,
      "loss": 2.6388,
      "step": 374
    },
    {
      "epoch": 0.1201565870416234,
      "grad_norm": 0.7718141078948975,
      "learning_rate": 9.867918643296025e-05,
      "loss": 2.6686,
      "step": 376
    },
    {
      "epoch": 0.12079571782375968,
      "grad_norm": 0.6357402801513672,
      "learning_rate": 9.865494989443092e-05,
      "loss": 2.6611,
      "step": 378
    },
    {
      "epoch": 0.12143484860589598,
      "grad_norm": 0.560418963432312,
      "learning_rate": 9.863049603735775e-05,
      "loss": 2.6944,
      "step": 380
    },
    {
      "epoch": 0.12207397938803227,
      "grad_norm": 0.5758490562438965,
      "learning_rate": 9.860582497096452e-05,
      "loss": 2.6589,
      "step": 382
    },
    {
      "epoch": 0.12271311017016857,
      "grad_norm": 0.6144497990608215,
      "learning_rate": 9.858093680544516e-05,
      "loss": 2.6839,
      "step": 384
    },
    {
      "epoch": 0.12335224095230486,
      "grad_norm": 0.5986223816871643,
      "learning_rate": 9.855583165196329e-05,
      "loss": 2.6778,
      "step": 386
    },
    {
      "epoch": 0.12399137173444116,
      "grad_norm": 0.5350797176361084,
      "learning_rate": 9.853050962265169e-05,
      "loss": 2.6539,
      "step": 388
    },
    {
      "epoch": 0.12463050251657745,
      "grad_norm": 0.5589949488639832,
      "learning_rate": 9.850497083061183e-05,
      "loss": 2.6536,
      "step": 390
    },
    {
      "epoch": 0.12526963329871374,
      "grad_norm": 0.5695136189460754,
      "learning_rate": 9.847921538991339e-05,
      "loss": 2.6615,
      "step": 392
    },
    {
      "epoch": 0.12590876408085006,
      "grad_norm": 0.5739374756813049,
      "learning_rate": 9.845324341559366e-05,
      "loss": 2.6883,
      "step": 394
    },
    {
      "epoch": 0.12654789486298634,
      "grad_norm": 0.528075098991394,
      "learning_rate": 9.84270550236571e-05,
      "loss": 2.6944,
      "step": 396
    },
    {
      "epoch": 0.12718702564512263,
      "grad_norm": 0.6400613188743591,
      "learning_rate": 9.840065033107483e-05,
      "loss": 2.6596,
      "step": 398
    },
    {
      "epoch": 0.12782615642725892,
      "grad_norm": 0.6734158992767334,
      "learning_rate": 9.837402945578406e-05,
      "loss": 2.6562,
      "step": 400
    },
    {
      "epoch": 0.12846528720939523,
      "grad_norm": 0.6197201013565063,
      "learning_rate": 9.834719251668761e-05,
      "loss": 2.6971,
      "step": 402
    },
    {
      "epoch": 0.12910441799153152,
      "grad_norm": 0.5766332745552063,
      "learning_rate": 9.832013963365332e-05,
      "loss": 2.6355,
      "step": 404
    },
    {
      "epoch": 0.1297435487736678,
      "grad_norm": 0.7926291823387146,
      "learning_rate": 9.829287092751357e-05,
      "loss": 2.6438,
      "step": 406
    },
    {
      "epoch": 0.1303826795558041,
      "grad_norm": 0.7527420520782471,
      "learning_rate": 9.826538652006469e-05,
      "loss": 2.6695,
      "step": 408
    },
    {
      "epoch": 0.13102181033794041,
      "grad_norm": 0.7154802083969116,
      "learning_rate": 9.823768653406652e-05,
      "loss": 2.6158,
      "step": 410
    },
    {
      "epoch": 0.1316609411200767,
      "grad_norm": 0.5435774326324463,
      "learning_rate": 9.820977109324169e-05,
      "loss": 2.6843,
      "step": 412
    },
    {
      "epoch": 0.132300071902213,
      "grad_norm": 0.5893809199333191,
      "learning_rate": 9.818164032227522e-05,
      "loss": 2.6607,
      "step": 414
    },
    {
      "epoch": 0.13293920268434928,
      "grad_norm": 0.5635148882865906,
      "learning_rate": 9.815329434681392e-05,
      "loss": 2.658,
      "step": 416
    },
    {
      "epoch": 0.13357833346648557,
      "grad_norm": 0.4904562830924988,
      "learning_rate": 9.812473329346578e-05,
      "loss": 2.6616,
      "step": 418
    },
    {
      "epoch": 0.13421746424862188,
      "grad_norm": 0.5800766944885254,
      "learning_rate": 9.809595728979945e-05,
      "loss": 2.6657,
      "step": 420
    },
    {
      "epoch": 0.13485659503075817,
      "grad_norm": 0.5110253691673279,
      "learning_rate": 9.806696646434367e-05,
      "loss": 2.6192,
      "step": 422
    },
    {
      "epoch": 0.13549572581289446,
      "grad_norm": 0.5567732453346252,
      "learning_rate": 9.803776094658668e-05,
      "loss": 2.6475,
      "step": 424
    },
    {
      "epoch": 0.13613485659503075,
      "grad_norm": 0.5255835056304932,
      "learning_rate": 9.800834086697566e-05,
      "loss": 2.6644,
      "step": 426
    },
    {
      "epoch": 0.13677398737716706,
      "grad_norm": 0.4851606786251068,
      "learning_rate": 9.797870635691613e-05,
      "loss": 2.6628,
      "step": 428
    },
    {
      "epoch": 0.13741311815930335,
      "grad_norm": 0.4904446005821228,
      "learning_rate": 9.794885754877135e-05,
      "loss": 2.6222,
      "step": 430
    },
    {
      "epoch": 0.13805224894143964,
      "grad_norm": 0.47077298164367676,
      "learning_rate": 9.791879457586178e-05,
      "loss": 2.5875,
      "step": 432
    },
    {
      "epoch": 0.13869137972357592,
      "grad_norm": 0.4484720528125763,
      "learning_rate": 9.788851757246443e-05,
      "loss": 2.6279,
      "step": 434
    },
    {
      "epoch": 0.13933051050571224,
      "grad_norm": 0.5684689283370972,
      "learning_rate": 9.785802667381227e-05,
      "loss": 2.6507,
      "step": 436
    },
    {
      "epoch": 0.13996964128784853,
      "grad_norm": 0.5868870615959167,
      "learning_rate": 9.78273220160937e-05,
      "loss": 2.6476,
      "step": 438
    },
    {
      "epoch": 0.14060877206998482,
      "grad_norm": 0.5244540572166443,
      "learning_rate": 9.77964037364518e-05,
      "loss": 2.6353,
      "step": 440
    },
    {
      "epoch": 0.1412479028521211,
      "grad_norm": 0.5107213258743286,
      "learning_rate": 9.776527197298386e-05,
      "loss": 2.6335,
      "step": 442
    },
    {
      "epoch": 0.14188703363425742,
      "grad_norm": 0.5410230159759521,
      "learning_rate": 9.773392686474065e-05,
      "loss": 2.6248,
      "step": 444
    },
    {
      "epoch": 0.1425261644163937,
      "grad_norm": 0.5540198683738708,
      "learning_rate": 9.770236855172587e-05,
      "loss": 2.6304,
      "step": 446
    },
    {
      "epoch": 0.14316529519853,
      "grad_norm": 0.6982893347740173,
      "learning_rate": 9.767059717489557e-05,
      "loss": 2.6285,
      "step": 448
    },
    {
      "epoch": 0.14380442598066628,
      "grad_norm": 0.7649112939834595,
      "learning_rate": 9.763861287615732e-05,
      "loss": 2.6863,
      "step": 450
    },
    {
      "epoch": 0.1444435567628026,
      "grad_norm": 0.5209079384803772,
      "learning_rate": 9.760641579836984e-05,
      "loss": 2.6262,
      "step": 452
    },
    {
      "epoch": 0.1450826875449389,
      "grad_norm": 0.5985437631607056,
      "learning_rate": 9.757400608534215e-05,
      "loss": 2.5451,
      "step": 454
    },
    {
      "epoch": 0.14572181832707518,
      "grad_norm": 0.6232045888900757,
      "learning_rate": 9.754138388183305e-05,
      "loss": 2.6142,
      "step": 456
    },
    {
      "epoch": 0.14636094910921146,
      "grad_norm": 0.7111669778823853,
      "learning_rate": 9.750854933355042e-05,
      "loss": 2.5868,
      "step": 458
    },
    {
      "epoch": 0.14700007989134778,
      "grad_norm": 0.6749933362007141,
      "learning_rate": 9.747550258715059e-05,
      "loss": 2.6233,
      "step": 460
    },
    {
      "epoch": 0.14763921067348407,
      "grad_norm": 0.5915788412094116,
      "learning_rate": 9.744224379023768e-05,
      "loss": 2.6233,
      "step": 462
    },
    {
      "epoch": 0.14827834145562035,
      "grad_norm": 0.6704515814781189,
      "learning_rate": 9.740877309136291e-05,
      "loss": 2.6432,
      "step": 464
    },
    {
      "epoch": 0.14891747223775664,
      "grad_norm": 0.6156161427497864,
      "learning_rate": 9.737509064002402e-05,
      "loss": 2.6436,
      "step": 466
    },
    {
      "epoch": 0.14955660301989296,
      "grad_norm": 0.49440738558769226,
      "learning_rate": 9.734119658666448e-05,
      "loss": 2.6488,
      "step": 468
    },
    {
      "epoch": 0.15019573380202925,
      "grad_norm": 0.6561670899391174,
      "learning_rate": 9.730709108267296e-05,
      "loss": 2.6191,
      "step": 470
    },
    {
      "epoch": 0.15083486458416553,
      "grad_norm": 0.6310847997665405,
      "learning_rate": 9.727277428038253e-05,
      "loss": 2.6055,
      "step": 472
    },
    {
      "epoch": 0.15147399536630182,
      "grad_norm": 0.5141007304191589,
      "learning_rate": 9.723824633307001e-05,
      "loss": 2.626,
      "step": 474
    },
    {
      "epoch": 0.1521131261484381,
      "grad_norm": 0.5299694538116455,
      "learning_rate": 9.720350739495538e-05,
      "loss": 2.6401,
      "step": 476
    },
    {
      "epoch": 0.15275225693057443,
      "grad_norm": 0.5702034831047058,
      "learning_rate": 9.716855762120097e-05,
      "loss": 2.6392,
      "step": 478
    },
    {
      "epoch": 0.15339138771271071,
      "grad_norm": 0.5058117508888245,
      "learning_rate": 9.713339716791076e-05,
      "loss": 2.5778,
      "step": 480
    },
    {
      "epoch": 0.154030518494847,
      "grad_norm": 0.6530377864837646,
      "learning_rate": 9.709802619212987e-05,
      "loss": 2.6359,
      "step": 482
    },
    {
      "epoch": 0.1546696492769833,
      "grad_norm": 0.6136478781700134,
      "learning_rate": 9.706244485184357e-05,
      "loss": 2.6117,
      "step": 484
    },
    {
      "epoch": 0.1553087800591196,
      "grad_norm": 0.5947436094284058,
      "learning_rate": 9.702665330597684e-05,
      "loss": 2.6148,
      "step": 486
    },
    {
      "epoch": 0.1559479108412559,
      "grad_norm": 0.6332894563674927,
      "learning_rate": 9.699065171439349e-05,
      "loss": 2.6251,
      "step": 488
    },
    {
      "epoch": 0.15658704162339218,
      "grad_norm": 0.5429502129554749,
      "learning_rate": 9.695444023789554e-05,
      "loss": 2.577,
      "step": 490
    },
    {
      "epoch": 0.15722617240552847,
      "grad_norm": 0.6252620220184326,
      "learning_rate": 9.691801903822244e-05,
      "loss": 2.6114,
      "step": 492
    },
    {
      "epoch": 0.15786530318766478,
      "grad_norm": 0.5587325692176819,
      "learning_rate": 9.68813882780504e-05,
      "loss": 2.632,
      "step": 494
    },
    {
      "epoch": 0.15850443396980107,
      "grad_norm": 0.5149174332618713,
      "learning_rate": 9.68445481209916e-05,
      "loss": 2.6394,
      "step": 496
    },
    {
      "epoch": 0.15914356475193736,
      "grad_norm": 0.5343561172485352,
      "learning_rate": 9.680749873159354e-05,
      "loss": 2.572,
      "step": 498
    },
    {
      "epoch": 0.15978269553407365,
      "grad_norm": 0.5082888603210449,
      "learning_rate": 9.677024027533821e-05,
      "loss": 2.5786,
      "step": 500
    },
    {
      "epoch": 0.16042182631620996,
      "grad_norm": 0.46739038825035095,
      "learning_rate": 9.673277291864145e-05,
      "loss": 2.5933,
      "step": 502
    },
    {
      "epoch": 0.16106095709834625,
      "grad_norm": 0.5262092351913452,
      "learning_rate": 9.669509682885216e-05,
      "loss": 2.6295,
      "step": 504
    },
    {
      "epoch": 0.16170008788048254,
      "grad_norm": 0.5002930760383606,
      "learning_rate": 9.66572121742515e-05,
      "loss": 2.6306,
      "step": 506
    },
    {
      "epoch": 0.16233921866261883,
      "grad_norm": 0.4859941601753235,
      "learning_rate": 9.661911912405222e-05,
      "loss": 2.5742,
      "step": 508
    },
    {
      "epoch": 0.16297834944475514,
      "grad_norm": 0.6142066717147827,
      "learning_rate": 9.65808178483979e-05,
      "loss": 2.61,
      "step": 510
    },
    {
      "epoch": 0.16361748022689143,
      "grad_norm": 0.6018419861793518,
      "learning_rate": 9.654230851836214e-05,
      "loss": 2.6158,
      "step": 512
    },
    {
      "epoch": 0.16425661100902772,
      "grad_norm": 0.5785476565361023,
      "learning_rate": 9.650359130594779e-05,
      "loss": 2.629,
      "step": 514
    },
    {
      "epoch": 0.164895741791164,
      "grad_norm": 0.5036047697067261,
      "learning_rate": 9.646466638408629e-05,
      "loss": 2.6087,
      "step": 516
    },
    {
      "epoch": 0.16553487257330032,
      "grad_norm": 0.5089232325553894,
      "learning_rate": 9.642553392663672e-05,
      "loss": 2.6299,
      "step": 518
    },
    {
      "epoch": 0.1661740033554366,
      "grad_norm": 0.5314218997955322,
      "learning_rate": 9.63861941083852e-05,
      "loss": 2.6152,
      "step": 520
    },
    {
      "epoch": 0.1668131341375729,
      "grad_norm": 0.6545165181159973,
      "learning_rate": 9.634664710504402e-05,
      "loss": 2.5711,
      "step": 522
    },
    {
      "epoch": 0.1674522649197092,
      "grad_norm": 0.7461646199226379,
      "learning_rate": 9.630689309325082e-05,
      "loss": 2.627,
      "step": 524
    },
    {
      "epoch": 0.1680913957018455,
      "grad_norm": 0.6585918068885803,
      "learning_rate": 9.626693225056794e-05,
      "loss": 2.6231,
      "step": 526
    },
    {
      "epoch": 0.1687305264839818,
      "grad_norm": 0.5888398289680481,
      "learning_rate": 9.62267647554814e-05,
      "loss": 2.6175,
      "step": 528
    },
    {
      "epoch": 0.16936965726611808,
      "grad_norm": 0.49957162141799927,
      "learning_rate": 9.618639078740037e-05,
      "loss": 2.5771,
      "step": 530
    },
    {
      "epoch": 0.17000878804825437,
      "grad_norm": 0.4573955535888672,
      "learning_rate": 9.614581052665616e-05,
      "loss": 2.5855,
      "step": 532
    },
    {
      "epoch": 0.17064791883039068,
      "grad_norm": 0.5360051393508911,
      "learning_rate": 9.610502415450153e-05,
      "loss": 2.6107,
      "step": 534
    },
    {
      "epoch": 0.17128704961252697,
      "grad_norm": 0.5413601994514465,
      "learning_rate": 9.606403185310981e-05,
      "loss": 2.5971,
      "step": 536
    },
    {
      "epoch": 0.17192618039466326,
      "grad_norm": 0.5360136032104492,
      "learning_rate": 9.602283380557416e-05,
      "loss": 2.5878,
      "step": 538
    },
    {
      "epoch": 0.17256531117679955,
      "grad_norm": 0.653225839138031,
      "learning_rate": 9.598143019590664e-05,
      "loss": 2.6,
      "step": 540
    },
    {
      "epoch": 0.17320444195893583,
      "grad_norm": 0.5268750786781311,
      "learning_rate": 9.593982120903754e-05,
      "loss": 2.5992,
      "step": 542
    },
    {
      "epoch": 0.17384357274107215,
      "grad_norm": 0.5311806797981262,
      "learning_rate": 9.589800703081442e-05,
      "loss": 2.5939,
      "step": 544
    },
    {
      "epoch": 0.17448270352320844,
      "grad_norm": 0.47583094239234924,
      "learning_rate": 9.585598784800135e-05,
      "loss": 2.5863,
      "step": 546
    },
    {
      "epoch": 0.17512183430534473,
      "grad_norm": 0.44130444526672363,
      "learning_rate": 9.581376384827804e-05,
      "loss": 2.5568,
      "step": 548
    },
    {
      "epoch": 0.175760965087481,
      "grad_norm": 0.45064234733581543,
      "learning_rate": 9.577133522023906e-05,
      "loss": 2.5888,
      "step": 550
    },
    {
      "epoch": 0.17640009586961733,
      "grad_norm": 0.4643968343734741,
      "learning_rate": 9.572870215339294e-05,
      "loss": 2.6121,
      "step": 552
    },
    {
      "epoch": 0.17703922665175362,
      "grad_norm": 0.446347713470459,
      "learning_rate": 9.568586483816129e-05,
      "loss": 2.614,
      "step": 554
    },
    {
      "epoch": 0.1776783574338899,
      "grad_norm": 0.48379895091056824,
      "learning_rate": 9.564282346587809e-05,
      "loss": 2.6353,
      "step": 556
    },
    {
      "epoch": 0.1783174882160262,
      "grad_norm": 0.45891985297203064,
      "learning_rate": 9.559957822878867e-05,
      "loss": 2.6111,
      "step": 558
    },
    {
      "epoch": 0.1789566189981625,
      "grad_norm": 0.49106699228286743,
      "learning_rate": 9.555612932004896e-05,
      "loss": 2.5876,
      "step": 560
    },
    {
      "epoch": 0.1795957497802988,
      "grad_norm": 0.5220739245414734,
      "learning_rate": 9.55124769337246e-05,
      "loss": 2.5988,
      "step": 562
    },
    {
      "epoch": 0.18023488056243508,
      "grad_norm": 0.6365030407905579,
      "learning_rate": 9.546862126479006e-05,
      "loss": 2.5763,
      "step": 564
    },
    {
      "epoch": 0.18087401134457137,
      "grad_norm": 0.706681489944458,
      "learning_rate": 9.542456250912776e-05,
      "loss": 2.5965,
      "step": 566
    },
    {
      "epoch": 0.1815131421267077,
      "grad_norm": 0.4519253373146057,
      "learning_rate": 9.538030086352725e-05,
      "loss": 2.568,
      "step": 568
    },
    {
      "epoch": 0.18215227290884398,
      "grad_norm": 0.6023289561271667,
      "learning_rate": 9.533583652568426e-05,
      "loss": 2.6034,
      "step": 570
    },
    {
      "epoch": 0.18279140369098026,
      "grad_norm": 0.581615686416626,
      "learning_rate": 9.529116969419986e-05,
      "loss": 2.5858,
      "step": 572
    },
    {
      "epoch": 0.18343053447311655,
      "grad_norm": 0.49777430295944214,
      "learning_rate": 9.524630056857958e-05,
      "loss": 2.6062,
      "step": 574
    },
    {
      "epoch": 0.18406966525525287,
      "grad_norm": 0.5936197638511658,
      "learning_rate": 9.520122934923246e-05,
      "loss": 2.5976,
      "step": 576
    },
    {
      "epoch": 0.18470879603738916,
      "grad_norm": 0.5317326784133911,
      "learning_rate": 9.515595623747022e-05,
      "loss": 2.6004,
      "step": 578
    },
    {
      "epoch": 0.18534792681952544,
      "grad_norm": 0.524297297000885,
      "learning_rate": 9.511048143550637e-05,
      "loss": 2.583,
      "step": 580
    },
    {
      "epoch": 0.18598705760166173,
      "grad_norm": 0.5107091665267944,
      "learning_rate": 9.506480514645523e-05,
      "loss": 2.5704,
      "step": 582
    },
    {
      "epoch": 0.18662618838379805,
      "grad_norm": 0.4521612226963043,
      "learning_rate": 9.501892757433107e-05,
      "loss": 2.5903,
      "step": 584
    },
    {
      "epoch": 0.18726531916593434,
      "grad_norm": 0.48701736330986023,
      "learning_rate": 9.497284892404721e-05,
      "loss": 2.5758,
      "step": 586
    },
    {
      "epoch": 0.18790444994807062,
      "grad_norm": 0.613917887210846,
      "learning_rate": 9.492656940141512e-05,
      "loss": 2.5749,
      "step": 588
    },
    {
      "epoch": 0.1885435807302069,
      "grad_norm": 0.5269163846969604,
      "learning_rate": 9.488008921314338e-05,
      "loss": 2.6126,
      "step": 590
    },
    {
      "epoch": 0.18918271151234323,
      "grad_norm": 0.6326431632041931,
      "learning_rate": 9.483340856683696e-05,
      "loss": 2.5863,
      "step": 592
    },
    {
      "epoch": 0.18982184229447951,
      "grad_norm": 0.47863009572029114,
      "learning_rate": 9.47865276709961e-05,
      "loss": 2.6201,
      "step": 594
    },
    {
      "epoch": 0.1904609730766158,
      "grad_norm": 0.5771295428276062,
      "learning_rate": 9.473944673501549e-05,
      "loss": 2.5914,
      "step": 596
    },
    {
      "epoch": 0.1911001038587521,
      "grad_norm": 0.4584767818450928,
      "learning_rate": 9.469216596918331e-05,
      "loss": 2.5497,
      "step": 598
    },
    {
      "epoch": 0.19173923464088838,
      "grad_norm": 0.4598289728164673,
      "learning_rate": 9.464468558468026e-05,
      "loss": 2.5841,
      "step": 600
    },
    {
      "epoch": 0.1923783654230247,
      "grad_norm": 0.516592800617218,
      "learning_rate": 9.459700579357869e-05,
      "loss": 2.6013,
      "step": 602
    },
    {
      "epoch": 0.19301749620516098,
      "grad_norm": 0.5296542048454285,
      "learning_rate": 9.454912680884154e-05,
      "loss": 2.6085,
      "step": 604
    },
    {
      "epoch": 0.19365662698729727,
      "grad_norm": 0.5447851419448853,
      "learning_rate": 9.45010488443215e-05,
      "loss": 2.5507,
      "step": 606
    },
    {
      "epoch": 0.19429575776943356,
      "grad_norm": 0.49331796169281006,
      "learning_rate": 9.445277211476e-05,
      "loss": 2.5476,
      "step": 608
    },
    {
      "epoch": 0.19493488855156987,
      "grad_norm": 0.4537939429283142,
      "learning_rate": 9.440429683578624e-05,
      "loss": 2.5977,
      "step": 610
    },
    {
      "epoch": 0.19557401933370616,
      "grad_norm": 0.5129672884941101,
      "learning_rate": 9.435562322391627e-05,
      "loss": 2.5689,
      "step": 612
    },
    {
      "epoch": 0.19621315011584245,
      "grad_norm": 0.5162326693534851,
      "learning_rate": 9.430675149655199e-05,
      "loss": 2.5981,
      "step": 614
    },
    {
      "epoch": 0.19685228089797874,
      "grad_norm": 0.5716260075569153,
      "learning_rate": 9.425768187198016e-05,
      "loss": 2.547,
      "step": 616
    },
    {
      "epoch": 0.19749141168011505,
      "grad_norm": 0.5598787069320679,
      "learning_rate": 9.420841456937151e-05,
      "loss": 2.5743,
      "step": 618
    },
    {
      "epoch": 0.19813054246225134,
      "grad_norm": 0.5771391987800598,
      "learning_rate": 9.415894980877966e-05,
      "loss": 2.589,
      "step": 620
    },
    {
      "epoch": 0.19876967324438763,
      "grad_norm": 0.5378340482711792,
      "learning_rate": 9.410928781114019e-05,
      "loss": 2.5916,
      "step": 622
    },
    {
      "epoch": 0.19940880402652392,
      "grad_norm": 0.5003606081008911,
      "learning_rate": 9.405942879826967e-05,
      "loss": 2.5535,
      "step": 624
    },
    {
      "epoch": 0.20004793480866023,
      "grad_norm": 0.5581315755844116,
      "learning_rate": 9.400937299286458e-05,
      "loss": 2.6016,
      "step": 626
    },
    {
      "epoch": 0.20068706559079652,
      "grad_norm": 0.5600181818008423,
      "learning_rate": 9.395912061850046e-05,
      "loss": 2.5622,
      "step": 628
    },
    {
      "epoch": 0.2013261963729328,
      "grad_norm": 0.5221248269081116,
      "learning_rate": 9.390867189963075e-05,
      "loss": 2.5584,
      "step": 630
    },
    {
      "epoch": 0.2019653271550691,
      "grad_norm": 0.4963245391845703,
      "learning_rate": 9.385802706158594e-05,
      "loss": 2.54,
      "step": 632
    },
    {
      "epoch": 0.2026044579372054,
      "grad_norm": 0.4757302403450012,
      "learning_rate": 9.380718633057246e-05,
      "loss": 2.5856,
      "step": 634
    },
    {
      "epoch": 0.2032435887193417,
      "grad_norm": 0.4876170754432678,
      "learning_rate": 9.37561499336717e-05,
      "loss": 2.5912,
      "step": 636
    },
    {
      "epoch": 0.203882719501478,
      "grad_norm": 0.4831182360649109,
      "learning_rate": 9.370491809883895e-05,
      "loss": 2.5395,
      "step": 638
    },
    {
      "epoch": 0.20452185028361428,
      "grad_norm": 0.5880109071731567,
      "learning_rate": 9.365349105490253e-05,
      "loss": 2.5579,
      "step": 640
    },
    {
      "epoch": 0.2051609810657506,
      "grad_norm": 0.497311532497406,
      "learning_rate": 9.360186903156259e-05,
      "loss": 2.5629,
      "step": 642
    },
    {
      "epoch": 0.20580011184788688,
      "grad_norm": 0.5942720174789429,
      "learning_rate": 9.355005225939017e-05,
      "loss": 2.5816,
      "step": 644
    },
    {
      "epoch": 0.20643924263002317,
      "grad_norm": 0.5332151651382446,
      "learning_rate": 9.34980409698262e-05,
      "loss": 2.5603,
      "step": 646
    },
    {
      "epoch": 0.20707837341215946,
      "grad_norm": 0.4901409149169922,
      "learning_rate": 9.344583539518036e-05,
      "loss": 2.569,
      "step": 648
    },
    {
      "epoch": 0.20771750419429577,
      "grad_norm": 0.521522581577301,
      "learning_rate": 9.339343576863018e-05,
      "loss": 2.6077,
      "step": 650
    },
    {
      "epoch": 0.20835663497643206,
      "grad_norm": 0.49068787693977356,
      "learning_rate": 9.334084232421988e-05,
      "loss": 2.5729,
      "step": 652
    },
    {
      "epoch": 0.20899576575856835,
      "grad_norm": 0.48800089955329895,
      "learning_rate": 9.32880552968594e-05,
      "loss": 2.5814,
      "step": 654
    },
    {
      "epoch": 0.20963489654070463,
      "grad_norm": 0.5036289691925049,
      "learning_rate": 9.323507492232328e-05,
      "loss": 2.5795,
      "step": 656
    },
    {
      "epoch": 0.21027402732284092,
      "grad_norm": 0.4648139476776123,
      "learning_rate": 9.318190143724972e-05,
      "loss": 2.572,
      "step": 658
    },
    {
      "epoch": 0.21091315810497724,
      "grad_norm": 0.42503541707992554,
      "learning_rate": 9.312853507913938e-05,
      "loss": 2.5765,
      "step": 660
    },
    {
      "epoch": 0.21155228888711353,
      "grad_norm": 0.483327180147171,
      "learning_rate": 9.307497608635447e-05,
      "loss": 2.5965,
      "step": 662
    },
    {
      "epoch": 0.21219141966924981,
      "grad_norm": 0.49550801515579224,
      "learning_rate": 9.302122469811752e-05,
      "loss": 2.5412,
      "step": 664
    },
    {
      "epoch": 0.2128305504513861,
      "grad_norm": 0.457082599401474,
      "learning_rate": 9.296728115451046e-05,
      "loss": 2.5945,
      "step": 666
    },
    {
      "epoch": 0.21346968123352242,
      "grad_norm": 0.5289996862411499,
      "learning_rate": 9.291314569647346e-05,
      "loss": 2.5364,
      "step": 668
    },
    {
      "epoch": 0.2141088120156587,
      "grad_norm": 0.5246165990829468,
      "learning_rate": 9.285881856580392e-05,
      "loss": 2.5313,
      "step": 670
    },
    {
      "epoch": 0.214747942797795,
      "grad_norm": 0.5950086712837219,
      "learning_rate": 9.280430000515528e-05,
      "loss": 2.5621,
      "step": 672
    },
    {
      "epoch": 0.21538707357993128,
      "grad_norm": 0.49669399857521057,
      "learning_rate": 9.274959025803604e-05,
      "loss": 2.5515,
      "step": 674
    },
    {
      "epoch": 0.2160262043620676,
      "grad_norm": 0.5234604477882385,
      "learning_rate": 9.269468956880871e-05,
      "loss": 2.5432,
      "step": 676
    },
    {
| { | |
| "epoch": 0.21666533514420389, | |
| "grad_norm": 0.5024713277816772, | |
| "learning_rate": 9.263959818268853e-05, | |
| "loss": 2.5893, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.21730446592634017, | |
| "grad_norm": 0.4908897876739502, | |
| "learning_rate": 9.258431634574256e-05, | |
| "loss": 2.6035, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.21794359670847646, | |
| "grad_norm": 0.47038817405700684, | |
| "learning_rate": 9.252884430488849e-05, | |
| "loss": 2.5652, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.21858272749061278, | |
| "grad_norm": 0.47875434160232544, | |
| "learning_rate": 9.247318230789359e-05, | |
| "loss": 2.5902, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.21922185827274906, | |
| "grad_norm": 0.4665825366973877, | |
| "learning_rate": 9.241733060337354e-05, | |
| "loss": 2.5292, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.21986098905488535, | |
| "grad_norm": 0.4810079336166382, | |
| "learning_rate": 9.236128944079138e-05, | |
| "loss": 2.5792, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.22050011983702164, | |
| "grad_norm": 0.45069095492362976, | |
| "learning_rate": 9.230505907045635e-05, | |
| "loss": 2.5316, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.22113925061915796, | |
| "grad_norm": 0.40244781970977783, | |
| "learning_rate": 9.224863974352278e-05, | |
| "loss": 2.5563, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.22177838140129424, | |
| "grad_norm": 0.5229255557060242, | |
| "learning_rate": 9.219203171198902e-05, | |
| "loss": 2.5402, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.22241751218343053, | |
| "grad_norm": 0.5138113498687744, | |
| "learning_rate": 9.213523522869625e-05, | |
| "loss": 2.5914, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.22305664296556682, | |
| "grad_norm": 0.47990405559539795, | |
| "learning_rate": 9.207825054732736e-05, | |
| "loss": 2.5525, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.22369577374770314, | |
| "grad_norm": 0.48454561829566956, | |
| "learning_rate": 9.202107792240587e-05, | |
| "loss": 2.5379, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.22433490452983942, | |
| "grad_norm": 0.49185454845428467, | |
| "learning_rate": 9.19637176092947e-05, | |
| "loss": 2.5462, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.2249740353119757, | |
| "grad_norm": 0.4852677583694458, | |
| "learning_rate": 9.190616986419512e-05, | |
| "loss": 2.5222, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.225613166094112, | |
| "grad_norm": 0.503039538860321, | |
| "learning_rate": 9.18484349441456e-05, | |
| "loss": 2.5714, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.22625229687624832, | |
| "grad_norm": 0.4584214389324188, | |
| "learning_rate": 9.179051310702056e-05, | |
| "loss": 2.5694, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.2268914276583846, | |
| "grad_norm": 0.46065405011177063, | |
| "learning_rate": 9.173240461152935e-05, | |
| "loss": 2.5804, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.2275305584405209, | |
| "grad_norm": 0.48372742533683777, | |
| "learning_rate": 9.1674109717215e-05, | |
| "loss": 2.5489, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.22816968922265718, | |
| "grad_norm": 0.43927186727523804, | |
| "learning_rate": 9.16156286844531e-05, | |
| "loss": 2.541, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.22880882000479347, | |
| "grad_norm": 0.46032947301864624, | |
| "learning_rate": 9.155696177445064e-05, | |
| "loss": 2.5597, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.22944795078692978, | |
| "grad_norm": 0.4477051794528961, | |
| "learning_rate": 9.149810924924482e-05, | |
| "loss": 2.551, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.23008708156906607, | |
| "grad_norm": 0.4732860326766968, | |
| "learning_rate": 9.143907137170194e-05, | |
| "loss": 2.5688, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.23072621235120236, | |
| "grad_norm": 0.520808219909668, | |
| "learning_rate": 9.137984840551612e-05, | |
| "loss": 2.5429, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.23136534313333865, | |
| "grad_norm": 0.448128879070282, | |
| "learning_rate": 9.132044061520823e-05, | |
| "loss": 2.5146, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.23200447391547496, | |
| "grad_norm": 0.520537257194519, | |
| "learning_rate": 9.126084826612464e-05, | |
| "loss": 2.5718, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.23264360469761125, | |
| "grad_norm": 0.5061787962913513, | |
| "learning_rate": 9.120107162443605e-05, | |
| "loss": 2.5341, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.23328273547974754, | |
| "grad_norm": 0.4683222770690918, | |
| "learning_rate": 9.114111095713633e-05, | |
| "loss": 2.5351, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.23392186626188383, | |
| "grad_norm": 0.4754564166069031, | |
| "learning_rate": 9.108096653204125e-05, | |
| "loss": 2.5798, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.23456099704402014, | |
| "grad_norm": 0.5304054021835327, | |
| "learning_rate": 9.102063861778744e-05, | |
| "loss": 2.5812, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.23520012782615643, | |
| "grad_norm": 0.4747471809387207, | |
| "learning_rate": 9.0960127483831e-05, | |
| "loss": 2.5847, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.23583925860829272, | |
| "grad_norm": 0.4957279562950134, | |
| "learning_rate": 9.089943340044642e-05, | |
| "loss": 2.5689, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.236478389390429, | |
| "grad_norm": 0.5040017366409302, | |
| "learning_rate": 9.083855663872533e-05, | |
| "loss": 2.5345, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.23711752017256532, | |
| "grad_norm": 0.5398538708686829, | |
| "learning_rate": 9.07774974705753e-05, | |
| "loss": 2.5517, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.2377566509547016, | |
| "grad_norm": 0.5123056173324585, | |
| "learning_rate": 9.071625616871862e-05, | |
| "loss": 2.5746, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.2383957817368379, | |
| "grad_norm": 0.4740076959133148, | |
| "learning_rate": 9.06548330066911e-05, | |
| "loss": 2.5449, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.23903491251897419, | |
| "grad_norm": 0.4199361801147461, | |
| "learning_rate": 9.05932282588408e-05, | |
| "loss": 2.5857, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.2396740433011105, | |
| "grad_norm": 0.4691718816757202, | |
| "learning_rate": 9.053144220032688e-05, | |
| "loss": 2.5408, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.2403131740832468, | |
| "grad_norm": 0.4801616668701172, | |
| "learning_rate": 9.04694751071183e-05, | |
| "loss": 2.6167, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.24095230486538308, | |
| "grad_norm": 0.5200051069259644, | |
| "learning_rate": 9.040732725599261e-05, | |
| "loss": 2.5032, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.24159143564751936, | |
| "grad_norm": 0.5068468451499939, | |
| "learning_rate": 9.034499892453477e-05, | |
| "loss": 2.5041, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.24223056642965568, | |
| "grad_norm": 0.5166811347007751, | |
| "learning_rate": 9.028249039113577e-05, | |
| "loss": 2.6254, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.24286969721179197, | |
| "grad_norm": 0.5714825987815857, | |
| "learning_rate": 9.021980193499157e-05, | |
| "loss": 2.5375, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.24350882799392826, | |
| "grad_norm": 0.4392567574977875, | |
| "learning_rate": 9.015693383610169e-05, | |
| "loss": 2.5482, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.24414795877606454, | |
| "grad_norm": 0.44030579924583435, | |
| "learning_rate": 9.009388637526808e-05, | |
| "loss": 2.5577, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.24478708955820086, | |
| "grad_norm": 0.49010273814201355, | |
| "learning_rate": 9.00306598340938e-05, | |
| "loss": 2.5707, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.24542622034033715, | |
| "grad_norm": 0.560543417930603, | |
| "learning_rate": 8.996725449498173e-05, | |
| "loss": 2.5574, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.24606535112247344, | |
| "grad_norm": 0.5686501264572144, | |
| "learning_rate": 8.990367064113343e-05, | |
| "loss": 2.5459, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.24670448190460972, | |
| "grad_norm": 0.5197829008102417, | |
| "learning_rate": 8.983990855654774e-05, | |
| "loss": 2.5316, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.247343612686746, | |
| "grad_norm": 0.48393699526786804, | |
| "learning_rate": 8.977596852601961e-05, | |
| "loss": 2.5376, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.24798274346888233, | |
| "grad_norm": 0.4604134261608124, | |
| "learning_rate": 8.971185083513878e-05, | |
| "loss": 2.5373, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.24862187425101862, | |
| "grad_norm": 0.5080364346504211, | |
| "learning_rate": 8.964755577028852e-05, | |
| "loss": 2.516, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.2492610050331549, | |
| "grad_norm": 0.5315148830413818, | |
| "learning_rate": 8.958308361864429e-05, | |
| "loss": 2.5182, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.2499001358152912, | |
| "grad_norm": 0.4669964015483856, | |
| "learning_rate": 8.951843466817261e-05, | |
| "loss": 2.506, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.2505392665974275, | |
| "grad_norm": 0.5169178247451782, | |
| "learning_rate": 8.94536092076296e-05, | |
| "loss": 2.5524, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.25117839737956377, | |
| "grad_norm": 0.530693769454956, | |
| "learning_rate": 8.93886075265598e-05, | |
| "loss": 2.5515, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.2518175281617001, | |
| "grad_norm": 0.5086248517036438, | |
| "learning_rate": 8.932342991529484e-05, | |
| "loss": 2.5235, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.2524566589438364, | |
| "grad_norm": 0.5186027884483337, | |
| "learning_rate": 8.925807666495212e-05, | |
| "loss": 2.5616, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.2530957897259727, | |
| "grad_norm": 0.5286267995834351, | |
| "learning_rate": 8.919254806743358e-05, | |
| "loss": 2.558, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.253734920508109, | |
| "grad_norm": 0.56434166431427, | |
| "learning_rate": 8.912684441542432e-05, | |
| "loss": 2.5315, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.25437405129024526, | |
| "grad_norm": 0.5112208127975464, | |
| "learning_rate": 8.906096600239135e-05, | |
| "loss": 2.5842, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.25501318207238155, | |
| "grad_norm": 0.5397393703460693, | |
| "learning_rate": 8.899491312258221e-05, | |
| "loss": 2.5405, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.25565231285451784, | |
| "grad_norm": 0.4671647250652313, | |
| "learning_rate": 8.892868607102376e-05, | |
| "loss": 2.4999, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.2562914436366541, | |
| "grad_norm": 0.41425585746765137, | |
| "learning_rate": 8.886228514352076e-05, | |
| "loss": 2.5312, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.25693057441879047, | |
| "grad_norm": 0.43078532814979553, | |
| "learning_rate": 8.879571063665462e-05, | |
| "loss": 2.5218, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.25756970520092676, | |
| "grad_norm": 0.432005912065506, | |
| "learning_rate": 8.872896284778201e-05, | |
| "loss": 2.523, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.25820883598306305, | |
| "grad_norm": 0.40941286087036133, | |
| "learning_rate": 8.866204207503359e-05, | |
| "loss": 2.575, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.25884796676519933, | |
| "grad_norm": 0.431316077709198, | |
| "learning_rate": 8.859494861731267e-05, | |
| "loss": 2.5837, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.2594870975473356, | |
| "grad_norm": 0.4376726448535919, | |
| "learning_rate": 8.852768277429384e-05, | |
| "loss": 2.5137, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.2601262283294719, | |
| "grad_norm": 0.5029991865158081, | |
| "learning_rate": 8.846024484642166e-05, | |
| "loss": 2.5526, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.2607653591116082, | |
| "grad_norm": 0.5601023435592651, | |
| "learning_rate": 8.839263513490931e-05, | |
| "loss": 2.5788, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.2614044898937445, | |
| "grad_norm": 0.5238969922065735, | |
| "learning_rate": 8.832485394173726e-05, | |
| "loss": 2.5589, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.26204362067588083, | |
| "grad_norm": 0.4996497929096222, | |
| "learning_rate": 8.825690156965188e-05, | |
| "loss": 2.57, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.2626827514580171, | |
| "grad_norm": 0.47495660185813904, | |
| "learning_rate": 8.818877832216413e-05, | |
| "loss": 2.5341, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.2633218822401534, | |
| "grad_norm": 0.503065288066864, | |
| "learning_rate": 8.812048450354819e-05, | |
| "loss": 2.5416, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.2639610130222897, | |
| "grad_norm": 0.49365946650505066, | |
| "learning_rate": 8.805202041884012e-05, | |
| "loss": 2.516, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.264600143804426, | |
| "grad_norm": 0.4978492558002472, | |
| "learning_rate": 8.798338637383645e-05, | |
| "loss": 2.52, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.26523927458656227, | |
| "grad_norm": 0.4817914664745331, | |
| "learning_rate": 8.791458267509283e-05, | |
| "loss": 2.5118, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.26587840536869856, | |
| "grad_norm": 0.5463185906410217, | |
| "learning_rate": 8.78456096299227e-05, | |
| "loss": 2.5527, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.26651753615083484, | |
| "grad_norm": 0.4575677812099457, | |
| "learning_rate": 8.77764675463959e-05, | |
| "loss": 2.5076, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.26715666693297113, | |
| "grad_norm": 0.47226086258888245, | |
| "learning_rate": 8.770715673333722e-05, | |
| "loss": 2.5357, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.2677957977151075, | |
| "grad_norm": 0.44436129927635193, | |
| "learning_rate": 8.763767750032518e-05, | |
| "loss": 2.5354, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.26843492849724376, | |
| "grad_norm": 0.48564496636390686, | |
| "learning_rate": 8.756803015769049e-05, | |
| "loss": 2.5479, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.26907405927938005, | |
| "grad_norm": 0.47404852509498596, | |
| "learning_rate": 8.749821501651472e-05, | |
| "loss": 2.5175, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.26971319006151634, | |
| "grad_norm": 0.4444579482078552, | |
| "learning_rate": 8.742823238862895e-05, | |
| "loss": 2.5066, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.2703523208436526, | |
| "grad_norm": 0.48305433988571167, | |
| "learning_rate": 8.735808258661233e-05, | |
| "loss": 2.5314, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.2709914516257889, | |
| "grad_norm": 0.5266690254211426, | |
| "learning_rate": 8.728776592379068e-05, | |
| "loss": 2.5734, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.2716305824079252, | |
| "grad_norm": 0.4550389051437378, | |
| "learning_rate": 8.721728271423512e-05, | |
| "loss": 2.556, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.2722697131900615, | |
| "grad_norm": 0.47347402572631836, | |
| "learning_rate": 8.71466332727607e-05, | |
| "loss": 2.5649, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.27290884397219783, | |
| "grad_norm": 0.5236901640892029, | |
| "learning_rate": 8.707581791492485e-05, | |
| "loss": 2.564, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.2735479747543341, | |
| "grad_norm": 0.5352922677993774, | |
| "learning_rate": 8.700483695702617e-05, | |
| "loss": 2.4933, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.2741871055364704, | |
| "grad_norm": 0.5335637331008911, | |
| "learning_rate": 8.693369071610287e-05, | |
| "loss": 2.4958, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.2748262363186067, | |
| "grad_norm": 0.4980125427246094, | |
| "learning_rate": 8.686237950993137e-05, | |
| "loss": 2.5519, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.275465367100743, | |
| "grad_norm": 0.4874439239501953, | |
| "learning_rate": 8.679090365702498e-05, | |
| "loss": 2.5326, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.2761044978828793, | |
| "grad_norm": 0.5071477293968201, | |
| "learning_rate": 8.671926347663238e-05, | |
| "loss": 2.5092, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.27674362866501556, | |
| "grad_norm": 0.4566083252429962, | |
| "learning_rate": 8.664745928873619e-05, | |
| "loss": 2.5108, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.27738275944715185, | |
| "grad_norm": 0.4723774492740631, | |
| "learning_rate": 8.657549141405161e-05, | |
| "loss": 2.4921, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.2780218902292882, | |
| "grad_norm": 0.44547411799430847, | |
| "learning_rate": 8.650336017402494e-05, | |
| "loss": 2.5481, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.2786610210114245, | |
| "grad_norm": 0.4709297716617584, | |
| "learning_rate": 8.643106589083216e-05, | |
| "loss": 2.501, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.27930015179356077, | |
| "grad_norm": 0.4446027874946594, | |
| "learning_rate": 8.63586088873775e-05, | |
| "loss": 2.5133, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.27993928257569706, | |
| "grad_norm": 0.4283333420753479, | |
| "learning_rate": 8.628598948729197e-05, | |
| "loss": 2.5338, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.28057841335783335, | |
| "grad_norm": 0.4817812442779541, | |
| "learning_rate": 8.621320801493188e-05, | |
| "loss": 2.5519, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.28121754413996963, | |
| "grad_norm": 0.49330389499664307, | |
| "learning_rate": 8.614026479537753e-05, | |
| "loss": 2.5047, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.2818566749221059, | |
| "grad_norm": 0.43356356024742126, | |
| "learning_rate": 8.606716015443161e-05, | |
| "loss": 2.4994, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.2824958057042422, | |
| "grad_norm": 0.4848228991031647, | |
| "learning_rate": 8.599389441861782e-05, | |
| "loss": 2.5186, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.28313493648637855, | |
| "grad_norm": 0.5413882732391357, | |
| "learning_rate": 8.59204679151794e-05, | |
| "loss": 2.5508, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.28377406726851484, | |
| "grad_norm": 0.46791547536849976, | |
| "learning_rate": 8.584688097207764e-05, | |
| "loss": 2.5728, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.28441319805065113, | |
| "grad_norm": 0.4600776731967926, | |
| "learning_rate": 8.577313391799046e-05, | |
| "loss": 2.5341, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.2850523288327874, | |
| "grad_norm": 0.3964655101299286, | |
| "learning_rate": 8.569922708231089e-05, | |
| "loss": 2.553, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.2856914596149237, | |
| "grad_norm": 0.41062116622924805, | |
| "learning_rate": 8.562516079514569e-05, | |
| "loss": 2.5726, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.28633059039706, | |
| "grad_norm": 0.43652409315109253, | |
| "learning_rate": 8.555093538731374e-05, | |
| "loss": 2.5313, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.2869697211791963, | |
| "grad_norm": 0.4312250316143036, | |
| "learning_rate": 8.547655119034467e-05, | |
| "loss": 2.4911, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.28760885196133257, | |
| "grad_norm": 0.4835914373397827, | |
| "learning_rate": 8.540200853647737e-05, | |
| "loss": 2.5262, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.28824798274346886, | |
| "grad_norm": 0.530998170375824, | |
| "learning_rate": 8.532730775865845e-05, | |
| "loss": 2.5246, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.2888871135256052, | |
| "grad_norm": 0.474884957075119, | |
| "learning_rate": 8.525244919054078e-05, | |
| "loss": 2.5172, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.2895262443077415, | |
| "grad_norm": 0.4668194353580475, | |
| "learning_rate": 8.517743316648201e-05, | |
| "loss": 2.5274, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.2901653750898778, | |
| "grad_norm": 0.4744424819946289, | |
| "learning_rate": 8.510226002154311e-05, | |
| "loss": 2.5028, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.29080450587201406, | |
| "grad_norm": 0.44373229146003723, | |
| "learning_rate": 8.502693009148676e-05, | |
| "loss": 2.5008, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.29144363665415035, | |
| "grad_norm": 0.47032368183135986, | |
| "learning_rate": 8.495144371277601e-05, | |
| "loss": 2.5403, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.29208276743628664, | |
| "grad_norm": 0.45909997820854187, | |
| "learning_rate": 8.487580122257261e-05, | |
| "loss": 2.5323, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.2927218982184229, | |
| "grad_norm": 0.446980744600296, | |
| "learning_rate": 8.480000295873565e-05, | |
| "loss": 2.4766, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.2933610290005592, | |
| "grad_norm": 0.46346354484558105, | |
| "learning_rate": 8.472404925981993e-05, | |
| "loss": 2.5091, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.29400015978269556, | |
| "grad_norm": 0.4928946793079376, | |
| "learning_rate": 8.464794046507458e-05, | |
| "loss": 2.5338, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.29463929056483185, | |
| "grad_norm": 0.45256319642066956, | |
| "learning_rate": 8.457167691444138e-05, | |
| "loss": 2.4838, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.29527842134696813, | |
| "grad_norm": 0.4809649884700775, | |
| "learning_rate": 8.449525894855342e-05, | |
| "loss": 2.5588, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.2959175521291044, | |
| "grad_norm": 0.49705770611763, | |
| "learning_rate": 8.44186869087334e-05, | |
| "loss": 2.5108, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.2965566829112407, | |
| "grad_norm": 0.47247809171676636, | |
| "learning_rate": 8.434196113699226e-05, | |
| "loss": 2.4965, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.297195813693377, | |
| "grad_norm": 0.4562348425388336, | |
| "learning_rate": 8.426508197602758e-05, | |
| "loss": 2.5036, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.2978349444755133, | |
| "grad_norm": 0.4428156018257141, | |
| "learning_rate": 8.418804976922203e-05, | |
| "loss": 2.4793, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.2984740752576496, | |
| "grad_norm": 0.44122713804244995, | |
| "learning_rate": 8.411086486064188e-05, | |
| "loss": 2.4917, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.2991132060397859, | |
| "grad_norm": 0.48494070768356323, | |
| "learning_rate": 8.403352759503545e-05, | |
| "loss": 2.5599, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.2997523368219222, | |
| "grad_norm": 0.49274128675460815, | |
| "learning_rate": 8.395603831783154e-05, | |
| "loss": 2.5376, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.3003914676040585, | |
| "grad_norm": 0.44119563698768616, | |
| "learning_rate": 8.387839737513796e-05, | |
| "loss": 2.5344, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.3010305983861948, | |
| "grad_norm": 0.5195220112800598, | |
| "learning_rate": 8.380060511373991e-05, | |
| "loss": 2.5612, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.30166972916833107, | |
| "grad_norm": 0.5167264342308044, | |
| "learning_rate": 8.372266188109844e-05, | |
| "loss": 2.4893, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.30230885995046736, | |
| "grad_norm": 0.496879905462265, | |
| "learning_rate": 8.364456802534894e-05, | |
| "loss": 2.5061, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.30294799073260364, | |
| "grad_norm": 0.47322845458984375, | |
| "learning_rate": 8.35663238952996e-05, | |
| "loss": 2.5042, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.30358712151473993, | |
| "grad_norm": 0.46881020069122314, | |
| "learning_rate": 8.34879298404297e-05, | |
| "loss": 2.5244, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.3042262522968762, | |
| "grad_norm": 0.4514290690422058, | |
| "learning_rate": 8.340938621088829e-05, | |
| "loss": 2.5461, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.30486538307901256, | |
| "grad_norm": 0.47025352716445923, | |
| "learning_rate": 8.333069335749244e-05, | |
| "loss": 2.5083, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.30550451386114885, | |
| "grad_norm": 0.46300917863845825, | |
| "learning_rate": 8.325185163172572e-05, | |
| "loss": 2.5076, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.30614364464328514, | |
| "grad_norm": 0.43201884627342224, | |
| "learning_rate": 8.317286138573667e-05, | |
| "loss": 2.5414, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.30678277542542143, | |
| "grad_norm": 0.4062347114086151, | |
| "learning_rate": 8.309372297233717e-05, | |
| "loss": 2.548, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.3074219062075577, | |
| "grad_norm": 0.4473116397857666, | |
| "learning_rate": 8.301443674500095e-05, | |
| "loss": 2.5152, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.308061036989694, | |
| "grad_norm": 0.46256545186042786, | |
| "learning_rate": 8.293500305786188e-05, | |
| "loss": 2.4773, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.3087001677718303, | |
| "grad_norm": 0.4799022972583771, | |
| "learning_rate": 8.285542226571253e-05, | |
| "loss": 2.4744, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.3093392985539666, | |
| "grad_norm": 0.43469420075416565, | |
| "learning_rate": 8.277569472400245e-05, | |
| "loss": 2.5165, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.3099784293361029, | |
| "grad_norm": 0.517128050327301, | |
| "learning_rate": 8.269582078883673e-05, | |
| "loss": 2.4729, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.3106175601182392, | |
| "grad_norm": 0.41483157873153687, | |
| "learning_rate": 8.261580081697424e-05, | |
| "loss": 2.5096, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.3112566909003755, | |
| "grad_norm": 0.4197140038013458, | |
| "learning_rate": 8.253563516582621e-05, | |
| "loss": 2.5087, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.3118958216825118, | |
| "grad_norm": 0.4126445949077606, | |
| "learning_rate": 8.24553241934545e-05, | |
| "loss": 2.5014, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.3125349524646481, | |
| "grad_norm": 0.4197913706302643, | |
| "learning_rate": 8.237486825857008e-05, | |
| "loss": 2.5389, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.31317408324678436, | |
| "grad_norm": 0.4119601547718048, | |
| "learning_rate": 8.229426772053135e-05, | |
| "loss": 2.5601, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.31381321402892065, | |
| "grad_norm": 0.39146915078163147, | |
| "learning_rate": 8.221352293934267e-05, | |
| "loss": 2.5187, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.31445234481105694, | |
| "grad_norm": 0.45680907368659973, | |
| "learning_rate": 8.213263427565258e-05, | |
| "loss": 2.5111, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.3150914755931933, | |
| "grad_norm": 0.4924193322658539, | |
| "learning_rate": 8.205160209075231e-05, | |
| "loss": 2.5528, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.31573060637532957, | |
| "grad_norm": 0.4643436074256897, | |
| "learning_rate": 8.197042674657413e-05, | |
| "loss": 2.5032, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.31636973715746586, | |
| "grad_norm": 0.5223676562309265, | |
| "learning_rate": 8.188910860568975e-05, | |
| "loss": 2.4557, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.31700886793960215, | |
| "grad_norm": 0.4598020017147064, | |
| "learning_rate": 8.180764803130865e-05, | |
| "loss": 2.5071, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.31764799872173843, | |
| "grad_norm": 0.5029572248458862, | |
| "learning_rate": 8.172604538727652e-05, | |
| "loss": 2.5072, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.3182871295038747, | |
| "grad_norm": 0.4447329342365265, | |
| "learning_rate": 8.164430103807359e-05, | |
| "loss": 2.5518, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.318926260286011, | |
| "grad_norm": 0.5340293645858765, | |
| "learning_rate": 8.156241534881303e-05, | |
| "loss": 2.4823, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.3195653910681473, | |
| "grad_norm": 0.4599528908729553, | |
| "learning_rate": 8.148038868523933e-05, | |
| "loss": 2.5153, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.32020452185028364, | |
| "grad_norm": 0.4791001081466675, | |
| "learning_rate": 8.13982214137266e-05, | |
| "loss": 2.5175, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.32084365263241993, | |
| "grad_norm": 0.4910374581813812, | |
| "learning_rate": 8.131591390127699e-05, | |
| "loss": 2.4864, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.3214827834145562, | |
| "grad_norm": 0.5045061707496643, | |
| "learning_rate": 8.12334665155191e-05, | |
| "loss": 2.537, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.3221219141966925, | |
| "grad_norm": 0.4830935597419739, | |
| "learning_rate": 8.115087962470618e-05, | |
| "loss": 2.4838, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.3227610449788288, | |
| "grad_norm": 0.4147256910800934, | |
| "learning_rate": 8.106815359771464e-05, | |
| "loss": 2.4774, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.3234001757609651, | |
| "grad_norm": 0.46321430802345276, | |
| "learning_rate": 8.098528880404237e-05, | |
| "loss": 2.512, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.32403930654310137, | |
| "grad_norm": 0.41950172185897827, | |
| "learning_rate": 8.0902285613807e-05, | |
| "loss": 2.5279, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.32467843732523766, | |
| "grad_norm": 0.40325912833213806, | |
| "learning_rate": 8.081914439774434e-05, | |
| "loss": 2.4863, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.32531756810737394, | |
| "grad_norm": 0.3965325951576233, | |
| "learning_rate": 8.073586552720673e-05, | |
| "loss": 2.5135, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.3259566988895103, | |
| "grad_norm": 0.39921608567237854, | |
| "learning_rate": 8.06524493741613e-05, | |
| "loss": 2.5036, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.3265958296716466, | |
| "grad_norm": 0.395905077457428, | |
| "learning_rate": 8.056889631118836e-05, | |
| "loss": 2.5166, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.32723496045378286, | |
| "grad_norm": 0.4924047589302063, | |
| "learning_rate": 8.048520671147976e-05, | |
| "loss": 2.5049, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.32787409123591915, | |
| "grad_norm": 0.4676661491394043, | |
| "learning_rate": 8.040138094883716e-05, | |
| "loss": 2.4813, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.32851322201805544, | |
| "grad_norm": 0.4176212251186371, | |
| "learning_rate": 8.031741939767045e-05, | |
| "loss": 2.4687, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.3291523528001917, | |
| "grad_norm": 0.4121030271053314, | |
| "learning_rate": 8.023332243299595e-05, | |
| "loss": 2.5057, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.329791483582328, | |
| "grad_norm": 0.4807201325893402, | |
| "learning_rate": 8.014909043043486e-05, | |
| "loss": 2.4862, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.3304306143644643, | |
| "grad_norm": 0.46126338839530945, | |
| "learning_rate": 8.006472376621151e-05, | |
| "loss": 2.5282, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.33106974514660065, | |
| "grad_norm": 0.40715089440345764, | |
| "learning_rate": 7.998022281715172e-05, | |
| "loss": 2.51, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.33170887592873693, | |
| "grad_norm": 0.41599327325820923, | |
| "learning_rate": 7.989558796068106e-05, | |
| "loss": 2.5023, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.3323480067108732, | |
| "grad_norm": 0.4323025345802307, | |
| "learning_rate": 7.981081957482322e-05, | |
| "loss": 2.5509, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.3329871374930095, | |
| "grad_norm": 0.42841923236846924, | |
| "learning_rate": 7.972591803819832e-05, | |
| "loss": 2.5325, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.3336262682751458, | |
| "grad_norm": 0.4102122187614441, | |
| "learning_rate": 7.964088373002117e-05, | |
| "loss": 2.5101, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.3342653990572821, | |
| "grad_norm": 0.4323672950267792, | |
| "learning_rate": 7.955571703009964e-05, | |
| "loss": 2.4776, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.3349045298394184, | |
| "grad_norm": 0.5570160746574402, | |
| "learning_rate": 7.947041831883288e-05, | |
| "loss": 2.5129, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.33554366062155466, | |
| "grad_norm": 0.45799124240875244, | |
| "learning_rate": 7.938498797720975e-05, | |
| "loss": 2.4763, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.336182791403691, | |
| "grad_norm": 0.43201950192451477, | |
| "learning_rate": 7.929942638680698e-05, | |
| "loss": 2.4901, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.3368219221858273, | |
| "grad_norm": 0.38428887724876404, | |
| "learning_rate": 7.921373392978752e-05, | |
| "loss": 2.4987, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.3374610529679636, | |
| "grad_norm": 0.42398396134376526, | |
| "learning_rate": 7.912791098889888e-05, | |
| "loss": 2.5163, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.33810018375009987, | |
| "grad_norm": 0.4429122507572174, | |
| "learning_rate": 7.904195794747135e-05, | |
| "loss": 2.4899, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.33873931453223616, | |
| "grad_norm": 0.42127612233161926, | |
| "learning_rate": 7.895587518941635e-05, | |
| "loss": 2.5439, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.33937844531437245, | |
| "grad_norm": 0.3591577410697937, | |
| "learning_rate": 7.886966309922464e-05, | |
| "loss": 2.5237, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.34001757609650873, | |
| "grad_norm": 0.4371255338191986, | |
| "learning_rate": 7.878332206196464e-05, | |
| "loss": 2.4695, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.340656706878645, | |
| "grad_norm": 0.49069616198539734, | |
| "learning_rate": 7.86968524632808e-05, | |
| "loss": 2.5077, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.34129583766078136, | |
| "grad_norm": 0.4505859911441803, | |
| "learning_rate": 7.86102546893917e-05, | |
| "loss": 2.5412, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.34193496844291765, | |
| "grad_norm": 0.45395562052726746, | |
| "learning_rate": 7.852352912708844e-05, | |
| "loss": 2.4974, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.34257409922505394, | |
| "grad_norm": 0.4283112585544586, | |
| "learning_rate": 7.843667616373292e-05, | |
| "loss": 2.5036, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.34321323000719023, | |
| "grad_norm": 0.40652894973754883, | |
| "learning_rate": 7.834969618725606e-05, | |
| "loss": 2.4569, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.3438523607893265, | |
| "grad_norm": 0.42897436022758484, | |
| "learning_rate": 7.826258958615606e-05, | |
| "loss": 2.4978, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.3444914915714628, | |
| "grad_norm": 0.44334399700164795, | |
| "learning_rate": 7.817535674949677e-05, | |
| "loss": 2.4857, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.3451306223535991, | |
| "grad_norm": 0.4655436873435974, | |
| "learning_rate": 7.808799806690579e-05, | |
| "loss": 2.5203, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.3457697531357354, | |
| "grad_norm": 0.4742682874202728, | |
| "learning_rate": 7.800051392857283e-05, | |
| "loss": 2.4605, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.34640888391787167, | |
| "grad_norm": 0.4513695240020752, | |
| "learning_rate": 7.791290472524804e-05, | |
| "loss": 2.5016, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.347048014700008, | |
| "grad_norm": 0.42871472239494324, | |
| "learning_rate": 7.782517084824004e-05, | |
| "loss": 2.5168, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.3476871454821443, | |
| "grad_norm": 0.41885146498680115, | |
| "learning_rate": 7.77373126894144e-05, | |
| "loss": 2.5103, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.3483262762642806, | |
| "grad_norm": 0.39808389544487, | |
| "learning_rate": 7.764933064119175e-05, | |
| "loss": 2.5302, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.3489654070464169, | |
| "grad_norm": 0.3670606017112732, | |
| "learning_rate": 7.756122509654614e-05, | |
| "loss": 2.4764, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.34960453782855316, | |
| "grad_norm": 0.4411506652832031, | |
| "learning_rate": 7.747299644900314e-05, | |
| "loss": 2.4844, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.35024366861068945, | |
| "grad_norm": 0.4119238555431366, | |
| "learning_rate": 7.738464509263819e-05, | |
| "loss": 2.5056, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.35088279939282574, | |
| "grad_norm": 0.40610644221305847, | |
| "learning_rate": 7.729617142207486e-05, | |
| "loss": 2.4613, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.351521930174962, | |
| "grad_norm": 0.4533090591430664, | |
| "learning_rate": 7.720757583248297e-05, | |
| "loss": 2.4903, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.35216106095709837, | |
| "grad_norm": 0.49713581800460815, | |
| "learning_rate": 7.711885871957693e-05, | |
| "loss": 2.4887, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.35280019173923466, | |
| "grad_norm": 0.5112652778625488, | |
| "learning_rate": 7.703002047961396e-05, | |
| "loss": 2.5263, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.35343932252137095, | |
| "grad_norm": 0.42709290981292725, | |
| "learning_rate": 7.694106150939222e-05, | |
| "loss": 2.5582, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.35407845330350723, | |
| "grad_norm": 0.46314576268196106, | |
| "learning_rate": 7.685198220624918e-05, | |
| "loss": 2.4925, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.3547175840856435, | |
| "grad_norm": 0.4089437425136566, | |
| "learning_rate": 7.67627829680598e-05, | |
| "loss": 2.4898, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.3553567148677798, | |
| "grad_norm": 0.4496088922023773, | |
| "learning_rate": 7.667346419323463e-05, | |
| "loss": 2.4895, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.3559958456499161, | |
| "grad_norm": 0.4586305618286133, | |
| "learning_rate": 7.658402628071825e-05, | |
| "loss": 2.4368, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.3566349764320524, | |
| "grad_norm": 0.4276970326900482, | |
| "learning_rate": 7.649446962998731e-05, | |
| "loss": 2.4696, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.35727410721418873, | |
| "grad_norm": 0.44702062010765076, | |
| "learning_rate": 7.640479464104881e-05, | |
| "loss": 2.4481, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.357913237996325, | |
| "grad_norm": 0.4042547941207886, | |
| "learning_rate": 7.631500171443833e-05, | |
| "loss": 2.5184, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.3585523687784613, | |
| "grad_norm": 0.44804704189300537, | |
| "learning_rate": 7.62250912512182e-05, | |
| "loss": 2.4773, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.3591914995605976, | |
| "grad_norm": 0.43027397990226746, | |
| "learning_rate": 7.613506365297573e-05, | |
| "loss": 2.4966, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.3598306303427339, | |
| "grad_norm": 0.4268493056297302, | |
| "learning_rate": 7.604491932182143e-05, | |
| "loss": 2.5108, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.36046976112487017, | |
| "grad_norm": 0.4821198582649231, | |
| "learning_rate": 7.595465866038723e-05, | |
| "loss": 2.5315, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.36110889190700646, | |
| "grad_norm": 0.4282810389995575, | |
| "learning_rate": 7.586428207182457e-05, | |
| "loss": 2.4815, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.36174802268914275, | |
| "grad_norm": 0.4613235294818878, | |
| "learning_rate": 7.577378995980278e-05, | |
| "loss": 2.485, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.36238715347127903, | |
| "grad_norm": 0.42899811267852783, | |
| "learning_rate": 7.568318272850709e-05, | |
| "loss": 2.4477, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.3630262842534154, | |
| "grad_norm": 0.3988739550113678, | |
| "learning_rate": 7.559246078263697e-05, | |
| "loss": 2.4627, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.36366541503555166, | |
| "grad_norm": 0.40458565950393677, | |
| "learning_rate": 7.550162452740425e-05, | |
| "loss": 2.4884, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.36430454581768795, | |
| "grad_norm": 0.3784649968147278, | |
| "learning_rate": 7.541067436853131e-05, | |
| "loss": 2.5219, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.36494367659982424, | |
| "grad_norm": 0.4682624042034149, | |
| "learning_rate": 7.531961071224929e-05, | |
| "loss": 2.4598, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.36558280738196053, | |
| "grad_norm": 0.4466330409049988, | |
| "learning_rate": 7.522843396529631e-05, | |
| "loss": 2.4495, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.3662219381640968, | |
| "grad_norm": 0.42352086305618286, | |
| "learning_rate": 7.513714453491557e-05, | |
| "loss": 2.4345, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.3668610689462331, | |
| "grad_norm": 0.42015886306762695, | |
| "learning_rate": 7.504574282885353e-05, | |
| "loss": 2.4698, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.3675001997283694, | |
| "grad_norm": 0.4295947551727295, | |
| "learning_rate": 7.495422925535824e-05, | |
| "loss": 2.5224, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.36813933051050574, | |
| "grad_norm": 0.44045698642730713, | |
| "learning_rate": 7.486260422317734e-05, | |
| "loss": 2.5316, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.368778461292642, | |
| "grad_norm": 0.41533538699150085, | |
| "learning_rate": 7.47708681415563e-05, | |
| "loss": 2.4966, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.3694175920747783, | |
| "grad_norm": 0.407446950674057, | |
| "learning_rate": 7.467902142023663e-05, | |
| "loss": 2.4337, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.3700567228569146, | |
| "grad_norm": 0.42818406224250793, | |
| "learning_rate": 7.458706446945398e-05, | |
| "loss": 2.4749, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.3706958536390509, | |
| "grad_norm": 0.46655282378196716, | |
| "learning_rate": 7.449499769993636e-05, | |
| "loss": 2.5227, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.3713349844211872, | |
| "grad_norm": 0.4494290351867676, | |
| "learning_rate": 7.440282152290229e-05, | |
| "loss": 2.495, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.37197411520332346, | |
| "grad_norm": 0.4101320803165436, | |
| "learning_rate": 7.431053635005896e-05, | |
| "loss": 2.4777, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.37261324598545975, | |
| "grad_norm": 0.46381497383117676, | |
| "learning_rate": 7.421814259360038e-05, | |
| "loss": 2.468, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.3732523767675961, | |
| "grad_norm": 0.3879064917564392, | |
| "learning_rate": 7.412564066620557e-05, | |
| "loss": 2.4837, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.3738915075497324, | |
| "grad_norm": 0.4411042034626007, | |
| "learning_rate": 7.403303098103668e-05, | |
| "loss": 2.475, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.37453063833186867, | |
| "grad_norm": 0.42639175057411194, | |
| "learning_rate": 7.394031395173718e-05, | |
| "loss": 2.4611, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.37516976911400496, | |
| "grad_norm": 0.415591835975647, | |
| "learning_rate": 7.384748999242999e-05, | |
| "loss": 2.4453, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.37580889989614125, | |
| "grad_norm": 0.4134119749069214, | |
| "learning_rate": 7.375455951771562e-05, | |
| "loss": 2.4525, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.37644803067827753, | |
| "grad_norm": 0.4086207151412964, | |
| "learning_rate": 7.366152294267035e-05, | |
| "loss": 2.4848, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.3770871614604138, | |
| "grad_norm": 0.4082541763782501, | |
| "learning_rate": 7.356838068284439e-05, | |
| "loss": 2.5182, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.3777262922425501, | |
| "grad_norm": 0.4362512528896332, | |
| "learning_rate": 7.347513315425991e-05, | |
| "loss": 2.4888, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.37836542302468645, | |
| "grad_norm": 0.4644331932067871, | |
| "learning_rate": 7.338178077340934e-05, | |
| "loss": 2.4544, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.37900455380682274, | |
| "grad_norm": 0.3940586447715759, | |
| "learning_rate": 7.328832395725342e-05, | |
| "loss": 2.4839, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.37964368458895903, | |
| "grad_norm": 0.4653317332267761, | |
| "learning_rate": 7.319476312321931e-05, | |
| "loss": 2.4955, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.3802828153710953, | |
| "grad_norm": 0.367896169424057, | |
| "learning_rate": 7.310109868919884e-05, | |
| "loss": 2.4213, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.3809219461532316, | |
| "grad_norm": 0.39538541436195374, | |
| "learning_rate": 7.30073310735465e-05, | |
| "loss": 2.484, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.3815610769353679, | |
| "grad_norm": 0.3957819938659668, | |
| "learning_rate": 7.291346069507772e-05, | |
| "loss": 2.4535, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.3822002077175042, | |
| "grad_norm": 0.41359907388687134, | |
| "learning_rate": 7.281948797306683e-05, | |
| "loss": 2.4266, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.38283933849964047, | |
| "grad_norm": 0.35412532091140747, | |
| "learning_rate": 7.272541332724536e-05, | |
| "loss": 2.46, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.38347846928177676, | |
| "grad_norm": 0.5143829584121704, | |
| "learning_rate": 7.263123717780007e-05, | |
| "loss": 2.4567, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.3841176000639131, | |
| "grad_norm": 0.45182162523269653, | |
| "learning_rate": 7.253695994537105e-05, | |
| "loss": 2.5002, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.3847567308460494, | |
| "grad_norm": 0.4548209607601166, | |
| "learning_rate": 7.24425820510499e-05, | |
| "loss": 2.4983, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.3853958616281857, | |
| "grad_norm": 0.549582302570343, | |
| "learning_rate": 7.234810391637783e-05, | |
| "loss": 2.446, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.38603499241032196, | |
| "grad_norm": 0.37702831625938416, | |
| "learning_rate": 7.225352596334382e-05, | |
| "loss": 2.4657, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.38667412319245825, | |
| "grad_norm": 0.5386621952056885, | |
| "learning_rate": 7.215884861438259e-05, | |
| "loss": 2.4136, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.38731325397459454, | |
| "grad_norm": 0.5198429226875305, | |
| "learning_rate": 7.206407229237291e-05, | |
| "loss": 2.4856, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.38795238475673083, | |
| "grad_norm": 0.43216878175735474, | |
| "learning_rate": 7.196919742063559e-05, | |
| "loss": 2.466, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.3885915155388671, | |
| "grad_norm": 0.40321946144104004, | |
| "learning_rate": 7.187422442293156e-05, | |
| "loss": 2.4628, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.38923064632100346, | |
| "grad_norm": 0.4268614649772644, | |
| "learning_rate": 7.177915372346011e-05, | |
| "loss": 2.5204, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.38986977710313975, | |
| "grad_norm": 0.40085369348526, | |
| "learning_rate": 7.168398574685689e-05, | |
| "loss": 2.4745, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.39050890788527604, | |
| "grad_norm": 0.3939695656299591, | |
| "learning_rate": 7.1588720918192e-05, | |
| "loss": 2.4835, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.3911480386674123, | |
| "grad_norm": 0.43925532698631287, | |
| "learning_rate": 7.149335966296819e-05, | |
| "loss": 2.467, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.3917871694495486, | |
| "grad_norm": 0.42357686161994934, | |
| "learning_rate": 7.139790240711889e-05, | |
| "loss": 2.4551, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.3924263002316849, | |
| "grad_norm": 0.37123793363571167, | |
| "learning_rate": 7.130234957700627e-05, | |
| "loss": 2.4389, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.3930654310138212, | |
| "grad_norm": 0.4305935204029083, | |
| "learning_rate": 7.120670159941948e-05, | |
| "loss": 2.4278, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.3937045617959575, | |
| "grad_norm": 0.4242519736289978, | |
| "learning_rate": 7.111095890157253e-05, | |
| "loss": 2.4719, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.3943436925780938, | |
| "grad_norm": 0.4311637282371521, | |
| "learning_rate": 7.101512191110259e-05, | |
| "loss": 2.5044, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.3949828233602301, | |
| "grad_norm": 0.39085444808006287, | |
| "learning_rate": 7.091919105606796e-05, | |
| "loss": 2.5206, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.3956219541423664, | |
| "grad_norm": 0.4122478663921356, | |
| "learning_rate": 7.08231667649462e-05, | |
| "loss": 2.4804, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.3962610849245027, | |
| "grad_norm": 0.3852584958076477, | |
| "learning_rate": 7.072704946663215e-05, | |
| "loss": 2.4829, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.39690021570663897, | |
| "grad_norm": 0.46755605936050415, | |
| "learning_rate": 7.063083959043618e-05, | |
| "loss": 2.4708, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.39753934648877526, | |
| "grad_norm": 0.4292025566101074, | |
| "learning_rate": 7.053453756608206e-05, | |
| "loss": 2.4955, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.39817847727091155, | |
| "grad_norm": 0.3895914852619171, | |
| "learning_rate": 7.043814382370516e-05, | |
| "loss": 2.4733, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.39881760805304783, | |
| "grad_norm": 0.3590813875198364, | |
| "learning_rate": 7.034165879385055e-05, | |
| "loss": 2.4225, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.3994567388351841, | |
| "grad_norm": 0.39154496788978577, | |
| "learning_rate": 7.024508290747101e-05, | |
| "loss": 2.4568, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.40009586961732047, | |
| "grad_norm": 0.37666070461273193, | |
| "learning_rate": 7.014841659592516e-05, | |
| "loss": 2.454, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.40073500039945675, | |
| "grad_norm": 0.4050076901912689, | |
| "learning_rate": 7.005166029097546e-05, | |
| "loss": 2.4781, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.40137413118159304, | |
| "grad_norm": 0.3955006003379822, | |
| "learning_rate": 6.995481442478633e-05, | |
| "loss": 2.4857, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.40201326196372933, | |
| "grad_norm": 0.36904042959213257, | |
| "learning_rate": 6.98578794299223e-05, | |
| "loss": 2.4419, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.4026523927458656, | |
| "grad_norm": 0.37739500403404236, | |
| "learning_rate": 6.976085573934589e-05, | |
| "loss": 2.4769, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.4032915235280019, | |
| "grad_norm": 0.41142594814300537, | |
| "learning_rate": 6.966374378641583e-05, | |
| "loss": 2.4935, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.4039306543101382, | |
| "grad_norm": 0.6310061812400818, | |
| "learning_rate": 6.956654400488509e-05, | |
| "loss": 2.4852, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.4045697850922745, | |
| "grad_norm": 0.4720698595046997, | |
| "learning_rate": 6.946925682889891e-05, | |
| "loss": 2.459, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.4052089158744108, | |
| "grad_norm": 0.4557422399520874, | |
| "learning_rate": 6.937188269299287e-05, | |
| "loss": 2.5031, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.4058480466565471, | |
| "grad_norm": 0.41922447085380554, | |
| "learning_rate": 6.927442203209098e-05, | |
| "loss": 2.4391, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.4064871774386834, | |
| "grad_norm": 0.4178277254104614, | |
| "learning_rate": 6.917687528150369e-05, | |
| "loss": 2.4331, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.4071263082208197, | |
| "grad_norm": 0.4180024266242981, | |
| "learning_rate": 6.907924287692603e-05, | |
| "loss": 2.5423, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.407765439002956, | |
| "grad_norm": 0.3937162756919861, | |
| "learning_rate": 6.898152525443551e-05, | |
| "loss": 2.4852, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.40840456978509226, | |
| "grad_norm": 0.4633253812789917, | |
| "learning_rate": 6.888372285049035e-05, | |
| "loss": 2.455, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.40904370056722855, | |
| "grad_norm": 0.4344712495803833, | |
| "learning_rate": 6.878583610192741e-05, | |
| "loss": 2.4842, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.40968283134936484, | |
| "grad_norm": 0.39327695965766907, | |
| "learning_rate": 6.868786544596029e-05, | |
| "loss": 2.4378, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.4103219621315012, | |
| "grad_norm": 0.37845897674560547, | |
| "learning_rate": 6.858981132017736e-05, | |
| "loss": 2.4548, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.41096109291363747, | |
| "grad_norm": 0.44567787647247314, | |
| "learning_rate": 6.84916741625398e-05, | |
| "loss": 2.5019, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 0.41160022369577376, | |
| "grad_norm": 0.42388099431991577, | |
| "learning_rate": 6.839345441137972e-05, | |
| "loss": 2.4515, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.41223935447791005, | |
| "grad_norm": 0.4536793828010559, | |
| "learning_rate": 6.829515250539801e-05, | |
| "loss": 2.463, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.41287848526004634, | |
| "grad_norm": 0.5546822547912598, | |
| "learning_rate": 6.819676888366261e-05, | |
| "loss": 2.4673, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.4135176160421826, | |
| "grad_norm": 0.3987797200679779, | |
| "learning_rate": 6.809830398560643e-05, | |
| "loss": 2.4705, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 0.4141567468243189, | |
| "grad_norm": 0.4608692228794098, | |
| "learning_rate": 6.799975825102535e-05, | |
| "loss": 2.4716, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.4147958776064552, | |
| "grad_norm": 0.46457552909851074, | |
| "learning_rate": 6.790113212007637e-05, | |
| "loss": 2.452, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 0.41543500838859154, | |
| "grad_norm": 0.4172557592391968, | |
| "learning_rate": 6.780242603327554e-05, | |
| "loss": 2.4851, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.41607413917072783, | |
| "grad_norm": 0.38696351647377014, | |
| "learning_rate": 6.770364043149607e-05, | |
| "loss": 2.4905, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.4167132699528641, | |
| "grad_norm": 0.37832561135292053, | |
| "learning_rate": 6.76047757559663e-05, | |
| "loss": 2.4624, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.4173524007350004, | |
| "grad_norm": 0.38637575507164, | |
| "learning_rate": 6.750583244826777e-05, | |
| "loss": 2.4829, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.4179915315171367, | |
| "grad_norm": 0.42297881841659546, | |
| "learning_rate": 6.74068109503332e-05, | |
| "loss": 2.4662, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.418630662299273, | |
| "grad_norm": 0.3472845256328583, | |
| "learning_rate": 6.73077117044446e-05, | |
| "loss": 2.4513, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.41926979308140927, | |
| "grad_norm": 0.3649962544441223, | |
| "learning_rate": 6.720853515323119e-05, | |
| "loss": 2.494, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.41990892386354556, | |
| "grad_norm": 0.3956323266029358, | |
| "learning_rate": 6.710928173966752e-05, | |
| "loss": 2.4601, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 0.42054805464568185, | |
| "grad_norm": 0.4153473675251007, | |
| "learning_rate": 6.700995190707143e-05, | |
| "loss": 2.4414, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.4211871854278182, | |
| "grad_norm": 0.4296291768550873, | |
| "learning_rate": 6.69105460991021e-05, | |
| "loss": 2.4902, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 0.4218263162099545, | |
| "grad_norm": 0.415251225233078, | |
| "learning_rate": 6.681106475975802e-05, | |
| "loss": 2.4428, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.42246544699209077, | |
| "grad_norm": 0.4261683225631714, | |
| "learning_rate": 6.671150833337506e-05, | |
| "loss": 2.4835, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 0.42310457777422705, | |
| "grad_norm": 0.3931010961532593, | |
| "learning_rate": 6.661187726462451e-05, | |
| "loss": 2.4243, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 0.42374370855636334, | |
| "grad_norm": 0.4542677402496338, | |
| "learning_rate": 6.651217199851099e-05, | |
| "loss": 2.4157, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 0.42438283933849963, | |
| "grad_norm": 0.3953649699687958, | |
| "learning_rate": 6.641239298037055e-05, | |
| "loss": 2.4273, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.4250219701206359, | |
| "grad_norm": 0.38825055956840515, | |
| "learning_rate": 6.631254065586867e-05, | |
| "loss": 2.4628, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.4256611009027722, | |
| "grad_norm": 0.3847859799861908, | |
| "learning_rate": 6.621261547099825e-05, | |
| "loss": 2.4698, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 0.42630023168490855, | |
| "grad_norm": 0.35709747672080994, | |
| "learning_rate": 6.611261787207758e-05, | |
| "loss": 2.4405, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 0.42693936246704484, | |
| "grad_norm": 0.38247150182724, | |
| "learning_rate": 6.601254830574845e-05, | |
| "loss": 2.4622, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 0.4275784932491811, | |
| "grad_norm": 0.4106605052947998, | |
| "learning_rate": 6.591240721897405e-05, | |
| "loss": 2.4967, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 0.4282176240313174, | |
| "grad_norm": 0.42948636412620544, | |
| "learning_rate": 6.581219505903705e-05, | |
| "loss": 2.4748, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.4288567548134537, | |
| "grad_norm": 0.4055151641368866, | |
| "learning_rate": 6.571191227353755e-05, | |
| "loss": 2.4277, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 0.42949588559559, | |
| "grad_norm": 0.45863187313079834, | |
| "learning_rate": 6.561155931039108e-05, | |
| "loss": 2.4442, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.4301350163777263, | |
| "grad_norm": 0.4269378185272217, | |
| "learning_rate": 6.55111366178267e-05, | |
| "loss": 2.4669, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 0.43077414715986256, | |
| "grad_norm": 0.4133965075016022, | |
| "learning_rate": 6.541064464438481e-05, | |
| "loss": 2.4665, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 0.4314132779419989, | |
| "grad_norm": 0.4650583565235138, | |
| "learning_rate": 6.531008383891534e-05, | |
| "loss": 2.4977, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.4320524087241352, | |
| "grad_norm": 0.41605526208877563, | |
| "learning_rate": 6.520945465057562e-05, | |
| "loss": 2.4897, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 0.4326915395062715, | |
| "grad_norm": 0.4192642867565155, | |
| "learning_rate": 6.510875752882842e-05, | |
| "loss": 2.4785, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 0.43333067028840777, | |
| "grad_norm": 0.42347627878189087, | |
| "learning_rate": 6.500799292343996e-05, | |
| "loss": 2.4423, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 0.43396980107054406, | |
| "grad_norm": 0.42242956161499023, | |
| "learning_rate": 6.49071612844778e-05, | |
| "loss": 2.4677, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 0.43460893185268035, | |
| "grad_norm": 0.4475904405117035, | |
| "learning_rate": 6.480626306230904e-05, | |
| "loss": 2.4696, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.43524806263481663, | |
| "grad_norm": 0.4100373685359955, | |
| "learning_rate": 6.470529870759804e-05, | |
| "loss": 2.4463, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 0.4358871934169529, | |
| "grad_norm": 0.41356390714645386, | |
| "learning_rate": 6.460426867130463e-05, | |
| "loss": 2.4741, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 0.43652632419908927, | |
| "grad_norm": 0.3800880014896393, | |
| "learning_rate": 6.450317340468195e-05, | |
| "loss": 2.4466, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 0.43716545498122555, | |
| "grad_norm": 0.4278762936592102, | |
| "learning_rate": 6.440201335927457e-05, | |
| "loss": 2.4818, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 0.43780458576336184, | |
| "grad_norm": 0.43055278062820435, | |
| "learning_rate": 6.43007889869163e-05, | |
| "loss": 2.4177, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.43844371654549813, | |
| "grad_norm": 0.36429423093795776, | |
| "learning_rate": 6.419950073972835e-05, | |
| "loss": 2.4883, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 0.4390828473276344, | |
| "grad_norm": 0.3917326033115387, | |
| "learning_rate": 6.40981490701172e-05, | |
| "loss": 2.4432, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 0.4397219781097707, | |
| "grad_norm": 0.40328142046928406, | |
| "learning_rate": 6.399673443077256e-05, | |
| "loss": 2.4062, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.440361108891907, | |
| "grad_norm": 0.3806988000869751, | |
| "learning_rate": 6.389525727466548e-05, | |
| "loss": 2.526, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 0.4410002396740433, | |
| "grad_norm": 0.3769102990627289, | |
| "learning_rate": 6.379371805504619e-05, | |
| "loss": 2.4921, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.44163937045617957, | |
| "grad_norm": 0.374162495136261, | |
| "learning_rate": 6.369211722544213e-05, | |
| "loss": 2.4535, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 0.4422785012383159, | |
| "grad_norm": 0.3933018743991852, | |
| "learning_rate": 6.359045523965596e-05, | |
| "loss": 2.5135, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 0.4429176320204522, | |
| "grad_norm": 0.3832258880138397, | |
| "learning_rate": 6.348873255176342e-05, | |
| "loss": 2.4647, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 0.4435567628025885, | |
| "grad_norm": 0.36380982398986816, | |
| "learning_rate": 6.338694961611147e-05, | |
| "loss": 2.3925, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 0.4441958935847248, | |
| "grad_norm": 0.426459401845932, | |
| "learning_rate": 6.328510688731612e-05, | |
| "loss": 2.4484, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.44483502436686106, | |
| "grad_norm": 0.417032927274704, | |
| "learning_rate": 6.318320482026042e-05, | |
| "loss": 2.4809, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 0.44547415514899735, | |
| "grad_norm": 0.43301257491111755, | |
| "learning_rate": 6.308124387009251e-05, | |
| "loss": 2.4626, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 0.44611328593113364, | |
| "grad_norm": 0.4497744143009186, | |
| "learning_rate": 6.29792244922235e-05, | |
| "loss": 2.4309, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 0.44675241671326993, | |
| "grad_norm": 0.3846721649169922, | |
| "learning_rate": 6.287714714232549e-05, | |
| "loss": 2.4702, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 0.44739154749540627, | |
| "grad_norm": 0.41892290115356445, | |
| "learning_rate": 6.27750122763295e-05, | |
| "loss": 2.4563, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.44803067827754256, | |
| "grad_norm": 0.42186662554740906, | |
| "learning_rate": 6.267282035042343e-05, | |
| "loss": 2.4437, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 0.44866980905967885, | |
| "grad_norm": 0.47368982434272766, | |
| "learning_rate": 6.257057182105011e-05, | |
| "loss": 2.4415, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 0.44930893984181514, | |
| "grad_norm": 0.4210759401321411, | |
| "learning_rate": 6.246826714490507e-05, | |
| "loss": 2.4647, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 0.4499480706239514, | |
| "grad_norm": 0.3934521973133087, | |
| "learning_rate": 6.236590677893473e-05, | |
| "loss": 2.4314, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.4505872014060877, | |
| "grad_norm": 0.3769364058971405, | |
| "learning_rate": 6.22634911803342e-05, | |
| "loss": 2.5025, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.451226332188224, | |
| "grad_norm": 0.3881765604019165, | |
| "learning_rate": 6.21610208065453e-05, | |
| "loss": 2.4804, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 0.4518654629703603, | |
| "grad_norm": 0.3779181241989136, | |
| "learning_rate": 6.205849611525453e-05, | |
| "loss": 2.4397, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 0.45250459375249663, | |
| "grad_norm": 0.3787132501602173, | |
| "learning_rate": 6.195591756439094e-05, | |
| "loss": 2.4436, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 0.4531437245346329, | |
| "grad_norm": 0.3753858506679535, | |
| "learning_rate": 6.185328561212418e-05, | |
| "loss": 2.4576, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 0.4537828553167692, | |
| "grad_norm": 0.38031622767448425, | |
| "learning_rate": 6.175060071686243e-05, | |
| "loss": 2.4312, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.4544219860989055, | |
| "grad_norm": 0.35417258739471436, | |
| "learning_rate": 6.164786333725031e-05, | |
| "loss": 2.4016, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 0.4550611168810418, | |
| "grad_norm": 0.3858546316623688, | |
| "learning_rate": 6.15450739321669e-05, | |
| "loss": 2.4487, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 0.45570024766317807, | |
| "grad_norm": 0.37336215376853943, | |
| "learning_rate": 6.144223296072365e-05, | |
| "loss": 2.4672, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 0.45633937844531436, | |
| "grad_norm": 0.38472285866737366, | |
| "learning_rate": 6.133934088226228e-05, | |
| "loss": 2.4274, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 0.45697850922745065, | |
| "grad_norm": 0.4379557967185974, | |
| "learning_rate": 6.123639815635285e-05, | |
| "loss": 2.4599, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.45761764000958693, | |
| "grad_norm": 0.4095916152000427, | |
| "learning_rate": 6.113340524279157e-05, | |
| "loss": 2.472, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 0.4582567707917233, | |
| "grad_norm": 0.3846578299999237, | |
| "learning_rate": 6.1030362601598876e-05, | |
| "loss": 2.4181, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 0.45889590157385957, | |
| "grad_norm": 0.4157004654407501, | |
| "learning_rate": 6.092727069301729e-05, | |
| "loss": 2.4404, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 0.45953503235599585, | |
| "grad_norm": 0.398113876581192, | |
| "learning_rate": 6.082412997750937e-05, | |
| "loss": 2.4291, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 0.46017416313813214, | |
| "grad_norm": 0.40625932812690735, | |
| "learning_rate": 6.07209409157557e-05, | |
| "loss": 2.4626, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.46081329392026843, | |
| "grad_norm": 0.38662636280059814, | |
| "learning_rate": 6.061770396865277e-05, | |
| "loss": 2.4377, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 0.4614524247024047, | |
| "grad_norm": 0.38780269026756287, | |
| "learning_rate": 6.0514419597311e-05, | |
| "loss": 2.4523, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 0.462091555484541, | |
| "grad_norm": 0.38416293263435364, | |
| "learning_rate": 6.041108826305258e-05, | |
| "loss": 2.4571, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 0.4627306862666773, | |
| "grad_norm": 0.3840222954750061, | |
| "learning_rate": 6.030771042740951e-05, | |
| "loss": 2.4534, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 0.46336981704881364, | |
| "grad_norm": 0.37633752822875977, | |
| "learning_rate": 6.0204286552121436e-05, | |
| "loss": 2.4613, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.4640089478309499, | |
| "grad_norm": 0.38146889209747314, | |
| "learning_rate": 6.0100817099133686e-05, | |
| "loss": 2.4633, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 0.4646480786130862, | |
| "grad_norm": 0.4018976390361786, | |
| "learning_rate": 5.999730253059515e-05, | |
| "loss": 2.4312, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 0.4652872093952225, | |
| "grad_norm": 0.4063177704811096, | |
| "learning_rate": 5.9893743308856195e-05, | |
| "loss": 2.4385, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 0.4659263401773588, | |
| "grad_norm": 0.3882785439491272, | |
| "learning_rate": 5.979013989646669e-05, | |
| "loss": 2.4421, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 0.4665654709594951, | |
| "grad_norm": 0.3931635320186615, | |
| "learning_rate": 5.968649275617384e-05, | |
| "loss": 2.471, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.46720460174163136, | |
| "grad_norm": 0.42858976125717163, | |
| "learning_rate": 5.9582802350920194e-05, | |
| "loss": 2.4262, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 0.46784373252376765, | |
| "grad_norm": 0.39917758107185364, | |
| "learning_rate": 5.9479069143841495e-05, | |
| "loss": 2.4404, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 0.468482863305904, | |
| "grad_norm": 0.40618276596069336, | |
| "learning_rate": 5.9375293598264705e-05, | |
| "loss": 2.4515, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 0.4691219940880403, | |
| "grad_norm": 0.4168923497200012, | |
| "learning_rate": 5.92714761777059e-05, | |
| "loss": 2.4843, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 0.46976112487017657, | |
| "grad_norm": 0.3863443434238434, | |
| "learning_rate": 5.916761734586813e-05, | |
| "loss": 2.4285, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.47040025565231286, | |
| "grad_norm": 0.37037089467048645, | |
| "learning_rate": 5.9063717566639476e-05, | |
| "loss": 2.4234, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 0.47103938643444915, | |
| "grad_norm": 0.4008197784423828, | |
| "learning_rate": 5.8959777304090894e-05, | |
| "loss": 2.4669, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 0.47167851721658544, | |
| "grad_norm": 0.3711203932762146, | |
| "learning_rate": 5.8855797022474134e-05, | |
| "loss": 2.4513, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 0.4723176479987217, | |
| "grad_norm": 0.40264326333999634, | |
| "learning_rate": 5.875177718621974e-05, | |
| "loss": 2.4621, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 0.472956778780858, | |
| "grad_norm": 0.37623006105422974, | |
| "learning_rate": 5.864771825993485e-05, | |
| "loss": 2.4903, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.47359590956299435, | |
| "grad_norm": 0.41060057282447815, | |
| "learning_rate": 5.854362070840127e-05, | |
| "loss": 2.4544, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 0.47423504034513064, | |
| "grad_norm": 0.4081309139728546, | |
| "learning_rate": 5.843948499657331e-05, | |
| "loss": 2.4305, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 0.47487417112726693, | |
| "grad_norm": 0.4004654884338379, | |
| "learning_rate": 5.833531158957571e-05, | |
| "loss": 2.4214, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 0.4755133019094032, | |
| "grad_norm": 0.42326685786247253, | |
| "learning_rate": 5.823110095270158e-05, | |
| "loss": 2.4195, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 0.4761524326915395, | |
| "grad_norm": 0.3732118606567383, | |
| "learning_rate": 5.812685355141033e-05, | |
| "loss": 2.417, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.4767915634736758, | |
| "grad_norm": 0.3631454408168793, | |
| "learning_rate": 5.802256985132557e-05, | |
| "loss": 2.44, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 0.4774306942558121, | |
| "grad_norm": 0.3802222013473511, | |
| "learning_rate": 5.791825031823304e-05, | |
| "loss": 2.4387, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 0.47806982503794837, | |
| "grad_norm": 0.3902392089366913, | |
| "learning_rate": 5.7813895418078514e-05, | |
| "loss": 2.471, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 0.47870895582008466, | |
| "grad_norm": 0.3706824481487274, | |
| "learning_rate": 5.770950561696576e-05, | |
| "loss": 2.4321, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 0.479348086602221, | |
| "grad_norm": 0.3934244215488434, | |
| "learning_rate": 5.7605081381154435e-05, | |
| "loss": 2.412, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.4799872173843573, | |
| "grad_norm": 0.4083379805088043, | |
| "learning_rate": 5.750062317705795e-05, | |
| "loss": 2.4251, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 0.4806263481664936, | |
| "grad_norm": 0.3629012107849121, | |
| "learning_rate": 5.739613147124151e-05, | |
| "loss": 2.4906, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 0.48126547894862987, | |
| "grad_norm": 0.35916343331336975, | |
| "learning_rate": 5.729160673041991e-05, | |
| "loss": 2.4332, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 0.48190460973076615, | |
| "grad_norm": 0.3485174775123596, | |
| "learning_rate": 5.718704942145549e-05, | |
| "loss": 2.4341, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 0.48254374051290244, | |
| "grad_norm": 0.37310436367988586, | |
| "learning_rate": 5.708246001135607e-05, | |
| "loss": 2.4318, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.48318287129503873, | |
| "grad_norm": 0.4222985506057739, | |
| "learning_rate": 5.6977838967272844e-05, | |
| "loss": 2.4384, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 0.483822002077175, | |
| "grad_norm": 0.3679346442222595, | |
| "learning_rate": 5.687318675649835e-05, | |
| "loss": 2.4596, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 0.48446113285931136, | |
| "grad_norm": 0.36164212226867676, | |
| "learning_rate": 5.676850384646425e-05, | |
| "loss": 2.403, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 0.48510026364144765, | |
| "grad_norm": 0.3508222997188568, | |
| "learning_rate": 5.666379070473937e-05, | |
| "loss": 2.4686, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 0.48573939442358394, | |
| "grad_norm": 0.4127695858478546, | |
| "learning_rate": 5.65590477990276e-05, | |
| "loss": 2.3782, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.4863785252057202, | |
| "grad_norm": 0.40480783581733704, | |
| "learning_rate": 5.645427559716567e-05, | |
| "loss": 2.4527, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 0.4870176559878565, | |
| "grad_norm": 0.38290444016456604, | |
| "learning_rate": 5.6349474567121276e-05, | |
| "loss": 2.4037, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 0.4876567867699928, | |
| "grad_norm": 0.44558387994766235, | |
| "learning_rate": 5.62446451769908e-05, | |
| "loss": 2.4344, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 0.4882959175521291, | |
| "grad_norm": 0.376322865486145, | |
| "learning_rate": 5.613978789499733e-05, | |
| "loss": 2.4551, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 0.4889350483342654, | |
| "grad_norm": 0.37908315658569336, | |
| "learning_rate": 5.603490318948852e-05, | |
| "loss": 2.4178, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.4895741791164017, | |
| "grad_norm": 0.39797163009643555, | |
| "learning_rate": 5.592999152893451e-05, | |
| "loss": 2.4267, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 0.490213309898538, | |
| "grad_norm": 0.35955914855003357, | |
| "learning_rate": 5.5825053381925875e-05, | |
| "loss": 2.4299, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 0.4908524406806743, | |
| "grad_norm": 0.3701266944408417, | |
| "learning_rate": 5.57200892171714e-05, | |
| "loss": 2.4175, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 0.4914915714628106, | |
| "grad_norm": 0.3612300157546997, | |
| "learning_rate": 5.561509950349619e-05, | |
| "loss": 2.4599, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 0.49213070224494687, | |
| "grad_norm": 0.3858483135700226, | |
| "learning_rate": 5.5510084709839385e-05, | |
| "loss": 2.4358, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.49276983302708316, | |
| "grad_norm": 0.3634040057659149, | |
| "learning_rate": 5.540504530525219e-05, | |
| "loss": 2.4158, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 0.49340896380921945, | |
| "grad_norm": 0.3557896614074707, | |
| "learning_rate": 5.529998175889569e-05, | |
| "loss": 2.4044, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 0.49404809459135574, | |
| "grad_norm": 0.40323886275291443, | |
| "learning_rate": 5.519489454003886e-05, | |
| "loss": 2.4126, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 0.494687225373492, | |
| "grad_norm": 0.4036874771118164, | |
| "learning_rate": 5.508978411805637e-05, | |
| "loss": 2.4475, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 0.49532635615562837, | |
| "grad_norm": 0.38102537393569946, | |
| "learning_rate": 5.498465096242651e-05, | |
| "loss": 2.4423, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.49596548693776465, | |
| "grad_norm": 0.38952916860580444, | |
| "learning_rate": 5.487949554272915e-05, | |
| "loss": 2.4911, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 0.49660461771990094, | |
| "grad_norm": 0.39436522126197815, | |
| "learning_rate": 5.477431832864359e-05, | |
| "loss": 2.4299, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 0.49724374850203723, | |
| "grad_norm": 0.40983283519744873, | |
| "learning_rate": 5.466911978994649e-05, | |
| "loss": 2.4336, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 0.4978828792841735, | |
| "grad_norm": 0.39375317096710205, | |
| "learning_rate": 5.4563900396509703e-05, | |
| "loss": 2.4575, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 0.4985220100663098, | |
| "grad_norm": 0.36072197556495667, | |
| "learning_rate": 5.44586606182983e-05, | |
| "loss": 2.4223, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.4991611408484461, | |
| "grad_norm": 0.43593502044677734, | |
| "learning_rate": 5.435340092536832e-05, | |
| "loss": 2.4517, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 0.4998002716305824, | |
| "grad_norm": 0.4531024694442749, | |
| "learning_rate": 5.424812178786487e-05, | |
| "loss": 2.4164, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 0.5004394024127187, | |
| "grad_norm": 0.39697250723838806, | |
| "learning_rate": 5.414282367601977e-05, | |
| "loss": 2.3851, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 0.501078533194855, | |
| "grad_norm": 0.38045620918273926, | |
| "learning_rate": 5.403750706014969e-05, | |
| "loss": 2.4484, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 0.5017176639769912, | |
| "grad_norm": 0.37511423230171204, | |
| "learning_rate": 5.3932172410653914e-05, | |
| "loss": 2.4344, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.5023567947591275, | |
| "grad_norm": 0.36667105555534363, | |
| "learning_rate": 5.3826820198012274e-05, | |
| "loss": 2.4309, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 0.5029959255412639, | |
| "grad_norm": 0.37465110421180725, | |
| "learning_rate": 5.372145089278302e-05, | |
| "loss": 2.4548, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 0.5036350563234002, | |
| "grad_norm": 0.37167513370513916, | |
| "learning_rate": 5.361606496560081e-05, | |
| "loss": 2.4151, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 0.5042741871055365, | |
| "grad_norm": 0.42976588010787964, | |
| "learning_rate": 5.35106628871745e-05, | |
| "loss": 2.4307, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 0.5049133178876728, | |
| "grad_norm": 0.37860140204429626, | |
| "learning_rate": 5.3405245128285076e-05, | |
| "loss": 2.4353, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.5055524486698091, | |
| "grad_norm": 0.39379826188087463, | |
| "learning_rate": 5.329981215978359e-05, | |
| "loss": 2.4238, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 0.5061915794519454, | |
| "grad_norm": 0.4188903272151947, | |
| "learning_rate": 5.319436445258903e-05, | |
| "loss": 2.4271, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 0.5068307102340817, | |
| "grad_norm": 0.3956911265850067, | |
| "learning_rate": 5.30889024776862e-05, | |
| "loss": 2.439, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 0.507469841016218, | |
| "grad_norm": 0.38109374046325684, | |
| "learning_rate": 5.298342670612362e-05, | |
| "loss": 2.4553, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 0.5081089717983542, | |
| "grad_norm": 0.43795204162597656, | |
| "learning_rate": 5.287793760901145e-05, | |
| "loss": 2.402, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.5087481025804905, | |
| "grad_norm": 0.3878532946109772, | |
| "learning_rate": 5.277243565751937e-05, | |
| "loss": 2.4195, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 0.5093872333626268, | |
| "grad_norm": 0.36320263147354126, | |
| "learning_rate": 5.26669213228745e-05, | |
| "loss": 2.4418, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 0.5100263641447631, | |
| "grad_norm": 0.39842814207077026, | |
| "learning_rate": 5.2561395076359174e-05, | |
| "loss": 2.4426, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 0.5106654949268994, | |
| "grad_norm": 0.3529532849788666, | |
| "learning_rate": 5.245585738930905e-05, | |
| "loss": 2.4303, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 0.5113046257090357, | |
| "grad_norm": 0.3516543507575989, | |
| "learning_rate": 5.235030873311083e-05, | |
| "loss": 2.4193, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.511943756491172, | |
| "grad_norm": 0.36904647946357727, | |
| "learning_rate": 5.224474957920018e-05, | |
| "loss": 2.4379, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 0.5125828872733083, | |
| "grad_norm": 0.37814271450042725, | |
| "learning_rate": 5.213918039905971e-05, | |
| "loss": 2.4206, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 0.5132220180554445, | |
| "grad_norm": 0.37127214670181274, | |
| "learning_rate": 5.203360166421677e-05, | |
| "loss": 2.4097, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 0.5138611488375809, | |
| "grad_norm": 0.36163654923439026, | |
| "learning_rate": 5.192801384624144e-05, | |
| "loss": 2.436, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 0.5145002796197172, | |
| "grad_norm": 0.3785097301006317, | |
| "learning_rate": 5.1822417416744296e-05, | |
| "loss": 2.4404, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.5151394104018535, | |
| "grad_norm": 0.35856905579566956, | |
| "learning_rate": 5.171681284737444e-05, | |
| "loss": 2.4416, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 0.5157785411839898, | |
| "grad_norm": 0.3714255690574646, | |
| "learning_rate": 5.161120060981731e-05, | |
| "loss": 2.4561, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 0.5164176719661261, | |
| "grad_norm": 0.3803336024284363, | |
| "learning_rate": 5.150558117579257e-05, | |
| "loss": 2.4791, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 0.5170568027482624, | |
| "grad_norm": 0.3896247446537018, | |
| "learning_rate": 5.1399955017052074e-05, | |
| "loss": 2.4461, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 0.5176959335303987, | |
| "grad_norm": 0.4139016270637512, | |
| "learning_rate": 5.129432260537767e-05, | |
| "loss": 2.4588, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.518335064312535, | |
| "grad_norm": 0.40255776047706604, | |
| "learning_rate": 5.118868441257916e-05, | |
| "loss": 2.4494, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 0.5189741950946712, | |
| "grad_norm": 0.4026229679584503, | |
| "learning_rate": 5.1083040910492156e-05, | |
| "loss": 2.4458, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 0.5196133258768075, | |
| "grad_norm": 0.39760807156562805, | |
| "learning_rate": 5.097739257097598e-05, | |
| "loss": 2.4252, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 0.5202524566589438, | |
| "grad_norm": 0.39458784461021423, | |
| "learning_rate": 5.087173986591159e-05, | |
| "loss": 2.4242, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 0.5208915874410801, | |
| "grad_norm": 0.3847057521343231, | |
| "learning_rate": 5.0766083267199405e-05, | |
| "loss": 2.4321, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.5215307182232164, | |
| "grad_norm": 0.42019978165626526, | |
| "learning_rate": 5.0660423246757235e-05, | |
| "loss": 2.4673, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 0.5221698490053527, | |
| "grad_norm": 0.42398643493652344, | |
| "learning_rate": 5.055476027651823e-05, | |
| "loss": 2.3949, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 0.522808979787489, | |
| "grad_norm": 0.43800458312034607, | |
| "learning_rate": 5.0449094828428656e-05, | |
| "loss": 2.4141, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 0.5234481105696253, | |
| "grad_norm": 0.40368396043777466, | |
| "learning_rate": 5.034342737444584e-05, | |
| "loss": 2.4325, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 0.5240872413517617, | |
| "grad_norm": 0.3963250517845154, | |
| "learning_rate": 5.023775838653613e-05, | |
| "loss": 2.424, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.524726372133898, | |
| "grad_norm": 0.3902316093444824, | |
| "learning_rate": 5.013208833667267e-05, | |
| "loss": 2.4348, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 0.5253655029160342, | |
| "grad_norm": 0.37698113918304443, | |
| "learning_rate": 5.002641769683336e-05, | |
| "loss": 2.4369, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 0.5260046336981705, | |
| "grad_norm": 0.40694084763526917, | |
| "learning_rate": 4.992074693899877e-05, | |
| "loss": 2.4301, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 0.5266437644803068, | |
| "grad_norm": 0.42858392000198364, | |
| "learning_rate": 4.981507653514993e-05, | |
| "loss": 2.4475, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 0.5272828952624431, | |
| "grad_norm": 0.38250046968460083, | |
| "learning_rate": 4.970940695726635e-05, | |
| "loss": 2.4501, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.5279220260445794, | |
| "grad_norm": 0.3574528694152832, | |
| "learning_rate": 4.960373867732382e-05, | |
| "loss": 2.416, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 0.5285611568267157, | |
| "grad_norm": 0.3468669652938843, | |
| "learning_rate": 4.949807216729235e-05, | |
| "loss": 2.476, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 0.529200287608852, | |
| "grad_norm": 0.3583150804042816, | |
| "learning_rate": 4.9392407899134016e-05, | |
| "loss": 2.3872, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 0.5298394183909882, | |
| "grad_norm": 0.3865881860256195, | |
| "learning_rate": 4.928674634480093e-05, | |
| "loss": 2.424, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 0.5304785491731245, | |
| "grad_norm": 0.3804073631763458, | |
| "learning_rate": 4.9181087976233024e-05, | |
| "loss": 2.4265, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.5311176799552608, | |
| "grad_norm": 0.3614030182361603, | |
| "learning_rate": 4.907543326535604e-05, | |
| "loss": 2.4024, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 0.5317568107373971, | |
| "grad_norm": 0.33294716477394104, | |
| "learning_rate": 4.896978268407937e-05, | |
| "loss": 2.4129, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 0.5323959415195334, | |
| "grad_norm": 0.3311128318309784, | |
| "learning_rate": 4.8864136704293974e-05, | |
| "loss": 2.4701, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 0.5330350723016697, | |
| "grad_norm": 0.35216227173805237, | |
| "learning_rate": 4.875849579787023e-05, | |
| "loss": 2.4146, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 0.533674203083806, | |
| "grad_norm": 0.3796081244945526, | |
| "learning_rate": 4.8652860436655886e-05, | |
| "loss": 2.449, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.5343133338659423, | |
| "grad_norm": 0.34906062483787537, | |
| "learning_rate": 4.8547231092473914e-05, | |
| "loss": 2.4652, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 0.5349524646480787, | |
| "grad_norm": 0.3501318097114563, | |
| "learning_rate": 4.8441608237120384e-05, | |
| "loss": 2.4398, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 0.535591595430215, | |
| "grad_norm": 0.3724457621574402, | |
| "learning_rate": 4.833599234236242e-05, | |
| "loss": 2.4308, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 0.5362307262123512, | |
| "grad_norm": 0.37346675992012024, | |
| "learning_rate": 4.8230383879936056e-05, | |
| "loss": 2.4118, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 0.5368698569944875, | |
| "grad_norm": 0.3676453232765198, | |
| "learning_rate": 4.812478332154411e-05, | |
| "loss": 2.3867, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.5375089877766238, | |
| "grad_norm": 0.35594579577445984, | |
| "learning_rate": 4.801919113885409e-05, | |
| "loss": 2.4219, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 0.5381481185587601, | |
| "grad_norm": 0.3685372769832611, | |
| "learning_rate": 4.791360780349611e-05, | |
| "loss": 2.4617, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 0.5387872493408964, | |
| "grad_norm": 0.37843436002731323, | |
| "learning_rate": 4.7808033787060776e-05, | |
| "loss": 2.455, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 0.5394263801230327, | |
| "grad_norm": 0.4068625867366791, | |
| "learning_rate": 4.7702469561097055e-05, | |
| "loss": 2.4032, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 0.540065510905169, | |
| "grad_norm": 0.38076815009117126, | |
| "learning_rate": 4.759691559711016e-05, | |
| "loss": 2.4135, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.5407046416873053, | |
| "grad_norm": 0.397952675819397, | |
| "learning_rate": 4.749137236655953e-05, | |
| "loss": 2.4522, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 0.5413437724694415, | |
| "grad_norm": 0.43378394842147827, | |
| "learning_rate": 4.738584034085663e-05, | |
| "loss": 2.4392, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 0.5419829032515778, | |
| "grad_norm": 0.42424699664115906, | |
| "learning_rate": 4.7280319991362856e-05, | |
| "loss": 2.4115, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 0.5426220340337141, | |
| "grad_norm": 0.3597288429737091, | |
| "learning_rate": 4.7174811789387474e-05, | |
| "loss": 2.4565, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 0.5432611648158504, | |
| "grad_norm": 0.3931712210178375, | |
| "learning_rate": 4.70693162061855e-05, | |
| "loss": 2.4144, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.5439002955979867, | |
| "grad_norm": 0.3970116078853607, | |
| "learning_rate": 4.696383371295561e-05, | |
| "loss": 2.4217, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 0.544539426380123, | |
| "grad_norm": 0.3898089826107025, | |
| "learning_rate": 4.6858364780837926e-05, | |
| "loss": 2.4174, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 0.5451785571622594, | |
| "grad_norm": 0.39069536328315735, | |
| "learning_rate": 4.675290988091209e-05, | |
| "loss": 2.4467, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 0.5458176879443957, | |
| "grad_norm": 0.40554937720298767, | |
| "learning_rate": 4.6647469484195046e-05, | |
| "loss": 2.4021, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 0.546456818726532, | |
| "grad_norm": 0.3651846647262573, | |
| "learning_rate": 4.6542044061638916e-05, | |
| "loss": 2.4681, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.5470959495086682, | |
| "grad_norm": 0.38456082344055176, | |
| "learning_rate": 4.6436634084128994e-05, | |
| "loss": 2.4595, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 0.5477350802908045, | |
| "grad_norm": 0.3723991811275482, | |
| "learning_rate": 4.633124002248156e-05, | |
| "loss": 2.4284, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 0.5483742110729408, | |
| "grad_norm": 0.3874284327030182, | |
| "learning_rate": 4.622586234744182e-05, | |
| "loss": 2.4548, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 0.5490133418550771, | |
| "grad_norm": 0.3633071184158325, | |
| "learning_rate": 4.612050152968176e-05, | |
| "loss": 2.3967, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 0.5496524726372134, | |
| "grad_norm": 0.36283785104751587, | |
| "learning_rate": 4.601515803979811e-05, | |
| "loss": 2.4833, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.5502916034193497, | |
| "grad_norm": 0.3670403063297272, | |
| "learning_rate": 4.59098323483102e-05, | |
| "loss": 2.4416, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 0.550930734201486, | |
| "grad_norm": 0.37990862131118774, | |
| "learning_rate": 4.580452492565782e-05, | |
| "loss": 2.4052, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 0.5515698649836223, | |
| "grad_norm": 0.34238311648368835, | |
| "learning_rate": 4.569923624219921e-05, | |
| "loss": 2.4063, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 0.5522089957657585, | |
| "grad_norm": 0.36133143305778503, | |
| "learning_rate": 4.559396676820888e-05, | |
| "loss": 2.3975, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 0.5528481265478948, | |
| "grad_norm": 0.34147241711616516, | |
| "learning_rate": 4.548871697387558e-05, | |
| "loss": 2.4941, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.5534872573300311, | |
| "grad_norm": 0.35608533024787903, | |
| "learning_rate": 4.538348732930011e-05, | |
| "loss": 2.4556, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 0.5541263881121674, | |
| "grad_norm": 0.35883522033691406, | |
| "learning_rate": 4.5278278304493304e-05, | |
| "loss": 2.4152, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 0.5547655188943037, | |
| "grad_norm": 0.35472020506858826, | |
| "learning_rate": 4.5173090369373886e-05, | |
| "loss": 2.4264, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 0.55540464967644, | |
| "grad_norm": 0.32746946811676025, | |
| "learning_rate": 4.5067923993766395e-05, | |
| "loss": 2.4351, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 0.5560437804585764, | |
| "grad_norm": 0.3521060645580292, | |
| "learning_rate": 4.496277964739904e-05, | |
| "loss": 2.412, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.5566829112407127, | |
| "grad_norm": 0.3262730538845062, | |
| "learning_rate": 4.485765779990167e-05, | |
| "loss": 2.411, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 0.557322042022849, | |
| "grad_norm": 0.35765933990478516, | |
| "learning_rate": 4.475255892080365e-05, | |
| "loss": 2.4085, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 0.5579611728049853, | |
| "grad_norm": 0.3736300766468048, | |
| "learning_rate": 4.464748347953171e-05, | |
| "loss": 2.4776, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 0.5586003035871215, | |
| "grad_norm": 0.3639433979988098, | |
| "learning_rate": 4.454243194540793e-05, | |
| "loss": 2.4357, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 0.5592394343692578, | |
| "grad_norm": 0.3535432815551758, | |
| "learning_rate": 4.443740478764761e-05, | |
| "loss": 2.4321, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.5598785651513941, | |
| "grad_norm": 0.3495481610298157, | |
| "learning_rate": 4.433240247535716e-05, | |
| "loss": 2.4406, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 0.5605176959335304, | |
| "grad_norm": 0.3520449697971344, | |
| "learning_rate": 4.4227425477532006e-05, | |
| "loss": 2.4146, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 0.5611568267156667, | |
| "grad_norm": 0.3609304428100586, | |
| "learning_rate": 4.412247426305453e-05, | |
| "loss": 2.4337, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 0.561795957497803, | |
| "grad_norm": 0.390213280916214, | |
| "learning_rate": 4.401754930069195e-05, | |
| "loss": 2.4453, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 0.5624350882799393, | |
| "grad_norm": 0.3994176387786865, | |
| "learning_rate": 4.39126510590942e-05, | |
| "loss": 2.3987, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.5630742190620756, | |
| "grad_norm": 0.38568708300590515, | |
| "learning_rate": 4.38077800067919e-05, | |
| "loss": 2.4568, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 0.5637133498442118, | |
| "grad_norm": 0.397699236869812, | |
| "learning_rate": 4.37029366121942e-05, | |
| "loss": 2.4117, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 0.5643524806263481, | |
| "grad_norm": 0.3934156596660614, | |
| "learning_rate": 4.3598121343586754e-05, | |
| "loss": 2.4052, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 0.5649916114084844, | |
| "grad_norm": 0.40455877780914307, | |
| "learning_rate": 4.3493334669129545e-05, | |
| "loss": 2.3829, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 0.5656307421906207, | |
| "grad_norm": 0.3742325007915497, | |
| "learning_rate": 4.3388577056854854e-05, | |
| "loss": 2.4082, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.5662698729727571, | |
| "grad_norm": 0.36628293991088867, | |
| "learning_rate": 4.3283848974665205e-05, | |
| "loss": 2.4463, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 0.5669090037548934, | |
| "grad_norm": 0.3497489392757416, | |
| "learning_rate": 4.317915089033113e-05, | |
| "loss": 2.4369, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 0.5675481345370297, | |
| "grad_norm": 0.3424853980541229, | |
| "learning_rate": 4.3074483271489255e-05, | |
| "loss": 2.4129, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 0.568187265319166, | |
| "grad_norm": 0.3508245348930359, | |
| "learning_rate": 4.296984658564008e-05, | |
| "loss": 2.402, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 0.5688263961013023, | |
| "grad_norm": 0.3382429778575897, | |
| "learning_rate": 4.2865241300145986e-05, | |
| "loss": 2.4817, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.5694655268834385, | |
| "grad_norm": 0.32677891850471497, | |
| "learning_rate": 4.2760667882229086e-05, | |
| "loss": 2.427, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 0.5701046576655748, | |
| "grad_norm": 0.3372223675251007, | |
| "learning_rate": 4.2656126798969124e-05, | |
| "loss": 2.4001, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 0.5707437884477111, | |
| "grad_norm": 0.32375314831733704, | |
| "learning_rate": 4.2551618517301476e-05, | |
| "loss": 2.39, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 0.5713829192298474, | |
| "grad_norm": 0.3561007082462311, | |
| "learning_rate": 4.244714350401497e-05, | |
| "loss": 2.4197, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 0.5720220500119837, | |
| "grad_norm": 0.354825884103775, | |
| "learning_rate": 4.234270222574986e-05, | |
| "loss": 2.4256, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.57266118079412, | |
| "grad_norm": 0.3532213270664215, | |
| "learning_rate": 4.22382951489957e-05, | |
| "loss": 2.4307, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 0.5733003115762563, | |
| "grad_norm": 0.33807530999183655, | |
| "learning_rate": 4.2133922740089305e-05, | |
| "loss": 2.4755, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 0.5739394423583926, | |
| "grad_norm": 0.37858113646507263, | |
| "learning_rate": 4.2029585465212626e-05, | |
| "loss": 2.445, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 0.5745785731405288, | |
| "grad_norm": 0.3588252663612366, | |
| "learning_rate": 4.192528379039071e-05, | |
| "loss": 2.4041, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 0.5752177039226651, | |
| "grad_norm": 0.3415856957435608, | |
| "learning_rate": 4.1821018181489576e-05, | |
| "loss": 2.3741, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.5758568347048014, | |
| "grad_norm": 0.34766432642936707, | |
| "learning_rate": 4.1716789104214146e-05, | |
| "loss": 2.4178, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 0.5764959654869377, | |
| "grad_norm": 0.38155093789100647, | |
| "learning_rate": 4.1612597024106206e-05, | |
| "loss": 2.3772, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 0.5771350962690741, | |
| "grad_norm": 0.3494817614555359, | |
| "learning_rate": 4.150844240654226e-05, | |
| "loss": 2.4268, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 0.5777742270512104, | |
| "grad_norm": 0.3665534555912018, | |
| "learning_rate": 4.14043257167315e-05, | |
| "loss": 2.39, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 0.5784133578333467, | |
| "grad_norm": 0.35295775532722473, | |
| "learning_rate": 4.130024741971371e-05, | |
| "loss": 2.4526, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.579052488615483, | |
| "grad_norm": 0.36536329984664917, | |
| "learning_rate": 4.119620798035722e-05, | |
| "loss": 2.3808, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 0.5796916193976193, | |
| "grad_norm": 0.3432456851005554, | |
| "learning_rate": 4.109220786335672e-05, | |
| "loss": 2.4654, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 0.5803307501797556, | |
| "grad_norm": 0.36070066690444946, | |
| "learning_rate": 4.098824753323135e-05, | |
| "loss": 2.3872, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 0.5809698809618918, | |
| "grad_norm": 0.3635770082473755, | |
| "learning_rate": 4.0884327454322525e-05, | |
| "loss": 2.4115, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 0.5816090117440281, | |
| "grad_norm": 0.37104272842407227, | |
| "learning_rate": 4.0780448090791827e-05, | |
| "loss": 2.4194, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.5822481425261644, | |
| "grad_norm": 0.37471920251846313, | |
| "learning_rate": 4.067660990661904e-05, | |
| "loss": 2.4473, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 0.5828872733083007, | |
| "grad_norm": 0.36167097091674805, | |
| "learning_rate": 4.0572813365599994e-05, | |
| "loss": 2.4298, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 0.583526404090437, | |
| "grad_norm": 0.3573801815509796, | |
| "learning_rate": 4.046905893134452e-05, | |
| "loss": 2.44, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 0.5841655348725733, | |
| "grad_norm": 0.34311822056770325, | |
| "learning_rate": 4.036534706727437e-05, | |
| "loss": 2.4482, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 0.5848046656547096, | |
| "grad_norm": 0.35273006558418274, | |
| "learning_rate": 4.0261678236621176e-05, | |
| "loss": 2.446, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.5854437964368459, | |
| "grad_norm": 0.35470035672187805, | |
| "learning_rate": 4.015805290242435e-05, | |
| "loss": 2.4102, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 0.5860829272189821, | |
| "grad_norm": 0.3587048053741455, | |
| "learning_rate": 4.0054471527529004e-05, | |
| "loss": 2.4521, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 0.5867220580011184, | |
| "grad_norm": 0.37121549248695374, | |
| "learning_rate": 3.995093457458394e-05, | |
| "loss": 2.4098, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 0.5873611887832548, | |
| "grad_norm": 0.39247003197669983, | |
| "learning_rate": 3.984744250603954e-05, | |
| "loss": 2.3995, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 0.5880003195653911, | |
| "grad_norm": 0.36569175124168396, | |
| "learning_rate": 3.97439957841457e-05, | |
| "loss": 2.4157, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.5886394503475274, | |
| "grad_norm": 0.37468570470809937, | |
| "learning_rate": 3.964059487094977e-05, | |
| "loss": 2.4241, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 0.5892785811296637, | |
| "grad_norm": 0.38140931725502014, | |
| "learning_rate": 3.953724022829449e-05, | |
| "loss": 2.3726, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 0.5899177119118, | |
| "grad_norm": 0.3457862138748169, | |
| "learning_rate": 3.943393231781596e-05, | |
| "loss": 2.4095, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 0.5905568426939363, | |
| "grad_norm": 0.3339364230632782, | |
| "learning_rate": 3.933067160094154e-05, | |
| "loss": 2.4297, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 0.5911959734760726, | |
| "grad_norm": 0.3278919458389282, | |
| "learning_rate": 3.922745853888776e-05, | |
| "loss": 2.4057, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.5918351042582088, | |
| "grad_norm": 0.3251989781856537, | |
| "learning_rate": 3.9124293592658336e-05, | |
| "loss": 2.4281, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 0.5924742350403451, | |
| "grad_norm": 0.33066341280937195, | |
| "learning_rate": 3.9021177223042093e-05, | |
| "loss": 2.4348, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 0.5931133658224814, | |
| "grad_norm": 0.33942291140556335, | |
| "learning_rate": 3.8918109890610835e-05, | |
| "loss": 2.4616, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 0.5937524966046177, | |
| "grad_norm": 0.33614274859428406, | |
| "learning_rate": 3.8815092055717365e-05, | |
| "loss": 2.4248, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 0.594391627386754, | |
| "grad_norm": 0.3416106700897217, | |
| "learning_rate": 3.871212417849342e-05, | |
| "loss": 2.4678, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.5950307581688903, | |
| "grad_norm": 0.32268744707107544, | |
| "learning_rate": 3.860920671884758e-05, | |
| "loss": 2.441, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 0.5956698889510266, | |
| "grad_norm": 0.33090925216674805, | |
| "learning_rate": 3.850634013646323e-05, | |
| "loss": 2.4256, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 0.5963090197331629, | |
| "grad_norm": 0.325845330953598, | |
| "learning_rate": 3.840352489079653e-05, | |
| "loss": 2.4222, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 0.5969481505152991, | |
| "grad_norm": 0.31350693106651306, | |
| "learning_rate": 3.8300761441074326e-05, | |
| "loss": 2.4372, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 0.5975872812974354, | |
| "grad_norm": 0.33702364563941956, | |
| "learning_rate": 3.819805024629211e-05, | |
| "loss": 2.43, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.5982264120795718, | |
| "grad_norm": 0.3379462957382202, | |
| "learning_rate": 3.809539176521201e-05, | |
| "loss": 2.4265, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 0.5988655428617081, | |
| "grad_norm": 0.3332374393939972, | |
| "learning_rate": 3.7992786456360666e-05, | |
| "loss": 2.444, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 0.5995046736438444, | |
| "grad_norm": 0.33740073442459106, | |
| "learning_rate": 3.7890234778027265e-05, | |
| "loss": 2.4307, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 0.6001438044259807, | |
| "grad_norm": 0.3464294970035553, | |
| "learning_rate": 3.7787737188261406e-05, | |
| "loss": 2.4283, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 0.600782935208117, | |
| "grad_norm": 0.36593443155288696, | |
| "learning_rate": 3.768529414487113e-05, | |
| "loss": 2.3836, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.6014220659902533, | |
| "grad_norm": 0.34077978134155273, | |
| "learning_rate": 3.7582906105420845e-05, | |
| "loss": 2.4306, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 0.6020611967723896, | |
| "grad_norm": 0.34975969791412354, | |
| "learning_rate": 3.748057352722927e-05, | |
| "loss": 2.3872, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 0.6027003275545258, | |
| "grad_norm": 0.3572999835014343, | |
| "learning_rate": 3.73782968673674e-05, | |
| "loss": 2.4364, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 0.6033394583366621, | |
| "grad_norm": 0.3647032082080841, | |
| "learning_rate": 3.7276076582656503e-05, | |
| "loss": 2.4079, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 0.6039785891187984, | |
| "grad_norm": 0.3511103093624115, | |
| "learning_rate": 3.7173913129666015e-05, | |
| "loss": 2.4482, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.6046177199009347, | |
| "grad_norm": 0.3414948284626007, | |
| "learning_rate": 3.707180696471153e-05, | |
| "loss": 2.4255, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 0.605256850683071, | |
| "grad_norm": 0.35328835248947144, | |
| "learning_rate": 3.6969758543852776e-05, | |
| "loss": 2.45, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 0.6058959814652073, | |
| "grad_norm": 0.374549925327301, | |
| "learning_rate": 3.686776832289157e-05, | |
| "loss": 2.4631, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 0.6065351122473436, | |
| "grad_norm": 0.3457779288291931, | |
| "learning_rate": 3.676583675736977e-05, | |
| "loss": 2.4131, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 0.6071742430294799, | |
| "grad_norm": 0.3494611382484436, | |
| "learning_rate": 3.6663964302567245e-05, | |
| "loss": 2.4036, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.6078133738116162, | |
| "grad_norm": 0.3386472761631012, | |
| "learning_rate": 3.656215141349984e-05, | |
| "loss": 2.4376, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 0.6084525045937524, | |
| "grad_norm": 0.3475477397441864, | |
| "learning_rate": 3.646039854491737e-05, | |
| "loss": 2.4235, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 0.6090916353758888, | |
| "grad_norm": 0.36561229825019836, | |
| "learning_rate": 3.635870615130155e-05, | |
| "loss": 2.4452, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 0.6097307661580251, | |
| "grad_norm": 0.3828120827674866, | |
| "learning_rate": 3.625707468686398e-05, | |
| "loss": 2.4627, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 0.6103698969401614, | |
| "grad_norm": 0.3916782736778259, | |
| "learning_rate": 3.6155504605544126e-05, | |
| "loss": 2.4247, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.6110090277222977, | |
| "grad_norm": 0.3506986200809479, | |
| "learning_rate": 3.605399636100729e-05, | |
| "loss": 2.4466, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 0.611648158504434, | |
| "grad_norm": 0.35101959109306335, | |
| "learning_rate": 3.595255040664256e-05, | |
| "loss": 2.3925, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 0.6122872892865703, | |
| "grad_norm": 0.3565005958080292, | |
| "learning_rate": 3.585116719556082e-05, | |
| "loss": 2.41, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 0.6129264200687066, | |
| "grad_norm": 0.33222460746765137, | |
| "learning_rate": 3.574984718059271e-05, | |
| "loss": 2.482, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 0.6135655508508429, | |
| "grad_norm": 0.3343389630317688, | |
| "learning_rate": 3.5648590814286574e-05, | |
| "loss": 2.4501, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.6142046816329791, | |
| "grad_norm": 0.3308739960193634, | |
| "learning_rate": 3.554739854890651e-05, | |
| "loss": 2.3709, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 0.6148438124151154, | |
| "grad_norm": 0.3196081519126892, | |
| "learning_rate": 3.544627083643028e-05, | |
| "loss": 2.4753, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 0.6154829431972517, | |
| "grad_norm": 0.3415805995464325, | |
| "learning_rate": 3.534520812854735e-05, | |
| "loss": 2.417, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 0.616122073979388, | |
| "grad_norm": 0.3610650599002838, | |
| "learning_rate": 3.524421087665678e-05, | |
| "loss": 2.426, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 0.6167612047615243, | |
| "grad_norm": 0.360238641500473, | |
| "learning_rate": 3.5143279531865325e-05, | |
| "loss": 2.4175, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.6174003355436606, | |
| "grad_norm": 0.35699307918548584, | |
| "learning_rate": 3.504241454498536e-05, | |
| "loss": 2.45, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 0.6180394663257969, | |
| "grad_norm": 0.35165783762931824, | |
| "learning_rate": 3.494161636653284e-05, | |
| "loss": 2.4222, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 0.6186785971079332, | |
| "grad_norm": 0.33923953771591187, | |
| "learning_rate": 3.4840885446725345e-05, | |
| "loss": 2.4265, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 0.6193177278900696, | |
| "grad_norm": 0.36072978377342224, | |
| "learning_rate": 3.474022223548002e-05, | |
| "loss": 2.4434, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 0.6199568586722058, | |
| "grad_norm": 0.3421432673931122, | |
| "learning_rate": 3.4639627182411635e-05, | |
| "loss": 2.396, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.6205959894543421, | |
| "grad_norm": 0.35356688499450684, | |
| "learning_rate": 3.4539100736830474e-05, | |
| "loss": 2.4146, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 0.6212351202364784, | |
| "grad_norm": 0.3634587824344635, | |
| "learning_rate": 3.443864334774039e-05, | |
| "loss": 2.3613, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 0.6218742510186147, | |
| "grad_norm": 0.3565031588077545, | |
| "learning_rate": 3.433825546383683e-05, | |
| "loss": 2.427, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 0.622513381800751, | |
| "grad_norm": 0.35175177454948425, | |
| "learning_rate": 3.423793753350476e-05, | |
| "loss": 2.4218, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 0.6231525125828873, | |
| "grad_norm": 0.3554588258266449, | |
| "learning_rate": 3.4137690004816734e-05, | |
| "loss": 2.4306, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.6237916433650236, | |
| "grad_norm": 0.3478711247444153, | |
| "learning_rate": 3.403751332553082e-05, | |
| "loss": 2.4471, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 0.6244307741471599, | |
| "grad_norm": 0.3707256019115448, | |
| "learning_rate": 3.393740794308864e-05, | |
| "loss": 2.3924, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 0.6250699049292961, | |
| "grad_norm": 0.3607984483242035, | |
| "learning_rate": 3.383737430461338e-05, | |
| "loss": 2.4068, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 0.6257090357114324, | |
| "grad_norm": 0.38299667835235596, | |
| "learning_rate": 3.373741285690778e-05, | |
| "loss": 2.4364, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 0.6263481664935687, | |
| "grad_norm": 0.38648751378059387, | |
| "learning_rate": 3.363752404645213e-05, | |
| "loss": 2.3914, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.626987297275705, | |
| "grad_norm": 0.3692971169948578, | |
| "learning_rate": 3.353770831940227e-05, | |
| "loss": 2.4335, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 0.6276264280578413, | |
| "grad_norm": 0.3346737325191498, | |
| "learning_rate": 3.3437966121587664e-05, | |
| "loss": 2.421, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 0.6282655588399776, | |
| "grad_norm": 0.3426428437232971, | |
| "learning_rate": 3.3338297898509285e-05, | |
| "loss": 2.4549, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 0.6289046896221139, | |
| "grad_norm": 0.3392780125141144, | |
| "learning_rate": 3.3238704095337734e-05, | |
| "loss": 2.3845, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 0.6295438204042502, | |
| "grad_norm": 0.3634185194969177, | |
| "learning_rate": 3.313918515691123e-05, | |
| "loss": 2.417, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.6301829511863866, | |
| "grad_norm": 0.3306053578853607, | |
| "learning_rate": 3.303974152773358e-05, | |
| "loss": 2.4365, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 0.6308220819685229, | |
| "grad_norm": 0.3323608338832855, | |
| "learning_rate": 3.294037365197221e-05, | |
| "loss": 2.3952, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 0.6314612127506591, | |
| "grad_norm": 0.3474254608154297, | |
| "learning_rate": 3.284108197345622e-05, | |
| "loss": 2.4115, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 0.6321003435327954, | |
| "grad_norm": 0.3318917453289032, | |
| "learning_rate": 3.274186693567437e-05, | |
| "loss": 2.4109, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 0.6327394743149317, | |
| "grad_norm": 0.3310984671115875, | |
| "learning_rate": 3.264272898177308e-05, | |
| "loss": 2.4403, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.633378605097068, | |
| "grad_norm": 0.3261297047138214, | |
| "learning_rate": 3.2543668554554494e-05, | |
| "loss": 2.3997, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 0.6340177358792043, | |
| "grad_norm": 0.3539198040962219, | |
| "learning_rate": 3.244468609647447e-05, | |
| "loss": 2.4153, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 0.6346568666613406, | |
| "grad_norm": 0.3372770845890045, | |
| "learning_rate": 3.2345782049640625e-05, | |
| "loss": 2.356, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 0.6352959974434769, | |
| "grad_norm": 0.3353431522846222, | |
| "learning_rate": 3.224695685581034e-05, | |
| "loss": 2.4426, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 0.6359351282256132, | |
| "grad_norm": 0.32781994342803955, | |
| "learning_rate": 3.21482109563888e-05, | |
| "loss": 2.3829, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.6365742590077494, | |
| "grad_norm": 0.31962648034095764, | |
| "learning_rate": 3.2049544792427036e-05, | |
| "loss": 2.4047, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 0.6372133897898857, | |
| "grad_norm": 0.32206061482429504, | |
| "learning_rate": 3.195095880461989e-05, | |
| "loss": 2.4042, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 0.637852520572022, | |
| "grad_norm": 0.3078974485397339, | |
| "learning_rate": 3.1852453433304155e-05, | |
| "loss": 2.369, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 0.6384916513541583, | |
| "grad_norm": 0.3132205903530121, | |
| "learning_rate": 3.175402911845651e-05, | |
| "loss": 2.3504, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 0.6391307821362946, | |
| "grad_norm": 0.35639870166778564, | |
| "learning_rate": 3.1655686299691625e-05, | |
| "loss": 2.442, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.6397699129184309, | |
| "grad_norm": 0.3372049629688263, | |
| "learning_rate": 3.155742541626011e-05, | |
| "loss": 2.3697, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 0.6404090437005673, | |
| "grad_norm": 0.34127306938171387, | |
| "learning_rate": 3.145924690704668e-05, | |
| "loss": 2.4163, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 0.6410481744827036, | |
| "grad_norm": 0.32460618019104004, | |
| "learning_rate": 3.136115121056806e-05, | |
| "loss": 2.3963, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 0.6416873052648399, | |
| "grad_norm": 0.34012311697006226, | |
| "learning_rate": 3.1263138764971155e-05, | |
| "loss": 2.3843, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 0.6423264360469761, | |
| "grad_norm": 0.33613133430480957, | |
| "learning_rate": 3.1165210008030955e-05, | |
| "loss": 2.3633, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.6429655668291124, | |
| "grad_norm": 0.3432016968727112, | |
| "learning_rate": 3.10673653771487e-05, | |
| "loss": 2.4052, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 0.6436046976112487, | |
| "grad_norm": 0.32075807452201843, | |
| "learning_rate": 3.096960530934988e-05, | |
| "loss": 2.3902, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 0.644243828393385, | |
| "grad_norm": 0.3314560651779175, | |
| "learning_rate": 3.0871930241282255e-05, | |
| "loss": 2.4141, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 0.6448829591755213, | |
| "grad_norm": 0.32347553968429565, | |
| "learning_rate": 3.0774340609213936e-05, | |
| "loss": 2.4133, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 0.6455220899576576, | |
| "grad_norm": 0.32913169264793396, | |
| "learning_rate": 3.0676836849031467e-05, | |
| "loss": 2.4086, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.6461612207397939, | |
| "grad_norm": 0.3299691677093506, | |
| "learning_rate": 3.0579419396237804e-05, | |
| "loss": 2.4287, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 0.6468003515219302, | |
| "grad_norm": 0.3419349789619446, | |
| "learning_rate": 3.0482088685950426e-05, | |
| "loss": 2.4096, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 0.6474394823040664, | |
| "grad_norm": 0.3205043375492096, | |
| "learning_rate": 3.0384845152899365e-05, | |
| "loss": 2.4251, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 0.6480786130862027, | |
| "grad_norm": 0.3427303731441498, | |
| "learning_rate": 3.02876892314253e-05, | |
| "loss": 2.4255, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 0.648717743868339, | |
| "grad_norm": 0.35228732228279114, | |
| "learning_rate": 3.019062135547753e-05, | |
| "loss": 2.3883, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.6493568746504753, | |
| "grad_norm": 0.3313213884830475, | |
| "learning_rate": 3.0093641958612186e-05, | |
| "loss": 2.3707, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 0.6499960054326116, | |
| "grad_norm": 0.3284728229045868, | |
| "learning_rate": 2.9996751473990113e-05, | |
| "loss": 2.3845, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 0.6506351362147479, | |
| "grad_norm": 0.3254750370979309, | |
| "learning_rate": 2.989995033437511e-05, | |
| "loss": 2.4444, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 0.6512742669968843, | |
| "grad_norm": 0.3165215849876404, | |
| "learning_rate": 2.9803238972131843e-05, | |
| "loss": 2.4003, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 0.6519133977790206, | |
| "grad_norm": 0.3404819369316101, | |
| "learning_rate": 2.9706617819224027e-05, | |
| "loss": 2.3888, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.6525525285611569, | |
| "grad_norm": 0.3528549373149872, | |
| "learning_rate": 2.9610087307212457e-05, | |
| "loss": 2.384, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 0.6531916593432932, | |
| "grad_norm": 0.3616853952407837, | |
| "learning_rate": 2.9513647867253047e-05, | |
| "loss": 2.3854, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 0.6538307901254294, | |
| "grad_norm": 0.3777984082698822, | |
| "learning_rate": 2.9417299930094968e-05, | |
| "loss": 2.4003, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 0.6544699209075657, | |
| "grad_norm": 0.34487950801849365, | |
| "learning_rate": 2.9321043926078662e-05, | |
| "loss": 2.4203, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 0.655109051689702, | |
| "grad_norm": 0.3253460228443146, | |
| "learning_rate": 2.922488028513398e-05, | |
| "loss": 2.3539, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.6557481824718383, | |
| "grad_norm": 0.3392188251018524, | |
| "learning_rate": 2.9128809436778214e-05, | |
| "loss": 2.4184, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 0.6563873132539746, | |
| "grad_norm": 0.32009032368659973, | |
| "learning_rate": 2.9032831810114202e-05, | |
| "loss": 2.4158, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 0.6570264440361109, | |
| "grad_norm": 0.3157409131526947, | |
| "learning_rate": 2.8936947833828364e-05, | |
| "loss": 2.4256, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 0.6576655748182472, | |
| "grad_norm": 0.33514082431793213, | |
| "learning_rate": 2.884115793618892e-05, | |
| "loss": 2.4168, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 0.6583047056003835, | |
| "grad_norm": 0.31582510471343994, | |
| "learning_rate": 2.8745462545043805e-05, | |
| "loss": 2.3775, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.6589438363825197, | |
| "grad_norm": 0.3163835108280182, | |
| "learning_rate": 2.8649862087818846e-05, | |
| "loss": 2.3948, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 0.659582967164656, | |
| "grad_norm": 0.3249862492084503, | |
| "learning_rate": 2.8554356991515896e-05, | |
| "loss": 2.4621, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 0.6602220979467923, | |
| "grad_norm": 0.35097938776016235, | |
| "learning_rate": 2.84589476827108e-05, | |
| "loss": 2.412, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 0.6608612287289286, | |
| "grad_norm": 0.3196970224380493, | |
| "learning_rate": 2.836363458755166e-05, | |
| "loss": 2.3927, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 0.661500359511065, | |
| "grad_norm": 0.3304080367088318, | |
| "learning_rate": 2.826841813175677e-05, | |
| "loss": 2.3975, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.6621394902932013, | |
| "grad_norm": 0.33508020639419556, | |
| "learning_rate": 2.817329874061276e-05, | |
| "loss": 2.3585, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 0.6627786210753376, | |
| "grad_norm": 0.34135109186172485, | |
| "learning_rate": 2.8078276838972805e-05, | |
| "loss": 2.3942, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 0.6634177518574739, | |
| "grad_norm": 0.32730481028556824, | |
| "learning_rate": 2.7983352851254576e-05, | |
| "loss": 2.4264, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 0.6640568826396102, | |
| "grad_norm": 0.3183066248893738, | |
| "learning_rate": 2.7888527201438397e-05, | |
| "loss": 2.4319, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 0.6646960134217464, | |
| "grad_norm": 0.31897005438804626, | |
| "learning_rate": 2.7793800313065442e-05, | |
| "loss": 2.3883, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.6653351442038827, | |
| "grad_norm": 0.333736777305603, | |
| "learning_rate": 2.7699172609235692e-05, | |
| "loss": 2.3803, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 0.665974274986019, | |
| "grad_norm": 0.33056700229644775, | |
| "learning_rate": 2.7604644512606105e-05, | |
| "loss": 2.4041, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 0.6666134057681553, | |
| "grad_norm": 0.3360205888748169, | |
| "learning_rate": 2.751021644538883e-05, | |
| "loss": 2.4065, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 0.6672525365502916, | |
| "grad_norm": 0.34978771209716797, | |
| "learning_rate": 2.7415888829349136e-05, | |
| "loss": 2.3774, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 0.6678916673324279, | |
| "grad_norm": 0.34892502427101135, | |
| "learning_rate": 2.73216620858037e-05, | |
| "loss": 2.3981, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.6685307981145642, | |
| "grad_norm": 0.35604289174079895, | |
| "learning_rate": 2.7227536635618604e-05, | |
| "loss": 2.3488, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 0.6691699288967005, | |
| "grad_norm": 0.34377163648605347, | |
| "learning_rate": 2.7133512899207493e-05, | |
| "loss": 2.3904, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 0.6698090596788367, | |
| "grad_norm": 0.32637789845466614, | |
| "learning_rate": 2.7039591296529755e-05, | |
| "loss": 2.4019, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 0.670448190460973, | |
| "grad_norm": 0.3383873701095581, | |
| "learning_rate": 2.6945772247088542e-05, | |
| "loss": 2.4399, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 0.6710873212431093, | |
| "grad_norm": 0.32182028889656067, | |
| "learning_rate": 2.6852056169928964e-05, | |
| "loss": 2.4283, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.6717264520252456, | |
| "grad_norm": 0.30172058939933777, | |
| "learning_rate": 2.6758443483636242e-05, | |
| "loss": 2.3922, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 0.672365582807382, | |
| "grad_norm": 0.308273047208786, | |
| "learning_rate": 2.666493460633376e-05, | |
| "loss": 2.4261, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 0.6730047135895183, | |
| "grad_norm": 0.30814963579177856, | |
| "learning_rate": 2.65715299556812e-05, | |
| "loss": 2.3481, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 0.6736438443716546, | |
| "grad_norm": 0.33674439787864685, | |
| "learning_rate": 2.6478229948872822e-05, | |
| "loss": 2.4288, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 0.6742829751537909, | |
| "grad_norm": 0.3314446210861206, | |
| "learning_rate": 2.6385035002635395e-05, | |
| "loss": 2.4257, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.6749221059359272, | |
| "grad_norm": 0.3505234718322754, | |
| "learning_rate": 2.6291945533226466e-05, | |
| "loss": 2.4147, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 0.6755612367180635, | |
| "grad_norm": 0.3278127610683441, | |
| "learning_rate": 2.619896195643246e-05, | |
| "loss": 2.4134, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 0.6762003675001997, | |
| "grad_norm": 0.350656658411026, | |
| "learning_rate": 2.610608468756682e-05, | |
| "loss": 2.4145, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 0.676839498282336, | |
| "grad_norm": 0.36190265417099, | |
| "learning_rate": 2.6013314141468226e-05, | |
| "loss": 2.4253, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 0.6774786290644723, | |
| "grad_norm": 0.3399929702281952, | |
| "learning_rate": 2.5920650732498603e-05, | |
| "loss": 2.3864, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.6781177598466086, | |
| "grad_norm": 0.3348313868045807, | |
| "learning_rate": 2.5828094874541343e-05, | |
| "loss": 2.3685, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 0.6787568906287449, | |
| "grad_norm": 0.32463929057121277, | |
| "learning_rate": 2.573564698099954e-05, | |
| "loss": 2.4262, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 0.6793960214108812, | |
| "grad_norm": 0.32584908604621887, | |
| "learning_rate": 2.5643307464793963e-05, | |
| "loss": 2.4043, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 0.6800351521930175, | |
| "grad_norm": 0.3449130654335022, | |
| "learning_rate": 2.5551076738361403e-05, | |
| "loss": 2.4395, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 0.6806742829751538, | |
| "grad_norm": 0.3255315124988556, | |
| "learning_rate": 2.5458955213652665e-05, | |
| "loss": 2.4114, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.68131341375729, | |
| "grad_norm": 0.30774372816085815, | |
| "learning_rate": 2.5366943302130818e-05, | |
| "loss": 2.3308, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 0.6819525445394263, | |
| "grad_norm": 0.32823121547698975, | |
| "learning_rate": 2.5275041414769385e-05, | |
| "loss": 2.4121, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 0.6825916753215627, | |
| "grad_norm": 0.3112015128135681, | |
| "learning_rate": 2.5183249962050427e-05, | |
| "loss": 2.4272, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 0.683230806103699, | |
| "grad_norm": 0.326582670211792, | |
| "learning_rate": 2.509156935396272e-05, | |
| "loss": 2.3859, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 0.6838699368858353, | |
| "grad_norm": 0.3214931786060333, | |
| "learning_rate": 2.500000000000001e-05, | |
| "loss": 2.4274, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.6845090676679716, | |
| "grad_norm": 0.32137638330459595, | |
| "learning_rate": 2.4908542309159093e-05, | |
| "loss": 2.4163, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 0.6851481984501079, | |
| "grad_norm": 0.3037187457084656, | |
| "learning_rate": 2.4817196689937994e-05, | |
| "loss": 2.4228, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 0.6857873292322442, | |
| "grad_norm": 0.3500989079475403, | |
| "learning_rate": 2.4725963550334225e-05, | |
| "loss": 2.3818, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 0.6864264600143805, | |
| "grad_norm": 0.308573454618454, | |
| "learning_rate": 2.463484329784284e-05, | |
| "loss": 2.3965, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 0.6870655907965167, | |
| "grad_norm": 0.31541261076927185, | |
| "learning_rate": 2.4543836339454758e-05, | |
| "loss": 2.3922, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.687704721578653, | |
| "grad_norm": 0.30264782905578613, | |
| "learning_rate": 2.4452943081654788e-05, | |
| "loss": 2.3741, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 0.6883438523607893, | |
| "grad_norm": 0.324496865272522, | |
| "learning_rate": 2.4362163930419923e-05, | |
| "loss": 2.4361, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 0.6889829831429256, | |
| "grad_norm": 0.315473347902298, | |
| "learning_rate": 2.4271499291217527e-05, | |
| "loss": 2.3898, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 0.6896221139250619, | |
| "grad_norm": 0.3436465859413147, | |
| "learning_rate": 2.4180949569003454e-05, | |
| "loss": 2.4047, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 0.6902612447071982, | |
| "grad_norm": 0.314488023519516, | |
| "learning_rate": 2.4090515168220262e-05, | |
| "loss": 2.428, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.6909003754893345, | |
| "grad_norm": 0.32572513818740845, | |
| "learning_rate": 2.4000196492795502e-05, | |
| "loss": 2.455, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 0.6915395062714708, | |
| "grad_norm": 0.31226715445518494, | |
| "learning_rate": 2.3909993946139753e-05, | |
| "loss": 2.4584, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 0.692178637053607, | |
| "grad_norm": 0.32452714443206787, | |
| "learning_rate": 2.3819907931144926e-05, | |
| "loss": 2.3975, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 0.6928177678357433, | |
| "grad_norm": 0.31749966740608215, | |
| "learning_rate": 2.3729938850182472e-05, | |
| "loss": 2.3746, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 0.6934568986178797, | |
| "grad_norm": 0.33348172903060913, | |
| "learning_rate": 2.364008710510152e-05, | |
| "loss": 2.4469, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.694096029400016, | |
| "grad_norm": 0.3238024115562439, | |
| "learning_rate": 2.3550353097227103e-05, | |
| "loss": 2.3884, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 0.6947351601821523, | |
| "grad_norm": 0.3147251307964325, | |
| "learning_rate": 2.346073722735843e-05, | |
| "loss": 2.3933, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 0.6953742909642886, | |
| "grad_norm": 0.38077425956726074, | |
| "learning_rate": 2.337123989576699e-05, | |
| "loss": 2.3866, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 0.6960134217464249, | |
| "grad_norm": 0.3195430636405945, | |
| "learning_rate": 2.3281861502194862e-05, | |
| "loss": 2.4422, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 0.6966525525285612, | |
| "grad_norm": 0.32491445541381836, | |
| "learning_rate": 2.3192602445852855e-05, | |
| "loss": 2.3735, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.6972916833106975, | |
| "grad_norm": 0.3149765729904175, | |
| "learning_rate": 2.3103463125418744e-05, | |
| "loss": 2.4815, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 0.6979308140928338, | |
| "grad_norm": 0.328037828207016, | |
| "learning_rate": 2.301444393903555e-05, | |
| "loss": 2.4043, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 0.69856994487497, | |
| "grad_norm": 0.31465497612953186, | |
| "learning_rate": 2.292554528430968e-05, | |
| "loss": 2.4081, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 0.6992090756571063, | |
| "grad_norm": 0.3285697400569916, | |
| "learning_rate": 2.283676755830915e-05, | |
| "loss": 2.3775, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 0.6998482064392426, | |
| "grad_norm": 0.3445409834384918, | |
| "learning_rate": 2.2748111157561935e-05, | |
| "loss": 2.4245, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.7004873372213789, | |
| "grad_norm": 0.3294592797756195, | |
| "learning_rate": 2.265957647805402e-05, | |
| "loss": 2.4144, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 0.7011264680035152, | |
| "grad_norm": 0.3167382776737213, | |
| "learning_rate": 2.257116391522775e-05, | |
| "loss": 2.4331, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 0.7017655987856515, | |
| "grad_norm": 0.3200324475765228, | |
| "learning_rate": 2.2482873863980058e-05, | |
| "loss": 2.3665, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 0.7024047295677878, | |
| "grad_norm": 0.31557711958885193, | |
| "learning_rate": 2.2394706718660626e-05, | |
| "loss": 2.3842, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 0.703043860349924, | |
| "grad_norm": 0.33939215540885925, | |
| "learning_rate": 2.230666287307024e-05, | |
| "loss": 2.4315, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.7036829911320603, | |
| "grad_norm": 0.30843600630760193, | |
| "learning_rate": 2.2218742720458896e-05, | |
| "loss": 2.3958, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 0.7043221219141967, | |
| "grad_norm": 0.32044851779937744, | |
| "learning_rate": 2.213094665352413e-05, | |
| "loss": 2.4293, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 0.704961252696333, | |
| "grad_norm": 0.3253398537635803, | |
| "learning_rate": 2.2043275064409308e-05, | |
| "loss": 2.4239, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 0.7056003834784693, | |
| "grad_norm": 0.3412952125072479, | |
| "learning_rate": 2.1955728344701725e-05, | |
| "loss": 2.4013, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 0.7062395142606056, | |
| "grad_norm": 0.32509467005729675, | |
| "learning_rate": 2.1868306885430972e-05, | |
| "loss": 2.3941, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.7068786450427419, | |
| "grad_norm": 0.32186752557754517, | |
| "learning_rate": 2.1781011077067215e-05, | |
| "loss": 2.4042, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 0.7075177758248782, | |
| "grad_norm": 0.3241979479789734, | |
| "learning_rate": 2.1693841309519318e-05, | |
| "loss": 2.3832, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 0.7081569066070145, | |
| "grad_norm": 0.3286667764186859, | |
| "learning_rate": 2.1606797972133218e-05, | |
| "loss": 2.3919, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 0.7087960373891508, | |
| "grad_norm": 0.3296869993209839, | |
| "learning_rate": 2.1519881453690167e-05, | |
| "loss": 2.4016, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 0.709435168171287, | |
| "grad_norm": 0.3268314301967621, | |
| "learning_rate": 2.1433092142404942e-05, | |
| "loss": 2.3859, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.7100742989534233, | |
| "grad_norm": 0.31070345640182495, | |
| "learning_rate": 2.1346430425924146e-05, | |
| "loss": 2.3976, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 0.7107134297355596, | |
| "grad_norm": 0.3096957504749298, | |
| "learning_rate": 2.1259896691324526e-05, | |
| "loss": 2.3894, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 0.7113525605176959, | |
| "grad_norm": 0.31908226013183594, | |
| "learning_rate": 2.1173491325111122e-05, | |
| "loss": 2.3804, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 0.7119916912998322, | |
| "grad_norm": 0.328743577003479, | |
| "learning_rate": 2.1087214713215692e-05, | |
| "loss": 2.3837, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 0.7126308220819685, | |
| "grad_norm": 0.3035465180873871, | |
| "learning_rate": 2.1001067240994844e-05, | |
| "loss": 2.3847, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.7132699528641048, | |
| "grad_norm": 0.34501713514328003, | |
| "learning_rate": 2.091504929322839e-05, | |
| "loss": 2.3969, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 0.7139090836462411, | |
| "grad_norm": 0.3255172073841095, | |
| "learning_rate": 2.0829161254117667e-05, | |
| "loss": 2.3681, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 0.7145482144283775, | |
| "grad_norm": 0.3379373252391815, | |
| "learning_rate": 2.074340350728372e-05, | |
| "loss": 2.3995, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 0.7151873452105137, | |
| "grad_norm": 0.33437445759773254, | |
| "learning_rate": 2.0657776435765646e-05, | |
| "loss": 2.3774, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 0.71582647599265, | |
| "grad_norm": 0.3237239122390747, | |
| "learning_rate": 2.0572280422018924e-05, | |
| "loss": 2.3671, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.7164656067747863, | |
| "grad_norm": 0.32475078105926514, | |
| "learning_rate": 2.04869158479136e-05, | |
| "loss": 2.4228, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 0.7171047375569226, | |
| "grad_norm": 0.3421247601509094, | |
| "learning_rate": 2.0401683094732665e-05, | |
| "loss": 2.4129, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 0.7177438683390589, | |
| "grad_norm": 0.3215385675430298, | |
| "learning_rate": 2.031658254317035e-05, | |
| "loss": 2.4299, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 0.7183829991211952, | |
| "grad_norm": 0.32869160175323486, | |
| "learning_rate": 2.023161457333037e-05, | |
| "loss": 2.397, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 0.7190221299033315, | |
| "grad_norm": 0.302349716424942, | |
| "learning_rate": 2.0146779564724273e-05, | |
| "loss": 2.4006, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.7196612606854678, | |
| "grad_norm": 0.313032865524292, | |
| "learning_rate": 2.006207789626975e-05, | |
| "loss": 2.3902, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 0.720300391467604, | |
| "grad_norm": 0.3050599992275238, | |
| "learning_rate": 1.9977509946288886e-05, | |
| "loss": 2.3848, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 0.7209395222497403, | |
| "grad_norm": 0.3055770993232727, | |
| "learning_rate": 1.9893076092506567e-05, | |
| "loss": 2.3842, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 0.7215786530318766, | |
| "grad_norm": 0.3212378919124603, | |
| "learning_rate": 1.9808776712048683e-05, | |
| "loss": 2.3628, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 0.7222177838140129, | |
| "grad_norm": 0.32654687762260437, | |
| "learning_rate": 1.9724612181440495e-05, | |
| "loss": 2.421, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.7228569145961492, | |
| "grad_norm": 0.3017953336238861, | |
| "learning_rate": 1.9640582876605002e-05, | |
| "loss": 2.4089, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 0.7234960453782855, | |
| "grad_norm": 0.29735463857650757, | |
| "learning_rate": 1.9556689172861175e-05, | |
| "loss": 2.3745, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 0.7241351761604218, | |
| "grad_norm": 0.30560943484306335, | |
| "learning_rate": 1.947293144492231e-05, | |
| "loss": 2.4322, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 0.7247743069425581, | |
| "grad_norm": 0.3102264404296875, | |
| "learning_rate": 1.938931006689442e-05, | |
| "loss": 2.3696, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 0.7254134377246945, | |
| "grad_norm": 0.29895904660224915, | |
| "learning_rate": 1.9305825412274438e-05, | |
| "loss": 2.3857, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.7260525685068308, | |
| "grad_norm": 0.31194618344306946, | |
| "learning_rate": 1.9222477853948644e-05, | |
| "loss": 2.3985, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 0.726691699288967, | |
| "grad_norm": 0.30112582445144653, | |
| "learning_rate": 1.913926776419101e-05, | |
| "loss": 2.4327, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 0.7273308300711033, | |
| "grad_norm": 0.3220316171646118, | |
| "learning_rate": 1.9056195514661446e-05, | |
| "loss": 2.3763, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 0.7279699608532396, | |
| "grad_norm": 0.30732378363609314, | |
| "learning_rate": 1.8973261476404242e-05, | |
| "loss": 2.3978, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 0.7286090916353759, | |
| "grad_norm": 0.30898353457450867, | |
| "learning_rate": 1.8890466019846336e-05, | |
| "loss": 2.4197, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.7292482224175122, | |
| "grad_norm": 0.31187155842781067, | |
| "learning_rate": 1.8807809514795693e-05, | |
| "loss": 2.4104, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 0.7298873531996485, | |
| "grad_norm": 0.30854105949401855, | |
| "learning_rate": 1.872529233043964e-05, | |
| "loss": 2.3934, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 0.7305264839817848, | |
| "grad_norm": 0.30169013142585754, | |
| "learning_rate": 1.8642914835343224e-05, | |
| "loss": 2.4017, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 0.7311656147639211, | |
| "grad_norm": 0.31179869174957275, | |
| "learning_rate": 1.8560677397447605e-05, | |
| "loss": 2.4219, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 0.7318047455460573, | |
| "grad_norm": 0.30315324664115906, | |
| "learning_rate": 1.8478580384068338e-05, | |
| "loss": 2.422, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.7324438763281936, | |
| "grad_norm": 0.320681631565094, | |
| "learning_rate": 1.8396624161893744e-05, | |
| "loss": 2.4188, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 0.7330830071103299, | |
| "grad_norm": 0.334105908870697, | |
| "learning_rate": 1.831480909698337e-05, | |
| "loss": 2.4012, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 0.7337221378924662, | |
| "grad_norm": 0.3214009702205658, | |
| "learning_rate": 1.8233135554766217e-05, | |
| "loss": 2.4132, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 0.7343612686746025, | |
| "grad_norm": 0.321273535490036, | |
| "learning_rate": 1.8151603900039184e-05, | |
| "loss": 2.3965, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 0.7350003994567388, | |
| "grad_norm": 0.325740247964859, | |
| "learning_rate": 1.807021449696546e-05, | |
| "loss": 2.4287, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.7356395302388752, | |
| "grad_norm": 0.33203500509262085, | |
| "learning_rate": 1.7988967709072818e-05, | |
| "loss": 2.3786, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 0.7362786610210115, | |
| "grad_norm": 0.34840548038482666, | |
| "learning_rate": 1.7907863899252055e-05, | |
| "loss": 2.4278, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 0.7369177918031478, | |
| "grad_norm": 0.3101317286491394, | |
| "learning_rate": 1.782690342975537e-05, | |
| "loss": 2.3722, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 0.737556922585284, | |
| "grad_norm": 0.31756889820098877, | |
| "learning_rate": 1.7746086662194693e-05, | |
| "loss": 2.4013, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 0.7381960533674203, | |
| "grad_norm": 0.3128024637699127, | |
| "learning_rate": 1.766541395754016e-05, | |
| "loss": 2.3602, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.7388351841495566, | |
| "grad_norm": 0.31111767888069153, | |
| "learning_rate": 1.758488567611839e-05, | |
| "loss": 2.414, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 0.7394743149316929, | |
| "grad_norm": 0.31951892375946045, | |
| "learning_rate": 1.7504502177610937e-05, | |
| "loss": 2.358, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 0.7401134457138292, | |
| "grad_norm": 0.30566245317459106, | |
| "learning_rate": 1.742426382105274e-05, | |
| "loss": 2.4186, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 0.7407525764959655, | |
| "grad_norm": 0.31361520290374756, | |
| "learning_rate": 1.7344170964830385e-05, | |
| "loss": 2.3677, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 0.7413917072781018, | |
| "grad_norm": 0.3040119409561157, | |
| "learning_rate": 1.7264223966680592e-05, | |
| "loss": 2.3934, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.7420308380602381, | |
| "grad_norm": 0.3194606900215149, | |
| "learning_rate": 1.718442318368866e-05, | |
| "loss": 2.3713, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 0.7426699688423744, | |
| "grad_norm": 0.31591519713401794, | |
| "learning_rate": 1.7104768972286743e-05, | |
| "loss": 2.3913, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 0.7433090996245106, | |
| "grad_norm": 0.3178085386753082, | |
| "learning_rate": 1.7025261688252347e-05, | |
| "loss": 2.4338, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 0.7439482304066469, | |
| "grad_norm": 0.31225934624671936, | |
| "learning_rate": 1.6945901686706767e-05, | |
| "loss": 2.4135, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 0.7445873611887832, | |
| "grad_norm": 0.31034085154533386, | |
| "learning_rate": 1.6866689322113398e-05, | |
| "loss": 2.4247, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.7452264919709195, | |
| "grad_norm": 0.3121412694454193, | |
| "learning_rate": 1.6787624948276238e-05, | |
| "loss": 2.3571, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 0.7458656227530558, | |
| "grad_norm": 0.30571937561035156, | |
| "learning_rate": 1.6708708918338295e-05, | |
| "loss": 2.3949, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 0.7465047535351922, | |
| "grad_norm": 0.2945241928100586, | |
| "learning_rate": 1.662994158477995e-05, | |
| "loss": 2.3648, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 0.7471438843173285, | |
| "grad_norm": 0.30935221910476685, | |
| "learning_rate": 1.65513232994175e-05, | |
| "loss": 2.3634, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 0.7477830150994648, | |
| "grad_norm": 0.31369349360466003, | |
| "learning_rate": 1.647285441340144e-05, | |
| "loss": 2.3715, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.748422145881601, | |
| "grad_norm": 0.32095783948898315, | |
| "learning_rate": 1.6394535277215e-05, | |
| "loss": 2.3523, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 0.7490612766637373, | |
| "grad_norm": 0.3021396994590759, | |
| "learning_rate": 1.631636624067257e-05, | |
| "loss": 2.4382, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 0.7497004074458736, | |
| "grad_norm": 0.30491819977760315, | |
| "learning_rate": 1.623834765291809e-05, | |
| "loss": 2.372, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 0.7503395382280099, | |
| "grad_norm": 0.3138176202774048, | |
| "learning_rate": 1.61604798624235e-05, | |
| "loss": 2.425, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 0.7509786690101462, | |
| "grad_norm": 0.2989391088485718, | |
| "learning_rate": 1.6082763216987263e-05, | |
| "loss": 2.3649, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.7516177997922825, | |
| "grad_norm": 0.3188442587852478, | |
| "learning_rate": 1.600519806373268e-05, | |
| "loss": 2.396, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 0.7522569305744188, | |
| "grad_norm": 0.3038438856601715, | |
| "learning_rate": 1.5927784749106428e-05, | |
| "loss": 2.3663, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 0.7528960613565551, | |
| "grad_norm": 0.31589147448539734, | |
| "learning_rate": 1.5850523618877033e-05, | |
| "loss": 2.3519, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 0.7535351921386914, | |
| "grad_norm": 0.33524927496910095, | |
| "learning_rate": 1.5773415018133242e-05, | |
| "loss": 2.3883, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 0.7541743229208276, | |
| "grad_norm": 0.3274453580379486, | |
| "learning_rate": 1.5696459291282517e-05, | |
| "loss": 2.3606, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.7548134537029639, | |
| "grad_norm": 0.2999597191810608, | |
| "learning_rate": 1.5619656782049564e-05, | |
| "loss": 2.409, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 0.7554525844851002, | |
| "grad_norm": 0.325316846370697, | |
| "learning_rate": 1.554300783347466e-05, | |
| "loss": 2.3676, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 0.7560917152672365, | |
| "grad_norm": 0.3010846972465515, | |
| "learning_rate": 1.5466512787912285e-05, | |
| "loss": 2.4126, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 0.7567308460493729, | |
| "grad_norm": 0.29933175444602966, | |
| "learning_rate": 1.5390171987029432e-05, | |
| "loss": 2.3875, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 0.7573699768315092, | |
| "grad_norm": 0.3176608383655548, | |
| "learning_rate": 1.5313985771804185e-05, | |
| "loss": 2.4041, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.7580091076136455, | |
| "grad_norm": 0.30291593074798584, | |
| "learning_rate": 1.5237954482524187e-05, | |
| "loss": 2.406, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 0.7586482383957818, | |
| "grad_norm": 0.3116997182369232, | |
| "learning_rate": 1.5162078458785079e-05, | |
| "loss": 2.408, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 0.7592873691779181, | |
| "grad_norm": 0.31591033935546875, | |
| "learning_rate": 1.5086358039488985e-05, | |
| "loss": 2.4062, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 0.7599264999600543, | |
| "grad_norm": 0.30459484457969666, | |
| "learning_rate": 1.5010793562843073e-05, | |
| "loss": 2.4386, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 0.7605656307421906, | |
| "grad_norm": 0.315398246049881, | |
| "learning_rate": 1.493538536635794e-05, | |
| "loss": 2.3924, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.7612047615243269, | |
| "grad_norm": 0.30749836564064026, | |
| "learning_rate": 1.4860133786846165e-05, | |
| "loss": 2.4509, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 0.7618438923064632, | |
| "grad_norm": 0.2933703362941742, | |
| "learning_rate": 1.4785039160420822e-05, | |
| "loss": 2.425, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 0.7624830230885995, | |
| "grad_norm": 0.3066045641899109, | |
| "learning_rate": 1.4710101822493899e-05, | |
| "loss": 2.4347, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 0.7631221538707358, | |
| "grad_norm": 0.30522796511650085, | |
| "learning_rate": 1.4635322107774912e-05, | |
| "loss": 2.3649, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 0.7637612846528721, | |
| "grad_norm": 0.29192835092544556, | |
| "learning_rate": 1.4560700350269295e-05, | |
| "loss": 2.4322, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.7644004154350084, | |
| "grad_norm": 0.2904500365257263, | |
| "learning_rate": 1.4486236883276978e-05, | |
| "loss": 2.4078, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 0.7650395462171447, | |
| "grad_norm": 0.3216777443885803, | |
| "learning_rate": 1.4411932039390912e-05, | |
| "loss": 2.4067, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 0.7656786769992809, | |
| "grad_norm": 0.31055617332458496, | |
| "learning_rate": 1.4337786150495519e-05, | |
| "loss": 2.3724, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 0.7663178077814172, | |
| "grad_norm": 0.29640600085258484, | |
| "learning_rate": 1.4263799547765239e-05, | |
| "loss": 2.4042, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 0.7669569385635535, | |
| "grad_norm": 0.29466548562049866, | |
| "learning_rate": 1.41899725616631e-05, | |
| "loss": 2.4385, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.7675960693456899, | |
| "grad_norm": 0.30521368980407715, | |
| "learning_rate": 1.4116305521939165e-05, | |
| "loss": 2.3429, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 0.7682352001278262, | |
| "grad_norm": 0.315070778131485, | |
| "learning_rate": 1.4042798757629077e-05, | |
| "loss": 2.3788, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 0.7688743309099625, | |
| "grad_norm": 0.2974884510040283, | |
| "learning_rate": 1.3969452597052656e-05, | |
| "loss": 2.3889, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 0.7695134616920988, | |
| "grad_norm": 0.30024901032447815, | |
| "learning_rate": 1.3896267367812344e-05, | |
| "loss": 2.3934, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 0.7701525924742351, | |
| "grad_norm": 0.3011760413646698, | |
| "learning_rate": 1.3823243396791769e-05, | |
| "loss": 2.4015, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.7707917232563714, | |
| "grad_norm": 0.28974053263664246, | |
| "learning_rate": 1.3750381010154339e-05, | |
| "loss": 2.3996, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 0.7714308540385076, | |
| "grad_norm": 0.31065377593040466, | |
| "learning_rate": 1.3677680533341696e-05, | |
| "loss": 2.3738, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 0.7720699848206439, | |
| "grad_norm": 0.30433130264282227, | |
| "learning_rate": 1.3605142291072353e-05, | |
| "loss": 2.3777, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 0.7727091156027802, | |
| "grad_norm": 0.30343618988990784, | |
| "learning_rate": 1.3532766607340152e-05, | |
| "loss": 2.4011, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 0.7733482463849165, | |
| "grad_norm": 0.3131425082683563, | |
| "learning_rate": 1.3460553805412885e-05, | |
| "loss": 2.3622, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.7739873771670528, | |
| "grad_norm": 0.312412828207016, | |
| "learning_rate": 1.3388504207830843e-05, | |
| "loss": 2.3893, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 0.7746265079491891, | |
| "grad_norm": 0.29259592294692993, | |
| "learning_rate": 1.3316618136405335e-05, | |
| "loss": 2.4089, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 0.7752656387313254, | |
| "grad_norm": 0.3041503131389618, | |
| "learning_rate": 1.324489591221727e-05, | |
| "loss": 2.3895, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 0.7759047695134617, | |
| "grad_norm": 0.31403782963752747, | |
| "learning_rate": 1.3173337855615786e-05, | |
| "loss": 2.3628, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 0.7765439002955979, | |
| "grad_norm": 0.3026371896266937, | |
| "learning_rate": 1.31019442862167e-05, | |
| "loss": 2.372, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.7771830310777342, | |
| "grad_norm": 0.3127496540546417, | |
| "learning_rate": 1.3030715522901143e-05, | |
| "loss": 2.4201, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 0.7778221618598706, | |
| "grad_norm": 0.30755943059921265, | |
| "learning_rate": 1.2959651883814206e-05, | |
| "loss": 2.3304, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 0.7784612926420069, | |
| "grad_norm": 0.29612797498703003, | |
| "learning_rate": 1.2888753686363353e-05, | |
| "loss": 2.3758, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 0.7791004234241432, | |
| "grad_norm": 0.3074271082878113, | |
| "learning_rate": 1.2818021247217188e-05, | |
| "loss": 2.4241, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 0.7797395542062795, | |
| "grad_norm": 0.2922079861164093, | |
| "learning_rate": 1.2747454882303889e-05, | |
| "loss": 2.3822, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.7803786849884158, | |
| "grad_norm": 0.3162063956260681, | |
| "learning_rate": 1.2677054906809865e-05, | |
| "loss": 2.4065, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 0.7810178157705521, | |
| "grad_norm": 0.303093820810318, | |
| "learning_rate": 1.260682163517839e-05, | |
| "loss": 2.3892, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 0.7816569465526884, | |
| "grad_norm": 0.2909381687641144, | |
| "learning_rate": 1.25367553811081e-05, | |
| "loss": 2.3585, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 0.7822960773348246, | |
| "grad_norm": 0.2945812940597534, | |
| "learning_rate": 1.246685645755165e-05, | |
| "loss": 2.3914, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 0.7829352081169609, | |
| "grad_norm": 0.29655662178993225, | |
| "learning_rate": 1.2397125176714353e-05, | |
| "loss": 2.3603, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.7835743388990972, | |
| "grad_norm": 0.3011781871318817, | |
| "learning_rate": 1.2327561850052683e-05, | |
| "loss": 2.3943, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 0.7842134696812335, | |
| "grad_norm": 0.28379178047180176, | |
| "learning_rate": 1.2258166788272978e-05, | |
| "loss": 2.3609, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 0.7848526004633698, | |
| "grad_norm": 0.29311689734458923, | |
| "learning_rate": 1.218894030133001e-05, | |
| "loss": 2.4171, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 0.7854917312455061, | |
| "grad_norm": 0.3034970462322235, | |
| "learning_rate": 1.2119882698425584e-05, | |
| "loss": 2.4285, | |
| "step": 2458 | |
| }, | |
| { | |
| "epoch": 0.7861308620276424, | |
| "grad_norm": 0.30838078260421753, | |
| "learning_rate": 1.2050994288007229e-05, | |
| "loss": 2.4187, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.7867699928097787, | |
| "grad_norm": 0.30407431721687317, | |
| "learning_rate": 1.1982275377766734e-05, | |
| "loss": 2.4069, | |
| "step": 2462 | |
| }, | |
| { | |
| "epoch": 0.787409123591915, | |
| "grad_norm": 0.2969192862510681, | |
| "learning_rate": 1.1913726274638804e-05, | |
| "loss": 2.4292, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 0.7880482543740512, | |
| "grad_norm": 0.29584142565727234, | |
| "learning_rate": 1.184534728479974e-05, | |
| "loss": 2.3975, | |
| "step": 2466 | |
| }, | |
| { | |
| "epoch": 0.7886873851561876, | |
| "grad_norm": 0.2928512692451477, | |
| "learning_rate": 1.1777138713665987e-05, | |
| "loss": 2.386, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 0.7893265159383239, | |
| "grad_norm": 0.30150723457336426, | |
| "learning_rate": 1.1709100865892813e-05, | |
| "loss": 2.3447, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.7899656467204602, | |
| "grad_norm": 0.31285277009010315, | |
| "learning_rate": 1.1641234045372984e-05, | |
| "loss": 2.3956, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 0.7906047775025965, | |
| "grad_norm": 0.2777414917945862, | |
| "learning_rate": 1.1573538555235314e-05, | |
| "loss": 2.3851, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 0.7912439082847328, | |
| "grad_norm": 0.31305256485939026, | |
| "learning_rate": 1.150601469784342e-05, | |
| "loss": 2.3962, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 0.7918830390668691, | |
| "grad_norm": 0.2986930012702942, | |
| "learning_rate": 1.1438662774794278e-05, | |
| "loss": 2.4112, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 0.7925221698490054, | |
| "grad_norm": 0.2816283404827118, | |
| "learning_rate": 1.1371483086916917e-05, | |
| "loss": 2.4059, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.7931613006311417, | |
| "grad_norm": 0.2959612309932709, | |
| "learning_rate": 1.1304475934271103e-05, | |
| "loss": 2.3654, | |
| "step": 2482 | |
| }, | |
| { | |
| "epoch": 0.7938004314132779, | |
| "grad_norm": 0.30010542273521423, | |
| "learning_rate": 1.1237641616145938e-05, | |
| "loss": 2.3798, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 0.7944395621954142, | |
| "grad_norm": 0.29684752225875854, | |
| "learning_rate": 1.1170980431058559e-05, | |
| "loss": 2.3677, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 0.7950786929775505, | |
| "grad_norm": 0.312680184841156, | |
| "learning_rate": 1.1104492676752831e-05, | |
| "loss": 2.3988, | |
| "step": 2488 | |
| }, | |
| { | |
| "epoch": 0.7957178237596868, | |
| "grad_norm": 0.2939154803752899, | |
| "learning_rate": 1.1038178650197944e-05, | |
| "loss": 2.3884, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.7963569545418231, | |
| "grad_norm": 0.3191356360912323, | |
| "learning_rate": 1.097203864758714e-05, | |
| "loss": 2.4245, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 0.7969960853239594, | |
| "grad_norm": 0.28831666707992554, | |
| "learning_rate": 1.0906072964336411e-05, | |
| "loss": 2.3838, | |
| "step": 2494 | |
| }, | |
| { | |
| "epoch": 0.7976352161060957, | |
| "grad_norm": 0.2987872064113617, | |
| "learning_rate": 1.0840281895083093e-05, | |
| "loss": 2.3649, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 0.798274346888232, | |
| "grad_norm": 0.3064946234226227, | |
| "learning_rate": 1.0774665733684663e-05, | |
| "loss": 2.3354, | |
| "step": 2498 | |
| }, | |
| { | |
| "epoch": 0.7989134776703682, | |
| "grad_norm": 0.3006128966808319, | |
| "learning_rate": 1.0709224773217319e-05, | |
| "loss": 2.4131, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.7995526084525046, | |
| "grad_norm": 0.30367907881736755, | |
| "learning_rate": 1.0643959305974733e-05, | |
| "loss": 2.3785, | |
| "step": 2502 | |
| }, | |
| { | |
| "epoch": 0.8001917392346409, | |
| "grad_norm": 0.3029414415359497, | |
| "learning_rate": 1.0578869623466753e-05, | |
| "loss": 2.3707, | |
| "step": 2504 | |
| }, | |
| { | |
| "epoch": 0.8008308700167772, | |
| "grad_norm": 0.2870781719684601, | |
| "learning_rate": 1.0513956016418064e-05, | |
| "loss": 2.4048, | |
| "step": 2506 | |
| }, | |
| { | |
| "epoch": 0.8014700007989135, | |
| "grad_norm": 0.28588590025901794, | |
| "learning_rate": 1.0449218774766889e-05, | |
| "loss": 2.3309, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 0.8021091315810498, | |
| "grad_norm": 0.2944410741329193, | |
| "learning_rate": 1.0384658187663748e-05, | |
| "loss": 2.3953, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.8027482623631861, | |
| "grad_norm": 0.2840878665447235, | |
| "learning_rate": 1.0320274543470104e-05, | |
| "loss": 2.3798, | |
| "step": 2512 | |
| }, | |
| { | |
| "epoch": 0.8033873931453224, | |
| "grad_norm": 0.293317049741745, | |
| "learning_rate": 1.025606812975709e-05, | |
| "loss": 2.3728, | |
| "step": 2514 | |
| }, | |
| { | |
| "epoch": 0.8040265239274587, | |
| "grad_norm": 0.2826627492904663, | |
| "learning_rate": 1.0192039233304273e-05, | |
| "loss": 2.3842, | |
| "step": 2516 | |
| }, | |
| { | |
| "epoch": 0.804665654709595, | |
| "grad_norm": 0.2971399128437042, | |
| "learning_rate": 1.0128188140098299e-05, | |
| "loss": 2.3756, | |
| "step": 2518 | |
| }, | |
| { | |
| "epoch": 0.8053047854917312, | |
| "grad_norm": 0.28738152980804443, | |
| "learning_rate": 1.0064515135331654e-05, | |
| "loss": 2.3647, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.8059439162738675, | |
| "grad_norm": 0.287779301404953, | |
| "learning_rate": 1.000102050340142e-05, | |
| "loss": 2.3795, | |
| "step": 2522 | |
| }, | |
| { | |
| "epoch": 0.8065830470560038, | |
| "grad_norm": 0.2945204973220825, | |
| "learning_rate": 9.937704527907927e-06, | |
| "loss": 2.42, | |
| "step": 2524 | |
| }, | |
| { | |
| "epoch": 0.8072221778381401, | |
| "grad_norm": 0.28568845987319946, | |
| "learning_rate": 9.874567491653581e-06, | |
| "loss": 2.3801, | |
| "step": 2526 | |
| }, | |
| { | |
| "epoch": 0.8078613086202764, | |
| "grad_norm": 0.2844650447368622, | |
| "learning_rate": 9.811609676641503e-06, | |
| "loss": 2.3435, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 0.8085004394024127, | |
| "grad_norm": 0.2895965576171875, | |
| "learning_rate": 9.748831364074334e-06, | |
| "loss": 2.4045, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.809139570184549, | |
| "grad_norm": 0.29292136430740356, | |
| "learning_rate": 9.686232834353e-06, | |
| "loss": 2.3816, | |
| "step": 2532 | |
| }, | |
| { | |
| "epoch": 0.8097787009666854, | |
| "grad_norm": 0.29002174735069275, | |
| "learning_rate": 9.623814367075362e-06, | |
| "loss": 2.4422, | |
| "step": 2534 | |
| }, | |
| { | |
| "epoch": 0.8104178317488216, | |
| "grad_norm": 0.2902061641216278, | |
| "learning_rate": 9.56157624103506e-06, | |
| "loss": 2.3902, | |
| "step": 2536 | |
| }, | |
| { | |
| "epoch": 0.8110569625309579, | |
| "grad_norm": 0.2917480170726776, | |
| "learning_rate": 9.499518734220248e-06, | |
| "loss": 2.3778, | |
| "step": 2538 | |
| }, | |
| { | |
| "epoch": 0.8116960933130942, | |
| "grad_norm": 0.2819055914878845, | |
| "learning_rate": 9.437642123812313e-06, | |
| "loss": 2.3771, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.8123352240952305, | |
| "grad_norm": 0.29391229152679443, | |
| "learning_rate": 9.37594668618466e-06, | |
| "loss": 2.3817, | |
| "step": 2542 | |
| }, | |
| { | |
| "epoch": 0.8129743548773668, | |
| "grad_norm": 0.2887795567512512, | |
| "learning_rate": 9.314432696901525e-06, | |
| "loss": 2.4087, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 0.8136134856595031, | |
| "grad_norm": 0.29348647594451904, | |
| "learning_rate": 9.253100430716643e-06, | |
| "loss": 2.4386, | |
| "step": 2546 | |
| }, | |
| { | |
| "epoch": 0.8142526164416394, | |
| "grad_norm": 0.2984285056591034, | |
| "learning_rate": 9.191950161572139e-06, | |
| "loss": 2.4053, | |
| "step": 2548 | |
| }, | |
| { | |
| "epoch": 0.8148917472237757, | |
| "grad_norm": 0.2919084429740906, | |
| "learning_rate": 9.130982162597185e-06, | |
| "loss": 2.3171, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.815530878005912, | |
| "grad_norm": 0.29588842391967773, | |
| "learning_rate": 9.070196706106859e-06, | |
| "loss": 2.3654, | |
| "step": 2552 | |
| }, | |
| { | |
| "epoch": 0.8161700087880482, | |
| "grad_norm": 0.28022581338882446, | |
| "learning_rate": 9.00959406360094e-06, | |
| "loss": 2.3766, | |
| "step": 2554 | |
| }, | |
| { | |
| "epoch": 0.8168091395701845, | |
| "grad_norm": 0.28672081232070923, | |
| "learning_rate": 8.949174505762625e-06, | |
| "loss": 2.3771, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 0.8174482703523208, | |
| "grad_norm": 0.2878251075744629, | |
| "learning_rate": 8.888938302457355e-06, | |
| "loss": 2.4182, | |
| "step": 2558 | |
| }, | |
| { | |
| "epoch": 0.8180874011344571, | |
| "grad_norm": 0.310584157705307, | |
| "learning_rate": 8.828885722731667e-06, | |
| "loss": 2.3666, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.8187265319165934, | |
| "grad_norm": 0.28921300172805786, | |
| "learning_rate": 8.769017034811877e-06, | |
| "loss": 2.4053, | |
| "step": 2562 | |
| }, | |
| { | |
| "epoch": 0.8193656626987297, | |
| "grad_norm": 0.29302507638931274, | |
| "learning_rate": 8.709332506102963e-06, | |
| "loss": 2.408, | |
| "step": 2564 | |
| }, | |
| { | |
| "epoch": 0.820004793480866, | |
| "grad_norm": 0.29055455327033997, | |
| "learning_rate": 8.64983240318738e-06, | |
| "loss": 2.3888, | |
| "step": 2566 | |
| }, | |
| { | |
| "epoch": 0.8206439242630024, | |
| "grad_norm": 0.299217164516449, | |
| "learning_rate": 8.5905169918238e-06, | |
| "loss": 2.3586, | |
| "step": 2568 | |
| }, | |
| { | |
| "epoch": 0.8212830550451387, | |
| "grad_norm": 0.2907954454421997, | |
| "learning_rate": 8.531386536945979e-06, | |
| "loss": 2.374, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.8219221858272749, | |
| "grad_norm": 0.28916412591934204, | |
| "learning_rate": 8.472441302661588e-06, | |
| "loss": 2.3991, | |
| "step": 2572 | |
| }, | |
| { | |
| "epoch": 0.8225613166094112, | |
| "grad_norm": 0.2865026295185089, | |
| "learning_rate": 8.413681552250952e-06, | |
| "loss": 2.3937, | |
| "step": 2574 | |
| }, | |
| { | |
| "epoch": 0.8232004473915475, | |
| "grad_norm": 0.2815338671207428, | |
| "learning_rate": 8.355107548165986e-06, | |
| "loss": 2.4249, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 0.8238395781736838, | |
| "grad_norm": 0.28036150336265564, | |
| "learning_rate": 8.29671955202892e-06, | |
| "loss": 2.3961, | |
| "step": 2578 | |
| }, | |
| { | |
| "epoch": 0.8244787089558201, | |
| "grad_norm": 0.2878170907497406, | |
| "learning_rate": 8.238517824631186e-06, | |
| "loss": 2.388, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.8251178397379564, | |
| "grad_norm": 0.2888602316379547, | |
| "learning_rate": 8.180502625932262e-06, | |
| "loss": 2.3992, | |
| "step": 2582 | |
| }, | |
| { | |
| "epoch": 0.8257569705200927, | |
| "grad_norm": 0.28073200583457947, | |
| "learning_rate": 8.122674215058474e-06, | |
| "loss": 2.4165, | |
| "step": 2584 | |
| }, | |
| { | |
| "epoch": 0.826396101302229, | |
| "grad_norm": 0.2997811734676361, | |
| "learning_rate": 8.06503285030184e-06, | |
| "loss": 2.385, | |
| "step": 2586 | |
| }, | |
| { | |
| "epoch": 0.8270352320843652, | |
| "grad_norm": 0.2915116548538208, | |
| "learning_rate": 8.00757878911897e-06, | |
| "loss": 2.3992, | |
| "step": 2588 | |
| }, | |
| { | |
| "epoch": 0.8276743628665015, | |
| "grad_norm": 0.2960700988769531, | |
| "learning_rate": 7.950312288129851e-06, | |
| "loss": 2.414, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.8283134936486378, | |
| "grad_norm": 0.29140692949295044, | |
| "learning_rate": 7.893233603116718e-06, | |
| "loss": 2.3928, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 0.8289526244307741, | |
| "grad_norm": 0.27954110503196716, | |
| "learning_rate": 7.836342989022965e-06, | |
| "loss": 2.3804, | |
| "step": 2594 | |
| }, | |
| { | |
| "epoch": 0.8295917552129104, | |
| "grad_norm": 0.29807767271995544, | |
| "learning_rate": 7.779640699951907e-06, | |
| "loss": 2.3579, | |
| "step": 2596 | |
| }, | |
| { | |
| "epoch": 0.8302308859950467, | |
| "grad_norm": 0.29681557416915894, | |
| "learning_rate": 7.723126989165757e-06, | |
| "loss": 2.3946, | |
| "step": 2598 | |
| }, | |
| { | |
| "epoch": 0.8308700167771831, | |
| "grad_norm": 0.29523608088493347, | |
| "learning_rate": 7.666802109084392e-06, | |
| "loss": 2.3596, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.8315091475593194, | |
| "grad_norm": 0.29985934495925903, | |
| "learning_rate": 7.610666311284281e-06, | |
| "loss": 2.3912, | |
| "step": 2602 | |
| }, | |
| { | |
| "epoch": 0.8321482783414557, | |
| "grad_norm": 0.28666752576828003, | |
| "learning_rate": 7.554719846497388e-06, | |
| "loss": 2.3854, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 0.832787409123592, | |
| "grad_norm": 0.2913071811199188, | |
| "learning_rate": 7.498962964609968e-06, | |
| "loss": 2.3666, | |
| "step": 2606 | |
| }, | |
| { | |
| "epoch": 0.8334265399057282, | |
| "grad_norm": 0.29162222146987915, | |
| "learning_rate": 7.443395914661522e-06, | |
| "loss": 2.3981, | |
| "step": 2608 | |
| }, | |
| { | |
| "epoch": 0.8340656706878645, | |
| "grad_norm": 0.29527220129966736, | |
| "learning_rate": 7.388018944843672e-06, | |
| "loss": 2.3537, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.8347048014700008, | |
| "grad_norm": 0.2950831949710846, | |
| "learning_rate": 7.332832302499026e-06, | |
| "loss": 2.4067, | |
| "step": 2612 | |
| }, | |
| { | |
| "epoch": 0.8353439322521371, | |
| "grad_norm": 0.2981114983558655, | |
| "learning_rate": 7.277836234120084e-06, | |
| "loss": 2.3714, | |
| "step": 2614 | |
| }, | |
| { | |
| "epoch": 0.8359830630342734, | |
| "grad_norm": 0.28399455547332764, | |
| "learning_rate": 7.223030985348172e-06, | |
| "loss": 2.383, | |
| "step": 2616 | |
| }, | |
| { | |
| "epoch": 0.8366221938164097, | |
| "grad_norm": 0.29486241936683655, | |
| "learning_rate": 7.168416800972289e-06, | |
| "loss": 2.3984, | |
| "step": 2618 | |
| }, | |
| { | |
| "epoch": 0.837261324598546, | |
| "grad_norm": 0.29735833406448364, | |
| "learning_rate": 7.113993924928031e-06, | |
| "loss": 2.3797, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.8379004553806823, | |
| "grad_norm": 0.282082736492157, | |
| "learning_rate": 7.059762600296565e-06, | |
| "loss": 2.4036, | |
| "step": 2622 | |
| }, | |
| { | |
| "epoch": 0.8385395861628185, | |
| "grad_norm": 0.2950472831726074, | |
| "learning_rate": 7.00572306930341e-06, | |
| "loss": 2.4086, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 0.8391787169449548, | |
| "grad_norm": 0.30244186520576477, | |
| "learning_rate": 6.9518755733175015e-06, | |
| "loss": 2.417, | |
| "step": 2626 | |
| }, | |
| { | |
| "epoch": 0.8398178477270911, | |
| "grad_norm": 0.2779163718223572, | |
| "learning_rate": 6.898220352850016e-06, | |
| "loss": 2.4142, | |
| "step": 2628 | |
| }, | |
| { | |
| "epoch": 0.8404569785092274, | |
| "grad_norm": 0.2983788847923279, | |
| "learning_rate": 6.844757647553313e-06, | |
| "loss": 2.3874, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.8410961092913637, | |
| "grad_norm": 0.2850136160850525, | |
| "learning_rate": 6.791487696219917e-06, | |
| "loss": 2.4152, | |
| "step": 2632 | |
| }, | |
| { | |
| "epoch": 0.8417352400735001, | |
| "grad_norm": 0.2794429063796997, | |
| "learning_rate": 6.738410736781376e-06, | |
| "loss": 2.386, | |
| "step": 2634 | |
| }, | |
| { | |
| "epoch": 0.8423743708556364, | |
| "grad_norm": 0.2782376706600189, | |
| "learning_rate": 6.685527006307263e-06, | |
| "loss": 2.3852, | |
| "step": 2636 | |
| }, | |
| { | |
| "epoch": 0.8430135016377727, | |
| "grad_norm": 0.27808862924575806, | |
| "learning_rate": 6.632836741004067e-06, | |
| "loss": 2.357, | |
| "step": 2638 | |
| }, | |
| { | |
| "epoch": 0.843652632419909, | |
| "grad_norm": 0.28896915912628174, | |
| "learning_rate": 6.580340176214162e-06, | |
| "loss": 2.3943, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.8442917632020452, | |
| "grad_norm": 0.28596818447113037, | |
| "learning_rate": 6.528037546414772e-06, | |
| "loss": 2.3819, | |
| "step": 2642 | |
| }, | |
| { | |
| "epoch": 0.8449308939841815, | |
| "grad_norm": 0.28315383195877075, | |
| "learning_rate": 6.475929085216898e-06, | |
| "loss": 2.3896, | |
| "step": 2644 | |
| }, | |
| { | |
| "epoch": 0.8455700247663178, | |
| "grad_norm": 0.2776339650154114, | |
| "learning_rate": 6.424015025364255e-06, | |
| "loss": 2.3858, | |
| "step": 2646 | |
| }, | |
| { | |
| "epoch": 0.8462091555484541, | |
| "grad_norm": 0.3017416298389435, | |
| "learning_rate": 6.3722955987323086e-06, | |
| "loss": 2.3708, | |
| "step": 2648 | |
| }, | |
| { | |
| "epoch": 0.8468482863305904, | |
| "grad_norm": 0.28161972761154175, | |
| "learning_rate": 6.32077103632715e-06, | |
| "loss": 2.3795, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.8474874171127267, | |
| "grad_norm": 0.28384917974472046, | |
| "learning_rate": 6.269441568284512e-06, | |
| "loss": 2.379, | |
| "step": 2652 | |
| }, | |
| { | |
| "epoch": 0.848126547894863, | |
| "grad_norm": 0.27597635984420776, | |
| "learning_rate": 6.218307423868763e-06, | |
| "loss": 2.3901, | |
| "step": 2654 | |
| }, | |
| { | |
| "epoch": 0.8487656786769993, | |
| "grad_norm": 0.28016501665115356, | |
| "learning_rate": 6.167368831471804e-06, | |
| "loss": 2.3624, | |
| "step": 2656 | |
| }, | |
| { | |
| "epoch": 0.8494048094591355, | |
| "grad_norm": 0.27688878774642944, | |
| "learning_rate": 6.116626018612148e-06, | |
| "loss": 2.3409, | |
| "step": 2658 | |
| }, | |
| { | |
| "epoch": 0.8500439402412718, | |
| "grad_norm": 0.2857888340950012, | |
| "learning_rate": 6.066079211933823e-06, | |
| "loss": 2.3542, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.8506830710234081, | |
| "grad_norm": 0.29163241386413574, | |
| "learning_rate": 6.015728637205381e-06, | |
| "loss": 2.4079, | |
| "step": 2662 | |
| }, | |
| { | |
| "epoch": 0.8513222018055444, | |
| "grad_norm": 0.2831498682498932, | |
| "learning_rate": 5.965574519318945e-06, | |
| "loss": 2.3827, | |
| "step": 2664 | |
| }, | |
| { | |
| "epoch": 0.8519613325876808, | |
| "grad_norm": 0.2795211970806122, | |
| "learning_rate": 5.915617082289121e-06, | |
| "loss": 2.4057, | |
| "step": 2666 | |
| }, | |
| { | |
| "epoch": 0.8526004633698171, | |
| "grad_norm": 0.2804792523384094, | |
| "learning_rate": 5.865856549252024e-06, | |
| "loss": 2.3572, | |
| "step": 2668 | |
| }, | |
| { | |
| "epoch": 0.8532395941519534, | |
| "grad_norm": 0.2849322259426117, | |
| "learning_rate": 5.8162931424643354e-06, | |
| "loss": 2.4343, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.8538787249340897, | |
| "grad_norm": 0.2787237763404846, | |
| "learning_rate": 5.766927083302242e-06, | |
| "loss": 2.3972, | |
| "step": 2672 | |
| }, | |
| { | |
| "epoch": 0.854517855716226, | |
| "grad_norm": 0.29841259121894836, | |
| "learning_rate": 5.717758592260458e-06, | |
| "loss": 2.4236, | |
| "step": 2674 | |
| }, | |
| { | |
| "epoch": 0.8551569864983622, | |
| "grad_norm": 0.27593082189559937, | |
| "learning_rate": 5.668787888951293e-06, | |
| "loss": 2.3986, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 0.8557961172804985, | |
| "grad_norm": 0.2909466028213501, | |
| "learning_rate": 5.6200151921036e-06, | |
| "loss": 2.4406, | |
| "step": 2678 | |
| }, | |
| { | |
| "epoch": 0.8564352480626348, | |
| "grad_norm": 0.277767151594162, | |
| "learning_rate": 5.5714407195618324e-06, | |
| "loss": 2.3843, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.8570743788447711, | |
| "grad_norm": 0.29138174653053284, | |
| "learning_rate": 5.5230646882851065e-06, | |
| "loss": 2.3561, | |
| "step": 2682 | |
| }, | |
| { | |
| "epoch": 0.8577135096269074, | |
| "grad_norm": 0.28554069995880127, | |
| "learning_rate": 5.4748873143461386e-06, | |
| "loss": 2.3821, | |
| "step": 2684 | |
| }, | |
| { | |
| "epoch": 0.8583526404090437, | |
| "grad_norm": 0.2731013298034668, | |
| "learning_rate": 5.426908812930398e-06, | |
| "loss": 2.3795, | |
| "step": 2686 | |
| }, | |
| { | |
| "epoch": 0.85899177119118, | |
| "grad_norm": 0.2817385196685791, | |
| "learning_rate": 5.379129398335036e-06, | |
| "loss": 2.3547, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 0.8596309019733163, | |
| "grad_norm": 0.28192901611328125, | |
| "learning_rate": 5.33154928396799e-06, | |
| "loss": 2.394, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.8602700327554526, | |
| "grad_norm": 0.2704789936542511, | |
| "learning_rate": 5.284168682347046e-06, | |
| "loss": 2.3748, | |
| "step": 2692 | |
| }, | |
| { | |
| "epoch": 0.8609091635375888, | |
| "grad_norm": 0.27345237135887146, | |
| "learning_rate": 5.236987805098831e-06, | |
| "loss": 2.3979, | |
| "step": 2694 | |
| }, | |
| { | |
| "epoch": 0.8615482943197251, | |
| "grad_norm": 0.27603331208229065, | |
| "learning_rate": 5.190006862957891e-06, | |
| "loss": 2.3502, | |
| "step": 2696 | |
| }, | |
| { | |
| "epoch": 0.8621874251018614, | |
| "grad_norm": 0.2732599079608917, | |
| "learning_rate": 5.143226065765794e-06, | |
| "loss": 2.3963, | |
| "step": 2698 | |
| }, | |
| { | |
| "epoch": 0.8628265558839978, | |
| "grad_norm": 0.2819327414035797, | |
| "learning_rate": 5.0966456224701195e-06, | |
| "loss": 2.4019, | |
| "step": 2700 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 3130, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 300, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.039795302576947e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
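
The object above is the tail of a `trainer_state.json` as written by the Hugging Face `transformers` `Trainer`: a `log_history` array of per-step records followed by run-level settings such as `logging_steps`, `max_steps`, and `save_steps`. Below is a minimal sketch of how one might load this file and summarize the logged training curve. The checkpoint path is assumed for illustration (the `Trainer` writes the file into each `checkpoint-*` directory); only keys visible in the state file itself are used.

```python
import json
from statistics import mean

# Minimal sketch: summarize the log_history of a Trainer state file.
# The path "checkpoint-2700/trainer_state.json" is assumed for illustration.
with open("checkpoint-2700/trainer_state.json") as f:
    state = json.load(f)

# Keep only entries that carry a training loss; evaluation entries,
# if present, use different keys (e.g. eval_loss) and are skipped here.
train_logs = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in train_logs]
losses = [e["loss"] for e in train_logs]
lrs = [e["learning_rate"] for e in train_logs]

print(f"{len(train_logs)} training log entries, "
      f"logged every {state['logging_steps']} steps")
print(f"progress: step {steps[-1]} of {state['max_steps']}")
print(f"loss: first logged {losses[0]:.4f}, last {losses[-1]:.4f}, "
      f"mean of final 50 logs {mean(losses[-50:]):.4f}")
print(f"learning rate at last log: {lrs[-1]:.3e}")
```

For the span shown, the loss has plateaued around 2.38 while the learning rate continues to fall from roughly 1.03e-05 toward 5.10e-06 by step 2700, consistent with a decaying schedule still in its final phase (the state file does not record the scheduler type, so that reading is an inference from the values, not a recorded fact). With `save_steps` of 300, the entry at step 2700 coincides with a checkpoint boundary.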